//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT).  These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in this case numelts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for the vector VT. For vector types it is
  // v # NumElts # EltVT, so for a vector of 8 elements of i32 it is v8i32.
  // Scalar types, where NumElts = 1, are a little more involved: in that
  // case we build v4f32 or v2f64.
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);

  PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f32"),
                                           !cast<PatFrags>("sse_load_f32"),
                               !if (!eq (EltTypeName, "f64"),
                                     !cast<PatFrags>("sse_load_f64"),
                               ?));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format.  This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));

  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  dag ImmAllZerosV = (VT immAllZerosV);

  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}

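// As an illustration of the derived fields (taking v16f32_info below as an
// example): NumElts = 16 and EltVT = f32 give KRC = VK16, KRCWM = VK16WM,
// KVT = v16i1, VTName = "v16f32", EltSize = 32, Size = 512,
// BroadcastStr = "{1to16}", ExeDomain = SSEPackedSingle and ZSuffix = "Z".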
def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type with the
// appropriate element type. This allows us to reuse the same masking logic.
def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
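// For example, f32x_info has NumElts = 1 and EltVT.Size = 32, so the VTName
// logic above yields VT = v4f32 (only the low element is meaningful), with
// FRC = FR32X and IntScalarMemOp = ssmem.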

class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;

class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;

// Used for matching masked operations. Ensures the operation part only has a
// single use.
def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
                           (vselect node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;

def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
                              (X86selects node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;
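// The masking patterns defined below all take the form
//   (vselect_mask KRCWM:$mask, (op ...), $src0)          -- merge-masking
//   (vselect_mask KRCWM:$mask, (op ...), ImmAllZerosV)   -- zero-masking
// (or X86selects_mask for scalar ops), so these predicated fragments gate
// whether a masked instruction is formed at all.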

// This multiclass generates the masking variants from the non-masking
// variant.  It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> {
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }

  // Zero-masking does not add any restrictions to the operand-commuting
  // transformation, so it is OK to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
              EVEX_KZ;
}
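// So for each base record NAME this produces three instructions: NAME (no
// masking), NAME#k (merge-masking, EVEX_K, written "$dst {${mask}}") and
// NAME#kz (zero-masking, EVEX_KZ, written "$dst {${mask}} {z}").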


// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDPatternOperator Select = vselect_mask,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskRHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDPatternOperator Select = vselect_mask> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS> :
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, 0, 0, 0, X86selects_mask>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDPatternOperator Select = vselect_mask,
                                bit MaskOnly = 0> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          !if(MaskOnly, (null_frag), RHS),
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                          Select, "", IsCommutable, IsKCommutable>;
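// Because $src1 is already tied to $dst, it doubles as the merge source in
// the Select above, so no extra $src0 operand is added and MaskingConstraint
// is left empty here.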

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          (vselect_mask InVT.KRCWM:$mask, RHS,
                           (bitconvert InVT.RC:$src1)),
                           vselect_mask, "", IsCommutable>;

multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0,
                                     bit MaskOnly = 0> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                        X86selects_mask, MaskOnly>;

multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;

multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "">;

// Instructions with a mask that put the result in a mask register,
// like "compare" and "vptest".
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  bit IsCommutable = 0> {
    let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
    }
}

multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.KRC:$dst, RHS)],
                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag RHS_su, bit IsCommutable = 0> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (and _.KRCWM:$mask, RHS_su), IsCommutable>;

// Used by conversion instructions.
multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst, ZeroMaskingRHS)],
                         "$src0 = $dst">;

multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag NonTiedIns, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, bit IsCommutable,
                               bit IsKCommutable> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
                          [(set _.RC:$dst,
                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
                          "", IsCommutable, IsKCommutable>;

// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}
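// Together with the v16i32 pattern on the pseudo itself, the Pat<> entries
// above make an all-zeros constant of every 512-bit type select the single
// AVX512_512_SET0 pseudo.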

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                           (v8i64 immAllOnesV),
                                           (v8i64 immAllZerosV)))]>;
}

let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for masked and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
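// For example, when this is instantiated below with From = v4f32 and
// To = v16f32, the mnemonic string evaluates to "vinsertf32x4" and the defm
// prefix "VINSERTF32x4Z" yields records such as VINSERTF32x4Zrr,
// VINSERTF32x4Zrrk and VINSERTF32x4Zrrkz via the AVX512_maskable machinery.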

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (From.LdFrag addr:$src2)),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   null_frag, vinsert256_insert, sched>,
                                   EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
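// The two defms above expand to the VINSERTF32x4Z256/Z, VINSERTF64x4Z,
// VINSERTF64x2Z256/Z and VINSERTF32x8Z families and their VINSERTI
// counterparts, per the suffixes concatenated in vinsert_for_type.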

// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;


multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT
                                                  (bitconvert
                                                   (From.LdFrag addr:$src2))),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          timm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
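// The vinsertps immediate follows the SSE4.1 insertps encoding: bits [7:6]
// select the source element, bits [5:4] select the destination element, and
// bits [3:0] are a zero mask applied to the result.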

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for masked and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                                        From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$idx, $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}
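// Mirroring the insert case: instantiated below with From = v16f32 and
// To = v4f32, the mnemonic evaluates to "vextractf32x4" and the defm prefix
// "VEXTRACTF32x4Z" yields VEXTRACTF32x4Zrr, the masked rr forms, plus the mr
// and mrk store variants defined here.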

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen pattern for the alternative types
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
               (To.VT (!cast<Instruction>(InstrStr#"rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                              (iPTR imm))), addr:$dst),
               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
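// As with the insert side, these two defms expand to the VEXTRACTF32x4Z256/Z,
// VEXTRACTF64x4Z, VEXTRACTF64x2Z256/Z and VEXTRACTF32x8Z families and their
// VEXTRACTI counterparts.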
924
925 // extract_subvector codegen patterns with the alternative types.
926 // Even with AVX512DQ we'll still use these for unmasked operations.
927 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
928           vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
929 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
930           vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
931
932 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
933           vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
934 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
935           vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
936
937 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
938           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
939 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
940           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
941
942 // Codegen pattern with the alternative types extract VEC128 from VEC256
943 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
944           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
945 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
946           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
947
948 // Codegen pattern with the alternative types extract VEC128 from VEC512
949 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
950                  vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
951 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
952                  vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
953 // Codegen pattern with the alternative types extract VEC256 from VEC512
954 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
955                  vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
956 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
957                  vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
958
959
960 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
961 // smaller extract to enable EVEX->VEX.
962 let Predicates = [NoVLX] in {
963 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
964           (v2i64 (VEXTRACTI128rr
965                   (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
966                   (iPTR 1)))>;
967 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
968           (v2f64 (VEXTRACTF128rr
969                   (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
970                   (iPTR 1)))>;
971 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
972           (v4i32 (VEXTRACTI128rr
973                   (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
974                   (iPTR 1)))>;
975 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
976           (v4f32 (VEXTRACTF128rr
977                   (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
978                   (iPTR 1)))>;
979 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
980           (v8i16 (VEXTRACTI128rr
981                   (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
982                   (iPTR 1)))>;
983 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
984           (v16i8 (VEXTRACTI128rr
985                   (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
986                   (iPTR 1)))>;
987 }
988
989 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
990 // smaller extract to enable EVEX->VEX.
991 let Predicates = [HasVLX] in {
992 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
993           (v2i64 (VEXTRACTI32x4Z256rr
994                   (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
995                   (iPTR 1)))>;
996 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
997           (v2f64 (VEXTRACTF32x4Z256rr
998                   (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
999                   (iPTR 1)))>;
1000 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
1001           (v4i32 (VEXTRACTI32x4Z256rr
1002                   (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
1003                   (iPTR 1)))>;
1004 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
1005           (v4f32 (VEXTRACTF32x4Z256rr
1006                   (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
1007                   (iPTR 1)))>;
1008 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
1009           (v8i16 (VEXTRACTI32x4Z256rr
1010                   (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
1011                   (iPTR 1)))>;
1012 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
1013           (v16i8 (VEXTRACTI32x4Z256rr
1014                   (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
1015                   (iPTR 1)))>;
1016 }
1017
1018
1019 // Additional patterns for handling a bitcast between the vselect and the
1020 // extract_subvector.
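// A rough sketch of the DAG shape being matched (types are just one example,
// taken from the VEXTRACTI32x4Z256 instantiation below): the extract produces
// v2i64 while the mask operates on v4i32, so a bitconvert sits in between:
//
//   (v4i32 (vselect_mask VK4WM:$mask,
//            (bitconvert (v2i64 (extract_subvector (v4i64 VR256X:$src), (iPTR 2)))),
//            VR128X:$src0))
//
// and it should still select the masked instruction, e.g. VEXTRACTI32x4Z256rrk.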
1021 multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
1022                                   X86VectorVTInfo To, X86VectorVTInfo Cast,
1023                                   PatFrag vextract_extract,
1024                                   SDNodeXForm EXTRACT_get_vextract_imm,
1025                                   list<Predicate> p> {
1026 let Predicates = p in {
1027   def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
1028                                    (bitconvert
1029                                     (To.VT (vextract_extract:$ext
1030                                             (From.VT From.RC:$src), (iPTR imm)))),
1031                                    To.RC:$src0)),
1032             (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
1033                       Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
1034                       (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1035
1036   def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
1037                                    (bitconvert
1038                                     (To.VT (vextract_extract:$ext
1039                                             (From.VT From.RC:$src), (iPTR imm)))),
1040                                    Cast.ImmAllZerosV)),
1041             (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
1042                       Cast.KRCWM:$mask, From.RC:$src,
1043                       (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1044 }
1045 }
1046
1047 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
1048                               v4f32x_info, vextract128_extract,
1049                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1050 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
1051                               v2f64x_info, vextract128_extract,
1052                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1053
1054 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
1055                               v4i32x_info, vextract128_extract,
1056                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1057 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
1058                               v4i32x_info, vextract128_extract,
1059                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1060 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
1061                               v4i32x_info, vextract128_extract,
1062                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1063 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
1064                               v2i64x_info, vextract128_extract,
1065                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1066 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1067                               v2i64x_info, vextract128_extract,
1068                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1069 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1070                               v2i64x_info, vextract128_extract,
1071                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1072
1073 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1074                               v4f32x_info, vextract128_extract,
1075                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1076 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1077                               v2f64x_info, vextract128_extract,
1078                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1079
1080 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1081                               v4i32x_info, vextract128_extract,
1082                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1083 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1084                               v4i32x_info, vextract128_extract,
1085                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1086 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1087                               v4i32x_info, vextract128_extract,
1088                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1089 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1090                               v2i64x_info, vextract128_extract,
1091                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1092 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1093                               v2i64x_info, vextract128_extract,
1094                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1095 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1096                               v2i64x_info, vextract128_extract,
1097                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1098
1099 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1100                               v8f32x_info, vextract256_extract,
1101                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1102 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1103                               v4f64x_info, vextract256_extract,
1104                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1105
1106 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1107                               v8i32x_info, vextract256_extract,
1108                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1109 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1110                               v8i32x_info, vextract256_extract,
1111                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1112 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1113                               v8i32x_info, vextract256_extract,
1114                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1115 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1116                               v4i64x_info, vextract256_extract,
1117                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1118 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1119                               v4i64x_info, vextract256_extract,
1120                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1121 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1122                               v4i64x_info, vextract256_extract,
1123                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1124
1125 // vextractps - extract 32 bits from XMM
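// For example (AT&T syntax, registers arbitrary):
//   vextractps $2, %xmm0, %eax     ; element 2 of xmm0 into a GR32
//   vextractps $2, %xmm0, (%rdi)   ; element 2 of xmm0 stored to memory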
1126 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
1127       (ins VR128X:$src1, u8imm:$src2),
1128       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1129       [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1130       EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
1131
1132 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1133       (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1134       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1135       [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1136                           addr:$dst)]>,
1137       EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1138
1139 //===---------------------------------------------------------------------===//
1140 // AVX-512 BROADCAST
1141 //---
1142 // broadcast with a scalar argument.
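// Roughly: a broadcast whose source is still in a scalar FP register class
// (FR32X/FR64X) is handled by copying the scalar into the matching XMM class
// and reusing the register form of the broadcast, e.g.
//
//   (v16f32 (X86VBroadcast FR32X:$src))
//     -> (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))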
1143 multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
1144                             string Name,
1145                             X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
1146   def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1147             (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
1148              (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1149   def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1150                                        (X86VBroadcast SrcInfo.FRC:$src),
1151                                        DestInfo.RC:$src0)),
1152             (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
1153              DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1154              (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1155   def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1156                                        (X86VBroadcast SrcInfo.FRC:$src),
1157                                        DestInfo.ImmAllZerosV)),
1158             (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
1159              DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1160 }
1161
1162 // Split version to allow mask and broadcast node to be different types. This
1163 // helps support the 32x2 broadcasts.
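// For example, in the 256-bit VBROADCASTI32X2 instantiation further down the
// broadcast node itself is expressed at 64-bit granularity,
// (v4i64 (X86VBroadcast (v2i64 VR128X:$src))), while the writemask selects
// 32-bit elements, so the result is bitconverted to the mask type v8i32 and
// selected under a VK8WM mask.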
1164 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1165                                      string Name,
1166                                      SchedWrite SchedRR, SchedWrite SchedRM,
1167                                      X86VectorVTInfo MaskInfo,
1168                                      X86VectorVTInfo DestInfo,
1169                                      X86VectorVTInfo SrcInfo,
1170                                      bit IsConvertibleToThreeAddress,
1171                                      SDPatternOperator UnmaskedOp = X86VBroadcast,
1172                                      SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
1173   let hasSideEffects = 0 in
1174   def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1175                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1176                     [(set MaskInfo.RC:$dst,
1177                       (MaskInfo.VT
1178                        (bitconvert
1179                         (DestInfo.VT
1180                          (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1181                     DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
1182   def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1183                       (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1184                       !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1185                        "${dst} {${mask}} {z}, $src}"),
1186                        [(set MaskInfo.RC:$dst,
1187                          (vselect_mask MaskInfo.KRCWM:$mask,
1188                           (MaskInfo.VT
1189                            (bitconvert
1190                             (DestInfo.VT
1191                              (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1192                           MaskInfo.ImmAllZerosV))],
1193                        DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
1194   let Constraints = "$src0 = $dst" in
1195   def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1196                      (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1197                           SrcInfo.RC:$src),
1198                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1199                      "${dst} {${mask}}, $src}"),
1200                      [(set MaskInfo.RC:$dst,
1201                        (vselect_mask MaskInfo.KRCWM:$mask,
1202                         (MaskInfo.VT
1203                          (bitconvert
1204                           (DestInfo.VT
1205                            (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1206                         MaskInfo.RC:$src0))],
1207                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1208
1209   let hasSideEffects = 0, mayLoad = 1 in
1210   def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1211                     (ins SrcInfo.ScalarMemOp:$src),
1212                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1213                     [(set MaskInfo.RC:$dst,
1214                       (MaskInfo.VT
1215                        (bitconvert
1216                         (DestInfo.VT
1217                          (UnmaskedBcastOp addr:$src)))))],
1218                     DestInfo.ExeDomain>, T8PD, EVEX,
1219                     EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1220
1221   def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1222                       (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1223                       !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1224                        "${dst} {${mask}} {z}, $src}"),
1225                        [(set MaskInfo.RC:$dst,
1226                          (vselect_mask MaskInfo.KRCWM:$mask,
1227                           (MaskInfo.VT
1228                            (bitconvert
1229                             (DestInfo.VT
1230                              (SrcInfo.BroadcastLdFrag addr:$src)))),
1231                           MaskInfo.ImmAllZerosV))],
1232                        DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
1233                        EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1234
1235   let Constraints = "$src0 = $dst",
1236       isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1237   def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1238                      (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1239                           SrcInfo.ScalarMemOp:$src),
1240                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1241                      "${dst} {${mask}}, $src}"),
1242                      [(set MaskInfo.RC:$dst,
1243                        (vselect_mask MaskInfo.KRCWM:$mask,
1244                         (MaskInfo.VT
1245                          (bitconvert
1246                           (DestInfo.VT
1247                            (SrcInfo.BroadcastLdFrag addr:$src)))),
1248                         MaskInfo.RC:$src0))],
1249                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
1250                       EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1251 }
1252
1253 // Helper class to force mask and broadcast result to same type.
1254 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
1255                                SchedWrite SchedRR, SchedWrite SchedRM,
1256                                X86VectorVTInfo DestInfo,
1257                                X86VectorVTInfo SrcInfo,
1258                                bit IsConvertibleToThreeAddress> :
1259   avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
1260                             DestInfo, DestInfo, SrcInfo,
1261                             IsConvertibleToThreeAddress>;
1262
1263 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1264                                                        AVX512VLVectorVTInfo _> {
1265   let Predicates = [HasAVX512] in {
1266     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1267                                   WriteFShuffle256Ld, _.info512, _.info128, 1>,
1268               avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1269                                       _.info128>,
1270               EVEX_V512;
1271   }
1272
1273   let Predicates = [HasVLX] in {
1274     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1275                                      WriteFShuffle256Ld, _.info256, _.info128, 1>,
1276                  avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1277                                          _.info128>,
1278                  EVEX_V256;
1279   }
1280 }
1281
1282 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1283                                                        AVX512VLVectorVTInfo _> {
1284   let Predicates = [HasAVX512] in {
1285     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1286                                   WriteFShuffle256Ld, _.info512, _.info128, 1>,
1287               avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1288                                       _.info128>,
1289               EVEX_V512;
1290   }
1291
1292   let Predicates = [HasVLX] in {
1293     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1294                                      WriteFShuffle256Ld, _.info256, _.info128, 1>,
1295                  avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1296                                          _.info128>,
1297                  EVEX_V256;
1298     defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1299                                      WriteFShuffle256Ld, _.info128, _.info128, 1>,
1300                  avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
1301                                          _.info128>,
1302                  EVEX_V128;
1303   }
1304 }
1305 defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1306                                        avx512vl_f32_info>;
1307 defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1308                                        avx512vl_f64_info>, VEX_W1X;
1309
1310 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1311                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1312                                     RegisterClass SrcRC> {
1313   // Fold with a mask even if it has multiple uses since it is cheap.
1314   let ExeDomain = _.ExeDomain in
1315   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1316                           (ins SrcRC:$src),
1317                           "vpbroadcast"#_.Suffix, "$src", "$src",
1318                           (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
1319                           /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
1320                           T8PD, EVEX, Sched<[SchedRR]>;
1321 }
1322
1323 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1324                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1325                                     RegisterClass SrcRC, SubRegIndex Subreg> {
1326   let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1327   defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
1328                          (outs _.RC:$dst), (ins GR32:$src),
1329                          !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1330                          !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1331                          "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
1332                          "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1333
1334   def : Pat <(_.VT (OpNode SrcRC:$src)),
1335              (!cast<Instruction>(Name#rr)
1336               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1337
1338   // Fold with a mask even if it has multiple uses since it is cheap.
1339   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1340              (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
1341               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1342
1343   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1344              (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
1345               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1346 }
1347
1348 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1349                       AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1350                       RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1351   let Predicates = [prd] in
1352     defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1353               OpNode, SrcRC, Subreg>, EVEX_V512;
1354   let Predicates = [prd, HasVLX] in {
1355     defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1356               _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1357     defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1358               _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1359   }
1360 }
1361
1362 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1363                                        SDPatternOperator OpNode,
1364                                        RegisterClass SrcRC, Predicate prd> {
1365   let Predicates = [prd] in
1366     defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1367                                       SrcRC>, EVEX_V512;
1368   let Predicates = [prd, HasVLX] in {
1369     defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1370                                          SrcRC>, EVEX_V256;
1371     defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1372                                          SrcRC>, EVEX_V128;
1373   }
1374 }
1375
1376 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1377                        avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1378 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1379                        avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1380                        HasBWI>;
1381 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1382                                                  X86VBroadcast, GR32, HasAVX512>;
1383 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1384                                                  X86VBroadcast, GR64, HasAVX512>, VEX_W;
1385
1386 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1387                                         AVX512VLVectorVTInfo _, Predicate prd,
1388                                         bit IsConvertibleToThreeAddress> {
1389   let Predicates = [prd] in {
1390     defm Z :   avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1391                                    WriteShuffle256Ld, _.info512, _.info128,
1392                                    IsConvertibleToThreeAddress>,
1393                                   EVEX_V512;
1394   }
1395   let Predicates = [prd, HasVLX] in {
1396     defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1397                                     WriteShuffle256Ld, _.info256, _.info128,
1398                                     IsConvertibleToThreeAddress>,
1399                                  EVEX_V256;
1400     defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
1401                                     WriteShuffleXLd, _.info128, _.info128,
1402                                     IsConvertibleToThreeAddress>,
1403                                  EVEX_V128;
1404   }
1405 }
1406
1407 defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1408                                            avx512vl_i8_info, HasBWI, 0>;
1409 defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1410                                            avx512vl_i16_info, HasBWI, 0>;
1411 defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1412                                            avx512vl_i32_info, HasAVX512, 1>;
1413 defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1414                                            avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
1415
1416 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1417                           X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1418   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1419                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1420                            (_Dst.VT (X86SubVBroadcast
1421                              (_Src.VT (_Src.LdFrag addr:$src))))>,
1422                            Sched<[SchedWriteShuffle.YMM.Folded]>,
1423                            AVX5128IBase, EVEX;
1424 }
1425
1426 // This should be used for the AVX512DQ broadcast instructions. It disables
1427 // the unmasked patterns so that we only use the DQ instructions when masking
1428 // is requested.
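// Roughly: an unmasked subvector broadcast gives a bit-identical result
// regardless of the element width baked into the mnemonic (e.g.
// vbroadcastf64x2 vs. vbroadcastf32x4, or vbroadcastf32x8 vs. vbroadcastf64x4),
// so the unmasked cases keep using the AVX512F forms and the DQ encodings are
// selected only when their element-granular masking matters, e.g. the masked
// select patterns later in this file that map
// (vselect_mask ... (bc_v4f64 (X86SubVBroadcast (loadv4f32 ...))) ...) onto
// VBROADCASTF64X2Z128rmk / rmkz.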
1429 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1430                           X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1431   let hasSideEffects = 0, mayLoad = 1 in
1432   defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1433                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1434                            (null_frag),
1435                            (_Dst.VT (X86SubVBroadcast
1436                              (_Src.VT (_Src.LdFrag addr:$src))))>,
1437                            Sched<[SchedWriteShuffle.YMM.Folded]>,
1438                            AVX5128IBase, EVEX;
1439 }
1440
1441 //===----------------------------------------------------------------------===//
1442 // AVX-512 BROADCAST SUBVECTORS
1443 //
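// These forms replicate a 128-bit or 256-bit memory operand across the whole
// destination register and only exist with a memory source, e.g. (AT&T syntax):
//   vbroadcasti32x4 (%rdi), %zmm0   ; four copies of the 16-byte load
//   vbroadcastf64x4 (%rdi), %zmm0   ; two copies of the 32-byte load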
1444
1445 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1446                        v16i32_info, v4i32x_info>,
1447                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1448 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1449                        v16f32_info, v4f32x_info>,
1450                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1451 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1452                        v8i64_info, v4i64x_info>, VEX_W,
1453                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
1454 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1455                        v8f64_info, v4f64x_info>, VEX_W,
1456                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
1457
1458 let Predicates = [HasAVX512] in {
1459 def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
1460           (VBROADCASTF64X4rm addr:$src)>;
1461 def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
1462           (VBROADCASTI64X4rm addr:$src)>;
1463 def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
1464           (VBROADCASTI64X4rm addr:$src)>;
1465 def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
1466           (VBROADCASTI64X4rm addr:$src)>;
1467
1468 // Provide a fallback in case the load node used in the patterns above has
1469 // additional users, which prevents those patterns from being selected.
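// Roughly: if the 256-bit load has another user it cannot be folded into the
// broadcast instruction, so the broadcast is matched with a register source
// instead and lowered as an insert of the YMM value into both halves of the
// ZMM, e.g.
//
//   (v16f32 (X86SubVBroadcast (v8f32 VR256X:$src)))
//     -> VINSERTF64x4Zrr of $src into both halves, as in the patterns below.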
1470 def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
1471           (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1472                            (v4f64 VR256X:$src), 1)>;
1473 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
1474           (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1475                            (v8f32 VR256X:$src), 1)>;
1476 def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
1477           (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1478                            (v4i64 VR256X:$src), 1)>;
1479 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
1480           (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1481                            (v8i32 VR256X:$src), 1)>;
1482 def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
1483           (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1484                            (v16i16 VR256X:$src), 1)>;
1485 def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
1486           (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1487                            (v32i8 VR256X:$src), 1)>;
1488
1489 def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1490           (VBROADCASTF32X4rm addr:$src)>;
1491 def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1492           (VBROADCASTI32X4rm addr:$src)>;
1493 def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1494           (VBROADCASTI32X4rm addr:$src)>;
1495 def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1496           (VBROADCASTI32X4rm addr:$src)>;
1497
1498 // Patterns for selects of bitcasted operations.
1499 def : Pat<(vselect_mask VK16WM:$mask,
1500                         (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1501                         (v16f32 immAllZerosV)),
1502           (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1503 def : Pat<(vselect_mask VK16WM:$mask,
1504                         (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1505                         VR512:$src0),
1506           (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1507 def : Pat<(vselect_mask VK16WM:$mask,
1508                         (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1509                         (v16i32 immAllZerosV)),
1510           (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1511 def : Pat<(vselect_mask VK16WM:$mask,
1512                         (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1513                         VR512:$src0),
1514           (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1515
1516 def : Pat<(vselect_mask VK8WM:$mask,
1517                         (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1518                         (v8f64 immAllZerosV)),
1519           (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1520 def : Pat<(vselect_mask VK8WM:$mask,
1521                         (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1522                         VR512:$src0),
1523           (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1524 def : Pat<(vselect_mask VK8WM:$mask,
1525                         (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1526                         (v8i64 immAllZerosV)),
1527           (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1528 def : Pat<(vselect_mask VK8WM:$mask,
1529                         (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1530                         VR512:$src0),
1531           (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1532 }
1533
1534 let Predicates = [HasVLX] in {
1535 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1536                            v8i32x_info, v4i32x_info>,
1537                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
1538 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1539                            v8f32x_info, v4f32x_info>,
1540                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
1541
1542 def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1543           (VBROADCASTF32X4Z256rm addr:$src)>;
1544 def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1545           (VBROADCASTI32X4Z256rm addr:$src)>;
1546 def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1547           (VBROADCASTI32X4Z256rm addr:$src)>;
1548 def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1549           (VBROADCASTI32X4Z256rm addr:$src)>;
1550
1551 // Patterns for selects of bitcasted operations.
1552 def : Pat<(vselect_mask VK8WM:$mask,
1553                         (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1554                         (v8f32 immAllZerosV)),
1555           (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1556 def : Pat<(vselect_mask VK8WM:$mask,
1557                         (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1558                         VR256X:$src0),
1559           (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1560 def : Pat<(vselect_mask VK8WM:$mask,
1561                         (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1562                         (v8i32 immAllZerosV)),
1563           (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1564 def : Pat<(vselect_mask VK8WM:$mask,
1565                         (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1566                         VR256X:$src0),
1567           (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1568
1569
1570 // Provide a fallback in case the load node used in the patterns above has
1571 // additional users, which prevents those patterns from being selected.
1572 def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
1573           (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1574                               (v2f64 VR128X:$src), 1)>;
1575 def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
1576           (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1577                               (v4f32 VR128X:$src), 1)>;
1578 def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
1579           (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1580                               (v2i64 VR128X:$src), 1)>;
1581 def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
1582           (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1583                               (v4i32 VR128X:$src), 1)>;
1584 def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
1585           (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1586                               (v8i16 VR128X:$src), 1)>;
1587 def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
1588           (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1589                               (v16i8 VR128X:$src), 1)>;
1590 }
1591
1592 let Predicates = [HasVLX, HasDQI] in {
1593 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1594                            v4i64x_info, v2i64x_info>, VEX_W1X,
1595                            EVEX_V256, EVEX_CD8<64, CD8VT2>;
1596 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1597                            v4f64x_info, v2f64x_info>, VEX_W1X,
1598                            EVEX_V256, EVEX_CD8<64, CD8VT2>;
1599
1600 // Patterns for selects of bitcasted operations.
1601 def : Pat<(vselect_mask VK4WM:$mask,
1602                         (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1603                         (v4f64 immAllZerosV)),
1604           (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1605 def : Pat<(vselect_mask VK4WM:$mask,
1606                         (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1607                         VR256X:$src0),
1608           (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1609 def : Pat<(vselect_mask VK4WM:$mask,
1610                         (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1611                         (v4i64 immAllZerosV)),
1612           (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1613 def : Pat<(vselect_mask VK4WM:$mask,
1614                         (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1615                         VR256X:$src0),
1616           (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1617 }
1618
1619 let Predicates = [HasDQI] in {
1620 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1621                        v8i64_info, v2i64x_info>, VEX_W,
1622                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1623 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1624                        v16i32_info, v8i32x_info>,
1625                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1626 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1627                        v8f64_info, v2f64x_info>, VEX_W,
1628                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1629 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1630                        v16f32_info, v8f32x_info>,
1631                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1632
1633 // Patterns for selects of bitcasted operations.
1634 def : Pat<(vselect_mask VK16WM:$mask,
1635                         (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1636                         (v16f32 immAllZerosV)),
1637           (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1638 def : Pat<(vselect_mask VK16WM:$mask,
1639                         (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1640                         VR512:$src0),
1641           (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1642 def : Pat<(vselect_mask VK16WM:$mask,
1643                         (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1644                         (v16i32 immAllZerosV)),
1645           (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1646 def : Pat<(vselect_mask VK16WM:$mask,
1647                         (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1648                         VR512:$src0),
1649           (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1650
1651 def : Pat<(vselect_mask VK8WM:$mask,
1652                         (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1653                         (v8f64 immAllZerosV)),
1654           (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1655 def : Pat<(vselect_mask VK8WM:$mask,
1656                         (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1657                         VR512:$src0),
1658           (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1659 def : Pat<(vselect_mask VK8WM:$mask,
1660                         (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1661                         (v8i64 immAllZerosV)),
1662           (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1663 def : Pat<(vselect_mask VK8WM:$mask,
1664                         (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1665                         VR512:$src0),
1666           (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1667 }
1668
1669 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1670                          AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
1671   let Predicates = [HasDQI] in
1672     defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1673                                           WriteShuffle256Ld, _Dst.info512,
1674                                           _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1675                                           EVEX_V512;
1676   let Predicates = [HasDQI, HasVLX] in
1677     defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1678                                           WriteShuffle256Ld, _Dst.info256,
1679                                           _Src.info256, _Src.info128, 0, null_frag, null_frag>,
1680                                           EVEX_V256;
1681 }
1682
1683 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1684                          AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1685   avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1686
1687   let Predicates = [HasDQI, HasVLX] in
1688     defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
1689                                           WriteShuffleXLd, _Dst.info128,
1690                                           _Src.info128, _Src.info128, 0, null_frag, null_frag>,
1691                                           EVEX_V128;
1692 }
1693
1694 defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1695                                           avx512vl_i32_info, avx512vl_i64_info>;
1696 defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1697                                           avx512vl_f32_info, avx512vl_f64_info>;
1698
1699 //===----------------------------------------------------------------------===//
1700 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
1701 //---
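// For example (AT&T syntax):
//   vpbroadcastmw2d %k1, %zmm0   ; every dword of zmm0 = zero-extended k1
//   vpbroadcastmb2q %k1, %zmm0   ; every qword of zmm0 = zero-extended k1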
1702 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1703                                   X86VectorVTInfo _, RegisterClass KRC> {
1704   def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1705                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1706                   [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1707                   EVEX, Sched<[WriteShuffle]>;
1708 }
1709
1710 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1711                                  AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1712   let Predicates = [HasCDI] in
1713     defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1714   let Predicates = [HasCDI, HasVLX] in {
1715     defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1716     defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1717   }
1718 }
1719
1720 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1721                                                avx512vl_i32_info, VK16>;
1722 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1723                                                avx512vl_i64_info, VK8>, VEX_W;
1724
1725 //===----------------------------------------------------------------------===//
1726 // -- VPERMI2 - 3 source operands form --
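// Operand convention, e.g. (Intel syntax):
//   vpermi2d zmm1 {k1}, zmm2, zmm3
// zmm1 supplies the indices and is overwritten with the result, while zmm2 and
// zmm3 together form the 32-entry table; hence the "$src1 = $dst" tie below,
// with the IdxVT operand being the tied one.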
1727 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1728                          X86FoldableSchedWrite sched,
1729                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1730 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1731     hasSideEffects = 0 in {
1732   defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1733           (ins _.RC:$src2, _.RC:$src3),
1734           OpcodeStr, "$src3, $src2", "$src2, $src3",
1735           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1736           EVEX_4V, AVX5128IBase, Sched<[sched]>;
1737
1738   let mayLoad = 1 in
1739   defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1740             (ins _.RC:$src2, _.MemOp:$src3),
1741             OpcodeStr, "$src3, $src2", "$src2, $src3",
1742             (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1743                    (_.VT (_.LdFrag addr:$src3)))), 1>,
1744             EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1745   }
1746 }
1747
1748 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1749                             X86FoldableSchedWrite sched,
1750                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1751   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1752       hasSideEffects = 0, mayLoad = 1 in
1753   defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1754               (ins _.RC:$src2, _.ScalarMemOp:$src3),
1755               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1756               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1757               (_.VT (X86VPermt2 _.RC:$src2,
1758                IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1759               AVX5128IBase, EVEX_4V, EVEX_B,
1760               Sched<[sched.Folded, sched.ReadAfterFold]>;
1761 }
1762
1763 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1764                                X86FoldableSchedWrite sched,
1765                                AVX512VLVectorVTInfo VTInfo,
1766                                AVX512VLVectorVTInfo ShuffleMask> {
1767   defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1768                            ShuffleMask.info512>,
1769             avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1770                              ShuffleMask.info512>, EVEX_V512;
1771   let Predicates = [HasVLX] in {
1772   defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1773                                ShuffleMask.info128>,
1774                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1775                                   ShuffleMask.info128>, EVEX_V128;
1776   defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1777                                ShuffleMask.info256>,
1778                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1779                                   ShuffleMask.info256>, EVEX_V256;
1780   }
1781 }
1782
1783 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1784                                   X86FoldableSchedWrite sched,
1785                                   AVX512VLVectorVTInfo VTInfo,
1786                                   AVX512VLVectorVTInfo Idx,
1787                                   Predicate Prd> {
1788   let Predicates = [Prd] in
1789   defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1790                            Idx.info512>, EVEX_V512;
1791   let Predicates = [Prd, HasVLX] in {
1792   defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1793                                Idx.info128>, EVEX_V128;
1794   defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1795                                Idx.info256>,  EVEX_V256;
1796   }
1797 }
1798
1799 defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1800                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1801 defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1802                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1803 defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1804                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1805                   VEX_W, EVEX_CD8<16, CD8VF>;
1806 defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1807                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1808                   EVEX_CD8<8, CD8VF>;
1809 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1810                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1811 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1812                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1813
1814 // Extra patterns to handle the extra bitcasts that arise because the passthru
1815 // and the index are expressed with different types in the FP versions.
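// A rough sketch of the shape being matched (taken from the v16f32/v8i64 case
// instantiated below): the tied operand is both index and passthru, but the
// passthru may appear as a bitcast of the integer-typed value, e.g.
//
//   (v16f32 (vselect_mask VK16WM:$mask,
//             (X86VPermt2 (v16f32 VR512:$src2),
//                         (v16i32 (bitconvert (v8i64 VR512:$src1))),
//                         VR512:$src3),
//             (v16f32 (bitconvert (v8i64 VR512:$src1)))))
//
// which still has to select the masked form, e.g. VPERMI2PSrrk.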
1816 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1817                                   X86VectorVTInfo IdxVT,
1818                                   X86VectorVTInfo CastVT> {
1819   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1820                                 (X86VPermt2 (_.VT _.RC:$src2),
1821                                             (IdxVT.VT (bitconvert
1822                                                        (CastVT.VT _.RC:$src1))),
1823                                             _.RC:$src3),
1824                                 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1825             (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1826                                                 _.RC:$src2, _.RC:$src3)>;
1827   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1828                                 (X86VPermt2 _.RC:$src2,
1829                                             (IdxVT.VT (bitconvert
1830                                                        (CastVT.VT _.RC:$src1))),
1831                                             (_.LdFrag addr:$src3)),
1832                                 (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1833             (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1834                                                 _.RC:$src2, addr:$src3)>;
1835   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1836                                  (X86VPermt2 _.RC:$src2,
1837                                              (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1838                                              (_.BroadcastLdFrag addr:$src3)),
1839                                  (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1840             (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1841                                                  _.RC:$src2, addr:$src3)>;
1842 }
1843
1844 // TODO: Should we add more casts? The vXi64 case is common due to ABI.
1845 defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1846 defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1847 defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1848
1849 // VPERMT2
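// Operand convention, e.g. (Intel syntax):
//   vpermt2d zmm1 {k1}, zmm2, zmm3
// here zmm1 (the overwritten operand) supplies one half of the table, zmm2
// holds the indices, and zmm3 supplies the other half; the same X86VPermt2
// node is used, only which operand is tied to $dst differs from VPERMI2.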
1850 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1851                          X86FoldableSchedWrite sched,
1852                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1853 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1854   defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1855           (ins IdxVT.RC:$src2, _.RC:$src3),
1856           OpcodeStr, "$src3, $src2", "$src2, $src3",
1857           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1858           EVEX_4V, AVX5128IBase, Sched<[sched]>;
1859
1860   defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1861             (ins IdxVT.RC:$src2, _.MemOp:$src3),
1862             OpcodeStr, "$src3, $src2", "$src2, $src3",
1863             (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1864                    (_.LdFrag addr:$src3))), 1>,
1865             EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1866   }
1867 }
1868 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1869                             X86FoldableSchedWrite sched,
1870                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1871   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1872   defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1873               (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1874               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1875               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1876               (_.VT (X86VPermt2 _.RC:$src1,
1877                IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1878               AVX5128IBase, EVEX_4V, EVEX_B,
1879               Sched<[sched.Folded, sched.ReadAfterFold]>;
1880 }
1881
1882 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1883                                X86FoldableSchedWrite sched,
1884                                AVX512VLVectorVTInfo VTInfo,
1885                                AVX512VLVectorVTInfo ShuffleMask> {
1886   defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1887                               ShuffleMask.info512>,
1888             avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1889                               ShuffleMask.info512>, EVEX_V512;
1890   let Predicates = [HasVLX] in {
1891   defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1892                               ShuffleMask.info128>,
1893                  avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1894                               ShuffleMask.info128>, EVEX_V128;
1895   defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1896                               ShuffleMask.info256>,
1897                  avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1898                               ShuffleMask.info256>, EVEX_V256;
1899   }
1900 }
1901
1902 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1903                                   X86FoldableSchedWrite sched,
1904                                   AVX512VLVectorVTInfo VTInfo,
1905                                   AVX512VLVectorVTInfo Idx, Predicate Prd> {
1906   let Predicates = [Prd] in
1907   defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1908                            Idx.info512>, EVEX_V512;
1909   let Predicates = [Prd, HasVLX] in {
1910   defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1911                                Idx.info128>, EVEX_V128;
1912   defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1913                                Idx.info256>, EVEX_V256;
1914   }
1915 }
1916
1917 defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1918                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1919 defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1920                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1921 defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1922                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1923                   VEX_W, EVEX_CD8<16, CD8VF>;
1924 defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1925                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1926                   EVEX_CD8<8, CD8VF>;
1927 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1928                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1929 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1930                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1931
1932 //===----------------------------------------------------------------------===//
1933 // AVX-512 - BLEND using mask
1934 //
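// Semantically these are element-wise selects, e.g. (Intel syntax):
//   vblendmps zmm1 {k1}, zmm2, zmm3   ; zmm1[i] = k1[i] ? zmm3[i] : zmm2[i]
// Note the record bodies below carry empty pattern lists; these defs mainly
// provide the encodings and the memory/broadcast folding forms, with matching
// handled by separate patterns elsewhere.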
1935
1936 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1937                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1938   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1939   def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1940              (ins _.RC:$src1, _.RC:$src2),
1941              !strconcat(OpcodeStr,
1942              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1943              EVEX_4V, Sched<[sched]>;
1944   def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1945              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1946              !strconcat(OpcodeStr,
1947              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1948              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1949   def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1950              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1951              !strconcat(OpcodeStr,
1952              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1953              []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
1954   let mayLoad = 1 in {
1955   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1956              (ins _.RC:$src1, _.MemOp:$src2),
1957              !strconcat(OpcodeStr,
1958              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1959              []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1960              Sched<[sched.Folded, sched.ReadAfterFold]>;
1961   def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1962              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1963              !strconcat(OpcodeStr,
1964              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1965              []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1966              Sched<[sched.Folded, sched.ReadAfterFold]>;
1967   def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1968              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1969              !strconcat(OpcodeStr,
1970              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1971              []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1972              Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
1973   }
1974   }
1975 }
1976 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1977                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1978   let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
1979   def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1980       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1981        !strconcat(OpcodeStr,
1982             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1983             "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1984       EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1985       Sched<[sched.Folded, sched.ReadAfterFold]>;
1986
1987   def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1988       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1989        !strconcat(OpcodeStr,
1990             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
1991             "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1992       EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1993       Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
1994
1995   def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1996       (ins _.RC:$src1, _.ScalarMemOp:$src2),
1997        !strconcat(OpcodeStr,
1998             "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1999             "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2000       EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2001       Sched<[sched.Folded, sched.ReadAfterFold]>;
2002   }
2003 }
2004
2005 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2006                         AVX512VLVectorVTInfo VTInfo> {
2007   defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2008            WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2009                                  EVEX_V512;
2010
2011   let Predicates = [HasVLX] in {
2012     defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2013                 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2014                                       EVEX_V256;
2015     defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2016                 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2017                                       EVEX_V128;
2018   }
2019 }
2020
2021 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2022                         AVX512VLVectorVTInfo VTInfo> {
2023   let Predicates = [HasBWI] in
2024     defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2025                                EVEX_V512;
2026
2027   let Predicates = [HasBWI, HasVLX] in {
2028     defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2029                                   EVEX_V256;
2030     defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2031                                   EVEX_V128;
2032   }
2033 }
2034
2035 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2036                               avx512vl_f32_info>;
2037 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2038                               avx512vl_f64_info>, VEX_W;
2039 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2040                               avx512vl_i32_info>;
2041 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2042                               avx512vl_i64_info>, VEX_W;
2043 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2044                               avx512vl_i8_info>;
2045 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2046                               avx512vl_i16_info>, VEX_W;
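
// For reference, the blend-mask forms defined above correspond to assembly
// roughly like the following (AT&T syntax, illustrative only):
//   vblendmps %zmm2, %zmm1, %zmm0 {%k1}           # rrk:  merge-masking
//   vblendmps %zmm2, %zmm1, %zmm0 {%k1} {z}       # rrkz: zero-masking
//   vblendmps (%rdi){1to16}, %zmm1, %zmm0 {%k1}   # rmbk: broadcast operand
// Elements whose mask bit is set take the second source; the remaining
// elements take the first source (merge-masking) or zero (zero-masking).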
2047
2048 //===----------------------------------------------------------------------===//
2049 // Compare Instructions
2050 //===----------------------------------------------------------------------===//
2051
2052 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2053
2054 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
2055                              PatFrag OpNode_su, PatFrag OpNodeSAE_su,
2056                              X86FoldableSchedWrite sched> {
2057   defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2058                       (outs _.KRC:$dst),
2059                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2060                       "vcmp"#_.Suffix,
2061                       "$cc, $src2, $src1", "$src1, $src2, $cc",
2062                       (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2063                       (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2064                                  timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2065   let mayLoad = 1 in
2066   defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2067                     (outs _.KRC:$dst),
2068                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
2069                     "vcmp"#_.Suffix,
2070                     "$cc, $src2, $src1", "$src1, $src2, $cc",
2071                     (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2072                         timm:$cc),
2073                     (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2074                         timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2075                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2076
2077   let Uses = [MXCSR] in
2078   defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2079                      (outs _.KRC:$dst),
2080                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2081                      "vcmp"#_.Suffix,
2082                      "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
2083                      (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2084                                 timm:$cc),
2085                      (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2086                                    timm:$cc)>,
2087                      EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2088
2089   let isCodeGenOnly = 1 in {
2090     let isCommutable = 1 in
2091     def rr : AVX512Ii8<0xC2, MRMSrcReg,
2092                 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2093                 !strconcat("vcmp", _.Suffix,
2094                            "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2095                 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2096                                           _.FRC:$src2,
2097                                           timm:$cc))]>,
2098                 EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2099     def rm : AVX512Ii8<0xC2, MRMSrcMem,
2100               (outs _.KRC:$dst),
2101               (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2102               !strconcat("vcmp", _.Suffix,
2103                          "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2104               [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2105                                         (_.ScalarLdFrag addr:$src2),
2106                                         timm:$cc))]>,
2107               EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2108               Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2109   }
2110 }
2111
2112 def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2113                           (X86cmpms node:$src1, node:$src2, node:$cc), [{
2114   return N->hasOneUse();
2115 }]>;
2116 def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2117                           (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
2118   return N->hasOneUse();
2119 }]>;
2120
2121 let Predicates = [HasAVX512] in {
2122   let ExeDomain = SSEPackedSingle in
2123   defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2124                                    X86cmpms_su, X86cmpmsSAE_su,
2125                                    SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2126   let ExeDomain = SSEPackedDouble in
2127   defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2128                                    X86cmpms_su, X86cmpmsSAE_su,
2129                                    SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
2130 }
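
// Illustrative assembly for the scalar compares above (AT&T syntax); the
// result is a single bit in a mask register:
//   vcmpss $2, %xmm2, %xmm1, %k0                  # imm 2 = LE
//   vcmpss $1, {sae}, %xmm2, %xmm1, %k0 {%k1}     # rrb_Int: exceptions suppressed
// Assemblers commonly also accept predicate mnemonics such as vcmpless as
// aliases for the immediate forms.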
2131
2132 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2133                               X86FoldableSchedWrite sched,
2134                               X86VectorVTInfo _, bit IsCommutable> {
2135   let isCommutable = IsCommutable, hasSideEffects = 0 in
2136   def rr : AVX512BI<opc, MRMSrcReg,
2137              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2138              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2139              []>, EVEX_4V, Sched<[sched]>;
2140   let mayLoad = 1, hasSideEffects = 0 in
2141   def rm : AVX512BI<opc, MRMSrcMem,
2142              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2143              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2144              []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2145   let isCommutable = IsCommutable, hasSideEffects = 0 in
2146   def rrk : AVX512BI<opc, MRMSrcReg,
2147               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2148               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2149                           "$dst {${mask}}, $src1, $src2}"),
2150               []>, EVEX_4V, EVEX_K, Sched<[sched]>;
2151   let mayLoad = 1, hasSideEffects = 0 in
2152   def rmk : AVX512BI<opc, MRMSrcMem,
2153               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2154               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2155                           "$dst {${mask}}, $src1, $src2}"),
2156               []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2157 }
2158
2159 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2160                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
2161                                   bit IsCommutable> :
2162            avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2163   let mayLoad = 1, hasSideEffects = 0 in {
2164   def rmb : AVX512BI<opc, MRMSrcMem,
2165               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2166               !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2167                                     "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2168               []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2169   def rmbk : AVX512BI<opc, MRMSrcMem,
2170                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2171                                        _.ScalarMemOp:$src2),
2172                !strconcat(OpcodeStr,
2173                           "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2174                           "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2175                []>, EVEX_4V, EVEX_K, EVEX_B,
2176                Sched<[sched.Folded, sched.ReadAfterFold]>;
2177   }
2178 }
2179
2180 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2181                                  X86SchedWriteWidths sched,
2182                                  AVX512VLVectorVTInfo VTInfo, Predicate prd,
2183                                  bit IsCommutable = 0> {
2184   let Predicates = [prd] in
2185   defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2186                               VTInfo.info512, IsCommutable>, EVEX_V512;
2187
2188   let Predicates = [prd, HasVLX] in {
2189     defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2190                                    VTInfo.info256, IsCommutable>, EVEX_V256;
2191     defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2192                                    VTInfo.info128, IsCommutable>, EVEX_V128;
2193   }
2194 }
2195
2196 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2197                                      X86SchedWriteWidths sched,
2198                                      AVX512VLVectorVTInfo VTInfo,
2199                                      Predicate prd, bit IsCommutable = 0> {
2200   let Predicates = [prd] in
2201   defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2202                                   VTInfo.info512, IsCommutable>, EVEX_V512;
2203
2204   let Predicates = [prd, HasVLX] in {
2205     defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2206                                        VTInfo.info256, IsCommutable>, EVEX_V256;
2207     defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2208                                        VTInfo.info128, IsCommutable>, EVEX_V128;
2209   }
2210 }
2211
2212 // This fragment treats X86cmpm as commutable to help match loads in both
2213 // operands for PCMPEQ.
2214 def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
2215 def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2216                          (setcc node:$src1, node:$src2, SETGT)>;
2217
2218 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2219 // increase the pattern complexity the way an immediate would.
2220 let AddedComplexity = 2 in {
2221 // FIXME: Is there a better scheduler class for VPCMP?
2222 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2223                       SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2224                 EVEX_CD8<8, CD8VF>, VEX_WIG;
2225
2226 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2227                       SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2228                 EVEX_CD8<16, CD8VF>, VEX_WIG;
2229
2230 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2231                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2232                 EVEX_CD8<32, CD8VF>;
2233
2234 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2235                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2236                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2237
2238 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2239                       SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2240                 EVEX_CD8<8, CD8VF>, VEX_WIG;
2241
2242 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2243                       SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2244                 EVEX_CD8<16, CD8VF>, VEX_WIG;
2245
2246 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2247                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2248                 EVEX_CD8<32, CD8VF>;
2249
2250 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2251                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2252                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2253 }
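
// Unlike the VEX-encoded forms, these compares write a mask register rather
// than a vector of all-ones/all-zeros elements, e.g. (illustrative):
//   vpcmpeqd %zmm2, %zmm1, %k0
//   vpcmpgtq (%rdi){1to8}, %zmm1, %k2 {%k1}       # broadcast form from the _rmb variants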
2254
2255 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2256                           PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
2257                           X86FoldableSchedWrite sched,
2258                           X86VectorVTInfo _, string Name> {
2259   let isCommutable = 1 in
2260   def rri : AVX512AIi8<opc, MRMSrcReg,
2261              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2262              !strconcat("vpcmp", Suffix,
2263                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2264              [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2265                                                 (_.VT _.RC:$src2),
2266                                                 cond)))]>,
2267              EVEX_4V, Sched<[sched]>;
2268   def rmi : AVX512AIi8<opc, MRMSrcMem,
2269              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2270              !strconcat("vpcmp", Suffix,
2271                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2272              [(set _.KRC:$dst, (_.KVT
2273                                 (Frag:$cc
2274                                  (_.VT _.RC:$src1),
2275                                  (_.VT (_.LdFrag addr:$src2)),
2276                                  cond)))]>,
2277              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2278   let isCommutable = 1 in
2279   def rrik : AVX512AIi8<opc, MRMSrcReg,
2280               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2281                                       u8imm:$cc),
2282               !strconcat("vpcmp", Suffix,
2283                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
2284                          "$dst {${mask}}, $src1, $src2, $cc}"),
2285               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2286                                      (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2287                                                          (_.VT _.RC:$src2),
2288                                                          cond))))]>,
2289               EVEX_4V, EVEX_K, Sched<[sched]>;
2290   def rmik : AVX512AIi8<opc, MRMSrcMem,
2291               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2292                                     u8imm:$cc),
2293               !strconcat("vpcmp", Suffix,
2294                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
2295                          "$dst {${mask}}, $src1, $src2, $cc}"),
2296               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2297                                      (_.KVT
2298                                       (Frag_su:$cc
2299                                        (_.VT _.RC:$src1),
2300                                        (_.VT (_.LdFrag addr:$src2)),
2301                                        cond))))]>,
2302               EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2303
2304   def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
2305                                  (_.VT _.RC:$src1), cond)),
2306             (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2307              _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2308
2309   def : Pat<(and _.KRCWM:$mask,
2310                  (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
2311                                       (_.VT _.RC:$src1), cond))),
2312             (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2313              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2314              (CommFrag.OperandTransform $cc))>;
2315 }
2316
2317 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2318                               PatFrag Frag_su, PatFrag CommFrag,
2319                               PatFrag CommFrag_su, X86FoldableSchedWrite sched,
2320                               X86VectorVTInfo _, string Name> :
2321            avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2322                           sched, _, Name> {
2323   def rmib : AVX512AIi8<opc, MRMSrcMem,
2324              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2325                                      u8imm:$cc),
2326              !strconcat("vpcmp", Suffix,
2327                         "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2328                         "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2329              [(set _.KRC:$dst, (_.KVT (Frag:$cc
2330                                        (_.VT _.RC:$src1),
2331                                        (_.BroadcastLdFrag addr:$src2),
2332                                        cond)))]>,
2333              EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2334   def rmibk : AVX512AIi8<opc, MRMSrcMem,
2335               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2336                                        _.ScalarMemOp:$src2, u8imm:$cc),
2337               !strconcat("vpcmp", Suffix,
2338                   "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2339                   "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2340               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2341                                      (_.KVT (Frag_su:$cc
2342                                              (_.VT _.RC:$src1),
2343                                              (_.BroadcastLdFrag addr:$src2),
2344                                              cond))))]>,
2345               EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2346
2347   def : Pat<(_.KVT (CommFrag:$cc (_.BroadcastLdFrag addr:$src2),
2348                     (_.VT _.RC:$src1), cond)),
2349             (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2350              _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2351
2352   def : Pat<(and _.KRCWM:$mask,
2353                  (_.KVT (CommFrag_su:$cc (_.BroadcastLdFrag addr:$src2),
2354                                       (_.VT _.RC:$src1), cond))),
2355             (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2356              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2357              (CommFrag_su.OperandTransform $cc))>;
2358 }
2359
2360 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2361                              PatFrag Frag_su, PatFrag CommFrag,
2362                              PatFrag CommFrag_su, X86SchedWriteWidths sched,
2363                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2364   let Predicates = [prd] in
2365   defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2366                           sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2367
2368   let Predicates = [prd, HasVLX] in {
2369     defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2370                                sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2371     defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2372                                sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2373   }
2374 }
2375
2376 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2377                                  PatFrag Frag_su, PatFrag CommFrag,
2378                                  PatFrag CommFrag_su, X86SchedWriteWidths sched,
2379                                  AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2380   let Predicates = [prd] in
2381   defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2382                               sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2383
2384   let Predicates = [prd, HasVLX] in {
2385     defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2386                                    sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2387     defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2388                                    sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2389   }
2390 }
2391
2392 def X86pcmpm_imm : SDNodeXForm<setcc, [{
2393   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2394   uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2395   return getI8Imm(SSECC, SDLoc(N));
2396 }]>;
2397
2398 // Swapped operand version of the above.
2399 def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2400   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2401   uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2402   SSECC = X86::getSwappedVPCMPImm(SSECC);
2403   return getI8Imm(SSECC, SDLoc(N));
2404 }]>;
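
// For reference, the VPCMP/VPCMPU immediate encodes the predicate roughly as
//   0 = EQ, 1 = LT, 2 = LE, 3 = FALSE, 4 = NE, 5 = NLT (>=), 6 = NLE (>), 7 = TRUE
// so the commuted transform above maps LT<->NLE and LE<->NLT and leaves
// EQ/NE/FALSE/TRUE unchanged. This is what allows a load in the first operand
// to be folded by swapping the operands and rewriting the immediate.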
2405
2406 def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2407                        (setcc node:$src1, node:$src2, node:$cc), [{
2408   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2409   return !ISD::isUnsignedIntSetCC(CC);
2410 }], X86pcmpm_imm>;
2411
2412 def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2413                           (setcc node:$src1, node:$src2, node:$cc), [{
2414   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2415   return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2416 }], X86pcmpm_imm>;
2417
2418 // Same as above, but commutes the immediate. Used for load folding.
2419 def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2420                                (setcc node:$src1, node:$src2, node:$cc), [{
2421   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2422   return !ISD::isUnsignedIntSetCC(CC);
2423 }], X86pcmpm_imm_commute>;
2424
2425 def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2426                                   (setcc node:$src1, node:$src2, node:$cc), [{
2427   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2428   return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2429 }], X86pcmpm_imm_commute>;
2430
2431 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2432                         (setcc node:$src1, node:$src2, node:$cc), [{
2433   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2434   return ISD::isUnsignedIntSetCC(CC);
2435 }], X86pcmpm_imm>;
2436
2437 def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2438                            (setcc node:$src1, node:$src2, node:$cc), [{
2439   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2440   return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2441 }], X86pcmpm_imm>;
2442
2443 // Same as above, but commutes the immediate. Used for load folding.
2444 def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2445                                 (setcc node:$src1, node:$src2, node:$cc), [{
2446   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2447   return ISD::isUnsignedIntSetCC(CC);
2448 }], X86pcmpm_imm_commute>;
2449
2450 def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2451                                    (setcc node:$src1, node:$src2, node:$cc), [{
2452   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2453   return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2454 }], X86pcmpm_imm_commute>;
2455
2456 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
2457 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2458                                 X86pcmpm_commute, X86pcmpm_commute_su,
2459                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2460                                 EVEX_CD8<8, CD8VF>;
2461 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2462                                  X86pcmpum_commute, X86pcmpum_commute_su,
2463                                  SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2464                                  EVEX_CD8<8, CD8VF>;
2465
2466 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2467                                 X86pcmpm_commute, X86pcmpm_commute_su,
2468                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2469                                 VEX_W, EVEX_CD8<16, CD8VF>;
2470 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2471                                  X86pcmpum_commute, X86pcmpum_commute_su,
2472                                  SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2473                                  VEX_W, EVEX_CD8<16, CD8VF>;
2474
2475 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2476                                     X86pcmpm_commute, X86pcmpm_commute_su,
2477                                     SchedWriteVecALU, avx512vl_i32_info,
2478                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2479 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2480                                      X86pcmpum_commute, X86pcmpum_commute_su,
2481                                      SchedWriteVecALU, avx512vl_i32_info,
2482                                      HasAVX512>, EVEX_CD8<32, CD8VF>;
2483
2484 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2485                                     X86pcmpm_commute, X86pcmpm_commute_su,
2486                                     SchedWriteVecALU, avx512vl_i64_info,
2487                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2488 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2489                                      X86pcmpum_commute, X86pcmpum_commute_su,
2490                                      SchedWriteVecALU, avx512vl_i64_info,
2491                                      HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
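
// Illustrative uses of the compares defined above (AT&T syntax):
//   vpcmpd $1, %zmm2, %zmm1, %k0                  # signed, imm 1 = LT
//   vpcmpud $6, (%rdi){1to16}, %zmm1, %k2 {%k1}   # unsigned NLE with broadcast
// Assemblers commonly also accept aliases such as vpcmpltd or vpcmpnleud for
// the fixed predicates.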
2492
2493 def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2494                          (X86cmpm node:$src1, node:$src2, node:$cc), [{
2495   return N->hasOneUse();
2496 }]>;
2497 def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2498                             (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
2499   return N->hasOneUse();
2500 }]>;
2501
2502 def X86cmpm_imm_commute : SDNodeXForm<timm, [{
2503   uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
2504   return getI8Imm(Imm, SDLoc(N));
2505 }]>;
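
// X86cmpm_imm_commute rewrites the FP predicate for exchanged operands
// (roughly, LT_OS 0x01 becomes GT_OS 0x0E); the mask with 0x1f keeps the
// value within the 32 defined AVX comparison predicates. It is used below to
// fold a load that appears in the first comparison operand.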
2506
2507 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2508                               string Name> {
2509 let Uses = [MXCSR], mayRaiseFPException = 1 in {
2510   defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2511                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2512                    "vcmp"#_.Suffix,
2513                    "$cc, $src2, $src1", "$src1, $src2, $cc",
2514                    (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2515                    (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2516                    1>, Sched<[sched]>;
2517
2518   defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2519                 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2520                 "vcmp"#_.Suffix,
2521                 "$cc, $src2, $src1", "$src1, $src2, $cc",
2522                 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2523                              timm:$cc),
2524                 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2525                             timm:$cc)>,
2526                 Sched<[sched.Folded, sched.ReadAfterFold]>;
2527
2528   defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2529                 (outs _.KRC:$dst),
2530                 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2531                 "vcmp"#_.Suffix,
2532                 "$cc, ${src2}"#_.BroadcastStr#", $src1",
2533                 "$src1, ${src2}"#_.BroadcastStr#", $cc",
2534                 (X86any_cmpm (_.VT _.RC:$src1),
2535                              (_.VT (_.BroadcastLdFrag addr:$src2)),
2536                              timm:$cc),
2537                 (X86cmpm_su (_.VT _.RC:$src1),
2538                             (_.VT (_.BroadcastLdFrag addr:$src2)),
2539                             timm:$cc)>,
2540                 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2541   }
2542
2543   // Patterns for selecting when the load is in the other operand.
2544   def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2545                          timm:$cc),
2546             (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2547                                                       (X86cmpm_imm_commute timm:$cc))>;
2548
2549   def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2550                                             (_.VT _.RC:$src1),
2551                                             timm:$cc)),
2552             (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2553                                                        _.RC:$src1, addr:$src2,
2554                                                        (X86cmpm_imm_commute timm:$cc))>;
2555
2556   def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2557                          (_.VT _.RC:$src1), timm:$cc),
2558             (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2559                                                        (X86cmpm_imm_commute timm:$cc))>;
2560
2561   def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2562                                             (_.VT _.RC:$src1),
2563                                             timm:$cc)),
2564             (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2565                                                         _.RC:$src1, addr:$src2,
2566                                                         (X86cmpm_imm_commute timm:$cc))>;
2567 }
2568
2569 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2570   // Comparison-code form (VCMP[EQ/LT/LE/...]) with {sae}.
2571   let Uses = [MXCSR] in
2572   defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2573                      (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2574                      "vcmp"#_.Suffix,
2575                      "$cc, {sae}, $src2, $src1",
2576                      "$src1, $src2, {sae}, $cc",
2577                      (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2578                      (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2579                                     timm:$cc)>,
2580                      EVEX_B, Sched<[sched]>;
2581 }
2582
2583 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
2584   let Predicates = [HasAVX512] in {
2585     defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2586                 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2587
2588   }
2589   let Predicates = [HasAVX512,HasVLX] in {
2590    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2591    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2592   }
2593 }
2594
2595 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2596                           AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2597 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2598                           AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
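
// Illustrative packed FP compare forms (AT&T syntax); the result is a mask:
//   vcmpps $2, %zmm2, %zmm1, %k0                  # imm 2 = LE_OS (alias vcmpleps)
//   vcmppd $0, (%rdi){1to8}, %zmm1, %k2 {%k1}     # broadcast compare under writemask
//   vcmpps $1, {sae}, %zmm2, %zmm1, %k0           # rrib: exceptions suppressed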
2599
2600 // Patterns to select fp compares with a load as the first operand.
2601 let Predicates = [HasAVX512] in {
2602   def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2603                             timm:$cc)),
2604             (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2605
2606   def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2607                             timm:$cc)),
2608             (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2609 }
2610
2611 // ----------------------------------------------------------------
2612 // FPClass
2613
2614 def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2615                               (X86Vfpclasss node:$src1, node:$src2), [{
2616   return N->hasOneUse();
2617 }]>;
2618
2619 def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2620                              (X86Vfpclass node:$src1, node:$src2), [{
2621   return N->hasOneUse();
2622 }]>;
2623
2624 // Handle the scalar fpclass instruction:  mask = op(reg_scalar, imm)
2625 //                                          mask = op(mem_scalar, imm)
2626 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2627                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2628                                  Predicate prd> {
2629   let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2630       def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2631                       (ins _.RC:$src1, i32u8imm:$src2),
2632                       OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2633                       [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2634                               (i32 timm:$src2)))]>,
2635                       Sched<[sched]>;
2636       def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2637                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2638                       OpcodeStr#_.Suffix#
2639                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2640                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2641                                       (X86Vfpclasss_su (_.VT _.RC:$src1),
2642                                       (i32 timm:$src2))))]>,
2643                       EVEX_K, Sched<[sched]>;
2644     def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2645                     (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2646                     OpcodeStr#_.Suffix#
2647                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2648                     [(set _.KRC:$dst,
2649                           (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2650                                         (i32 timm:$src2)))]>,
2651                     Sched<[sched.Folded, sched.ReadAfterFold]>;
2652     def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2653                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2654                     OpcodeStr#_.Suffix#
2655                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2656                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
2657                         (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2658                             (i32 timm:$src2))))]>,
2659                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2660   }
2661 }
2662
2663 // Handle the vector fpclass instruction:  mask = fpclass(reg_vec, imm)
2664 //                                          mask = fpclass(mem_vec, imm)
2665 //                                          mask = fpclass(broadcast(eltVt), imm)
2666 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2667                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2668                                  string mem>{
2669   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2670   def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2671                       (ins _.RC:$src1, i32u8imm:$src2),
2672                       OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2673                       [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2674                                        (i32 timm:$src2)))]>,
2675                       Sched<[sched]>;
2676   def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2677                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2678                       OpcodeStr#_.Suffix#
2679                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2680                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2681                                        (X86Vfpclass_su (_.VT _.RC:$src1),
2682                                        (i32 timm:$src2))))]>,
2683                       EVEX_K, Sched<[sched]>;
2684   def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2685                     (ins _.MemOp:$src1, i32u8imm:$src2),
2686                     OpcodeStr#_.Suffix#"{"#mem#"}"#
2687                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2688                     [(set _.KRC:$dst,(X86Vfpclass
2689                                      (_.VT (_.LdFrag addr:$src1)),
2690                                      (i32 timm:$src2)))]>,
2691                     Sched<[sched.Folded, sched.ReadAfterFold]>;
2692   def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2693                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2694                     OpcodeStr#_.Suffix#"{"#mem#"}"#
2695                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2696                     [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2697                                   (_.VT (_.LdFrag addr:$src1)),
2698                                   (i32 timm:$src2))))]>,
2699                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2700   def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2701                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2702                     OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2703                                       _.BroadcastStr#", $dst|$dst, ${src1}"
2704                                                   #_.BroadcastStr#", $src2}",
2705                     [(set _.KRC:$dst,(X86Vfpclass
2706                                      (_.VT (_.BroadcastLdFrag addr:$src1)),
2707                                      (i32 timm:$src2)))]>,
2708                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2709   def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2710                     (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2711                     OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2712                           _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2713                                                    _.BroadcastStr#", $src2}",
2714                     [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2715                                      (_.VT (_.BroadcastLdFrag addr:$src1)),
2716                                      (i32 timm:$src2))))]>,
2717                     EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
2718   }
2719
2720   // Allow register or broadcast operands with the x, y, z suffix that we use
2721   // to disambiguate the memory form.
2722   def : InstAlias<OpcodeStr#_.Suffix#mem#
2723                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2724                   (!cast<Instruction>(NAME#"rr")
2725                    _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2726   def : InstAlias<OpcodeStr#_.Suffix#mem#
2727                   "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2728                   (!cast<Instruction>(NAME#"rrk")
2729                    _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2730   def : InstAlias<OpcodeStr#_.Suffix#mem#
2731                   "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2732                   _.BroadcastStr#", $src2}",
2733                   (!cast<Instruction>(NAME#"rmb")
2734                    _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2735   def : InstAlias<OpcodeStr#_.Suffix#mem#
2736                   "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2737                   "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2738                   (!cast<Instruction>(NAME#"rmbk")
2739                    _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2740 }
2741
2742 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2743                                      bits<8> opc, X86SchedWriteWidths sched,
2744                                      Predicate prd>{
2745   let Predicates = [prd] in {
2746     defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2747                                       _.info512, "z">, EVEX_V512;
2748   }
2749   let Predicates = [prd, HasVLX] in {
2750     defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2751                                       _.info128, "x">, EVEX_V128;
2752     defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2753                                       _.info256, "y">, EVEX_V256;
2754   }
2755 }
2756
2757 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2758                                  bits<8> opcScalar, X86SchedWriteWidths sched,
2759                                  Predicate prd> {
2760   defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2761                                       sched, prd>,
2762                                       EVEX_CD8<32, CD8VF>;
2763   defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2764                                       sched, prd>,
2765                                       EVEX_CD8<64, CD8VF> , VEX_W;
2766   defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2767                                    sched.Scl, f32x_info, prd>, VEX_LIG,
2768                                    EVEX_CD8<32, CD8VT1>;
2769   defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2770                                    sched.Scl, f64x_info, prd>, VEX_LIG,
2771                                    EVEX_CD8<64, CD8VT1>, VEX_W;
2772 }
2773
2774 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
2775                                       HasDQI>, AVX512AIi8Base, EVEX;
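
// The immediate selects which classes to test; per the ISA definition the
// bits are roughly 0x01 QNaN, 0x02 +0, 0x04 -0, 0x08 +Inf, 0x10 -Inf,
// 0x20 denormal, 0x40 finite negative, 0x80 SNaN. For example (illustrative):
//   vfpclassps $0x81, %zmm0, %k1      # bit set for NaN (quiet or signaling) elements
//   vfpclasssd $0x18, %xmm0, %k1      # k1[0] set if the scalar is +/- infinity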
2776
2777 //-----------------------------------------------------------------
2778 // Mask register copy, including
2779 // - copy between mask registers
2780 // - load/store mask registers
2781 // - copy from GPR to mask register and vice versa
2782 //
2783 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2784                          string OpcodeStr, RegisterClass KRC,
2785                          ValueType vvt, X86MemOperand x86memop> {
2786   let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2787   def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2788              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2789              Sched<[WriteMove]>;
2790   def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2791              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2792              [(set KRC:$dst, (vvt (load addr:$src)))]>,
2793              Sched<[WriteLoad]>;
2794   def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2795              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2796              [(store KRC:$src, addr:$dst)]>,
2797              Sched<[WriteStore]>;
2798 }
2799
2800 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2801                              string OpcodeStr,
2802                              RegisterClass KRC, RegisterClass GRC> {
2803   let hasSideEffects = 0 in {
2804     def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2805                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2806                Sched<[WriteMove]>;
2807     def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2808                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2809                Sched<[WriteMove]>;
2810   }
2811 }
2812
2813 let Predicates = [HasDQI] in
2814   defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2815                avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2816                VEX, PD;
2817
2818 let Predicates = [HasAVX512] in
2819   defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2820                avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2821                VEX, PS;
2822
2823 let Predicates = [HasBWI] in {
2824   defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2825                VEX, PD, VEX_W;
2826   defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2827                VEX, XD;
2828   defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2829                VEX, PS, VEX_W;
2830   defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2831                VEX, XD, VEX_W;
2832 }
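
// Illustrative k-register moves produced by the definitions above (AT&T):
//   kmovw %k1, %k2          # kk: mask-to-mask copy
//   kmovw (%rdi), %k1       # km: load 16 mask bits
//   kmovw %k1, (%rdi)       # mk: store 16 mask bits
//   kmovw %eax, %k1         # kr: GPR to mask
//   kmovw %k1, %eax         # rk: mask to GPR
// KMOVB/KMOVD/KMOVQ follow the same pattern for 8/32/64-bit masks, gated on
// DQI and BWI as above.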
2833
2834 // GR from/to mask register
2835 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2836           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2837 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2838           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2839
2840 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2841           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2842 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2843           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2844
2845 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2846           (KMOVWrk VK16:$src)>;
2847 def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2848           (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2849 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2850           (COPY_TO_REGCLASS VK16:$src, GR32)>;
2851 def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2852           (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2853
2854 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2855           (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2856 def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2857           (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2858 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2859           (COPY_TO_REGCLASS VK8:$src, GR32)>;
2860 def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2861           (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
2862
2863 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2864           (COPY_TO_REGCLASS GR32:$src, VK32)>;
2865 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2866           (COPY_TO_REGCLASS VK32:$src, GR32)>;
2867 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2868           (COPY_TO_REGCLASS GR64:$src, VK64)>;
2869 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2870           (COPY_TO_REGCLASS VK64:$src, GR64)>;
2871
2872 // Load/store kreg
2873 let Predicates = [HasDQI] in {
2874   def : Pat<(store VK1:$src, addr:$dst),
2875             (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
2876
2877   def : Pat<(v1i1 (load addr:$src)),
2878             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2879   def : Pat<(v2i1 (load addr:$src)),
2880             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2881   def : Pat<(v4i1 (load addr:$src)),
2882             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2883 }
2884
2885 let Predicates = [HasAVX512] in {
2886   def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2887             (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2888   def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2889             (KMOVWkm addr:$src)>;
2890 }
2891
2892 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2893                          SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2894                                               SDTCVecEltisVT<1, i1>,
2895                                               SDTCisPtrTy<2>]>>;
2896
2897 let Predicates = [HasAVX512] in {
2898   multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2899     def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2900               (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2901
2902     def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2903               (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2904
2905     def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2906               (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2907
2908     def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2909               (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2910   }
2911
2912   defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2913   defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2914   defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2915   defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2916   defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2917   defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2918   defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
2919
2920   def : Pat<(insert_subvector (v16i1 immAllZerosV),
2921                               (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2922             (COPY_TO_REGCLASS
2923              (KMOVWkr (AND32ri8
2924                        (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2925                        (i32 1))), VK16)>;
2926 }
2927
2928 // Mask unary operation
2929 // - KNOT
2930 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2931                             RegisterClass KRC, SDPatternOperator OpNode,
2932                             X86FoldableSchedWrite sched, Predicate prd> {
2933   let Predicates = [prd] in
2934     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2935                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2936                [(set KRC:$dst, (OpNode KRC:$src))]>,
2937                Sched<[sched]>;
2938 }
2939
2940 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2941                                 SDPatternOperator OpNode,
2942                                 X86FoldableSchedWrite sched> {
2943   defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2944                             sched, HasDQI>, VEX, PD;
2945   defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2946                             sched, HasAVX512>, VEX, PS;
2947   defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2948                             sched, HasBWI>, VEX, PD, VEX_W;
2949   defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2950                             sched, HasBWI>, VEX, PS, VEX_W;
2951 }
2952
2953 // TODO - do we need an X86SchedWriteWidths::KMASK type?
2954 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2955
2956 // KNL does not support KMOVB; an 8-bit mask is promoted to a 16-bit mask.
2957 let Predicates = [HasAVX512, NoDQI] in
2958 def : Pat<(vnot VK8:$src),
2959           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2960
2961 def : Pat<(vnot VK4:$src),
2962           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2963 def : Pat<(vnot VK2:$src),
2964           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2965 def : Pat<(vnot VK1:$src),
2966           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
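
// In other words, for mask types narrower than 16 bits (and for v8i1 without
// DQI) the negation is done by reinterpreting the mask as VK16 and using
// KNOTW, roughly:
//   knotw %k1, %k2          # inverts all 16 bits; only the low 1/2/4/8 are used
// The extra high bits set by the wider KNOT are ignored by the narrow result
// type.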
2967
2968 // Mask binary operation
2969 // - KAND, KANDN, KOR, KXNOR, KXOR
2970 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2971                            RegisterClass KRC, SDPatternOperator OpNode,
2972                            X86FoldableSchedWrite sched, Predicate prd,
2973                            bit IsCommutable> {
2974   let Predicates = [prd], isCommutable = IsCommutable in
2975     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2976                !strconcat(OpcodeStr,
2977                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2978                [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
2979                Sched<[sched]>;
2980 }
2981
2982 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2983                                  SDPatternOperator OpNode,
2984                                  X86FoldableSchedWrite sched, bit IsCommutable,
2985                                  Predicate prdW = HasAVX512> {
2986   defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2987                              sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
2988   defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2989                              sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
2990   defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2991                              sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
2992   defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2993                              sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
2994 }
2995
2996 // These nodes use 'vnot' instead of 'not' to support vectors.
2997 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
2998 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
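// In other words, 'vandn' matches (~$i0 & $i1), mirroring the hardware KANDN
// operand order, and 'vxnor' matches ~($i0 ^ $i1); both are expressed with
// 'vnot' so they can match the vXi1 mask types used here.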
2999
3000 // TODO - do we need an X86SchedWriteWidths::KMASK type?
3001 defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
3002 defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
3003 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
3004 defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
3005 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
3006 defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
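// The defms above create records named K<OP><width>rr (KANDWrr, KXORQrr, ...).
// Note that KADD passes HasDQI as the W-form predicate, so unlike the logic
// ops even kaddw requires AVX512DQ; the D/Q forms of every op require BWI.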
3007
3008 multiclass avx512_binop_pat<SDPatternOperator VOpNode,
3009                             Instruction Inst> {
3010   // With AVX512F, an 8-bit mask is promoted to a 16-bit mask; with the
3011   // DQI extension this type is legal and the KxxxB instructions are used.
3012   let Predicates = [NoDQI] in
3013   def : Pat<(VOpNode VK8:$src1, VK8:$src2),
3014             (COPY_TO_REGCLASS
3015               (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3016                     (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3017
3018   // All types smaller than 8 bits require conversion anyway
3019   def : Pat<(VOpNode VK1:$src1, VK1:$src2),
3020         (COPY_TO_REGCLASS (Inst
3021                            (COPY_TO_REGCLASS VK1:$src1, VK16),
3022                            (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
3023   def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3024         (COPY_TO_REGCLASS (Inst
3025                            (COPY_TO_REGCLASS VK2:$src1, VK16),
3026                            (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
3027   def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3028         (COPY_TO_REGCLASS (Inst
3029                            (COPY_TO_REGCLASS VK4:$src1, VK16),
3030                            (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
3031 }
3032
3033 defm : avx512_binop_pat<and,   KANDWrr>;
3034 defm : avx512_binop_pat<vandn, KANDNWrr>;
3035 defm : avx512_binop_pat<or,    KORWrr>;
3036 defm : avx512_binop_pat<vxnor, KXNORWrr>;
3037 defm : avx512_binop_pat<xor,   KXORWrr>;
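// Illustrative effect: without DQI, (and VK8:$a, VK8:$b) is selected roughly as
//   (COPY_TO_REGCLASS (KANDWrr (COPY_TO_REGCLASS $a, VK16),
//                              (COPY_TO_REGCLASS $b, VK16)), VK8)
// i.e. the 8-bit op is done as a 16-bit kandw; with DQI the KANDBrr record
// defined above is used instead, and the sub-8-bit types always widen this way.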
3038
3039 // Mask unpacking
3040 multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
3041                              X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
3042                              Predicate prd> {
3043   let Predicates = [prd] in {
3044     let hasSideEffects = 0 in
3045     def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
3046                (ins Src.KRC:$src1, Src.KRC:$src2),
3047                "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3048                VEX_4V, VEX_L, Sched<[sched]>;
3049
3050     def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
3051               (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
3052   }
3053 }
3054
3055 defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
3056 defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
3057 defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
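// KUNPCKBW forms a 16-bit mask from two 8-bit masks, taking the low byte from
// its second source and the high byte from its first; since concat_vectors
// puts operand 0 in the low elements, the pattern above passes $src2/$src1 to
// the instruction in swapped order. WD and DQ behave analogously at 32/64 bits.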
3058
3059 // Mask bit testing
3060 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3061                               SDNode OpNode, X86FoldableSchedWrite sched,
3062                               Predicate prd> {
3063   let Predicates = [prd], Defs = [EFLAGS] in
3064     def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3065                !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3066                [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3067                Sched<[sched]>;
3068 }
3069
3070 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3071                                 X86FoldableSchedWrite sched,
3072                                 Predicate prdW = HasAVX512> {
3073   defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3074                                                                 VEX, PD;
3075   defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3076                                                                 VEX, PS;
3077   defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3078                                                                 VEX, PS, VEX_W;
3079   defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3080                                                                 VEX, PD, VEX_W;
3081 }
3082
3083 // TODO - do we need an X86SchedWriteWidths::KMASK type?
3084 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3085 defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
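// Reminder of the flag semantics these SDNodes model: kortest ORs its two mask
// operands and sets ZF when the result is all zeroes and CF when it is all
// ones; ktest (DQI) sets ZF when src1 & src2 is zero and CF when src1 & ~src2
// is zero. Neither writes a mask register, matching Defs = [EFLAGS] above.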
3086
3087 // Mask shift
3088 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3089                                SDNode OpNode, X86FoldableSchedWrite sched> {
3090   let Predicates = [HasAVX512] in
3091     def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3092                  !strconcat(OpcodeStr,
3093                             "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3094                             [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
3095                  Sched<[sched]>;
3096 }
3097
3098 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3099                                  SDNode OpNode, X86FoldableSchedWrite sched> {
3100   defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3101                                sched>, VEX, TAPD, VEX_W;
3102   let Predicates = [HasDQI] in
3103   defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3104                                sched>, VEX, TAPD;
3105   let Predicates = [HasBWI] in {
3106   defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3107                                sched>, VEX, TAPD, VEX_W;
3108   defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3109                                sched>, VEX, TAPD;
3110   }
3111 }
3112
3113 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3114 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
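// The immediate is a shift count in mask bits and vacated bits are zero-filled,
// e.g. roughly: kshiftlw $4, %k1, %k2  ; k2 = k1 << 4
// B/W share opc1 and D/Q share opc2 (0x32/0x33 for kshiftl, 0x30/0x31 for
// kshiftr), with VEX.W distinguishing W from B and Q from D, as encoded above.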
3115
3116 // Patterns for comparing 128/256-bit integer vectors using a 512-bit instruction.
3117 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3118                                                  string InstStr,
3119                                                  X86VectorVTInfo Narrow,
3120                                                  X86VectorVTInfo Wide> {
3121 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3122                                 (Narrow.VT Narrow.RC:$src2), cond)),
3123           (COPY_TO_REGCLASS
3124            (!cast<Instruction>(InstStr#"Zrri")
3125             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3126             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3127             (Frag.OperandTransform $cc)), Narrow.KRC)>;
3128
3129 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3130                            (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3131                                                     (Narrow.VT Narrow.RC:$src2),
3132                                                     cond)))),
3133           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3134            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3135            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3136            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3137            (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
3138 }
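// The idea: without VLX, the 128/256-bit compare is widened by inserting each
// operand into an IMPLICIT_DEF 512-bit register, running the Z-sized VPCMP,
// and copying the resulting mask into the narrow mask class. The undefined
// upper lanes only affect upper mask bits, which the narrow mask's users never
// read; the masked form additionally widens the incoming mask the same way.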
3139
3140 multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3141                                                      PatFrag CommFrag, PatFrag CommFrag_su,
3142                                                      string InstStr,
3143                                                      X86VectorVTInfo Narrow,
3144                                                      X86VectorVTInfo Wide> {
3145 // Broadcast load.
3146 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3147                                 (Narrow.BroadcastLdFrag addr:$src2), cond)),
3148           (COPY_TO_REGCLASS
3149            (!cast<Instruction>(InstStr#"Zrmib")
3150             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3151             addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>;
3152
3153 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3154                            (Narrow.KVT
3155                             (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3156                                          (Narrow.BroadcastLdFrag addr:$src2),
3157                                          cond)))),
3158           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3159            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3160            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3161            addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
3162
3163 // Commuted with broadcast load.
3164 def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3165                                     (Narrow.VT Narrow.RC:$src1),
3166                                     cond)),
3167           (COPY_TO_REGCLASS
3168            (!cast<Instruction>(InstStr#"Zrmib")
3169             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3170             addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>;
3171
3172 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3173                            (Narrow.KVT
3174                             (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3175                                              (Narrow.VT Narrow.RC:$src1), 
3176                                              cond)))),
3177           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3178            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3179            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3180            addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>;
3181 }
3182
3183 // Same as above, but for fp types which don't use PatFrags.
3184 multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3185                                                 X86VectorVTInfo Narrow,
3186                                                 X86VectorVTInfo Wide> {
3187 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3188                                (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3189           (COPY_TO_REGCLASS
3190            (!cast<Instruction>(InstStr#"Zrri")
3191             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3192             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3193             timm:$cc), Narrow.KRC)>;
3194
3195 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3196                            (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3197                                        (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3198           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3199            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3200            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3201            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3202            timm:$cc), Narrow.KRC)>;
3203
3204 // Broadcast load.
3205 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3206                                (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3207           (COPY_TO_REGCLASS
3208            (!cast<Instruction>(InstStr#"Zrmbi")
3209             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3210             addr:$src2, timm:$cc), Narrow.KRC)>;
3211
3212 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3213                            (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3214                                        (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3215           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3216            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3217            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3218            addr:$src2, timm:$cc), Narrow.KRC)>;
3219
3220 // Commuted with broadcast load.
3221 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3222                                (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3223           (COPY_TO_REGCLASS
3224            (!cast<Instruction>(InstStr#"Zrmbi")
3225             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3226             addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3227
3228 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3229                            (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3230                                        (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3231           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3232            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3233            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3234            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3235 }
3236
3237 let Predicates = [HasAVX512, NoVLX] in {
3238   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3239   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3240
3241   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3242   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3243
3244   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3245   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3246
3247   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3248   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3249
3250   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v8i32x_info, v16i32_info>;
3251   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3252
3253   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v4i32x_info, v16i32_info>;
3254   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3255
3256   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3257   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3258
3259   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3260   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3261
3262   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3263   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3264   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3265   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
3266 }
3267
3268 let Predicates = [HasBWI, NoVLX] in {
3269   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3270   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3271
3272   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3273   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3274
3275   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3276   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3277
3278   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3279   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3280 }
3281
3282 // Mask setting all 0s or 1s
3283 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3284   let Predicates = [HasAVX512] in
3285     let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3286         SchedRW = [WriteZero] in
3287       def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3288                      [(set KRC:$dst, (VT Val))]>;
3289 }
3290
3291 multiclass avx512_mask_setop_w<PatFrag Val> {
3292   defm W : avx512_mask_setop<VK16, v16i1, Val>;
3293   defm D : avx512_mask_setop<VK32,  v32i1, Val>;
3294   defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3295 }
3296
3297 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3298 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
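// These are cheap, rematerializable pseudos with no encoding of their own;
// they are expected to be expanded after register allocation (in the X86
// backend's pseudo expansion) into a self-idiom such as
//   kxorw  %k0, %k0, %k0    ; KSET0W -> all zeroes
//   kxnorw %k0, %k0, %k0    ; KSET1W -> all ones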
3299
3300 // With AVX-512, an 8-bit mask is promoted to a 16-bit mask.
3301 let Predicates = [HasAVX512] in {
3302   def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3303   def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3304   def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3305   def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3306   def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3307   def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3308   def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3309   def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3310 }
3311
3312 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
3313 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3314                                              RegisterClass RC, ValueType VT> {
3315   def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3316             (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3317
3318   def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3319             (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3320 }
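// Both directions are pure register-class changes: all mask classes live in
// the same k0-k7 bank, so extracting the low subvector or inserting into an
// undef wider vector needs no instruction beyond COPY_TO_REGCLASS.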
3321 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3322 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3323 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3324 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3325 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3326 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3327
3328 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3329 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3330 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3331 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3332 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3333
3334 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3335 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3336 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3337 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3338
3339 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3340 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3341 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3342
3343 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3344 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3345
3346 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3347
3348 //===----------------------------------------------------------------------===//
3349 // AVX-512 - Aligned and unaligned load and store
3350 //
3351
3352 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3353                        X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3354                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3355                        bit NoRMPattern = 0,
3356                        SDPatternOperator SelectOprr = vselect> {
3357   let hasSideEffects = 0 in {
3358   let isMoveReg = 1 in
3359   def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3360                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3361                     _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3362                     EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
3363   def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3364                       (ins _.KRCWM:$mask,  _.RC:$src),
3365                       !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3366                        "${dst} {${mask}} {z}, $src}"),
3367                        [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3368                                            (_.VT _.RC:$src),
3369                                            _.ImmAllZerosV)))], _.ExeDomain>,
3370                        EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3371
3372   let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3373   def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3374                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3375                     !if(NoRMPattern, [],
3376                         [(set _.RC:$dst,
3377                           (_.VT (ld_frag addr:$src)))]),
3378                     _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3379                     EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
3380
3381   let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3382     def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3383                       (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3384                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3385                       "${dst} {${mask}}, $src1}"),
3386                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3387                                           (_.VT _.RC:$src1),
3388                                           (_.VT _.RC:$src0))))], _.ExeDomain>,
3389                        EVEX, EVEX_K, Sched<[Sched.RR]>;
3390     def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3391                      (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3392                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3393                       "${dst} {${mask}}, $src1}"),
3394                      [(set _.RC:$dst, (_.VT
3395                          (vselect_mask _.KRCWM:$mask,
3396                           (_.VT (ld_frag addr:$src1)),
3397                            (_.VT _.RC:$src0))))], _.ExeDomain>,
3398                      EVEX, EVEX_K, Sched<[Sched.RM]>;
3399   }
3400   def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3401                   (ins _.KRCWM:$mask, _.MemOp:$src),
3402                   OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3403                                 "${dst} {${mask}} {z}, $src}",
3404                   [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3405                     (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3406                   _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3407   }
3408   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3409             (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3410
3411   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3412             (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3413
3414   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3415             (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3416              _.KRCWM:$mask, addr:$ptr)>;
3417 }
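// Suffix convention used by this multiclass: rr/rm are the unmasked register
// and memory forms, a trailing k is merge-masking (note the "$src0 = $dst"
// tie) and kz is zero-masking, e.g. a zero-masked load prints as
//   vmovaps (%rax), %zmm0 {%k1} {z}
// The trailing patterns let generic masked-load nodes reuse rmk/rmkz instead
// of requiring dedicated instructions.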
3418
3419 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3420                                  AVX512VLVectorVTInfo _, Predicate prd,
3421                                  X86SchedWriteMoveLSWidths Sched,
3422                                  string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3423   let Predicates = [prd] in
3424   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3425                        _.info512.AlignedLdFrag, masked_load_aligned,
3426                        Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3427
3428   let Predicates = [prd, HasVLX] in {
3429   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3430                           _.info256.AlignedLdFrag, masked_load_aligned,
3431                           Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3432   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3433                           _.info128.AlignedLdFrag, masked_load_aligned,
3434                           Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
3435   }
3436 }
3437
3438 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3439                           AVX512VLVectorVTInfo _, Predicate prd,
3440                           X86SchedWriteMoveLSWidths Sched,
3441                           string EVEX2VEXOvrd, bit NoRMPattern = 0,
3442                           SDPatternOperator SelectOprr = vselect> {
3443   let Predicates = [prd] in
3444   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3445                        masked_load, Sched.ZMM, "",
3446                        NoRMPattern, SelectOprr>, EVEX_V512;
3447
3448   let Predicates = [prd, HasVLX] in {
3449   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3450                          masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3451                          NoRMPattern, SelectOprr>, EVEX_V256;
3452   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3453                          masked_load, Sched.XMM, EVEX2VEXOvrd,
3454                          NoRMPattern, SelectOprr>, EVEX_V128;
3455   }
3456 }
3457
3458 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3459                         X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3460                         X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3461                         bit NoMRPattern = 0> {
3462   let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3463   let isMoveReg = 1 in
3464   def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3465                          OpcodeStr # "\t{$src, $dst|$dst, $src}",
3466                          [], _.ExeDomain>, EVEX,
3467                          FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3468                          EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3469   def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3470                          (ins _.KRCWM:$mask, _.RC:$src),
3471                          OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3472                          "${dst} {${mask}}, $src}",
3473                          [], _.ExeDomain>,  EVEX, EVEX_K,
3474                          FoldGenData<BaseName#_.ZSuffix#rrk>,
3475                          Sched<[Sched.RR]>;
3476   def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3477                           (ins _.KRCWM:$mask, _.RC:$src),
3478                           OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3479                           "${dst} {${mask}} {z}, $src}",
3480                           [], _.ExeDomain>, EVEX, EVEX_KZ,
3481                           FoldGenData<BaseName#_.ZSuffix#rrkz>,
3482                           Sched<[Sched.RR]>;
3483   }
3484
3485   let hasSideEffects = 0, mayStore = 1 in
3486   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3487                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3488                     !if(NoMRPattern, [],
3489                         [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3490                     _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3491                     EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3492   def mrk : AVX512PI<opc, MRMDestMem, (outs),
3493                      (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3494               OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3495                [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
3496                NotMemoryFoldable;
3497
3498   def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3499            (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3500                                                         _.KRCWM:$mask, _.RC:$src)>;
3501
3502   def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3503                   (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3504                    _.RC:$dst, _.RC:$src), 0>;
3505   def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3506                   (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3507                    _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3508   def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3509                   (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3510                    _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3511 }
3512
3513 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3514                             AVX512VLVectorVTInfo _, Predicate prd,
3515                             X86SchedWriteMoveLSWidths Sched,
3516                             string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3517   let Predicates = [prd] in
3518   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3519                         masked_store, Sched.ZMM, "",
3520                         NoMRPattern>, EVEX_V512;
3521   let Predicates = [prd, HasVLX] in {
3522     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3523                              masked_store, Sched.YMM,
3524                              EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3525     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3526                              masked_store, Sched.XMM, EVEX2VEXOvrd,
3527                              NoMRPattern>, EVEX_V128;
3528   }
3529 }
3530
3531 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3532                                   AVX512VLVectorVTInfo _, Predicate prd,
3533                                   X86SchedWriteMoveLSWidths Sched,
3534                                   string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3535   let Predicates = [prd] in
3536   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3537                         masked_store_aligned, Sched.ZMM, "",
3538                         NoMRPattern>, EVEX_V512;
3539
3540   let Predicates = [prd, HasVLX] in {
3541     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3542                              masked_store_aligned, Sched.YMM,
3543                              EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3544     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3545                              masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3546                              NoMRPattern>, EVEX_V128;
3547   }
3548 }
3549
3550 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3551                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3552                avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3553                                       HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3554                PS, EVEX_CD8<32, CD8VF>;
3555
3556 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3557                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3558                avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3559                                       HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3560                PD, VEX_W, EVEX_CD8<64, CD8VF>;
3561
3562 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3563                               SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3564                avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3565                                SchedWriteFMoveLS, "VMOVUPS">,
3566                                PS, EVEX_CD8<32, CD8VF>;
3567
3568 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3569                               SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3570                avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3571                                SchedWriteFMoveLS, "VMOVUPD">,
3572                PD, VEX_W, EVEX_CD8<64, CD8VF>;
3573
3574 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3575                                        HasAVX512, SchedWriteVecMoveLS,
3576                                        "VMOVDQA", 1>,
3577                  avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3578                                         HasAVX512, SchedWriteVecMoveLS,
3579                                         "VMOVDQA", 1>,
3580                  PD, EVEX_CD8<32, CD8VF>;
3581
3582 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3583                                        HasAVX512, SchedWriteVecMoveLS,
3584                                        "VMOVDQA">,
3585                  avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3586                                         HasAVX512, SchedWriteVecMoveLS,
3587                                         "VMOVDQA">,
3588                  PD, VEX_W, EVEX_CD8<64, CD8VF>;
3589
3590 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3591                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
3592                 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3593                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3594                 XD, EVEX_CD8<8, CD8VF>;
3595
3596 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3597                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3598                  avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3599                                  SchedWriteVecMoveLS, "VMOVDQU", 1>,
3600                  XD, VEX_W, EVEX_CD8<16, CD8VF>;
3601
3602 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3603                                 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3604                  avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3605                                  SchedWriteVecMoveLS, "VMOVDQU", 1>,
3606                  XS, EVEX_CD8<32, CD8VF>;
3607
3608 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3609                                 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3610                  avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3611                                  SchedWriteVecMoveLS, "VMOVDQU">,
3612                  XS, VEX_W, EVEX_CD8<64, CD8VF>;
3613
3614 // Special instructions to help with spilling when we don't have VLX. We need
3615 // to load into or store from a ZMM register instead. These are converted in
3616 // expandPostRAPseudos.
3617 let isReMaterializable = 1, canFoldAsLoad = 1,
3618     isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3619 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3620                             "", []>, Sched<[WriteFLoadX]>;
3621 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3622                             "", []>, Sched<[WriteFLoadY]>;
3623 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3624                             "", []>, Sched<[WriteFLoadX]>;
3625 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3626                             "", []>, Sched<[WriteFLoadY]>;
3627 }
3628
3629 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3630 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3631                             "", []>, Sched<[WriteFStoreX]>;
3632 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3633                             "", []>, Sched<[WriteFStoreY]>;
3634 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3635                             "", []>, Sched<[WriteFStoreX]>;
3636 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3637                             "", []>, Sched<[WriteFStoreY]>;
3638 }
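// Example use: spilling or reloading a value that lives in an EVEX-only
// register (e.g. %xmm16-%xmm31) without VLX goes through these pseudos; after
// register allocation they are rewritten (per the comment above, in
// expandPostRAPseudos) into real moves that work through the containing ZMM
// register, since the 128/256-bit EVEX moves need VLX.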
3639
3640 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3641                           (v8i64 VR512:$src))),
3642    (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3643                                               VK8), VR512:$src)>;
3644
3645 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3646                            (v16i32 VR512:$src))),
3647                   (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3648
3649 // These patterns exist to prevent the above patterns from introducing a second
3650 // mask inversion when one already exists.
3651 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3652                           (v8i64 immAllZerosV),
3653                           (v8i64 VR512:$src))),
3654                  (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3655 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3656                            (v16i32 immAllZerosV),
3657                            (v16i32 VR512:$src))),
3658                   (VMOVDQA32Zrrkz VK16:$mask, VR512:$src)>;
3659
3660 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3661                               X86VectorVTInfo Wide> {
3662  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3663                                Narrow.RC:$src1, Narrow.RC:$src0)),
3664            (EXTRACT_SUBREG
3665             (Wide.VT
3666              (!cast<Instruction>(InstrStr#"rrk")
3667               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3668               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3669               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3670             Narrow.SubRegIdx)>;
3671
3672  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3673                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3674            (EXTRACT_SUBREG
3675             (Wide.VT
3676              (!cast<Instruction>(InstrStr#"rrkz")
3677               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3678               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3679             Narrow.SubRegIdx)>;
3680 }
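// Same widening trick as the compare lowering above: the narrow select's
// inputs are inserted into IMPLICIT_DEF 512-bit registers, the masked 512-bit
// move (rrk/rrkz) performs the blend under the widened mask, and
// EXTRACT_SUBREG recovers the narrow result; the garbage upper lanes are
// simply discarded.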
3681
3682 // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
3683 // available. Use a 512-bit operation and extract.
3684 let Predicates = [HasAVX512, NoVLX] in {
3685   defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3686   defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3687   defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3688   defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3689
3690   defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3691   defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3692   defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3693   defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3694 }
3695
3696 let Predicates = [HasBWI, NoVLX] in {
3697   defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3698   defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3699
3700   defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3701   defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3702 }
3703
3704 let Predicates = [HasAVX512] in {
3705   // 512-bit load.
3706   def : Pat<(alignedloadv16i32 addr:$src),
3707             (VMOVDQA64Zrm addr:$src)>;
3708   def : Pat<(alignedloadv32i16 addr:$src),
3709             (VMOVDQA64Zrm addr:$src)>;
3710   def : Pat<(alignedloadv64i8 addr:$src),
3711             (VMOVDQA64Zrm addr:$src)>;
3712   def : Pat<(loadv16i32 addr:$src),
3713             (VMOVDQU64Zrm addr:$src)>;
3714   def : Pat<(loadv32i16 addr:$src),
3715             (VMOVDQU64Zrm addr:$src)>;
3716   def : Pat<(loadv64i8 addr:$src),
3717             (VMOVDQU64Zrm addr:$src)>;
3718
3719   // 512-bit store.
3720   def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3721             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3722   def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3723             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3724   def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3725             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3726   def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3727             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3728   def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3729             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3730   def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3731             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3732 }
3733
3734 let Predicates = [HasVLX] in {
3735   // 128-bit load.
3736   def : Pat<(alignedloadv4i32 addr:$src),
3737             (VMOVDQA64Z128rm addr:$src)>;
3738   def : Pat<(alignedloadv8i16 addr:$src),
3739             (VMOVDQA64Z128rm addr:$src)>;
3740   def : Pat<(alignedloadv16i8 addr:$src),
3741             (VMOVDQA64Z128rm addr:$src)>;
3742   def : Pat<(loadv4i32 addr:$src),
3743             (VMOVDQU64Z128rm addr:$src)>;
3744   def : Pat<(loadv8i16 addr:$src),
3745             (VMOVDQU64Z128rm addr:$src)>;
3746   def : Pat<(loadv16i8 addr:$src),
3747             (VMOVDQU64Z128rm addr:$src)>;
3748
3749   // 128-bit store.
3750   def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3751             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3752   def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3753             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3754   def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3755             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3756   def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3757             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3758   def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3759             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3760   def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3761             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3762
3763   // 256-bit load.
3764   def : Pat<(alignedloadv8i32 addr:$src),
3765             (VMOVDQA64Z256rm addr:$src)>;
3766   def : Pat<(alignedloadv16i16 addr:$src),
3767             (VMOVDQA64Z256rm addr:$src)>;
3768   def : Pat<(alignedloadv32i8 addr:$src),
3769             (VMOVDQA64Z256rm addr:$src)>;
3770   def : Pat<(loadv8i32 addr:$src),
3771             (VMOVDQU64Z256rm addr:$src)>;
3772   def : Pat<(loadv16i16 addr:$src),
3773             (VMOVDQU64Z256rm addr:$src)>;
3774   def : Pat<(loadv32i8 addr:$src),
3775             (VMOVDQU64Z256rm addr:$src)>;
3776
3777   // 256-bit store.
3778   def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3779             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3780   def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3781             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3782   def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3783             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3784   def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3785             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3786   def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3787             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3788   def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3789             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3790 }
3791
3792 // Move Int Doubleword to Packed Double Int
3793 //
3794 let ExeDomain = SSEPackedInt in {
3795 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3796                       "vmovd\t{$src, $dst|$dst, $src}",
3797                       [(set VR128X:$dst,
3798                         (v4i32 (scalar_to_vector GR32:$src)))]>,
3799                         EVEX, Sched<[WriteVecMoveFromGpr]>;
3800 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3801                       "vmovd\t{$src, $dst|$dst, $src}",
3802                       [(set VR128X:$dst,
3803                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3804                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3805 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3806                       "vmovq\t{$src, $dst|$dst, $src}",
3807                         [(set VR128X:$dst,
3808                           (v2i64 (scalar_to_vector GR64:$src)))]>,
3809                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3810 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3811 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3812                       (ins i64mem:$src),
3813                       "vmovq\t{$src, $dst|$dst, $src}", []>,
3814                       EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
3815 let isCodeGenOnly = 1 in {
3816 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3817                        "vmovq\t{$src, $dst|$dst, $src}",
3818                        [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3819                        EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3820 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3821                          "vmovq\t{$src, $dst|$dst, $src}",
3822                          [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3823                          EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
3824 }
3825 } // ExeDomain = SSEPackedInt
3826
3827 // Move Int Doubleword to Single Scalar
3828 //
3829 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3830 def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3831                       "vmovd\t{$src, $dst|$dst, $src}",
3832                       [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3833                       EVEX, Sched<[WriteVecMoveFromGpr]>;
3834 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3835
3836 // Move doubleword from xmm register to r/m32
3837 //
3838 let ExeDomain = SSEPackedInt in {
3839 def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3840                        "vmovd\t{$src, $dst|$dst, $src}",
3841                        [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3842                                         (iPTR 0)))]>,
3843                        EVEX, Sched<[WriteVecMoveToGpr]>;
3844 def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3845                        (ins i32mem:$dst, VR128X:$src),
3846                        "vmovd\t{$src, $dst|$dst, $src}",
3847                        [(store (i32 (extractelt (v4i32 VR128X:$src),
3848                                      (iPTR 0))), addr:$dst)]>,
3849                        EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3850 } // ExeDomain = SSEPackedInt
3851
3852 // Move quadword from xmm1 register to r/m64
3853 //
3854 let ExeDomain = SSEPackedInt in {
3855 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3856                       "vmovq\t{$src, $dst|$dst, $src}",
3857                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3858                                                    (iPTR 0)))]>,
3859                       PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
3860                       Requires<[HasAVX512]>;
3861
3862 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3863 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3864                       "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
3865                       EVEX, VEX_W, Sched<[WriteVecStore]>,
3866                       Requires<[HasAVX512, In64BitMode]>;
3867
3868 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3869                       (ins i64mem:$dst, VR128X:$src),
3870                       "vmovq\t{$src, $dst|$dst, $src}",
3871                       [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3872                               addr:$dst)]>,
3873                       EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3874                       Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
3875
3876 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3877 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3878                              (ins VR128X:$src),
3879                              "vmovq\t{$src, $dst|$dst, $src}", []>,
3880                              EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
3881 } // ExeDomain = SSEPackedInt
3882
3883 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3884                 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3885
3886 let Predicates = [HasAVX512] in {
3887   def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3888             (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3889 }
3890
3891 // Move Scalar Single to Double Int
3892 //
3893 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3894 def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3895                       (ins FR32X:$src),
3896                       "vmovd\t{$src, $dst|$dst, $src}",
3897                       [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3898                       EVEX, Sched<[WriteVecMoveToGpr]>;
3899 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3900
3901 // Move Quadword Int to Packed Quadword Int
3902 //
3903 let ExeDomain = SSEPackedInt in {
3904 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3905                       (ins i64mem:$src),
3906                       "vmovq\t{$src, $dst|$dst, $src}",
3907                       [(set VR128X:$dst,
3908                         (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3909                       EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3910 } // ExeDomain = SSEPackedInt
3911
3912 // Allow "vmovd" but print "vmovq".
3913 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3914                 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3915 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3916                 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3917
3918 // Conversions between masks and scalar fp.
3919 def : Pat<(v32i1 (bitconvert FR32X:$src)),
3920           (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
3921 def : Pat<(f32 (bitconvert VK32:$src)),
3922           (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
3923
3924 def : Pat<(v64i1 (bitconvert FR64X:$src)),
3925           (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
3926 def : Pat<(f64 (bitconvert VK64:$src)),
3927           (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
3928
3929 //===----------------------------------------------------------------------===//
3930 // AVX-512  MOVSS, MOVSD
3931 //===----------------------------------------------------------------------===//
3932
3933 multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3934                               X86VectorVTInfo _> {
3935   let Predicates = [HasAVX512, OptForSize] in
3936   def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3937              (ins _.RC:$src1, _.RC:$src2),
3938              !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3939              [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3940              _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
3941   def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3942               (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3943               !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3944               "$dst {${mask}} {z}, $src1, $src2}"),
3945               [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3946                                       (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3947                                       _.ImmAllZerosV)))],
3948               _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
3949   let Constraints = "$src0 = $dst"  in
3950   def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3951              (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3952              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3953              "$dst {${mask}}, $src1, $src2}"),
3954              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3955                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3956                                      (_.VT _.RC:$src0))))],
3957              _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
3958   let canFoldAsLoad = 1, isReMaterializable = 1 in {
3959   def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3960              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3961              [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3962              _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3963   // _alt version uses FR32/FR64 register class.
3964   let isCodeGenOnly = 1 in
3965   def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3966                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3967                  [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3968                  _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3969   }
3970   let mayLoad = 1, hasSideEffects = 0 in {
3971     let Constraints = "$src0 = $dst" in
3972     def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3973                (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3974                !strconcat(asm, "\t{$src, $dst {${mask}}|",
3975                "$dst {${mask}}, $src}"),
3976                [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3977     def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3978                (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3979                !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3980                "$dst {${mask}} {z}, $src}"),
3981                [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3982   }
3983   def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3984              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3985              [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
3986              EVEX, Sched<[WriteFStore]>;
3987   let mayStore = 1, hasSideEffects = 0 in
3988   def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3989               (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
3990               !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3991               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
3992               NotMemoryFoldable;
3993 }
3994
3995 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
3996                                   VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
3997
3998 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
3999                                   VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
4000
4001
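// The lowering multiclasses below are selection-only patterns: they map
// masked scalar selects (X86selects on a single VK1WM bit) and masked
// 128-bit loads/stores of the low element onto the masked VMOVSS/VMOVSD
// forms defined above, rather than defining new instructions.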
4002 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
4003                                        PatLeaf ZeroFP, X86VectorVTInfo _> {
4004
4005 def : Pat<(_.VT (OpNode _.RC:$src0,
4006                         (_.VT (scalar_to_vector
4007                                   (_.EltVT (X86selects VK1WM:$mask,
4008                                                        (_.EltVT _.FRC:$src1),
4009                                                        (_.EltVT _.FRC:$src2))))))),
4010           (!cast<Instruction>(InstrStr#rrk)
4011                         (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
4012                         VK1WM:$mask,
4013                         (_.VT _.RC:$src0),
4014                         (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4015
4016 def : Pat<(_.VT (OpNode _.RC:$src0,
4017                         (_.VT (scalar_to_vector
4018                                   (_.EltVT (X86selects VK1WM:$mask,
4019                                                        (_.EltVT _.FRC:$src1),
4020                                                        (_.EltVT ZeroFP))))))),
4021           (!cast<Instruction>(InstrStr#rrkz)
4022                         VK1WM:$mask,
4023                         (_.VT _.RC:$src0),
4024                         (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4025 }
4026
4027 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4028                                         dag Mask, RegisterClass MaskRC> {
4029
4030 def : Pat<(masked_store
4031              (_.info512.VT (insert_subvector undef,
4032                                (_.info128.VT _.info128.RC:$src),
4033                                (iPTR 0))), addr:$dst, Mask),
4034           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4035                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4036                       _.info128.RC:$src)>;
4037
4038 }
4039
4040 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4041                                                AVX512VLVectorVTInfo _,
4042                                                dag Mask, RegisterClass MaskRC,
4043                                                SubRegIndex subreg> {
4044
4045 def : Pat<(masked_store
4046              (_.info512.VT (insert_subvector undef,
4047                                (_.info128.VT _.info128.RC:$src),
4048                                (iPTR 0))), addr:$dst, Mask),
4049           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4050                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4051                       _.info128.RC:$src)>;
4052
4053 }
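// The *_subreg variants take the mask in a GR8/GR16 register; the
// INSERT_SUBREG of IMPLICIT_DEF widens it to a 32-bit GPR so it can be
// copied into the VK1WM mask class, where only bit 0 is used.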
4054
4055 // This matches the more recent codegen from clang that avoids emitting a
4056 // 512-bit masked store directly. Codegen widens a 128-bit masked store to
4057 // 512 bits on AVX512F-only targets.
4058 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4059                                                AVX512VLVectorVTInfo _,
4060                                                dag Mask512, dag Mask128,
4061                                                RegisterClass MaskRC,
4062                                                SubRegIndex subreg> {
4063
4064 // AVX512F pattern.
4065 def : Pat<(masked_store
4066              (_.info512.VT (insert_subvector undef,
4067                                (_.info128.VT _.info128.RC:$src),
4068                                (iPTR 0))), addr:$dst, Mask512),
4069           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4070                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4071                       _.info128.RC:$src)>;
4072
4073 // AVX512VL pattern.
4074 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4075           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4076                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4077                       _.info128.RC:$src)>;
4078 }
4079
4080 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4081                                        dag Mask, RegisterClass MaskRC> {
4082
4083 def : Pat<(_.info128.VT (extract_subvector
4084                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
4085                                         _.info512.ImmAllZerosV)),
4086                            (iPTR 0))),
4087           (!cast<Instruction>(InstrStr#rmkz)
4088                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4089                       addr:$srcAddr)>;
4090
4091 def : Pat<(_.info128.VT (extract_subvector
4092                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4093                       (_.info512.VT (insert_subvector undef,
4094                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4095                             (iPTR 0))))),
4096                 (iPTR 0))),
4097           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4098                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4099                       addr:$srcAddr)>;
4100
4101 }
4102
4103 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4104                                               AVX512VLVectorVTInfo _,
4105                                               dag Mask, RegisterClass MaskRC,
4106                                               SubRegIndex subreg> {
4107
4108 def : Pat<(_.info128.VT (extract_subvector
4109                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
4110                                         _.info512.ImmAllZerosV)),
4111                            (iPTR 0))),
4112           (!cast<Instruction>(InstrStr#rmkz)
4113                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4114                       addr:$srcAddr)>;
4115
4116 def : Pat<(_.info128.VT (extract_subvector
4117                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4118                       (_.info512.VT (insert_subvector undef,
4119                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4120                             (iPTR 0))))),
4121                 (iPTR 0))),
4122           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4123                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4124                       addr:$srcAddr)>;
4125
4126 }
4127
4128 // This matches the more recent codegen from clang that avoids emitting a
4129 // 512-bit masked load directly. Codegen widens a 128-bit masked load to
4130 // 512 bits on AVX512F-only targets.
4131 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4132                                               AVX512VLVectorVTInfo _,
4133                                               dag Mask512, dag Mask128,
4134                                               RegisterClass MaskRC,
4135                                               SubRegIndex subreg> {
4136 // AVX512F patterns.
4137 def : Pat<(_.info128.VT (extract_subvector
4138                          (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4139                                         _.info512.ImmAllZerosV)),
4140                            (iPTR 0))),
4141           (!cast<Instruction>(InstrStr#rmkz)
4142                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4143                       addr:$srcAddr)>;
4144
4145 def : Pat<(_.info128.VT (extract_subvector
4146                 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4147                       (_.info512.VT (insert_subvector undef,
4148                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4149                             (iPTR 0))))),
4150                 (iPTR 0))),
4151           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4152                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4153                       addr:$srcAddr)>;
4154
4155 // AVX512VL patterns.
4156 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4157                          _.info128.ImmAllZerosV)),
4158           (!cast<Instruction>(InstrStr#rmkz)
4159                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4160                       addr:$srcAddr)>;
4161
4162 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4163                          (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4164           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4165                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4166                       addr:$srcAddr)>;
4167 }
4168
4169 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4170 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4171
4172 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4173                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4174 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4175                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4176 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4177                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4178
4179 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4180                    (v16i1 (insert_subvector
4181                            (v16i1 immAllZerosV),
4182                            (v4i1 (extract_subvector
4183                                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4184                                   (iPTR 0))),
4185                            (iPTR 0))),
4186                    (v4i1 (extract_subvector
4187                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4188                           (iPTR 0))), GR8, sub_8bit>;
4189 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4190                    (v8i1
4191                     (extract_subvector
4192                      (v16i1
4193                       (insert_subvector
4194                        (v16i1 immAllZerosV),
4195                        (v2i1 (extract_subvector
4196                               (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4197                               (iPTR 0))),
4198                        (iPTR 0))),
4199                      (iPTR 0))),
4200                    (v2i1 (extract_subvector
4201                           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4202                           (iPTR 0))), GR8, sub_8bit>;
4203
4204 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4205                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4206 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4207                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4208 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4209                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4210
4211 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4212                    (v16i1 (insert_subvector
4213                            (v16i1 immAllZerosV),
4214                            (v4i1 (extract_subvector
4215                                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4216                                   (iPTR 0))),
4217                            (iPTR 0))),
4218                    (v4i1 (extract_subvector
4219                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4220                           (iPTR 0))), GR8, sub_8bit>;
4221 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4222                    (v8i1
4223                     (extract_subvector
4224                      (v16i1
4225                       (insert_subvector
4226                        (v16i1 immAllZerosV),
4227                        (v2i1 (extract_subvector
4228                               (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4229                               (iPTR 0))),
4230                        (iPTR 0))),
4231                      (iPTR 0))),
4232                    (v2i1 (extract_subvector
4233                           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4234                           (iPTR 0))), GR8, sub_8bit>;
4235
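// Scalar selects on FR32X/FR64X values are lowered by bouncing through
// VR128X: the operands are copied into 128-bit registers, a masked
// VMOVSS/VMOVSD performs the select on the low element (the IMPLICIT_DEF
// supplies don't-care upper bits), and the low element is copied back out.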
4236 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4237           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4238            (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4239            VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4240            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4241
4242 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4243           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4244            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4245
4246 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4247           (COPY_TO_REGCLASS
4248            (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4249                                                        VK1WM:$mask, addr:$src)),
4250            FR32X)>;
4251 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4252           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4253
4254 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4255           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4256            (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4257            VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4258            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4259
4260 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4261           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4262            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4263
4264 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4265           (COPY_TO_REGCLASS
4266            (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4267                                                        VK1WM:$mask, addr:$src)),
4268            FR64X)>;
4269 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4270           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
4271
4272
4273 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4274           (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4275 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4276           (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4277
4278 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4279           (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4280 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4281           (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4282
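// The *_REV forms below are the store-opcode (0x11) encodings of the
// register-register moves. They are code-gen-only, kept for disassembly and
// for the "vmovss.s"/"vmovsd.s" assembler aliases further down; FoldGenData
// records the regular form each one corresponds to.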
4283 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4284   def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4285                            (ins VR128X:$src1, VR128X:$src2),
4286                            "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4287                            []>, XS, EVEX_4V, VEX_LIG,
4288                            FoldGenData<"VMOVSSZrr">,
4289                            Sched<[SchedWriteFShuffle.XMM]>;
4290
4291   let Constraints = "$src0 = $dst" in
4292   def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4293                              (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4294                                                    VR128X:$src1, VR128X:$src2),
4295                              "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4296                                         "$dst {${mask}}, $src1, $src2}",
4297                              []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4298                              FoldGenData<"VMOVSSZrrk">,
4299                              Sched<[SchedWriteFShuffle.XMM]>;
4300
4301   def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4302                          (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4303                          "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4304                                     "$dst {${mask}} {z}, $src1, $src2}",
4305                          []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4306                          FoldGenData<"VMOVSSZrrkz">,
4307                          Sched<[SchedWriteFShuffle.XMM]>;
4308
4309   def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4310                            (ins VR128X:$src1, VR128X:$src2),
4311                            "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4312                            []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4313                            FoldGenData<"VMOVSDZrr">,
4314                            Sched<[SchedWriteFShuffle.XMM]>;
4315
4316   let Constraints = "$src0 = $dst" in
4317   def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4318                              (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4319                                                    VR128X:$src1, VR128X:$src2),
4320                              "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4321                                         "$dst {${mask}}, $src1, $src2}",
4322                              []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4323                              VEX_W, FoldGenData<"VMOVSDZrrk">,
4324                              Sched<[SchedWriteFShuffle.XMM]>;
4325
4326   def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4327                               (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4328                                                           VR128X:$src2),
4329                               "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4330                                          "$dst {${mask}} {z}, $src1, $src2}",
4331                               []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4332                               VEX_W, FoldGenData<"VMOVSDZrrkz">,
4333                               Sched<[SchedWriteFShuffle.XMM]>;
4334 }
4335
4336 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4337                 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4338 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4339                              "$dst {${mask}}, $src1, $src2}",
4340                 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4341                                 VR128X:$src1, VR128X:$src2), 0>;
4342 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4343                              "$dst {${mask}} {z}, $src1, $src2}",
4344                 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4345                                  VR128X:$src1, VR128X:$src2), 0>;
4346 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4347                 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4348 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4349                              "$dst {${mask}}, $src1, $src2}",
4350                 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4351                                 VR128X:$src1, VR128X:$src2), 0>;
4352 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4353                              "$dst {${mask}} {z}, $src1, $src2}",
4354                 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4355                                  VR128X:$src1, VR128X:$src2), 0>;
4356
4357 let Predicates = [HasAVX512, OptForSize] in {
4358   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4359             (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4360   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4361             (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4362
4363   // Move low f32 and clear high bits.
4364   def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4365             (SUBREG_TO_REG (i32 0),
4366              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4367               (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4368   def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4369             (SUBREG_TO_REG (i32 0),
4370              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4371               (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4372
4373   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4374             (SUBREG_TO_REG (i32 0),
4375              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4376               (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4377   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4378             (SUBREG_TO_REG (i32 0),
4379              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4380               (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4381 }
4382
4383 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4384 // VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
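// The blend immediates below pick out only the low element: bit 0 selects
// the low 32-bit BLENDPS lane, and bits 1:0 (0x3) select the two 16-bit
// PBLENDW words that make it up.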
4385 let Predicates = [HasAVX512, OptForSpeed] in {
4386   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4387             (SUBREG_TO_REG (i32 0),
4388              (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4389                           (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4390                           (i8 1))), sub_xmm)>;
4391   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4392             (SUBREG_TO_REG (i32 0),
4393              (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4394                           (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4395                           (i8 3))), sub_xmm)>;
4396 }
4397
4398 let Predicates = [HasAVX512] in {
4399   def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4400             (VMOVSSZrm addr:$src)>;
4401   def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4402             (VMOVSDZrm addr:$src)>;
4403
4404   // Represent the same patterns as above, but in the form they appear for
4405   // 256-bit types.
4406   def : Pat<(v8f32 (X86vzload32 addr:$src)),
4407             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4408   def : Pat<(v4f64 (X86vzload64 addr:$src)),
4409             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4410
4411   // Represent the same patterns as above, but in the form they appear for
4412   // 512-bit types.
4413   def : Pat<(v16f32 (X86vzload32 addr:$src)),
4414             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4415   def : Pat<(v8f64 (X86vzload64 addr:$src)),
4416             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4417 }
4418
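// VMOVZPQILo2PQIZrr is the register form of vmovq: it copies the low 64 bits
// of the source and zeroes everything above them, which is exactly the
// X86vzmovl node it matches.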
4419 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4420 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4421                                 (ins VR128X:$src),
4422                                 "vmovq\t{$src, $dst|$dst, $src}",
4423                                 [(set VR128X:$dst, (v2i64 (X86vzmovl
4424                                                    (v2i64 VR128X:$src))))]>,
4425                                 EVEX, VEX_W;
4426 }
4427
4428 let Predicates = [HasAVX512] in {
4429   def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4430             (VMOVDI2PDIZrr GR32:$src)>;
4431
4432   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4433             (VMOV64toPQIZrr GR64:$src)>;
4434
4435   // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4436   def : Pat<(v4i32 (X86vzload32 addr:$src)),
4437             (VMOVDI2PDIZrm addr:$src)>;
4438   def : Pat<(v8i32 (X86vzload32 addr:$src)),
4439             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4440   def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4441             (VMOVZPQILo2PQIZrr VR128X:$src)>;
4442   def : Pat<(v2i64 (X86vzload64 addr:$src)),
4443             (VMOVQI2PQIZrm addr:$src)>;
4444   def : Pat<(v4i64 (X86vzload64 addr:$src)),
4445             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4446
4447   // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4448   def : Pat<(v16i32 (X86vzload32 addr:$src)),
4449             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4450   def : Pat<(v8i64 (X86vzload64 addr:$src)),
4451             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4452
4453   def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4454             (SUBREG_TO_REG (i32 0),
4455              (v2f64 (VMOVZPQILo2PQIZrr
4456                      (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4457              sub_xmm)>;
4458   def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4459             (SUBREG_TO_REG (i32 0),
4460              (v2i64 (VMOVZPQILo2PQIZrr
4461                      (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4462              sub_xmm)>;
4463
4464   def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4465             (SUBREG_TO_REG (i32 0),
4466              (v2f64 (VMOVZPQILo2PQIZrr
4467                      (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4468              sub_xmm)>;
4469   def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4470             (SUBREG_TO_REG (i32 0),
4471              (v2i64 (VMOVZPQILo2PQIZrr
4472                      (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4473              sub_xmm)>;
4474 }
4475
4476 //===----------------------------------------------------------------------===//
4477 // AVX-512 - Non-temporals
4478 //===----------------------------------------------------------------------===//
4479
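// VMOVNTDQA is the non-temporal aligned vector load; VMOVNTDQ/VMOVNTPS/
// VMOVNTPD below are the non-temporal aligned stores. The patterns at the
// end of this section only match the aligned non-temporal fragments, so
// regular loads and stores keep using the ordinary move instructions.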
4480 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4481                       (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4482                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4483                       EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4484
4485 let Predicates = [HasVLX] in {
4486   def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4487                        (ins i256mem:$src),
4488                        "vmovntdqa\t{$src, $dst|$dst, $src}",
4489                        [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4490                        EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4491
4492   def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4493                       (ins i128mem:$src),
4494                       "vmovntdqa\t{$src, $dst|$dst, $src}",
4495                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4496                       EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4497 }
4498
4499 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4500                         X86SchedWriteMoveLS Sched,
4501                         PatFrag st_frag = alignednontemporalstore> {
4502   let SchedRW = [Sched.MR], AddedComplexity = 400 in
4503   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4504                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4505                     [(st_frag (_.VT _.RC:$src), addr:$dst)],
4506                     _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4507 }
4508
4509 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4510                            AVX512VLVectorVTInfo VTInfo,
4511                            X86SchedWriteMoveLSWidths Sched> {
4512   let Predicates = [HasAVX512] in
4513     defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4514
4515   let Predicates = [HasAVX512, HasVLX] in {
4516     defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4517     defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4518   }
4519 }
4520
4521 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4522                                 SchedWriteVecMoveLSNT>, PD;
4523 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4524                                 SchedWriteFMoveLSNT>, PD, VEX_W;
4525 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4526                                 SchedWriteFMoveLSNT>, PS;
4527
4528 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4529   def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4530             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4531   def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4532             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4533   def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4534             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4535
4536   def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4537             (VMOVNTDQAZrm addr:$src)>;
4538   def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4539             (VMOVNTDQAZrm addr:$src)>;
4540   def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4541             (VMOVNTDQAZrm addr:$src)>;
4542   def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4543             (VMOVNTDQAZrm addr:$src)>;
4544   def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4545             (VMOVNTDQAZrm addr:$src)>;
4546   def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4547             (VMOVNTDQAZrm addr:$src)>;
4548 }
4549
4550 let Predicates = [HasVLX], AddedComplexity = 400 in {
4551   def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4552             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4553   def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4554             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4555   def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4556             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4557
4558   def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4559             (VMOVNTDQAZ256rm addr:$src)>;
4560   def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4561             (VMOVNTDQAZ256rm addr:$src)>;
4562   def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4563             (VMOVNTDQAZ256rm addr:$src)>;
4564   def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4565             (VMOVNTDQAZ256rm addr:$src)>;
4566   def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4567             (VMOVNTDQAZ256rm addr:$src)>;
4568   def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4569             (VMOVNTDQAZ256rm addr:$src)>;
4570
4571   def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4572             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4573   def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4574             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4575   def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4576             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4577
4578   def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4579             (VMOVNTDQAZ128rm addr:$src)>;
4580   def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4581             (VMOVNTDQAZ128rm addr:$src)>;
4582   def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4583             (VMOVNTDQAZ128rm addr:$src)>;
4584   def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4585             (VMOVNTDQAZ128rm addr:$src)>;
4586   def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4587             (VMOVNTDQAZ128rm addr:$src)>;
4588   def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4589             (VMOVNTDQAZ128rm addr:$src)>;
4590 }
4591
4592 //===----------------------------------------------------------------------===//
4593 // AVX-512 - Integer arithmetic
4594 //===----------------------------------------------------------------------===//
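// Naming convention for the integer binop multiclasses: rr is the
// register-register form, rm reads the second operand from memory, and rmb
// broadcasts a scalar memory operand (EVEX.b). The *_vl wrappers instantiate
// the 512-bit form under the base predicate and the 256/128-bit forms under
// HasVLX, with Z/Z256/Z128 suffixes.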
4595 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4596                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4597                            bit IsCommutable = 0> {
4598   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4599                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4600                     "$src2, $src1", "$src1, $src2",
4601                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4602                     IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
4603                     Sched<[sched]>;
4604
4605   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4606                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4607                   "$src2, $src1", "$src1, $src2",
4608                   (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4609                   AVX512BIBase, EVEX_4V,
4610                   Sched<[sched.Folded, sched.ReadAfterFold]>;
4611 }
4612
4613 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4614                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
4615                             bit IsCommutable = 0> :
4616            avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4617   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4618                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4619                   "${src2}"#_.BroadcastStr#", $src1",
4620                   "$src1, ${src2}"#_.BroadcastStr,
4621                   (_.VT (OpNode _.RC:$src1,
4622                                 (_.BroadcastLdFrag addr:$src2)))>,
4623                   AVX512BIBase, EVEX_4V, EVEX_B,
4624                   Sched<[sched.Folded, sched.ReadAfterFold]>;
4625 }
4626
4627 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4628                               AVX512VLVectorVTInfo VTInfo,
4629                               X86SchedWriteWidths sched, Predicate prd,
4630                               bit IsCommutable = 0> {
4631   let Predicates = [prd] in
4632     defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4633                              IsCommutable>, EVEX_V512;
4634
4635   let Predicates = [prd, HasVLX] in {
4636     defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4637                                 sched.YMM, IsCommutable>, EVEX_V256;
4638     defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4639                                 sched.XMM, IsCommutable>, EVEX_V128;
4640   }
4641 }
4642
4643 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4644                                AVX512VLVectorVTInfo VTInfo,
4645                                X86SchedWriteWidths sched, Predicate prd,
4646                                bit IsCommutable = 0> {
4647   let Predicates = [prd] in
4648     defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4649                              IsCommutable>, EVEX_V512;
4650
4651   let Predicates = [prd, HasVLX] in {
4652     defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4653                                  sched.YMM, IsCommutable>, EVEX_V256;
4654     defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4655                                  sched.XMM, IsCommutable>, EVEX_V128;
4656   }
4657 }
4658
4659 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4660                                 X86SchedWriteWidths sched, Predicate prd,
4661                                 bit IsCommutable = 0> {
4662   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4663                                   sched, prd, IsCommutable>,
4664                                   VEX_W, EVEX_CD8<64, CD8VF>;
4665 }
4666
4667 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4668                                 X86SchedWriteWidths sched, Predicate prd,
4669                                 bit IsCommutable = 0> {
4670   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4671                                   sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4672 }
4673
4674 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4675                                 X86SchedWriteWidths sched, Predicate prd,
4676                                 bit IsCommutable = 0> {
4677   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4678                                  sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4679                                  VEX_WIG;
4680 }
4681
4682 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4683                                 X86SchedWriteWidths sched, Predicate prd,
4684                                 bit IsCommutable = 0> {
4685   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4686                                  sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4687                                  VEX_WIG;
4688 }
4689
4690 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4691                                  SDNode OpNode, X86SchedWriteWidths sched,
4692                                  Predicate prd, bit IsCommutable = 0> {
4693   defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4694                                    IsCommutable>;
4695
4696   defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4697                                    IsCommutable>;
4698 }
4699
4700 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4701                                  SDNode OpNode, X86SchedWriteWidths sched,
4702                                  Predicate prd, bit IsCommutable = 0> {
4703   defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4704                                    IsCommutable>;
4705
4706   defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4707                                    IsCommutable>;
4708 }
4709
4710 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4711                                   bits<8> opc_d, bits<8> opc_q,
4712                                   string OpcodeStr, SDNode OpNode,
4713                                   X86SchedWriteWidths sched,
4714                                   bit IsCommutable = 0> {
4715   defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4716                                     sched, HasAVX512, IsCommutable>,
4717               avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4718                                     sched, HasBWI, IsCommutable>;
4719 }
4720
4721 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4722                             X86FoldableSchedWrite sched,
4723                             SDNode OpNode, X86VectorVTInfo _Src,
4724                             X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4725                             bit IsCommutable = 0> {
4726   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4727                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4728                             "$src2, $src1","$src1, $src2",
4729                             (_Dst.VT (OpNode
4730                                          (_Src.VT _Src.RC:$src1),
4731                                          (_Src.VT _Src.RC:$src2))),
4732                             IsCommutable>,
4733                             AVX512BIBase, EVEX_4V, Sched<[sched]>;
4734   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4735                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4736                         "$src2, $src1", "$src1, $src2",
4737                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4738                                       (_Src.LdFrag addr:$src2)))>,
4739                         AVX512BIBase, EVEX_4V,
4740                         Sched<[sched.Folded, sched.ReadAfterFold]>;
4741
4742   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4743                     (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4744                     OpcodeStr,
4745                     "${src2}"#_Brdct.BroadcastStr#", $src1",
4746                      "$src1, ${src2}"#_Brdct.BroadcastStr,
4747                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4748                                  (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
4749                     AVX512BIBase, EVEX_4V, EVEX_B,
4750                     Sched<[sched.Folded, sched.ReadAfterFold]>;
4751 }
4752
4753 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4754                                     SchedWriteVecALU, 1>;
4755 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4756                                     SchedWriteVecALU, 0>;
4757 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
4758                                     SchedWriteVecALU, HasBWI, 1>;
4759 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
4760                                     SchedWriteVecALU, HasBWI, 0>;
4761 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
4762                                      SchedWriteVecALU, HasBWI, 1>;
4763 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
4764                                      SchedWriteVecALU, HasBWI, 0>;
4765 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4766                                     SchedWritePMULLD, HasAVX512, 1>, T8PD;
4767 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4768                                     SchedWriteVecIMul, HasBWI, 1>;
4769 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4770                                     SchedWriteVecIMul, HasDQI, 1>, T8PD,
4771                                     NotEVEX2VEXConvertible;
4772 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4773                                     HasBWI, 1>;
4774 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4775                                      HasBWI, 1>;
4776 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4777                                       SchedWriteVecIMul, HasBWI, 1>, T8PD;
4778 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4779                                    SchedWriteVecALU, HasBWI, 1>;
4780 defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4781                                     SchedWriteVecIMul, HasAVX512, 1>, T8PD;
4782 defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4783                                      SchedWriteVecIMul, HasAVX512, 1>;
4784
4785 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4786                             X86SchedWriteWidths sched,
4787                             AVX512VLVectorVTInfo _SrcVTInfo,
4788                             AVX512VLVectorVTInfo _DstVTInfo,
4789                             SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
4790   let Predicates = [prd] in
4791     defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4792                                  _SrcVTInfo.info512, _DstVTInfo.info512,
4793                                  v8i64_info, IsCommutable>,
4794                                   EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4795   let Predicates = [HasVLX, prd] in {
4796     defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4797                                       _SrcVTInfo.info256, _DstVTInfo.info256,
4798                                       v4i64x_info, IsCommutable>,
4799                                       EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4800     defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4801                                       _SrcVTInfo.info128, _DstVTInfo.info128,
4802                                       v2i64x_info, IsCommutable>,
4803                                      EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
4804   }
4805 }
4806
4807 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4808                                 avx512vl_i8_info, avx512vl_i8_info,
4809                                 X86multishift, HasVBMI, 0>, T8PD;
4810
4811 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4812                             X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4813                             X86FoldableSchedWrite sched> {
4814   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4815                     (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4816                     OpcodeStr,
4817                     "${src2}"#_Src.BroadcastStr#", $src1",
4818                      "$src1, ${src2}"#_Src.BroadcastStr,
4819                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4820                                  (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
4821                     EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4822                     Sched<[sched.Folded, sched.ReadAfterFold]>;
4823 }
4824
4825 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4826                             SDNode OpNode, X86VectorVTInfo _Src,
4827                             X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4828                             bit IsCommutable = 0> {
4829   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4830                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4831                             "$src2, $src1","$src1, $src2",
4832                             (_Dst.VT (OpNode
4833                                          (_Src.VT _Src.RC:$src1),
4834                                          (_Src.VT _Src.RC:$src2))),
4835                             IsCommutable, IsCommutable>,
4836                             EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
4837   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4838                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4839                         "$src2, $src1", "$src1, $src2",
4840                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4841                                       (_Src.LdFrag addr:$src2)))>,
4842                          EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4843                          Sched<[sched.Folded, sched.ReadAfterFold]>;
4844 }
4845
4846 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4847                                     SDNode OpNode> {
4848   let Predicates = [HasBWI] in
4849   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4850                                  v32i16_info, SchedWriteShuffle.ZMM>,
4851                 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4852                                  v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4853   let Predicates = [HasBWI, HasVLX] in {
4854     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4855                                      v16i16x_info, SchedWriteShuffle.YMM>,
4856                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4857                                       v16i16x_info, SchedWriteShuffle.YMM>,
4858                                       EVEX_V256;
4859     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4860                                      v8i16x_info, SchedWriteShuffle.XMM>,
4861                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4862                                       v8i16x_info, SchedWriteShuffle.XMM>,
4863                                       EVEX_V128;
4864   }
4865 }
4866 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4867                             SDNode OpNode> {
4868   let Predicates = [HasBWI] in
4869   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4870                                 SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
4871   let Predicates = [HasBWI, HasVLX] in {
4872     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4873                                      v32i8x_info, SchedWriteShuffle.YMM>,
4874                                      EVEX_V256, VEX_WIG;
4875     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4876                                      v16i8x_info, SchedWriteShuffle.XMM>,
4877                                      EVEX_V128, VEX_WIG;
4878   }
4879 }
4880
4881 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4882                             SDNode OpNode, AVX512VLVectorVTInfo _Src,
4883                             AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4884   let Predicates = [HasBWI] in
4885   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4886                                 _Dst.info512, SchedWriteVecIMul.ZMM,
4887                                 IsCommutable>, EVEX_V512;
4888   let Predicates = [HasBWI, HasVLX] in {
4889     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4890                                      _Dst.info256, SchedWriteVecIMul.YMM,
4891                                      IsCommutable>, EVEX_V256;
4892     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4893                                      _Dst.info128, SchedWriteVecIMul.XMM,
4894                                      IsCommutable>, EVEX_V128;
4895   }
4896 }
4897
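// The pack and pmadd multiclasses above take separate _Src and _Dst infos
// because the element type changes across the operation: the packs narrow
// i32->i16 or i16->i8, while the pmadds widen i8->i16 or i16->i32.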
4898 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4899 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4900 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4901 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4902
4903 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4904                      avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
4905 defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4906                      avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
4907
4908 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4909                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
4910 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4911                                     SchedWriteVecALU, HasBWI, 1>;
4912 defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4913                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
4914 defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4915                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
4916                                     NotEVEX2VEXConvertible;
4917
4918 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4919                                     SchedWriteVecALU, HasBWI, 1>;
4920 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4921                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
4922 defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4923                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
4924 defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4925                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
4926                                     NotEVEX2VEXConvertible;
4927
4928 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4929                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
4930 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4931                                     SchedWriteVecALU, HasBWI, 1>;
4932 defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
4933                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
4934 defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
4935                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
4936                                     NotEVEX2VEXConvertible;
4937
4938 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4939                                     SchedWriteVecALU, HasBWI, 1>;
4940 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4941                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
4942 defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
4943                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
4944 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
4945                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
4946                                     NotEVEX2VEXConvertible;
4947
4948 // PMULLQ: Use the 512-bit version to implement 128/256-bit ops when VLX is unavailable.
4949 let Predicates = [HasDQI, NoVLX] in {
4950   def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4951             (EXTRACT_SUBREG
4952                 (VPMULLQZrr
4953                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4954                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4955              sub_ymm)>;
4956   def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
4957             (EXTRACT_SUBREG
4958                 (VPMULLQZrmb
4959                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4960                     addr:$src2),
4961              sub_ymm)>;
4962
4963   def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4964             (EXTRACT_SUBREG
4965                 (VPMULLQZrr
4966                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4967                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4968              sub_xmm)>;
4969   def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
4970             (EXTRACT_SUBREG
4971                 (VPMULLQZrmb
4972                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4973                     addr:$src2),
4974              sub_xmm)>;
4975 }
4976
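// Same trick as the PMULLQ patterns above: 64-bit element min/max only have
// EVEX encodings, so without VLX the 128/256-bit operations are widened into
// the 512-bit instruction and the result is extracted back out.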
4977 multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
4978   def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
4979             (EXTRACT_SUBREG
4980                 (!cast<Instruction>(Instr#"rr")
4981                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4982                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4983              sub_ymm)>;
4984   def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
4985             (EXTRACT_SUBREG
4986                 (!cast<Instruction>(Instr#"rmb")
4987                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4988                     addr:$src2),
4989              sub_ymm)>;
4990
4991   def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
4992             (EXTRACT_SUBREG
4993                 (!cast<Instruction>(Instr#"rr")
4994                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4995                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4996              sub_xmm)>;
4997   def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
4998             (EXTRACT_SUBREG
4999                 (!cast<Instruction>(Instr#"rmb")
5000                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5001                     addr:$src2),
5002              sub_xmm)>;
5003 }
5004
5005 let Predicates = [HasAVX512, NoVLX] in {
5006   defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5007   defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5008   defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5009   defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5010 }
5011
5012 //===----------------------------------------------------------------------===//
5013 // AVX-512  Logical Instructions
5014 //===----------------------------------------------------------------------===//
5015
5016 defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5017                                    SchedWriteVecLogic, HasAVX512, 1>;
5018 defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5019                                   SchedWriteVecLogic, HasAVX512, 1>;
5020 defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5021                                    SchedWriteVecLogic, HasAVX512, 1>;
5022 defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5023                                     SchedWriteVecLogic, HasAVX512>;
5024
5025 let Predicates = [HasVLX] in {
5026   def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5027             (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5028   def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5029             (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5030
5031   def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5032             (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5033   def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5034             (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5035
5036   def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5037             (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5038   def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5039             (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5040
5041   def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5042             (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5043   def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5044             (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5045
5046   def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5047             (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5048   def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5049             (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5050
5051   def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5052             (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5053   def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5054             (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5055
5056   def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5057             (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5058   def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5059             (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5060
5061   def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5062             (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5063   def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5064             (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5065
5066   def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5067             (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5068   def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5069             (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5070
5071   def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5072             (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5073   def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5074             (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5075
5076   def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5077             (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5078   def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5079             (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5080
5081   def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5082             (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5083   def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5084             (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5085
5086   def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5087             (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5088   def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5089             (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5090
5091   def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5092             (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5093   def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5094             (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5095
5096   def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5097             (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5098   def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5099             (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5100
5101   def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5102             (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5103   def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5104             (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5105 }
5106
5107 let Predicates = [HasAVX512] in {
5108   def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5109             (VPANDQZrr VR512:$src1, VR512:$src2)>;
5110   def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5111             (VPANDQZrr VR512:$src1, VR512:$src2)>;
5112
5113   def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5114             (VPORQZrr VR512:$src1, VR512:$src2)>;
5115   def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5116             (VPORQZrr VR512:$src1, VR512:$src2)>;
5117
5118   def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5119             (VPXORQZrr VR512:$src1, VR512:$src2)>;
5120   def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5121             (VPXORQZrr VR512:$src1, VR512:$src2)>;
5122
5123   def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5124             (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5125   def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5126             (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5127
5128   def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5129             (VPANDQZrm VR512:$src1, addr:$src2)>;
5130   def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5131             (VPANDQZrm VR512:$src1, addr:$src2)>;
5132
5133   def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5134             (VPORQZrm VR512:$src1, addr:$src2)>;
5135   def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5136             (VPORQZrm VR512:$src1, addr:$src2)>;
5137
5138   def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5139             (VPXORQZrm VR512:$src1, addr:$src2)>;
5140   def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5141             (VPXORQZrm VR512:$src1, addr:$src2)>;
5142
5143   def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5144             (VPANDNQZrm VR512:$src1, addr:$src2)>;
5145   def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5146             (VPANDNQZrm VR512:$src1, addr:$src2)>;
5147 }
5148
5149 // Patterns to catch a vselect whose type differs from that of the logic op.
5150 multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5151                                     X86VectorVTInfo _,
5152                                     X86VectorVTInfo IntInfo> {
5153   // Masked register-register logical operations.
5154   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5155                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5156                    _.RC:$src0)),
5157             (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5158              _.RC:$src1, _.RC:$src2)>;
5159
5160   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5161                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5162                    _.ImmAllZerosV)),
5163             (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5164              _.RC:$src2)>;
5165
5166   // Masked register-memory logical operations.
5167   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5168                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5169                                             (load addr:$src2)))),
5170                    _.RC:$src0)),
5171             (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5172              _.RC:$src1, addr:$src2)>;
5173   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5174                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5175                                             (load addr:$src2)))),
5176                    _.ImmAllZerosV)),
5177             (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5178              addr:$src2)>;
5179 }
5180
5181 multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5182                                          X86VectorVTInfo _,
5183                                          X86VectorVTInfo IntInfo> {
5184   // Register-broadcast logical operations.
5185   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5186                    (bitconvert
5187                     (IntInfo.VT (OpNode _.RC:$src1,
5188                                  (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5189                    _.RC:$src0)),
5190             (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5191              _.RC:$src1, addr:$src2)>;
5192   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5193                    (bitconvert
5194                     (IntInfo.VT (OpNode _.RC:$src1,
5195                                  (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5196                    _.ImmAllZerosV)),
5197             (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5198              _.RC:$src1, addr:$src2)>;
5199 }
5200
5201 multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5202                                          AVX512VLVectorVTInfo SelectInfo,
5203                                          AVX512VLVectorVTInfo IntInfo> {
5204 let Predicates = [HasVLX] in {
5205   defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5206                                  IntInfo.info128>;
5207   defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5208                                  IntInfo.info256>;
5209 }
5210 let Predicates = [HasAVX512] in {
5211   defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5212                                  IntInfo.info512>;
5213 }
5214 }
5215
5216 multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5217                                                AVX512VLVectorVTInfo SelectInfo,
5218                                                AVX512VLVectorVTInfo IntInfo> {
5219 let Predicates = [HasVLX] in {
5220   defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5221                                        SelectInfo.info128, IntInfo.info128>;
5222   defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5223                                        SelectInfo.info256, IntInfo.info256>;
5224 }
5225 let Predicates = [HasAVX512] in {
5226   defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5227                                        SelectInfo.info512, IntInfo.info512>;
5228 }
5229 }
5230
5231 multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5232   // i64 vselect with i32/i16/i8 logic op
5233   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5234                                        avx512vl_i32_info>;
5235   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5236                                        avx512vl_i16_info>;
5237   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5238                                        avx512vl_i8_info>;
5239
5240   // i32 vselect with i64/i16/i8 logic op
5241   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5242                                        avx512vl_i64_info>;
5243   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5244                                        avx512vl_i16_info>;
5245   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5246                                        avx512vl_i8_info>;
5247
5248   // f32 vselect with i64/i32/i16/i8 logic op
5249   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5250                                        avx512vl_i64_info>;
5251   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5252                                        avx512vl_i32_info>;
5253   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5254                                        avx512vl_i16_info>;
5255   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5256                                        avx512vl_i8_info>;
5257
5258   // f64 vselect with i64/i32/i16/i8 logic op
5259   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5260                                        avx512vl_i64_info>;
5261   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5262                                        avx512vl_i32_info>;
5263   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5264                                        avx512vl_i16_info>;
5265   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5266                                        avx512vl_i8_info>;
5267
5268   defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5269                                              avx512vl_f32_info,
5270                                              avx512vl_i32_info>;
5271   defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5272                                              avx512vl_f64_info,
5273                                              avx512vl_i64_info>;
5274 }
5275
5276 defm : avx512_logical_lowering_types<"VPAND", and>;
5277 defm : avx512_logical_lowering_types<"VPOR",  or>;
5278 defm : avx512_logical_lowering_types<"VPXOR", xor>;
5279 defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
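     // Illustrative sketch (not verified compiler output): a bitwise AND
     // computed on v16i32 but masked through a v8i64-typed vselect, roughly
     //   %t = and <16 x i32> %a, %b
     //   %c = bitcast <16 x i32> %t to <8 x i64>
     //   %r = select <8 x i1> %k, <8 x i64> %c, <8 x i64> %src0
     // matches the masked VPANDQZrrk form directly instead of an unmasked AND
     // followed by a separate masked blend.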
5280
5281 //===----------------------------------------------------------------------===//
5282 // AVX-512  FP arithmetic
5283 //===----------------------------------------------------------------------===//
5284
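     // Scalar (SS/SD) arithmetic forms: the *_Int variants operate on the full
     // VR128X register and carry the masked variants used by the intrinsic
     // patterns, while the isCodeGenOnly rr/rm forms work on FR32X/FR64X for
     // plain scalar IR.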
5285 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
5286                             SDNode OpNode, SDNode VecNode,
5287                             X86FoldableSchedWrite sched, bit IsCommutable> {
5288   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5289   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5290                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5291                            "$src2, $src1", "$src1, $src2",
5292                            (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5293                            Sched<[sched]>;
5294
5295   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5296                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5297                          "$src2, $src1", "$src1, $src2",
5298                          (_.VT (VecNode _.RC:$src1,
5299                                         (_.ScalarIntMemFrags addr:$src2)))>,
5300                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5301   let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5302   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5303                          (ins _.FRC:$src1, _.FRC:$src2),
5304                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5305                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5306                           Sched<[sched]> {
5307     let isCommutable = IsCommutable;
5308   }
5309   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5310                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5311                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5312                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5313                          (_.ScalarLdFrag addr:$src2)))]>,
5314                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5315   }
5316   }
5317 }
5318
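     // The *_round and *_sae variants below add the embedded rounding-control
     // operand ($rc) and the suppress-all-exceptions ({sae}) form; both are
     // encoded with EVEX.b and exist only for register-register operands.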
5319 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
5320                                   SDNode VecNode, X86FoldableSchedWrite sched,
5321                                   bit IsCommutable = 0> {
5322   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5323   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5324                           (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5325                           "$rc, $src2, $src1", "$src1, $src2, $rc",
5326                           (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5327                           (i32 timm:$rc))>,
5328                           EVEX_B, EVEX_RC, Sched<[sched]>;
5329 }
5330 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
5331                                 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5332                                 X86FoldableSchedWrite sched, bit IsCommutable,
5333                                 string EVEX2VexOvrd> {
5334   let ExeDomain = _.ExeDomain in {
5335   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5336                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5337                            "$src2, $src1", "$src1, $src2",
5338                            (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5339                            Sched<[sched]>, SIMD_EXC;
5340
5341   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5342                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5343                          "$src2, $src1", "$src1, $src2",
5344                          (_.VT (VecNode _.RC:$src1,
5345                                         (_.ScalarIntMemFrags addr:$src2)))>,
5346                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5347
5348   let isCodeGenOnly = 1, Predicates = [HasAVX512],
5349       Uses = [MXCSR], mayRaiseFPException = 1 in {
5350   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5351                          (ins _.FRC:$src1, _.FRC:$src2),
5352                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5353                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5354                           Sched<[sched]>,
5355                           EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
5356     let isCommutable = IsCommutable;
5357   }
5358   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5359                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5360                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5361                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5362                          (_.ScalarLdFrag addr:$src2)))]>,
5363                          Sched<[sched.Folded, sched.ReadAfterFold]>,
5364                          EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
5365   }
5366
5367   let Uses = [MXCSR] in
5368   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5369                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5370                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5371                             (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5372                             EVEX_B, Sched<[sched]>;
5373   }
5374 }
5375
5376 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
5377                                 SDNode VecNode, SDNode RndNode,
5378                                 X86SchedWriteSizes sched, bit IsCommutable> {
5379   defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5380                               sched.PS.Scl, IsCommutable>,
5381              avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5382                               sched.PS.Scl, IsCommutable>,
5383                               XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5384   defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5385                               sched.PD.Scl, IsCommutable>,
5386              avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5387                               sched.PD.Scl, IsCommutable>,
5388                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5389 }
5390
5391 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5392                               SDNode VecNode, SDNode SaeNode,
5393                               X86SchedWriteSizes sched, bit IsCommutable> {
5394   defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5395                               VecNode, SaeNode, sched.PS.Scl, IsCommutable,
5396                               NAME#"SS">,
5397                               XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5398   defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5399                               VecNode, SaeNode, sched.PD.Scl, IsCommutable,
5400                               NAME#"SD">,
5401                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5402 }
5403 defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5404                                  SchedWriteFAddSizes, 1>;
5405 defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5406                                  SchedWriteFMulSizes, 1>;
5407 defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5408                                  SchedWriteFAddSizes, 0>;
5409 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5410                                  SchedWriteFDivSizes, 0>;
5411 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5412                                SchedWriteFCmpSizes, 0>;
5413 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5414                                SchedWriteFCmpSizes, 0>;
5415
5416 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5417 // X86fminc and X86fmaxc instead of X86fmin and X86fmax.
5418 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5419                                     X86VectorVTInfo _, SDNode OpNode,
5420                                     X86FoldableSchedWrite sched,
5421                                     string EVEX2VEXOvrd> {
5422   let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5423   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5424                          (ins _.FRC:$src1, _.FRC:$src2),
5425                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5426                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5427                           Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
5428     let isCommutable = 1;
5429   }
5430   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5431                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5432                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5433                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5434                          (_.ScalarLdFrag addr:$src2)))]>,
5435                          Sched<[sched.Folded, sched.ReadAfterFold]>,
5436                          EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
5437   }
5438 }
5439 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5440                                          SchedWriteFCmp.Scl, "VMINCSS">, XS,
5441                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5442
5443 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5444                                          SchedWriteFCmp.Scl, "VMINCSD">, XD,
5445                                          VEX_W, EVEX_4V, VEX_LIG,
5446                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5447
5448 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5449                                          SchedWriteFCmp.Scl, "VMAXCSS">, XS,
5450                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5451
5452 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5453                                          SchedWriteFCmp.Scl, "VMAXCSD">, XD,
5454                                          VEX_W, EVEX_4V, VEX_LIG,
5455                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;
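     // For example, with fast-math flags a selection pattern such as
     //   float m = a < b ? a : b;
     // can be matched as the commutable X86fminc node, which maps to VMINCSSZ
     // above and lets the operands be swapped freely; the plain VMINSS form
     // must keep operand order because the x86 result depends on it when an
     // operand is NaN or a signed zero.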
5456
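     // Packed FP binary operations: register/register, register/memory, and
     // broadcast-from-memory forms, each with the merge- and zero-masking
     // variants produced by AVX512_maskable_split.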
5457 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5458                             SDPatternOperator MaskOpNode,
5459                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
5460                             bit IsCommutable,
5461                             bit IsKCommutable = IsCommutable> {
5462   let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5463       Uses = [MXCSR], mayRaiseFPException = 1 in {
5464   defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5465                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5466                   "$src2, $src1", "$src1, $src2",
5467                   (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5468                   (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
5469                   IsKCommutable, IsKCommutable>,
5470                   EVEX_4V, Sched<[sched]>;
5471   let mayLoad = 1 in {
5472     defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5473                     (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5474                     "$src2, $src1", "$src1, $src2",
5475                     (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5476                     (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5477                     EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5478     defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5479                      (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5480                      "${src2}"#_.BroadcastStr#", $src1",
5481                      "$src1, ${src2}"#_.BroadcastStr,
5482                      (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5483                      (MaskOpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5484                      EVEX_4V, EVEX_B,
5485                      Sched<[sched.Folded, sched.ReadAfterFold]>;
5486     }
5487   }
5488 }
5489
5490 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5491                                   SDPatternOperator OpNodeRnd,
5492                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5493   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5494   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5495                   (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#_.Suffix,
5496                   "$rc, $src2, $src1", "$src1, $src2, $rc",
5497                   (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
5498                   EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
5499 }
5500
5501 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5502                                 SDPatternOperator OpNodeSAE,
5503                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5504   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5505   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5506                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5507                   "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5508                   (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5509                   EVEX_4V, EVEX_B, Sched<[sched]>;
5510 }
5511
5512 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5513                              SDPatternOperator MaskOpNode,
5514                              Predicate prd, X86SchedWriteSizes sched,
5515                              bit IsCommutable = 0,
5516                              bit IsPD128Commutable = IsCommutable> {
5517   let Predicates = [prd] in {
5518   defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5519                               sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5520                               EVEX_CD8<32, CD8VF>;
5521   defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5522                               sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5523                               EVEX_CD8<64, CD8VF>;
5524   }
5525
5526   // Define only if AVX512VL feature is present.
5527   let Predicates = [prd, HasVLX] in {
5528     defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5529                                    sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5530                                    EVEX_CD8<32, CD8VF>;
5531     defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5532                                    sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5533                                    EVEX_CD8<32, CD8VF>;
5534     defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5535                                    sched.PD.XMM, IsPD128Commutable,
5536                                    IsCommutable>, EVEX_V128, PD, VEX_W,
5537                                    EVEX_CD8<64, CD8VF>;
5538     defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5539                                    sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5540                                    EVEX_CD8<64, CD8VF>;
5541   }
5542 }
5543
5544 let Uses = [MXCSR] in
5545 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5546                                    X86SchedWriteSizes sched> {
5547   defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5548                                     v16f32_info>,
5549                                     EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5550   defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5551                                     v8f64_info>,
5552                                     EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5553 }
5554
5555 let Uses = [MXCSR] in
5556 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5557                                  X86SchedWriteSizes sched> {
5558   defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5559                                   v16f32_info>,
5560                                   EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5561   defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5562                                   v8f64_info>,
5563                                   EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5564 }
5565
5566 defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5567                               SchedWriteFAddSizes, 1>,
5568             avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5569 defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5570                               SchedWriteFMulSizes, 1>,
5571             avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5572 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5573                               SchedWriteFAddSizes>,
5574             avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5575 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5576                               SchedWriteFDivSizes>,
5577             avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5578 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5579                               SchedWriteFCmpSizes, 0>,
5580             avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5581 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5582                               SchedWriteFCmpSizes, 0>,
5583             avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
5584 let isCodeGenOnly = 1 in {
5585   defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5586                                  SchedWriteFCmpSizes, 1>;
5587   defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5588                                  SchedWriteFCmpSizes, 1>;
5589 }
5590 let Uses = []<Register>, mayRaiseFPException = 0 in {
5591 defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5592                                SchedWriteFLogicSizes, 1>;
5593 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5594                                SchedWriteFLogicSizes, 0>;
5595 defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5596                                SchedWriteFLogicSizes, 1>;
5597 defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5598                                SchedWriteFLogicSizes, 1>;
5599 }
5600
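     // VSCALEF scales each element (or the low element, for the scalar forms)
     // by a power of two taken from the second source, i.e. roughly
     // dst = src1 * 2^floor(src2).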
5601 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5602                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5603   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5604   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5605                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5606                   "$src2, $src1", "$src1, $src2",
5607                   (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5608                   EVEX_4V, Sched<[sched]>;
5609   defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5610                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5611                   "$src2, $src1", "$src1, $src2",
5612                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5613                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5614   defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5615                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5616                    "${src2}"#_.BroadcastStr#", $src1",
5617                    "$src1, ${src2}"#_.BroadcastStr,
5618                    (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5619                    EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5620   }
5621 }
5622
5623 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5624                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5625   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5626   defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5627                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5628                   "$src2, $src1", "$src1, $src2",
5629                   (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5630                   Sched<[sched]>;
5631   defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5632                   (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
5633                   "$src2, $src1", "$src1, $src2",
5634                   (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
5635                   Sched<[sched.Folded, sched.ReadAfterFold]>;
5636   }
5637 }
5638
5639 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5640                                 X86SchedWriteWidths sched> {
5641   defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5642              avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5643                               EVEX_V512, EVEX_CD8<32, CD8VF>;
5644   defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5645              avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5646                               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5647   defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5648              avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
5649                                     X86scalefsRnd, sched.Scl>,
5650                                     EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5651   defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5652              avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
5653                                     X86scalefsRnd, sched.Scl>,
5654                                     EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;
5655
5656   // Define only if AVX512VL feature is present.
5657   let Predicates = [HasVLX] in {
5658     defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5659                                    EVEX_V128, EVEX_CD8<32, CD8VF>;
5660     defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5661                                    EVEX_V256, EVEX_CD8<32, CD8VF>;
5662     defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5663                                    EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5664     defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5665                                    EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
5666   }
5667 }
5668 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
5669                                     SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
5670
5671 //===----------------------------------------------------------------------===//
5672 // AVX-512  VPTESTM instructions
5673 //===----------------------------------------------------------------------===//
5674
5675 multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5676                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
5677                          string Name> {
5678   // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5679   // There are just too many permutations due to commutability and bitcasts.
5680   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5681   defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5682                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5683                       "$src2, $src1", "$src1, $src2",
5684                    (null_frag), (null_frag), 1>,
5685                    EVEX_4V, Sched<[sched]>;
5686   let mayLoad = 1 in
5687   defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5688                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5689                        "$src2, $src1", "$src1, $src2",
5690                    (null_frag), (null_frag)>,
5691                    EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5692                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5693   }
5694 }
5695
5696 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5697                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5698   let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5699   defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5700                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5701                     "${src2}"#_.BroadcastStr#", $src1",
5702                     "$src1, ${src2}"#_.BroadcastStr,
5703                     (null_frag), (null_frag)>,
5704                     EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5705                     Sched<[sched.Folded, sched.ReadAfterFold]>;
5706 }
5707
5708 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5709                                   X86SchedWriteWidths sched,
5710                                   AVX512VLVectorVTInfo _> {
5711   let Predicates  = [HasAVX512] in
5712   defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
5713            avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5714
5715   let Predicates = [HasAVX512, HasVLX] in {
5716   defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
5717               avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5718   defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
5719               avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
5720   }
5721 }
5722
5723 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5724                             X86SchedWriteWidths sched> {
5725   defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5726                                  avx512vl_i32_info>;
5727   defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5728                                  avx512vl_i64_info>, VEX_W;
5729 }
5730
5731 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5732                             X86SchedWriteWidths sched> {
5733   let Predicates = [HasBWI] in {
5734   defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5735                             v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
5736   defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5737                             v64i8_info, NAME#"B">, EVEX_V512;
5738   }
5739   let Predicates = [HasVLX, HasBWI] in {
5740
5741   defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5742                             v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
5743   defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5744                             v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
5745   defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5746                             v32i8x_info, NAME#"B">, EVEX_V256;
5747   defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5748                             v16i8x_info, NAME#"B">, EVEX_V128;
5749   }
5750 }
5751
5752 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5753                                    X86SchedWriteWidths sched> :
5754   avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5755   avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
5756
5757 defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5758                                          SchedWriteVecLogic>, T8PD;
5759 defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5760                                          SchedWriteVecLogic>, T8XS;
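     // Illustrative sketch (not verified compiler output): vptestmd writes
     // ((src1[i] & src2[i]) != 0) into bit i of the mask destination and
     // vptestnmd tests for == 0, so a sequence like
     //   %t = and <16 x i32> %a, %b
     //   %m = icmp ne <16 x i32> %t, zeroinitializer
     // can become a single VPTESTMD without a separate compare against zero.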
5761
5762 //===----------------------------------------------------------------------===//
5763 // AVX-512  Shift instructions
5764 //===----------------------------------------------------------------------===//
5765
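     // Immediate shift forms: "ri" shifts a register by an 8-bit immediate,
     // "mi" shifts a loaded vector, and the "mbi" variant below shifts a
     // broadcast scalar load, each with the usual masked variants.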
5766 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5767                             string OpcodeStr, SDNode OpNode,
5768                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5769   let ExeDomain = _.ExeDomain in {
5770   defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5771                    (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5772                       "$src2, $src1", "$src1, $src2",
5773                    (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
5774                    Sched<[sched]>;
5775   defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5776                    (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5777                        "$src2, $src1", "$src1, $src2",
5778                    (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5779                           (i8 timm:$src2)))>,
5780                    Sched<[sched.Folded]>;
5781   }
5782 }
5783
5784 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5785                              string OpcodeStr, SDNode OpNode,
5786                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5787   let ExeDomain = _.ExeDomain in
5788   defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5789                    (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5790       "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
5791      (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
5792      EVEX_B, Sched<[sched.Folded]>;
5793 }
5794
5795 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5796                             X86FoldableSchedWrite sched, ValueType SrcVT,
5797                             X86VectorVTInfo _> {
5798    // src2 is always 128-bit
5799   let ExeDomain = _.ExeDomain in {
5800   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5801                    (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5802                       "$src2, $src1", "$src1, $src2",
5803                    (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5804                    AVX512BIBase, EVEX_4V, Sched<[sched]>;
5805   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5806                    (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5807                        "$src2, $src1", "$src1, $src2",
5808                    (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5809                    AVX512BIBase,
5810                    EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5811   }
5812 }
5813
5814 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5815                               X86SchedWriteWidths sched, ValueType SrcVT,
5816                               AVX512VLVectorVTInfo VTInfo,
5817                               Predicate prd> {
5818   let Predicates = [prd] in
5819   defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5820                                VTInfo.info512>, EVEX_V512,
5821                                EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5822   let Predicates = [prd, HasVLX] in {
5823   defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5824                                VTInfo.info256>, EVEX_V256,
5825                                EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5826   defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5827                                VTInfo.info128>, EVEX_V128,
5828                                EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5829   }
5830 }
5831
5832 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5833                               string OpcodeStr, SDNode OpNode,
5834                               X86SchedWriteWidths sched,
5835                               bit NotEVEX2VEXConvertibleQ = 0> {
5836   defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5837                               avx512vl_i32_info, HasAVX512>;
5838   let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5839   defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5840                               avx512vl_i64_info, HasAVX512>, VEX_W;
5841   defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5842                               avx512vl_i16_info, HasBWI>;
5843 }
5844
5845 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5846                                   string OpcodeStr, SDNode OpNode,
5847                                   X86SchedWriteWidths sched,
5848                                   AVX512VLVectorVTInfo VTInfo> {
5849   let Predicates = [HasAVX512] in
5850   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5851                               sched.ZMM, VTInfo.info512>,
5852              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5853                                VTInfo.info512>, EVEX_V512;
5854   let Predicates = [HasAVX512, HasVLX] in {
5855   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5856                               sched.YMM, VTInfo.info256>,
5857              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5858                                VTInfo.info256>, EVEX_V256;
5859   defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5860                               sched.XMM, VTInfo.info128>,
5861              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5862                                VTInfo.info128>, EVEX_V128;
5863   }
5864 }
5865
5866 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5867                               string OpcodeStr, SDNode OpNode,
5868                               X86SchedWriteWidths sched> {
5869   let Predicates = [HasBWI] in
5870   defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5871                                sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
5872   let Predicates = [HasVLX, HasBWI] in {
5873   defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5874                                sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
5875   defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5876                                sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
5877   }
5878 }
5879
5880 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5881                                Format ImmFormR, Format ImmFormM,
5882                                string OpcodeStr, SDNode OpNode,
5883                                X86SchedWriteWidths sched,
5884                                bit NotEVEX2VEXConvertibleQ = 0> {
5885   defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5886                                  sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5887   let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5888   defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5889                                  sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
5890 }
5891
5892 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5893                                  SchedWriteVecShiftImm>,
5894              avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5895                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5896
5897 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5898                                  SchedWriteVecShiftImm>,
5899              avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5900                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5901
5902 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5903                                  SchedWriteVecShiftImm, 1>,
5904              avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5905                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5906
5907 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5908                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5909 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5910                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5911
5912 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
5913                                 SchedWriteVecShift>;
5914 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
5915                                 SchedWriteVecShift, 1>;
5916 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
5917                                 SchedWriteVecShift>;
5918
5919 // Use the 512-bit VPSRA/VPSRAI version to implement v2i64/v4i64 in the NoVLX case.
5920 let Predicates = [HasAVX512, NoVLX] in {
5921   def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5922             (EXTRACT_SUBREG (v8i64
5923               (VPSRAQZrr
5924                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5925                  VR128X:$src2)), sub_ymm)>;
5926
5927   def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5928             (EXTRACT_SUBREG (v8i64
5929               (VPSRAQZrr
5930                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5931                  VR128X:$src2)), sub_xmm)>;
5932
5933   def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
5934             (EXTRACT_SUBREG (v8i64
5935               (VPSRAQZri
5936                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5937                  timm:$src2)), sub_ymm)>;
5938
5939   def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
5940             (EXTRACT_SUBREG (v8i64
5941               (VPSRAQZri
5942                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5943                  timm:$src2)), sub_xmm)>;
5944 }
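     // Note: x86 has no 64-bit-element arithmetic right shift before AVX-512,
     // and without VLX only the 512-bit VPSRAQ encoding is legal, so the
     // patterns above widen v2i64/v4i64 shifts to ZMM and extract the low
     // subvector of the result.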
5945
5946 //===----------------------------------------------------------------------===//
5947 // Variable Bit Shifts
5948 //===----------------------------------------------------------------------===//
5949
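     // Variable shifts take a per-element shift amount from the second source
     // vector (VPSLLV/VPSRLV/VPSRAV, plus the variable rotates VPROLV/VPRORV
     // defined below).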
5950 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5951                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5952   let ExeDomain = _.ExeDomain in {
5953   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5954                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5955                       "$src2, $src1", "$src1, $src2",
5956                    (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
5957                    AVX5128IBase, EVEX_4V, Sched<[sched]>;
5958   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5959                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5960                        "$src2, $src1", "$src1, $src2",
5961                    (_.VT (OpNode _.RC:$src1,
5962                    (_.VT (_.LdFrag addr:$src2))))>,
5963                    AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5964                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5965   }
5966 }
5967
5968 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5969                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5970   let ExeDomain = _.ExeDomain in
5971   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5972                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5973                     "${src2}"#_.BroadcastStr#", $src1",
5974                     "$src1, ${src2}"#_.BroadcastStr,
5975                     (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
5976                     AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5977                     Sched<[sched.Folded, sched.ReadAfterFold]>;
5978 }
5979
5980 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5981                                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
5982   let Predicates  = [HasAVX512] in
5983   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
5984            avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
5985
5986   let Predicates = [HasAVX512, HasVLX] in {
5987   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
5988               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
5989   defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
5990               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
5991   }
5992 }
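// Note: in avx512_var_shift_sizes the 512-bit Z form only requires HasAVX512,
// while the Z256/Z128 forms additionally require VLX; the NoVLX case for the
// narrower widths is handled by the widening patterns further below.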
5993
5994 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
5995                                   SDNode OpNode, X86SchedWriteWidths sched> {
5996   defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
5997                                  avx512vl_i32_info>;
5998   defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
5999                                  avx512vl_i64_info>, VEX_W;
6000 }
6001
6002 // Use the 512-bit version to implement 128/256-bit operations in case of NoVLX.
6003 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6004                                      SDNode OpNode, list<Predicate> p> {
6005   let Predicates = p in {
6006   def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6007                                   (_.info256.VT _.info256.RC:$src2))),
6008             (EXTRACT_SUBREG
6009                 (!cast<Instruction>(OpcodeStr#"Zrr")
6010                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6011                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6012              sub_ymm)>;
6013
6014   def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6015                                   (_.info128.VT _.info128.RC:$src2))),
6016             (EXTRACT_SUBREG
6017                 (!cast<Instruction>(OpcodeStr#"Zrr")
6018                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6019                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6020              sub_xmm)>;
6021   }
6022 }
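// The lowering multiclass above widens a 256/128-bit operation: the operands
// are inserted into the low lanes of an undefined 512-bit register
// (INSERT_SUBREG of IMPLICIT_DEF), the 512-bit Zrr instruction is used, and
// the original width is recovered with EXTRACT_SUBREG.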
6023 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6024                               SDNode OpNode, X86SchedWriteWidths sched> {
6025   let Predicates = [HasBWI] in
6026   defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6027               EVEX_V512, VEX_W;
6028   let Predicates = [HasVLX, HasBWI] in {
6029
6030   defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6031               EVEX_V256, VEX_W;
6032   defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6033               EVEX_V128, VEX_W;
6034   }
6035 }
6036
6037 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6038               avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6039
6040 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6041               avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6042
6043 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6044               avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
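// As a naming sketch (derived from the multiclasses above), the VPSLLV
// instantiation is expected to expand to VPSLLVDZrr/rm/rmb and
// VPSLLVQZrr/rm/rmb plus the Z256/Z128 and masked variants, and to
// VPSLLVWZ* for the word form under BWI.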
6045
6046 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6047 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6048
6049 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6050 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6051 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6052 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
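// For example, under [HasBWI, NoVLX] a v8i16 X86vsrav is expected to be
// selected as VPSRAVWZrr on operands widened to v32i16, with the low xmm
// extracted from the result.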
6053
6054
6055 // Use the 512-bit VPROLV/VPROL versions to implement v2i64/v4i64 + v4i32/v8i32 rotates in case of NoVLX.
6056 let Predicates = [HasAVX512, NoVLX] in {
6057   def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6058             (EXTRACT_SUBREG (v8i64
6059               (VPROLVQZrr
6060                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6061                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6062                        sub_xmm)>;
6063   def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6064             (EXTRACT_SUBREG (v8i64
6065               (VPROLVQZrr
6066                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6067                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6068                        sub_ymm)>;
6069
6070   def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6071             (EXTRACT_SUBREG (v16i32
6072               (VPROLVDZrr
6073                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6074                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6075                         sub_xmm)>;
6076   def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6077             (EXTRACT_SUBREG (v16i32
6078               (VPROLVDZrr
6079                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6080                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6081                         sub_ymm)>;
6082
6083   def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6084             (EXTRACT_SUBREG (v8i64
6085               (VPROLQZri
6086                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6087                         timm:$src2)), sub_xmm)>;
6088   def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6089             (EXTRACT_SUBREG (v8i64
6090               (VPROLQZri
6091                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6092                        timm:$src2)), sub_ymm)>;
6093
6094   def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6095             (EXTRACT_SUBREG (v16i32
6096               (VPROLDZri
6097                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6098                         timm:$src2)), sub_xmm)>;
6099   def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6100             (EXTRACT_SUBREG (v16i32
6101               (VPROLDZri
6102                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6103                         timm:$src2)), sub_ymm)>;
6104 }
6105
6106 // Use the 512-bit VPRORV/VPROR versions to implement v2i64/v4i64 + v4i32/v8i32 rotates in case of NoVLX.
6107 let Predicates = [HasAVX512, NoVLX] in {
6108   def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6109             (EXTRACT_SUBREG (v8i64
6110               (VPRORVQZrr
6111                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6112                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6113                        sub_xmm)>;
6114   def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6115             (EXTRACT_SUBREG (v8i64
6116               (VPRORVQZrr
6117                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6118                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6119                        sub_ymm)>;
6120
6121   def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6122             (EXTRACT_SUBREG (v16i32
6123               (VPRORVDZrr
6124                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6125                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6126                         sub_xmm)>;
6127   def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6128             (EXTRACT_SUBREG (v16i32
6129               (VPRORVDZrr
6130                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6131                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6132                         sub_ymm)>;
6133
6134   def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6135             (EXTRACT_SUBREG (v8i64
6136               (VPRORQZri
6137                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6138                         timm:$src2)), sub_xmm)>;
6139   def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6140             (EXTRACT_SUBREG (v8i64
6141               (VPRORQZri
6142                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6143                        timm:$src2)), sub_ymm)>;
6144
6145   def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6146             (EXTRACT_SUBREG (v16i32
6147               (VPRORDZri
6148                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6149                         timm:$src2)), sub_xmm)>;
6150   def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6151             (EXTRACT_SUBREG (v16i32
6152               (VPRORDZri
6153                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6154                         timm:$src2)), sub_ymm)>;
6155 }
6156
6157 //===-------------------------------------------------------------------===//
6158 // 1-src variable permutation VPERMW/B/D/Q/PS/PD
6159 //===-------------------------------------------------------------------===//
6160
6161 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6162                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6163   let Predicates  = [HasAVX512] in
6164   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6165            avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6166
6167   let Predicates = [HasAVX512, HasVLX] in
6168   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6169               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6170 }
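// Only 512-bit and 256-bit forms are defined here: the ISA provides no
// 128-bit encodings of these cross-lane permutes (VPERMD/Q, VPERMPS/PD).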
6171
6172 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6173                                  string OpcodeStr, SDNode OpNode,
6174                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6175   let Predicates = [HasAVX512] in
6176   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6177                               sched, VTInfo.info512>,
6178              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6179                                sched, VTInfo.info512>, EVEX_V512;
6180   let Predicates = [HasAVX512, HasVLX] in
6181   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6182                               sched, VTInfo.info256>,
6183              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6184                                sched, VTInfo.info256>, EVEX_V256;
6185 }
6186
6187 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6188                               Predicate prd, SDNode OpNode,
6189                               X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6190   let Predicates = [prd] in
6191   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6192               EVEX_V512 ;
6193   let Predicates = [HasVLX, prd] in {
6194   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6195               EVEX_V256 ;
6196   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6197               EVEX_V128 ;
6198   }
6199 }
6200
6201 defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6202                                WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6203 defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6204                                WriteVarShuffle256, avx512vl_i8_info>;
6205
6206 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6207                                     WriteVarShuffle256, avx512vl_i32_info>;
6208 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6209                                     WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6210 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6211                                      WriteFVarShuffle256, avx512vl_f32_info>;
6212 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6213                                      WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
6214
6215 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6216                              X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6217                              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6218 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6219                              X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6220                              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6221
6222 //===----------------------------------------------------------------------===//
6223 // AVX-512 - VPERMIL
6224 //===----------------------------------------------------------------------===//
6225
6226 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6227                              X86FoldableSchedWrite sched, X86VectorVTInfo _,
6228                              X86VectorVTInfo Ctrl> {
6229   defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6230                   (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6231                   "$src2, $src1", "$src1, $src2",
6232                   (_.VT (OpNode _.RC:$src1,
6233                                (Ctrl.VT Ctrl.RC:$src2)))>,
6234                   T8PD, EVEX_4V, Sched<[sched]>;
6235   defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6236                   (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6237                   "$src2, $src1", "$src1, $src2",
6238                   (_.VT (OpNode
6239                            _.RC:$src1,
6240                            (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6241                   T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6242                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6243   defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6244                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6245                    "${src2}"#_.BroadcastStr#", $src1",
6246                    "$src1, ${src2}"#_.BroadcastStr,
6247                    (_.VT (OpNode
6248                             _.RC:$src1,
6249                             (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6250                    T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6251                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6252 }
6253
6254 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6255                                     X86SchedWriteWidths sched,
6256                                     AVX512VLVectorVTInfo _,
6257                                     AVX512VLVectorVTInfo Ctrl> {
6258   let Predicates = [HasAVX512] in {
6259     defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6260                                   _.info512, Ctrl.info512>, EVEX_V512;
6261   }
6262   let Predicates = [HasAVX512, HasVLX] in {
6263     defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6264                                   _.info128, Ctrl.info128>, EVEX_V128;
6265     defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6266                                   _.info256, Ctrl.info256>, EVEX_V256;
6267   }
6268 }
6269
6270 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6271                          AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6272   defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6273                                       _, Ctrl>;
6274   defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6275                                     X86VPermilpi, SchedWriteFShuffle, _>,
6276                     EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6277 }
6278
6279 let ExeDomain = SSEPackedSingle in
6280 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6281                                avx512vl_i32_info>;
6282 let ExeDomain = SSEPackedDouble in
6283 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6284                                avx512vl_i64_info>, VEX_W1X;
6285
6286 //===----------------------------------------------------------------------===//
6287 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6288 //===----------------------------------------------------------------------===//
6289
6290 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6291                              X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6292                              EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6293 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6294                                   X86PShufhw, SchedWriteShuffle>,
6295                                   EVEX, AVX512XSIi8Base;
6296 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6297                                   X86PShuflw, SchedWriteShuffle>,
6298                                   EVEX, AVX512XDIi8Base;
6299
6300 //===----------------------------------------------------------------------===//
6301 // AVX-512 - VPSHUFB
6302 //===----------------------------------------------------------------------===//
6303
6304 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6305                                X86SchedWriteWidths sched> {
6306   let Predicates = [HasBWI] in
6307   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6308                               EVEX_V512;
6309
6310   let Predicates = [HasVLX, HasBWI] in {
6311   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6312                               EVEX_V256;
6313   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6314                               EVEX_V128;
6315   }
6316 }
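// VPSHUFB reuses avx512_var_shift purely for its two-source register/memory
// operand and masking shape; no broadcast (rmb) form is defined for it.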
6317
6318 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6319                                   SchedWriteVarShuffle>, VEX_WIG;
6320
6321 //===----------------------------------------------------------------------===//
6322 // Move Low to High and High to Low packed FP Instructions
6323 //===----------------------------------------------------------------------===//
6324
6325 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6326           (ins VR128X:$src1, VR128X:$src2),
6327           "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6328           [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6329           Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
6330 let isCommutable = 1 in
6331 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6332           (ins VR128X:$src1, VR128X:$src2),
6333           "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6334           [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6335           Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6336
6337 //===----------------------------------------------------------------------===//
6338 // VMOVHPS/PD and VMOVLPS/PD Instructions
6339 // All patterns were taken from the SSE implementation.
6340 //===----------------------------------------------------------------------===//
6341
6342 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6343                                   SDPatternOperator OpNode,
6344                                   X86VectorVTInfo _> {
6345   let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6346   def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6347                   (ins _.RC:$src1, f64mem:$src2),
6348                   !strconcat(OpcodeStr,
6349                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6350                   [(set _.RC:$dst,
6351                      (OpNode _.RC:$src1,
6352                        (_.VT (bitconvert
6353                          (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6354                   Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6355 }
6356
6357 // No patterns for MOVLPS/MOVHPS, as the Movlhps node should only be created in
6358 // SSE1, and the MOVLPS pattern is even more complex.
6359 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6360                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6361 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6362                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6363 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6364                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6365 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6366                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6367
6368 let Predicates = [HasAVX512] in {
6369   // VMOVHPD patterns
6370   def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6371             (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6372
6373   // VMOVLPD patterns
6374   def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6375             (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6376 }
6377
6378 let SchedRW = [WriteFStore] in {
6379 let mayStore = 1, hasSideEffects = 0 in
6380 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6381                        (ins f64mem:$dst, VR128X:$src),
6382                        "vmovhps\t{$src, $dst|$dst, $src}",
6383                        []>, EVEX, EVEX_CD8<32, CD8VT2>;
6384 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6385                        (ins f64mem:$dst, VR128X:$src),
6386                        "vmovhpd\t{$src, $dst|$dst, $src}",
6387                        [(store (f64 (extractelt
6388                                      (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6389                                      (iPTR 0))), addr:$dst)]>,
6390                        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6391 let mayStore = 1, hasSideEffects = 0 in
6392 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6393                        (ins f64mem:$dst, VR128X:$src),
6394                        "vmovlps\t{$src, $dst|$dst, $src}",
6395                        []>, EVEX, EVEX_CD8<32, CD8VT2>;
6396 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6397                        (ins f64mem:$dst, VR128X:$src),
6398                        "vmovlpd\t{$src, $dst|$dst, $src}",
6399                        [(store (f64 (extractelt (v2f64 VR128X:$src),
6400                                      (iPTR 0))), addr:$dst)]>,
6401                        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6402 } // SchedRW
6403
6404 let Predicates = [HasAVX512] in {
6405   // VMOVHPD patterns
6406   def : Pat<(store (f64 (extractelt
6407                            (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6408                            (iPTR 0))), addr:$dst),
6409            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6410 }
6411 //===----------------------------------------------------------------------===//
6412 // FMA - Fused Multiply Operations
6413 //
6414
6415 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6416                                SDNode MaskOpNode, X86FoldableSchedWrite sched,
6417                                X86VectorVTInfo _, string Suff> {
6418   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6419       Uses = [MXCSR], mayRaiseFPException = 1 in {
6420   defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6421           (ins _.RC:$src2, _.RC:$src3),
6422           OpcodeStr, "$src3, $src2", "$src2, $src3",
6423           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6424           (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6425           AVX512FMA3Base, Sched<[sched]>;
6426
6427   defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6428           (ins _.RC:$src2, _.MemOp:$src3),
6429           OpcodeStr, "$src3, $src2", "$src2, $src3",
6430           (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6431           (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6432           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6433
6434   defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6435             (ins _.RC:$src2, _.ScalarMemOp:$src3),
6436             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6437             !strconcat("$src2, ${src3}", _.BroadcastStr ),
6438             (OpNode _.RC:$src2,
6439              _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6440             (MaskOpNode _.RC:$src2,
6441              _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6442             AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6443   }
6444 }
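// In the 213 form above $src1 is tied to $dst and the register pattern is
// OpNode($src2, $src1, $src3), i.e. dst = src2 * src1 + src3 for vfmadd213.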
6445
6446 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6447                                  X86FoldableSchedWrite sched,
6448                                  X86VectorVTInfo _, string Suff> {
6449   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6450       Uses = [MXCSR] in
6451   defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6452           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6453           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6454           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6455           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6456           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6457 }
6458
6459 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6460                                    SDNode MaskOpNode, SDNode OpNodeRnd,
6461                                    X86SchedWriteWidths sched,
6462                                    AVX512VLVectorVTInfo _, string Suff> {
6463   let Predicates = [HasAVX512] in {
6464     defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6465                                       sched.ZMM, _.info512, Suff>,
6466                   avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6467                                         _.info512, Suff>,
6468                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6469   }
6470   let Predicates = [HasVLX, HasAVX512] in {
6471     defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6472                                     sched.YMM, _.info256, Suff>,
6473                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6474     defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6475                                     sched.XMM, _.info128, Suff>,
6476                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6477   }
6478 }
6479
6480 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6481                               SDNode MaskOpNode, SDNode OpNodeRnd> {
6482     defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6483                                       OpNodeRnd, SchedWriteFMA,
6484                                       avx512vl_f32_info, "PS">;
6485     defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6486                                       OpNodeRnd, SchedWriteFMA,
6487                                       avx512vl_f64_info, "PD">, VEX_W;
6488 }
6489
6490 defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86any_Fmadd,
6491                                        X86Fmadd, X86FmaddRnd>;
6492 defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6493                                        X86Fmsub, X86FmsubRnd>;
6494 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6495                                        X86Fmaddsub, X86FmaddsubRnd>;
6496 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6497                                        X86Fmsubadd, X86FmsubaddRnd>;
6498 defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6499                                        X86Fnmadd, X86FnmaddRnd>;
6500 defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6501                                        X86Fnmsub, X86FnmsubRnd>;
6502
6503
6504 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6505                                SDNode MaskOpNode, X86FoldableSchedWrite sched,
6506                                X86VectorVTInfo _, string Suff> {
6507   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6508       Uses = [MXCSR], mayRaiseFPException = 1 in {
6509   defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6510           (ins _.RC:$src2, _.RC:$src3),
6511           OpcodeStr, "$src3, $src2", "$src2, $src3",
6512           (null_frag),
6513           (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6514           AVX512FMA3Base, Sched<[sched]>;
6515
6516   defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6517           (ins _.RC:$src2, _.MemOp:$src3),
6518           OpcodeStr, "$src3, $src2", "$src2, $src3",
6519           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6520           (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6521           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6522
6523   defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6524          (ins _.RC:$src2, _.ScalarMemOp:$src3),
6525          OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6526          "$src2, ${src3}"#_.BroadcastStr,
6527          (_.VT (OpNode _.RC:$src2,
6528                       (_.VT (_.BroadcastLdFrag addr:$src3)),
6529                       _.RC:$src1)),
6530          (_.VT (MaskOpNode _.RC:$src2,
6531                            (_.VT (_.BroadcastLdFrag addr:$src3)),
6532                            _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
6533          Sched<[sched.Folded, sched.ReadAfterFold]>;
6534   }
6535 }
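// In the 231 form above the masked register pattern is
// MaskOpNode($src2, $src3, $src1), i.e. dst = src2 * src3 + src1 for
// vfmadd231; the unmasked register pattern is left as null_frag, presumably
// because the 213 register pattern already covers plain register FMA.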
6536
6537 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6538                                  X86FoldableSchedWrite sched,
6539                                  X86VectorVTInfo _, string Suff> {
6540   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6541       Uses = [MXCSR] in
6542   defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6543           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6544           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6545           (null_frag),
6546           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6547           1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6548 }
6549
6550 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6551                                    SDNode MaskOpNode, SDNode OpNodeRnd,
6552                                    X86SchedWriteWidths sched,
6553                                    AVX512VLVectorVTInfo _, string Suff> {
6554   let Predicates = [HasAVX512] in {
6555     defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6556                                       sched.ZMM, _.info512, Suff>,
6557                   avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6558                                         _.info512, Suff>,
6559                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6560   }
6561   let Predicates = [HasVLX, HasAVX512] in {
6562     defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6563                                     sched.YMM, _.info256, Suff>,
6564                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6565     defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6566                                     sched.XMM, _.info128, Suff>,
6567                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6568   }
6569 }
6570
6571 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6572                               SDNode MaskOpNode, SDNode OpNodeRnd > {
6573     defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6574                                       OpNodeRnd, SchedWriteFMA,
6575                                       avx512vl_f32_info, "PS">;
6576     defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6577                                       OpNodeRnd, SchedWriteFMA,
6578                                       avx512vl_f64_info, "PD">, VEX_W;
6579 }
6580
6581 defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86any_Fmadd,
6582                                        X86Fmadd, X86FmaddRnd>;
6583 defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6584                                        X86Fmsub, X86FmsubRnd>;
6585 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6586                                        X86Fmaddsub, X86FmaddsubRnd>;
6587 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6588                                        X86Fmsubadd, X86FmsubaddRnd>;
6589 defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
6590                                        X86Fnmadd, X86FnmaddRnd>;
6591 defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
6592                                        X86Fnmsub, X86FnmsubRnd>;
6593
6594 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6595                                SDNode MaskOpNode, X86FoldableSchedWrite sched,
6596                                X86VectorVTInfo _, string Suff> {
6597   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6598       Uses = [MXCSR], mayRaiseFPException = 1 in {
6599   defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6600           (ins _.RC:$src2, _.RC:$src3),
6601           OpcodeStr, "$src3, $src2", "$src2, $src3",
6602           (null_frag),
6603           (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
6604           AVX512FMA3Base, Sched<[sched]>;
6605
6606   // The pattern is in 312 order so that the load is in a different place from
6607   // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6608   defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6609           (ins _.RC:$src2, _.MemOp:$src3),
6610           OpcodeStr, "$src3, $src2", "$src2, $src3",
6611           (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6612           (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6613           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6614
6615   // The pattern is in 312 order so that the load is in a different place from
6616   // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6617   defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6618          (ins _.RC:$src2, _.ScalarMemOp:$src3),
6619          OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6620          "$src2, ${src3}"#_.BroadcastStr,
6621          (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6622                        _.RC:$src1, _.RC:$src2)),
6623          (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6624                            _.RC:$src1, _.RC:$src2)), 1, 0>,
6625          AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6626   }
6627 }
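// In the 132 form above the masked register pattern is
// MaskOpNode($src1, $src3, $src2), i.e. dst = src1 * src3 + src2 for
// vfmadd132; as noted above, its load patterns are written in 312 order.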
6628
6629 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6630                                  X86FoldableSchedWrite sched,
6631                                  X86VectorVTInfo _, string Suff> {
6632   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6633       Uses = [MXCSR] in
6634   defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6635           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6636           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6637           (null_frag),
6638           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6639           1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6640 }
6641
6642 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6643                                    SDNode MaskOpNode, SDNode OpNodeRnd,
6644                                    X86SchedWriteWidths sched,
6645                                    AVX512VLVectorVTInfo _, string Suff> {
6646   let Predicates = [HasAVX512] in {
6647     defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6648                                       sched.ZMM, _.info512, Suff>,
6649                   avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6650                                         _.info512, Suff>,
6651                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6652   }
6653   let Predicates = [HasVLX, HasAVX512] in {
6654     defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6655                                     sched.YMM, _.info256, Suff>,
6656                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6657     defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6658                                     sched.XMM, _.info128, Suff>,
6659                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6660   }
6661 }
6662
6663 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6664                               SDNode MaskOpNode, SDNode OpNodeRnd > {
6665     defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6666                                       OpNodeRnd, SchedWriteFMA,
6667                                       avx512vl_f32_info, "PS">;
6668     defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6669                                       OpNodeRnd, SchedWriteFMA,
6670                                       avx512vl_f64_info, "PD">, VEX_W;
6671 }
6672
6673 defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86any_Fmadd,
6674                                        X86Fmadd, X86FmaddRnd>;
6675 defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
6676                                        X86Fmsub, X86FmsubRnd>;
6677 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
6678                                        X86Fmaddsub, X86FmaddsubRnd>;
6679 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
6680                                        X86Fmsubadd, X86FmsubaddRnd>;
6681 defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
6682                                        X86Fnmadd, X86FnmaddRnd>;
6683 defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
6684                                        X86Fnmsub, X86FnmsubRnd>;
6685
6686 // Scalar FMA
6687 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6688                                dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6689 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6690   defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6691           (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6692           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6693           AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
6694
6695   let mayLoad = 1 in
6696   defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6697           (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6698           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6699           AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
6700
6701   let Uses = [MXCSR] in
6702   defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6703          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6704          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
6705          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
6706
6707   let isCodeGenOnly = 1, isCommutable = 1 in {
6708     def r     : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6709                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6710                      !strconcat(OpcodeStr,
6711                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6712                      !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
6713     def m     : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
6714                     (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6715                     !strconcat(OpcodeStr,
6716                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6717                     [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
6718
6719     let Uses = [MXCSR] in
6720     def rb    : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6721                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6722                      !strconcat(OpcodeStr,
6723                               "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
6724                      !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6725                      Sched<[SchedWriteFMA.Scl]>;
6726   }// isCodeGenOnly = 1
6727 }// Constraints = "$src1 = $dst"
6728 }
6729
6730 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6731                             string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
6732                             X86VectorVTInfo _, string SUFF> {
6733   let ExeDomain = _.ExeDomain in {
6734   defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6735                 // Operands for the intrinsic are in 123 order to preserve passthru
6736                 // semantics.
6737                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6738                          _.FRC:$src3))),
6739                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6740                          (_.ScalarLdFrag addr:$src3)))),
6741                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6742                          _.FRC:$src3, (i32 timm:$rc)))), 0>;
6743
6744   defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6745                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6746                                           _.FRC:$src1))),
6747                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6748                             (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6749                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6750                          _.FRC:$src1, (i32 timm:$rc)))), 1>;
6751
6752   // One pattern is in 312 order so that the load is in a different place from
6753   // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6754   defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6755                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6756                          _.FRC:$src2))),
6757                 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6758                                  _.FRC:$src1, _.FRC:$src2))),
6759                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6760                          _.FRC:$src2, (i32 timm:$rc)))), 1>;
6761   }
6762 }
6763
6764 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6765                         string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
6766   let Predicates = [HasAVX512] in {
6767     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6768                                  OpNodeRnd, f32x_info, "SS">,
6769                                  EVEX_CD8<32, CD8VT1>, VEX_LIG;
6770     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6771                                  OpNodeRnd, f64x_info, "SD">,
6772                                  EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
6773   }
6774 }
6775
6776 defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86any_Fmadd, X86FmaddRnd>;
6777 defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
6778 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
6779 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
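// As a naming sketch, the VFMADD line above is expected to produce
// VFMADD213SSZr/m/rb (and the 231/132 and SD counterparts) together with
// their _Int and masked variants; the _Int names are what the scalar
// patterns below select.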
6780
6781 multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode MaskedOp,
6782                                       SDNode RndOp, string Prefix,
6783                                       string Suffix, SDNode Move,
6784                                       X86VectorVTInfo _, PatLeaf ZeroFP> {
6785   let Predicates = [HasAVX512] in {
6786     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6787                 (Op _.FRC:$src2,
6788                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6789                     _.FRC:$src3))))),
6790               (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6791                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6792                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6793
6794     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6795                 (Op _.FRC:$src2, _.FRC:$src3,
6796                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6797               (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6798                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6799                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6800
6801     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6802                 (Op _.FRC:$src2,
6803                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6804                     (_.ScalarLdFrag addr:$src3)))))),
6805               (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6806                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6807                addr:$src3)>;
6808
6809     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6810                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6811                     (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6812               (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6813                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6814                addr:$src3)>;
6815
6816     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6817                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6818                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6819               (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6820                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6821                addr:$src3)>;
6822
6823     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6824                (X86selects_mask VK1WM:$mask,
6825                 (MaskedOp _.FRC:$src2,
6826                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6827                     _.FRC:$src3),
6828                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6829               (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6830                VR128X:$src1, VK1WM:$mask,
6831                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6832                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6833
6834     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6835                (X86selects_mask VK1WM:$mask,
6836                 (MaskedOp _.FRC:$src2,
6837                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6838                     (_.ScalarLdFrag addr:$src3)),
6839                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6840               (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6841                VR128X:$src1, VK1WM:$mask,
6842                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6843
6844     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6845                (X86selects_mask VK1WM:$mask,
6846                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6847                           (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6848                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6849               (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6850                VR128X:$src1, VK1WM:$mask,
6851                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6852
6853     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6854                (X86selects_mask VK1WM:$mask,
6855                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
6856                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6857                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6858               (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
6859                VR128X:$src1, VK1WM:$mask,
6860                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6861                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6862
6863     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6864                (X86selects_mask VK1WM:$mask,
6865                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6866                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6867                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6868               (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
6869                VR128X:$src1, VK1WM:$mask,
6870                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6871
6872     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6873                (X86selects_mask VK1WM:$mask,
6874                 (MaskedOp _.FRC:$src2,
6875                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6876                           _.FRC:$src3),
6877                 (_.EltVT ZeroFP)))))),
6878               (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
6879                VR128X:$src1, VK1WM:$mask,
6880                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6881                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6882
6883     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6884                (X86selects_mask VK1WM:$mask,
6885                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
6886                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6887                 (_.EltVT ZeroFP)))))),
6888               (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
6889                VR128X:$src1, VK1WM:$mask,
6890                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6891                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6892
6893     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6894                (X86selects_mask VK1WM:$mask,
6895                 (MaskedOp _.FRC:$src2,
6896                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6897                           (_.ScalarLdFrag addr:$src3)),
6898                 (_.EltVT ZeroFP)))))),
6899               (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
6900                VR128X:$src1, VK1WM:$mask,
6901                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6902
6903     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6904                (X86selects_mask VK1WM:$mask,
6905                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6906                           _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6907                 (_.EltVT ZeroFP)))))),
6908               (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
6909                VR128X:$src1, VK1WM:$mask,
6910                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6911
6912     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6913                (X86selects_mask VK1WM:$mask,
6914                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6915                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6916                 (_.EltVT ZeroFP)))))),
6917               (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
6918                VR128X:$src1, VK1WM:$mask,
6919                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6920
6921     // Patterns with rounding mode.
6922     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6923                 (RndOp _.FRC:$src2,
6924                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6925                        _.FRC:$src3, (i32 timm:$rc)))))),
6926               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
6927                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6928                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6929
6930     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6931                 (RndOp _.FRC:$src2, _.FRC:$src3,
6932                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6933                        (i32 timm:$rc)))))),
6934               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
6935                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6936                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6937
6938     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6939                (X86selects_mask VK1WM:$mask,
6940                 (RndOp _.FRC:$src2,
6941                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6942                        _.FRC:$src3, (i32 timm:$rc)),
6943                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6944               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
6945                VR128X:$src1, VK1WM:$mask,
6946                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6947                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6948
6949     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6950                (X86selects_mask VK1WM:$mask,
6951                 (RndOp _.FRC:$src2, _.FRC:$src3,
6952                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6953                        (i32 timm:$rc)),
6954                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6955               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
6956                VR128X:$src1, VK1WM:$mask,
6957                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6958                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6959
6960     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6961                (X86selects_mask VK1WM:$mask,
6962                 (RndOp _.FRC:$src2,
6963                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6964                        _.FRC:$src3, (i32 timm:$rc)),
6965                 (_.EltVT ZeroFP)))))),
6966               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
6967                VR128X:$src1, VK1WM:$mask,
6968                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6969                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6970
6971     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6972                (X86selects_mask VK1WM:$mask,
6973                 (RndOp _.FRC:$src2, _.FRC:$src3,
6974                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6975                        (i32 timm:$rc)),
6976                 (_.EltVT ZeroFP)))))),
6977               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
6978                VR128X:$src1, VK1WM:$mask,
6979                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6980                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6981   }
6982 }
6983
6984 defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
6985                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
6986 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
6987                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
6988 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
6989                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
6990 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
6991                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
6992
6993 defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
6994                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
6995 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
6996                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
6997 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
6998                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
6999 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7000                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
7001
7002 //===----------------------------------------------------------------------===//
7003 // AVX-512 IFMA - Packed Multiply of Unsigned 52-bit Integers and Add the Low/High 52 Bits of the Product
7004 //===----------------------------------------------------------------------===//
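// Rough operation sketch: each 64-bit lane multiplies the low 52 bits of the
// two multiply operands into a 104-bit product, then adds either the low or
// the high 52 bits of that product to the 64-bit accumulator:
//   VPMADD52LUQ: dst.qword[i] += ZeroExtend64(product[i][ 51:0])
//   VPMADD52HUQ: dst.qword[i] += ZeroExtend64(product[i][103:52])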
7005 let Constraints = "$src1 = $dst" in {
7006 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7007                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7008   // NOTE: The SDNode has the multiply operands first with the add last.
7009   // This enables commuted load patterns to be autogenerated by tablegen.
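  // For example, since the multiply operands occupy the first two positions,
  // a DAG with the load on either multiply operand - e.g. (OpNode (load $src3),
  // $src2, $src1) - can be matched by the commuted variant that tablegen
  // autogenerates from the reg-mem pattern below, with no hand-written pattern.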
7010   let ExeDomain = _.ExeDomain in {
7011   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7012           (ins _.RC:$src2, _.RC:$src3),
7013           OpcodeStr, "$src3, $src2", "$src2, $src3",
7014           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7015          AVX512FMA3Base, Sched<[sched]>;
7016
7017   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7018           (ins _.RC:$src2, _.MemOp:$src3),
7019           OpcodeStr, "$src3, $src2", "$src2, $src3",
7020           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7021           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
7022
7023   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7024             (ins _.RC:$src2, _.ScalarMemOp:$src3),
7025             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
7026             !strconcat("$src2, ${src3}", _.BroadcastStr ),
7027             (OpNode _.RC:$src2,
7028                     (_.VT (_.BroadcastLdFrag addr:$src3)),
7029                     _.RC:$src1)>,
7030             AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
7031   }
7032 }
7033 } // Constraints = "$src1 = $dst"
7034
7035 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7036                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7037   let Predicates = [HasIFMA] in {
7038     defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7039                       EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7040   }
7041   let Predicates = [HasVLX, HasIFMA] in {
7042     defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7043                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7044     defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7045                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7046   }
7047 }
7048
7049 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7050                                          SchedWriteVecIMul, avx512vl_i64_info>,
7051                                          VEX_W;
7052 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7053                                          SchedWriteVecIMul, avx512vl_i64_info>,
7054                                          VEX_W;
7055
7056 //===----------------------------------------------------------------------===//
7057 // AVX-512  Scalar convert from signed integer to float/double
7058 //===----------------------------------------------------------------------===//
7059
7060 multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7061                     RegisterClass SrcRC, X86VectorVTInfo DstVT,
7062                     X86MemOperand x86memop, PatFrag ld_frag, string asm,
7063                     string mem, list<Register> _Uses = [MXCSR],
7064                     bit _mayRaiseFPException = 1> {
7065 let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7066     mayRaiseFPException = _mayRaiseFPException in {
7067   let hasSideEffects = 0, isCodeGenOnly = 1 in {
7068     def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7069               (ins DstVT.FRC:$src1, SrcRC:$src),
7070               !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7071               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7072     let mayLoad = 1 in
7073       def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7074               (ins DstVT.FRC:$src1, x86memop:$src),
7075               asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7076               EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7077   } // hasSideEffects = 0
7078   def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7079                 (ins DstVT.RC:$src1, SrcRC:$src2),
7080                 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7081                 [(set DstVT.RC:$dst,
7082                       (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7083                EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7084
7085   def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7086                 (ins DstVT.RC:$src1, x86memop:$src2),
7087                 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7088                 [(set DstVT.RC:$dst,
7089                       (OpNode (DstVT.VT DstVT.RC:$src1),
7090                                (ld_frag addr:$src2)))]>,
7091                 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7092 }
7093   def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7094                   (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7095                   DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
7096 }
7097
7098 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7099                                X86FoldableSchedWrite sched, RegisterClass SrcRC,
7100                                X86VectorVTInfo DstVT, string asm,
7101                                string mem> {
7102   let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7103   def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7104               (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7105               !strconcat(asm,
7106                   "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7107               [(set DstVT.RC:$dst,
7108                     (OpNode (DstVT.VT DstVT.RC:$src1),
7109                              SrcRC:$src2,
7110                              (i32 timm:$rc)))]>,
7111               EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7112   def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7113                   (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7114                   DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7115 }
7116
7117 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7118                                 X86FoldableSchedWrite sched,
7119                                 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7120                                 X86MemOperand x86memop, PatFrag ld_frag,
7121                                 string asm, string mem> {
7122   defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7123               avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7124                             ld_frag, asm, mem>, VEX_LIG;
7125 }
7126
7127 let Predicates = [HasAVX512] in {
7128 defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7129                                  WriteCvtI2SS, GR32,
7130                                  v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7131                                  XS, EVEX_CD8<32, CD8VT1>;
7132 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7133                                  WriteCvtI2SS, GR64,
7134                                  v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7135                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7136 defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7137                                  v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7138                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7139 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7140                                  WriteCvtI2SD, GR64,
7141                                  v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7142                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7143
7144 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7145               (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7146 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7147               (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7148
7149 def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7150           (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7151 def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7152           (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7153 def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7154           (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7155 def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7156           (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7157
7158 def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7159           (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7160 def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7161           (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7162 def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7163           (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7164 def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7165           (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7166
7167 defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7168                                   WriteCvtI2SS, GR32,
7169                                   v4f32x_info, i32mem, loadi32,
7170                                   "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7171 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7172                                   WriteCvtI2SS, GR64,
7173                                   v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7174                                   XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7175 defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7176                                   i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7177                                   XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7178 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7179                                   WriteCvtI2SD, GR64,
7180                                   v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7181                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7182
7183 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7184               (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7185 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7186               (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7187
7188 def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7189           (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7190 def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7191           (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7192 def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7193           (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7194 def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7195           (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7196
7197 def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7198           (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7199 def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7200           (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7201 def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7202           (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7203 def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7204           (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7205 }
7206
7207 //===----------------------------------------------------------------------===//
7208 // AVX-512  Scalar convert from float/double to integer
7209 //===----------------------------------------------------------------------===//
7210
7211 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7212                                   X86VectorVTInfo DstVT, SDNode OpNode,
7213                                   SDNode OpNodeRnd,
7214                                   X86FoldableSchedWrite sched, string asm,
7215                                   string aliasStr> {
7216   let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7217     def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7218                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7219                 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7220                 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7221     let Uses = [MXCSR] in
7222     def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7223                  !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7224                  [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7225                  EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7226                  Sched<[sched]>;
7227     def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7228                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7229                 [(set DstVT.RC:$dst, (OpNode
7230                       (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7231                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7232   } // Predicates = [HasAVX512]
7233
7234   def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7235           (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7236   def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7237           (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7238   def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7239           (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7240                                           SrcVT.IntScalarMemOp:$src), 0, "att">;
7241 }
7242
7243 // Convert float/double to signed/unsigned int 32/64
7244 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7245                                    X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7246                                    XS, EVEX_CD8<32, CD8VT1>;
7247 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7248                                    X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7249                                    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7250 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7251                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7252                                    XS, EVEX_CD8<32, CD8VT1>;
7253 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7254                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7255                                    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7256 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7257                                    X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7258                                    XD, EVEX_CD8<64, CD8VT1>;
7259 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7260                                    X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7261                                    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7262 defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7263                                    X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7264                                    XD, EVEX_CD8<64, CD8VT1>;
7265 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7266                                    X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7267                                    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7268
7269 multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
7270                         X86VectorVTInfo DstVT, SDNode OpNode,
7271                         X86FoldableSchedWrite sched,
7272                         string aliasStr> {
7273   let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7274     let isCodeGenOnly = 1 in {
7275     def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
7276                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7277                 [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
7278                 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7279     def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
7280                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7281                 [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
7282                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7283     }
7284   } // Predicates = [HasAVX512]
7285 }
7286
7287 defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7288                        lrint, WriteCvtSS2I,
7289                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7290 defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7291                        llrint, WriteCvtSS2I,
7292                        "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7293 defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7294                        lrint, WriteCvtSD2I,
7295                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7296 defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7297                        llrint, WriteCvtSD2I,
7298                        "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7299
7300 let Predicates = [HasAVX512] in {
7301   def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7302   def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7303
7304   def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7305   def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7306 }
7307
7308 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7309 // which would otherwise produce unnecessary vmovs{s,d} instructions.
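// For example, (X86Movss $dst, (scalar_to_vector (any_sint_to_fp GR32:$src)))
// selects directly to VCVTSI2SSZrr_Int below, rather than converting into a
// temporary register and then blending it into $dst with a vmovss.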
7310 let Predicates = [HasAVX512] in {
7311 def : Pat<(v4f32 (X86Movss
7312                    (v4f32 VR128X:$dst),
7313                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7314           (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7315
7316 def : Pat<(v4f32 (X86Movss
7317                    (v4f32 VR128X:$dst),
7318                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7319           (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7320
7321 def : Pat<(v4f32 (X86Movss
7322                    (v4f32 VR128X:$dst),
7323                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7324           (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7325
7326 def : Pat<(v4f32 (X86Movss
7327                    (v4f32 VR128X:$dst),
7328                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7329           (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7330
7331 def : Pat<(v2f64 (X86Movsd
7332                    (v2f64 VR128X:$dst),
7333                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7334           (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7335
7336 def : Pat<(v2f64 (X86Movsd
7337                    (v2f64 VR128X:$dst),
7338                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7339           (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7340
7341 def : Pat<(v2f64 (X86Movsd
7342                    (v2f64 VR128X:$dst),
7343                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7344           (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7345
7346 def : Pat<(v2f64 (X86Movsd
7347                    (v2f64 VR128X:$dst),
7348                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7349           (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7350
7351 def : Pat<(v4f32 (X86Movss
7352                    (v4f32 VR128X:$dst),
7353                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7354           (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7355
7356 def : Pat<(v4f32 (X86Movss
7357                    (v4f32 VR128X:$dst),
7358                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7359           (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7360
7361 def : Pat<(v4f32 (X86Movss
7362                    (v4f32 VR128X:$dst),
7363                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7364           (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7365
7366 def : Pat<(v4f32 (X86Movss
7367                    (v4f32 VR128X:$dst),
7368                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7369           (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7370
7371 def : Pat<(v2f64 (X86Movsd
7372                    (v2f64 VR128X:$dst),
7373                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7374           (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7375
7376 def : Pat<(v2f64 (X86Movsd
7377                    (v2f64 VR128X:$dst),
7378                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7379           (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7380
7381 def : Pat<(v2f64 (X86Movsd
7382                    (v2f64 VR128X:$dst),
7383                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7384           (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7385
7386 def : Pat<(v2f64 (X86Movsd
7387                    (v2f64 VR128X:$dst),
7388                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7389           (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7390 } // Predicates = [HasAVX512]
7391
7392 // Convert float/double to signed/unsigned int 32/64 with truncation
7393 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7394                             X86VectorVTInfo _DstRC, SDNode OpNode,
7395                             SDNode OpNodeInt, SDNode OpNodeSAE,
7396                             X86FoldableSchedWrite sched, string aliasStr>{
7397 let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
7398   let isCodeGenOnly = 1 in {
7399   def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7400               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7401               [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7402               EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7403   def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7404               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7405               [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7406               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7407   }
7408
7409   def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7410             !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7411            [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7412            EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7413   let Uses = [MXCSR] in
7414   def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7415             !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7416             [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7417                                   EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7418   def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7419               (ins _SrcRC.IntScalarMemOp:$src),
7420               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7421               [(set _DstRC.RC:$dst,
7422                 (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7423               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7424 } //HasAVX512
7425
7426   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7427           (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7428   def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7429           (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7430   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7431           (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7432                                           _SrcRC.IntScalarMemOp:$src), 0, "att">;
7433 }
7434
7435 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7436                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7437                         "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7438 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7439                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7440                         "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7441 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7442                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7443                         "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7444 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7445                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7446                         "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7447
7448 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7449                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7450                         "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7451 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7452                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7453                         "{q}">, XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7454 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7455                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7456                         "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7457 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7458                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7459                         "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7460
7461 //===----------------------------------------------------------------------===//
7462 // AVX-512  Convert from float to double and back
7463 //===----------------------------------------------------------------------===//
7464
7465 let Uses = [MXCSR], mayRaiseFPException = 1 in
7466 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7467                                 X86VectorVTInfo _Src, SDNode OpNode,
7468                                 X86FoldableSchedWrite sched> {
7469   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7470                          (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7471                          "$src2, $src1", "$src1, $src2",
7472                          (_.VT (OpNode (_.VT _.RC:$src1),
7473                                        (_Src.VT _Src.RC:$src2)))>,
7474                          EVEX_4V, VEX_LIG, Sched<[sched]>;
7475   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7476                          (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7477                          "$src2, $src1", "$src1, $src2",
7478                          (_.VT (OpNode (_.VT _.RC:$src1),
7479                                   (_Src.ScalarIntMemFrags addr:$src2)))>,
7480                          EVEX_4V, VEX_LIG,
7481                          Sched<[sched.Folded, sched.ReadAfterFold]>;
7482
7483   let isCodeGenOnly = 1, hasSideEffects = 0 in {
7484     def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7485                (ins _.FRC:$src1, _Src.FRC:$src2),
7486                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7487                EVEX_4V, VEX_LIG, Sched<[sched]>;
7488     let mayLoad = 1 in
7489     def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7490                (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7491                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7492                EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7493   }
7494 }
7495
7496 // Scalar Conversion with SAE - suppress all exceptions
7497 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7498                                     X86VectorVTInfo _Src, SDNode OpNodeSAE,
7499                                     X86FoldableSchedWrite sched> {
7500   let Uses = [MXCSR] in
7501   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7502                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7503                         "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7504                         (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7505                                          (_Src.VT _Src.RC:$src2)))>,
7506                         EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7507 }
7508
7509 // Scalar Conversion with rounding control (RC)
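// Note: with EVEX embedded rounding the $rc immediate supplies the rounding
// mode directly ({rn,rd,ru,rz}-sae), and exceptions are suppressed just as
// with {sae}.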
7510 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7511                                    X86VectorVTInfo _Src, SDNode OpNodeRnd,
7512                                    X86FoldableSchedWrite sched> {
7513   let Uses = [MXCSR] in
7514   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7515                         (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7516                         "$rc, $src2, $src1", "$src1, $src2, $rc",
7517                         (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7518                                          (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7519                         EVEX_4V, VEX_LIG, Sched<[sched]>,
7520                         EVEX_B, EVEX_RC;
7521 }
7522 multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
7523                                       SDNode OpNode, SDNode OpNodeRnd,
7524                                       X86FoldableSchedWrite sched,
7525                                       X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7526   let Predicates = [HasAVX512] in {
7527     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7528              avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7529                                OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
7530   }
7531 }
7532
7533 multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
7534                                       SDNode OpNode, SDNode OpNodeSAE,
7535                                       X86FoldableSchedWrite sched,
7536                                       X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7537   let Predicates = [HasAVX512] in {
7538     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7539              avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7540              EVEX_CD8<32, CD8VT1>, XS;
7541   }
7542 }
7543 defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
7544                                          X86froundsRnd, WriteCvtSD2SS, f64x_info,
7545                                          f32x_info>;
7546 defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
7547                                           X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7548                                           f64x_info>;
7549
7550 def : Pat<(f64 (any_fpextend FR32X:$src)),
7551           (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7552           Requires<[HasAVX512]>;
7553 def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7554           (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7555           Requires<[HasAVX512, OptForSize]>;
7556
7557 def : Pat<(f32 (any_fpround FR64X:$src)),
7558           (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7559            Requires<[HasAVX512]>;
7560
7561 def : Pat<(v4f32 (X86Movss
7562                    (v4f32 VR128X:$dst),
7563                    (v4f32 (scalar_to_vector
7564                      (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7565           (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7566           Requires<[HasAVX512]>;
7567
7568 def : Pat<(v2f64 (X86Movsd
7569                    (v2f64 VR128X:$dst),
7570                    (v2f64 (scalar_to_vector
7571                      (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7572           (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7573           Requires<[HasAVX512]>;
7574
7575 //===----------------------------------------------------------------------===//
7576 // AVX-512  Vector convert from signed/unsigned integer to float/double
7577 //          and from float/double to signed/unsigned integer
7578 //===----------------------------------------------------------------------===//
7579
7580 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7581                           X86VectorVTInfo _Src, SDNode OpNode, SDNode MaskOpNode,
7582                           X86FoldableSchedWrite sched,
7583                           string Broadcast = _.BroadcastStr,
7584                           string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7585                           RegisterClass MaskRC = _.KRCWM,
7586                           dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
7587                           dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7588 let Uses = [MXCSR], mayRaiseFPException = 1 in {
7589   defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
7590                          (ins _Src.RC:$src),
7591                          (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7592                          (ins MaskRC:$mask, _Src.RC:$src),
7593                           OpcodeStr, "$src", "$src",
7594                          (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7595                          (vselect_mask MaskRC:$mask,
7596                                        (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7597                                        _.RC:$src0),
7598                          (vselect_mask MaskRC:$mask,
7599                                        (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7600                                        _.ImmAllZerosV)>,
7601                          EVEX, Sched<[sched]>;
7602
7603   defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7604                          (ins MemOp:$src),
7605                          (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7606                          (ins MaskRC:$mask, MemOp:$src),
7607                          OpcodeStr#Alias, "$src", "$src",
7608                          LdDAG,
7609                          (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
7610                          (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
7611                          EVEX, Sched<[sched.Folded]>;
7612
7613   defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7614                          (ins _Src.ScalarMemOp:$src),
7615                          (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7616                          (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7617                          OpcodeStr,
7618                          "${src}"#Broadcast, "${src}"#Broadcast,
7619                          (_.VT (OpNode (_Src.VT
7620                                   (_Src.BroadcastLdFrag addr:$src))
7621                             )),
7622                          (vselect_mask MaskRC:$mask,
7623                                        (_.VT
7624                                         (MaskOpNode
7625                                          (_Src.VT
7626                                           (_Src.BroadcastLdFrag addr:$src)))),
7627                                        _.RC:$src0),
7628                          (vselect_mask MaskRC:$mask,
7629                                        (_.VT
7630                                         (MaskOpNode
7631                                          (_Src.VT
7632                                           (_Src.BroadcastLdFrag addr:$src)))),
7633                                        _.ImmAllZerosV)>,
7634                          EVEX, EVEX_B, Sched<[sched.Folded]>;
7635   }
7636 }
7637 // Conversion with SAE - suppress all exceptions
7638 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7639                               X86VectorVTInfo _Src, SDNode OpNodeSAE,
7640                               X86FoldableSchedWrite sched> {
7641   let Uses = [MXCSR] in
7642   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7643                         (ins _Src.RC:$src), OpcodeStr,
7644                         "{sae}, $src", "$src, {sae}",
7645                         (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7646                         EVEX, EVEX_B, Sched<[sched]>;
7647 }
7648
7649 // Conversion with rounding control (RC)
7650 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7651                          X86VectorVTInfo _Src, SDNode OpNodeRnd,
7652                          X86FoldableSchedWrite sched> {
7653   let Uses = [MXCSR] in
7654   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7655                         (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7656                         "$rc, $src", "$src, $rc",
7657                         (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7658                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7659 }
7660
7661 // Similar to avx512_vcvt_fp, but uses an extload for the memory form.
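// For instance, with _Src = v8f32x_info the concatenation below selects the
// PatFrag named "extloadv8f32", so an fpextend fed from memory is matched as
// a single extending load instead of a separate load plus extend.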
7662 multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7663                                 X86VectorVTInfo _Src, SDNode OpNode,
7664                                 SDNode MaskOpNode, 
7665                                 X86FoldableSchedWrite sched,
7666                                 string Broadcast = _.BroadcastStr,
7667                                 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7668                                 RegisterClass MaskRC = _.KRCWM>
7669   : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
7670                    Alias, MemOp, MaskRC,
7671                    (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
7672                    (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7673
7674 // Extend Float to Double
7675 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
7676                            X86SchedWriteWidths sched> {
7677   let Predicates = [HasAVX512] in {
7678     defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
7679                             any_fpextend, fpextend, sched.ZMM>,
7680              avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
7681                                 X86vfpextSAE, sched.ZMM>, EVEX_V512;
7682   }
7683   let Predicates = [HasVLX] in {
7684     defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
7685                                X86any_vfpext, X86vfpext, sched.XMM, "{1to2}",
7686                                "", f64mem>, EVEX_V128;
7687     defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info,
7688                                      any_fpextend, fpextend, sched.YMM>, EVEX_V256;
7689   }
7690 }
7691
7692 // Truncate Double to Float
7693 multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
7694   let Predicates = [HasAVX512] in {
7695     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info,
7696                             X86any_vfpround, X86vfpround, sched.ZMM>,
7697              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
7698                                X86vfproundRnd, sched.ZMM>, EVEX_V512;
7699   }
7700   let Predicates = [HasVLX] in {
7701     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
7702                                null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
7703                                f128mem, VK2WM>, EVEX_V128;
7704     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info,
7705                                X86any_vfpround, X86vfpround,
7706                                sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7707   }
7708
7709   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7710                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7711   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7712                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7713                   VK2WM:$mask, VR128X:$src), 0, "att">;
7714   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
7715                   "$dst {${mask}} {z}, $src}",
7716                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7717                   VK2WM:$mask, VR128X:$src), 0, "att">;
7718   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7719                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7720   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7721                   "$dst {${mask}}, ${src}{1to2}}",
7722                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7723                   VK2WM:$mask, f64mem:$src), 0, "att">;
7724   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7725                   "$dst {${mask}} {z}, ${src}{1to2}}",
7726                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7727                   VK2WM:$mask, f64mem:$src), 0, "att">;
7728
7729   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7730                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7731   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7732                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7733                   VK4WM:$mask, VR256X:$src), 0, "att">;
7734   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
7735                   "$dst {${mask}} {z}, $src}",
7736                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7737                   VK4WM:$mask, VR256X:$src), 0, "att">;
7738   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7739                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7740   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7741                   "$dst {${mask}}, ${src}{1to4}}",
7742                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7743                   VK4WM:$mask, f64mem:$src), 0, "att">;
7744   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7745                   "$dst {${mask}} {z}, ${src}{1to4}}",
7746                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7747                   VK4WM:$mask, f64mem:$src), 0, "att">;
7748 }
7749
7750 defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
7751                                   VEX_W, PD, EVEX_CD8<64, CD8VF>;
7752 defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
7753                                   PS, EVEX_CD8<32, CD8VH>;
7754
7755 let Predicates = [HasVLX] in {
7756   // Special patterns to allow use of X86vmfpround for masking. Instruction
7757   // patterns have been disabled with null_frag.
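  // A masked register form, for instance, maps
  //   (X86vmfpround (v2f64 $src), (v4f32 $src0), $mask)
  // to VCVTPD2PSZ128rrk $src0, $mask, $src; the mask travels as an explicit
  // operand of the node rather than through a vselect.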
7758   def : Pat<(X86any_vfpround (v2f64 VR128X:$src)),
7759             (VCVTPD2PSZ128rr VR128X:$src)>;
7760   def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
7761                           VK2WM:$mask),
7762             (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
7763   def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
7764                           VK2WM:$mask),
7765             (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
7766
7767   def : Pat<(X86any_vfpround (loadv2f64 addr:$src)),
7768             (VCVTPD2PSZ128rm addr:$src)>;
7769   def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
7770                           VK2WM:$mask),
7771             (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7772   def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
7773                           VK2WM:$mask),
7774             (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
7775
7776   def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
7777             (VCVTPD2PSZ128rmb addr:$src)>;
7778   def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
7779                           (v4f32 VR128X:$src0), VK2WM:$mask),
7780             (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7781   def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
7782                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
7783             (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
7784 }
7785
7786 // Convert Signed/Unsigned Doubleword to Double
7787 let Uses = []<Register>, mayRaiseFPException = 0 in
7788 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7789                            SDNode MaskOpNode, SDNode OpNode128,
7790                            SDNode MaskOpNode128,
7791                            X86SchedWriteWidths sched> {
7792   // No rounding in this op
7793   let Predicates = [HasAVX512] in
7794     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
7795                             MaskOpNode, sched.ZMM>, EVEX_V512;
7796
7797   let Predicates = [HasVLX] in {
7798     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
7799                                OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
7800                                "", i64mem, VK2WM,
7801                                (v2f64 (OpNode128 (bc_v4i32
7802                                 (v2i64
7803                                  (scalar_to_vector (loadi64 addr:$src)))))),
7804                                (v2f64 (MaskOpNode128 (bc_v4i32
7805                                 (v2i64
7806                                  (scalar_to_vector (loadi64 addr:$src))))))>,
7807                                EVEX_V128;
7808     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
7809                                MaskOpNode, sched.YMM>, EVEX_V256;
7810   }
7811 }
7812
7813 // Convert Signed/Unsigned Doubleword to Float
7814 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7815                            SDNode MaskOpNode, SDNode OpNodeRnd,
7816                            X86SchedWriteWidths sched> {
7817   let Predicates = [HasAVX512] in
7818     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
7819                             MaskOpNode, sched.ZMM>,
7820              avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
7821                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7822
7823   let Predicates = [HasVLX] in {
7824     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
7825                                MaskOpNode, sched.XMM>, EVEX_V128;
7826     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
7827                                MaskOpNode, sched.YMM>, EVEX_V256;
7828   }
7829 }
7830
7831 // Convert Float to Signed/Unsigned Doubleword with truncation
7832 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7833                             SDNode MaskOpNode,
7834                             SDNode OpNodeSAE, X86SchedWriteWidths sched> {
7835   let Predicates = [HasAVX512] in {
7836     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7837                             MaskOpNode, sched.ZMM>,
7838              avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
7839                                 OpNodeSAE, sched.ZMM>, EVEX_V512;
7840   }
7841   let Predicates = [HasVLX] in {
7842     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7843                                MaskOpNode, sched.XMM>, EVEX_V128;
7844     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7845                                MaskOpNode, sched.YMM>, EVEX_V256;
7846   }
7847 }
7848
7849 // Convert Float to Signed/Unsigned Doubleword
7850 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7851                            SDNode MaskOpNode, SDNode OpNodeRnd,
7852                            X86SchedWriteWidths sched> {
7853   let Predicates = [HasAVX512] in {
7854     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7855                             MaskOpNode, sched.ZMM>,
7856              avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
7857                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
7858   }
7859   let Predicates = [HasVLX] in {
7860     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7861                                MaskOpNode, sched.XMM>, EVEX_V128;
7862     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7863                                MaskOpNode, sched.YMM>, EVEX_V256;
7864   }
7865 }
7866
7867 // Convert Double to Signed/Unsigned Doubleword with truncation
7868 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7869                             SDNode MaskOpNode, SDNode OpNodeSAE,
7870                             X86SchedWriteWidths sched> {
7871   let Predicates = [HasAVX512] in {
7872     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7873                             MaskOpNode, sched.ZMM>,
7874              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
7875                                 OpNodeSAE, sched.ZMM>, EVEX_V512;
7876   }
7877   let Predicates = [HasVLX] in {
7878     // We need "x"/"y" suffixes to distinguish the 128-bit and 256-bit memory
7879     // forms of these instructions in the Asm Parser, since they have the same
7880     // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7881     // for the same reason.
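    // For example, taking vcvttpd2dq as an instance of this multiclass, the
    // AT&T form "vcvttpd2dqx (%rax), %xmm0" reads a 128-bit memory operand while
    // "vcvttpd2dqy (%rax), %xmm0" reads 256 bits; without the suffix the memory
    // width would be ambiguous because both forms write an XMM destination.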
7882     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7883                                null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7884                                VK2WM>, EVEX_V128;
7885     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7886                                MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7887   }
7888
7889   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7890                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
7891                   VR128X:$src), 0, "att">;
7892   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7893                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7894                   VK2WM:$mask, VR128X:$src), 0, "att">;
7895   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7896                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7897                   VK2WM:$mask, VR128X:$src), 0, "att">;
7898   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7899                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7900                   f64mem:$src), 0, "att">;
7901   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7902                   "$dst {${mask}}, ${src}{1to2}}",
7903                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7904                   VK2WM:$mask, f64mem:$src), 0, "att">;
7905   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7906                   "$dst {${mask}} {z}, ${src}{1to2}}",
7907                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7908                   VK2WM:$mask, f64mem:$src), 0, "att">;
7909
7910   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7911                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
7912                   VR256X:$src), 0, "att">;
7913   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7914                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7915                   VK4WM:$mask, VR256X:$src), 0, "att">;
7916   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7917                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7918                   VK4WM:$mask, VR256X:$src), 0, "att">;
7919   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7920                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7921                   f64mem:$src), 0, "att">;
7922   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7923                   "$dst {${mask}}, ${src}{1to4}}",
7924                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7925                   VK4WM:$mask, f64mem:$src), 0, "att">;
7926   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7927                   "$dst {${mask}} {z}, ${src}{1to4}}",
7928                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7929                   VK4WM:$mask, f64mem:$src), 0, "att">;
7930 }
7931
7932 // Convert Double to Signed/Unsigned Doubleword
7933 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7934                            SDNode MaskOpNode, SDNode OpNodeRnd,
7935                            X86SchedWriteWidths sched> {
7936   let Predicates = [HasAVX512] in {
7937     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7938                             MaskOpNode, sched.ZMM>,
7939              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
7940                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7941   }
7942   let Predicates = [HasVLX] in {
7943     // We need "x"/"y" suffixes in order to distinguish between the 128- and
7944     // 256-bit memory forms of these instructions in the Asm Parser, since they
7945     // share the same dest type - 'v4i32x_info'. The broadcast string is
7946     // specified explicitly for the same reason.
7947     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7948                                null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7949                                VK2WM>, EVEX_V128;
7950     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7951                                MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7952   }
7953
7954   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7955                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7956   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7957                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7958                   VK2WM:$mask, VR128X:$src), 0, "att">;
7959   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7960                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7961                   VK2WM:$mask, VR128X:$src), 0, "att">;
7962   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7963                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7964                   f64mem:$src), 0, "att">;
7965   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7966                   "$dst {${mask}}, ${src}{1to2}}",
7967                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7968                   VK2WM:$mask, f64mem:$src), 0, "att">;
7969   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7970                   "$dst {${mask}} {z}, ${src}{1to2}}",
7971                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7972                   VK2WM:$mask, f64mem:$src), 0, "att">;
7973
7974   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7975                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7976   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7977                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7978                   VK4WM:$mask, VR256X:$src), 0, "att">;
7979   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7980                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7981                   VK4WM:$mask, VR256X:$src), 0, "att">;
7982   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7983                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7984                   f64mem:$src), 0, "att">;
7985   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7986                   "$dst {${mask}}, ${src}{1to4}}",
7987                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7988                   VK4WM:$mask, f64mem:$src), 0, "att">;
7989   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7990                   "$dst {${mask}} {z}, ${src}{1to4}}",
7991                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7992                   VK4WM:$mask, f64mem:$src), 0, "att">;
7993 }
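
// A minimal usage sketch for the broadcast aliases above (using vcvtpd2dq,
// one of the instantiations below): the {1toN} forms read a single 64-bit
// element and the x/y suffix selects the source width:
//
//   vcvtpd2dqx (%rax){1to2}, %xmm0    # broadcast one f64 to a 128-bit source
//   vcvtpd2dqy (%rax){1to4}, %xmm0    # broadcast one f64 to a 256-bit source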
7994
7995 // Convert Double to Signed/Unsigned Quadword
7996 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7997                            SDNode MaskOpNode, SDNode OpNodeRnd,
7998                            X86SchedWriteWidths sched> {
7999   let Predicates = [HasDQI] in {
8000     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8001                             MaskOpNode, sched.ZMM>,
8002              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8003                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8004   }
8005   let Predicates = [HasDQI, HasVLX] in {
8006     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8007                                MaskOpNode, sched.XMM>, EVEX_V128;
8008     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8009                                MaskOpNode, sched.YMM>, EVEX_V256;
8010   }
8011 }
8012
8013 // Convert Double to Signed/Unsigned Quadword with truncation
8014 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8015                             SDNode MaskOpNode, SDNode OpNodeRnd,
8016                             X86SchedWriteWidths sched> {
8017   let Predicates = [HasDQI] in {
8018     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8019                             MaskOpNode, sched.ZMM>,
8020              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8021                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
8022   }
8023   let Predicates = [HasDQI, HasVLX] in {
8024     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8025                                MaskOpNode, sched.XMM>, EVEX_V128;
8026     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8027                                MaskOpNode, sched.YMM>, EVEX_V256;
8028   }
8029 }
8030
8031 // Convert Signed/Unsigned Quadword to Double
8032 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
8033                            SDNode MaskOpNode, SDNode OpNodeRnd,
8034                            X86SchedWriteWidths sched> {
8035   let Predicates = [HasDQI] in {
8036     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8037                             MaskOpNode, sched.ZMM>,
8038              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8039                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8040   }
8041   let Predicates = [HasDQI, HasVLX] in {
8042     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8043                                MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
8044     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8045                                MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
8046   }
8047 }
8048
8049 // Convert Float to Signed/Unsigned Quadword
8050 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8051                            SDNode MaskOpNode, SDNode OpNodeRnd,
8052                            X86SchedWriteWidths sched> {
8053   let Predicates = [HasDQI] in {
8054     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8055                             MaskOpNode, sched.ZMM>,
8056              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8057                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8058   }
8059   let Predicates = [HasDQI, HasVLX] in {
8060     // The broadcast string is specified explicitly, since only the low 2
8061     // elements of the v4f32x_info source are used.
8062     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8063                                MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8064                                (v2i64 (OpNode (bc_v4f32
8065                                 (v2f64
8066                                  (scalar_to_vector (loadf64 addr:$src)))))),
8067                                (v2i64 (MaskOpNode (bc_v4f32
8068                                 (v2f64
8069                                  (scalar_to_vector (loadf64 addr:$src))))))>,
8070                                EVEX_V128;
8071     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8072                                MaskOpNode, sched.YMM>, EVEX_V256;
8073   }
8074 }
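
// Note (illustrative): the Z128 form above converts only the low two f32
// elements, so its memory form reads just 64 bits - hence f64mem and the
// scalar_to_vector load pattern. The same holds for the truncating variant
// below. E.g. in AT&T syntax:
//
//   vcvtps2qq (%rax), %xmm0           # loads 8 bytes (two floats)
//   vcvtps2qq (%rax){1to2}, %xmm0     # broadcasts a single float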
8075
8076 // Convert Float to Signed/Unsigned Quadword with truncation
8077 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8078                             SDNode MaskOpNode, SDNode OpNodeRnd,
8079                             X86SchedWriteWidths sched> {
8080   let Predicates = [HasDQI] in {
8081     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8082                             MaskOpNode, sched.ZMM>,
8083              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8084                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
8085   }
8086   let Predicates = [HasDQI, HasVLX] in {
8087     // The broadcast string is specified explicitly, since only the low 2
8088     // elements of the v4f32x_info source are used.
8089     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8090                                MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8091                                (v2i64 (OpNode (bc_v4f32
8092                                 (v2f64
8093                                  (scalar_to_vector (loadf64 addr:$src)))))),
8094                                (v2i64 (MaskOpNode (bc_v4f32
8095                                 (v2f64
8096                                  (scalar_to_vector (loadf64 addr:$src))))))>,
8097                                EVEX_V128;
8098     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8099                                MaskOpNode, sched.YMM>, EVEX_V256;
8100   }
8101 }
8102
8103 // Convert Signed/Unsigned Quadword to Float
8104 multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
8105                            SDNode MaskOpNode, SDNode OpNodeRnd,
8106                            X86SchedWriteWidths sched> {
8107   let Predicates = [HasDQI] in {
8108     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
8109                             MaskOpNode, sched.ZMM>,
8110              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
8111                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8112   }
8113   let Predicates = [HasDQI, HasVLX] in {
8114     // We need "x"/"y" suffixes in order to distinguish between the 128- and
8115     // 256-bit memory forms of these instructions in the Asm Parser, since they
8116     // share the same dest type - 'v4f32x_info'. The broadcast string is
8117     // specified explicitly for the same reason.
8118     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
8119                                null_frag, sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
8120                                EVEX_V128, NotEVEX2VEXConvertible;
8121     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
8122                                MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256,
8123                                NotEVEX2VEXConvertible;
8124   }
8125
8126   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8127                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8128                   VR128X:$src), 0, "att">;
8129   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8130                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8131                   VK2WM:$mask, VR128X:$src), 0, "att">;
8132   def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8133                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8134                   VK2WM:$mask, VR128X:$src), 0, "att">;
8135   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8136                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8137                   i64mem:$src), 0, "att">;
8138   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8139                   "$dst {${mask}}, ${src}{1to2}}",
8140                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8141                   VK2WM:$mask, i64mem:$src), 0, "att">;
8142   def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8143                   "$dst {${mask}} {z}, ${src}{1to2}}",
8144                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8145                   VK2WM:$mask, i64mem:$src), 0, "att">;
8146
8147   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8148                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8149                   VR256X:$src), 0, "att">;
8150   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8151                   "$dst {${mask}}, $src}",
8152                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8153                   VK4WM:$mask, VR256X:$src), 0, "att">;
8154   def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8155                   "$dst {${mask}} {z}, $src}",
8156                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8157                   VK4WM:$mask, VR256X:$src), 0, "att">;
8158   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8159                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8160                   i64mem:$src), 0, "att">;
8161   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8162                   "$dst {${mask}}, ${src}{1to4}}",
8163                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8164                   VK4WM:$mask, i64mem:$src), 0, "att">;
8165   def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8166                   "$dst {${mask}} {z}, ${src}{1to4}}",
8167                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8168                   VK4WM:$mask, i64mem:$src), 0, "att">;
8169 }
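
// Illustrative AT&T forms for the aliases above (e.g. vcvtqq2ps, instantiated
// below); here the broadcast element is a 64-bit integer:
//
//   vcvtqq2psx (%rax), %xmm0          # 128-bit memory source (two qwords)
//   vcvtqq2psy (%rax){1to4}, %xmm0    # broadcast one qword to a 256-bit source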
8170
8171 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8172                                  X86any_VSintToFP, X86VSintToFP,
8173                                  SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8174
8175 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8176                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8177                                 PS, EVEX_CD8<32, CD8VF>;
8178
8179 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8180                                  X86cvttp2si, X86cvttp2siSAE,
8181                                  SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;
8182
8183 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8184                                  X86cvttp2si, X86cvttp2siSAE,
8185                                  SchedWriteCvtPD2DQ>,
8186                                  PD, VEX_W, EVEX_CD8<64, CD8VF>;
8187
8188 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8189                                  X86cvttp2ui, X86cvttp2uiSAE,
8190                                  SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;
8191
8192 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8193                                  X86cvttp2ui, X86cvttp2uiSAE,
8194                                  SchedWriteCvtPD2DQ>,
8195                                  PS, VEX_W, EVEX_CD8<64, CD8VF>;
8196
8197 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8198                                   uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8199                                   SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8200
8201 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8202                                  uint_to_fp, X86VUintToFpRnd,
8203                                  SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;
8204
8205 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8206                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8207                                  EVEX_CD8<32, CD8VF>;
8208
8209 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8210                                  X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8211                                  VEX_W, EVEX_CD8<64, CD8VF>;
8212
8213 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8214                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8215                                  PS, EVEX_CD8<32, CD8VF>;
8216
8217 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8218                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8219                                  PS, EVEX_CD8<64, CD8VF>;
8220
8221 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8222                                  X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8223                                  PD, EVEX_CD8<64, CD8VF>;
8224
8225 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8226                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8227                                  EVEX_CD8<32, CD8VH>;
8228
8229 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8230                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8231                                  PD, EVEX_CD8<64, CD8VF>;
8232
8233 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8234                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8235                                  EVEX_CD8<32, CD8VH>;
8236
8237 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8238                                  X86cvttp2si, X86cvttp2siSAE,
8239                                  SchedWriteCvtPD2DQ>, VEX_W,
8240                                  PD, EVEX_CD8<64, CD8VF>;
8241
8242 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8243                                  X86cvttp2si, X86cvttp2siSAE,
8244                                  SchedWriteCvtPS2DQ>, PD,
8245                                  EVEX_CD8<32, CD8VH>;
8246
8247 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8248                                  X86cvttp2ui, X86cvttp2uiSAE,
8249                                  SchedWriteCvtPD2DQ>, VEX_W,
8250                                  PD, EVEX_CD8<64, CD8VF>;
8251
8252 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8253                                  X86cvttp2ui, X86cvttp2uiSAE,
8254                                  SchedWriteCvtPS2DQ>, PD,
8255                                  EVEX_CD8<32, CD8VH>;
8256
8257 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8258                             sint_to_fp, X86VSintToFpRnd,
8259                             SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
8260
8261 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8262                             uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8263                             VEX_W, XS, EVEX_CD8<64, CD8VF>;
8264
8265 defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp,
8266                             sint_to_fp, X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8267                             VEX_W, PS, EVEX_CD8<64, CD8VF>;
8268
8269 defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp,
8270                             uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PS>,
8271                             VEX_W, XD, EVEX_CD8<64, CD8VF>;
8272
8273 let Predicates = [HasVLX] in {
8274   // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8275   // patterns have been disabled with null_frag.
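  // (Likely rationale: the v4i32 destination has more elements than the 2-bit
  // write-mask covers, so the masked forms are matched here explicitly via
  // X86mcvtp2Int, which carries the passthru and mask operands, rather than
  // through the generic vselect-based maskable patterns.)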
8276   def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8277             (VCVTPD2DQZ128rr VR128X:$src)>;
8278   def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8279                           VK2WM:$mask),
8280             (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8281   def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8282                           VK2WM:$mask),
8283             (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8284
8285   def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8286             (VCVTPD2DQZ128rm addr:$src)>;
8287   def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8288                           VK2WM:$mask),
8289             (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8290   def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8291                           VK2WM:$mask),
8292             (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8293
8294   def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8295             (VCVTPD2DQZ128rmb addr:$src)>;
8296   def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8297                           (v4i32 VR128X:$src0), VK2WM:$mask),
8298             (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8299   def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8300                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8301             (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8302
8303   // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8304   // patterns have been disabled with null_frag.
8305   def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8306             (VCVTTPD2DQZ128rr VR128X:$src)>;
8307   def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8308                           VK2WM:$mask),
8309             (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8310   def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8311                           VK2WM:$mask),
8312             (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8313
8314   def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8315             (VCVTTPD2DQZ128rm addr:$src)>;
8316   def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8317                           VK2WM:$mask),
8318             (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8319   def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8320                           VK2WM:$mask),
8321             (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8322
8323   def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8324             (VCVTTPD2DQZ128rmb addr:$src)>;
8325   def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8326                           (v4i32 VR128X:$src0), VK2WM:$mask),
8327             (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8328   def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8329                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8330             (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8331
8332   // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8333   // patterns have been disabled with null_frag.
8334   def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8335             (VCVTPD2UDQZ128rr VR128X:$src)>;
8336   def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8337                            VK2WM:$mask),
8338             (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8339   def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8340                            VK2WM:$mask),
8341             (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8342
8343   def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8344             (VCVTPD2UDQZ128rm addr:$src)>;
8345   def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8346                            VK2WM:$mask),
8347             (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8348   def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8349                            VK2WM:$mask),
8350             (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8351
8352   def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8353             (VCVTPD2UDQZ128rmb addr:$src)>;
8354   def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8355                            (v4i32 VR128X:$src0), VK2WM:$mask),
8356             (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8357   def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8358                            v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8359             (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8360
8361   // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8362   // patterns have been disabled with null_frag.
8363   def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8364             (VCVTTPD2UDQZ128rr VR128X:$src)>;
8365   def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8366                           VK2WM:$mask),
8367             (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8368   def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8369                           VK2WM:$mask),
8370             (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8371
8372   def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
8373             (VCVTTPD2UDQZ128rm addr:$src)>;
8374   def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8375                           VK2WM:$mask),
8376             (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8377   def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8378                           VK2WM:$mask),
8379             (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8380
8381   def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
8382             (VCVTTPD2UDQZ128rmb addr:$src)>;
8383   def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8384                           (v4i32 VR128X:$src0), VK2WM:$mask),
8385             (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8386   def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8387                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8388             (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8389 }
8390
8391 let Predicates = [HasDQI, HasVLX] in {
8392   def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8393             (VCVTPS2QQZ128rm addr:$src)>;
8394   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8395                                  (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8396                                  VR128X:$src0)),
8397             (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8398   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8399                                  (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8400                                  v2i64x_info.ImmAllZerosV)),
8401             (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8402
8403   def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8404             (VCVTPS2UQQZ128rm addr:$src)>;
8405   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8406                                  (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8407                                  VR128X:$src0)),
8408             (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8409   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8410                                  (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8411                                  v2i64x_info.ImmAllZerosV)),
8412             (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8413
8414   def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8415             (VCVTTPS2QQZ128rm addr:$src)>;
8416   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8417                                  (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8418                                  VR128X:$src0)),
8419             (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8420   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8421                                  (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8422                                  v2i64x_info.ImmAllZerosV)),
8423             (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8424
8425   def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8426             (VCVTTPS2UQQZ128rm addr:$src)>;
8427   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8428                                  (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8429                                  VR128X:$src0)),
8430             (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8431   def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8432                                  (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8433                                  v2i64x_info.ImmAllZerosV)),
8434             (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8435 }
8436
8437 let Predicates = [HasVLX] in {
8438   def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8439             (VCVTDQ2PDZ128rm addr:$src)>;
8440   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8441                                  (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8442                                  VR128X:$src0)),
8443             (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8444   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8445                                  (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8446                                  v2f64x_info.ImmAllZerosV)),
8447             (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8448
8449   def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8450             (VCVTUDQ2PDZ128rm addr:$src)>;
8451   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8452                                  (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8453                                  VR128X:$src0)),
8454             (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8455   def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8456                                  (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8457                                  v2f64x_info.ImmAllZerosV)),
8458             (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8459 }
8460
8461 let Predicates = [HasDQI, HasVLX] in {
8462   // Special patterns to allow use of X86VMSintToFP for masking. Instruction
8463   // patterns have been disabled with null_frag.
8464   def : Pat<(v4f32 (X86any_VSintToFP (v2i64 VR128X:$src))),
8465             (VCVTQQ2PSZ128rr VR128X:$src)>;
8466   def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8467                            VK2WM:$mask),
8468             (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8469   def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8470                            VK2WM:$mask),
8471             (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8472
8473   def : Pat<(v4f32 (X86any_VSintToFP (loadv2i64 addr:$src))),
8474             (VCVTQQ2PSZ128rm addr:$src)>;
8475   def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8476                            VK2WM:$mask),
8477             (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8478   def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8479                            VK2WM:$mask),
8480             (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8481
8482   def : Pat<(v4f32 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
8483             (VCVTQQ2PSZ128rmb addr:$src)>;
8484   def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8485                            (v4f32 VR128X:$src0), VK2WM:$mask),
8486             (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8487   def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8488                            v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8489             (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8490
8491   // Special patterns to allow use of X86VMUintToFP for masking. Instruction
8492   // patterns have been disabled with null_frag.
8493   def : Pat<(v4f32 (X86any_VUintToFP (v2i64 VR128X:$src))),
8494             (VCVTUQQ2PSZ128rr VR128X:$src)>;
8495   def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8496                            VK2WM:$mask),
8497             (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8498   def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8499                            VK2WM:$mask),
8500             (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8501
8502   def : Pat<(v4f32 (X86any_VUintToFP (loadv2i64 addr:$src))),
8503             (VCVTUQQ2PSZ128rm addr:$src)>;
8504   def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8505                            VK2WM:$mask),
8506             (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8507   def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8508                            VK2WM:$mask),
8509             (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8510
8511   def : Pat<(v4f32 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
8512             (VCVTUQQ2PSZ128rmb addr:$src)>;
8513   def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8514                            (v4f32 VR128X:$src0), VK2WM:$mask),
8515             (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8516   def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8517                            v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8518             (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8519 }
8520
8521 //===----------------------------------------------------------------------===//
8522 // Half precision conversion instructions
8523 //===----------------------------------------------------------------------===//
8524
8525 let Uses = [MXCSR], mayRaiseFPException = 1 in
8526 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8527                            X86MemOperand x86memop, dag ld_dag,
8528                            X86FoldableSchedWrite sched> {
8529   defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8530                             (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8531                             (X86any_cvtph2ps (_src.VT _src.RC:$src)),
8532                             (X86cvtph2ps (_src.VT _src.RC:$src))>,
8533                             T8PD, Sched<[sched]>;
8534   defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8535                             (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8536                             (X86any_cvtph2ps (_src.VT ld_dag)),
8537                             (X86cvtph2ps (_src.VT ld_dag))>,
8538                             T8PD, Sched<[sched.Folded]>;
8539 }
8540
8541 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8542                                X86FoldableSchedWrite sched> {
8543   let Uses = [MXCSR] in
8544   defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8545                              (ins _src.RC:$src), "vcvtph2ps",
8546                              "{sae}, $src", "$src, {sae}",
8547                              (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8548                              T8PD, EVEX_B, Sched<[sched]>;
8549 }
8550
8551 let Predicates = [HasAVX512] in
8552   defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
8553                                     (load addr:$src), WriteCvtPH2PSZ>,
8554                     avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8555                     EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8556
8557 let Predicates = [HasVLX] in {
8558   defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8559                        (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
8560                        EVEX_CD8<32, CD8VH>;
8561   defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8562                        (bitconvert (v2i64 (X86vzload64 addr:$src))),
8563                        WriteCvtPH2PS>, EVEX, EVEX_V128,
8564                        EVEX_CD8<32, CD8VH>;
8565
8566   // Pattern match vcvtph2ps of a scalar i64 load.
8567   def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
8568               (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8569             (VCVTPH2PSZ128rm addr:$src)>;
8570 }
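
// Illustrative AT&T forms for the vcvtph2ps definitions above:
//
//   vcvtph2ps %xmm1, %ymm0            # eight f16 -> eight f32 (Z256 form)
//   vcvtph2ps (%rax), %xmm0 {%k1} {z} # Z128 form: reads 64 bits (four f16)
//   vcvtph2ps {sae}, %ymm1, %zmm0     # Z form with exceptions suppressed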
8571
8572 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8573                            X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8574 let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8575   def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8576              (ins _src.RC:$src1, i32u8imm:$src2),
8577              "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8578              [(set _dest.RC:$dst,
8579                    (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8580              Sched<[RR]>;
8581   let Constraints = "$src0 = $dst" in
8582   def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8583              (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8584              "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8585              [(set _dest.RC:$dst,
8586                    (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8587                                  _dest.RC:$src0, _src.KRCWM:$mask))]>,
8588              Sched<[RR]>, EVEX_K;
8589   def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8590              (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8591              "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8592              [(set _dest.RC:$dst,
8593                    (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8594                                  _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8595              Sched<[RR]>, EVEX_KZ;
8596   let hasSideEffects = 0, mayStore = 1 in {
8597     def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8598                (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8599                "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8600                Sched<[MR]>;
8601     def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
8602                (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8603                "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
8604                 EVEX_K, Sched<[MR]>, NotMemoryFoldable;
8605   }
8606 }
8607 }
8608
8609 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8610                                SchedWrite Sched> {
8611   let hasSideEffects = 0, Uses = [MXCSR] in
8612   defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
8613                    (outs _dest.RC:$dst),
8614                    (ins _src.RC:$src1, i32u8imm:$src2),
8615                    "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
8616                    EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
8617 }
8618
8619 let Predicates = [HasAVX512] in {
8620   defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
8621                                     WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
8622                     avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
8623                                         EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8624
8625   def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
8626             (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
8627 }
8628
8629 let Predicates = [HasVLX] in {
8630   defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
8631                                        WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
8632                                        EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
8633   defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
8634                                        WriteCvtPS2PH, WriteCvtPS2PHSt>,
8635                                        EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
8636
8637   def : Pat<(store (f64 (extractelt
8638                          (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
8639                          (iPTR 0))), addr:$dst),
8640             (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
8641   def : Pat<(store (i64 (extractelt
8642                          (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
8643                          (iPTR 0))), addr:$dst),
8644             (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
8645   def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
8646             (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
8647 }
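
// Illustrative AT&T forms for vcvtps2ph (the imm8 selects the rounding mode;
// setting bit 2, e.g. $4, means "use MXCSR.RC"):
//
//   vcvtps2ph $4, %ymm1, %xmm0        # eight f32 -> eight f16
//   vcvtps2ph $4, %xmm1, (%rax)       # Z128 store form: writes 64 bits
//   vcvtps2ph $4, {sae}, %zmm1, %ymm0 # Z form, exceptions suppressed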
8648
8649 //  Unordered/Ordered scalar FP compare with SAE, setting EFLAGS
8650 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
8651                             string OpcodeStr, Domain d,
8652                             X86FoldableSchedWrite sched = WriteFComX> {
8653   let hasSideEffects = 0, Uses = [MXCSR] in
8654   def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
8655                   !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
8656                   EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
8657 }
8658
8659 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8660   defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
8661                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8662   defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
8663                                    AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8664   defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
8665                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8666   defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
8667                                    AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8668 }
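
// Example (illustrative): the {sae} forms compare without raising floating
// point exceptions, e.g.
//
//   vucomiss {sae}, %xmm1, %xmm0      # sets ZF/PF/CF from the comparison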
8669
8670 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8671   defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
8672                                  "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8673                                  EVEX_CD8<32, CD8VT1>;
8674   defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
8675                                   "ucomisd", SSEPackedDouble>, PD, EVEX,
8676                                   VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8677   defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
8678                                  "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8679                                  EVEX_CD8<32, CD8VT1>;
8680   defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
8681                                  "comisd", SSEPackedDouble>, PD, EVEX,
8682                                   VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8683   let isCodeGenOnly = 1 in {
8684     defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
8685                           sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8686                           EVEX_CD8<32, CD8VT1>;
8687     defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
8688                           sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
8689                           VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8690
8691     defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
8692                           sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8693                           EVEX_CD8<32, CD8VT1>;
8694     defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
8695                           sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
8696                           VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8697   }
8698 }
8699
8700 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
8701 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8702                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8703   let Predicates = [HasAVX512], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
8704   defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8705                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8706                            "$src2, $src1", "$src1, $src2",
8707                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8708                            EVEX_4V, VEX_LIG, Sched<[sched]>;
8709   defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8710                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8711                          "$src2, $src1", "$src1, $src2",
8712                          (OpNode (_.VT _.RC:$src1),
8713                           (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
8714                           Sched<[sched.Folded, sched.ReadAfterFold]>;
8715 }
8716 }
8717
8718 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
8719                                f32x_info>, EVEX_CD8<32, CD8VT1>,
8720                                T8PD;
8721 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
8722                                f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
8723                                T8PD;
8724 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
8725                                  SchedWriteFRsqrt.Scl, f32x_info>,
8726                                  EVEX_CD8<32, CD8VT1>, T8PD;
8727 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
8728                                  SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
8729                                  EVEX_CD8<64, CD8VT1>, T8PD;
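
// Usage sketch for the scalar approximations above: the low result element is
// an approximation with relative error <= 2^-14 and the upper elements are
// copied from $src1, e.g. (AT&T syntax)
//
//   vrcp14ss %xmm2, %xmm1, %xmm0 {%k1}
//   vrsqrt14sd (%rax), %xmm1, %xmm0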
8730
8731 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
8732 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
8733                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8734   let ExeDomain = _.ExeDomain in {
8735   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8736                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
8737                          (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
8738                          Sched<[sched]>;
8739   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8740                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8741                          (OpNode (_.VT
8742                            (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
8743                          Sched<[sched.Folded, sched.ReadAfterFold]>;
8744   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8745                           (ins _.ScalarMemOp:$src), OpcodeStr,
8746                           "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
8747                           (OpNode (_.VT
8748                             (_.BroadcastLdFrag addr:$src)))>,
8749                           EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8750   }
8751 }
8752
8753 let Uses = [MXCSR] in
8754 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
8755                                 X86SchedWriteWidths sched> {
8756   defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
8757                            v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
8758   defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
8759                            v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8760
8761   // Define only if AVX512VL feature is present.
8762   let Predicates = [HasVLX] in {
8763     defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8764                                 OpNode, sched.XMM, v4f32x_info>,
8765                                EVEX_V128, EVEX_CD8<32, CD8VF>;
8766     defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8767                                 OpNode, sched.YMM, v8f32x_info>,
8768                                EVEX_V256, EVEX_CD8<32, CD8VF>;
8769     defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8770                                 OpNode, sched.XMM, v2f64x_info>,
8771                                EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
8772     defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8773                                 OpNode, sched.YMM, v4f64x_info>,
8774                                EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
8775   }
8776 }
8777
8778 defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
8779 defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
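
// The packed forms also accept embedded broadcasts, e.g. (illustrative):
//
//   vrcp14ps (%rax){1to16}, %zmm0 {%k1} {z}
//   vrsqrt14pd (%rax){1to8}, %zmm0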
8780
8781 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
8782 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
8783                          SDNode OpNode, SDNode OpNodeSAE,
8784                          X86FoldableSchedWrite sched> {
8785   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
8786   defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8787                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8788                            "$src2, $src1", "$src1, $src2",
8789                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8790                            Sched<[sched]>, SIMD_EXC;
8791
8792   defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8793                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8794                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
8795                             (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8796                             EVEX_B, Sched<[sched]>;
8797
8798   defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8799                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8800                          "$src2, $src1", "$src1, $src2",
8801                          (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
8802                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8803   }
8804 }
8805
8806 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8807                         SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
8808   defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
8809                            sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
8810   defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
8811                            sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
8812 }
8813
8814 let Predicates = [HasERI] in {
8815   defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
8816                                SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
8817   defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
8818                                SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
8819 }
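
// AVX512ER sketch (illustrative): these approximations have relative error
// <= 2^-28 and support {sae}, e.g.
//
//   vrcp28ss {sae}, %xmm2, %xmm1, %xmm0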
8820
8821 defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
8822                               SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
8823 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
8824
8825 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8826                          SDNode OpNode, X86FoldableSchedWrite sched> {
8827   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8828   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8829                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
8830                          (OpNode (_.VT _.RC:$src))>,
8831                          Sched<[sched]>;
8832
8833   defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8834                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8835                          (OpNode (_.VT
8836                              (bitconvert (_.LdFrag addr:$src))))>,
8837                           Sched<[sched.Folded, sched.ReadAfterFold]>;
8838
8839   defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8840                          (ins _.ScalarMemOp:$src), OpcodeStr,
8841                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
8842                          (OpNode (_.VT
8843                                   (_.BroadcastLdFrag addr:$src)))>,
8844                          EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8845   }
8846 }
8847 multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8848                          SDNode OpNode, X86FoldableSchedWrite sched> {
8849   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
8850   defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8851                         (ins _.RC:$src), OpcodeStr,
8852                         "{sae}, $src", "$src, {sae}",
8853                         (OpNode (_.VT _.RC:$src))>,
8854                         EVEX_B, Sched<[sched]>;
8855 }
8856
8857 multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
8858                        SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8859    defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
8860               avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
8861               T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
8862    defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
8863               avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
8864               T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8865 }
8866
8867 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
8868                                   SDNode OpNode, X86SchedWriteWidths sched> {
8869   // Define only if AVX512VL feature is present.
8870   let Predicates = [HasVLX] in {
8871     defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
8872                                 sched.XMM>,
8873                                 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
8874     defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
8875                                 sched.YMM>,
8876                                 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
8877     defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
8878                                 sched.XMM>,
8879                                 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8880     defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
8881                                 sched.YMM>,
8882                                 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8883   }
8884 }
8885
8886 let Predicates = [HasERI] in {
8887  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
8888                             SchedWriteFRsqrt>, EVEX;
8889  defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
8890                             SchedWriteFRcp>, EVEX;
8891  defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
8892                             SchedWriteFAdd>, EVEX;
8893 }
8894 defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
8895                             SchedWriteFRnd>,
8896                  avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
8897                                           SchedWriteFRnd>, EVEX;
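
// vgetexp extracts the exponent as a floating-point value, roughly
// floor(log2(|x|)) for normal inputs, e.g. (illustrative):
//
//   vgetexpps %zmm1, %zmm0            # per-element exponent extraction
//   vgetexpsd %xmm2, %xmm1, %xmm0     # scalar form, upper bits from %xmm1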
8898
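// Packed square root: rounding-control (rb) form below, plus plain register,
// load-folded and broadcast-memory forms.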
8899 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
8900                                     X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8901   let ExeDomain = _.ExeDomain in
8902   defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8903                          (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
8904                          (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
8905                          EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8906 }
8907
8908 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
8909                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8910   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8911   defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
8912                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
8913                          (_.VT (any_fsqrt _.RC:$src)),
8914                          (_.VT (fsqrt _.RC:$src))>, EVEX,
8915                          Sched<[sched]>;
8916   defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
8917                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8918                          (any_fsqrt (_.VT (_.LdFrag addr:$src))),
8919                          (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
8920                          Sched<[sched.Folded, sched.ReadAfterFold]>;
8921   defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
8922                           (ins _.ScalarMemOp:$src), OpcodeStr,
8923                           "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
8924                           (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
8925                           (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
8926                           EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8927   }
8928 }
8929
8930 let Uses = [MXCSR], mayRaiseFPException = 1 in
8931 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
8932                                   X86SchedWriteSizes sched> {
8933   defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8934                                 sched.PS.ZMM, v16f32_info>,
8935                                 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8936   defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8937                                 sched.PD.ZMM, v8f64_info>,
8938                                 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8939   // Define only if AVX512VL feature is present.
8940   let Predicates = [HasVLX] in {
8941     defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8942                                      sched.PS.XMM, v4f32x_info>,
8943                                      EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
8944     defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8945                                      sched.PS.YMM, v8f32x_info>,
8946                                      EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
8947     defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8948                                      sched.PD.XMM, v2f64x_info>,
8949                                      EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8950     defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8951                                      sched.PD.YMM, v4f64x_info>,
8952                                      EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8953   }
8954 }
8955
8956 let Uses = [MXCSR] in
8957 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
8958                                         X86SchedWriteSizes sched> {
8959   defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
8960                                       sched.PS.ZMM, v16f32_info>,
8961                                       EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8962   defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
8963                                       sched.PD.ZMM, v8f64_info>,
8964                                       EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8965 }
8966
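// Scalar square root: masked intrinsic (_Int) forms, a rounding-control form,
// codegen-only FR register forms, and patterns selecting them for plain fsqrt.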
8967 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
8968                               X86VectorVTInfo _, string Name> {
8969   let ExeDomain = _.ExeDomain in {
8970     defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8971                          (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8972                          "$src2, $src1", "$src1, $src2",
8973                          (X86fsqrts (_.VT _.RC:$src1),
8974                                     (_.VT _.RC:$src2))>,
8975                          Sched<[sched]>, SIMD_EXC;
8976     defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8977                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8978                          "$src2, $src1", "$src1, $src2",
8979                          (X86fsqrts (_.VT _.RC:$src1),
8980                                     (_.ScalarIntMemFrags addr:$src2))>,
8981                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8982     let Uses = [MXCSR] in
8983     defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8984                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
8985                          "$rc, $src2, $src1", "$src1, $src2, $rc",
8986                          (X86fsqrtRnds (_.VT _.RC:$src1),
8987                                      (_.VT _.RC:$src2),
8988                                      (i32 timm:$rc))>,
8989                          EVEX_B, EVEX_RC, Sched<[sched]>;
8990
8991     let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
8992       def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8993                 (ins _.FRC:$src1, _.FRC:$src2),
8994                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8995                 Sched<[sched]>, SIMD_EXC;
8996       let mayLoad = 1 in
8997         def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8998                   (ins _.FRC:$src1, _.ScalarMemOp:$src2),
8999                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9000                   Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9001     }
9002   }
9003
9004   let Predicates = [HasAVX512] in {
9005     def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9006               (!cast<Instruction>(Name#Zr)
9007                   (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9008   }
9009
9010   let Predicates = [HasAVX512, OptForSize] in {
9011     def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9012               (!cast<Instruction>(Name#Zm)
9013                   (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9014   }
9015 }
9016
9017 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9018                                   X86SchedWriteSizes sched> {
9019   defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9020                         EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
9021   defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9022                         EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
9023 }
9024
9025 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9026              avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9027
9028 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
9029
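// Scalar round-to-integral (vrndscaless/vrndscalesd): masked intrinsic forms
// with an SAE variant, codegen-only forms, and X86any_VRndScale patterns.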
9030 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9031                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9032   let ExeDomain = _.ExeDomain in {
9033   defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9034                            (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9035                            "$src3, $src2, $src1", "$src1, $src2, $src3",
9036                            (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9037                            (i32 timm:$src3)))>,
9038                            Sched<[sched]>, SIMD_EXC;
9039
9040   let Uses = [MXCSR] in
9041   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9042                          (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9043                          "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9044                          (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9045                          (i32 timm:$src3)))>, EVEX_B,
9046                          Sched<[sched]>;
9047
9048   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9049                          (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9050                          OpcodeStr,
9051                          "$src3, $src2, $src1", "$src1, $src2, $src3",
9052                          (_.VT (X86RndScales _.RC:$src1,
9053                                 (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
9054                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9055
9056   let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9057     def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9058                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9059                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9060                []>, Sched<[sched]>, SIMD_EXC;
9061
9062     let mayLoad = 1 in
9063       def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9064                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9065                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9066                  []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9067   }
9068   }
9069
9070   let Predicates = [HasAVX512] in {
9071     def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9072               (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
9073                _.FRC:$src1, timm:$src2))>;
9074   }
9075
9076   let Predicates = [HasAVX512, OptForSize] in {
9077     def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9078               (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
9079                addr:$src1, timm:$src2))>;
9080   }
9081 }
9082
9083 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9084                                            SchedWriteFRnd.Scl, f32x_info>,
9085                                            AVX512AIi8Base, EVEX_4V, VEX_LIG,
9086                                            EVEX_CD8<32, CD8VT1>;
9087
9088 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9089                                            SchedWriteFRnd.Scl, f64x_info>,
9090                                            VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
9091                                            EVEX_CD8<64, CD8VT1>;
9092
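// Fold a mask select around a scalar unary op into the masked (k) and
// zero-masked (kz) intrinsic forms of the corresponding instruction.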
9093 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9094                                 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9095                                 dag OutMask, Predicate BasePredicate> {
9096   let Predicates = [BasePredicate] in {
9097     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9098                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9099                (extractelt _.VT:$dst, (iPTR 0))))),
9100               (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9101                _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9102
9103     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9104                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9105                ZeroFP))),
9106               (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9107                OutMask, _.VT:$src2, _.VT:$src1)>;
9108   }
9109 }
9110
9111 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9112                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9113                             fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9114 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9115                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9116                             fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9117
9118
9119 //===----------------------------------------------------------------------===//
9120 // Integer truncate and extend operations
9121 //===----------------------------------------------------------------------===//
9122
9123 // PatFrags that contain a select and a truncate op. They take operands in the
9124 // same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
9125 // either to the multiclasses.
9126 def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9127                            (vselect_mask node:$mask,
9128                                          (trunc node:$src), node:$src0)>;
9129 def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9130                             (vselect_mask node:$mask,
9131                                           (X86vtruncs node:$src), node:$src0)>;
9132 def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9133                              (vselect_mask node:$mask,
9134                                            (X86vtruncus node:$src), node:$src0)>;
9135
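// Common truncate (vpmov*) forms: unmasked, masked and zero-masked register
// variants plus unmasked and masked truncating stores.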
9136 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9137                               SDPatternOperator MaskNode,
9138                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9139                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9140   let ExeDomain = DestInfo.ExeDomain in {
9141   def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9142              (ins SrcInfo.RC:$src),
9143              OpcodeStr # "\t{$src, $dst|$dst, $src}",
9144              [(set DestInfo.RC:$dst,
9145                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9146              EVEX, Sched<[sched]>;
9147   let Constraints = "$src0 = $dst" in
9148   def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9149              (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9150              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9151              [(set DestInfo.RC:$dst,
9152                    (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9153                              (DestInfo.VT DestInfo.RC:$src0),
9154                              SrcInfo.KRCWM:$mask))]>,
9155              EVEX, EVEX_K, Sched<[sched]>;
9156   def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9157              (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9158              OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9159              [(set DestInfo.RC:$dst,
9160                    (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9161                              DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9162              EVEX, EVEX_KZ, Sched<[sched]>;
9163   }
9164
9165   let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9166     def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9167                (ins x86memop:$dst, SrcInfo.RC:$src),
9168                OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9169                EVEX, Sched<[sched.Folded]>;
9170
9171     def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9172                (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9173                OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9174                EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
9175   } // mayStore = 1, hasSideEffects = 0
9176 }
9177
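// Select truncating-store and masked truncating-store patterns to the mr/mrk
// forms defined above.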
9178 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9179                                     X86VectorVTInfo DestInfo,
9180                                     PatFrag truncFrag, PatFrag mtruncFrag,
9181                                     string Name> {
9182
9183   def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9184             (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
9185                                     addr:$dst, SrcInfo.RC:$src)>;
9186
9187   def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9188                         SrcInfo.KRCWM:$mask),
9189             (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
9190                             addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
9191 }
9192
9193 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9194                         SDNode OpNode256, SDNode OpNode512,
9195                         SDPatternOperator MaskNode128,
9196                         SDPatternOperator MaskNode256,
9197                         SDPatternOperator MaskNode512,
9198                         X86FoldableSchedWrite sched,
9199                         AVX512VLVectorVTInfo VTSrcInfo,
9200                         X86VectorVTInfo DestInfoZ128,
9201                         X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9202                         X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9203                         X86MemOperand x86memopZ, PatFrag truncFrag,
9204                         PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9205
9206   let Predicates = [HasVLX, prd] in {
9207     defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
9208                              VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9209                 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
9210                              truncFrag, mtruncFrag, NAME>, EVEX_V128;
9211
9212     defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
9213                              VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9214                 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
9215                              truncFrag, mtruncFrag, NAME>, EVEX_V256;
9216   }
9217   let Predicates = [prd] in
9218     defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
9219                              VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9220                 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
9221                              truncFrag, mtruncFrag, NAME>, EVEX_V512;
9222 }
9223
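// Per-conversion wrappers (q->b/w/d, d->b/w, w->b) that pick the source and
// destination types, memory operands and EVEX CD8 tuple for each vector length.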
9224 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9225                            SDPatternOperator MaskNode,
9226                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9227                            PatFrag MaskedStoreNode, SDNode InVecNode,
9228                            SDPatternOperator InVecMaskNode> {
9229   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9230                           InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9231                           avx512vl_i64_info, v16i8x_info, v16i8x_info,
9232                           v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9233                           MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9234 }
9235
9236 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9237                            SDPatternOperator MaskNode,
9238                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9239                            PatFrag MaskedStoreNode, SDNode InVecNode,
9240                            SDPatternOperator InVecMaskNode> {
9241   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9242                           InVecMaskNode, InVecMaskNode, MaskNode, sched,
9243                           avx512vl_i64_info, v8i16x_info, v8i16x_info,
9244                           v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9245                           MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9246 }
9247
9248 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9249                            SDPatternOperator MaskNode,
9250                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9251                            PatFrag MaskedStoreNode, SDNode InVecNode,
9252                            SDPatternOperator InVecMaskNode> {
9253   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9254                           InVecMaskNode, MaskNode, MaskNode, sched,
9255                           avx512vl_i64_info, v4i32x_info, v4i32x_info,
9256                           v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9257                           MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9258 }
9259
9260 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9261                            SDPatternOperator MaskNode,
9262                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9263                            PatFrag MaskedStoreNode, SDNode InVecNode,
9264                            SDPatternOperator InVecMaskNode> {
9265   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9266                           InVecMaskNode, InVecMaskNode, MaskNode, sched,
9267                           avx512vl_i32_info, v16i8x_info, v16i8x_info,
9268                           v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9269                           MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
9270 }
9271
9272 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9273                            SDPatternOperator MaskNode,
9274                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9275                            PatFrag MaskedStoreNode, SDNode InVecNode,
9276                            SDPatternOperator InVecMaskNode> {
9277   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9278                           InVecMaskNode, MaskNode, MaskNode, sched,
9279                           avx512vl_i32_info, v8i16x_info, v8i16x_info,
9280                           v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9281                           MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
9282 }
9283
9284 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9285                            SDPatternOperator MaskNode,
9286                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9287                            PatFrag MaskedStoreNode, SDNode InVecNode,
9288                            SDPatternOperator InVecMaskNode> {
9289   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9290                           InVecMaskNode, MaskNode, MaskNode, sched,
9291                           avx512vl_i16_info, v16i8x_info, v16i8x_info,
9292                           v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9293                           MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
9294 }
9295
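// Truncate instructions: plain truncation (vpmov*), signed saturation
// (vpmovs*) and unsigned saturation (vpmovus*).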
9296 defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   trunc, select_trunc,
9297                                   WriteShuffle256, truncstorevi8,
9298                                   masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9299 defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, select_truncs,
9300                                   WriteShuffle256, truncstore_s_vi8,
9301                                   masked_truncstore_s_vi8, X86vtruncs,
9302                                   X86vmtruncs>;
9303 defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
9304                                   select_truncus, WriteShuffle256,
9305                                   truncstore_us_vi8, masked_truncstore_us_vi8,
9306                                   X86vtruncus, X86vmtruncus>;
9307
9308 defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9309                                   WriteShuffle256, truncstorevi16,
9310                                   masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9311 defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
9312                                   WriteShuffle256, truncstore_s_vi16,
9313                                   masked_truncstore_s_vi16, X86vtruncs,
9314                                   X86vmtruncs>;
9315 defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9316                                   select_truncus, WriteShuffle256,
9317                                   truncstore_us_vi16, masked_truncstore_us_vi16,
9318                                   X86vtruncus, X86vmtruncus>;
9319
9320 defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9321                                   WriteShuffle256, truncstorevi32,
9322                                   masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9323 defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
9324                                   WriteShuffle256, truncstore_s_vi32,
9325                                   masked_truncstore_s_vi32, X86vtruncs,
9326                                   X86vmtruncs>;
9327 defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9328                                   select_truncus, WriteShuffle256,
9329                                   truncstore_us_vi32, masked_truncstore_us_vi32,
9330                                   X86vtruncus, X86vmtruncus>;
9331
9332 defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9333                                   WriteShuffle256, truncstorevi8,
9334                                   masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9335 defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9336                                   WriteShuffle256, truncstore_s_vi8,
9337                                   masked_truncstore_s_vi8, X86vtruncs,
9338                                   X86vmtruncs>;
9339 defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
9340                                   select_truncus, WriteShuffle256,
9341                                   truncstore_us_vi8, masked_truncstore_us_vi8,
9342                                   X86vtruncus, X86vmtruncus>;
9343
9344 defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9345                                   WriteShuffle256, truncstorevi16,
9346                                   masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9347 defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9348                                   WriteShuffle256, truncstore_s_vi16,
9349                                   masked_truncstore_s_vi16, X86vtruncs,
9350                                   X86vmtruncs>;
9351 defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
9352                                   select_truncus, WriteShuffle256,
9353                                   truncstore_us_vi16, masked_truncstore_us_vi16,
9354                                   X86vtruncus, X86vmtruncus>;
9355
9356 defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
9357                                   WriteShuffle256, truncstorevi8,
9358                                   masked_truncstorevi8, X86vtrunc,
9359                                   X86vmtrunc>;
9360 defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
9361                                   WriteShuffle256, truncstore_s_vi8,
9362                                   masked_truncstore_s_vi8, X86vtruncs,
9363                                   X86vmtruncs>;
9364 defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
9365                                   select_truncus, WriteShuffle256,
9366                                   truncstore_us_vi8, masked_truncstore_us_vi8,
9367                                   X86vtruncus, X86vmtruncus>;
9368
9369 let Predicates = [HasAVX512, NoVLX] in {
9370 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9371          (v8i16 (EXTRACT_SUBREG
9372                  (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9373                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
9374 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9375          (v4i32 (EXTRACT_SUBREG
9376                  (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9377                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
9378 }
9379
9380 let Predicates = [HasBWI, NoVLX] in {
9381 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9382          (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9383                                             VR256X:$src, sub_ymm))), sub_xmm))>;
9384 }
9385
9386 // Without BWI we can't use a vXi16/vXi8 vselect, so we have to use the vmtrunc nodes.
9387 multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
9388                            X86VectorVTInfo DestInfo,
9389                            X86VectorVTInfo SrcInfo> {
9390   def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9391                                  DestInfo.RC:$src0,
9392                                  SrcInfo.KRCWM:$mask)),
9393             (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
9394                                                  SrcInfo.KRCWM:$mask,
9395                                                  SrcInfo.RC:$src)>;
9396
9397   def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9398                                  DestInfo.ImmAllZerosV,
9399                                  SrcInfo.KRCWM:$mask)),
9400             (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
9401                                                   SrcInfo.RC:$src)>;
9402 }
9403
9404 let Predicates = [HasVLX] in {
9405 defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
9406 defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
9407 defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
9408 }
9409
9410 let Predicates = [HasAVX512] in {
9411 defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
9412 defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
9413 defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
9414
9415 defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
9416 defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
9417 defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
9418
9419 defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
9420 defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
9421 defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
9422 }
9423
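// Sign/zero extension (vpmovsx*/vpmovzx*): common register and load-folded
// forms, instantiated per source/destination element width below.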
9424 multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9425               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9426               X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9427   let ExeDomain = DestInfo.ExeDomain in {
9428   defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9429                     (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
9430                     (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9431                   EVEX, Sched<[sched]>;
9432
9433   defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9434                   (ins x86memop:$src), OpcodeStr ,"$src", "$src",
9435                   (DestInfo.VT (LdFrag addr:$src))>,
9436                 EVEX, Sched<[sched.Folded]>;
9437   }
9438 }
9439
9440 multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
9441           SDNode OpNode, SDNode InVecNode, string ExtTy,
9442           X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9443   let Predicates = [HasVLX, HasBWI] in {
9444     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
9445                     v16i8x_info, i64mem, LdFrag, InVecNode>,
9446                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9447
9448     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
9449                     v16i8x_info, i128mem, LdFrag, OpNode>,
9450                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9451   }
9452   let Predicates = [HasBWI] in {
9453     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
9454                     v32i8x_info, i256mem, LdFrag, OpNode>,
9455                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
9456   }
9457 }
9458
9459 multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
9460           SDNode OpNode, SDNode InVecNode, string ExtTy,
9461           X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9462   let Predicates = [HasVLX, HasAVX512] in {
9463     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9464                    v16i8x_info, i32mem, LdFrag, InVecNode>,
9465                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9466
9467     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9468                    v16i8x_info, i64mem, LdFrag, InVecNode>,
9469                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9470   }
9471   let Predicates = [HasAVX512] in {
9472     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9473                    v16i8x_info, i128mem, LdFrag, OpNode>,
9474                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
9475   }
9476 }
9477
9478 multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
9479           SDNode OpNode, SDNode InVecNode, string ExtTy,
9480           X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9481   let Predicates = [HasVLX, HasAVX512] in {
9482     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9483                    v16i8x_info, i16mem, LdFrag, InVecNode>,
9484                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
9485
9486     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9487                    v16i8x_info, i32mem, LdFrag, InVecNode>,
9488                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
9489   }
9490   let Predicates = [HasAVX512] in {
9491     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9492                    v16i8x_info, i64mem, LdFrag, InVecNode>,
9493                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
9494   }
9495 }
9496
9497 multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
9498          SDNode OpNode, SDNode InVecNode, string ExtTy,
9499          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9500   let Predicates = [HasVLX, HasAVX512] in {
9501     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9502                    v8i16x_info, i64mem, LdFrag, InVecNode>,
9503                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9504
9505     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9506                    v8i16x_info, i128mem, LdFrag, OpNode>,
9507                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9508   }
9509   let Predicates = [HasAVX512] in {
9510     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9511                    v16i16x_info, i256mem, LdFrag, OpNode>,
9512                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
9513   }
9514 }
9515
9516 multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
9517          SDNode OpNode, SDNode InVecNode, string ExtTy,
9518          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9519   let Predicates = [HasVLX, HasAVX512] in {
9520     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9521                    v8i16x_info, i32mem, LdFrag, InVecNode>,
9522                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9523
9524     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9525                    v8i16x_info, i64mem, LdFrag, InVecNode>,
9526                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9527   }
9528   let Predicates = [HasAVX512] in {
9529     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9530                    v8i16x_info, i128mem, LdFrag, OpNode>,
9531                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
9532   }
9533 }
9534
9535 multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
9536          SDNode OpNode, SDNode InVecNode, string ExtTy,
9537          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
9538
9539   let Predicates = [HasVLX, HasAVX512] in {
9540     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9541                    v4i32x_info, i64mem, LdFrag, InVecNode>,
9542                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
9543
9544     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9545                    v4i32x_info, i128mem, LdFrag, OpNode>,
9546                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
9547   }
9548   let Predicates = [HasAVX512] in {
9549     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9550                    v8i32x_info, i256mem, LdFrag, OpNode>,
9551                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
9552   }
9553 }
9554
9555 defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
9556 defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
9557 defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
9558 defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
9559 defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
9560 defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
9561
9562 defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
9563 defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
9564 defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
9565 defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
9566 defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
9567 defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
9568
9569
9570 // Patterns for which we would also need any_extend versions. aext_vector_inreg
9571 // is currently legalized to zext_vector_inreg, so the zext patterns cover it.
9572 multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
9573   // 256-bit patterns
9574   let Predicates = [HasVLX, HasBWI] in {
9575     def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
9576               (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
9577   }
9578
9579   let Predicates = [HasVLX] in {
9580     def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
9581               (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
9582
9583     def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
9584               (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
9585   }
9586
9587   // 512-bit patterns
9588   let Predicates = [HasBWI] in {
9589     def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
9590               (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
9591   }
9592   let Predicates = [HasAVX512] in {
9593     def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
9594               (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
9595     def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
9596               (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
9597
9598     def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
9599               (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
9600
9601     def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
9602               (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
9603   }
9604 }
9605
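// Additional load-folding patterns for the in-vector extend nodes, covering
// scalar_to_vector and vzload sources.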
9606 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
9607                                  SDNode InVecOp> :
9608     AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
9609   // 128-bit patterns
9610   let Predicates = [HasVLX, HasBWI] in {
9611   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9612             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9613   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9614             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9615   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9616             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9617   }
9618   let Predicates = [HasVLX] in {
9619   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9620             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9621   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9622             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9623
9624   def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
9625             (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
9626
9627   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9628             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9629   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9630             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9631   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9632             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9633
9634   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9635             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9636   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
9637             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9638
9639   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9640             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9641   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9642             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9643   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9644             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9645   }
9646   let Predicates = [HasVLX] in {
9647   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9648             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9649   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
9650             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9651   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9652             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9653
9654   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9655             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9656   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9657             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9658
9659   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9660             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9661   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
9662             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9663   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9664             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9665   }
9666   // 512-bit patterns
9667   let Predicates = [HasAVX512] in {
9668   def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9669             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
9670   def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9671             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
9672   def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9673             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
9674   }
9675 }
9676
9677 defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
9678 defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
9679
9680 // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
9681 // ext+trunc aggressively, making it impossible to legalize the DAG to this
9682 // pattern directly.
9683 let Predicates = [HasAVX512, NoBWI] in {
9684 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9685          (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
9686 def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
9687          (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
9688 }
9689
9690 //===----------------------------------------------------------------------===//
9691 // GATHER - SCATTER Operations
9692
9693 // FIXME: Improve scheduling of gather/scatter instructions.
9694 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9695                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
9696   let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
9697       ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
9698   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
9699             (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
9700             !strconcat(OpcodeStr#_.Suffix,
9701             "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
9702             []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
9703 }
9704
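// Instantiate gathers for each element type with dword- and qword-indexed
// addressing at 128/256/512-bit vector lengths.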
9705 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
9706                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9707   defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
9708                                       vy512xmem>, EVEX_V512, VEX_W;
9709   defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
9710                                       vz512mem>, EVEX_V512, VEX_W;
9711 let Predicates = [HasVLX] in {
9712   defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
9713                               vx256xmem>, EVEX_V256, VEX_W;
9714   defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
9715                               vy256xmem>, EVEX_V256, VEX_W;
9716   defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
9717                               vx128xmem>, EVEX_V128, VEX_W;
9718   defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
9719                               vx128xmem>, EVEX_V128, VEX_W;
9720 }
9721 }
9722
9723 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
9724                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9725   defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
9726                                        EVEX_V512;
9727   defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
9728                                        EVEX_V512;
9729 let Predicates = [HasVLX] in {
9730   defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
9731                                           vy256xmem>, EVEX_V256;
9732   defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
9733                                           vy128xmem>, EVEX_V256;
9734   defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
9735                                           vx128xmem>, EVEX_V128;
9736   defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
9737                                           vx64xmem, VK2WM>, EVEX_V128;
9738 }
9739 }
9740
9741
9742 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
9743                avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
9744
9745 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
9746                 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
9747
9748 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9749                           X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
9750
9751 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain, 
9752     hasSideEffects = 0 in
9753
9754   def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
9755             (ins memop:$dst, MaskRC:$mask, _.RC:$src),
9756             !strconcat(OpcodeStr#_.Suffix,
9757             "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
9758             []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9759             Sched<[WriteStore]>;
9760 }
9761
9762 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
9763                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9764   defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
9765                                       vy512xmem>, EVEX_V512, VEX_W;
9766   defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
9767                                       vz512mem>, EVEX_V512, VEX_W;
9768 let Predicates = [HasVLX] in {
9769   defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
9770                               vx256xmem>, EVEX_V256, VEX_W;
9771   defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
9772                               vy256xmem>, EVEX_V256, VEX_W;
9773   defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
9774                               vx128xmem>, EVEX_V128, VEX_W;
9775   defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
9776                               vx128xmem>, EVEX_V128, VEX_W;
9777 }
9778 }
9779
9780 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
9781                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9782   defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
9783                                        EVEX_V512;
9784   defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
9785                                        EVEX_V512;
9786 let Predicates = [HasVLX] in {
9787   defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
9788                                           vy256xmem>, EVEX_V256;
9789   defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
9790                                           vy128xmem>, EVEX_V256;
9791   defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
9792                                           vx128xmem>, EVEX_V128;
9793   defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
9794                                           vx64xmem, VK2WM>, EVEX_V128;
9795 }
9796 }
9797
9798 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
9799                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
9800
9801 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
9802                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
9803
9804 // prefetch
9805 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
9806                        RegisterClass KRC, X86MemOperand memop> {
9807   let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
9808   def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
9809             !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
9810             EVEX, EVEX_K, Sched<[WriteLoad]>;
9811 }
9812
9813 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
9814                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9815
9816 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
9817                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9818
9819 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
9820                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9821
9822 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
9823                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9824
9825 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
9826                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9827
9828 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
9829                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9830
9831 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
9832                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9833
9834 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
9835                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9836
9837 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
9838                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9839
9840 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
9841                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9842
9843 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
9844                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9845
9846 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
9847                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9848
9849 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
9850                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9851
9852 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
9853                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9854
9855 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
9856                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9857
9858 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
9859                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9860
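// vpmovm2*: sign-extend a mask register into a vector register.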
9861 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
9862 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
9863                   !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
9864                   [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
9865                   EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
9866 }
9867
9868 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
9869                                  string OpcodeStr, Predicate prd> {
9870 let Predicates = [prd] in
9871   defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
9872
9873   let Predicates = [prd, HasVLX] in {
9874     defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
9875     defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
9876   }
9877 }
9878
9879 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
9880 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
9881 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
9882 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
9883
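// vpmov*2m: convert a vector to a mask from the sign bit of each element,
// matched here as a "0 > src" signed compare.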
9884 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
9885     def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
9886                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
9887                         [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
9888                         EVEX, Sched<[WriteMove]>;
9889 }
9890
9891 // Use the 512-bit version to implement the 128/256-bit patterns when VLX is
9891 // not available (NoVLX).
9892 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
9893                                            X86VectorVTInfo _,
9894                                            string Name> {
9895
9896   def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
9897             (_.KVT (COPY_TO_REGCLASS
9898                      (!cast<Instruction>(Name#"Zrr")
9899                        (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
9900                                       _.RC:$src, _.SubRegIdx)),
9901                    _.KRC))>;
9902 }
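// Rough selection sketch for the lowering above (record names assumed from the
// usual NAME#"Zrr" concatenation; see the instantiations further down): under
// NoVLX a 256-bit vector-to-mask conversion such as
//   (v32i1 (X86pcmpgtm 0, (v32i8 VR256X:$src)))
// is selected approximately as
//   (COPY_TO_REGCLASS
//      (VPMOVB2MZrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), $src, sub_ymm)),
//      VK32)
// i.e. widen the operand into a 512-bit register, run the Z form, and
// reinterpret the resulting mask in the narrower mask register class.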
9903
9904 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
9905                                    AVX512VLVectorVTInfo VTInfo, Predicate prd> {
9906   let Predicates = [prd] in
9907     defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
9908                                             EVEX_V512;
9909
9910   let Predicates = [prd, HasVLX] in {
9911     defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
9912                                               EVEX_V256;
9913     defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
9914                                                EVEX_V128;
9915   }
9916   let Predicates = [prd, NoVLX] in {
9917     defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
9918     defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
9919   }
9920 }
9921
9922 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
9923                                               avx512vl_i8_info, HasBWI>;
9924 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
9925                                               avx512vl_i16_info, HasBWI>, VEX_W;
9926 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
9927                                               avx512vl_i32_info, HasDQI>;
9928 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
9929                                               avx512vl_i64_info, HasDQI>, VEX_W;
9930
9931 // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
9932 // is available, but BWI is not. We can't handle this in lowering because
9933 // a target-independent DAG combine likes to combine sext and trunc.
9934 let Predicates = [HasDQI, NoBWI] in {
9935   def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
9936             (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9937   def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
9938             (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9939 }
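// Informal reading of the patterns above: with DQI but without BWI, the
// v16i1 -> v16i8 sign-extension is split into two steps, roughly
//
//   vpmovm2d %k1, %zmm0    # each dword = k1[i] ? -1 : 0
//   vpmovdb  %zmm0, %xmm0  # truncate dwords to bytes; -1/0 are preserved
//
// (assembly shown only as an illustration of the selected instruction pair).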
9940
9941 let Predicates = [HasDQI, NoBWI, HasVLX] in {
9942   def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
9943             (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9944 }
9945
9946 //===----------------------------------------------------------------------===//
9947 // AVX-512 - COMPRESS and EXPAND
9948 //
9949
9950 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
9951                                  string OpcodeStr, X86FoldableSchedWrite sched> {
9952   defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
9953               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
9954               (null_frag)>, AVX5128IBase,
9955               Sched<[sched]>;
9956
9957   let mayStore = 1, hasSideEffects = 0 in
9958   def mr : AVX5128I<opc, MRMDestMem, (outs),
9959               (ins _.MemOp:$dst, _.RC:$src),
9960               OpcodeStr # "\t{$src, $dst|$dst, $src}",
9961               []>, EVEX_CD8<_.EltSize, CD8VT1>,
9962               Sched<[sched.Folded]>;
9963
9964   def mrk : AVX5128I<opc, MRMDestMem, (outs),
9965               (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
9966               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9967               []>,
9968               EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9969               Sched<[sched.Folded]>;
9970 }
9971
9972 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
9973   def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
9974             (!cast<Instruction>(Name#_.ZSuffix#mrk)
9975                             addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
9976
9977   def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
9978             (!cast<Instruction>(Name#_.ZSuffix#rrk)
9979                             _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
9980   def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
9981             (!cast<Instruction>(Name#_.ZSuffix#rrkz)
9982                             _.KRCWM:$mask, _.RC:$src)>;
9983 }
9984
9985 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
9986                                  X86FoldableSchedWrite sched,
9987                                  AVX512VLVectorVTInfo VTInfo,
9988                                  Predicate Pred = HasAVX512> {
9989   let Predicates = [Pred] in
9990   defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
9991            compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
9992
9993   let Predicates = [Pred, HasVLX] in {
9994     defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
9995                 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
9996     defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
9997                 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
9998   }
9999 }
10000
10001 // FIXME: Is there a better scheduler class for VPCOMPRESS?
10002 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10003                                           avx512vl_i32_info>, EVEX, NotMemoryFoldable;
10004 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10005                                           avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
10006 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10007                                           avx512vl_f32_info>, EVEX, NotMemoryFoldable;
10008 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10009                                           avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
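// Usage sketch (illustrative, AT&T syntax assumed): the masked store form
// ("mrk") defined above packs the elements selected by the write-mask and
// stores them contiguously, e.g.
//
//   vcompressps %zmm1, (%rdi) {%k1}   # store the k1-selected floats of zmm1
//                                     # back to back starting at (%rdi)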
10010
10011 // expand
10012 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10013                                  string OpcodeStr, X86FoldableSchedWrite sched> {
10014   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10015               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10016               (null_frag)>, AVX5128IBase,
10017               Sched<[sched]>;
10018
10019   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10020               (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10021               (null_frag)>,
10022             AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10023             Sched<[sched.Folded, sched.ReadAfterFold]>;
10024 }
10025
10026 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10027
10028   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10029             (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10030                                         _.KRCWM:$mask, addr:$src)>;
10031
10032   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10033             (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10034                                         _.KRCWM:$mask, addr:$src)>;
10035
10036   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10037                                                (_.VT _.RC:$src0))),
10038             (!cast<Instruction>(Name#_.ZSuffix#rmk)
10039                             _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10040
10041   def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10042             (!cast<Instruction>(Name#_.ZSuffix#rrk)
10043                             _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10044   def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10045             (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10046                             _.KRCWM:$mask, _.RC:$src)>;
10047 }
10048
10049 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10050                                X86FoldableSchedWrite sched,
10051                                AVX512VLVectorVTInfo VTInfo,
10052                                Predicate Pred = HasAVX512> {
10053   let Predicates = [Pred] in
10054   defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10055            expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10056
10057   let Predicates = [Pred, HasVLX] in {
10058     defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10059                 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10060     defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10061                 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10062   }
10063 }
10064
10065 // FIXME: Is there a better scheduler class for VPEXPAND?
10066 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10067                                       avx512vl_i32_info>, EVEX;
10068 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10069                                       avx512vl_i64_info>, EVEX, VEX_W;
10070 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10071                                       avx512vl_f32_info>, EVEX;
10072 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10073                                       avx512vl_f64_info>, EVEX, VEX_W;
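// Usage sketch (illustrative, AT&T syntax assumed): expand is the inverse of
// compress; the masked load form reads consecutive elements from memory and
// places them into the destination lanes selected by the write-mask, e.g.
//
//   vexpandps (%rdi), %zmm1 {%k1} {z}  # k1-selected lanes receive consecutive
//                                      # floats from (%rdi); the rest are zeroed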
10074
10075 // Handle instructions of the form  reg_vec1 = op(reg_vec, imm)
10076 //                                             op(mem_vec, imm)
10077 //                                             op(broadcast(eltVt), imm)
10078 // All instructions are created with FROUND_CURRENT.
10079 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
10080                                       SDNode OpNode, SDNode MaskOpNode,
10081                                       X86FoldableSchedWrite sched,
10082                                       X86VectorVTInfo _> {
10083   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10084   defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
10085                       (ins _.RC:$src1, i32u8imm:$src2),
10086                       OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10087                       (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
10088                       (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
10089                       Sched<[sched]>;
10090   defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10091                     (ins _.MemOp:$src1, i32u8imm:$src2),
10092                     OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10093                     (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10094                             (i32 timm:$src2)),
10095                     (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10096                                 (i32 timm:$src2))>,
10097                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10098   defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10099                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10100                     OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
10101                     "${src1}"#_.BroadcastStr#", $src2",
10102                     (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10103                             (i32 timm:$src2)),
10104                     (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10105                                 (i32 timm:$src2))>, EVEX_B,
10106                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10107   }
10108 }
10109
10110 // Handle instructions of the form  reg_vec1 = op(reg_vec2, imm), {sae}
10111 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10112                                           SDNode OpNode, X86FoldableSchedWrite sched,
10113                                           X86VectorVTInfo _> {
10114   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10115   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10116                       (ins _.RC:$src1, i32u8imm:$src2),
10117                       OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
10118                       "$src1, {sae}, $src2",
10119                       (OpNode (_.VT _.RC:$src1),
10120                               (i32 timm:$src2))>,
10121                       EVEX_B, Sched<[sched]>;
10122 }
10123
10124 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10125             AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10126             SDNode MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
10127             Predicate prd>{
10128   let Predicates = [prd] in {
10129     defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10130                                            sched.ZMM, _.info512>,
10131                 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10132                                                sched.ZMM, _.info512>, EVEX_V512;
10133   }
10134   let Predicates = [prd, HasVLX] in {
10135     defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10136                                            sched.XMM, _.info128>, EVEX_V128;
10137     defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10138                                            sched.YMM, _.info256>, EVEX_V256;
10139   }
10140 }
10141
10142 // Handle instructions of the form  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10143 //                                             op(reg_vec2, mem_vec, imm)
10144 //                                             op(reg_vec2, broadcast(eltVt), imm)
10145 // All instructions are created with FROUND_CURRENT.
10146 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10147                                 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10148   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10149   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10150                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10151                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10152                       (OpNode (_.VT _.RC:$src1),
10153                               (_.VT _.RC:$src2),
10154                               (i32 timm:$src3))>,
10155                       Sched<[sched]>;
10156   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10157                     (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10158                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10159                     (OpNode (_.VT _.RC:$src1),
10160                             (_.VT (bitconvert (_.LdFrag addr:$src2))),
10161                             (i32 timm:$src3))>,
10162                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10163   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10164                     (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10165                     OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10166                     "$src1, ${src2}"#_.BroadcastStr#", $src3",
10167                     (OpNode (_.VT _.RC:$src1),
10168                             (_.VT (_.BroadcastLdFrag addr:$src2)),
10169                             (i32 timm:$src3))>, EVEX_B,
10170                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10171   }
10172 }
10173
10174 // Handle instructions of the form  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10175 //                                             op(reg_vec2, mem_vec, imm)
10176 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10177                               X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10178                               X86VectorVTInfo SrcInfo>{
10179   let ExeDomain = DestInfo.ExeDomain in {
10180   defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10181                   (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10182                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10183                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10184                                (SrcInfo.VT SrcInfo.RC:$src2),
10185                                (i8 timm:$src3)))>,
10186                   Sched<[sched]>;
10187   defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10188                 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10189                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10190                 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10191                              (SrcInfo.VT (bitconvert
10192                                                 (SrcInfo.LdFrag addr:$src2))),
10193                              (i8 timm:$src3)))>,
10194                 Sched<[sched.Folded, sched.ReadAfterFold]>;
10195   }
10196 }
10197
10198 // Handle instructions of the form  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10199 //                                             op(reg_vec2, mem_vec, imm)
10200 //                                             op(reg_vec2, broadcast(eltVt), imm)
10201 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10202                            X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10203   avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10204
10205   let ExeDomain = _.ExeDomain in
10206   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10207                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10208                     OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10209                     "$src1, ${src2}"#_.BroadcastStr#", $src3",
10210                     (OpNode (_.VT _.RC:$src1),
10211                             (_.VT (_.BroadcastLdFrag addr:$src2)),
10212                             (i8 timm:$src3))>, EVEX_B,
10213                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10214 }
10215
10216 // Handle scalar instructions of the form  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10217 //                                                     op(reg_vec2, mem_scalar, imm)
10218 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10219                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10220   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10221   defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10222                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10223                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10224                       (OpNode (_.VT _.RC:$src1),
10225                               (_.VT _.RC:$src2),
10226                               (i32 timm:$src3))>,
10227                       Sched<[sched]>;
10228   defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10229                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10230                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10231                     (OpNode (_.VT _.RC:$src1),
10232                             (_.ScalarIntMemFrags addr:$src2),
10233                             (i32 timm:$src3))>,
10234                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10235   }
10236 }
10237
10238 // Handle instructions of the form  reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
10239 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10240                                     SDNode OpNode, X86FoldableSchedWrite sched,
10241                                     X86VectorVTInfo _> {
10242   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10243   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10244                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10245                       OpcodeStr, "$src3, {sae}, $src2, $src1",
10246                       "$src1, $src2, {sae}, $src3",
10247                       (OpNode (_.VT _.RC:$src1),
10248                               (_.VT _.RC:$src2),
10249                               (i32 timm:$src3))>,
10250                       EVEX_B, Sched<[sched]>;
10251 }
10252
10253 // Handle scalar instructions of the form  reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
10254 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10255                                     X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10256   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10257   defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10258                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10259                       OpcodeStr, "$src3, {sae}, $src2, $src1",
10260                       "$src1, $src2, {sae}, $src3",
10261                       (OpNode (_.VT _.RC:$src1),
10262                               (_.VT _.RC:$src2),
10263                               (i32 timm:$src3))>,
10264                       EVEX_B, Sched<[sched]>;
10265 }
10266
10267 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10268             AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10269             SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10270   let Predicates = [prd] in {
10271     defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10272                 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10273                                   EVEX_V512;
10274
10275   }
10276   let Predicates = [prd, HasVLX] in {
10277     defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10278                                   EVEX_V128;
10279     defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10280                                   EVEX_V256;
10281   }
10282 }
10283
10284 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10285                    X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10286                    AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10287   let Predicates = [Pred] in {
10288     defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10289                            SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
10290   }
10291   let Predicates = [Pred, HasVLX] in {
10292     defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10293                            SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
10294     defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10295                            SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
10296   }
10297 }
10298
10299 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10300                                   bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10301                                   Predicate Pred = HasAVX512> {
10302   let Predicates = [Pred] in {
10303     defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10304                                 EVEX_V512;
10305   }
10306   let Predicates = [Pred, HasVLX] in {
10307     defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10308                                 EVEX_V128;
10309     defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10310                                 EVEX_V256;
10311   }
10312 }
10313
10314 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10315                   X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10316                   SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10317   let Predicates = [prd] in {
10318      defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10319               avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
10320   }
10321 }
10322
10323 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10324                     bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
10325                     SDNode MaskOpNode, SDNode OpNodeSAE,
10326                     X86SchedWriteWidths sched, Predicate prd>{
10327   defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10328                             opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10329                             EVEX_CD8<32, CD8VF>;
10330   defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10331                             opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10332                             EVEX_CD8<64, CD8VF>, VEX_W;
10333 }
10334
10335 defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10336                               X86VReduce, X86VReduce, X86VReduceSAE,
10337                               SchedWriteFRnd, HasDQI>, AVX512AIi8Base, EVEX;
10338 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10339                               X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
10340                               SchedWriteFRnd, HasAVX512>,
10341                               AVX512AIi8Base, EVEX;
10342 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10343                               X86VGetMant, X86VGetMant, X86VGetMantSAE,
10344                               SchedWriteFRnd, HasAVX512>, AVX512AIi8Base, EVEX;
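// Illustrative assembly forms produced by the packed-immediate multiclasses
// above (spelling assumed, AT&T syntax); the imm8 selects the rounding /
// reduction behaviour:
//
//   vrndscaleps $1, %zmm1, %zmm0            # rri   (register)
//   vrndscaleps $1, (%rax), %zmm0           # rmi   (full-vector load)
//   vrndscaleps $1, (%rax){1to16}, %zmm0    # rmbi  (embedded broadcast)
//   vrndscaleps $1, {sae}, %zmm1, %zmm0     # rrib  (suppress all exceptions)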
10345
10346 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10347                                                 0x50, X86VRange, X86VRangeSAE,
10348                                                 SchedWriteFAdd, HasDQI>,
10349       AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10350 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10351                                                 0x50, X86VRange, X86VRangeSAE,
10352                                                 SchedWriteFAdd, HasDQI>,
10353       AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10354
10355 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10356       f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10357       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10358 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10359       0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10360       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10361
10362 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10363       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10364       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10365 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10366       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10367       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10368
10369 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10370       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10371       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10372 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10373       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10374       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10375
10376 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10377                                           X86FoldableSchedWrite sched,
10378                                           X86VectorVTInfo _,
10379                                           X86VectorVTInfo CastInfo,
10380                                           string EVEX2VEXOvrd> {
10381   let ExeDomain = _.ExeDomain in {
10382   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10383                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10384                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10385                   (_.VT (bitconvert
10386                          (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10387                                                   (i8 timm:$src3)))))>,
10388                   Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
10389   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10390                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10391                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10392                 (_.VT
10393                  (bitconvert
10394                   (CastInfo.VT (X86Shuf128 _.RC:$src1,
10395                                            (CastInfo.LdFrag addr:$src2),
10396                                            (i8 timm:$src3)))))>,
10397                 Sched<[sched.Folded, sched.ReadAfterFold]>,
10398                 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
10399   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10400                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10401                     OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10402                     "$src1, ${src2}"#_.BroadcastStr#", $src3",
10403                     (_.VT
10404                      (bitconvert
10405                       (CastInfo.VT
10406                        (X86Shuf128 _.RC:$src1,
10407                                    (_.BroadcastLdFrag addr:$src2),
10408                                    (i8 timm:$src3)))))>, EVEX_B,
10409                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10410   }
10411 }
10412
10413 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10414                                    AVX512VLVectorVTInfo _,
10415                                    AVX512VLVectorVTInfo CastInfo, bits<8> opc,
10416                                    string EVEX2VEXOvrd>{
10417   let Predicates = [HasAVX512] in
10418   defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10419                                           _.info512, CastInfo.info512, "">, EVEX_V512;
10420
10421   let Predicates = [HasAVX512, HasVLX] in
10422   defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10423                                              _.info256, CastInfo.info256,
10424                                              EVEX2VEXOvrd>, EVEX_V256;
10425 }
10426
10427 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10428       avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10429 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10430       avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10431 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
10432       avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10433 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
10434       avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10435
10436 let Predicates = [HasAVX512] in {
10437 // Provide a fallback in case the load node used in the broadcast patterns
10438 // above has additional users, which prevents those patterns from being
10439 // selected.
10440 def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
10441           (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10442                           (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10443                           0)>;
10444 def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
10445           (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10446                           (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10447                           0)>;
10448
10449 def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
10450           (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10451                           (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10452                           0)>;
10453 def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
10454           (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10455                           (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10456                           0)>;
10457
10458 def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
10459           (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10460                           (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10461                           0)>;
10462
10463 def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
10464           (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10465                           (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10466                           0)>;
10467 }
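// Why an all-zero immediate works here (informal note): for the 512-bit
// shuffles each 2-bit immediate field selects one 128-bit lane of a source,
// so immediate 0 picks lane 0 of both (identical) inserted operands for every
// destination lane, replicating the original xmm across the zmm, roughly:
//
//   vshuff64x2 $0, %zmm0, %zmm0, %zmm1   # zmm1 = 4 copies of zmm0[127:0]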
10468
10469 multiclass avx512_valign<bits<8> opc, string OpcodeStr,
10470                          X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10471   // NOTE: EVEX2VEXOverride is changed back to Unset for the 256-bit
10472   // instantiation of this class.
10473   let ExeDomain = _.ExeDomain in {
10474   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10475                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10476                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10477                   (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
10478                   Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
10479   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10480                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10481                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10482                 (_.VT (X86VAlign _.RC:$src1,
10483                                  (bitconvert (_.LdFrag addr:$src2)),
10484                                  (i8 timm:$src3)))>,
10485                 Sched<[sched.Folded, sched.ReadAfterFold]>,
10486                 EVEX2VEXOverride<"VPALIGNRrmi">;
10487
10488   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10489                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10490                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10491                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10492                    (X86VAlign _.RC:$src1,
10493                               (_.VT (_.BroadcastLdFrag addr:$src2)),
10494                               (i8 timm:$src3))>, EVEX_B,
10495                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10496   }
10497 }
10498
10499 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
10500                                 AVX512VLVectorVTInfo _> {
10501   let Predicates = [HasAVX512] in {
10502     defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
10503                                 AVX512AIi8Base, EVEX_4V, EVEX_V512;
10504   }
10505   let Predicates = [HasAVX512, HasVLX] in {
10506     defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
10507                                 AVX512AIi8Base, EVEX_4V, EVEX_V128;
10508     // We can't really override the 256-bit version, so change it back to unset.
10509     let EVEX2VEXOverride = ? in
10510     defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
10511                                 AVX512AIi8Base, EVEX_4V, EVEX_V256;
10512   }
10513 }
10514
10515 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
10516                                    avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10517 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
10518                                    avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
10519                                    VEX_W;
10520
10521 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
10522                                          SchedWriteShuffle, avx512vl_i8_info,
10523                                          avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10524
10525 // Fragments to help convert valignq into masked valignd, or valignq/valignd
10526 // into vpalignr.
10527 def ValignqImm32XForm : SDNodeXForm<timm, [{
10528   return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
10529 }]>;
10530 def ValignqImm8XForm : SDNodeXForm<timm, [{
10531   return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
10532 }]>;
10533 def ValigndImm8XForm : SDNodeXForm<timm, [{
10534   return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
10535 }]>;
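// Worked example of the immediate rewrites above: a valignq that shifts the
// concatenated sources right by 3 qwords moves the same data as a valignd
// shifting by 3*2 = 6 dwords (ValignqImm32XForm), or - for the 128-bit forms
// handled below - a byte-granular vpalignr shifting by 3*8 = 24 bytes
// (ValignqImm8XForm); likewise a valignd by 5 dwords corresponds to
// 5*4 = 20 bytes (ValigndImm8XForm).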
10536
10537 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
10538                                         X86VectorVTInfo From, X86VectorVTInfo To,
10539                                         SDNodeXForm ImmXForm> {
10540   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10541                                  (bitconvert
10542                                   (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10543                                                    timm:$src3))),
10544                                  To.RC:$src0)),
10545             (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
10546                                                   To.RC:$src1, To.RC:$src2,
10547                                                   (ImmXForm timm:$src3))>;
10548
10549   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10550                                  (bitconvert
10551                                   (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10552                                                    timm:$src3))),
10553                                  To.ImmAllZerosV)),
10554             (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
10555                                                    To.RC:$src1, To.RC:$src2,
10556                                                    (ImmXForm timm:$src3))>;
10557
10558   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10559                                  (bitconvert
10560                                   (From.VT (OpNode From.RC:$src1,
10561                                                    (From.LdFrag addr:$src2),
10562                                            timm:$src3))),
10563                                  To.RC:$src0)),
10564             (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
10565                                                   To.RC:$src1, addr:$src2,
10566                                                   (ImmXForm timm:$src3))>;
10567
10568   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10569                                  (bitconvert
10570                                   (From.VT (OpNode From.RC:$src1,
10571                                                    (From.LdFrag addr:$src2),
10572                                            timm:$src3))),
10573                                  To.ImmAllZerosV)),
10574             (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
10575                                                    To.RC:$src1, addr:$src2,
10576                                                    (ImmXForm timm:$src3))>;
10577 }
10578
10579 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
10580                                            X86VectorVTInfo From,
10581                                            X86VectorVTInfo To,
10582                                            SDNodeXForm ImmXForm> :
10583       avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
10584   def : Pat<(From.VT (OpNode From.RC:$src1,
10585                              (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
10586                              timm:$src3)),
10587             (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
10588                                                   (ImmXForm timm:$src3))>;
10589
10590   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10591                                  (bitconvert
10592                                   (From.VT (OpNode From.RC:$src1,
10593                                            (bitconvert
10594                                             (To.VT (To.BroadcastLdFrag addr:$src2))),
10595                                            timm:$src3))),
10596                                  To.RC:$src0)),
10597             (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
10598                                                    To.RC:$src1, addr:$src2,
10599                                                    (ImmXForm timm:$src3))>;
10600
10601   def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10602                                  (bitconvert
10603                                   (From.VT (OpNode From.RC:$src1,
10604                                            (bitconvert
10605                                             (To.VT (To.BroadcastLdFrag addr:$src2))),
10606                                            timm:$src3))),
10607                                  To.ImmAllZerosV)),
10608             (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
10609                                                     To.RC:$src1, addr:$src2,
10610                                                     (ImmXForm timm:$src3))>;
10611 }
10612
10613 let Predicates = [HasAVX512] in {
10614   // For 512-bit we lower to the widest element type we can. So we only need
10615   // to handle converting valignq to valignd.
10616   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
10617                                          v16i32_info, ValignqImm32XForm>;
10618 }
10619
10620 let Predicates = [HasVLX] in {
10621   // For 128-bit we lower to the widest element type we can. So we only need
10622   // to handle converting valignq to valignd.
10623   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
10624                                          v4i32x_info, ValignqImm32XForm>;
10625   // For 256-bit we lower to the widest element type we can. So we only need
10626   // to handle converting valignq to valignd.
10627   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
10628                                          v8i32x_info, ValignqImm32XForm>;
10629 }
10630
10631 let Predicates = [HasVLX, HasBWI] in {
10632   // We can turn 128- and 256-bit VALIGND/VALIGNQ into VPALIGNR.
10633   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
10634                                       v16i8x_info, ValignqImm8XForm>;
10635   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
10636                                       v16i8x_info, ValigndImm8XForm>;
10637 }
10638
10639 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
10640                 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
10641                 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
10642
10643 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10644                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10645   let ExeDomain = _.ExeDomain in {
10646   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10647                     (ins _.RC:$src1), OpcodeStr,
10648                     "$src1", "$src1",
10649                     (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
10650                     Sched<[sched]>;
10651
10652   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10653                   (ins _.MemOp:$src1), OpcodeStr,
10654                   "$src1", "$src1",
10655                   (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
10656             EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
10657             Sched<[sched.Folded]>;
10658   }
10659 }
10660
10661 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
10662                             X86FoldableSchedWrite sched, X86VectorVTInfo _> :
10663            avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
10664   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10665                   (ins _.ScalarMemOp:$src1), OpcodeStr,
10666                   "${src1}"#_.BroadcastStr,
10667                   "${src1}"#_.BroadcastStr,
10668                   (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
10669              EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
10670              Sched<[sched.Folded]>;
10671 }
10672
10673 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10674                               X86SchedWriteWidths sched,
10675                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10676   let Predicates = [prd] in
10677     defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10678                              EVEX_V512;
10679
10680   let Predicates = [prd, HasVLX] in {
10681     defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10682                               EVEX_V256;
10683     defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
10684                               EVEX_V128;
10685   }
10686 }
10687
10688 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10689                                X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
10690                                Predicate prd> {
10691   let Predicates = [prd] in
10692     defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10693                               EVEX_V512;
10694
10695   let Predicates = [prd, HasVLX] in {
10696     defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10697                                  EVEX_V256;
10698     defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
10699                                  EVEX_V128;
10700   }
10701 }
10702
10703 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
10704                                  SDNode OpNode, X86SchedWriteWidths sched,
10705                                  Predicate prd> {
10706   defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
10707                                avx512vl_i64_info, prd>, VEX_W;
10708   defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
10709                                avx512vl_i32_info, prd>;
10710 }
10711
10712 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
10713                                  SDNode OpNode, X86SchedWriteWidths sched,
10714                                  Predicate prd> {
10715   defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
10716                               avx512vl_i16_info, prd>, VEX_WIG;
10717   defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
10718                               avx512vl_i8_info, prd>, VEX_WIG;
10719 }
10720
10721 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
10722                                   bits<8> opc_d, bits<8> opc_q,
10723                                   string OpcodeStr, SDNode OpNode,
10724                                   X86SchedWriteWidths sched> {
10725   defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
10726                                     HasAVX512>,
10727               avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
10728                                     HasBWI>;
10729 }
10730
10731 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
10732                                     SchedWriteVecALU>;
10733
10734 // VPABS: use the 512-bit version to implement the 128/256-bit patterns when
10734 // VLX is not available (NoVLX).
10735 let Predicates = [HasAVX512, NoVLX] in {
10736   def : Pat<(v4i64 (abs VR256X:$src)),
10737             (EXTRACT_SUBREG
10738                 (VPABSQZrr
10739                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
10740              sub_ymm)>;
10741   def : Pat<(v2i64 (abs VR128X:$src)),
10742             (EXTRACT_SUBREG
10743                 (VPABSQZrr
10744                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
10745              sub_xmm)>;
10746 }
10747
10748 // Use the 512-bit version to implement the 128/256-bit patterns.
10749 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
10750                                  AVX512VLVectorVTInfo _, Predicate prd> {
10751   let Predicates = [prd, NoVLX] in {
10752     def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
10753               (EXTRACT_SUBREG
10754                 (!cast<Instruction>(InstrStr # "Zrr")
10755                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10756                                  _.info256.RC:$src1,
10757                                  _.info256.SubRegIdx)),
10758               _.info256.SubRegIdx)>;
10759
10760     def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
10761               (EXTRACT_SUBREG
10762                 (!cast<Instruction>(InstrStr # "Zrr")
10763                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10764                                  _.info128.RC:$src1,
10765                                  _.info128.SubRegIdx)),
10766               _.info128.SubRegIdx)>;
10767   }
10768 }
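// Rough selection example for the multiclass above, using the VPLZCNT
// instantiation that appears just below: under HasCDI with NoVLX,
//   (v4i64 (ctlz VR256X:$x))
// is selected approximately as
//   (EXTRACT_SUBREG
//      (VPLZCNTQZrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), $x, sub_ymm)),
//      sub_ymm)
// mirroring the hand-written VPABS patterns above.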
10769
10770 defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
10771                                         SchedWriteVecIMul, HasCDI>;
10772
10773 // FIXME: Is there a better scheduler class for VPCONFLICT?
10774 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
10775                                         SchedWriteVecALU, HasCDI>;
10776
10777 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
10778 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
10779 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
10780
10781 //===---------------------------------------------------------------------===//
10782 // Counts the number of one bits per element - VPOPCNTD and VPOPCNTQ
10783 //===---------------------------------------------------------------------===//
10784
10785 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
10786 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
10787                                      SchedWriteVecALU, HasVPOPCNTDQ>;
10788
10789 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
10790 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
10791
10792 //===---------------------------------------------------------------------===//
10793 // Replicate Single FP - MOVSHDUP and MOVSLDUP
10794 //===---------------------------------------------------------------------===//
10795
10796 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
10797                             X86SchedWriteWidths sched> {
10798   defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
10799                                       avx512vl_f32_info, HasAVX512>, XS;
10800 }
10801
10802 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
10803                                   SchedWriteFShuffle>;
10804 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
10805                                   SchedWriteFShuffle>;
10806
10807 //===----------------------------------------------------------------------===//
10808 // AVX-512 - MOVDDUP
10809 //===----------------------------------------------------------------------===//
10810
10811 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
10812                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10813   let ExeDomain = _.ExeDomain in {
10814   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10815                    (ins _.RC:$src), OpcodeStr, "$src", "$src",
10816                    (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
10817                    Sched<[sched]>;
10818   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10819                  (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
10820                  (_.VT (_.BroadcastLdFrag addr:$src))>,
10821                  EVEX, EVEX_CD8<_.EltSize, CD8VH>,
10822                  Sched<[sched.Folded]>;
10823   }
10824 }
10825
10826 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
10827                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
10828   defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
10829                            VTInfo.info512>, EVEX_V512;
10830
10831   let Predicates = [HasAVX512, HasVLX] in {
10832     defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
10833                                 VTInfo.info256>, EVEX_V256;
10834     defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
10835                                    VTInfo.info128>, EVEX_V128;
10836   }
10837 }
10838
10839 multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
10840                           X86SchedWriteWidths sched> {
10841   defm NAME:      avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
10842                                         avx512vl_f64_info>, XD, VEX_W;
10843 }
10844
10845 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
10846
10847 let Predicates = [HasVLX] in {
10848 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
10849           (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10850
10851 def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10852                         (v2f64 VR128X:$src0)),
10853           (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
10854                            (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10855 def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10856                         immAllZerosV),
10857           (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10858 }
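// Informal reading of the patterns above: broadcasting an f64 that already
// lives in a vector register is just a vmovddup of that register, optionally
// merged or zeroed under a write-mask (assembly spelling assumed):
//
//   vmovddup %xmm0, %xmm1 {%k1} {z}   # both lanes of xmm1 = low f64 of xmm0,
//                                     # zeroed where k1 is 0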
10859
10860 //===----------------------------------------------------------------------===//
10861 // AVX-512 - Unpack Instructions
10862 //===----------------------------------------------------------------------===//
10863
10864 let Uses = []<Register>, mayRaiseFPException = 0 in {
10865 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
10866                                  SchedWriteFShuffleSizes, 0, 1>;
10867 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
10868                                  SchedWriteFShuffleSizes>;
10869 }
10870
10871 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
10872                                        SchedWriteShuffle, HasBWI>;
10873 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
10874                                        SchedWriteShuffle, HasBWI>;
10875 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
10876                                        SchedWriteShuffle, HasBWI>;
10877 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
10878                                        SchedWriteShuffle, HasBWI>;
10879
10880 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
10881                                        SchedWriteShuffle, HasAVX512>;
10882 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
10883                                        SchedWriteShuffle, HasAVX512>;
10884 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
10885                                         SchedWriteShuffle, HasAVX512>;
10886 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
10887                                         SchedWriteShuffle, HasAVX512>;
10888
10889 //===----------------------------------------------------------------------===//
10890 // AVX-512 - Extract & Insert Integer Instructions
10891 //===----------------------------------------------------------------------===//
10892
10893 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10894                                                             X86VectorVTInfo _> {
10895   def mr : AVX512Ii8<opc, MRMDestMem, (outs),
10896               (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10897               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10898               [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
10899                        addr:$dst)]>,
10900               EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
10901 }
10902
10903 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
10904   let Predicates = [HasBWI] in {
10905     def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
10906                   (ins _.RC:$src1, u8imm:$src2),
10907                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10908                   [(set GR32orGR64:$dst,
10909                         (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
10910                   EVEX, TAPD, Sched<[WriteVecExtract]>;
10911
10912     defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
10913   }
10914 }
10915
10916 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
10917   let Predicates = [HasBWI] in {
10918     def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
10919                   (ins _.RC:$src1, u8imm:$src2),
10920                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10921                   [(set GR32orGR64:$dst,
10922                         (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
10923                   EVEX, PD, Sched<[WriteVecExtract]>;
10924
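    // Register-destination form of the 0x15 (store) encoding; assumed here to
    // exist only for the disassembler and the memory-folding tables
    // (FoldGenData). It has no pattern, so it is never selected from IR.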
10925     let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
10926     def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
10927                    (ins _.RC:$src1, u8imm:$src2),
10928                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
10929                    EVEX, TAPD, FoldGenData<NAME#rr>,
10930                    Sched<[WriteVecExtract]>;
10931
10932     defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
10933   }
10934 }
10935
10936 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
10937                                                             RegisterClass GRC> {
10938   let Predicates = [HasDQI] in {
10939     def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
10940                   (ins _.RC:$src1, u8imm:$src2),
10941                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10942                   [(set GRC:$dst,
10943                       (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
10944                   EVEX, TAPD, Sched<[WriteVecExtract]>;
10945
10946     def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
10947                 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10948                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10949                 [(store (extractelt (_.VT _.RC:$src1),
10950                                     imm:$src2),addr:$dst)]>,
10951                 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
10952                 Sched<[WriteVecExtractSt]>;
10953   }
10954 }
10955
10956 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
10957 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
10958 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
10959 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
10960
10961 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10962                                             X86VectorVTInfo _, PatFrag LdFrag> {
10963   def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
10964       (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
10965       OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10966       [(set _.RC:$dst,
10967           (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
10968       EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
10969 }
10970
10971 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
10972                                             X86VectorVTInfo _, PatFrag LdFrag> {
10973   let Predicates = [HasBWI] in {
10974     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10975         (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
10976         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10977         [(set _.RC:$dst,
10978             (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
10979         Sched<[WriteVecInsert]>;
10980
10981     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
10982   }
10983 }
10984
10985 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
10986                                          X86VectorVTInfo _, RegisterClass GRC> {
10987   let Predicates = [HasDQI] in {
10988     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10989         (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
10990         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10991         [(set _.RC:$dst,
10992             (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
10993         EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
10994
10995     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
10996                                     _.ScalarLdFrag>, TAPD;
10997   }
10998 }
10999
11000 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11001                                      extloadi8>, TAPD, VEX_WIG;
11002 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11003                                      extloadi16>, PD, VEX_WIG;
11004 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11005 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
11006
11007 //===----------------------------------------------------------------------===//
11008 // VSHUFPS - VSHUFPD Operations
11009 //===----------------------------------------------------------------------===//
11010
11011 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
11012                         AVX512VLVectorVTInfo VTInfo_FP>{
11013   defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11014                                     SchedWriteFShuffle>,
11015                                     EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11016                                     AVX512AIi8Base, EVEX_4V;
11017 }
11018
11019 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
11020 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
11021
11022 //===----------------------------------------------------------------------===//
11023 // AVX-512 - Byte shift Left/Right
11024 //===----------------------------------------------------------------------===//
11025
11026 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11027                                Format MRMm, string OpcodeStr,
11028                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11029   def ri : AVX512<opc, MRMr,
11030              (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11031              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11032              [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
11033              Sched<[sched]>;
11034   def mi : AVX512<opc, MRMm,
11035            (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11036            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11037            [(set _.RC:$dst,(_.VT (OpNode
11038                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
11039                                  (i8 timm:$src2))))]>,
11040            Sched<[sched.Folded, sched.ReadAfterFold]>;
11041 }
11042
11043 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11044                                    Format MRMm, string OpcodeStr,
11045                                    X86SchedWriteWidths sched, Predicate prd>{
11046   let Predicates = [prd] in
11047     defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11048                                  sched.ZMM, v64i8_info>, EVEX_V512;
11049   let Predicates = [prd, HasVLX] in {
11050     defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11051                                     sched.YMM, v32i8x_info>, EVEX_V256;
11052     defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11053                                     sched.XMM, v16i8x_info>, EVEX_V128;
11054   }
11055 }
11056 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11057                                        SchedWriteShuffle, HasBWI>,
11058                                        AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11059 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11060                                        SchedWriteShuffle, HasBWI>,
11061                                        AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11062
11063 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11064                                 string OpcodeStr, X86FoldableSchedWrite sched,
11065                                 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11066   let isCommutable = 1 in
11067   def rr : AVX512BI<opc, MRMSrcReg,
11068              (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11069              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11070              [(set _dst.RC:$dst,(_dst.VT
11071                                 (OpNode (_src.VT _src.RC:$src1),
11072                                         (_src.VT _src.RC:$src2))))]>,
11073              Sched<[sched]>;
11074   def rm : AVX512BI<opc, MRMSrcMem,
11075            (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11076            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11077            [(set _dst.RC:$dst,(_dst.VT
11078                               (OpNode (_src.VT _src.RC:$src1),
11079                               (_src.VT (bitconvert
11080                                         (_src.LdFrag addr:$src2))))))]>,
11081            Sched<[sched.Folded, sched.ReadAfterFold]>;
11082 }
11083
11084 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11085                                     string OpcodeStr, X86SchedWriteWidths sched,
11086                                     Predicate prd> {
11087   let Predicates = [prd] in
11088     defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11089                                   v8i64_info, v64i8_info>, EVEX_V512;
11090   let Predicates = [prd, HasVLX] in {
11091     defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11092                                      v4i64x_info, v32i8x_info>, EVEX_V256;
11093     defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11094                                      v2i64x_info, v16i8x_info>, EVEX_V128;
11095   }
11096 }
11097
11098 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11099                                         SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11100
11101 // Transforms to swizzle an immediate to enable better matching when
11102 // the memory operand isn't in the right place.
11103 def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11104   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11105   uint8_t Imm = N->getZExtValue();
11106   // Swap bits 1/4 and 3/6.
11107   uint8_t NewImm = Imm & 0xa5;
11108   if (Imm & 0x02) NewImm |= 0x10;
11109   if (Imm & 0x10) NewImm |= 0x02;
11110   if (Imm & 0x08) NewImm |= 0x40;
11111   if (Imm & 0x40) NewImm |= 0x08;
11112   return getI8Imm(NewImm, SDLoc(N));
11113 }]>;
11114 def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11115   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
11116   uint8_t Imm = N->getZExtValue();
11117   // Swap bits 2/4 and 3/5.
11118   uint8_t NewImm = Imm & 0xc3;
11119   if (Imm & 0x04) NewImm |= 0x10;
11120   if (Imm & 0x10) NewImm |= 0x04;
11121   if (Imm & 0x08) NewImm |= 0x20;
11122   if (Imm & 0x20) NewImm |= 0x08;
11123   return getI8Imm(NewImm, SDLoc(N));
11124 }]>;
11125 def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11126   // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11127   uint8_t Imm = N->getZExtValue();
11128   // Swap bits 1/2 and 5/6.
11129   uint8_t NewImm = Imm & 0x99;
11130   if (Imm & 0x02) NewImm |= 0x04;
11131   if (Imm & 0x04) NewImm |= 0x02;
11132   if (Imm & 0x20) NewImm |= 0x40;
11133   if (Imm & 0x40) NewImm |= 0x20;
11134   return getI8Imm(NewImm, SDLoc(N));
11135 }]>;
11136 def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11137   // Convert a VPTERNLOG immediate by moving operand 0 to the end.
11138   uint8_t Imm = N->getZExtValue();
11139   // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11140   uint8_t NewImm = Imm & 0x81;
11141   if (Imm & 0x02) NewImm |= 0x04;
11142   if (Imm & 0x04) NewImm |= 0x10;
11143   if (Imm & 0x08) NewImm |= 0x40;
11144   if (Imm & 0x10) NewImm |= 0x02;
11145   if (Imm & 0x20) NewImm |= 0x08;
11146   if (Imm & 0x40) NewImm |= 0x20;
11147   return getI8Imm(NewImm, SDLoc(N));
11148 }]>;
11149 def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11150   // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11151   uint8_t Imm = N->getZExtValue();
11152   // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11153   uint8_t NewImm = Imm & 0x81;
11154   if (Imm & 0x02) NewImm |= 0x10;
11155   if (Imm & 0x04) NewImm |= 0x02;
11156   if (Imm & 0x08) NewImm |= 0x20;
11157   if (Imm & 0x10) NewImm |= 0x04;
11158   if (Imm & 0x20) NewImm |= 0x40;
11159   if (Imm & 0x40) NewImm |= 0x08;
11160   return getI8Imm(NewImm, SDLoc(N));
11161 }]>;
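
// Worked example (illustrative only, not generated code): the immediate is an
// 8-entry truth table indexed by (op0 << 2) | (op1 << 1) | op2, so
// A & (B | C) is encoded as 0xE0. Feeding 0xE0 through VPTERNLOG321_imm8
// keeps 0xE0 & 0xa5 = 0xa0 and moves bit 6 to bit 3, producing 0xA8, the
// table for C & (B | A), i.e. the same operation with operands 0 and 2
// swapped.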
11162
11163 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11164                           X86FoldableSchedWrite sched, X86VectorVTInfo _,
11165                           string Name>{
11166   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11167   defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11168                       (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11169                       OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11170                       (OpNode (_.VT _.RC:$src1),
11171                               (_.VT _.RC:$src2),
11172                               (_.VT _.RC:$src3),
11173                               (i8 timm:$src4)), 1, 1>,
11174                       AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11175   defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11176                     (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11177                     OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11178                     (OpNode (_.VT _.RC:$src1),
11179                             (_.VT _.RC:$src2),
11180                             (_.VT (bitconvert (_.LdFrag addr:$src3))),
11181                             (i8 timm:$src4)), 1, 0>,
11182                     AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11183                     Sched<[sched.Folded, sched.ReadAfterFold]>;
11184   defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11185                     (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11186                     OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11187                     "$src2, ${src3}"#_.BroadcastStr#", $src4",
11188                     (OpNode (_.VT _.RC:$src1),
11189                             (_.VT _.RC:$src2),
11190                             (_.VT (_.BroadcastLdFrag addr:$src3)),
11191                             (i8 timm:$src4)), 1, 0>, EVEX_B,
11192                     AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11193                     Sched<[sched.Folded, sched.ReadAfterFold]>;
11194   }// Constraints = "$src1 = $dst"
11195
11196   // Additional patterns for matching passthru operand in other positions.
11197   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11198                    (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11199                    _.RC:$src1)),
11200             (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11201              _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11202   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11203                    (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11204                    _.RC:$src1)),
11205             (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11206              _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11207
11208   // Additional patterns for matching loads in other positions.
11209   def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
11210                           _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
11211             (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11212                                    addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11213   def : Pat<(_.VT (OpNode _.RC:$src1,
11214                           (bitconvert (_.LdFrag addr:$src3)),
11215                           _.RC:$src2, (i8 timm:$src4))),
11216             (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11217                                    addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11218
11219   // Additional patterns for matching zero masking with loads in other
11220   // positions.
11221   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11222                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11223                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11224                    _.ImmAllZerosV)),
11225             (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11226              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11227   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11228                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11229                     _.RC:$src2, (i8 timm:$src4)),
11230                    _.ImmAllZerosV)),
11231             (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11232              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11233
11234   // Additional patterns for matching masked loads with different
11235   // operand orders.
11236   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11237                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11238                     _.RC:$src2, (i8 timm:$src4)),
11239                    _.RC:$src1)),
11240             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11241              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11242   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11243                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11244                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11245                    _.RC:$src1)),
11246             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11247              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11248   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11249                    (OpNode _.RC:$src2, _.RC:$src1,
11250                     (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11251                    _.RC:$src1)),
11252             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11253              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11254   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11255                    (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11256                     _.RC:$src1, (i8 timm:$src4)),
11257                    _.RC:$src1)),
11258             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11259              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11260   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11261                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11262                     _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11263                    _.RC:$src1)),
11264             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11265              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11266
11267   // Additional patterns for matching broadcasts in other positions.
11268   def : Pat<(_.VT (OpNode (_.BroadcastLdFrag addr:$src3),
11269                           _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
11270             (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11271                                    addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11272   def : Pat<(_.VT (OpNode _.RC:$src1,
11273                           (_.BroadcastLdFrag addr:$src3),
11274                           _.RC:$src2, (i8 timm:$src4))),
11275             (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11276                                    addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11277
11278   // Additional patterns for matching zero masking with broadcasts in other
11279   // positions.
11280   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11281                    (OpNode (_.BroadcastLdFrag addr:$src3),
11282                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11283                    _.ImmAllZerosV)),
11284             (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11285              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11286              (VPTERNLOG321_imm8 timm:$src4))>;
11287   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11288                    (OpNode _.RC:$src1,
11289                     (_.BroadcastLdFrag addr:$src3),
11290                     _.RC:$src2, (i8 timm:$src4)),
11291                    _.ImmAllZerosV)),
11292             (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11293              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11294              (VPTERNLOG132_imm8 timm:$src4))>;
11295
11296   // Additional patterns for matching masked broadcasts with different
11297   // operand orders.
11298   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11299                    (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
11300                     _.RC:$src2, (i8 timm:$src4)),
11301                    _.RC:$src1)),
11302             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11303              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11304   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11305                    (OpNode (_.BroadcastLdFrag addr:$src3),
11306                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11307                    _.RC:$src1)),
11308             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11309              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11310   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11311                    (OpNode _.RC:$src2, _.RC:$src1,
11312                     (_.BroadcastLdFrag addr:$src3),
11313                     (i8 timm:$src4)), _.RC:$src1)),
11314             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11315              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11316   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11317                    (OpNode _.RC:$src2,
11318                     (_.BroadcastLdFrag addr:$src3),
11319                     _.RC:$src1, (i8 timm:$src4)),
11320                    _.RC:$src1)),
11321             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11322              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11323   def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11324                    (OpNode (_.BroadcastLdFrag addr:$src3),
11325                     _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11326                    _.RC:$src1)),
11327             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11328              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11329 }
11330
11331 multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11332                                  AVX512VLVectorVTInfo _> {
11333   let Predicates = [HasAVX512] in
11334     defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11335                                _.info512, NAME>, EVEX_V512;
11336   let Predicates = [HasAVX512, HasVLX] in {
11337     defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11338                                _.info128, NAME>, EVEX_V128;
11339     defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11340                                _.info256, NAME>, EVEX_V256;
11341   }
11342 }
11343
11344 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11345                                         avx512vl_i32_info>;
11346 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11347                                         avx512vl_i64_info>, VEX_W;
11348
11349 // Patterns to use VPTERNLOG for vXi16/vXi8 vectors: there are no byte/word forms, but the unmasked operation is bitwise, so the dword/qword encodings are reused.
11350 let Predicates = [HasVLX] in {
11351   def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
11352                                  (i8 timm:$src4))),
11353             (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
11354                                timm:$src4)>;
11355   def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
11356                                  (loadv16i8 addr:$src3), (i8 timm:$src4))),
11357             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11358                                timm:$src4)>;
11359   def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
11360                                  VR128X:$src1, (i8 timm:$src4))),
11361             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11362                                (VPTERNLOG321_imm8 timm:$src4))>;
11363   def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
11364                                  VR128X:$src2, (i8 timm:$src4))),
11365             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11366                                (VPTERNLOG132_imm8 timm:$src4))>;
11367
11368   def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
11369                                  (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
11370                                  (i8 timm:$src4))),
11371             (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11372                                 timm:$src4)>;
11373   def : Pat<(v16i8 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
11374                                  VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
11375             (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11376                                 (VPTERNLOG321_imm8 timm:$src4))>;
11377   def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
11378                                  (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
11379                                  VR128X:$src2, (i8 timm:$src4))),
11380             (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11381                                (VPTERNLOG132_imm8 timm:$src4))>;
11382
11383   def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
11384                                  (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
11385                                  (i8 timm:$src4))),
11386             (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11387                                 timm:$src4)>;
11388   def : Pat<(v16i8 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
11389                                  VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
11390             (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11391                                 (VPTERNLOG321_imm8 timm:$src4))>;
11392   def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
11393                                  (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
11394                                  VR128X:$src2, (i8 timm:$src4))),
11395             (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11396                                 (VPTERNLOG132_imm8 timm:$src4))>;
11397
11398   def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
11399                                  (i8 timm:$src4))),
11400             (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
11401                                timm:$src4)>;
11402   def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
11403                                  (loadv8i16 addr:$src3), (i8 timm:$src4))),
11404             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11405                                timm:$src4)>;
11406   def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
11407                                  VR128X:$src1, (i8 timm:$src4))),
11408             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11409                                (VPTERNLOG321_imm8 timm:$src4))>;
11410   def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
11411                                  VR128X:$src2, (i8 timm:$src4))),
11412             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11413                                (VPTERNLOG132_imm8 timm:$src4))>;
11414
11415   def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
11416                                  (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
11417                                  (i8 timm:$src4))),
11418             (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11419                                 timm:$src4)>;
11420   def : Pat<(v8i16 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
11421                                  VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
11422             (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11423                                 (VPTERNLOG321_imm8 timm:$src4))>;
11424   def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
11425                                  (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
11426                                  VR128X:$src2, (i8 timm:$src4))),
11427             (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11428                                (VPTERNLOG132_imm8 timm:$src4))>;
11429
11430   def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
11431                                  (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
11432                                  (i8 timm:$src4))),
11433             (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11434                                 timm:$src4)>;
11435   def : Pat<(v8i16 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
11436                                  VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
11437             (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11438                                 (VPTERNLOG321_imm8 timm:$src4))>;
11439   def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
11440                                  (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
11441                                  VR128X:$src2, (i8 timm:$src4))),
11442             (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11443                                 (VPTERNLOG132_imm8 timm:$src4))>;
11444
11445   def : Pat<(v4i32 (X86vpternlog VR128X:$src1, VR128X:$src2,
11446                                  (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
11447                                  (i8 timm:$src4))),
11448             (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11449                                 timm:$src4)>;
11450   def : Pat<(v4i32 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
11451                                  VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
11452             (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11453                                 (VPTERNLOG321_imm8 timm:$src4))>;
11454   def : Pat<(v4i32 (X86vpternlog VR128X:$src1,
11455                                  (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
11456                                  VR128X:$src2, (i8 timm:$src4))),
11457             (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11458                                 (VPTERNLOG132_imm8 timm:$src4))>;
11459
11460   def : Pat<(v2i64 (X86vpternlog VR128X:$src1, VR128X:$src2,
11461                                  (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
11462                                  (i8 timm:$src4))),
11463             (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11464                                 timm:$src4)>;
11465   def : Pat<(v2i64 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
11466                                  VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
11467             (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11468                                 (VPTERNLOG321_imm8 timm:$src4))>;
11469   def : Pat<(v2i64 (X86vpternlog VR128X:$src1,
11470                                  (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
11471                                  VR128X:$src2, (i8 timm:$src4))),
11472             (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11473                                (VPTERNLOG132_imm8 timm:$src4))>;
11474
11475   def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
11476                                  (i8 timm:$src4))),
11477             (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
11478                                timm:$src4)>;
11479   def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
11480                                  (loadv32i8 addr:$src3), (i8 timm:$src4))),
11481             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11482                                timm:$src4)>;
11483   def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
11484                                  VR256X:$src1, (i8 timm:$src4))),
11485             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11486                                (VPTERNLOG321_imm8 timm:$src4))>;
11487   def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
11488                                  VR256X:$src2, (i8 timm:$src4))),
11489             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11490                                (VPTERNLOG132_imm8 timm:$src4))>;
11491
11492   def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
11493                                  (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
11494                                  (i8 timm:$src4))),
11495             (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11496                                 timm:$src4)>;
11497   def : Pat<(v32i8 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
11498                                  VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
11499             (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11500                                 (VPTERNLOG321_imm8 timm:$src4))>;
11501   def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
11502                                  (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
11503                                  VR256X:$src2, (i8 timm:$src4))),
11504             (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11505                                (VPTERNLOG132_imm8 timm:$src4))>;
11506
11507   def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
11508                                  (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
11509                                  (i8 timm:$src4))),
11510             (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11511                                 timm:$src4)>;
11512   def : Pat<(v32i8 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
11513                                  VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
11514             (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11515                                 (VPTERNLOG321_imm8 timm:$src4))>;
11516   def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
11517                                  (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
11518                                  VR256X:$src2, (i8 timm:$src4))),
11519             (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11520                                 (VPTERNLOG132_imm8 timm:$src4))>;
11521
11522   def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
11523                                   (i8 timm:$src4))),
11524             (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
11525                                timm:$src4)>;
11526   def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
11527                                   (loadv16i16 addr:$src3), (i8 timm:$src4))),
11528             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11529                                timm:$src4)>;
11530   def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
11531                                   VR256X:$src1, (i8 timm:$src4))),
11532             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11533                                (VPTERNLOG321_imm8 timm:$src4))>;
11534   def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
11535                                   VR256X:$src2, (i8 timm:$src4))),
11536             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11537                                (VPTERNLOG132_imm8 timm:$src4))>;
11538
11539   def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
11540                                   (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
11541                                   (i8 timm:$src4))),
11542             (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11543                                 timm:$src4)>;
11544   def : Pat<(v16i16 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
11545                                  VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
11546             (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11547                                 (VPTERNLOG321_imm8 timm:$src4))>;
11548   def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
11549                                   (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
11550                                   VR256X:$src2, (i8 timm:$src4))),
11551             (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11552                                (VPTERNLOG132_imm8 timm:$src4))>;
11553
11554   def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
11555                                   (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
11556                                   (i8 timm:$src4))),
11557             (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11558                                 timm:$src4)>;
11559   def : Pat<(v16i16 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
11560                                   VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
11561             (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11562                                 (VPTERNLOG321_imm8 timm:$src4))>;
11563   def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
11564                                   (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
11565                                   VR256X:$src2, (i8 timm:$src4))),
11566             (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11567                                 (VPTERNLOG132_imm8 timm:$src4))>;
11568
11569   def : Pat<(v8i32 (X86vpternlog VR256X:$src1, VR256X:$src2,
11570                                  (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
11571                                  (i8 timm:$src4))),
11572             (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11573                                 timm:$src4)>;
11574   def : Pat<(v8i32 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
11575                                   VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
11576             (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11577                                 (VPTERNLOG321_imm8 timm:$src4))>;
11578   def : Pat<(v8i32 (X86vpternlog VR256X:$src1,
11579                                  (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
11580                                  VR256X:$src2, (i8 timm:$src4))),
11581             (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11582                                 (VPTERNLOG132_imm8 timm:$src4))>;
11583
11584   def : Pat<(v4i64 (X86vpternlog VR256X:$src1, VR256X:$src2,
11585                                  (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
11586                                  (i8 timm:$src4))),
11587             (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11588                                 timm:$src4)>;
11589   def : Pat<(v4i64 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
11590                                  VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
11591             (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11592                                 (VPTERNLOG321_imm8 timm:$src4))>;
11593   def : Pat<(v4i64 (X86vpternlog VR256X:$src1,
11594                                  (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
11595                                  VR256X:$src2, (i8 timm:$src4))),
11596             (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11597                                (VPTERNLOG132_imm8 timm:$src4))>;
11598 }
11599
11600 let Predicates = [HasAVX512] in {
11601   def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
11602                                  (i8 timm:$src4))),
11603             (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
11604                             timm:$src4)>;
11605   def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
11606                                  (loadv64i8 addr:$src3), (i8 timm:$src4))),
11607             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11608                             timm:$src4)>;
11609   def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
11610                                   VR512:$src1, (i8 timm:$src4))),
11611             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11612                             (VPTERNLOG321_imm8 timm:$src4))>;
11613   def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
11614                                  VR512:$src2, (i8 timm:$src4))),
11615             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11616                             (VPTERNLOG132_imm8 timm:$src4))>;
11617
11618   def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
11619                                  (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11620                                  (i8 timm:$src4))),
11621             (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11622                              timm:$src4)>;
11623   def : Pat<(v64i8 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11624                                  VR512:$src2, VR512:$src1, (i8 timm:$src4))),
11625             (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11626                              (VPTERNLOG321_imm8 timm:$src4))>;
11627   def : Pat<(v64i8 (X86vpternlog VR512:$src1,
11628                                  (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11629                                  VR512:$src2, (i8 timm:$src4))),
11630             (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11631                              (VPTERNLOG132_imm8 timm:$src4))>;
11632
11633   def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
11634                                  (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
11635                                  (i8 timm:$src4))),
11636             (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11637                                 timm:$src4)>;
11638   def : Pat<(v64i8 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
11639                                  VR512:$src2, VR512:$src1, (i8 timm:$src4))),
11640             (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11641                                 (VPTERNLOG321_imm8 timm:$src4))>;
11642   def : Pat<(v64i8 (X86vpternlog VR512:$src1,
11643                                  (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
11644                                  VR512:$src2, (i8 timm:$src4))),
11645             (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11646                                 (VPTERNLOG132_imm8 timm:$src4))>;
11647
11648   def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
11649                                   (i8 timm:$src4))),
11650             (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
11651                             timm:$src4)>;
11652   def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
11653                                   (loadv32i16 addr:$src3), (i8 timm:$src4))),
11654             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11655                             timm:$src4)>;
11656   def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
11657                                   VR512:$src1, (i8 timm:$src4))),
11658             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11659                             (VPTERNLOG321_imm8 timm:$src4))>;
11660   def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
11661                                   VR512:$src2, (i8 timm:$src4))),
11662             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11663                             (VPTERNLOG132_imm8 timm:$src4))>;
11664
11665   def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
11666                                   (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11667                                   (i8 timm:$src4))),
11668             (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11669                              timm:$src4)>;
11670   def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11671                                  VR512:$src2, VR512:$src1, (i8 timm:$src4))),
11672             (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11673                                 (VPTERNLOG321_imm8 timm:$src4))>;
11674   def : Pat<(v32i16 (X86vpternlog VR512:$src1,
11675                                   (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11676                                   VR512:$src2, (i8 timm:$src4))),
11677             (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11678                                (VPTERNLOG132_imm8 timm:$src4))>;
11679
11680   def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
11681                                   (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
11682                                   (i8 timm:$src4))),
11683             (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11684                              timm:$src4)>;
11685   def : Pat<(v32i16 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
11686                                   VR512:$src2, VR512:$src1, (i8 timm:$src4))),
11687             (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11688                              (VPTERNLOG321_imm8 timm:$src4))>;
11689   def : Pat<(v32i16 (X86vpternlog VR512:$src1,
11690                                   (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
11691                                   VR512:$src2, (i8 timm:$src4))),
11692             (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11693                              (VPTERNLOG132_imm8 timm:$src4))>;
11694
11695   def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
11696                                   (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11697                                   (i8 timm:$src4))),
11698             (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11699                              timm:$src4)>;
11700   def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11701                                  VR512:$src2, VR512:$src1, (i8 timm:$src4))),
11702             (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11703                                 (VPTERNLOG321_imm8 timm:$src4))>;
11704   def : Pat<(v32i16 (X86vpternlog VR512:$src1,
11705                                   (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11706                                   VR512:$src2, (i8 timm:$src4))),
11707             (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11708                                (VPTERNLOG132_imm8 timm:$src4))>;
11709
11710   def : Pat<(v16i32 (X86vpternlog VR512:$src1, VR512:$src2,
11711                                   (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
11712                                   (i8 timm:$src4))),
11713             (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11714                              timm:$src4)>;
11715   def : Pat<(v16i32 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
11716                                   VR512:$src2, VR512:$src1, (i8 timm:$src4))),
11717             (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11718                              (VPTERNLOG321_imm8 timm:$src4))>;
11719   def : Pat<(v16i32 (X86vpternlog VR512:$src1,
11720                                   (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
11721                                   VR512:$src2, (i8 timm:$src4))),
11722             (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11723                              (VPTERNLOG132_imm8 timm:$src4))>;
11724
11725   def : Pat<(v8i64 (X86vpternlog VR512:$src1, VR512:$src2,
11726                                   (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11727                                   (i8 timm:$src4))),
11728             (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11729                              timm:$src4)>;
11730   def : Pat<(v8i64 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11731                                  VR512:$src2, VR512:$src1, (i8 timm:$src4))),
11732             (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11733                                 (VPTERNLOG321_imm8 timm:$src4))>;
11734   def : Pat<(v8i64 (X86vpternlog VR512:$src1,
11735                                   (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11736                                   VR512:$src2, (i8 timm:$src4))),
11737             (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11738                                (VPTERNLOG132_imm8 timm:$src4))>;
11739 }
11740
11741 // Patterns to implement vnot using vpternlog instead of creating all ones
11742 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11743 // so that the result is only dependent on src0. But we use the same source
11744 // for all operands to prevent a false dependency.
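// (Worked check, for reference: imm 15 = 0x0F sets table entries 0-3, i.e.
// exactly the entries whose bit 2 -- the first-source selector -- is 0, so
// vpternlog(x, x, x, 15) yields NOT x whatever the other two operands hold.)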
11745 // TODO: We should maybe have a more generalized algorithm for folding to
11746 // vpternlog.
11747 let Predicates = [HasAVX512] in {
11748   def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
11749             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11750   def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
11751             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11752   def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
11753             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11754   def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
11755             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11756 }
11757
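// Without VLX only the 512-bit VPTERNLOG is available, so widen the operands
// into a ZMM register (IMPLICIT_DEF + INSERT_SUBREG), perform the logic there,
// and extract the original 128/256-bit result.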
11758 let Predicates = [HasAVX512, NoVLX] in {
11759   def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11760             (EXTRACT_SUBREG
11761              (VPTERNLOGQZrri
11762               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11763               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11764               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11765               (i8 15)), sub_xmm)>;
11766   def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11767             (EXTRACT_SUBREG
11768              (VPTERNLOGQZrri
11769               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11770               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11771               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11772               (i8 15)), sub_xmm)>;
11773   def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11774             (EXTRACT_SUBREG
11775              (VPTERNLOGQZrri
11776               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11777               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11778               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11779               (i8 15)), sub_xmm)>;
11780   def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11781             (EXTRACT_SUBREG
11782              (VPTERNLOGQZrri
11783               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11784               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11785               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11786               (i8 15)), sub_xmm)>;
11787
11788   def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11789             (EXTRACT_SUBREG
11790              (VPTERNLOGQZrri
11791               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11792               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11793               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11794               (i8 15)), sub_ymm)>;
11795   def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11796             (EXTRACT_SUBREG
11797              (VPTERNLOGQZrri
11798               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11799               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11800               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11801               (i8 15)), sub_ymm)>;
11802   def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11803             (EXTRACT_SUBREG
11804              (VPTERNLOGQZrri
11805               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11806               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11807               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11808               (i8 15)), sub_ymm)>;
11809   def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11810             (EXTRACT_SUBREG
11811              (VPTERNLOGQZrri
11812               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11813               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11814               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11815               (i8 15)), sub_ymm)>;
11816 }
11817
11818 let Predicates = [HasVLX] in {
11819   def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11820             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11821   def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11822             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11823   def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11824             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11825   def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11826             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11827
11828   def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11829             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11830   def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11831             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11832   def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11833             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11834   def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11835             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11836 }
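
// For example (using <immintrin.h> intrinsics, shown only as an illustration
// of what these patterns are meant to catch), a vector NOT such as:
//   __m512i not512(__m512i X) {
//     return _mm512_xor_si512(X, _mm512_set1_epi32(-1));
//   }
// is expected to match the v16i32 pattern above and select a single
//   vpternlogq $15, %zmm0, %zmm0, %zmm0
// reusing one source register for all operands, as described in the comment
// at the top of this block.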
11837
11838 //===----------------------------------------------------------------------===//
11839 // AVX-512 - FixupImm
11840 //===----------------------------------------------------------------------===//
11841
11842 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
11843                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
11844                                   X86VectorVTInfo TblVT>{
11845   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
11846       Uses = [MXCSR], mayRaiseFPException = 1 in {
11847     defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11848                         (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11849                          OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11850                         (X86VFixupimm (_.VT _.RC:$src1),
11851                                       (_.VT _.RC:$src2),
11852                                       (TblVT.VT _.RC:$src3),
11853                                       (i32 timm:$src4))>, Sched<[sched]>;
11854     defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11855                       (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11856                       OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11857                       (X86VFixupimm (_.VT _.RC:$src1),
11858                                     (_.VT _.RC:$src2),
11859                                     (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
11860                                     (i32 timm:$src4))>,
11861                       Sched<[sched.Folded, sched.ReadAfterFold]>;
11862     defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11863                       (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11864                     OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11865                     "$src2, ${src3}"#_.BroadcastStr#", $src4",
11866                       (X86VFixupimm (_.VT _.RC:$src1),
11867                                     (_.VT _.RC:$src2),
11868                                     (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
11869                                     (i32 timm:$src4))>,
11870                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11871   } // Constraints = "$src1 = $dst"
11872 }
11873
11874 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
11875                                       X86FoldableSchedWrite sched,
11876                                       X86VectorVTInfo _, X86VectorVTInfo TblVT>
11877   : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
11878 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
11879   defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11880                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11881                       OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
11882                       "$src2, $src3, {sae}, $src4",
11883                       (X86VFixupimmSAE (_.VT _.RC:$src1),
11884                                        (_.VT _.RC:$src2),
11885                                        (TblVT.VT _.RC:$src3),
11886                                        (i32 timm:$src4))>,
11887                       EVEX_B, Sched<[sched]>;
11888   }
11889 }
11890
11891 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
11892                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
11893                                   X86VectorVTInfo _src3VT> {
11894   let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
11895       ExeDomain = _.ExeDomain in {
11896     defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11897                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11898                       OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11899                       (X86VFixupimms (_.VT _.RC:$src1),
11900                                      (_.VT _.RC:$src2),
11901                                      (_src3VT.VT _src3VT.RC:$src3),
11902                                      (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
11903     let Uses = [MXCSR] in
11904     defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11905                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11906                       OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
11907                       "$src2, $src3, {sae}, $src4",
11908                       (X86VFixupimmSAEs (_.VT _.RC:$src1),
11909                                         (_.VT _.RC:$src2),
11910                                         (_src3VT.VT _src3VT.RC:$src3),
11911                                         (i32 timm:$src4))>,
11912                       EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11913     defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
11914                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11915                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11916                      (X86VFixupimms (_.VT _.RC:$src1),
11917                                     (_.VT _.RC:$src2),
11918                                     (_src3VT.VT (scalar_to_vector
11919                                               (_src3VT.ScalarLdFrag addr:$src3))),
11920                                     (i32 timm:$src4))>,
11921                      Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
11922   }
11923 }
11924
11925 multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
11926                                       AVX512VLVectorVTInfo _Vec,
11927                                       AVX512VLVectorVTInfo _Tbl> {
11928   let Predicates = [HasAVX512] in
11929     defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
11930                                 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
11931                                 EVEX_4V, EVEX_V512;
11932   let Predicates = [HasAVX512, HasVLX] in {
11933     defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
11934                             _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
11935                             EVEX_4V, EVEX_V128;
11936     defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
11937                             _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
11938                             EVEX_4V, EVEX_V256;
11939   }
11940 }
11941
11942 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11943                                            SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
11944                           AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11945 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11946                                            SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
11947                           AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11948 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
11949                          avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11950 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
11951                          avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
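
// A minimal use of the packed/scalar forms defined above (intrinsic names
// assumed from <immintrin.h>; the imm8 and the integer table in the third
// operand steer the special-value fixup):
//   __m512 fixup_ps(__m512 A, __m512 B, __m512i Tbl) {
//     return _mm512_fixupimm_ps(A, B, Tbl, 0);   // VFIXUPIMMPSZ
//   }
//   __m128 fixup_ss(__m128 A, __m128 B, __m128i Tbl) {
//     return _mm_fixupimm_ss(A, B, Tbl, 0);      // VFIXUPIMMSSZ
//   }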
11952
11953 // Patterns used to select SSE scalar fp arithmetic instructions from
11954 // either:
11955 //
11956 // (1) a scalar fp operation followed by a blend
11957 //
11958 // The effect is that the backend no longer emits unnecessary vector
11959 // insert instructions immediately after SSE scalar fp instructions
11960 // like addss or mulss.
11961 //
11962 // For example, given the following code:
11963 //   __m128 foo(__m128 A, __m128 B) {
11964 //     A[0] += B[0];
11965 //     return A;
11966 //   }
11967 //
11968 // Previously we generated:
11969 //   addss %xmm0, %xmm1
11970 //   movss %xmm1, %xmm0
11971 //
11972 // We now generate:
11973 //   addss %xmm1, %xmm0
11974 //
11975 // (2) a vector packed single/double fp operation followed by a vector insert
11976 //
11977 // The effect is that the backend converts the packed fp instruction
11978 // followed by a vector insert into a single SSE scalar fp instruction.
11979 //
11980 // For example, given the following code:
11981 //   __m128 foo(__m128 A, __m128 B) {
11982 //     __m128 C = A + B;
11983 //     return (__m128) {C[0], A[1], A[2], A[3]};
11984 //   }
11985 //
11986 // Previously we generated:
11987 //   addps %xmm0, %xmm1
11988 //   movss %xmm1, %xmm0
11989 //
11990 // We now generate:
11991 //   addss %xmm1, %xmm0
11992
11993 // TODO: Some canonicalization in lowering would simplify the number of
11994 // patterns we have to try to match.
11995 multiclass AVX512_scalar_math_fp_patterns<SDNode Op, SDNode MaskedOp,
11996                                           string OpcPrefix, SDNode MoveNode,
11997                                           X86VectorVTInfo _, PatLeaf ZeroFP> {
11998   let Predicates = [HasAVX512] in {
11999     // extracted scalar math op with insert via movss
12000     def : Pat<(MoveNode
12001                (_.VT VR128X:$dst),
12002                (_.VT (scalar_to_vector
12003                       (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12004                           _.FRC:$src)))),
12005               (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
12006                (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
12007     def : Pat<(MoveNode
12008                (_.VT VR128X:$dst),
12009                (_.VT (scalar_to_vector
12010                       (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12011                           (_.ScalarLdFrag addr:$src))))),
12012               (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
12013
12014     // extracted masked scalar math op with insert via movss
12015     def : Pat<(MoveNode (_.VT VR128X:$src1),
12016                (scalar_to_vector
12017                 (X86selects_mask VK1WM:$mask,
12018                             (MaskedOp (_.EltVT
12019                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12020                                       _.FRC:$src2),
12021                             _.FRC:$src0))),
12022               (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
12023                (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12024                VK1WM:$mask, _.VT:$src1,
12025                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12026     def : Pat<(MoveNode (_.VT VR128X:$src1),
12027                (scalar_to_vector
12028                 (X86selects_mask VK1WM:$mask,
12029                             (MaskedOp (_.EltVT
12030                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12031                                       (_.ScalarLdFrag addr:$src2)),
12032                             _.FRC:$src0))),
12033               (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
12034                (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12035                VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12036
12037     // extracted zero-masked scalar math op with insert via movss
12038     def : Pat<(MoveNode (_.VT VR128X:$src1),
12039                (scalar_to_vector
12040                 (X86selects_mask VK1WM:$mask,
12041                             (MaskedOp (_.EltVT
12042                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12043                                       _.FRC:$src2), (_.EltVT ZeroFP)))),
12044       (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
12045           VK1WM:$mask, _.VT:$src1,
12046           (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12047     def : Pat<(MoveNode (_.VT VR128X:$src1),
12048                (scalar_to_vector
12049                 (X86selects_mask VK1WM:$mask,
12050                             (MaskedOp (_.EltVT
12051                                        (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12052                                       (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
12053       (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12054   }
12055 }
12056
12057 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12058 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12059 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12060 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12061
12062 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12063 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12064 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12065 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
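
// The masked patterns above are what the masked scalar intrinsics are
// expected to reduce to, since those are expressed as a scalar op followed
// by a select, e.g. (names from <immintrin.h>):
//   __m128 masked_add(__m128 Src, __mmask8 K, __m128 A, __m128 B) {
//     return _mm_mask_add_ss(Src, K, A, B);   // merge-masking -> VADDSSZrr_Intk
//   }
//   __m128 maskedz_add(__mmask8 K, __m128 A, __m128 B) {
//     return _mm_maskz_add_ss(K, A, B);       // zero-masking  -> VADDSSZrr_Intkz
//   }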
12066
12067 multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
12068                                              SDNode Move, X86VectorVTInfo _> {
12069   let Predicates = [HasAVX512] in {
12070     def : Pat<(_.VT (Move _.VT:$dst,
12071                      (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
12072               (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
12073   }
12074 }
12075
12076 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12077 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
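
// For example, given the following code:
//   __m128 sqrt_lo(__m128 A, __m128 B) {
//     A[0] = __builtin_sqrtf(B[0]);
//     return A;
//   }
// the pattern above is intended to allow a single vsqrtss to be selected
// instead of a vsqrtss followed by a separate move of the low element.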
12078
12079 //===----------------------------------------------------------------------===//
12080 // AES instructions
12081 //===----------------------------------------------------------------------===//
12082
12083 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
12084   let Predicates = [HasVLX, HasVAES] in {
12085     defm Z128 : AESI_binop_rm_int<Op, OpStr,
12086                                   !cast<Intrinsic>(IntPrefix),
12087                                   loadv2i64, 0, VR128X, i128mem>,
12088                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
12089     defm Z256 : AESI_binop_rm_int<Op, OpStr,
12090                                   !cast<Intrinsic>(IntPrefix#"_256"),
12091                                   loadv4i64, 0, VR256X, i256mem>,
12092                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
12093     }
12094     let Predicates = [HasAVX512, HasVAES] in
12095     defm Z    : AESI_binop_rm_int<Op, OpStr,
12096                                   !cast<Intrinsic>(IntPrefix#"_512"),
12097                                   loadv8i64, 0, VR512, i512mem>,
12098                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
12099 }
12100
12101 defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12102 defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12103 defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12104 defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
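
// With VAES these provide full-width (and, under VLX, EVEX XMM/YMM) AES round
// helpers, e.g. (<immintrin.h>):
//   __m512i enc_round(__m512i State, __m512i RoundKey) {
//     return _mm512_aesenc_epi128(State, RoundKey);   // vaesenc with ZMM operands
//   }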
12105
12106 //===----------------------------------------------------------------------===//
12107 // PCLMUL instructions - Carry-less multiplication
12108 //===----------------------------------------------------------------------===//
12109
12110 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12111 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12112                               EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
12113
12114 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12115 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12116                               EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
12117
12118 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
12119                                 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
12120                                 EVEX_CD8<64, CD8VF>, VEX_WIG;
12121 }
12122
12123 // Aliases
12124 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12125 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12126 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
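
// Example use of the EVEX carry-less multiply defined above (<immintrin.h>):
//   __m512i clmul(__m512i A, __m512i B) {
//     return _mm512_clmulepi64_epi128(A, B, 0x00);    // vpclmulqdq with ZMM operands
//   }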
12127
12128 //===----------------------------------------------------------------------===//
12129 // VBMI2
12130 //===----------------------------------------------------------------------===//
12131
12132 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12133                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12134   let Constraints = "$src1 = $dst",
12135       ExeDomain   = VTI.ExeDomain in {
12136     defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12137                 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12138                 "$src3, $src2", "$src2, $src3",
12139                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12140                 AVX512FMA3Base, Sched<[sched]>;
12141     defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12142                 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12143                 "$src3, $src2", "$src2, $src3",
12144                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12145                         (VTI.VT (VTI.LdFrag addr:$src3))))>,
12146                 AVX512FMA3Base,
12147                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12148   }
12149 }
12150
12151 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12152                                X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12153          : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12154   let Constraints = "$src1 = $dst",
12155       ExeDomain   = VTI.ExeDomain in
12156   defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12157               (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12158               "${src3}"#VTI.BroadcastStr#", $src2",
12159               "$src2, ${src3}"#VTI.BroadcastStr,
12160               (OpNode VTI.RC:$src1, VTI.RC:$src2,
12161                (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12162               AVX512FMA3Base, EVEX_B,
12163               Sched<[sched.Folded, sched.ReadAfterFold]>;
12164 }
12165
12166 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12167                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12168   let Predicates = [HasVBMI2] in
12169   defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12170                                    EVEX_V512;
12171   let Predicates = [HasVBMI2, HasVLX] in {
12172     defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12173                                    EVEX_V256;
12174     defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12175                                    EVEX_V128;
12176   }
12177 }
12178
12179 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12180                                       X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12181   let Predicates = [HasVBMI2] in
12182   defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12183                                     EVEX_V512;
12184   let Predicates = [HasVBMI2, HasVLX] in {
12185     defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12186                                     EVEX_V256;
12187     defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12188                                     EVEX_V128;
12189   }
12190 }
12191 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12192                            SDNode OpNode, X86SchedWriteWidths sched> {
12193   defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12194              avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
12195   defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12196              avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12197   defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12198              avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
12199 }
12200
12201 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12202                            SDNode OpNode, X86SchedWriteWidths sched> {
12203   defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12204              avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12205              VEX_W, EVEX_CD8<16, CD8VF>;
12206   defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12207              OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
12208   defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12209              sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
12210 }
12211
12212 // Concat & Shift
12213 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12214 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12215 defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12216 defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
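
// Sketch of the two flavours wired up above (intrinsic names assumed from
// <immintrin.h>): an immediate concat-shift and a variable concat-shift.
//   __m512i concat_shift(__m512i A, __m512i B, __m512i Amt) {
//     __m512i C = _mm512_shldi_epi16(A, B, 4);   // vpshldw, fixed shift amount
//     return _mm512_shldv_epi16(C, B, Amt);      // vpshldvw, per-element amount
//   }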
12217
12218 // Compress
12219 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12220                                          avx512vl_i8_info, HasVBMI2>, EVEX,
12221                                          NotMemoryFoldable;
12222 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
12223                                           avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
12224                                           NotMemoryFoldable;
12225 // Expand
12226 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
12227                                       avx512vl_i8_info, HasVBMI2>, EVEX;
12228 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
12229                                       avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
12230
12231 //===----------------------------------------------------------------------===//
12232 // VNNI
12233 //===----------------------------------------------------------------------===//
12234
12235 let Constraints = "$src1 = $dst" in
12236 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12237                     X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12238                     bit IsCommutable> {
12239   let ExeDomain = VTI.ExeDomain in {
12240   defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12241                                    (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12242                                    "$src3, $src2", "$src2, $src3",
12243                                    (VTI.VT (OpNode VTI.RC:$src1,
12244                                             VTI.RC:$src2, VTI.RC:$src3)),
12245                                    IsCommutable, IsCommutable>,
12246                                    EVEX_4V, T8PD, Sched<[sched]>;
12247   defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12248                                    (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12249                                    "$src3, $src2", "$src2, $src3",
12250                                    (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12251                                             (VTI.VT (VTI.LdFrag addr:$src3))))>,
12252                                    EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
12253                                    Sched<[sched.Folded, sched.ReadAfterFold]>;
12254   defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12255                                    (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12256                                    OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12257                                    "$src2, ${src3}"#VTI.BroadcastStr,
12258                                    (OpNode VTI.RC:$src1, VTI.RC:$src2,
12259                                     (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12260                                    EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
12261                                    T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
12262   }
12263 }
12264
12265 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12266                        X86SchedWriteWidths sched, bit IsCommutable> {
12267   let Predicates = [HasVNNI] in
12268   defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12269                            IsCommutable>, EVEX_V512;
12270   let Predicates = [HasVNNI, HasVLX] in {
12271     defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12272                            IsCommutable>, EVEX_V256;
12273     defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12274                            IsCommutable>, EVEX_V128;
12275   }
12276 }
12277
12278 // FIXME: Is there a better scheduler class for VPDP?
12279 defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12280 defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12281 defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12282 defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
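
// Typical use of the VNNI dot-product accumulators defined above
// (<immintrin.h>):
//   __m512i dot(__m512i Acc, __m512i A, __m512i B) {
//     return _mm512_dpwssd_epi32(Acc, A, B);     // vpdpwssd
//   }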
12283
12284 def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
12285                              (X86vpmaddwd node:$lhs, node:$rhs), [{
12286   return N->hasOneUse();
12287 }]>;
12288
12289 // Patterns to match VPDPWSSD from existing instructions/intrinsics.
12290 let Predicates = [HasVNNI] in {
12291   def : Pat<(v16i32 (add VR512:$src1,
12292                          (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12293             (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12294   def : Pat<(v16i32 (add VR512:$src1,
12295                          (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12296             (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12297 }
12298 let Predicates = [HasVNNI,HasVLX] in {
12299   def : Pat<(v8i32 (add VR256X:$src1,
12300                         (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12301             (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12302   def : Pat<(v8i32 (add VR256X:$src1,
12303                         (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12304             (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12305   def : Pat<(v4i32 (add VR128X:$src1,
12306                         (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12307             (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12308   def : Pat<(v4i32 (add VR128X:$src1,
12309                         (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12310             (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12311 }
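
// For example, given the following code:
//   __m512i dot2(__m512i Acc, __m512i A, __m512i B) {
//     return _mm512_add_epi32(Acc, _mm512_madd_epi16(A, B));
//   }
// the patterns above allow a single vpdpwssd to be selected instead of a
// vpmaddwd followed by a vpaddd, provided the vpmaddwd result has no other
// uses (the hasOneUse check in X86vpmaddwd_su).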
12312
12313 //===----------------------------------------------------------------------===//
12314 // Bit Algorithms
12315 //===----------------------------------------------------------------------===//
12316
12317 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
12318 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12319                                    avx512vl_i8_info, HasBITALG>;
12320 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12321                                    avx512vl_i16_info, HasBITALG>, VEX_W;
12322
12323 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12324 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
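
// Per-byte/per-word population count, e.g. (<immintrin.h>):
//   __m512i bytepop(__m512i A) {
//     return _mm512_popcnt_epi8(A);              // vpopcntb
//   }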
12325
12326 def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
12327                                  (X86Vpshufbitqmb node:$src1, node:$src2), [{
12328   return N->hasOneUse();
12329 }]>;
12330
12331 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12332   defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12333                                 (ins VTI.RC:$src1, VTI.RC:$src2),
12334                                 "vpshufbitqmb",
12335                                 "$src2, $src1", "$src1, $src2",
12336                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12337                                 (VTI.VT VTI.RC:$src2)),
12338                                 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12339                                 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
12340                                 Sched<[sched]>;
12341   defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12342                                 (ins VTI.RC:$src1, VTI.MemOp:$src2),
12343                                 "vpshufbitqmb",
12344                                 "$src2, $src1", "$src1, $src2",
12345                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12346                                 (VTI.VT (VTI.LdFrag addr:$src2))),
12347                                 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12348                                 (VTI.VT (VTI.LdFrag addr:$src2)))>,
12349                                 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
12350                                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12351 }
12352
12353 multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12354   let Predicates = [HasBITALG] in
12355   defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12356   let Predicates = [HasBITALG, HasVLX] in {
12357     defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12358     defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12359   }
12360 }
12361
12362 // FIXME: Is there a better scheduler class for VPSHUFBITQMB?
12363 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
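
// Rough usage sketch (intrinsic name assumed from <immintrin.h>); the result
// is a 64-bit mask, one bit per byte lane:
//   __mmask64 bitpick(__m512i B, __m512i C) {
//     return _mm512_bitshuffle_epi64_mask(B, C); // vpshufbitqmb
//   }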
12364
12365 //===----------------------------------------------------------------------===//
12366 // GFNI
12367 //===----------------------------------------------------------------------===//
12368
12369 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12370                                    X86SchedWriteWidths sched> {
12371   let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12372   defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12373                                 EVEX_V512;
12374   let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12375     defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12376                                 EVEX_V256;
12377     defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12378                                 EVEX_V128;
12379   }
12380 }
12381
12382 defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12383                                           SchedWriteVecALU>,
12384                                           EVEX_CD8<8, CD8VF>, T8PD;
12385
12386 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12387                                       X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12388                                       X86VectorVTInfo BcstVTI>
12389            : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12390   let ExeDomain = VTI.ExeDomain in
12391   defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12392                 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
12393                 OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12394                 "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12395                 (OpNode (VTI.VT VTI.RC:$src1),
12396                  (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12397                  (i8 timm:$src3))>, EVEX_B,
12398                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12399 }
12400
12401 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12402                                      X86SchedWriteWidths sched> {
12403   let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12404   defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12405                                            v64i8_info, v8i64_info>, EVEX_V512;
12406   let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12407     defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12408                                            v32i8x_info, v4i64x_info>, EVEX_V256;
12409     defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12410                                            v16i8x_info, v2i64x_info>, EVEX_V128;
12411   }
12412 }
12413
12414 defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12415                          X86GF2P8affineinvqb, SchedWriteVecIMul>,
12416                          EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12417 defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12418                          X86GF2P8affineqb, SchedWriteVecIMul>,
12419                          EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
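
// GF(2^8) helpers, e.g. (<immintrin.h>):
//   __m512i gf(__m512i X, __m512i A) {
//     __m512i P = _mm512_gf2p8mulb_epi8(X, X);            // vgf2p8mulb
//     return _mm512_gf2p8affine_epi64_epi8(P, A, 0x00);   // vgf2p8affineqb
//   }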
12420
12421
12422 //===----------------------------------------------------------------------===//
12423 // AVX5124FMAPS
12424 //===----------------------------------------------------------------------===//
12425
12426 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12427     Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
12428 defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12429                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12430                     "v4fmaddps", "$src3, $src2", "$src2, $src3",
12431                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12432                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12433
12434 defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12435                      (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12436                      "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12437                      []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12438                      Sched<[SchedWriteFMA.ZMM.Folded]>;
12439
12440 defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12441                     (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
12442                     "v4fmaddss", "$src3, $src2", "$src2, $src3",
12443                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12444                     Sched<[SchedWriteFMA.Scl.Folded]>;
12445
12446 defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12447                      (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12448                      "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12449                      []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12450                      Sched<[SchedWriteFMA.Scl.Folded]>;
12451 }
12452
12453 //===----------------------------------------------------------------------===//
12454 // AVX5124VNNIW
12455 //===----------------------------------------------------------------------===//
12456
12457 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12458     Constraints = "$src1 = $dst" in {
12459 defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12460                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12461                      "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12462                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12463                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12464
12465 defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12466                      (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12467                      "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12468                      []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12469                      Sched<[SchedWriteFMA.ZMM.Folded]>;
12470 }
12471
12472 let hasSideEffects = 0 in {
12473   let mayStore = 1, SchedRW = [WriteFStoreX] in
12474   def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12475   let mayLoad = 1, SchedRW = [WriteFLoadX] in
12476   def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12477 }
12478
12479 //===----------------------------------------------------------------------===//
12480 // VP2INTERSECT
12481 //===----------------------------------------------------------------------===//
12482
12483 multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12484   def rr : I<0x68, MRMSrcReg,
12485                   (outs _.KRPC:$dst),
12486                   (ins _.RC:$src1, _.RC:$src2),
12487                   !strconcat("vp2intersect", _.Suffix,
12488                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12489                   [(set _.KRPC:$dst, (X86vp2intersect
12490                             _.RC:$src1, (_.VT _.RC:$src2)))]>,
12491                   EVEX_4V, T8XD, Sched<[sched]>;
12492
12493   def rm : I<0x68, MRMSrcMem,
12494                   (outs _.KRPC:$dst),
12495                   (ins  _.RC:$src1, _.MemOp:$src2),
12496                   !strconcat("vp2intersect", _.Suffix,
12497                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12498                   [(set _.KRPC:$dst, (X86vp2intersect
12499                             _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12500                   EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
12501                   Sched<[sched.Folded, sched.ReadAfterFold]>;
12502
12503   def rmb : I<0x68, MRMSrcMem,
12504                   (outs _.KRPC:$dst),
12505                   (ins _.RC:$src1, _.ScalarMemOp:$src2),
12506                   !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12507                              ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12508                   [(set _.KRPC:$dst, (X86vp2intersect
12509                              _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12510                   EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12511                   Sched<[sched.Folded, sched.ReadAfterFold]>;
12512 }
12513
12514 multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12515   let Predicates  = [HasAVX512, HasVP2INTERSECT] in
12516     defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12517
12518   let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12519     defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12520     defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12521   }
12522 }
12523
12524 defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12525 defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;
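
// These produce a pair of mask registers; the C-level interface takes two
// output mask pointers (intrinsic name assumed from <immintrin.h>):
//   void isect(__m512i A, __m512i B, __mmask16 *K1, __mmask16 *K2) {
//     _mm512_2intersect_epi32(A, B, K1, K2);     // vp2intersectd
//   }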
12526
12527 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12528                              X86SchedWriteWidths sched,
12529                              AVX512VLVectorVTInfo _SrcVTInfo,
12530                              AVX512VLVectorVTInfo _DstVTInfo,
12531                              SDNode OpNode, Predicate prd,
12532                              bit IsCommutable = 0> {
12533   let Predicates = [prd] in
12534     defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12535                                    _SrcVTInfo.info512, _DstVTInfo.info512,
12536                                    _SrcVTInfo.info512, IsCommutable>,
12537                                    EVEX_V512, EVEX_CD8<32, CD8VF>;
12538   let Predicates = [HasVLX, prd] in {
12539     defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12540                                       _SrcVTInfo.info256, _DstVTInfo.info256,
12541                                       _SrcVTInfo.info256, IsCommutable>,
12542                                      EVEX_V256, EVEX_CD8<32, CD8VF>;
12543     defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12544                                       _SrcVTInfo.info128, _DstVTInfo.info128,
12545                                       _SrcVTInfo.info128, IsCommutable>,
12546                                       EVEX_V128, EVEX_CD8<32, CD8VF>;
12547   }
12548 }
12549
12550 let ExeDomain = SSEPackedSingle in
12551 defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12552                                         SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12553                                         avx512vl_f32_info, avx512vl_i16_info,
12554                                         X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
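
// Packs two float vectors into one bfloat16 vector, e.g. (<immintrin.h>):
//   __m512bh pack_bf16(__m512 A, __m512 B) {
//     return _mm512_cvtne2ps_pbh(A, B);          // vcvtne2ps2bf16
//   }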
12555
12556 // Truncate Float to BFloat16
12557 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12558                              X86SchedWriteWidths sched> {
12559   let ExeDomain = SSEPackedSingle in {
12560   let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12561     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
12562                             X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12563   }
12564   let Predicates = [HasBF16, HasVLX] in {
12565     let Uses = []<Register>, mayRaiseFPException = 0 in {
12566     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
12567                                null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12568                                VK4WM>, EVEX_V128;
12569     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
12570                                X86cvtneps2bf16, X86cvtneps2bf16,
12571                                sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12572     }
12573   } // Predicates = [HasBF16, HasVLX]
12574   } // ExeDomain = SSEPackedSingle
12575
12576   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12577                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12578                   VR128X:$src), 0>;
12579   def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12580                   (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12581                   f128mem:$src), 0, "intel">;
12582   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12583                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12584                   VR256X:$src), 0>;
12585   def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12586                   (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12587                   f256mem:$src), 0, "intel">;
12588 }
12589
12590 defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12591                                        SchedWriteCvtPD2PS>, T8XS,
12592                                        EVEX_CD8<32, CD8VF>;
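
// Narrowing conversion, e.g. (<immintrin.h>):
//   __m256bh narrow_bf16(__m512 A) {
//     return _mm512_cvtneps_pbh(A);              // vcvtneps2bf16
//   }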
12593
12594 let Predicates = [HasBF16, HasVLX] in {
12595   // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12596   // patterns have been disabled with null_frag.
12597   def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12598             (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12599   def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
12600                               VK4WM:$mask),
12601             (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12602   def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
12603                               VK4WM:$mask),
12604             (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
12605
12606   def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12607             (VCVTNEPS2BF16Z128rm addr:$src)>;
12608   def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
12609                               VK4WM:$mask),
12610             (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12611   def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
12612                               VK4WM:$mask),
12613             (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
12614
12615   def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
12616                                      (X86VBroadcastld32 addr:$src)))),
12617             (VCVTNEPS2BF16Z128rmb addr:$src)>;
12618   def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12619                               (v8i16 VR128X:$src0), VK4WM:$mask),
12620             (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12621   def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12622                               v8i16x_info.ImmAllZerosV, VK4WM:$mask),
12623             (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
12624 }
12625
12626 let Constraints = "$src1 = $dst" in {
12627 multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12628                               X86FoldableSchedWrite sched,
12629                               X86VectorVTInfo _, X86VectorVTInfo src_v> {
12630   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12631                            (ins src_v.RC:$src2, src_v.RC:$src3),
12632                            OpcodeStr, "$src3, $src2", "$src2, $src3",
12633                            (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
12634                            EVEX_4V, Sched<[sched]>;
12635
12636   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12637                                (ins src_v.RC:$src2, src_v.MemOp:$src3),
12638                                OpcodeStr, "$src3, $src2", "$src2, $src3",
12639                                (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12640                                (src_v.LdFrag addr:$src3)))>, EVEX_4V,
12641                                Sched<[sched.Folded, sched.ReadAfterFold]>;
12642
12643   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12644                   (ins src_v.RC:$src2, src_v.ScalarMemOp:$src3),
12645                   OpcodeStr,
12646                   !strconcat("${src3}", _.BroadcastStr,", $src2"),
12647                   !strconcat("$src2, ${src3}", _.BroadcastStr),
12648                   (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12649                   (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
12650                   EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
12651
12652 }
12653 } // Constraints = "$src1 = $dst"
12654
12655 multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12656                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
12657                                  AVX512VLVectorVTInfo src_v, Predicate prd> {
12658   let Predicates = [prd] in {
12659     defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
12660                                    src_v.info512>, EVEX_V512;
12661   }
12662   let Predicates = [HasVLX, prd] in {
12663     defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
12664                                    src_v.info256>, EVEX_V256;
12665     defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
12666                                    src_v.info128>, EVEX_V128;
12667   }
12668 }
12669
12670 let ExeDomain = SSEPackedSingle in
12671 defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
12672                                        avx512vl_f32_info, avx512vl_i32_info,
12673                                        HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
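
// bfloat16 dot-product accumulate into single precision, e.g. (<immintrin.h>):
//   __m512 bfdot(__m512 Acc, __m512bh A, __m512bh B) {
//     return _mm512_dpbf16_ps(Acc, A, B);        // vdpbf16ps
//   }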