//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions and the properties of the instructions that are needed for
// code generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT).  These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types; in that case, numelts is 1.
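// For example, instantiating this class as X86VectorVTInfo<16, i32, VR512>
// yields KRC = VK16, KRCWM = VK16WM, KVT = v16i1, VT = v16i32, Size = 512,
// MemOp = i512mem, BroadcastStr = "{1to16}" and ZSuffix = "Z" (see the
// v16i32_info definition below).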
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for the vector VT. For vector types it will be
  // v # NumElts # EltVT, so for a vector of 8 elements of i32 it will be v8i32.
  // It is slightly more involved for scalar types, where NumElts = 1;
  // in that case we build v4f32 or v2f64.
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);

  ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
                                          !cast<ComplexPattern>("sse_load_f32"),
                                    !if (!eq (EltTypeName, "f64"),
                                          !cast<ComplexPattern>("sse_load_f64"),
                                    ?));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format.  This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));

  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  dag ImmAllZerosV = (VT immAllZerosV);

  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}

def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type with the
// appropriate element type. This allows us to use the same masking logic.
def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;

class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;

class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;

// This multiclass generates the masking variants from the non-masking
// variant.  It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking, which can be provided as
// template arguments.
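// Instantiating it with a given NAME produces three records: NAME (the
// unmasked form), NAME#k (merge-masking, "$dst {${mask}}", EVEX_K) and
// NAME#kz (zero-masking, "$dst {${mask}} {z}", EVEX_KZ).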
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> {
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
              EVEX_K {
      // In the case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }

  // Zero-masking does not add any restrictions to the operand-commuting
  // transformation, so it is OK to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
              EVEX_KZ;
}


// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
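// (vinsert_for_size_split and vextract_for_size_split below use this form,
// passing null_frag as the unmasked operator when only the masked patterns
// are wanted.)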
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskRHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           SDNode Select = vselect> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable, IsKCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDNode Select = vselect> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS> :
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, 0, 0, 0, X86selects>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
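// Because $src1 already provides the tied operand, the MaskingConstraint
// argument is left empty ("") in the AVX512_maskable_common instantiation
// below.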
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDNode Select = vselect,
                                bit MaskOnly = 0> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          !if(MaskOnly, (null_frag), RHS),
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                          Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          (vselect InVT.KRCWM:$mask, RHS,
                           (bitconvert InVT.RC:$src1)),
                           vselect, "", IsCommutable>;

multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0,
                                     bit MaskOnly = 0> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                        X86selects, MaskOnly>;

multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;

multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "">;

// Instructions with a mask that put the result in a mask register, such as
// "compare" and "vptest".
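// Only the unmasked and merge-masked (EVEX_K) forms are generated here; there
// is no zero-masking variant, since the destination is a mask register and
// masking such instructions already zeroes the masked-off result bits.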
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  bit IsCommutable = 0> {
    let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
    }
}

multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.KRC:$dst, RHS)],
                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag RHS_su, bit IsCommutable = 0> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (and _.KRCWM:$mask, RHS_su), IsCommutable>;


// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
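// Selecting between an all-ones and an all-zeros vector under a mask is
// simply a sign-extension of the i1 mask elements, hence the SEXT in the
// pseudo names below.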
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                           (v8i64 immAllOnesV),
                                           (v8i64 immAllZerosV)))]>;
}

let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}

// Alias instructions that map fld0 to xorps for SSE or vxorps for AVX.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for masked and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                               (From.VT (From.LdFrag addr:$src2)),
                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (From.LdFrag addr:$src2)),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   null_frag, vinsert256_insert, sched>,
                                   EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;


multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT
                                             (bitconvert
                                              (From.LdFrag addr:$src2))),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT (From.LdFrag addr:$src2)),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 into XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          timm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for masked and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                                        From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$idx, $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen patterns for the alternative types.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
               (To.VT (!cast<Instruction>(InstrStr#"rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                              (iPTR imm))), addr:$dst),
               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;


// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
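// Extracting lane 1 of the ymm sub-register yields the same bits [255:128],
// but the resulting 256-bit-source extract can be encoded (or later
// compressed) as a VEX VEXTRACT*128 instead of an EVEX-only 512-bit extract.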
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}


// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1059 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1060                               v4f64x_info, vextract256_extract,
1061                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1062
1063 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1064                               v8i32x_info, vextract256_extract,
1065                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1066 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1067                               v8i32x_info, vextract256_extract,
1068                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1069 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1070                               v8i32x_info, vextract256_extract,
1071                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1072 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1073                               v4i64x_info, vextract256_extract,
1074                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1075 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1076                               v4i64x_info, vextract256_extract,
1077                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1078 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1079                               v4i64x_info, vextract256_extract,
1080                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1081
1082 // vextractps - extract 32 bits from XMM
1083 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
1084       (ins VR128X:$src1, u8imm:$src2),
1085       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1086       [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1087       EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
1088
1089 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1090       (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1091       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1092       [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1093                           addr:$dst)]>,
1094       EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
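// Illustrative uses (a sketch, AT&T syntax):
//   vextractps $2, %xmm1, %eax       # GR32 := element 2 of xmm1
//   vextractps $3, %xmm1, 8(%rsp)    # store element 3 of xmm1 to memory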
1095
1096 //===---------------------------------------------------------------------===//
1097 // AVX-512 BROADCAST
1098 //---
1099 // broadcast with a scalar argument.
1100 multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
1101                             string Name,
1102                             X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
1103   def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1104             (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
1105              (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1106   def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1107                                   (X86VBroadcast SrcInfo.FRC:$src),
1108                                   DestInfo.RC:$src0)),
1109             (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
1110              DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1111              (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1112   def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1113                                   (X86VBroadcast SrcInfo.FRC:$src),
1114                                   DestInfo.ImmAllZerosV)),
1115             (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
1116              DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1117 }
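// Illustrative expansion (a sketch, not an additional definition): for
// VBROADCASTSS at 512 bits this multiclass contributes a pattern roughly like
//   def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
//             (VBROADCASTSSZr (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)))>;
// i.e. a broadcast of a scalar FP value is handled by moving the scalar into
// an XMM register class and reusing the register form of the instruction.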
1118
1119 // Split version that allows the mask and the broadcast node to have
1120 // different types. This helps support the 32x2 broadcasts.
1121 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1122                                      string Name,
1123                                      SchedWrite SchedRR, SchedWrite SchedRM,
1124                                      X86VectorVTInfo MaskInfo,
1125                                      X86VectorVTInfo DestInfo,
1126                                      X86VectorVTInfo SrcInfo,
1127                                      bit IsConvertibleToThreeAddress,
1128                                      SDPatternOperator UnmaskedOp = X86VBroadcast,
1129                                      SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
1130   let hasSideEffects = 0 in
1131   def r : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1132                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1133                    [(set MaskInfo.RC:$dst,
1134                      (MaskInfo.VT
1135                       (bitconvert
1136                        (DestInfo.VT
1137                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1138                    DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
1139   def rkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1140                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1141                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1142                       "${dst} {${mask}} {z}, $src}"),
1143                       [(set MaskInfo.RC:$dst,
1144                         (vselect MaskInfo.KRCWM:$mask,
1145                          (MaskInfo.VT
1146                           (bitconvert
1147                            (DestInfo.VT
1148                             (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1149                          MaskInfo.ImmAllZerosV))],
1150                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
1151   let Constraints = "$src0 = $dst" in
1152   def rk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1153                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1154                          SrcInfo.RC:$src),
1155                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1156                     "${dst} {${mask}}, $src}"),
1157                     [(set MaskInfo.RC:$dst,
1158                       (vselect MaskInfo.KRCWM:$mask,
1159                        (MaskInfo.VT
1160                         (bitconvert
1161                          (DestInfo.VT
1162                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1163                        MaskInfo.RC:$src0))],
1164                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1165
1166   let hasSideEffects = 0, mayLoad = 1 in
1167   def m : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1168                    (ins SrcInfo.ScalarMemOp:$src),
1169                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1170                    [(set MaskInfo.RC:$dst,
1171                      (MaskInfo.VT
1172                       (bitconvert
1173                        (DestInfo.VT
1174                         (UnmaskedBcastOp addr:$src)))))],
1175                    DestInfo.ExeDomain>, T8PD, EVEX,
1176                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1177
1178   def mkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1179                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1180                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1181                       "${dst} {${mask}} {z}, $src}"),
1182                       [(set MaskInfo.RC:$dst,
1183                         (vselect MaskInfo.KRCWM:$mask,
1184                          (MaskInfo.VT
1185                           (bitconvert
1186                            (DestInfo.VT
1187                             (SrcInfo.BroadcastLdFrag addr:$src)))),
1188                          MaskInfo.ImmAllZerosV))],
1189                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
1190                       EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1191
1192   let Constraints = "$src0 = $dst",
1193       isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1194   def mk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1195                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1196                          SrcInfo.ScalarMemOp:$src),
1197                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1198                     "${dst} {${mask}}, $src}"),
1199                     [(set MaskInfo.RC:$dst,
1200                       (vselect MaskInfo.KRCWM:$mask,
1201                        (MaskInfo.VT
1202                         (bitconvert
1203                          (DestInfo.VT
1204                           (SrcInfo.BroadcastLdFrag addr:$src)))),
1205                        MaskInfo.RC:$src0))],
1206                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
1207                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1208 }
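// As a concrete illustration of the mask/broadcast split (a sketch): the
// 32x2 users further below instantiate this with, e.g., MaskInfo = v16i32,
// DestInfo = v8i64 and SrcInfo = v2i64 for the 512-bit vbroadcasti32x2, so
// the write-mask is a 16-element k-register selecting 32-bit lanes while the
// broadcast node itself replicates 64-bit (two-dword) chunks, with the
// bitconvert in the patterns reconciling the two views.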
1209
1210 // Helper class to force the mask and broadcast result to the same type.
1211 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
1212                                SchedWrite SchedRR, SchedWrite SchedRM,
1213                                X86VectorVTInfo DestInfo,
1214                                X86VectorVTInfo SrcInfo,
1215                                bit IsConvertibleToThreeAddress> :
1216   avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
1217                             DestInfo, DestInfo, SrcInfo,
1218                             IsConvertibleToThreeAddress>;
1219
1220 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1221                                                        AVX512VLVectorVTInfo _> {
1222   let Predicates = [HasAVX512] in {
1223     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1224                                   WriteFShuffle256Ld, _.info512, _.info128, 1>,
1225               avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1226                                       _.info128>,
1227               EVEX_V512;
1228   }
1229
1230   let Predicates = [HasVLX] in {
1231     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1232                                      WriteFShuffle256Ld, _.info256, _.info128, 1>,
1233                  avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1234                                          _.info128>,
1235                  EVEX_V256;
1236   }
1237 }
1238
1239 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1240                                                        AVX512VLVectorVTInfo _> {
1241   let Predicates = [HasAVX512] in {
1242     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1243                                   WriteFShuffle256Ld, _.info512, _.info128, 1>,
1244               avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1245                                       _.info128>,
1246               EVEX_V512;
1247   }
1248
1249   let Predicates = [HasVLX] in {
1250     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1251                                      WriteFShuffle256Ld, _.info256, _.info128, 1>,
1252                  avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1253                                          _.info128>,
1254                  EVEX_V256;
1255     defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1256                                      WriteFShuffle256Ld, _.info128, _.info128, 1>,
1257                  avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
1258                                          _.info128>,
1259                  EVEX_V128;
1260   }
1261 }
1262 defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1263                                        avx512vl_f32_info>;
1264 defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1265                                        avx512vl_f64_info>, VEX_W1X;
1266
1267 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1268                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1269                                     RegisterClass SrcRC> {
1270   let ExeDomain = _.ExeDomain in
1271   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1272                          (ins SrcRC:$src),
1273                          "vpbroadcast"##_.Suffix, "$src", "$src",
1274                          (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
1275                          Sched<[SchedRR]>;
1276 }
1277
1278 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1279                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1280                                     RegisterClass SrcRC, SubRegIndex Subreg> {
1281   let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1282   defm r : AVX512_maskable_custom<opc, MRMSrcReg,
1283                         (outs _.RC:$dst), (ins GR32:$src),
1284                         !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1285                         !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1286                         "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
1287                         "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1288
1289   def : Pat <(_.VT (OpNode SrcRC:$src)),
1290              (!cast<Instruction>(Name#r)
1291               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1292
1293   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1294              (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
1295               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1296
1297   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1298              (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
1299               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1300 }
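// Illustrative expansion (a sketch): VPBROADCASTB/W are modeled with a 32-bit
// GPR source, so a broadcast of a GR8/GR16 value is matched by first
// inserting the narrow register into an undefined GR32, roughly
//   def : Pat<(v16i8 (X86VBroadcast GR8:$src)),
//             (VPBROADCASTBrZ128r
//              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit)))>;
// for the 128-bit byte variant.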
1301
1302 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1303                       AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1304                       RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1305   let Predicates = [prd] in
1306     defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1307               OpNode, SrcRC, Subreg>, EVEX_V512;
1308   let Predicates = [prd, HasVLX] in {
1309     defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1310               _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1311     defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1312               _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1313   }
1314 }
1315
1316 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1317                                        SDPatternOperator OpNode,
1318                                        RegisterClass SrcRC, Predicate prd> {
1319   let Predicates = [prd] in
1320     defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1321                                       SrcRC>, EVEX_V512;
1322   let Predicates = [prd, HasVLX] in {
1323     defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1324                                          SrcRC>, EVEX_V256;
1325     defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1326                                          SrcRC>, EVEX_V128;
1327   }
1328 }
1329
1330 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1331                        avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1332 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1333                        avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1334                        HasBWI>;
1335 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1336                                                  X86VBroadcast, GR32, HasAVX512>;
1337 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1338                                                  X86VBroadcast, GR64, HasAVX512>, VEX_W;
1339
1340 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1341                                         AVX512VLVectorVTInfo _, Predicate prd,
1342                                         bit IsConvertibleToThreeAddress> {
1343   let Predicates = [prd] in {
1344     defm Z :   avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1345                                    WriteShuffle256Ld, _.info512, _.info128,
1346                                    IsConvertibleToThreeAddress>,
1347                                   EVEX_V512;
1348   }
1349   let Predicates = [prd, HasVLX] in {
1350     defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1351                                     WriteShuffle256Ld, _.info256, _.info128,
1352                                     IsConvertibleToThreeAddress>,
1353                                  EVEX_V256;
1354     defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
1355                                     WriteShuffleXLd, _.info128, _.info128,
1356                                     IsConvertibleToThreeAddress>,
1357                                  EVEX_V128;
1358   }
1359 }
1360
1361 defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1362                                            avx512vl_i8_info, HasBWI, 0>;
1363 defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1364                                            avx512vl_i16_info, HasBWI, 0>;
1365 defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1366                                            avx512vl_i32_info, HasAVX512, 1>;
1367 defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1368                                            avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
1369
1370 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1371                           X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1372   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1373                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1374                            (_Dst.VT (X86SubVBroadcast
1375                              (_Src.VT (_Src.LdFrag addr:$src))))>,
1376                            Sched<[SchedWriteShuffle.YMM.Folded]>,
1377                            AVX5128IBase, EVEX;
1378 }
1379
1380 // This should be used for the AVX512DQ subvector broadcast instructions. It
1381 // disables the unmasked patterns so that we only use the DQ instructions when
1382 // masking is requested.
1383 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1384                           X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1385   let hasSideEffects = 0, mayLoad = 1 in
1386   defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1387                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1388                            (null_frag),
1389                            (_Dst.VT (X86SubVBroadcast
1390                              (_Src.VT (_Src.LdFrag addr:$src))))>,
1391                            Sched<[SchedWriteShuffle.YMM.Folded]>,
1392                            AVX5128IBase, EVEX;
1393 }
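// Illustrative note (a sketch): an unmasked 128-bit subvector broadcast such
// as (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))) is already covered by
// the AVX512F patterns further below, which map it onto VBROADCASTI32X4rm, so
// the DQ 64x2 forms defined through this multiclass only need to be selected
// when a 64-bit-element write-mask is in use.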
1394
1395 let Predicates = [HasAVX512] in {
1396   // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
1397   def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
1398             (VPBROADCASTQZm addr:$src)>;
1399
1400   // FIXME this is to handle aligned extloads from i8.
1401   def : Pat<(v16i32 (X86VBroadcast (loadi32 addr:$src))),
1402             (VPBROADCASTDZm addr:$src)>;
1403 }
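// Illustrative DAG shape (a sketch): on a 32-bit target the scalar i64 load
// feeding the broadcast is not legal, so the same broadcast reaches selection
// as a zero-extending vector load,
//   (v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
// which the first pattern above maps onto VPBROADCASTQZm.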
1404
1405 let Predicates = [HasVLX] in {
1406   // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
1407   def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
1408             (VPBROADCASTQZ128m addr:$src)>;
1409   def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
1410             (VPBROADCASTQZ256m addr:$src)>;
1411
1412   // FIXME this is to handle aligned extloads from i8.
1413   def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
1414             (VPBROADCASTDZ128m addr:$src)>;
1415   def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
1416             (VPBROADCASTDZ256m addr:$src)>;
1417 }
1418 let Predicates = [HasVLX, HasBWI] in {
1419   // loadi16 is tricky to fold; isTypeDesirableForOp justifiably rejects i16.
1420   // This means we'll encounter truncated i32 loads; match that here.
1421   def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1422             (VPBROADCASTWZ128m addr:$src)>;
1423   def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1424             (VPBROADCASTWZ256m addr:$src)>;
1425   def : Pat<(v8i16 (X86VBroadcast
1426               (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1427             (VPBROADCASTWZ128m addr:$src)>;
1428   def : Pat<(v8i16 (X86VBroadcast
1429               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1430             (VPBROADCASTWZ128m addr:$src)>;
1431   def : Pat<(v16i16 (X86VBroadcast
1432               (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1433             (VPBROADCASTWZ256m addr:$src)>;
1434   def : Pat<(v16i16 (X86VBroadcast
1435               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1436             (VPBROADCASTWZ256m addr:$src)>;
1437
1438   // FIXME this is to handle aligned extloads from i8.
1439   def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
1440             (VPBROADCASTWZ128m addr:$src)>;
1441   def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
1442             (VPBROADCASTWZ256m addr:$src)>;
1443 }
1444 let Predicates = [HasBWI] in {
1445   // loadi16 is tricky to fold; isTypeDesirableForOp justifiably rejects i16.
1446   // This means we'll encounter truncated i32 loads; match that here.
1447   def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1448             (VPBROADCASTWZm addr:$src)>;
1449   def : Pat<(v32i16 (X86VBroadcast
1450               (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1451             (VPBROADCASTWZm addr:$src)>;
1452   def : Pat<(v32i16 (X86VBroadcast
1453               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1454             (VPBROADCASTWZm addr:$src)>;
1455
1456   // FIXME this is to handle aligned extloads from i8.
1457   def : Pat<(v32i16 (X86VBroadcast (loadi16 addr:$src))),
1458             (VPBROADCASTWZm addr:$src)>;
1459 }
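// Illustrative DAG shape for the i16 cases above (a sketch): because i16
// operations are promoted to i32, a broadcast of a loaded i16 typically
// reaches selection as
//   (v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
// which is exactly what the truncated-load patterns above match.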
1460
1461 //===----------------------------------------------------------------------===//
1462 // AVX-512 BROADCAST SUBVECTORS
1463 //
1464
1465 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1466                        v16i32_info, v4i32x_info>,
1467                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1468 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1469                        v16f32_info, v4f32x_info>,
1470                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1471 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1472                        v8i64_info, v4i64x_info>, VEX_W,
1473                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
1474 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1475                        v8f64_info, v4f64x_info>, VEX_W,
1476                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
1477
1478 let Predicates = [HasAVX512] in {
1479 def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
1480           (VBROADCASTF64X4rm addr:$src)>;
1481 def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
1482           (VBROADCASTI64X4rm addr:$src)>;
1483 def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
1484           (VBROADCASTI64X4rm addr:$src)>;
1485 def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
1486           (VBROADCASTI64X4rm addr:$src)>;
1487
1488 // Provide a fallback in case the load node used in the patterns above has
1489 // additional users, which prevents those patterns from being selected.
1490 def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
1491           (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1492                            (v4f64 VR256X:$src), 1)>;
1493 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
1494           (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1495                            (v8f32 VR256X:$src), 1)>;
1496 def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
1497           (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1498                            (v4i64 VR256X:$src), 1)>;
1499 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
1500           (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1501                            (v8i32 VR256X:$src), 1)>;
1502 def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
1503           (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1504                            (v16i16 VR256X:$src), 1)>;
1505 def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
1506           (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1507                            (v32i8 VR256X:$src), 1)>;
1508
1509 def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1510           (VBROADCASTF32X4rm addr:$src)>;
1511 def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1512           (VBROADCASTI32X4rm addr:$src)>;
1513 def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1514           (VBROADCASTI32X4rm addr:$src)>;
1515 def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1516           (VBROADCASTI32X4rm addr:$src)>;
1517
1518 // Patterns for selects of bitcasted operations.
1519 def : Pat<(vselect VK16WM:$mask,
1520                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1521                    (v16f32 immAllZerosV)),
1522           (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1523 def : Pat<(vselect VK16WM:$mask,
1524                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1525                    VR512:$src0),
1526           (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1527 def : Pat<(vselect VK16WM:$mask,
1528                    (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1529                    (v16i32 immAllZerosV)),
1530           (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1531 def : Pat<(vselect VK16WM:$mask,
1532                    (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1533                    VR512:$src0),
1534           (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1535
1536 def : Pat<(vselect VK8WM:$mask,
1537                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1538                    (v8f64 immAllZerosV)),
1539           (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1540 def : Pat<(vselect VK8WM:$mask,
1541                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1542                    VR512:$src0),
1543           (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1544 def : Pat<(vselect VK8WM:$mask,
1545                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1546                    (v8i64 immAllZerosV)),
1547           (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1548 def : Pat<(vselect VK8WM:$mask,
1549                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1550                    VR512:$src0),
1551           (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1552 }
1553
1554 let Predicates = [HasVLX] in {
1555 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1556                            v8i32x_info, v4i32x_info>,
1557                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
1558 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1559                            v8f32x_info, v4f32x_info>,
1560                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
1561
1562 def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1563           (VBROADCASTF32X4Z256rm addr:$src)>;
1564 def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1565           (VBROADCASTI32X4Z256rm addr:$src)>;
1566 def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1567           (VBROADCASTI32X4Z256rm addr:$src)>;
1568 def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1569           (VBROADCASTI32X4Z256rm addr:$src)>;
1570
1571 // Patterns for selects of bitcasted operations.
1572 def : Pat<(vselect VK8WM:$mask,
1573                    (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1574                    (v8f32 immAllZerosV)),
1575           (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1576 def : Pat<(vselect VK8WM:$mask,
1577                    (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1578                    VR256X:$src0),
1579           (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1580 def : Pat<(vselect VK8WM:$mask,
1581                    (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1582                    (v8i32 immAllZerosV)),
1583           (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1584 def : Pat<(vselect VK8WM:$mask,
1585                    (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1586                    VR256X:$src0),
1587           (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1588
1589
1590 // Provide a fallback in case the load node used in the patterns above has
1591 // additional users, which prevents those patterns from being selected.
1592 def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
1593           (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1594                               (v2f64 VR128X:$src), 1)>;
1595 def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
1596           (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1597                               (v4f32 VR128X:$src), 1)>;
1598 def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
1599           (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1600                               (v2i64 VR128X:$src), 1)>;
1601 def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
1602           (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1603                               (v4i32 VR128X:$src), 1)>;
1604 def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
1605           (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1606                               (v8i16 VR128X:$src), 1)>;
1607 def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
1608           (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1609                               (v16i8 VR128X:$src), 1)>;
1610 }
1611
1612 let Predicates = [HasVLX, HasDQI] in {
1613 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1614                            v4i64x_info, v2i64x_info>, VEX_W1X,
1615                            EVEX_V256, EVEX_CD8<64, CD8VT2>;
1616 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1617                            v4f64x_info, v2f64x_info>, VEX_W1X,
1618                            EVEX_V256, EVEX_CD8<64, CD8VT2>;
1619
1620 // Patterns for selects of bitcasted operations.
1621 def : Pat<(vselect VK4WM:$mask,
1622                    (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1623                    (v4f64 immAllZerosV)),
1624           (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1625 def : Pat<(vselect VK4WM:$mask,
1626                    (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1627                    VR256X:$src0),
1628           (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1629 def : Pat<(vselect VK4WM:$mask,
1630                    (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1631                    (v4i64 immAllZerosV)),
1632           (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1633 def : Pat<(vselect VK4WM:$mask,
1634                    (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1635                    VR256X:$src0),
1636           (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1637 }
1638
1639 let Predicates = [HasDQI] in {
1640 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1641                        v8i64_info, v2i64x_info>, VEX_W,
1642                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1643 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1644                        v16i32_info, v8i32x_info>,
1645                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1646 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1647                        v8f64_info, v2f64x_info>, VEX_W,
1648                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1649 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1650                        v16f32_info, v8f32x_info>,
1651                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1652
1653 // Patterns for selects of bitcasted operations.
1654 def : Pat<(vselect VK16WM:$mask,
1655                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1656                    (v16f32 immAllZerosV)),
1657           (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1658 def : Pat<(vselect VK16WM:$mask,
1659                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1660                    VR512:$src0),
1661           (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1662 def : Pat<(vselect VK16WM:$mask,
1663                    (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1664                    (v16i32 immAllZerosV)),
1665           (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1666 def : Pat<(vselect VK16WM:$mask,
1667                    (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1668                    VR512:$src0),
1669           (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1670
1671 def : Pat<(vselect VK8WM:$mask,
1672                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1673                    (v8f64 immAllZerosV)),
1674           (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1675 def : Pat<(vselect VK8WM:$mask,
1676                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1677                    VR512:$src0),
1678           (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1679 def : Pat<(vselect VK8WM:$mask,
1680                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1681                    (v8i64 immAllZerosV)),
1682           (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1683 def : Pat<(vselect VK8WM:$mask,
1684                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1685                    VR512:$src0),
1686           (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1687 }
1688
1689 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1690                          AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
1691   let Predicates = [HasDQI] in
1692     defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1693                                           WriteShuffle256Ld, _Dst.info512,
1694                                           _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1695                                           EVEX_V512;
1696   let Predicates = [HasDQI, HasVLX] in
1697     defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1698                                           WriteShuffle256Ld, _Dst.info256,
1699                                           _Src.info256, _Src.info128, 0, null_frag, null_frag>,
1700                                           EVEX_V256;
1701 }
1702
1703 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1704                          AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1705   avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1706
1707   let Predicates = [HasDQI, HasVLX] in
1708     defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
1709                                           WriteShuffleXLd, _Dst.info128,
1710                                           _Src.info128, _Src.info128, 0, null_frag, null_frag>,
1711                                           EVEX_V128;
1712 }
1713
1714 defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1715                                           avx512vl_i32_info, avx512vl_i64_info>;
1716 defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1717                                           avx512vl_f32_info, avx512vl_f64_info>;
1718
1719 //===----------------------------------------------------------------------===//
1720 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
1721 //---
1722 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1723                                   X86VectorVTInfo _, RegisterClass KRC> {
1724   def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1725                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1726                   [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1727                   EVEX, Sched<[WriteShuffle]>;
1728 }
1729
1730 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1731                                  AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1732   let Predicates = [HasCDI] in
1733     defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1734   let Predicates = [HasCDI, HasVLX] in {
1735     defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1736     defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1737   }
1738 }
1739
1740 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1741                                                avx512vl_i32_info, VK16>;
1742 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1743                                                avx512vl_i64_info, VK8>, VEX_W;
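// Illustrative semantics (a sketch, Intel syntax): these broadcast the mask
// register itself into every vector element, e.g.
//   vpbroadcastmw2d zmm0, k1   ; each dword of zmm0 = zero-extended k1[15:0]
//   vpbroadcastmb2q zmm0, k1   ; each qword of zmm0 = zero-extended k1[7:0]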
1744
1745 //===----------------------------------------------------------------------===//
1746 // -- VPERMI2 - 3-source-operand form --
1747 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1748                          X86FoldableSchedWrite sched,
1749                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1750 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1751     hasSideEffects = 0 in {
1752   defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1753           (ins _.RC:$src2, _.RC:$src3),
1754           OpcodeStr, "$src3, $src2", "$src2, $src3",
1755           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1756           EVEX_4V, AVX5128IBase, Sched<[sched]>;
1757
1758   let mayLoad = 1 in
1759   defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1760             (ins _.RC:$src2, _.MemOp:$src3),
1761             OpcodeStr, "$src3, $src2", "$src2, $src3",
1762             (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1763                    (_.VT (_.LdFrag addr:$src3)))), 1>,
1764             EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1765   }
1766 }
1767
1768 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1769                             X86FoldableSchedWrite sched,
1770                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1771   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1772       hasSideEffects = 0, mayLoad = 1 in
1773   defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1774               (ins _.RC:$src2, _.ScalarMemOp:$src3),
1775               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1776               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1777               (_.VT (X86VPermt2 _.RC:$src2,
1778                IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1779               AVX5128IBase, EVEX_4V, EVEX_B,
1780               Sched<[sched.Folded, sched.ReadAfterFold]>;
1781 }
1782
1783 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1784                                X86FoldableSchedWrite sched,
1785                                AVX512VLVectorVTInfo VTInfo,
1786                                AVX512VLVectorVTInfo ShuffleMask> {
1787   defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1788                            ShuffleMask.info512>,
1789             avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1790                              ShuffleMask.info512>, EVEX_V512;
1791   let Predicates = [HasVLX] in {
1792   defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1793                                ShuffleMask.info128>,
1794                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1795                                   ShuffleMask.info128>, EVEX_V128;
1796   defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1797                                ShuffleMask.info256>,
1798                  avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1799                                   ShuffleMask.info256>, EVEX_V256;
1800   }
1801 }
1802
1803 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1804                                   X86FoldableSchedWrite sched,
1805                                   AVX512VLVectorVTInfo VTInfo,
1806                                   AVX512VLVectorVTInfo Idx,
1807                                   Predicate Prd> {
1808   let Predicates = [Prd] in
1809   defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1810                            Idx.info512>, EVEX_V512;
1811   let Predicates = [Prd, HasVLX] in {
1812   defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1813                                Idx.info128>, EVEX_V128;
1814   defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1815                                Idx.info256>,  EVEX_V256;
1816   }
1817 }
1818
1819 defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1820                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1821 defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1822                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1823 defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1824                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1825                   VEX_W, EVEX_CD8<16, CD8VF>;
1826 defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1827                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1828                   EVEX_CD8<8, CD8VF>;
1829 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1830                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1831 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1832                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1833
1834 // Additional patterns to handle the extra bitcasts that arise when the
1835 // passthru and index operands have different types in the FP versions.
1836 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1837                                   X86VectorVTInfo IdxVT,
1838                                   X86VectorVTInfo CastVT> {
1839   def : Pat<(_.VT (vselect _.KRCWM:$mask,
1840                              (X86VPermt2 (_.VT _.RC:$src2),
1841                                          (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
1842                              (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1843             (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1844                                                 _.RC:$src2, _.RC:$src3)>;
1845   def : Pat<(_.VT (vselect _.KRCWM:$mask,
1846                              (X86VPermt2 _.RC:$src2,
1847                                          (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1848                                          (_.LdFrag addr:$src3)),
1849                              (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1850             (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1851                                                 _.RC:$src2, addr:$src3)>;
1852   def : Pat<(_.VT (vselect _.KRCWM:$mask,
1853                              (X86VPermt2 _.RC:$src2,
1854                                          (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1855                                          (_.BroadcastLdFrag addr:$src3)),
1856                              (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1857             (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1858                                                  _.RC:$src2, addr:$src3)>;
1859 }
1860
1861 // TODO: Should we add more casts? The vXi64 case is common due to ABI.
1862 defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1863 defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1864 defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1865
1866 // VPERMT2
1867 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1868                          X86FoldableSchedWrite sched,
1869                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1870 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1871   defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1872           (ins IdxVT.RC:$src2, _.RC:$src3),
1873           OpcodeStr, "$src3, $src2", "$src2, $src3",
1874           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1875           EVEX_4V, AVX5128IBase, Sched<[sched]>;
1876
1877   defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1878             (ins IdxVT.RC:$src2, _.MemOp:$src3),
1879             OpcodeStr, "$src3, $src2", "$src2, $src3",
1880             (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1881                    (_.LdFrag addr:$src3))), 1>,
1882             EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1883   }
1884 }
1885 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1886                             X86FoldableSchedWrite sched,
1887                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1888   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1889   defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1890               (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1891               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1892               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1893               (_.VT (X86VPermt2 _.RC:$src1,
1894                IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1895               AVX5128IBase, EVEX_4V, EVEX_B,
1896               Sched<[sched.Folded, sched.ReadAfterFold]>;
1897 }
1898
1899 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1900                                X86FoldableSchedWrite sched,
1901                                AVX512VLVectorVTInfo VTInfo,
1902                                AVX512VLVectorVTInfo ShuffleMask> {
1903   defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1904                               ShuffleMask.info512>,
1905             avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1906                               ShuffleMask.info512>, EVEX_V512;
1907   let Predicates = [HasVLX] in {
1908   defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1909                               ShuffleMask.info128>,
1910                  avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1911                               ShuffleMask.info128>, EVEX_V128;
1912   defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1913                               ShuffleMask.info256>,
1914                  avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1915                               ShuffleMask.info256>, EVEX_V256;
1916   }
1917 }
1918
1919 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1920                                   X86FoldableSchedWrite sched,
1921                                   AVX512VLVectorVTInfo VTInfo,
1922                                   AVX512VLVectorVTInfo Idx, Predicate Prd> {
1923   let Predicates = [Prd] in
1924   defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1925                            Idx.info512>, EVEX_V512;
1926   let Predicates = [Prd, HasVLX] in {
1927   defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1928                                Idx.info128>, EVEX_V128;
1929   defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1930                                Idx.info256>, EVEX_V256;
1931   }
1932 }
1933
1934 defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1935                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1936 defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1937                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1938 defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1939                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1940                   VEX_W, EVEX_CD8<16, CD8VF>;
1941 defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1942                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1943                   EVEX_CD8<8, CD8VF>;
1944 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1945                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1946 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1947                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
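// Illustrative difference between the I and T forms (a sketch, Intel syntax):
//   vpermi2d zmm1, zmm2, zmm3   ; zmm1 holds the indices and is overwritten
//   vpermt2d zmm1, zmm2, zmm3   ; zmm2 holds the indices; zmm1, the first
//                               ; table operand, is overwritten
// In both cases two registers form the lookup table ({zmm2,zmm3} for the I
// form, {zmm1,zmm3} for the T form); only which source doubles as the
// destination differs.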
1948
1949 //===----------------------------------------------------------------------===//
1950 // AVX-512 - BLEND using mask
1951 //
1952
1953 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1954                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1955   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1956   def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1957              (ins _.RC:$src1, _.RC:$src2),
1958              !strconcat(OpcodeStr,
1959              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1960              EVEX_4V, Sched<[sched]>;
1961   def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1962              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1963              !strconcat(OpcodeStr,
1964              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1965              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1966   def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1967              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1968              !strconcat(OpcodeStr,
1969              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1970              []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
1971   let mayLoad = 1 in {
1972   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1973              (ins _.RC:$src1, _.MemOp:$src2),
1974              !strconcat(OpcodeStr,
1975              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1976              []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1977              Sched<[sched.Folded, sched.ReadAfterFold]>;
1978   def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1979              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1980              !strconcat(OpcodeStr,
1981              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1982              []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1983              Sched<[sched.Folded, sched.ReadAfterFold]>;
1984   def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1985              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1986              !strconcat(OpcodeStr,
1987              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1988              []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1989              Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
1990   }
1991   }
1992 }
1993 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1994                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1995   let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
1996   def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1997       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1998        !strconcat(OpcodeStr,
1999             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2000             "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2001       EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2002       Sched<[sched.Folded, sched.ReadAfterFold]>;
2003
2004   def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2005       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2006        !strconcat(OpcodeStr,
2007             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
2008             "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2009       EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2010       Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
2011
2012   def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2013       (ins _.RC:$src1, _.ScalarMemOp:$src2),
2014        !strconcat(OpcodeStr,
2015             "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2016             "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2017       EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2018       Sched<[sched.Folded, sched.ReadAfterFold]>;
2019   }
2020 }
2021
2022 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2023                         AVX512VLVectorVTInfo VTInfo> {
2024   defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2025            WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2026                                  EVEX_V512;
2027
2028   let Predicates = [HasVLX] in {
2029     defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2030                 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2031                                       EVEX_V256;
2032     defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2033                 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2034                                       EVEX_V128;
2035   }
2036 }
2037
2038 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2039                         AVX512VLVectorVTInfo VTInfo> {
2040   let Predicates = [HasBWI] in
2041     defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2042                                EVEX_V512;
2043
2044   let Predicates = [HasBWI, HasVLX] in {
2045     defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2046                                   EVEX_V256;
2047     defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2048                                   EVEX_V128;
2049   }
2050 }
2051
2052 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2053                               avx512vl_f32_info>;
2054 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2055                               avx512vl_f64_info>, VEX_W;
2056 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2057                               avx512vl_i32_info>;
2058 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2059                               avx512vl_i64_info>, VEX_W;
2060 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2061                               avx512vl_i8_info>;
2062 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2063                               avx512vl_i16_info>, VEX_W;
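// For reference, the forms defined above assemble as, e.g. (AT&T syntax,
// registers arbitrary):
//   vblendmps %zmm2, %zmm1, %zmm0 {%k1}           (merge masking, rrk)
//   vblendmps %zmm2, %zmm1, %zmm0 {%k1} {z}       (zero masking, rrkz)
//   vblendmps (%rax){1to16}, %zmm1, %zmm0 {%k1}   (embedded broadcast, rmbk)
// Note that blendmask_bw deliberately omits WriteFVarBlendask_rmb, so the
// byte/word blends have no broadcast form.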
2064
2065 //===----------------------------------------------------------------------===//
2066 // Compare Instructions
2067 //===----------------------------------------------------------------------===//
2068
2069 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2070
2071 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
2072                              PatFrag OpNode_su, PatFrag OpNodeSAE_su,
2073                              X86FoldableSchedWrite sched> {
2074   defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2075                       (outs _.KRC:$dst),
2076                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2077                       "vcmp"#_.Suffix,
2078                       "$cc, $src2, $src1", "$src1, $src2, $cc",
2079                       (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2080                       (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2081                                  timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2082   let mayLoad = 1 in
2083   defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2084                     (outs _.KRC:$dst),
2085                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
2086                     "vcmp"#_.Suffix,
2087                     "$cc, $src2, $src1", "$src1, $src2, $cc",
2088                     (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2089                         timm:$cc),
2090                     (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2091                         timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2092                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2093
2094   let Uses = [MXCSR] in
2095   defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2096                      (outs _.KRC:$dst),
2097                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2098                      "vcmp"#_.Suffix,
2099                      "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
2100                      (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2101                                 timm:$cc),
2102                      (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2103                                    timm:$cc)>,
2104                      EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2105
2106   let isCodeGenOnly = 1 in {
2107     let isCommutable = 1 in
2108     def rr : AVX512Ii8<0xC2, MRMSrcReg,
2109                 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2110                 !strconcat("vcmp", _.Suffix,
2111                            "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2112                 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2113                                           _.FRC:$src2,
2114                                           timm:$cc))]>,
2115                 EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2116     def rm : AVX512Ii8<0xC2, MRMSrcMem,
2117               (outs _.KRC:$dst),
2118               (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2119               !strconcat("vcmp", _.Suffix,
2120                          "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2121               [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2122                                         (_.ScalarLdFrag addr:$src2),
2123                                         timm:$cc))]>,
2124               EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2125               Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2126   }
2127 }
2128
2129 def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2130                           (X86cmpms node:$src1, node:$src2, node:$cc), [{
2131   return N->hasOneUse();
2132 }]>;
2133 def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2134                           (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
2135   return N->hasOneUse();
2136 }]>;
2137
2138 let Predicates = [HasAVX512] in {
2139   let ExeDomain = SSEPackedSingle in
2140   defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2141                                    X86cmpms_su, X86cmpmsSAE_su,
2142                                    SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2143   let ExeDomain = SSEPackedDouble in
2144   defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2145                                    X86cmpms_su, X86cmpmsSAE_su,
2146                                    SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
2147 }
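// For reference, the scalar compares above take the predicate as an immediate
// and write a mask register, e.g. (AT&T syntax, registers arbitrary,
// predicate 2 = LE):
//   vcmpss $2, %xmm2, %xmm1, %k1
//   vcmpss $2, {sae}, %xmm2, %xmm1, %k1           (rrb_Int, exceptions suppressed)
// The isCodeGenOnly rr/rm forms use FR32X/FR64X sources, while the *_Int forms
// operate on the full XMM register.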
2148
2149 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2150                               X86FoldableSchedWrite sched,
2151                               X86VectorVTInfo _, bit IsCommutable> {
2152   let isCommutable = IsCommutable, hasSideEffects = 0 in
2153   def rr : AVX512BI<opc, MRMSrcReg,
2154              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2155              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2156              []>, EVEX_4V, Sched<[sched]>;
2157   let mayLoad = 1, hasSideEffects = 0 in
2158   def rm : AVX512BI<opc, MRMSrcMem,
2159              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2160              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2161              []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2162   let isCommutable = IsCommutable, hasSideEffects = 0 in
2163   def rrk : AVX512BI<opc, MRMSrcReg,
2164               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2165               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2166                           "$dst {${mask}}, $src1, $src2}"),
2167               []>, EVEX_4V, EVEX_K, Sched<[sched]>;
2168   let mayLoad = 1, hasSideEffects = 0 in
2169   def rmk : AVX512BI<opc, MRMSrcMem,
2170               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2171               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2172                           "$dst {${mask}}, $src1, $src2}"),
2173               []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2174 }
2175
2176 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2177                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
2178                                   bit IsCommutable> :
2179            avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2180   let mayLoad = 1, hasSideEffects = 0 in {
2181   def rmb : AVX512BI<opc, MRMSrcMem,
2182               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2183               !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2184                                     "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2185               []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2186   def rmbk : AVX512BI<opc, MRMSrcMem,
2187                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2188                                        _.ScalarMemOp:$src2),
2189                !strconcat(OpcodeStr,
2190                           "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2191                           "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2192                []>, EVEX_4V, EVEX_K, EVEX_B,
2193                Sched<[sched.Folded, sched.ReadAfterFold]>;
2194   }
2195 }
2196
2197 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2198                                  X86SchedWriteWidths sched,
2199                                  AVX512VLVectorVTInfo VTInfo, Predicate prd,
2200                                  bit IsCommutable = 0> {
2201   let Predicates = [prd] in
2202   defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2203                               VTInfo.info512, IsCommutable>, EVEX_V512;
2204
2205   let Predicates = [prd, HasVLX] in {
2206     defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2207                                    VTInfo.info256, IsCommutable>, EVEX_V256;
2208     defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2209                                    VTInfo.info128, IsCommutable>, EVEX_V128;
2210   }
2211 }
2212
2213 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2214                                      X86SchedWriteWidths sched,
2215                                      AVX512VLVectorVTInfo VTInfo,
2216                                      Predicate prd, bit IsCommutable = 0> {
2217   let Predicates = [prd] in
2218   defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2219                                   VTInfo.info512, IsCommutable>, EVEX_V512;
2220
2221   let Predicates = [prd, HasVLX] in {
2222     defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2223                                        VTInfo.info256, IsCommutable>, EVEX_V256;
2224     defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2225                                        VTInfo.info128, IsCommutable>, EVEX_V128;
2226   }
2227 }
2228
2229 // This fragment treats the setcc node as commutable to help match loads in both
2230 // operands for PCMPEQ.
2231 def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
2232 def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2233                          (setcc node:$src1, node:$src2, SETGT)>;
2234
2235 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2236 // increase the pattern complexity the way an immediate would.
2237 let AddedComplexity = 2 in {
2238 // FIXME: Is there a better scheduler class for VPCMP?
2239 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2240                       SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2241                 EVEX_CD8<8, CD8VF>, VEX_WIG;
2242
2243 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2244                       SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2245                 EVEX_CD8<16, CD8VF>, VEX_WIG;
2246
2247 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2248                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2249                 EVEX_CD8<32, CD8VF>;
2250
2251 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2252                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2253                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2254
2255 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2256                       SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2257                 EVEX_CD8<8, CD8VF>, VEX_WIG;
2258
2259 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2260                       SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2261                 EVEX_CD8<16, CD8VF>, VEX_WIG;
2262
2263 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2264                       SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2265                 EVEX_CD8<32, CD8VF>;
2266
2267 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2268                       SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2269                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2270 }
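// For reference, these compares write a mask register directly, e.g.
// (AT&T syntax, registers arbitrary):
//   vpcmpeqd %zmm1, %zmm0, %k1
//   vpcmpeqd (%rax){1to16}, %zmm0, %k1 {%k2}      (broadcast + merge masking)
// Only the EQ forms are marked commutable above; swapping the operands of GT
// would change the predicate, so those stay non-commutable.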
2271
2272 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2273                           PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
2274                           X86FoldableSchedWrite sched,
2275                           X86VectorVTInfo _, string Name> {
2276   let isCommutable = 1 in
2277   def rri : AVX512AIi8<opc, MRMSrcReg,
2278              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2279              !strconcat("vpcmp", Suffix,
2280                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2281              [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2282                                                 (_.VT _.RC:$src2),
2283                                                 cond)))]>,
2284              EVEX_4V, Sched<[sched]>;
2285   def rmi : AVX512AIi8<opc, MRMSrcMem,
2286              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2287              !strconcat("vpcmp", Suffix,
2288                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2289              [(set _.KRC:$dst, (_.KVT
2290                                 (Frag:$cc
2291                                  (_.VT _.RC:$src1),
2292                                  (_.VT (_.LdFrag addr:$src2)),
2293                                  cond)))]>,
2294              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2295   let isCommutable = 1 in
2296   def rrik : AVX512AIi8<opc, MRMSrcReg,
2297               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2298                                       u8imm:$cc),
2299               !strconcat("vpcmp", Suffix,
2300                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
2301                          "$dst {${mask}}, $src1, $src2, $cc}"),
2302               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2303                                      (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2304                                                          (_.VT _.RC:$src2),
2305                                                          cond))))]>,
2306               EVEX_4V, EVEX_K, Sched<[sched]>;
2307   def rmik : AVX512AIi8<opc, MRMSrcMem,
2308               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2309                                     u8imm:$cc),
2310               !strconcat("vpcmp", Suffix,
2311                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
2312                          "$dst {${mask}}, $src1, $src2, $cc}"),
2313               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2314                                      (_.KVT
2315                                       (Frag_su:$cc
2316                                        (_.VT _.RC:$src1),
2317                                        (_.VT (_.LdFrag addr:$src2)),
2318                                        cond))))]>,
2319               EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2320
2321   def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
2322                                  (_.VT _.RC:$src1), cond)),
2323             (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2324              _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2325
2326   def : Pat<(and _.KRCWM:$mask,
2327                  (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
2328                                       (_.VT _.RC:$src1), cond))),
2329             (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2330              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2331              (CommFrag.OperandTransform $cc))>;
2332 }
2333
2334 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2335                               PatFrag Frag_su, PatFrag CommFrag,
2336                               PatFrag CommFrag_su, X86FoldableSchedWrite sched,
2337                               X86VectorVTInfo _, string Name> :
2338            avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2339                           sched, _, Name> {
2340   def rmib : AVX512AIi8<opc, MRMSrcMem,
2341              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2342                                      u8imm:$cc),
2343              !strconcat("vpcmp", Suffix,
2344                         "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2345                         "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2346              [(set _.KRC:$dst, (_.KVT (Frag:$cc
2347                                        (_.VT _.RC:$src1),
2348                                        (_.BroadcastLdFrag addr:$src2),
2349                                        cond)))]>,
2350              EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2351   def rmibk : AVX512AIi8<opc, MRMSrcMem,
2352               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2353                                        _.ScalarMemOp:$src2, u8imm:$cc),
2354               !strconcat("vpcmp", Suffix,
2355                   "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2356                   "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2357               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2358                                      (_.KVT (Frag_su:$cc
2359                                              (_.VT _.RC:$src1),
2360                                              (_.BroadcastLdFrag addr:$src2),
2361                                              cond))))]>,
2362               EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2363
2364   def : Pat<(_.KVT (CommFrag:$cc (_.BroadcastLdFrag addr:$src2),
2365                     (_.VT _.RC:$src1), cond)),
2366             (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2367              _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2368
2369   def : Pat<(and _.KRCWM:$mask,
2370                  (_.KVT (CommFrag_su:$cc (_.BroadcastLdFrag addr:$src2),
2371                                       (_.VT _.RC:$src1), cond))),
2372             (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2373              _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2374              (CommFrag_su.OperandTransform $cc))>;
2375 }
2376
2377 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2378                              PatFrag Frag_su, PatFrag CommFrag,
2379                              PatFrag CommFrag_su, X86SchedWriteWidths sched,
2380                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2381   let Predicates = [prd] in
2382   defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2383                           sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2384
2385   let Predicates = [prd, HasVLX] in {
2386     defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2387                                sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2388     defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2389                                sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2390   }
2391 }
2392
2393 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2394                                  PatFrag Frag_su, PatFrag CommFrag,
2395                                  PatFrag CommFrag_su, X86SchedWriteWidths sched,
2396                                  AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2397   let Predicates = [prd] in
2398   defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2399                               sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2400
2401   let Predicates = [prd, HasVLX] in {
2402     defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2403                                    sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2404     defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2405                                    sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2406   }
2407 }
2408
2409 def X86pcmpm_imm : SDNodeXForm<setcc, [{
2410   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2411   uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2412   return getI8Imm(SSECC, SDLoc(N));
2413 }]>;
2414
2415 // Swapped operand version of the above.
2416 def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2417   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2418   uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2419   SSECC = X86::getSwappedVPCMPImm(SSECC);
2420   return getI8Imm(SSECC, SDLoc(N));
2421 }]>;
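// For example, assuming the usual VPCMP predicate encoding (1 = LT, 6 = NLE),
// a signed less-than compare with swapped operands becomes predicate 6
// (not-less-or-equal, i.e. greater-than).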
2422
2423 def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2424                        (setcc node:$src1, node:$src2, node:$cc), [{
2425   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2426   return !ISD::isUnsignedIntSetCC(CC);
2427 }], X86pcmpm_imm>;
2428
2429 def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2430                           (setcc node:$src1, node:$src2, node:$cc), [{
2431   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2432   return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2433 }], X86pcmpm_imm>;
2434
2435 // Same as above, but commutes immediate. Use for load folding.
2436 def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2437                                (setcc node:$src1, node:$src2, node:$cc), [{
2438   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2439   return !ISD::isUnsignedIntSetCC(CC);
2440 }], X86pcmpm_imm_commute>;
2441
2442 def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2443                                   (setcc node:$src1, node:$src2, node:$cc), [{
2444   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2445   return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2446 }], X86pcmpm_imm_commute>;
2447
2448 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2449                         (setcc node:$src1, node:$src2, node:$cc), [{
2450   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2451   return ISD::isUnsignedIntSetCC(CC);
2452 }], X86pcmpm_imm>;
2453
2454 def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2455                            (setcc node:$src1, node:$src2, node:$cc), [{
2456   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2457   return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2458 }], X86pcmpm_imm>;
2459
2460 // Same as above, but commutes immediate. Use for load folding.
2461 def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2462                                 (setcc node:$src1, node:$src2, node:$cc), [{
2463   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2464   return ISD::isUnsignedIntSetCC(CC);
2465 }], X86pcmpm_imm_commute>;
2466
2467 def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2468                                    (setcc node:$src1, node:$src2, node:$cc), [{
2469   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2470   return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2471 }], X86pcmpm_imm_commute>;
2472
2473 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
2474 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2475                                 X86pcmpm_commute, X86pcmpm_commute_su,
2476                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2477                                 EVEX_CD8<8, CD8VF>;
2478 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2479                                  X86pcmpum_commute, X86pcmpum_commute_su,
2480                                  SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2481                                  EVEX_CD8<8, CD8VF>;
2482
2483 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2484                                 X86pcmpm_commute, X86pcmpm_commute_su,
2485                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2486                                 VEX_W, EVEX_CD8<16, CD8VF>;
2487 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2488                                  X86pcmpum_commute, X86pcmpum_commute_su,
2489                                  SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2490                                  VEX_W, EVEX_CD8<16, CD8VF>;
2491
2492 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2493                                     X86pcmpm_commute, X86pcmpm_commute_su,
2494                                     SchedWriteVecALU, avx512vl_i32_info,
2495                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2496 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2497                                      X86pcmpum_commute, X86pcmpum_commute_su,
2498                                      SchedWriteVecALU, avx512vl_i32_info,
2499                                      HasAVX512>, EVEX_CD8<32, CD8VF>;
2500
2501 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2502                                     X86pcmpm_commute, X86pcmpm_commute_su,
2503                                     SchedWriteVecALU, avx512vl_i64_info,
2504                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2505 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2506                                      X86pcmpum_commute, X86pcmpum_commute_su,
2507                                      SchedWriteVecALU, avx512vl_i64_info,
2508                                      HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
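// For reference, the predicate-immediate compares above assemble as, e.g.
// (AT&T syntax, registers arbitrary, predicate 2 = LE):
//   vpcmpd  $2, %zmm1, %zmm0, %k1
//   vpcmpud $2, (%rax){1to16}, %zmm0, %k1 {%k2}   (broadcast + merge masking)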
2509
2510 def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2511                          (X86cmpm node:$src1, node:$src2, node:$cc), [{
2512   return N->hasOneUse();
2513 }]>;
2514 def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2515                             (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
2516   return N->hasOneUse();
2517 }]>;
2518
2519 def X86cmpm_imm_commute : SDNodeXForm<timm, [{
2520   uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
2521   return getI8Imm(Imm, SDLoc(N));
2522 }]>;
2523
2524 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2525                               string Name> {
2526 let Uses = [MXCSR], mayRaiseFPException = 1 in {
2527   defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2528                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2529                    "vcmp"#_.Suffix,
2530                    "$cc, $src2, $src1", "$src1, $src2, $cc",
2531                    (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2532                    (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2533                    1>, Sched<[sched]>;
2534
2535   defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2536                 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2537                 "vcmp"#_.Suffix,
2538                 "$cc, $src2, $src1", "$src1, $src2, $cc",
2539                 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2540                              timm:$cc),
2541                 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2542                             timm:$cc)>,
2543                 Sched<[sched.Folded, sched.ReadAfterFold]>;
2544
2545   defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2546                 (outs _.KRC:$dst),
2547                 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2548                 "vcmp"#_.Suffix,
2549                 "$cc, ${src2}"#_.BroadcastStr#", $src1",
2550                 "$src1, ${src2}"#_.BroadcastStr#", $cc",
2551                 (X86any_cmpm (_.VT _.RC:$src1),
2552                              (_.VT (_.BroadcastLdFrag addr:$src2)),
2553                              timm:$cc),
2554                 (X86cmpm_su (_.VT _.RC:$src1),
2555                             (_.VT (_.BroadcastLdFrag addr:$src2)),
2556                             timm:$cc)>,
2557                 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2558   }
2559
2560   // Patterns for selecting compares with the load in the other operand.
2561   def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2562                          timm:$cc),
2563             (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2564                                                       (X86cmpm_imm_commute timm:$cc))>;
2565
2566   def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2567                                             (_.VT _.RC:$src1),
2568                                             timm:$cc)),
2569             (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2570                                                        _.RC:$src1, addr:$src2,
2571                                                        (X86cmpm_imm_commute timm:$cc))>;
2572
2573   def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2574                          (_.VT _.RC:$src1), timm:$cc),
2575             (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2576                                                        (X86cmpm_imm_commute timm:$cc))>;
2577
2578   def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2579                                             (_.VT _.RC:$src1),
2580                                             timm:$cc)),
2581             (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2582                                                         _.RC:$src1, addr:$src2,
2583                                                         (X86cmpm_imm_commute timm:$cc))>;
2584 }
2585
2586 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2587   // comparison code form (VCMP[EQ/LT/LE/...])
2588   let Uses = [MXCSR] in
2589   defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2590                      (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2591                      "vcmp"#_.Suffix,
2592                      "$cc, {sae}, $src2, $src1",
2593                      "$src1, $src2, {sae}, $cc",
2594                      (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2595                      (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2596                                     timm:$cc)>,
2597                      EVEX_B, Sched<[sched]>;
2598 }
2599
2600 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
2601   let Predicates = [HasAVX512] in {
2602     defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2603                 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2604
2605   }
2606   let Predicates = [HasAVX512,HasVLX] in {
2607    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2608    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2609   }
2610 }
2611
2612 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2613                           AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2614 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2615                           AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
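// For reference, the packed FP compares above assemble as, e.g. (AT&T syntax,
// registers arbitrary, predicate 1 = LT_OS):
//   vcmpps $1, %zmm2, %zmm1, %k1
//   vcmpps $1, (%rax){1to16}, %zmm1, %k1          (embedded broadcast, rmbi)
//   vcmpps $1, {sae}, %zmm2, %zmm1, %k1           (suppress-all-exceptions, rrib)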
2616
2617 // Patterns to select fp compares with a load as the first operand.
2618 let Predicates = [HasAVX512] in {
2619   def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2620                             timm:$cc)),
2621             (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2622
2623   def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2624                             timm:$cc)),
2625             (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2626 }
2627
2628 // ----------------------------------------------------------------
2629 // FPClass
2630
2631 def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2632                               (X86Vfpclasss node:$src1, node:$src2), [{
2633   return N->hasOneUse();
2634 }]>;
2635
2636 def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2637                              (X86Vfpclass node:$src1, node:$src2), [{
2638   return N->hasOneUse();
2639 }]>;
2640
2641 // Handle the scalar fpclass instruction:  mask = op(reg_scalar, imm)
2642 //                                                op(mem_scalar, imm)
2643 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2644                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2645                                  Predicate prd> {
2646   let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2647       def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2648                       (ins _.RC:$src1, i32u8imm:$src2),
2649                       OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2650                       [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2651                               (i32 timm:$src2)))]>,
2652                       Sched<[sched]>;
2653       def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2654                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2655                       OpcodeStr##_.Suffix#
2656                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2657                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2658                                       (X86Vfpclasss_su (_.VT _.RC:$src1),
2659                                       (i32 timm:$src2))))]>,
2660                       EVEX_K, Sched<[sched]>;
2661     def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2662                     (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2663                     OpcodeStr##_.Suffix##
2664                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2665                     [(set _.KRC:$dst,
2666                           (X86Vfpclasss _.ScalarIntMemCPat:$src1,
2667                                        (i32 timm:$src2)))]>,
2668                     Sched<[sched.Folded, sched.ReadAfterFold]>;
2669     def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2670                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2671                     OpcodeStr##_.Suffix##
2672                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2673                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
2674                         (X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
2675                             (i32 timm:$src2))))]>,
2676                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2677   }
2678 }
2679
2680 // Handle the vector fpclass instruction:  mask = fpclass(reg_vec, imm)
2681 //                                                fpclass(mem_vec, imm)
2682 //                                                fpclass(broadcast(eltVT), imm)
2683 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2684                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2685                                  string mem>{
2686   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2687   def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2688                       (ins _.RC:$src1, i32u8imm:$src2),
2689                       OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2690                       [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2691                                        (i32 timm:$src2)))]>,
2692                       Sched<[sched]>;
2693   def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2694                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2695                       OpcodeStr##_.Suffix#
2696                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2697                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2698                                        (X86Vfpclass_su (_.VT _.RC:$src1),
2699                                        (i32 timm:$src2))))]>,
2700                       EVEX_K, Sched<[sched]>;
2701   def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2702                     (ins _.MemOp:$src1, i32u8imm:$src2),
2703                     OpcodeStr##_.Suffix#"{"#mem#"}"#
2704                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2705                     [(set _.KRC:$dst,(X86Vfpclass
2706                                      (_.VT (_.LdFrag addr:$src1)),
2707                                      (i32 timm:$src2)))]>,
2708                     Sched<[sched.Folded, sched.ReadAfterFold]>;
2709   def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2710                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2711                     OpcodeStr##_.Suffix#"{"#mem#"}"#
2712                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2713                     [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2714                                   (_.VT (_.LdFrag addr:$src1)),
2715                                   (i32 timm:$src2))))]>,
2716                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2717   def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2718                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2719                     OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
2720                                       _.BroadcastStr##", $dst|$dst, ${src1}"
2721                                                   ##_.BroadcastStr##", $src2}",
2722                     [(set _.KRC:$dst,(X86Vfpclass
2723                                      (_.VT (_.BroadcastLdFrag addr:$src1)),
2724                                      (i32 timm:$src2)))]>,
2725                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2726   def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2727                     (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2728                     OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
2729                           _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
2730                                                    _.BroadcastStr##", $src2}",
2731                     [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2732                                      (_.VT (_.BroadcastLdFrag addr:$src1)),
2733                                      (i32 timm:$src2))))]>,
2734                     EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
2735   }
2736
2737   // Also accept the register and broadcast forms written with the x, y, z suffix
2738   // that is normally used to disambiguate the memory form.
2739   def : InstAlias<OpcodeStr#_.Suffix#mem#
2740                   "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2741                   (!cast<Instruction>(NAME#"rr")
2742                    _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2743   def : InstAlias<OpcodeStr#_.Suffix#mem#
2744                   "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2745                   (!cast<Instruction>(NAME#"rrk")
2746                    _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2747   def : InstAlias<OpcodeStr#_.Suffix#mem#
2748                   "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2749                   _.BroadcastStr#", $src2}",
2750                   (!cast<Instruction>(NAME#"rmb")
2751                    _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2752   def : InstAlias<OpcodeStr#_.Suffix#mem#
2753                   "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2754                   "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2755                   (!cast<Instruction>(NAME#"rmbk")
2756                    _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2757 }
2758
2759 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2760                                      bits<8> opc, X86SchedWriteWidths sched,
2761                                      Predicate prd>{
2762   let Predicates = [prd] in {
2763     defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2764                                       _.info512, "z">, EVEX_V512;
2765   }
2766   let Predicates = [prd, HasVLX] in {
2767     defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2768                                       _.info128, "x">, EVEX_V128;
2769     defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2770                                       _.info256, "y">, EVEX_V256;
2771   }
2772 }
2773
2774 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2775                                  bits<8> opcScalar, X86SchedWriteWidths sched,
2776                                  Predicate prd> {
2777   defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2778                                       sched, prd>,
2779                                       EVEX_CD8<32, CD8VF>;
2780   defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2781                                       sched, prd>,
2782                                       EVEX_CD8<64, CD8VF> , VEX_W;
2783   defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2784                                    sched.Scl, f32x_info, prd>, VEX_LIG,
2785                                    EVEX_CD8<32, CD8VT1>;
2786   defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2787                                    sched.Scl, f64x_info, prd>, VEX_LIG,
2788                                    EVEX_CD8<64, CD8VT1>, VEX_W;
2789 }
2790
2791 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
2792                                       HasDQI>, AVX512AIi8Base, EVEX;
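// For reference, vfpclass sets a mask bit per element according to the
// categories selected by the immediate, e.g. (AT&T syntax, registers arbitrary,
// immediate value illustrative only):
//   vfpclassps $0x22, %zmm0, %k1
//   vfpclassss $0x22, %xmm0, %k1 {%k2}
// The vector register forms are also accepted with an x/y/z-suffixed mnemonic
// via the InstAliases in avx512_vector_fpclass above.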
2793
2794 //-----------------------------------------------------------------
2795 // Mask register copy, including
2796 // - copy between mask registers
2797 // - load/store mask registers
2798 // - copy from GPR to mask register and vice versa
2799 //
2800 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2801                          string OpcodeStr, RegisterClass KRC,
2802                          ValueType vvt, X86MemOperand x86memop> {
2803   let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2804   def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2805              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2806              Sched<[WriteMove]>;
2807   def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2808              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2809              [(set KRC:$dst, (vvt (load addr:$src)))]>,
2810              Sched<[WriteLoad]>;
2811   def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2812              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2813              [(store KRC:$src, addr:$dst)]>,
2814              Sched<[WriteStore]>;
2815 }
2816
2817 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2818                              string OpcodeStr,
2819                              RegisterClass KRC, RegisterClass GRC> {
2820   let hasSideEffects = 0 in {
2821     def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2822                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2823                Sched<[WriteMove]>;
2824     def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2825                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2826                Sched<[WriteMove]>;
2827   }
2828 }
2829
2830 let Predicates = [HasDQI] in
2831   defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2832                avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2833                VEX, PD;
2834
2835 let Predicates = [HasAVX512] in
2836   defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2837                avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2838                VEX, PS;
2839
2840 let Predicates = [HasBWI] in {
2841   defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2842                VEX, PD, VEX_W;
2843   defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2844                VEX, XD;
2845   defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2846                VEX, PS, VEX_W;
2847   defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2848                VEX, XD, VEX_W;
2849 }
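// For reference, the moves defined above assemble as, e.g. (AT&T syntax,
// registers arbitrary):
//   kmovw %k1, %k2         (kk, mask to mask)
//   kmovw (%rsp), %k1      (km, load)
//   kmovw %k1, (%rsp)      (mk, store)
//   kmovw %eax, %k1        (kr, GPR to mask)
//   kmovw %k1, %eax        (rk, mask to GPR)
// KMOVB requires DQI and KMOVD/KMOVQ require BWI, per the Predicates above.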
2850
2851 // GR from/to mask register
2852 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2853           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2854 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2855           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2856
2857 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2858           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2859 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2860           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2861
2862 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2863           (KMOVWrk VK16:$src)>;
2864 def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2865           (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2866 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2867           (COPY_TO_REGCLASS VK16:$src, GR32)>;
2868 def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2869           (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2870
2871 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2872           (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2873 def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2874           (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2875 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2876           (COPY_TO_REGCLASS VK8:$src, GR32)>;
2877 def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2878           (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
2879
2880 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2881           (COPY_TO_REGCLASS GR32:$src, VK32)>;
2882 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2883           (COPY_TO_REGCLASS VK32:$src, GR32)>;
2884 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2885           (COPY_TO_REGCLASS GR64:$src, VK64)>;
2886 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2887           (COPY_TO_REGCLASS VK64:$src, GR64)>;
2888
2889 // Load/store kreg
2890 let Predicates = [HasDQI] in {
2891   def : Pat<(store VK1:$src, addr:$dst),
2892             (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
2893
2894   def : Pat<(v1i1 (load addr:$src)),
2895             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2896   def : Pat<(v2i1 (load addr:$src)),
2897             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2898   def : Pat<(v4i1 (load addr:$src)),
2899             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2900 }
2901
2902 let Predicates = [HasAVX512] in {
2903   def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2904             (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2905   def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2906             (KMOVWkm addr:$src)>;
2907 }
2908
2909 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2910                          SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2911                                               SDTCVecEltisVT<1, i1>,
2912                                               SDTCisPtrTy<2>]>>;
2913
2914 let Predicates = [HasAVX512] in {
2915   multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2916     def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2917               (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2918
2919     def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2920               (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2921
2922     def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2923               (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2924
2925     def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2926               (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2927   }
2928
2929   defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2930   defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2931   defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2932   defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2933   defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2934   defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2935   defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
2936
2937   def : Pat<(insert_subvector (v16i1 immAllZerosV),
2938                               (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2939             (COPY_TO_REGCLASS
2940              (KMOVWkr (AND32ri8
2941                        (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2942                        (i32 1))), VK16)>;
2943 }
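// For reference, the insert_subvector pattern above materializes a single i1
// coming from a GPR roughly as (registers arbitrary):
//   andl  $1, %eax
//   kmovw %eax, %k1
// i.e. the bit is masked down to one and then moved into the low bit of a
// 16-bit mask register.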
2944
2945 // Mask unary operation
2946 // - KNOT
2947 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2948                             RegisterClass KRC, SDPatternOperator OpNode,
2949                             X86FoldableSchedWrite sched, Predicate prd> {
2950   let Predicates = [prd] in
2951     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2952                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2953                [(set KRC:$dst, (OpNode KRC:$src))]>,
2954                Sched<[sched]>;
2955 }
2956
2957 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2958                                 SDPatternOperator OpNode,
2959                                 X86FoldableSchedWrite sched> {
2960   defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2961                             sched, HasDQI>, VEX, PD;
2962   defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2963                             sched, HasAVX512>, VEX, PS;
2964   defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2965                             sched, HasBWI>, VEX, PD, VEX_W;
2966   defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2967                             sched, HasBWI>, VEX, PS, VEX_W;
2968 }
2969
2970 // TODO - do we need an X86SchedWriteWidths::KMASK type?
2971 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2972
2973 // KNL does not support KMOVB; an 8-bit mask is promoted to 16-bit instead.
2974 let Predicates = [HasAVX512, NoDQI] in
2975 def : Pat<(vnot VK8:$src),
2976           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2977
2978 def : Pat<(vnot VK4:$src),
2979           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2980 def : Pat<(vnot VK2:$src),
2981           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
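// For reference (AT&T syntax, registers arbitrary):
//   knotw %k1, %k2
// KNOTB requires DQI, so under NoDQI an 8-bit mask is widened to VK16 and
// inverted with KNOTW; the 2- and 4-bit masks go through VK16 unconditionally,
// per the patterns above.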
2982
2983 // Mask binary operation
2984 // - KAND, KANDN, KOR, KXNOR, KXOR
2985 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2986                            RegisterClass KRC, SDPatternOperator OpNode,
2987                            X86FoldableSchedWrite sched, Predicate prd,
2988                            bit IsCommutable> {
2989   let Predicates = [prd], isCommutable = IsCommutable in
2990     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2991                !strconcat(OpcodeStr,
2992                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2993                [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
2994                Sched<[sched]>;
2995 }
2996
2997 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2998                                  SDPatternOperator OpNode,
2999                                  X86FoldableSchedWrite sched, bit IsCommutable,
3000                                  Predicate prdW = HasAVX512> {
3001   defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3002                              sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
3003   defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3004                              sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
3005   defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3006                              sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
3007   defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3008                              sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
3009 }
3010
3011 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
3012 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
3013 // These nodes use 'vnot' instead of 'not' to support vectors.
3014 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
3015 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
3016
3017 // TODO - do we need an X86SchedWriteWidths::KMASK type?
3018 defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
3019 defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
3020 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
3021 defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
3022 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
3023 defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
3024
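// As a rough sketch, each defm above expands avx512_mask_binop_all into four
// two-source forms, e.g. for KAND approximately
//   KANDBrr   // VK8,  HasDQI
//   KANDWrr   // VK16, prdW (HasAVX512, or HasDQI for KADD)
//   KANDDrr   // VK32, HasBWI
//   KANDQrr   // VK64, HasBWI
// The W-form names (KANDWrr, KORWrr, ...) are what avx512_binop_pat below
// relies on.
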
3025 multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
3026                             Instruction Inst> {
3027   // With AVX512F, an 8-bit mask is promoted to a 16-bit mask;
3028   // with DQI, v8i1 is legal and the KxxxB instructions are used directly.
3029   let Predicates = [NoDQI] in
3030   def : Pat<(VOpNode VK8:$src1, VK8:$src2),
3031             (COPY_TO_REGCLASS
3032               (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3033                     (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3034
3035   // All types smaller than 8 bits require conversion anyway
3036   def : Pat<(OpNode VK1:$src1, VK1:$src2),
3037         (COPY_TO_REGCLASS (Inst
3038                            (COPY_TO_REGCLASS VK1:$src1, VK16),
3039                            (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
3040   def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3041         (COPY_TO_REGCLASS (Inst
3042                            (COPY_TO_REGCLASS VK2:$src1, VK16),
3043                            (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
3044   def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3045         (COPY_TO_REGCLASS (Inst
3046                            (COPY_TO_REGCLASS VK4:$src1, VK16),
3047                            (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
3048 }
3049
3050 defm : avx512_binop_pat<and,   and,  KANDWrr>;
3051 defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
3052 defm : avx512_binop_pat<or,    or,   KORWrr>;
3053 defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
3054 defm : avx512_binop_pat<xor,   xor,  KXORWrr>;
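
// Approximate effect of the first instantiation above on NoDQI targets: a
// v8i1 AND is selected as
//   (and VK8:$a, VK8:$b)
//     -> (COPY_TO_REGCLASS (KANDWrr (COPY_TO_REGCLASS VK8:$a, VK16),
//                                   (COPY_TO_REGCLASS VK8:$b, VK16)), VK8)
// i.e. the operation runs on the containing 16-bit mask register and the
// result is reinterpreted as v8i1.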
3055
3056 // Mask unpacking
3057 multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
3058                              X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
3059                              Predicate prd> {
3060   let Predicates = [prd] in {
3061     let hasSideEffects = 0 in
3062     def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
3063                (ins Src.KRC:$src1, Src.KRC:$src2),
3064                "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3065                VEX_4V, VEX_L, Sched<[sched]>;
3066
3067     def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
3068               (!cast<Instruction>(NAME##rr) Src.KRC:$src2, Src.KRC:$src1)>;
3069   }
3070 }
3071
3072 defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
3073 defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
3074 defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
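
// Note on operand order (informal): kunpckbw places its first source in the
// upper half of the result, while concat_vectors lists the low half first, so
// the pattern above swaps the operands, roughly
//   (v16i1 (concat_vectors VK8:$lo, VK8:$hi)) -> (KUNPCKBWrr VK8:$hi, VK8:$lo)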
3075
3076 // Mask bit testing
3077 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3078                               SDNode OpNode, X86FoldableSchedWrite sched,
3079                               Predicate prd> {
3080   let Predicates = [prd], Defs = [EFLAGS] in
3081     def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3082                !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3083                [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3084                Sched<[sched]>;
3085 }
3086
3087 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3088                                 X86FoldableSchedWrite sched,
3089                                 Predicate prdW = HasAVX512> {
3090   defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3091                                                                 VEX, PD;
3092   defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3093                                                                 VEX, PS;
3094   defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3095                                                                 VEX, PS, VEX_W;
3096   defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3097                                                                 VEX, PD, VEX_W;
3098 }
3099
3100 // TODO - do we need an X86SchedWriteWidths::KMASK type?
3101 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3102 defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
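
// Roughly, each defm above produces KORTESTBrr/KORTESTWrr/KORTESTDrr/KORTESTQrr
// (and the corresponding KTEST*rr forms). They take two mask registers, have no
// register result, and only define EFLAGS through the X86kortest/X86ktest nodes
// (see Defs = [EFLAGS] in avx512_mask_testop).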
3103
3104 // Mask shift
3105 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3106                                SDNode OpNode, X86FoldableSchedWrite sched> {
3107   let Predicates = [HasAVX512] in
3108     def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3109                  !strconcat(OpcodeStr,
3110                             "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3111                             [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
3112                  Sched<[sched]>;
3113 }
3114
3115 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3116                                  SDNode OpNode, X86FoldableSchedWrite sched> {
3117   defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3118                                sched>, VEX, TAPD, VEX_W;
3119   let Predicates = [HasDQI] in
3120   defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3121                                sched>, VEX, TAPD;
3122   let Predicates = [HasBWI] in {
3123   defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3124                                sched>, VEX, TAPD, VEX_W;
3125   defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3126                                sched>, VEX, TAPD;
3127   }
3128 }
3129
3130 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3131 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
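
// Approximate expansion of the defms above: one immediate-shift form per mask
// width, e.g. KSHIFTLBri/KSHIFTLWri/KSHIFTLDri/KSHIFTLQri, each matching
// (X86kshiftl KRC:$src, (i8 timm:$imm)). The B/W forms use opc1 and the D/Q
// forms use opc2, mirroring the encoding split in avx512_mask_shiftop_w.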
3132
3133 // Patterns for comparing 128/256-bit integer vectors using 512-bit instructions.
3134 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3135                                                  string InstStr,
3136                                                  X86VectorVTInfo Narrow,
3137                                                  X86VectorVTInfo Wide> {
3138 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3139                                 (Narrow.VT Narrow.RC:$src2), cond)),
3140           (COPY_TO_REGCLASS
3141            (!cast<Instruction>(InstStr#"Zrri")
3142             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3143             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3144             (Frag.OperandTransform $cc)), Narrow.KRC)>;
3145
3146 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3147                            (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3148                                                     (Narrow.VT Narrow.RC:$src2),
3149                                                     cond)))),
3150           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3151            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3152            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3153            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3154            (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
3155 }
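
// Informal sketch: for the VPCMPD instantiation below with Narrow = v8i32x_info
// and Wide = v16i32_info, the first pattern roughly becomes
//   (v8i1 (X86pcmpm (v8i32 VR256X:$a), (v8i32 VR256X:$b), cond))
//     -> INSERT_SUBREG each operand into an IMPLICIT_DEF ZMM register,
//        compare with VPCMPDZrri,
//        COPY_TO_REGCLASS the v16i1 result down to VK8
// so only the low 8 bits of the wide compare result are meaningful.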
3156
3157 multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3158                                                      PatFrag CommFrag, PatFrag CommFrag_su,
3159                                                      string InstStr,
3160                                                      X86VectorVTInfo Narrow,
3161                                                      X86VectorVTInfo Wide> {
3162 // Broadcast load.
3163 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3164                                 (Narrow.BroadcastLdFrag addr:$src2), cond)),
3165           (COPY_TO_REGCLASS
3166            (!cast<Instruction>(InstStr#"Zrmib")
3167             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3168             addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>;
3169
3170 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3171                            (Narrow.KVT
3172                             (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3173                                          (Narrow.BroadcastLdFrag addr:$src2),
3174                                          cond)))),
3175           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3176            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3177            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3178            addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
3179
3180 // Commuted with broadcast load.
3181 def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3182                                     (Narrow.VT Narrow.RC:$src1),
3183                                     cond)),
3184           (COPY_TO_REGCLASS
3185            (!cast<Instruction>(InstStr#"Zrmib")
3186             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3187             addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>;
3188
3189 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3190                            (Narrow.KVT
3191                             (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3192                                              (Narrow.VT Narrow.RC:$src1), 
3193                                              cond)))),
3194           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3195            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3196            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3197            addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>;
3198 }
3199
3200 // Same as above, but for fp types which don't use PatFrags.
3201 multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3202                                                 X86VectorVTInfo Narrow,
3203                                                 X86VectorVTInfo Wide> {
3204 def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1),
3205                                    (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3206           (COPY_TO_REGCLASS
3207            (!cast<Instruction>(InstStr#"Zrri")
3208             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3209             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3210             timm:$cc), Narrow.KRC)>;
3211
3212 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3213                            (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3214                                        (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3215           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3216            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3217            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3218            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3219            timm:$cc), Narrow.KRC)>;
3220
3221 // Broadcast load.
3222 def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1),
3223                                    (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3224           (COPY_TO_REGCLASS
3225            (!cast<Instruction>(InstStr#"Zrmbi")
3226             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3227             addr:$src2, timm:$cc), Narrow.KRC)>;
3228
3229 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3230                            (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3231                                        (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3232           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3233            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3234            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3235            addr:$src2, timm:$cc), Narrow.KRC)>;
3236
3237 // Commuted with broadcast load.
3238 def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3239                                    (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3240           (COPY_TO_REGCLASS
3241            (!cast<Instruction>(InstStr#"Zrmbi")
3242             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3243             addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3244
3245 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3246                            (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3247                                        (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3248           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3249            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3250            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3251            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3252 }
3253
3254 let Predicates = [HasAVX512, NoVLX] in {
3255   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3256   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3257
3258   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3259   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3260
3261   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3262   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3263
3264   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3265   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3266
3267   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v8i32x_info, v16i32_info>;
3268   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3269
3270   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v4i32x_info, v16i32_info>;
3271   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3272
3273   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3274   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3275
3276   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3277   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3278
3279   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3280   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3281   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3282   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
3283 }
3284
3285 let Predicates = [HasBWI, NoVLX] in {
3286   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3287   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3288
3289   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3290   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3291
3292   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3293   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3294
3295   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3296   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3297 }
3298
3299 // Mask setting all 0s or 1s
3300 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3301   let Predicates = [HasAVX512] in
3302     let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3303         SchedRW = [WriteZero] in
3304       def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3305                      [(set KRC:$dst, (VT Val))]>;
3306 }
3307
3308 multiclass avx512_mask_setop_w<PatFrag Val> {
3309   defm W : avx512_mask_setop<VK16, v16i1, Val>;
3310   defm D : avx512_mask_setop<VK32, v32i1, Val>;
3311   defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3312 }
3313
3314 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3315 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
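
// For reference (approximate): the defms above produce the pseudos KSET0W/
// KSET0D/KSET0Q and KSET1W/KSET1D/KSET1Q; the W forms are reused by the
// patterns below for the narrower v1i1/v2i1/v4i1/v8i1 constants.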
3316
3317 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
3318 let Predicates = [HasAVX512] in {
3319   def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3320   def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3321   def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3322   def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3323   def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3324   def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3325   def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3326   def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3327 }
3328
3329 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
3330 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3331                                              RegisterClass RC, ValueType VT> {
3332   def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3333             (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3334
3335   def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3336             (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3337 }
3338 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3339 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3340 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3341 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3342 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3343 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3344
3345 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3346 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3347 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3348 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3349 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3350
3351 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3352 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3353 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3354 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3355
3356 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3357 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3358 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3359
3360 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3361 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3362
3363 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
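
// Rough effect of one instantiation above (subRC = VK8, RC = VK16): index-0
// mask extracts/inserts become plain register-class copies, so no instruction
// is emitted:
//   (v8i1 (extract_subvector (v16i1 VK16:$m), (iPTR 0)))
//     -> (COPY_TO_REGCLASS VK16:$m, VK8)
//   (v16i1 (insert_subvector undef, VK8:$m, (iPTR 0)))
//     -> (COPY_TO_REGCLASS VK8:$m, VK16)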
3364
3365 //===----------------------------------------------------------------------===//
3366 // AVX-512 - Aligned and unaligned load and store
3367 //
3368
3369 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3370                        X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3371                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3372                        bit NoRMPattern = 0,
3373                        SDPatternOperator SelectOprr = vselect> {
3374   let hasSideEffects = 0 in {
3375   let isMoveReg = 1 in
3376   def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3377                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3378                     _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3379                     EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
3380   def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3381                       (ins _.KRCWM:$mask,  _.RC:$src),
3382                       !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3383                        "${dst} {${mask}} {z}, $src}"),
3384                        [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3385                                            (_.VT _.RC:$src),
3386                                            _.ImmAllZerosV)))], _.ExeDomain>,
3387                        EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3388
3389   let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3390   def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3391                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3392                     !if(NoRMPattern, [],
3393                         [(set _.RC:$dst,
3394                           (_.VT (ld_frag addr:$src)))]),
3395                     _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3396                     EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
3397
3398   let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3399     def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3400                       (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3401                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3402                       "${dst} {${mask}}, $src1}"),
3403                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3404                                           (_.VT _.RC:$src1),
3405                                           (_.VT _.RC:$src0))))], _.ExeDomain>,
3406                        EVEX, EVEX_K, Sched<[Sched.RR]>;
3407     def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3408                      (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3409                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3410                       "${dst} {${mask}}, $src1}"),
3411                      [(set _.RC:$dst, (_.VT
3412                          (vselect _.KRCWM:$mask,
3413                           (_.VT (ld_frag addr:$src1)),
3414                            (_.VT _.RC:$src0))))], _.ExeDomain>,
3415                      EVEX, EVEX_K, Sched<[Sched.RM]>;
3416   }
3417   def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3418                   (ins _.KRCWM:$mask, _.MemOp:$src),
3419                   OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3420                                 "${dst} {${mask}} {z}, $src}",
3421                   [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
3422                     (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3423                   _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3424   }
3425   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3426             (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3427
3428   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3429             (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3430
3431   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3432             (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
3433              _.KRCWM:$mask, addr:$ptr)>;
3434 }
3435
3436 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3437                                  AVX512VLVectorVTInfo _, Predicate prd,
3438                                  X86SchedWriteMoveLSWidths Sched,
3439                                  string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3440   let Predicates = [prd] in
3441   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3442                        _.info512.AlignedLdFrag, masked_load_aligned,
3443                        Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3444
3445   let Predicates = [prd, HasVLX] in {
3446   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3447                           _.info256.AlignedLdFrag, masked_load_aligned,
3448                           Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3449   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3450                           _.info128.AlignedLdFrag, masked_load_aligned,
3451                           Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
3452   }
3453 }
3454
3455 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3456                           AVX512VLVectorVTInfo _, Predicate prd,
3457                           X86SchedWriteMoveLSWidths Sched,
3458                           string EVEX2VEXOvrd, bit NoRMPattern = 0,
3459                           SDPatternOperator SelectOprr = vselect> {
3460   let Predicates = [prd] in
3461   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3462                        masked_load, Sched.ZMM, "",
3463                        NoRMPattern, SelectOprr>, EVEX_V512;
3464
3465   let Predicates = [prd, HasVLX] in {
3466   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3467                          masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3468                          NoRMPattern, SelectOprr>, EVEX_V256;
3469   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3470                          masked_load, Sched.XMM, EVEX2VEXOvrd,
3471                          NoRMPattern, SelectOprr>, EVEX_V128;
3472   }
3473 }
3474
3475 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3476                         X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3477                         X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3478                         bit NoMRPattern = 0> {
3479   let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3480   let isMoveReg = 1 in
3481   def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3482                          OpcodeStr # "\t{$src, $dst|$dst, $src}",
3483                          [], _.ExeDomain>, EVEX,
3484                          FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3485                          EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3486   def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3487                          (ins _.KRCWM:$mask, _.RC:$src),
3488                          OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3489                          "${dst} {${mask}}, $src}",
3490                          [], _.ExeDomain>,  EVEX, EVEX_K,
3491                          FoldGenData<BaseName#_.ZSuffix#rrk>,
3492                          Sched<[Sched.RR]>;
3493   def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3494                           (ins _.KRCWM:$mask, _.RC:$src),
3495                           OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3496                           "${dst} {${mask}} {z}, $src}",
3497                           [], _.ExeDomain>, EVEX, EVEX_KZ,
3498                           FoldGenData<BaseName#_.ZSuffix#rrkz>,
3499                           Sched<[Sched.RR]>;
3500   }
3501
3502   let hasSideEffects = 0, mayStore = 1 in
3503   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3504                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3505                     !if(NoMRPattern, [],
3506                         [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3507                     _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3508                     EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3509   def mrk : AVX512PI<opc, MRMDestMem, (outs),
3510                      (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3511               OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3512                [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
3513                NotMemoryFoldable;
3514
3515   def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3516            (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3517                                                         _.KRCWM:$mask, _.RC:$src)>;
3518
3519   def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3520                   (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3521                    _.RC:$dst, _.RC:$src), 0>;
3522   def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3523                   (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3524                    _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3525   def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3526                   (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3527                    _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3528 }
3529
3530 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3531                             AVX512VLVectorVTInfo _, Predicate prd,
3532                             X86SchedWriteMoveLSWidths Sched,
3533                             string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3534   let Predicates = [prd] in
3535   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3536                         masked_store, Sched.ZMM, "",
3537                         NoMRPattern>, EVEX_V512;
3538   let Predicates = [prd, HasVLX] in {
3539     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3540                              masked_store, Sched.YMM,
3541                              EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3542     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3543                              masked_store, Sched.XMM, EVEX2VEXOvrd,
3544                              NoMRPattern>, EVEX_V128;
3545   }
3546 }
3547
3548 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3549                                   AVX512VLVectorVTInfo _, Predicate prd,
3550                                   X86SchedWriteMoveLSWidths Sched,
3551                                   string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3552   let Predicates = [prd] in
3553   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3554                         masked_store_aligned, Sched.ZMM, "",
3555                         NoMRPattern>, EVEX_V512;
3556
3557   let Predicates = [prd, HasVLX] in {
3558     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3559                              masked_store_aligned, Sched.YMM,
3560                              EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3561     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3562                              masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3563                              NoMRPattern>, EVEX_V128;
3564   }
3565 }
3566
3567 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3568                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3569                avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3570                                       HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3571                PS, EVEX_CD8<32, CD8VF>;
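
// Approximate naming sketch for the defm pair above: per vector width (the
// Z/Z256/Z128 defm names) the load side produces rr/rrk/rrkz/rm/rmk/rmkz and
// the store side mr/mrk plus the rr_REV/rrk_REV/rrkz_REV encodings, e.g.
// VMOVAPSZrm, VMOVAPSZ256rmk, VMOVAPSZ128mr. The *_NOVLX spill pseudos defined
// further below cover the 128/256-bit forms when VLX is unavailable.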
3572
3573 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3574                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3575                avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3576                                       HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3577                PD, VEX_W, EVEX_CD8<64, CD8VF>;
3578
3579 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3580                               SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3581                avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3582                                SchedWriteFMoveLS, "VMOVUPS">,
3583                                PS, EVEX_CD8<32, CD8VF>;
3584
3585 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3586                               SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3587                avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3588                                SchedWriteFMoveLS, "VMOVUPD">,
3589                PD, VEX_W, EVEX_CD8<64, CD8VF>;
3590
3591 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3592                                        HasAVX512, SchedWriteVecMoveLS,
3593                                        "VMOVDQA", 1>,
3594                  avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3595                                         HasAVX512, SchedWriteVecMoveLS,
3596                                         "VMOVDQA", 1>,
3597                  PD, EVEX_CD8<32, CD8VF>;
3598
3599 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3600                                        HasAVX512, SchedWriteVecMoveLS,
3601                                        "VMOVDQA">,
3602                  avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3603                                         HasAVX512, SchedWriteVecMoveLS,
3604                                         "VMOVDQA">,
3605                  PD, VEX_W, EVEX_CD8<64, CD8VF>;
3606
3607 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3608                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
3609                 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3610                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3611                 XD, EVEX_CD8<8, CD8VF>;
3612
3613 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3614                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3615                  avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3616                                  SchedWriteVecMoveLS, "VMOVDQU", 1>,
3617                  XD, VEX_W, EVEX_CD8<16, CD8VF>;
3618
3619 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3620                                 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3621                  avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3622                                  SchedWriteVecMoveLS, "VMOVDQU", 1>,
3623                  XS, EVEX_CD8<32, CD8VF>;
3624
3625 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3626                                 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3627                  avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3628                                  SchedWriteVecMoveLS, "VMOVDQU">,
3629                  XS, VEX_W, EVEX_CD8<64, CD8VF>;
3630
3631 // Special instructions to help with spilling when we don't have VLX. We need
3632 // to load or store from a ZMM register instead. These are converted in
3633 // expandPostRAPseudos.
3634 let isReMaterializable = 1, canFoldAsLoad = 1,
3635     isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3636 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3637                             "", []>, Sched<[WriteFLoadX]>;
3638 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3639                             "", []>, Sched<[WriteFLoadY]>;
3640 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3641                             "", []>, Sched<[WriteFLoadX]>;
3642 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3643                             "", []>, Sched<[WriteFLoadY]>;
3644 }
3645
3646 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3647 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3648                             "", []>, Sched<[WriteFStoreX]>;
3649 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3650                             "", []>, Sched<[WriteFStoreY]>;
3651 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3652                             "", []>, Sched<[WriteFStoreX]>;
3653 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3654                             "", []>, Sched<[WriteFStoreY]>;
3655 }
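
// Rough sketch of the later expansion (see expandPostRAPseudos): a spill pseudo
// such as
//   VMOVAPSZ256rm_NOVLX $ymm, addr
// is expected to be rewritten as a full-width move of the containing ZMM
// register (e.g. VMOVAPSZrm), which is encodable without VLX.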
3656
3657 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3658                           (v8i64 VR512:$src))),
3659    (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3660                                               VK8), VR512:$src)>;
3661
3662 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3663                            (v16i32 VR512:$src))),
3664                   (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3665
3666 // These patterns exist to prevent the above patterns from introducing a second
3667 // mask inversion when one already exists.
3668 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3669                           (v8i64 immAllZerosV),
3670                           (v8i64 VR512:$src))),
3671                  (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3672 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3673                            (v16i32 immAllZerosV),
3674                            (v16i32 VR512:$src))),
3675                   (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3676
3677 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3678                               X86VectorVTInfo Wide> {
3679  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3680                                Narrow.RC:$src1, Narrow.RC:$src0)),
3681            (EXTRACT_SUBREG
3682             (Wide.VT
3683              (!cast<Instruction>(InstrStr#"rrk")
3684               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3685               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3686               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3687             Narrow.SubRegIdx)>;
3688
3689  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3690                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3691            (EXTRACT_SUBREG
3692             (Wide.VT
3693              (!cast<Instruction>(InstrStr#"rrkz")
3694               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3695               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3696             Narrow.SubRegIdx)>;
3697 }
3698
3699 // Patterns for handling selects of 128/256-bit vectors when VLX isn't
3700 // available. Use a 512-bit operation and extract the result.
3701 let Predicates = [HasAVX512, NoVLX] in {
3702   defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3703   defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3704   defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3705   defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3706
3707   defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3708   defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3709   defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3710   defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3711 }
3712
3713 let Predicates = [HasBWI, NoVLX] in {
3714   defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3715   defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3716
3717   defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3718   defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3719 }
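
// Informal example for the v8i32x_info/v16i32_info instantiation above: a
// masked 256-bit select is lowered roughly as
//   (v8i32 (vselect VK8WM:$m, VR256X:$a, VR256X:$b))
//     -> INSERT_SUBREG $a and $b into IMPLICIT_DEF ZMM registers,
//        (VMOVDQA32Zrrk zmm($b), (COPY_TO_REGCLASS VK8WM:$m, VK16WM), zmm($a)),
//        EXTRACT_SUBREG the low 256 bits of the result.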
3720
3721 let Predicates = [HasAVX512] in {
3722   // 512-bit load.
3723   def : Pat<(alignedloadv16i32 addr:$src),
3724             (VMOVDQA64Zrm addr:$src)>;
3725   def : Pat<(alignedloadv32i16 addr:$src),
3726             (VMOVDQA64Zrm addr:$src)>;
3727   def : Pat<(alignedloadv64i8 addr:$src),
3728             (VMOVDQA64Zrm addr:$src)>;
3729   def : Pat<(loadv16i32 addr:$src),
3730             (VMOVDQU64Zrm addr:$src)>;
3731   def : Pat<(loadv32i16 addr:$src),
3732             (VMOVDQU64Zrm addr:$src)>;
3733   def : Pat<(loadv64i8 addr:$src),
3734             (VMOVDQU64Zrm addr:$src)>;
3735
3736   // 512-bit store.
3737   def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3738             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3739   def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3740             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3741   def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3742             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3743   def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3744             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3745   def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3746             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3747   def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3748             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3749 }
3750
3751 let Predicates = [HasVLX] in {
3752   // 128-bit load.
3753   def : Pat<(alignedloadv4i32 addr:$src),
3754             (VMOVDQA64Z128rm addr:$src)>;
3755   def : Pat<(alignedloadv8i16 addr:$src),
3756             (VMOVDQA64Z128rm addr:$src)>;
3757   def : Pat<(alignedloadv16i8 addr:$src),
3758             (VMOVDQA64Z128rm addr:$src)>;
3759   def : Pat<(loadv4i32 addr:$src),
3760             (VMOVDQU64Z128rm addr:$src)>;
3761   def : Pat<(loadv8i16 addr:$src),
3762             (VMOVDQU64Z128rm addr:$src)>;
3763   def : Pat<(loadv16i8 addr:$src),
3764             (VMOVDQU64Z128rm addr:$src)>;
3765
3766   // 128-bit store.
3767   def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3768             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3769   def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3770             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3771   def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3772             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3773   def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3774             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3775   def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3776             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3777   def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3778             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3779
3780   // 256-bit load.
3781   def : Pat<(alignedloadv8i32 addr:$src),
3782             (VMOVDQA64Z256rm addr:$src)>;
3783   def : Pat<(alignedloadv16i16 addr:$src),
3784             (VMOVDQA64Z256rm addr:$src)>;
3785   def : Pat<(alignedloadv32i8 addr:$src),
3786             (VMOVDQA64Z256rm addr:$src)>;
3787   def : Pat<(loadv8i32 addr:$src),
3788             (VMOVDQU64Z256rm addr:$src)>;
3789   def : Pat<(loadv16i16 addr:$src),
3790             (VMOVDQU64Z256rm addr:$src)>;
3791   def : Pat<(loadv32i8 addr:$src),
3792             (VMOVDQU64Z256rm addr:$src)>;
3793
3794   // 256-bit store.
3795   def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3796             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3797   def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3798             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3799   def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3800             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3801   def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3802             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3803   def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3804             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3805   def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3806             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3807 }
3808
3809 // Move Int Doubleword to Packed Double Int
3810 //
3811 let ExeDomain = SSEPackedInt in {
3812 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3813                       "vmovd\t{$src, $dst|$dst, $src}",
3814                       [(set VR128X:$dst,
3815                         (v4i32 (scalar_to_vector GR32:$src)))]>,
3816                         EVEX, Sched<[WriteVecMoveFromGpr]>;
3817 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3818                       "vmovd\t{$src, $dst|$dst, $src}",
3819                       [(set VR128X:$dst,
3820                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3821                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3822 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3823                       "vmovq\t{$src, $dst|$dst, $src}",
3824                         [(set VR128X:$dst,
3825                           (v2i64 (scalar_to_vector GR64:$src)))]>,
3826                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3827 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3828 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3829                       (ins i64mem:$src),
3830                       "vmovq\t{$src, $dst|$dst, $src}", []>,
3831                       EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
3832 let isCodeGenOnly = 1 in {
3833 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3834                        "vmovq\t{$src, $dst|$dst, $src}",
3835                        [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3836                        EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3837 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3838                          "vmovq\t{$src, $dst|$dst, $src}",
3839                          [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3840                          EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3841 }
3842 } // ExeDomain = SSEPackedInt
3843
3844 // Move Int Doubleword to Single Scalar
3845 //
3846 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3847 def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3848                       "vmovd\t{$src, $dst|$dst, $src}",
3849                       [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3850                       EVEX, Sched<[WriteVecMoveFromGpr]>;
3851 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3852
3853 // Move doubleword from xmm register to r/m32
3854 //
3855 let ExeDomain = SSEPackedInt in {
3856 def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3857                        "vmovd\t{$src, $dst|$dst, $src}",
3858                        [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3859                                         (iPTR 0)))]>,
3860                        EVEX, Sched<[WriteVecMoveToGpr]>;
3861 def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3862                        (ins i32mem:$dst, VR128X:$src),
3863                        "vmovd\t{$src, $dst|$dst, $src}",
3864                        [(store (i32 (extractelt (v4i32 VR128X:$src),
3865                                      (iPTR 0))), addr:$dst)]>,
3866                        EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3867 } // ExeDomain = SSEPackedInt
3868
3869 // Move quadword from xmm1 register to r/m64
3870 //
3871 let ExeDomain = SSEPackedInt in {
3872 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3873                       "vmovq\t{$src, $dst|$dst, $src}",
3874                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3875                                                    (iPTR 0)))]>,
3876                       PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
3877                       Requires<[HasAVX512]>;
3878
3879 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3880 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3881                       "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
3882                       EVEX, VEX_W, Sched<[WriteVecStore]>,
3883                       Requires<[HasAVX512, In64BitMode]>;
3884
3885 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3886                       (ins i64mem:$dst, VR128X:$src),
3887                       "vmovq\t{$src, $dst|$dst, $src}",
3888                       [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3889                               addr:$dst)]>,
3890                       EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3891                       Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
3892
3893 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3894 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3895                              (ins VR128X:$src),
3896                              "vmovq\t{$src, $dst|$dst, $src}", []>,
3897                              EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
3898 } // ExeDomain = SSEPackedInt
3899
3900 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3901                 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3902
3903 let Predicates = [HasAVX512] in {
3904   def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3905             (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3906 }
3907
3908 // Move Scalar Single to Doubleword Int
3909 //
3910 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3911 def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3912                       (ins FR32X:$src),
3913                       "vmovd\t{$src, $dst|$dst, $src}",
3914                       [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3915                       EVEX, Sched<[WriteVecMoveToGpr]>;
3916 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3917
3918 // Move Quadword Int to Packed Quadword Int
3919 //
3920 let ExeDomain = SSEPackedInt in {
3921 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3922                       (ins i64mem:$src),
3923                       "vmovq\t{$src, $dst|$dst, $src}",
3924                       [(set VR128X:$dst,
3925                         (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3926                       EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3927 } // ExeDomain = SSEPackedInt
3928
3929 // Allow "vmovd" but print "vmovq".
3930 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3931                 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3932 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3933                 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3934
3935 // Conversions between masks and scalar fp.
3936 def : Pat<(v32i1 (bitconvert FR32X:$src)),
3937           (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
3938 def : Pat<(f32 (bitconvert VK32:$src)),
3939           (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
3940
3941 def : Pat<(v64i1 (bitconvert FR64X:$src)),
3942           (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
3943 def : Pat<(f64 (bitconvert VK64:$src)),
3944           (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
3945
3946 //===----------------------------------------------------------------------===//
3947 // AVX-512  MOVSS, MOVSD
3948 //===----------------------------------------------------------------------===//
3949
3950 multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3951                               X86VectorVTInfo _> {
3952   let Predicates = [HasAVX512, OptForSize] in
3953   def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3954              (ins _.RC:$src1, _.RC:$src2),
3955              !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3956              [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3957              _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
3958   def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3959               (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3960               !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3961               "$dst {${mask}} {z}, $src1, $src2}"),
3962               [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3963                                       (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3964                                       _.ImmAllZerosV)))],
3965               _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
3966   let Constraints = "$src0 = $dst"  in
3967   def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3968              (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3969              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3970              "$dst {${mask}}, $src1, $src2}"),
3971              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3972                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3973                                      (_.VT _.RC:$src0))))],
3974              _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
3975   let canFoldAsLoad = 1, isReMaterializable = 1 in {
3976   def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3977              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3978              [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3979              _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3980   // _alt version uses FR32/FR64 register class.
3981   let isCodeGenOnly = 1 in
3982   def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3983                  !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3984                  [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3985                  _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3986   }
3987   let mayLoad = 1, hasSideEffects = 0 in {
3988     let Constraints = "$src0 = $dst" in
3989     def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3990                (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3991                !strconcat(asm, "\t{$src, $dst {${mask}}|",
3992                "$dst {${mask}}, $src}"),
3993                [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3994     def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3995                (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3996                !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3997                "$dst {${mask}} {z}, $src}"),
3998                [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
3999   }
4000   def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
4001              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4002              [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
4003              EVEX, Sched<[WriteFStore]>;
4004   let mayStore = 1, hasSideEffects = 0 in
4005   def mrk: AVX512PI<0x11, MRMDestMem, (outs),
4006               (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
4007               !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4008               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
4009               NotMemoryFoldable;
4010 }
4011
4012 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
4013                                   VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
4014
4015 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
4016                                   VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
4017
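// Illustrative mapping (a sketch, not normative): assuming the usual clang
// lowering of the AVX-512 masked scalar-move intrinsics, code such as
//   __m128 r = _mm_mask_move_ss(src0, k, a, b);   // merge masking
//   __m128 z = _mm_maskz_move_ss(k, a, b);        // zero masking
// is expected to select the masked forms defined above (or the lowering
// patterns below) and assemble to
//   vmovss %xmm2, %xmm1, %xmm0 {%k1}
//   vmovss %xmm2, %xmm1, %xmm0 {%k1} {z}
// with the analogous _mm_mask_move_sd / _mm_maskz_move_sd for VMOVSDZ.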
4018
4019 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
4020                                        PatLeaf ZeroFP, X86VectorVTInfo _> {
4021
4022 def : Pat<(_.VT (OpNode _.RC:$src0,
4023                         (_.VT (scalar_to_vector
4024                                   (_.EltVT (X86selects VK1WM:$mask,
4025                                                        (_.EltVT _.FRC:$src1),
4026                                                        (_.EltVT _.FRC:$src2))))))),
4027           (!cast<Instruction>(InstrStr#rrk)
4028                         (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
4029                         VK1WM:$mask,
4030                         (_.VT _.RC:$src0),
4031                         (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4032
4033 def : Pat<(_.VT (OpNode _.RC:$src0,
4034                         (_.VT (scalar_to_vector
4035                                   (_.EltVT (X86selects VK1WM:$mask,
4036                                                        (_.EltVT _.FRC:$src1),
4037                                                        (_.EltVT ZeroFP))))))),
4038           (!cast<Instruction>(InstrStr#rrkz)
4039                         VK1WM:$mask,
4040                         (_.VT _.RC:$src0),
4041                         (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4042 }
4043
4044 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4045                                         dag Mask, RegisterClass MaskRC> {
4046
4047 def : Pat<(masked_store
4048              (_.info512.VT (insert_subvector undef,
4049                                (_.info128.VT _.info128.RC:$src),
4050                                (iPTR 0))), addr:$dst, Mask),
4051           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4052                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4053                       _.info128.RC:$src)>;
4054
4055 }
4056
4057 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4058                                                AVX512VLVectorVTInfo _,
4059                                                dag Mask, RegisterClass MaskRC,
4060                                                SubRegIndex subreg> {
4061
4062 def : Pat<(masked_store
4063              (_.info512.VT (insert_subvector undef,
4064                                (_.info128.VT _.info128.RC:$src),
4065                                (iPTR 0))), addr:$dst, Mask),
4066           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4067                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4068                       _.info128.RC:$src)>;
4069
4070 }
4071
4072 // This matches the more recent codegen from clang that avoids emitting a
4073 // 512-bit masked store directly. Codegen will widen a 128-bit masked store
4074 // to 512 bits on AVX512F-only targets; see the note after this multiclass.
4075 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4076                                                AVX512VLVectorVTInfo _,
4077                                                dag Mask512, dag Mask128,
4078                                                RegisterClass MaskRC,
4079                                                SubRegIndex subreg> {
4080
4081 // AVX512F pattern.
4082 def : Pat<(masked_store
4083              (_.info512.VT (insert_subvector undef,
4084                                (_.info128.VT _.info128.RC:$src),
4085                                (iPTR 0))), addr:$dst, Mask512),
4086           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4087                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4088                       _.info128.RC:$src)>;
4089
4090 // AVX512VL pattern.
4091 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4092           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4093                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4094                       _.info128.RC:$src)>;
4095 }
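// Illustrative sketch of the two cases above (assuming a masked scalar store
// such as _mm_mask_store_ss is emitted by clang as a one-element
// masked_store): on AVX512F-only targets the 128-bit masked store is widened
// to 512 bits and stripped back down by the first pattern, while with
// AVX512VL the second pattern matches the unwidened 128-bit masked store
// directly.  Either way the selected instruction is a masked scalar store:
//   vmovss %xmm0, (%rdi) {%k1}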
4096
4097 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4098                                        dag Mask, RegisterClass MaskRC> {
4099
4100 def : Pat<(_.info128.VT (extract_subvector
4101                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
4102                                         _.info512.ImmAllZerosV)),
4103                            (iPTR 0))),
4104           (!cast<Instruction>(InstrStr#rmkz)
4105                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4106                       addr:$srcAddr)>;
4107
4108 def : Pat<(_.info128.VT (extract_subvector
4109                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4110                       (_.info512.VT (insert_subvector undef,
4111                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4112                             (iPTR 0))))),
4113                 (iPTR 0))),
4114           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4115                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4116                       addr:$srcAddr)>;
4117
4118 }
4119
4120 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4121                                               AVX512VLVectorVTInfo _,
4122                                               dag Mask, RegisterClass MaskRC,
4123                                               SubRegIndex subreg> {
4124
4125 def : Pat<(_.info128.VT (extract_subvector
4126                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
4127                                         _.info512.ImmAllZerosV)),
4128                            (iPTR 0))),
4129           (!cast<Instruction>(InstrStr#rmkz)
4130                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4131                       addr:$srcAddr)>;
4132
4133 def : Pat<(_.info128.VT (extract_subvector
4134                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4135                       (_.info512.VT (insert_subvector undef,
4136                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4137                             (iPTR 0))))),
4138                 (iPTR 0))),
4139           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4140                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4141                       addr:$srcAddr)>;
4142
4143 }
4144
4145 // This matches the more recent codegen from clang that avoids emitting a
4146 // 512-bit masked load directly. Codegen will widen a 128-bit masked load to
4147 // 512 bits on AVX512F-only targets; see the note after this multiclass.
4148 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4149                                               AVX512VLVectorVTInfo _,
4150                                               dag Mask512, dag Mask128,
4151                                               RegisterClass MaskRC,
4152                                               SubRegIndex subreg> {
4153 // AVX512F patterns.
4154 def : Pat<(_.info128.VT (extract_subvector
4155                          (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4156                                         _.info512.ImmAllZerosV)),
4157                            (iPTR 0))),
4158           (!cast<Instruction>(InstrStr#rmkz)
4159                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4160                       addr:$srcAddr)>;
4161
4162 def : Pat<(_.info128.VT (extract_subvector
4163                 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4164                       (_.info512.VT (insert_subvector undef,
4165                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4166                             (iPTR 0))))),
4167                 (iPTR 0))),
4168           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4169                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4170                       addr:$srcAddr)>;
4171
4172 // AVX512VL patterns.
4173 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4174                          _.info128.ImmAllZerosV)),
4175           (!cast<Instruction>(InstrStr#rmkz)
4176                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4177                       addr:$srcAddr)>;
4178
4179 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4180                          (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4181           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4182                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4183                       addr:$srcAddr)>;
4184 }
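// Illustrative sketch, mirroring the store case above: a masked scalar load
// such as
//   __m128 z = _mm_maskz_load_ss(k, p);
//   __m128 m = _mm_mask_load_ss(passthru, k, p);
// is emitted as a one-element masked_load; on AVX512F-only targets it is
// widened to 512 bits and matched by the first two patterns, while the
// AVX512VL patterns match the 128-bit form directly.  The selected
// instructions are
//   vmovss (%rdi), %xmm0 {%k1} {z}     and     vmovss (%rdi), %xmm0 {%k1}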
4185
4186 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4187 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4188
4189 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4190                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4191 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4192                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4193 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4194                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4195
4196 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4197                    (v16i1 (insert_subvector
4198                            (v16i1 immAllZerosV),
4199                            (v4i1 (extract_subvector
4200                                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4201                                   (iPTR 0))),
4202                            (iPTR 0))),
4203                    (v4i1 (extract_subvector
4204                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4205                           (iPTR 0))), GR8, sub_8bit>;
4206 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4207                    (v8i1
4208                     (extract_subvector
4209                      (v16i1
4210                       (insert_subvector
4211                        (v16i1 immAllZerosV),
4212                        (v2i1 (extract_subvector
4213                               (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4214                               (iPTR 0))),
4215                        (iPTR 0))),
4216                      (iPTR 0))),
4217                    (v2i1 (extract_subvector
4218                           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4219                           (iPTR 0))), GR8, sub_8bit>;
4220
4221 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4222                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4223 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4224                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4225 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4226                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4227
4228 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4229                    (v16i1 (insert_subvector
4230                            (v16i1 immAllZerosV),
4231                            (v4i1 (extract_subvector
4232                                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4233                                   (iPTR 0))),
4234                            (iPTR 0))),
4235                    (v4i1 (extract_subvector
4236                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4237                           (iPTR 0))), GR8, sub_8bit>;
4238 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4239                    (v8i1
4240                     (extract_subvector
4241                      (v16i1
4242                       (insert_subvector
4243                        (v16i1 immAllZerosV),
4244                        (v2i1 (extract_subvector
4245                               (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4246                               (iPTR 0))),
4247                        (iPTR 0))),
4248                      (iPTR 0))),
4249                    (v2i1 (extract_subvector
4250                           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4251                           (iPTR 0))), GR8, sub_8bit>;
4252
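// Scalar f32/f64 selects on a 1-bit mask are implemented with the masked
// scalar moves defined above: the FR32X/FR64X operands are copied into
// VR128X, a masked VMOVSS/VMOVSD performs the select on the low element, and
// the result is copied back to the scalar register class.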
4253 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4254           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4255            (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4256            VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4257            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4258
4259 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4260           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4261            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4262
4263 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4264           (COPY_TO_REGCLASS
4265            (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4266                                                        VK1WM:$mask, addr:$src)),
4267            FR32X)>;
4268 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4269           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4270
4271 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4272           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4273            (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4274            VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4275            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4276
4277 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4278           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4279            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4280
4281 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4282           (COPY_TO_REGCLASS
4283            (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4284                                                        VK1WM:$mask, addr:$src)),
4285            FR64X)>;
4286 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4287           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
4288
4289 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4290   def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4291                            (ins VR128X:$src1, VR128X:$src2),
4292                            "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4293                            []>, XS, EVEX_4V, VEX_LIG,
4294                            FoldGenData<"VMOVSSZrr">,
4295                            Sched<[SchedWriteFShuffle.XMM]>;
4296
4297   let Constraints = "$src0 = $dst" in
4298   def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4299                              (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4300                                                    VR128X:$src1, VR128X:$src2),
4301                              "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4302                                         "$dst {${mask}}, $src1, $src2}",
4303                              []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4304                              FoldGenData<"VMOVSSZrrk">,
4305                              Sched<[SchedWriteFShuffle.XMM]>;
4306
4307   def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4308                          (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4309                          "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4310                                     "$dst {${mask}} {z}, $src1, $src2}",
4311                          []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4312                          FoldGenData<"VMOVSSZrrkz">,
4313                          Sched<[SchedWriteFShuffle.XMM]>;
4314
4315   def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4316                            (ins VR128X:$src1, VR128X:$src2),
4317                            "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4318                            []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4319                            FoldGenData<"VMOVSDZrr">,
4320                            Sched<[SchedWriteFShuffle.XMM]>;
4321
4322   let Constraints = "$src0 = $dst" in
4323   def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4324                              (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4325                                                    VR128X:$src1, VR128X:$src2),
4326                              "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4327                                         "$dst {${mask}}, $src1, $src2}",
4328                              []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4329                              VEX_W, FoldGenData<"VMOVSDZrrk">,
4330                              Sched<[SchedWriteFShuffle.XMM]>;
4331
4332   def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4333                               (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4334                                                           VR128X:$src2),
4335                               "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4336                                          "$dst {${mask}} {z}, $src1, $src2}",
4337                               []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4338                               VEX_W, FoldGenData<"VMOVSDZrrkz">,
4339                               Sched<[SchedWriteFShuffle.XMM]>;
4340 }
4341
4342 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4343                 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4344 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4345                              "$dst {${mask}}, $src1, $src2}",
4346                 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4347                                 VR128X:$src1, VR128X:$src2), 0>;
4348 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4349                              "$dst {${mask}} {z}, $src1, $src2}",
4350                 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4351                                  VR128X:$src1, VR128X:$src2), 0>;
4352 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4353                 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4354 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4355                              "$dst {${mask}}, $src1, $src2}",
4356                 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4357                                 VR128X:$src1, VR128X:$src2), 0>;
4358 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4359                              "$dst {${mask}} {z}, $src1, $src2}",
4360                 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4361                                  VR128X:$src1, VR128X:$src2), 0>;
4362
4363 let Predicates = [HasAVX512, OptForSize] in {
4364   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4365             (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4366   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4367             (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4368
4369   // Move low f32 and clear high bits.
4370   def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4371             (SUBREG_TO_REG (i32 0),
4372              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4373               (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4374   def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4375             (SUBREG_TO_REG (i32 0),
4376              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4377               (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4378
4379   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4380             (SUBREG_TO_REG (i32 0),
4381              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4382               (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4383   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4384             (SUBREG_TO_REG (i32 0),
4385              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4386               (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4387 }
4388
4389 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4390 // VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
4391 let Predicates = [HasAVX512, OptForSpeed] in {
4392   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4393             (SUBREG_TO_REG (i32 0),
4394              (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4395                           (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4396                           (i8 1))), sub_xmm)>;
4397   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4398             (SUBREG_TO_REG (i32 0),
4399              (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4400                           (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4401                           (i8 3))), sub_xmm)>;
4402 }
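// Note (illustrative): these forms blend against a zeroed XMM register
// (VBLENDPS with immediate 1, VPBLENDW with immediate 3) instead of using
// VMOVSS/VMOVSD, trading access to XMM16-31 for the blend's better
// throughput as described above.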
4403
4404 let Predicates = [HasAVX512] in {
4405   def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4406             (VMOVSSZrm addr:$src)>;
4407   def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4408             (VMOVSDZrm addr:$src)>;
4409
4410   // Represent the same patterns above but in the form they appear for
4411   // 256-bit types.
4412   def : Pat<(v8f32 (X86vzload32 addr:$src)),
4413             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4414   def : Pat<(v4f64 (X86vzload64 addr:$src)),
4415             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4416
4417   // Represent the same patterns above but in the form they appear for
4418   // 512-bit types.
4419   def : Pat<(v16f32 (X86vzload32 addr:$src)),
4420             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4421   def : Pat<(v8f64 (X86vzload64 addr:$src)),
4422             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4423 }
4424
4425 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4426 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4427                                 (ins VR128X:$src),
4428                                 "vmovq\t{$src, $dst|$dst, $src}",
4429                                 [(set VR128X:$dst, (v2i64 (X86vzmovl
4430                                                    (v2i64 VR128X:$src))))]>,
4431                                 EVEX, VEX_W;
4432 }
4433
4434 let Predicates = [HasAVX512] in {
4435   def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4436             (VMOVDI2PDIZrr GR32:$src)>;
4437
4438   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4439             (VMOV64toPQIZrr GR64:$src)>;
4440
4441   // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4442   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
4443             (VMOVDI2PDIZrm addr:$src)>;
4444   def : Pat<(v4i32 (X86vzload32 addr:$src)),
4445             (VMOVDI2PDIZrm addr:$src)>;
4446   def : Pat<(v8i32 (X86vzload32 addr:$src)),
4447             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4448   def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4449             (VMOVZPQILo2PQIZrr VR128X:$src)>;
4450   def : Pat<(v2i64 (X86vzload64 addr:$src)),
4451             (VMOVQI2PQIZrm addr:$src)>;
4452   def : Pat<(v4i64 (X86vzload64 addr:$src)),
4453             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4454
4455   // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4456   def : Pat<(v16i32 (X86vzload32 addr:$src)),
4457             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4458   def : Pat<(v8i64 (X86vzload64 addr:$src)),
4459             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4460
4461   def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4462             (SUBREG_TO_REG (i32 0),
4463              (v2f64 (VMOVZPQILo2PQIZrr
4464                      (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4465              sub_xmm)>;
4466   def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4467             (SUBREG_TO_REG (i32 0),
4468              (v2i64 (VMOVZPQILo2PQIZrr
4469                      (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4470              sub_xmm)>;
4471
4472   def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4473             (SUBREG_TO_REG (i32 0),
4474              (v2f64 (VMOVZPQILo2PQIZrr
4475                      (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4476              sub_xmm)>;
4477   def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4478             (SUBREG_TO_REG (i32 0),
4479              (v2i64 (VMOVZPQILo2PQIZrr
4480                      (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4481              sub_xmm)>;
4482 }
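// For example: a zero-extending scalar integer load such as
//   vmovd (%rdi), %xmm0     ; VMOVDI2PDIZrm
//   vmovq (%rdi), %xmm0     ; VMOVQI2PQIZrm
// already clears every bit above the loaded element in the full zmm register,
// so the 256-/512-bit X86vzload patterns above only need a SUBREG_TO_REG
// wrapper around the 128-bit instruction rather than a wider load.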
4483
4484 //===----------------------------------------------------------------------===//
4485 // AVX-512 - Non-temporals
4486 //===----------------------------------------------------------------------===//
4487
4488 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4489                       (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4490                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4491                       EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4492
4493 let Predicates = [HasVLX] in {
4494   def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4495                        (ins i256mem:$src),
4496                        "vmovntdqa\t{$src, $dst|$dst, $src}",
4497                        [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4498                        EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4499
4500   def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4501                       (ins i128mem:$src),
4502                       "vmovntdqa\t{$src, $dst|$dst, $src}",
4503                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4504                       EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4505 }
4506
4507 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4508                         X86SchedWriteMoveLS Sched,
4509                         PatFrag st_frag = alignednontemporalstore> {
4510   let SchedRW = [Sched.MR], AddedComplexity = 400 in
4511   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4512                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4513                     [(st_frag (_.VT _.RC:$src), addr:$dst)],
4514                     _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4515 }
4516
4517 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4518                            AVX512VLVectorVTInfo VTInfo,
4519                            X86SchedWriteMoveLSWidths Sched> {
4520   let Predicates = [HasAVX512] in
4521     defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4522
4523   let Predicates = [HasAVX512, HasVLX] in {
4524     defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4525     defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4526   }
4527 }
4528
4529 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4530                                 SchedWriteVecMoveLSNT>, PD;
4531 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4532                                 SchedWriteFMoveLSNT>, PD, VEX_W;
4533 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4534                                 SchedWriteFMoveLSNT>, PS;
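// Illustrative mapping (assuming the usual clang lowering of the streaming
// intrinsics to aligned non-temporal loads/stores):
//   _mm512_stream_si512(p, v);                  // -> VMOVNTDQZmr
//   _mm512_stream_pd(p, v);                     // -> VMOVNTPDZmr
//   _mm512_stream_ps(p, v);                     // -> VMOVNTPSZmr
//   __m512i x = _mm512_stream_load_si512(p);    // -> VMOVNTDQAZrm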
4535
4536 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4537   def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4538             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4539   def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4540             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4541   def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4542             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4543
4544   def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4545             (VMOVNTDQAZrm addr:$src)>;
4546   def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4547             (VMOVNTDQAZrm addr:$src)>;
4548   def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4549             (VMOVNTDQAZrm addr:$src)>;
4550   def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4551             (VMOVNTDQAZrm addr:$src)>;
4552   def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4553             (VMOVNTDQAZrm addr:$src)>;
4554   def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4555             (VMOVNTDQAZrm addr:$src)>;
4556 }
4557
4558 let Predicates = [HasVLX], AddedComplexity = 400 in {
4559   def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4560             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4561   def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4562             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4563   def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4564             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4565
4566   def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4567             (VMOVNTDQAZ256rm addr:$src)>;
4568   def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4569             (VMOVNTDQAZ256rm addr:$src)>;
4570   def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4571             (VMOVNTDQAZ256rm addr:$src)>;
4572   def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4573             (VMOVNTDQAZ256rm addr:$src)>;
4574   def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4575             (VMOVNTDQAZ256rm addr:$src)>;
4576   def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4577             (VMOVNTDQAZ256rm addr:$src)>;
4578
4579   def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4580             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4581   def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4582             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4583   def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4584             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4585
4586   def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4587             (VMOVNTDQAZ128rm addr:$src)>;
4588   def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4589             (VMOVNTDQAZ128rm addr:$src)>;
4590   def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4591             (VMOVNTDQAZ128rm addr:$src)>;
4592   def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4593             (VMOVNTDQAZ128rm addr:$src)>;
4594   def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4595             (VMOVNTDQAZ128rm addr:$src)>;
4596   def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4597             (VMOVNTDQAZ128rm addr:$src)>;
4598 }
4599
4600 //===----------------------------------------------------------------------===//
4601 // AVX-512 - Integer arithmetic
4602 //===----------------------------------------------------------------------===//
4603 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4604                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4605                            bit IsCommutable = 0> {
4606   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4607                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4608                     "$src2, $src1", "$src1, $src2",
4609                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4610                     IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
4611                     Sched<[sched]>;
4612
4613   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4614                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4615                   "$src2, $src1", "$src1, $src2",
4616                   (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4617                   AVX512BIBase, EVEX_4V,
4618                   Sched<[sched.Folded, sched.ReadAfterFold]>;
4619 }
4620
4621 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4622                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
4623                             bit IsCommutable = 0> :
4624            avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4625   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4626                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4627                   "${src2}"##_.BroadcastStr##", $src1",
4628                   "$src1, ${src2}"##_.BroadcastStr,
4629                   (_.VT (OpNode _.RC:$src1,
4630                                 (_.BroadcastLdFrag addr:$src2)))>,
4631                   AVX512BIBase, EVEX_4V, EVEX_B,
4632                   Sched<[sched.Folded, sched.ReadAfterFold]>;
4633 }
4634
4635 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4636                               AVX512VLVectorVTInfo VTInfo,
4637                               X86SchedWriteWidths sched, Predicate prd,
4638                               bit IsCommutable = 0> {
4639   let Predicates = [prd] in
4640     defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4641                              IsCommutable>, EVEX_V512;
4642
4643   let Predicates = [prd, HasVLX] in {
4644     defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4645                                 sched.YMM, IsCommutable>, EVEX_V256;
4646     defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4647                                 sched.XMM, IsCommutable>, EVEX_V128;
4648   }
4649 }
4650
4651 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4652                                AVX512VLVectorVTInfo VTInfo,
4653                                X86SchedWriteWidths sched, Predicate prd,
4654                                bit IsCommutable = 0> {
4655   let Predicates = [prd] in
4656     defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4657                              IsCommutable>, EVEX_V512;
4658
4659   let Predicates = [prd, HasVLX] in {
4660     defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4661                                  sched.YMM, IsCommutable>, EVEX_V256;
4662     defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4663                                  sched.XMM, IsCommutable>, EVEX_V128;
4664   }
4665 }
4666
4667 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4668                                 X86SchedWriteWidths sched, Predicate prd,
4669                                 bit IsCommutable = 0> {
4670   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4671                                   sched, prd, IsCommutable>,
4672                                   VEX_W, EVEX_CD8<64, CD8VF>;
4673 }
4674
4675 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4676                                 X86SchedWriteWidths sched, Predicate prd,
4677                                 bit IsCommutable = 0> {
4678   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4679                                   sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4680 }
4681
4682 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4683                                 X86SchedWriteWidths sched, Predicate prd,
4684                                 bit IsCommutable = 0> {
4685   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4686                                  sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4687                                  VEX_WIG;
4688 }
4689
4690 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4691                                 X86SchedWriteWidths sched, Predicate prd,
4692                                 bit IsCommutable = 0> {
4693   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4694                                  sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4695                                  VEX_WIG;
4696 }
4697
4698 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4699                                  SDNode OpNode, X86SchedWriteWidths sched,
4700                                  Predicate prd, bit IsCommutable = 0> {
4701   defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4702                                    IsCommutable>;
4703
4704   defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4705                                    IsCommutable>;
4706 }
4707
4708 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4709                                  SDNode OpNode, X86SchedWriteWidths sched,
4710                                  Predicate prd, bit IsCommutable = 0> {
4711   defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4712                                    IsCommutable>;
4713
4714   defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4715                                    IsCommutable>;
4716 }
4717
4718 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4719                                   bits<8> opc_d, bits<8> opc_q,
4720                                   string OpcodeStr, SDNode OpNode,
4721                                   X86SchedWriteWidths sched,
4722                                   bit IsCommutable = 0> {
4723   defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4724                                     sched, HasAVX512, IsCommutable>,
4725               avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4726                                     sched, HasBWI, IsCommutable>;
4727 }
4728
4729 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4730                             X86FoldableSchedWrite sched,
4731                             SDNode OpNode, X86VectorVTInfo _Src,
4732                             X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4733                             bit IsCommutable = 0> {
4734   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4735                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4736                             "$src2, $src1","$src1, $src2",
4737                             (_Dst.VT (OpNode
4738                                          (_Src.VT _Src.RC:$src1),
4739                                          (_Src.VT _Src.RC:$src2))),
4740                             IsCommutable>,
4741                             AVX512BIBase, EVEX_4V, Sched<[sched]>;
4742   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4743                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4744                         "$src2, $src1", "$src1, $src2",
4745                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4746                                       (_Src.LdFrag addr:$src2)))>,
4747                         AVX512BIBase, EVEX_4V,
4748                         Sched<[sched.Folded, sched.ReadAfterFold]>;
4749
4750   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4751                     (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4752                     OpcodeStr,
4753                     "${src2}"##_Brdct.BroadcastStr##", $src1",
4754                      "$src1, ${src2}"##_Brdct.BroadcastStr,
4755                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4756                                  (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
4757                     AVX512BIBase, EVEX_4V, EVEX_B,
4758                     Sched<[sched.Folded, sched.ReadAfterFold]>;
4759 }
4760
4761 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4762                                     SchedWriteVecALU, 1>;
4763 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4764                                     SchedWriteVecALU, 0>;
4765 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
4766                                     SchedWriteVecALU, HasBWI, 1>;
4767 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
4768                                     SchedWriteVecALU, HasBWI, 0>;
4769 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
4770                                      SchedWriteVecALU, HasBWI, 1>;
4771 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
4772                                      SchedWriteVecALU, HasBWI, 0>;
4773 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4774                                     SchedWritePMULLD, HasAVX512, 1>, T8PD;
4775 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4776                                     SchedWriteVecIMul, HasBWI, 1>;
4777 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4778                                     SchedWriteVecIMul, HasDQI, 1>, T8PD,
4779                                     NotEVEX2VEXConvertible;
4780 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4781                                     HasBWI, 1>;
4782 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4783                                      HasBWI, 1>;
4784 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4785                                       SchedWriteVecIMul, HasBWI, 1>, T8PD;
4786 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4787                                    SchedWriteVecALU, HasBWI, 1>;
4788 defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4789                                     SchedWriteVecIMul, HasAVX512, 1>, T8PD;
4790 defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4791                                      SchedWriteVecIMul, HasAVX512, 1>;
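// For reference, a sketch of how the defms above expand (assuming the
// standard AVX512_maskable naming scheme): VPADD, for instance, yields
// VPADD{B,W,D,Q} at the three vector widths, producing instructions such as
//   VPADDDZrr,  VPADDDZrrk,  VPADDDZrrkz      (register forms)
//   VPADDDZrm,  VPADDDZrmb,  ...              (memory / broadcast forms)
//   VPADDDZ256rr, VPADDDZ128rr, ...           (VLX widths)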
4792
4793 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4794                             X86SchedWriteWidths sched,
4795                             AVX512VLVectorVTInfo _SrcVTInfo,
4796                             AVX512VLVectorVTInfo _DstVTInfo,
4797                             SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
4798   let Predicates = [prd] in
4799     defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4800                                  _SrcVTInfo.info512, _DstVTInfo.info512,
4801                                  v8i64_info, IsCommutable>,
4802                                   EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4803   let Predicates = [HasVLX, prd] in {
4804     defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4805                                       _SrcVTInfo.info256, _DstVTInfo.info256,
4806                                       v4i64x_info, IsCommutable>,
4807                                       EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4808     defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4809                                       _SrcVTInfo.info128, _DstVTInfo.info128,
4810                                       v2i64x_info, IsCommutable>,
4811                                      EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
4812   }
4813 }
4814
4815 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4816                                 avx512vl_i8_info, avx512vl_i8_info,
4817                                 X86multishift, HasVBMI, 0>, T8PD;
4818
4819 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4820                             X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4821                             X86FoldableSchedWrite sched> {
4822   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4823                     (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4824                     OpcodeStr,
4825                     "${src2}"##_Src.BroadcastStr##", $src1",
4826                      "$src1, ${src2}"##_Src.BroadcastStr,
4827                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4828                                  (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
4829                     EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4830                     Sched<[sched.Folded, sched.ReadAfterFold]>;
4831 }
4832
4833 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4834                             SDNode OpNode, X86VectorVTInfo _Src,
4835                             X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4836                             bit IsCommutable = 0> {
4837   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4838                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4839                             "$src2, $src1","$src1, $src2",
4840                             (_Dst.VT (OpNode
4841                                          (_Src.VT _Src.RC:$src1),
4842                                          (_Src.VT _Src.RC:$src2))),
4843                             IsCommutable, IsCommutable>,
4844                             EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
4845   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4846                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4847                         "$src2, $src1", "$src1, $src2",
4848                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4849                                       (_Src.LdFrag addr:$src2)))>,
4850                          EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4851                          Sched<[sched.Folded, sched.ReadAfterFold]>;
4852 }
4853
4854 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4855                                     SDNode OpNode> {
4856   let Predicates = [HasBWI] in
4857   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4858                                  v32i16_info, SchedWriteShuffle.ZMM>,
4859                 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4860                                  v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4861   let Predicates = [HasBWI, HasVLX] in {
4862     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4863                                      v16i16x_info, SchedWriteShuffle.YMM>,
4864                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4865                                       v16i16x_info, SchedWriteShuffle.YMM>,
4866                                       EVEX_V256;
4867     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4868                                      v8i16x_info, SchedWriteShuffle.XMM>,
4869                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4870                                       v8i16x_info, SchedWriteShuffle.XMM>,
4871                                       EVEX_V128;
4872   }
4873 }
4874 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4875                             SDNode OpNode> {
4876   let Predicates = [HasBWI] in
4877   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4878                                 SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
4879   let Predicates = [HasBWI, HasVLX] in {
4880     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4881                                      v32i8x_info, SchedWriteShuffle.YMM>,
4882                                      EVEX_V256, VEX_WIG;
4883     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4884                                      v16i8x_info, SchedWriteShuffle.XMM>,
4885                                      EVEX_V128, VEX_WIG;
4886   }
4887 }
4888
4889 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4890                             SDNode OpNode, AVX512VLVectorVTInfo _Src,
4891                             AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4892   let Predicates = [HasBWI] in
4893   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4894                                 _Dst.info512, SchedWriteVecIMul.ZMM,
4895                                 IsCommutable>, EVEX_V512;
4896   let Predicates = [HasBWI, HasVLX] in {
4897     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4898                                      _Dst.info256, SchedWriteVecIMul.YMM,
4899                                      IsCommutable>, EVEX_V256;
4900     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4901                                      _Dst.info128, SchedWriteVecIMul.XMM,
4902                                      IsCommutable>, EVEX_V128;
4903   }
4904 }
4905
4906 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4907 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4908 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4909 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4910
4911 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4912                      avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
4913 defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4914                      avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
4915
4916 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4917                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
4918 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4919                                     SchedWriteVecALU, HasBWI, 1>;
4920 defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4921                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
4922 defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4923                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
4924                                     NotEVEX2VEXConvertible;
4925
4926 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4927                                     SchedWriteVecALU, HasBWI, 1>;
4928 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4929                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
4930 defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4931                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
4932 defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4933                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
4934                                     NotEVEX2VEXConvertible;
4935
4936 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4937                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
4938 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4939                                     SchedWriteVecALU, HasBWI, 1>;
4940 defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
4941                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
4942 defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
4943                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
4944                                     NotEVEX2VEXConvertible;
4945
4946 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4947                                     SchedWriteVecALU, HasBWI, 1>;
4948 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4949                                     SchedWriteVecALU, HasBWI, 1>, T8PD;
4950 defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
4951                                     SchedWriteVecALU, HasAVX512, 1>, T8PD;
4952 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
4953                                     SchedWriteVecALU, HasAVX512, 1>, T8PD,
4954                                     NotEVEX2VEXConvertible;
4955
4956 // PMULLQ: Use the 512-bit version to implement the 128/256-bit forms under NoVLX.
4957 let Predicates = [HasDQI, NoVLX] in {
4958   def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4959             (EXTRACT_SUBREG
4960                 (VPMULLQZrr
4961                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4962                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4963              sub_ymm)>;
4964   def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
4965             (EXTRACT_SUBREG
4966                 (VPMULLQZrmb
4967                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4968                     addr:$src2),
4969              sub_ymm)>;
4970
4971   def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4972             (EXTRACT_SUBREG
4973                 (VPMULLQZrr
4974                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4975                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4976              sub_xmm)>;
4977   def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
4978             (EXTRACT_SUBREG
4979                 (VPMULLQZrmb
4980                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4981                     addr:$src2),
4982              sub_xmm)>;
4983 }
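// Illustrative effect of the widening above: the INSERT_SUBREG of an
// IMPLICIT_DEF and the trailing EXTRACT_SUBREG are register-coalescing
// operations rather than real instructions, so a v2i64/v4i64 multiply without
// VLX is expected to become a single 512-bit instruction on the containing
// zmm registers, e.g.
//   vpmullq %zmm1, %zmm0, %zmm0    ; only the low 128/256 bits are used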
4984
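// The same widening trick for 64-bit element min/max: without VLX, the
// 128/256-bit smin/smax/umin/umax nodes are implemented with the 512-bit
// VPMINSQ/VPMINUQ/VPMAXSQ/VPMAXUQ instructions by inserting into and
// extracting from a zmm register.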
4985 multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
4986   def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
4987             (EXTRACT_SUBREG
4988                 (!cast<Instruction>(Instr#"rr")
4989                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4990                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4991              sub_ymm)>;
4992   def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
4993             (EXTRACT_SUBREG
4994                 (!cast<Instruction>(Instr#"rmb")
4995                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4996                     addr:$src2),
4997              sub_ymm)>;
4998
4999   def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5000             (EXTRACT_SUBREG
5001                 (!cast<Instruction>(Instr#"rr")
5002                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5003                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5004              sub_xmm)>;
5005   def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5006             (EXTRACT_SUBREG
5007                 (!cast<Instruction>(Instr#"rmb")
5008                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5009                     addr:$src2),
5010              sub_xmm)>;
5011 }
5012
5013 let Predicates = [HasAVX512, NoVLX] in {
5014   defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5015   defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5016   defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5017   defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5018 }
5019
5020 //===----------------------------------------------------------------------===//
5021 // AVX-512  Logical Instructions
5022 //===----------------------------------------------------------------------===//
5023
5024 defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5025                                    SchedWriteVecLogic, HasAVX512, 1>;
5026 defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5027                                   SchedWriteVecLogic, HasAVX512, 1>;
5028 defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5029                                    SchedWriteVecLogic, HasAVX512, 1>;
5030 defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5031                                     SchedWriteVecLogic, HasAVX512>;
5032
5033 let Predicates = [HasVLX] in {
5034   def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5035             (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5036   def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5037             (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5038
5039   def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5040             (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5041   def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5042             (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5043
5044   def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5045             (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5046   def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5047             (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5048
5049   def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5050             (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5051   def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5052             (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5053
5054   def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5055             (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5056   def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5057             (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5058
5059   def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5060             (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5061   def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5062             (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5063
5064   def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5065             (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5066   def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5067             (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5068
5069   def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5070             (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5071   def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5072             (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5073
5074   def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5075             (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5076   def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5077             (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5078
5079   def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5080             (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5081   def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5082             (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5083
5084   def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5085             (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5086   def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5087             (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5088
5089   def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5090             (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5091   def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5092             (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5093
5094   def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5095             (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5096   def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5097             (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5098
5099   def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5100             (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5101   def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5102             (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5103
5104   def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5105             (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5106   def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5107             (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5108
5109   def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5110             (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5111   def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5112             (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5113 }
5114
5115 let Predicates = [HasAVX512] in {
5116   def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5117             (VPANDQZrr VR512:$src1, VR512:$src2)>;
5118   def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5119             (VPANDQZrr VR512:$src1, VR512:$src2)>;
5120
5121   def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5122             (VPORQZrr VR512:$src1, VR512:$src2)>;
5123   def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5124             (VPORQZrr VR512:$src1, VR512:$src2)>;
5125
5126   def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5127             (VPXORQZrr VR512:$src1, VR512:$src2)>;
5128   def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5129             (VPXORQZrr VR512:$src1, VR512:$src2)>;
5130
5131   def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5132             (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5133   def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5134             (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5135
5136   def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5137             (VPANDQZrm VR512:$src1, addr:$src2)>;
5138   def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5139             (VPANDQZrm VR512:$src1, addr:$src2)>;
5140
5141   def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5142             (VPORQZrm VR512:$src1, addr:$src2)>;
5143   def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5144             (VPORQZrm VR512:$src1, addr:$src2)>;
5145
5146   def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5147             (VPXORQZrm VR512:$src1, addr:$src2)>;
5148   def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5149             (VPXORQZrm VR512:$src1, addr:$src2)>;
5150
5151   def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5152             (VPANDNQZrm VR512:$src1, addr:$src2)>;
5153   def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5154             (VPANDNQZrm VR512:$src1, addr:$src2)>;
5155 }
5156
5157 // Patterns to catch a vselect whose type differs from that of the logic op.
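// The DAG may perform the logic op in one integer type (e.g. v16i32) and feed
// it through a bitconvert into a vselect of a different element width; these
// patterns look through the bitconvert and select the masked instruction that
// matches the select's element size.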
5158 multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5159                                     X86VectorVTInfo _,
5160                                     X86VectorVTInfo IntInfo> {
5161   // Masked register-register logical operations.
5162   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5163                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5164                    _.RC:$src0)),
5165             (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5166              _.RC:$src1, _.RC:$src2)>;
5167
5168   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5169                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5170                    _.ImmAllZerosV)),
5171             (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5172              _.RC:$src2)>;
5173
5174   // Masked register-memory logical operations.
5175   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5176                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5177                                             (load addr:$src2)))),
5178                    _.RC:$src0)),
5179             (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5180              _.RC:$src1, addr:$src2)>;
5181   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5182                    (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5183                                             (load addr:$src2)))),
5184                    _.ImmAllZerosV)),
5185             (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5186              addr:$src2)>;
5187 }
5188
5189 multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5190                                          X86VectorVTInfo _,
5191                                          X86VectorVTInfo IntInfo> {
5192   // Register-broadcast logical operations.
5193   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5194                    (bitconvert
5195                     (IntInfo.VT (OpNode _.RC:$src1,
5196                                  (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5197                    _.RC:$src0)),
5198             (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5199              _.RC:$src1, addr:$src2)>;
5200   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5201                    (bitconvert
5202                     (IntInfo.VT (OpNode _.RC:$src1,
5203                                  (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5204                    _.ImmAllZerosV)),
5205             (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5206              _.RC:$src1, addr:$src2)>;
5207 }
5208
5209 multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5210                                          AVX512VLVectorVTInfo SelectInfo,
5211                                          AVX512VLVectorVTInfo IntInfo> {
5212 let Predicates = [HasVLX] in {
5213   defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5214                                  IntInfo.info128>;
5215   defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5216                                  IntInfo.info256>;
5217 }
5218 let Predicates = [HasAVX512] in {
5219   defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5220                                  IntInfo.info512>;
5221 }
5222 }
5223
5224 multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5225                                                AVX512VLVectorVTInfo SelectInfo,
5226                                                AVX512VLVectorVTInfo IntInfo> {
5227 let Predicates = [HasVLX] in {
5228   defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5229                                        SelectInfo.info128, IntInfo.info128>;
5230   defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5231                                        SelectInfo.info256, IntInfo.info256>;
5232 }
5233 let Predicates = [HasAVX512] in {
5234   defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5235                                        SelectInfo.info512, IntInfo.info512>;
5236 }
5237 }
5238
5239 multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5240   // i64 vselect with i32/i16/i8 logic op
5241   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5242                                        avx512vl_i32_info>;
5243   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5244                                        avx512vl_i16_info>;
5245   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5246                                        avx512vl_i8_info>;
5247
5248   // i32 vselect with i64/i16/i8 logic op
5249   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5250                                        avx512vl_i64_info>;
5251   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5252                                        avx512vl_i16_info>;
5253   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5254                                        avx512vl_i8_info>;
5255
5256   // f32 vselect with i64/i32/i16/i8 logic op
5257   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5258                                        avx512vl_i64_info>;
5259   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5260                                        avx512vl_i32_info>;
5261   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5262                                        avx512vl_i16_info>;
5263   defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5264                                        avx512vl_i8_info>;
5265
5266   // f64 vselect with i64/i32/i16/i8 logic op
5267   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5268                                        avx512vl_i64_info>;
5269   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5270                                        avx512vl_i32_info>;
5271   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5272                                        avx512vl_i16_info>;
5273   defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5274                                        avx512vl_i8_info>;
5275
5276   defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5277                                              avx512vl_f32_info,
5278                                              avx512vl_i32_info>;
5279   defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5280                                              avx512vl_f64_info,
5281                                              avx512vl_i64_info>;
5282 }
5283
5284 defm : avx512_logical_lowering_types<"VPAND", and>;
5285 defm : avx512_logical_lowering_types<"VPOR",  or>;
5286 defm : avx512_logical_lowering_types<"VPXOR", xor>;
5287 defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
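// For instance, through the i32/i8 instantiation above, a masked v16i32
// vselect wrapping a v64i8 'and' is matched to VPANDDZrrk.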
5288
5289 //===----------------------------------------------------------------------===//
5290 // AVX-512  FP arithmetic
5291 //===----------------------------------------------------------------------===//
5292
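// Scalar FP binops. The rr_Int/rm_Int forms keep the operands in the full
// vector register class and support masking; the isCodeGenOnly rr/rm forms
// operate on the plain scalar register class (_.FRC) and exist so ordinary,
// non-intrinsic scalar code can be selected.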
5293 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
5294                             SDNode OpNode, SDNode VecNode,
5295                             X86FoldableSchedWrite sched, bit IsCommutable> {
5296   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5297   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5298                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5299                            "$src2, $src1", "$src1, $src2",
5300                            (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5301                            Sched<[sched]>;
5302
5303   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5304                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5305                          "$src2, $src1", "$src1, $src2",
5306                          (_.VT (VecNode _.RC:$src1,
5307                                         _.ScalarIntMemCPat:$src2))>,
5308                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5309   let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5310   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5311                          (ins _.FRC:$src1, _.FRC:$src2),
5312                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5313                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5314                           Sched<[sched]> {
5315     let isCommutable = IsCommutable;
5316   }
5317   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5318                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5319                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5320                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5321                          (_.ScalarLdFrag addr:$src2)))]>,
5322                          Sched<[sched.Folded, sched.ReadAfterFold]>;
5323   }
5324   }
5325 }
5326
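// The *_round and *_sae variants add the EVEX.b forms: rrb_Int with an
// explicit static rounding mode (AVX512RC:$rc, EVEX_RC), and the {sae} form
// that suppresses all floating-point exceptions.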
5327 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
5328                                   SDNode VecNode, X86FoldableSchedWrite sched,
5329                                   bit IsCommutable = 0> {
5330   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5331   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5332                           (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5333                           "$rc, $src2, $src1", "$src1, $src2, $rc",
5334                           (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5335                           (i32 timm:$rc))>,
5336                           EVEX_B, EVEX_RC, Sched<[sched]>;
5337 }
5338 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
5339                                 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5340                                 X86FoldableSchedWrite sched, bit IsCommutable,
5341                                 string EVEX2VexOvrd> {
5342   let ExeDomain = _.ExeDomain in {
5343   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5344                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5345                            "$src2, $src1", "$src1, $src2",
5346                            (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5347                            Sched<[sched]>, SIMD_EXC;
5348
5349   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5350                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5351                          "$src2, $src1", "$src1, $src2",
5352                          (_.VT (VecNode _.RC:$src1,
5353                                         _.ScalarIntMemCPat:$src2))>,
5354                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5355
5356   let isCodeGenOnly = 1, Predicates = [HasAVX512],
5357       Uses = [MXCSR], mayRaiseFPException = 1 in {
5358   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5359                          (ins _.FRC:$src1, _.FRC:$src2),
5360                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5361                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5362                           Sched<[sched]>,
5363                           EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
5364     let isCommutable = IsCommutable;
5365   }
5366   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5367                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5368                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5369                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5370                          (_.ScalarLdFrag addr:$src2)))]>,
5371                          Sched<[sched.Folded, sched.ReadAfterFold]>,
5372                          EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
5373   }
5374
5375   let Uses = [MXCSR] in
5376   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5377                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5378                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5379                             (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5380                             EVEX_B, Sched<[sched]>;
5381   }
5382 }
5383
5384 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
5385                                 SDNode VecNode, SDNode RndNode,
5386                                 X86SchedWriteSizes sched, bit IsCommutable> {
5387   defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5388                               sched.PS.Scl, IsCommutable>,
5389              avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5390                               sched.PS.Scl, IsCommutable>,
5391                               XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5392   defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5393                               sched.PD.Scl, IsCommutable>,
5394              avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5395                               sched.PD.Scl, IsCommutable>,
5396                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5397 }
5398
5399 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5400                               SDNode VecNode, SDNode SaeNode,
5401                               X86SchedWriteSizes sched, bit IsCommutable> {
5402   defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5403                               VecNode, SaeNode, sched.PS.Scl, IsCommutable,
5404                               NAME#"SS">,
5405                               XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5406   defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5407                               VecNode, SaeNode, sched.PD.Scl, IsCommutable,
5408                               NAME#"SD">,
5409                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5410 }
5411 defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5412                                  SchedWriteFAddSizes, 1>;
5413 defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5414                                  SchedWriteFMulSizes, 1>;
5415 defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5416                                  SchedWriteFAddSizes, 0>;
5417 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5418                                  SchedWriteFDivSizes, 0>;
5419 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5420                                SchedWriteFCmpSizes, 0>;
5421 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5422                                SchedWriteFCmpSizes, 0>;
5423
5424 // MIN/MAX nodes are commutable under "unsafe-fp-math". In that case we use
5425 // X86fminc and X86fmaxc instead of X86fmin and X86fmax.
5426 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5427                                     X86VectorVTInfo _, SDNode OpNode,
5428                                     X86FoldableSchedWrite sched,
5429                                     string EVEX2VEXOvrd> {
5430   let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5431   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5432                          (ins _.FRC:$src1, _.FRC:$src2),
5433                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5434                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5435                           Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
5436     let isCommutable = 1;
5437   }
5438   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5439                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5440                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5441                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5442                          (_.ScalarLdFrag addr:$src2)))]>,
5443                          Sched<[sched.Folded, sched.ReadAfterFold]>,
5444                          EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
5445   }
5446 }
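// The VMINC*/VMAXC* scalar forms below match X86fminc/X86fmaxc, the
// commutable min/max nodes mentioned in the comment above; they are
// isCodeGenOnly and operate on the scalar register classes only.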
5447 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5448                                          SchedWriteFCmp.Scl, "VMINCSS">, XS,
5449                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5450
5451 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5452                                          SchedWriteFCmp.Scl, "VMINCSD">, XD,
5453                                          VEX_W, EVEX_4V, VEX_LIG,
5454                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5455
5456 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5457                                          SchedWriteFCmp.Scl, "VMAXCSS">, XS,
5458                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5459
5460 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5461                                          SchedWriteFCmp.Scl, "VMAXCSD">, XD,
5462                                          VEX_W, EVEX_4V, VEX_LIG,
5463                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5464
5465 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5466                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
5467                             bit IsCommutable,
5468                             bit IsKCommutable = IsCommutable> {
5469   let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5470       Uses = [MXCSR], mayRaiseFPException = 1 in {
5471   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5472                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5473                   "$src2, $src1", "$src1, $src2",
5474                   (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
5475                   IsKCommutable, IsKCommutable>,
5476                   EVEX_4V, Sched<[sched]>;
5477   let mayLoad = 1 in {
5478     defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5479                     (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5480                     "$src2, $src1", "$src1, $src2",
5481                     (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5482                     EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5483     defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5484                      (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5485                      "${src2}"##_.BroadcastStr##", $src1",
5486                      "$src1, ${src2}"##_.BroadcastStr,
5487                      (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5488                      EVEX_4V, EVEX_B,
5489                      Sched<[sched.Folded, sched.ReadAfterFold]>;
5490     }
5491   }
5492 }
5493
5494 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5495                                   SDPatternOperator OpNodeRnd,
5496                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5497   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5498   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5499                   (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
5500                   "$rc, $src2, $src1", "$src1, $src2, $rc",
5501                   (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
5502                   EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
5503 }
5504
5505 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5506                                 SDPatternOperator OpNodeSAE,
5507                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5508   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5509   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5510                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5511                   "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5512                   (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5513                   EVEX_4V, EVEX_B, Sched<[sched]>;
5514 }
5515
5516 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5517                              Predicate prd, X86SchedWriteSizes sched,
5518                              bit IsCommutable = 0,
5519                              bit IsPD128Commutable = IsCommutable> {
5520   let Predicates = [prd] in {
5521   defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
5522                               sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5523                               EVEX_CD8<32, CD8VF>;
5524   defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
5525                               sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5526                               EVEX_CD8<64, CD8VF>;
5527   }
5528
5529   // Define only if the AVX512VL feature is present.
5530   let Predicates = [prd, HasVLX] in {
5531     defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
5532                                    sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5533                                    EVEX_CD8<32, CD8VF>;
5534     defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
5535                                    sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5536                                    EVEX_CD8<32, CD8VF>;
5537     defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
5538                                    sched.PD.XMM, IsPD128Commutable,
5539                                    IsCommutable>, EVEX_V128, PD, VEX_W,
5540                                    EVEX_CD8<64, CD8VF>;
5541     defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
5542                                    sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5543                                    EVEX_CD8<64, CD8VF>;
5544   }
5545 }
5546
5547 let Uses = [MXCSR] in
5548 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5549                                    X86SchedWriteSizes sched> {
5550   defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5551                                     v16f32_info>,
5552                                     EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5553   defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5554                                     v8f64_info>,
5555                                     EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
5556 }
5557
5558 let Uses = [MXCSR] in
5559 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5560                                  X86SchedWriteSizes sched> {
5561   defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5562                                   v16f32_info>,
5563                                   EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5564   defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5565                                   v8f64_info>,
5566                                   EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>;
5567 }
5568
5569 defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, HasAVX512,
5570                               SchedWriteFAddSizes, 1>,
5571             avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5572 defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, HasAVX512,
5573                               SchedWriteFMulSizes, 1>,
5574             avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5575 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, HasAVX512,
5576                               SchedWriteFAddSizes>,
5577             avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5578 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, HasAVX512,
5579                               SchedWriteFDivSizes>,
5580             avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5581 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
5582                               SchedWriteFCmpSizes, 0>,
5583             avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5584 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
5585                               SchedWriteFCmpSizes, 0>,
5586             avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
5587 let isCodeGenOnly = 1 in {
5588   defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
5589                                  SchedWriteFCmpSizes, 1>;
5590   defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
5591                                  SchedWriteFCmpSizes, 1>;
5592 }
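// The packed FP logic forms below take null_frag, so no selection patterns are
// attached here; unlike the arithmetic ops above they neither read MXCSR nor
// raise FP exceptions, hence the Uses/mayRaiseFPException override.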
5593 let Uses = []<Register>, mayRaiseFPException = 0 in {
5594 defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
5595                                SchedWriteFLogicSizes, 1>;
5596 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
5597                                SchedWriteFLogicSizes, 0>;
5598 defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
5599                                SchedWriteFLogicSizes, 1>;
5600 defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
5601                                SchedWriteFLogicSizes, 1>;
5602 }
5603
5604 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5605                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5606   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5607   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5608                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5609                   "$src2, $src1", "$src1, $src2",
5610                   (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5611                   EVEX_4V, Sched<[sched]>;
5612   defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5613                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5614                   "$src2, $src1", "$src1, $src2",
5615                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5616                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5617   defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5618                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5619                    "${src2}"##_.BroadcastStr##", $src1",
5620                    "$src1, ${src2}"##_.BroadcastStr,
5621                    (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5622                    EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5623   }
5624 }
5625
5626 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5627                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5628   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5629   defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5630                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5631                   "$src2, $src1", "$src1, $src2",
5632                   (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5633                   Sched<[sched]>;
5634   defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5635                   (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
5636                   "$src2, $src1", "$src1, $src2",
5637                   (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>,
5638                   Sched<[sched.Folded, sched.ReadAfterFold]>;
5639   }
5640 }
5641
5642 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5643                                 X86SchedWriteWidths sched> {
5644   defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5645              avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5646                               EVEX_V512, EVEX_CD8<32, CD8VF>;
5647   defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5648              avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5649                               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5650   defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5651              avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
5652                                     X86scalefsRnd, sched.Scl>,
5653                                     EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5654   defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5655              avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
5656                                     X86scalefsRnd, sched.Scl>,
5657                                     EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;
5658
5659   // Define only if the AVX512VL feature is present.
5660   let Predicates = [HasVLX] in {
5661     defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5662                                    EVEX_V128, EVEX_CD8<32, CD8VF>;
5663     defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5664                                    EVEX_V256, EVEX_CD8<32, CD8VF>;
5665     defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5666                                    EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5667     defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5668                                    EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
5669   }
5670 }
5671 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
5672                                     SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
5673
5674 //===----------------------------------------------------------------------===//
5675 // AVX-512  VPTESTM instructions
5676 //===----------------------------------------------------------------------===//
5677
5678 multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5679                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
5680                          string Name> {
5681   // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5682   // There are just too many permutations due to commutability and bitcasts.
5683   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5684   defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5685                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5686                       "$src2, $src1", "$src1, $src2",
5687                    (null_frag), (null_frag), 1>,
5688                    EVEX_4V, Sched<[sched]>;
5689   let mayLoad = 1 in
5690   defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5691                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5692                        "$src2, $src1", "$src1, $src2",
5693                    (null_frag), (null_frag)>,
5694                    EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5695                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5696   }
5697 }
5698
5699 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5700                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5701   let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5702   defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5703                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5704                     "${src2}"##_.BroadcastStr##", $src1",
5705                     "$src1, ${src2}"##_.BroadcastStr,
5706                     (null_frag), (null_frag)>,
5707                     EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5708                     Sched<[sched.Folded, sched.ReadAfterFold]>;
5709 }
5710
5711 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5712                                   X86SchedWriteWidths sched,
5713                                   AVX512VLVectorVTInfo _> {
5714   let Predicates  = [HasAVX512] in
5715   defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
5716            avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5717
5718   let Predicates = [HasAVX512, HasVLX] in {
5719   defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
5720               avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5721   defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
5722               avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
5723   }
5724 }
5725
5726 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5727                             X86SchedWriteWidths sched> {
5728   defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5729                                  avx512vl_i32_info>;
5730   defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5731                                  avx512vl_i64_info>, VEX_W;
5732 }
5733
5734 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5735                             X86SchedWriteWidths sched> {
5736   let Predicates = [HasBWI] in {
5737   defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5738                             v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
5739   defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5740                             v64i8_info, NAME#"B">, EVEX_V512;
5741   }
5742   let Predicates = [HasVLX, HasBWI] in {
5743
5744   defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5745                             v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
5746   defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5747                             v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
5748   defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5749                             v32i8x_info, NAME#"B">, EVEX_V256;
5750   defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5751                             v16i8x_info, NAME#"B">, EVEX_V128;
5752   }
5753 }
5754
5755 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5756                                    X86SchedWriteWidths sched> :
5757   avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5758   avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
5759
5760 defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5761                                          SchedWriteVecLogic>, T8PD;
5762 defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5763                                          SchedWriteVecLogic>, T8XS;
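// VPTESTM sets each mask bit when the bitwise AND of the corresponding
// elements is nonzero; VPTESTNM sets it when the AND is zero. As noted above,
// both are selected manually in X86ISelDAGToDAG.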
5764
5765 //===----------------------------------------------------------------------===//
5766 // AVX-512  Shift instructions
5767 //===----------------------------------------------------------------------===//
5768
5769 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5770                             string OpcodeStr, SDNode OpNode,
5771                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5772   let ExeDomain = _.ExeDomain in {
5773   defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5774                    (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5775                       "$src2, $src1", "$src1, $src2",
5776                    (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
5777                    Sched<[sched]>;
5778   defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5779                    (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5780                        "$src2, $src1", "$src1, $src2",
5781                    (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5782                           (i8 timm:$src2)))>,
5783                    Sched<[sched.Folded]>;
5784   }
5785 }
5786
5787 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5788                              string OpcodeStr, SDNode OpNode,
5789                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5790   let ExeDomain = _.ExeDomain in
5791   defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5792                    (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5793       "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
5794      (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
5795      EVEX_B, Sched<[sched.Folded]>;
5796 }
5797
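// avx512_shift_rmi and avx512_shift_rmbi above provide the register, memory,
// and broadcast-memory forms of the shift-by-immediate instructions;
// avx512_shift_rrm below handles the shift-by-xmm-count forms.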
5798 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5799                             X86FoldableSchedWrite sched, ValueType SrcVT,
5800                             X86VectorVTInfo _> {
5801    // src2 is always a 128-bit XMM register; the shift count comes from its low 64 bits.
5802   let ExeDomain = _.ExeDomain in {
5803   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5804                    (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5805                       "$src2, $src1", "$src1, $src2",
5806                    (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5807                    AVX512BIBase, EVEX_4V, Sched<[sched]>;
5808   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5809                    (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5810                        "$src2, $src1", "$src1, $src2",
5811                    (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5812                    AVX512BIBase,
5813                    EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5814   }
5815 }
5816
5817 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5818                               X86SchedWriteWidths sched, ValueType SrcVT,
5819                               AVX512VLVectorVTInfo VTInfo,
5820                               Predicate prd> {
5821   let Predicates = [prd] in
5822   defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5823                                VTInfo.info512>, EVEX_V512,
5824                                EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5825   let Predicates = [prd, HasVLX] in {
5826   defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5827                                VTInfo.info256>, EVEX_V256,
5828                                EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5829   defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5830                                VTInfo.info128>, EVEX_V128,
5831                                EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5832   }
5833 }
5834
5835 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5836                               string OpcodeStr, SDNode OpNode,
5837                               X86SchedWriteWidths sched,
5838                               bit NotEVEX2VEXConvertibleQ = 0> {
5839   defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5840                               avx512vl_i32_info, HasAVX512>;
5841   let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5842   defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5843                               avx512vl_i64_info, HasAVX512>, VEX_W;
5844   defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5845                               avx512vl_i16_info, HasBWI>;
5846 }
5847
5848 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5849                                   string OpcodeStr, SDNode OpNode,
5850                                   X86SchedWriteWidths sched,
5851                                   AVX512VLVectorVTInfo VTInfo> {
5852   let Predicates = [HasAVX512] in
5853   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5854                               sched.ZMM, VTInfo.info512>,
5855              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5856                                VTInfo.info512>, EVEX_V512;
5857   let Predicates = [HasAVX512, HasVLX] in {
5858   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5859                               sched.YMM, VTInfo.info256>,
5860              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5861                                VTInfo.info256>, EVEX_V256;
5862   defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5863                               sched.XMM, VTInfo.info128>,
5864              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5865                                VTInfo.info128>, EVEX_V128;
5866   }
5867 }
5868
5869 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5870                               string OpcodeStr, SDNode OpNode,
5871                               X86SchedWriteWidths sched> {
5872   let Predicates = [HasBWI] in
5873   defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5874                                sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
5875   let Predicates = [HasVLX, HasBWI] in {
5876   defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5877                                sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
5878   defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5879                                sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
5880   }
5881 }
5882
5883 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5884                                Format ImmFormR, Format ImmFormM,
5885                                string OpcodeStr, SDNode OpNode,
5886                                X86SchedWriteWidths sched,
5887                                bit NotEVEX2VEXConvertibleQ = 0> {
5888   defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5889                                  sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5890   let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5891   defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5892                                  sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
5893 }
5894
5895 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5896                                  SchedWriteVecShiftImm>,
5897              avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5898                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5899
5900 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5901                                  SchedWriteVecShiftImm>,
5902              avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5903                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5904
5905 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5906                                  SchedWriteVecShiftImm, 1>,
5907              avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5908                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5909
5910 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5911                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5912 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5913                                  SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5914
5915 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
5916                                 SchedWriteVecShift>;
5917 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
5918                                 SchedWriteVecShift, 1>;
5919 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
5920                                 SchedWriteVecShift>;
5921
5922 // Use the 512-bit VPSRA/VPSRAI version to implement v2i64/v4i64 under NoVLX.
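// (There is no VEX/AVX2 arithmetic right shift of 64-bit elements, so VPSRAQ
// is EVEX-only and the 512-bit form must be used when VLX is unavailable.)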
5923 let Predicates = [HasAVX512, NoVLX] in {
5924   def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5925             (EXTRACT_SUBREG (v8i64
5926               (VPSRAQZrr
5927                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5928                  VR128X:$src2)), sub_ymm)>;
5929
5930   def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5931             (EXTRACT_SUBREG (v8i64
5932               (VPSRAQZrr
5933                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5934                  VR128X:$src2)), sub_xmm)>;
5935
5936   def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
5937             (EXTRACT_SUBREG (v8i64
5938               (VPSRAQZri
5939                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5940                  timm:$src2)), sub_ymm)>;
5941
5942   def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
5943             (EXTRACT_SUBREG (v8i64
5944               (VPSRAQZri
5945                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5946                  timm:$src2)), sub_xmm)>;
5947 }
5948
5949 //===----------------------------------------------------------------------===//
5950 // Variable Bit Shifts
5951 //===----------------------------------------------------------------------===//
5952
5953 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5954                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5955   let ExeDomain = _.ExeDomain in {
5956   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5957                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5958                       "$src2, $src1", "$src1, $src2",
5959                    (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
5960                    AVX5128IBase, EVEX_4V, Sched<[sched]>;
5961   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5962                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5963                        "$src2, $src1", "$src1, $src2",
5964                    (_.VT (OpNode _.RC:$src1,
5965                    (_.VT (_.LdFrag addr:$src2))))>,
5966                    AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5967                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5968   }
5969 }
5970
5971 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5972                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5973   let ExeDomain = _.ExeDomain in
5974   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5975                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5976                     "${src2}"##_.BroadcastStr##", $src1",
5977                     "$src1, ${src2}"##_.BroadcastStr,
5978                     (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
5979                     AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5980                     Sched<[sched.Folded, sched.ReadAfterFold]>;
5981 }
5982
5983 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5984                                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
5985   let Predicates  = [HasAVX512] in
5986   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
5987            avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
5988
5989   let Predicates = [HasAVX512, HasVLX] in {
5990   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
5991               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
5992   defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
5993               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
5994   }
5995 }
5996
5997 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
5998                                   SDNode OpNode, X86SchedWriteWidths sched> {
5999   defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6000                                  avx512vl_i32_info>;
6001   defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6002                                  avx512vl_i64_info>, VEX_W;
6003 }
6004
6005 // Use the 512-bit version to implement the 128/256-bit forms under NoVLX.
6006 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6007                                      SDNode OpNode, list<Predicate> p> {
6008   let Predicates = p in {
6009   def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6010                                   (_.info256.VT _.info256.RC:$src2))),
6011             (EXTRACT_SUBREG
6012                 (!cast<Instruction>(OpcodeStr#"Zrr")
6013                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6014                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6015              sub_ymm)>;
6016
6017   def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6018                                   (_.info128.VT _.info128.RC:$src2))),
6019             (EXTRACT_SUBREG
6020                 (!cast<Instruction>(OpcodeStr#"Zrr")
6021                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6022                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6023              sub_xmm)>;
6024   }
6025 }
6026 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6027                               SDNode OpNode, X86SchedWriteWidths sched> {
6028   let Predicates = [HasBWI] in
6029   defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6030               EVEX_V512, VEX_W;
6031   let Predicates = [HasVLX, HasBWI] in {
6032
6033   defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6034               EVEX_V256, VEX_W;
6035   defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6036               EVEX_V128, VEX_W;
6037   }
6038 }
6039
6040 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6041               avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6042
6043 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6044               avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6045
6046 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6047               avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6048
6049 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6050 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6051
6052 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6053 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6054 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6055 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6056
6057
6058 // Use the 512-bit VPROLV/VPROL (variable/immediate) forms to implement v2i64/v4i64 and v4i32/v8i32 rotates when VLX is not available.
6059 let Predicates = [HasAVX512, NoVLX] in {
6060   def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6061             (EXTRACT_SUBREG (v8i64
6062               (VPROLVQZrr
6063                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6064                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6065                        sub_xmm)>;
6066   def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6067             (EXTRACT_SUBREG (v8i64
6068               (VPROLVQZrr
6069                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6070                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6071                        sub_ymm)>;
6072
6073   def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6074             (EXTRACT_SUBREG (v16i32
6075               (VPROLVDZrr
6076                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6077                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6078                         sub_xmm)>;
6079   def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6080             (EXTRACT_SUBREG (v16i32
6081               (VPROLVDZrr
6082                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6083                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6084                         sub_ymm)>;
6085
6086   def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6087             (EXTRACT_SUBREG (v8i64
6088               (VPROLQZri
6089                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6090                         timm:$src2)), sub_xmm)>;
6091   def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6092             (EXTRACT_SUBREG (v8i64
6093               (VPROLQZri
6094                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6095                        timm:$src2)), sub_ymm)>;
6096
6097   def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6098             (EXTRACT_SUBREG (v16i32
6099               (VPROLDZri
6100                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6101                         timm:$src2)), sub_xmm)>;
6102   def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6103             (EXTRACT_SUBREG (v16i32
6104               (VPROLDZri
6105                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6106                         timm:$src2)), sub_ymm)>;
6107 }
6108
6109 // Use the 512-bit VPRORV/VPROR (variable/immediate) forms to implement v2i64/v4i64 and v4i32/v8i32 rotates when VLX is not available.
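// As with the left-rotate block, variable rotates are widened to
// VPRORVQZrr/VPRORVDZrr and immediate rotates to VPRORQZri/VPRORDZri, with the
// result read back from the low 128/256 bits of the zmm destination.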
6110 let Predicates = [HasAVX512, NoVLX] in {
6111   def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6112             (EXTRACT_SUBREG (v8i64
6113               (VPRORVQZrr
6114                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6115                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6116                        sub_xmm)>;
6117   def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6118             (EXTRACT_SUBREG (v8i64
6119               (VPRORVQZrr
6120                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6121                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6122                        sub_ymm)>;
6123
6124   def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6125             (EXTRACT_SUBREG (v16i32
6126               (VPRORVDZrr
6127                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6128                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6129                         sub_xmm)>;
6130   def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6131             (EXTRACT_SUBREG (v16i32
6132               (VPRORVDZrr
6133                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6134                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6135                         sub_ymm)>;
6136
6137   def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6138             (EXTRACT_SUBREG (v8i64
6139               (VPRORQZri
6140                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6141                         timm:$src2)), sub_xmm)>;
6142   def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6143             (EXTRACT_SUBREG (v8i64
6144               (VPRORQZri
6145                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6146                        timm:$src2)), sub_ymm)>;
6147
6148   def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6149             (EXTRACT_SUBREG (v16i32
6150               (VPRORDZri
6151                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6152                         timm:$src2)), sub_xmm)>;
6153   def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6154             (EXTRACT_SUBREG (v16i32
6155               (VPRORDZri
6156                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6157                         timm:$src2)), sub_ymm)>;
6158 }
6159
6160 //===-------------------------------------------------------------------===//
6161 // 1-src variable permutation VPERMW/B/D/Q and VPERMPS/PD
6162 //===-------------------------------------------------------------------===//
6163
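// The 1-source variable permutes below reuse the avx512_var_shift and
// avx512_var_shift_mb multiclasses defined above: the rr/rm/rmb operand shapes
// and EVEX encodings are identical, and only the SDNode (X86VPermv) and the
// scheduling class differ. The immediate-controlled VPERMQ/VPERMPD forms
// likewise reuse the shift-by-immediate multiclasses
// (avx512_shift_rmi / avx512_shift_rmbi).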
6164 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6165                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6166   let Predicates  = [HasAVX512] in
6167   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6168            avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6169
6170   let Predicates = [HasAVX512, HasVLX] in
6171   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6172               avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6173 }
6174
6175 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6176                                  string OpcodeStr, SDNode OpNode,
6177                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6178   let Predicates = [HasAVX512] in
6179   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6180                               sched, VTInfo.info512>,
6181              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6182                                sched, VTInfo.info512>, EVEX_V512;
6183   let Predicates = [HasAVX512, HasVLX] in
6184   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6185                               sched, VTInfo.info256>,
6186              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6187                                sched, VTInfo.info256>, EVEX_V256;
6188 }
6189
6190 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6191                               Predicate prd, SDNode OpNode,
6192                               X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6193   let Predicates = [prd] in
6194   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6195               EVEX_V512 ;
6196   let Predicates = [HasVLX, prd] in {
6197   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6198               EVEX_V256 ;
6199   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6200               EVEX_V128 ;
6201   }
6202 }
6203
6204 defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6205                                WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6206 defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6207                                WriteVarShuffle256, avx512vl_i8_info>;
6208
6209 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6210                                     WriteVarShuffle256, avx512vl_i32_info>;
6211 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6212                                     WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6213 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6214                                      WriteFVarShuffle256, avx512vl_f32_info>;
6215 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6216                                      WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
6217
6218 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6219                              X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6220                              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6221 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6222                              X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6223                              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6224
6225 //===----------------------------------------------------------------------===//
6226 // AVX-512 - VPERMIL
6227 //===----------------------------------------------------------------------===//
6228
6229 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6230                              X86FoldableSchedWrite sched, X86VectorVTInfo _,
6231                              X86VectorVTInfo Ctrl> {
6232   defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6233                   (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6234                   "$src2, $src1", "$src1, $src2",
6235                   (_.VT (OpNode _.RC:$src1,
6236                                (Ctrl.VT Ctrl.RC:$src2)))>,
6237                   T8PD, EVEX_4V, Sched<[sched]>;
6238   defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6239                   (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6240                   "$src2, $src1", "$src1, $src2",
6241                   (_.VT (OpNode
6242                            _.RC:$src1,
6243                            (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6244                   T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6245                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6246   defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6247                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6248                    "${src2}"##_.BroadcastStr##", $src1",
6249                    "$src1, ${src2}"##_.BroadcastStr,
6250                    (_.VT (OpNode
6251                             _.RC:$src1,
6252                             (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6253                    T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6254                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6255 }
6256
6257 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6258                                     X86SchedWriteWidths sched,
6259                                     AVX512VLVectorVTInfo _,
6260                                     AVX512VLVectorVTInfo Ctrl> {
6261   let Predicates = [HasAVX512] in {
6262     defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6263                                   _.info512, Ctrl.info512>, EVEX_V512;
6264   }
6265   let Predicates = [HasAVX512, HasVLX] in {
6266     defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6267                                   _.info128, Ctrl.info128>, EVEX_V128;
6268     defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6269                                   _.info256, Ctrl.info256>, EVEX_V256;
6270   }
6271 }
6272
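// avx512_permil groups both control flavours of VPERMILPS/PD under one name:
// OpcVar is the variable-control opcode (vector control in $src2, selected
// through X86VPermilpv), and OpcImm is the immediate-control opcode (selected
// through X86VPermilpi via the shift-by-immediate multiclasses).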
6273 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6274                          AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6275   defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6276                                       _, Ctrl>;
6277   defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6278                                     X86VPermilpi, SchedWriteFShuffle, _>,
6279                     EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6280 }
6281
6282 let ExeDomain = SSEPackedSingle in
6283 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6284                                avx512vl_i32_info>;
6285 let ExeDomain = SSEPackedDouble in
6286 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6287                                avx512vl_i64_info>, VEX_W1X;
6288
6289 //===----------------------------------------------------------------------===//
6290 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6291 //===----------------------------------------------------------------------===//
6292
6293 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6294                              X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6295                              EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6296 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6297                                   X86PShufhw, SchedWriteShuffle>,
6298                                   EVEX, AVX512XSIi8Base;
6299 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6300                                   X86PShuflw, SchedWriteShuffle>,
6301                                   EVEX, AVX512XDIi8Base;
6302
6303 //===----------------------------------------------------------------------===//
6304 // AVX-512 - VPSHUFB
6305 //===----------------------------------------------------------------------===//
6306
6307 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6308                                X86SchedWriteWidths sched> {
6309   let Predicates = [HasBWI] in
6310   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6311                               EVEX_V512;
6312
6313   let Predicates = [HasVLX, HasBWI] in {
6314   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6315                               EVEX_V256;
6316   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6317                               EVEX_V128;
6318   }
6319 }
6320
6321 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6322                                   SchedWriteVarShuffle>, VEX_WIG;
6323
6324 //===----------------------------------------------------------------------===//
6325 // Move Low to High and High to Low packed FP Instructions
6326 //===----------------------------------------------------------------------===//
6327
6328 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6329           (ins VR128X:$src1, VR128X:$src2),
6330           "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6331           [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6332           Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
6333 let isCommutable = 1 in
6334 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6335           (ins VR128X:$src1, VR128X:$src2),
6336           "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6337           [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6338           Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6339
6340 //===----------------------------------------------------------------------===//
6341 // VMOVHPS/PD and VMOVLPS/PD Instructions
6342 // All patterns were taken from the SSE implementation.
6343 //===----------------------------------------------------------------------===//
6344
6345 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6346                                   SDPatternOperator OpNode,
6347                                   X86VectorVTInfo _> {
6348   let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6349   def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6350                   (ins _.RC:$src1, f64mem:$src2),
6351                   !strconcat(OpcodeStr,
6352                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6353                   [(set _.RC:$dst,
6354                      (OpNode _.RC:$src1,
6355                        (_.VT (bitconvert
6356                          (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6357                   Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6358 }
6359
6360 // No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6361 // SSE1, and the MOVLPS pattern is even more complex.
6362 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6363                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6364 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6365                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6366 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6367                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6368 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6369                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6370
6371 let Predicates = [HasAVX512] in {
6372   // VMOVHPD patterns
6373   def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
6374                     (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
6375            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6376   def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6377             (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6378
6379   // VMOVLPD patterns
6380   def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6381             (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6382 }
6383
6384 let SchedRW = [WriteFStore] in {
6385 let mayStore = 1, hasSideEffects = 0 in
6386 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6387                        (ins f64mem:$dst, VR128X:$src),
6388                        "vmovhps\t{$src, $dst|$dst, $src}",
6389                        []>, EVEX, EVEX_CD8<32, CD8VT2>;
6390 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6391                        (ins f64mem:$dst, VR128X:$src),
6392                        "vmovhpd\t{$src, $dst|$dst, $src}",
6393                        [(store (f64 (extractelt
6394                                      (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6395                                      (iPTR 0))), addr:$dst)]>,
6396                        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6397 let mayStore = 1, hasSideEffects = 0 in
6398 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6399                        (ins f64mem:$dst, VR128X:$src),
6400                        "vmovlps\t{$src, $dst|$dst, $src}",
6401                        []>, EVEX, EVEX_CD8<32, CD8VT2>;
6402 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6403                        (ins f64mem:$dst, VR128X:$src),
6404                        "vmovlpd\t{$src, $dst|$dst, $src}",
6405                        [(store (f64 (extractelt (v2f64 VR128X:$src),
6406                                      (iPTR 0))), addr:$dst)]>,
6407                        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6408 } // SchedRW
6409
6410 let Predicates = [HasAVX512] in {
6411   // VMOVHPD patterns
6412   def : Pat<(store (f64 (extractelt
6413                            (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6414                            (iPTR 0))), addr:$dst),
6415            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6416 }
6417 //===----------------------------------------------------------------------===//
6418 // FMA - Fused Multiply Operations
6419 //===----------------------------------------------------------------------===//
6420
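// The 132/213/231 suffix names which source feeds each slot of the fused
// multiply-add, counting the tied destination as operand 1. Per element, the
// packed forms defined below compute (before masking/rounding):
//   vfmadd132:  dst = dst  * src3 + src2
//   vfmadd213:  dst = src2 * dst  + src3
//   vfmadd231:  dst = src2 * src3 + dst
// The memory and broadcast variants fold the $src3 operand.
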
6421 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6422                                X86FoldableSchedWrite sched,
6423                                X86VectorVTInfo _, string Suff> {
6424   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6425       Uses = [MXCSR], mayRaiseFPException = 1 in {
6426   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6427           (ins _.RC:$src2, _.RC:$src3),
6428           OpcodeStr, "$src3, $src2", "$src2, $src3",
6429           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6430           AVX512FMA3Base, Sched<[sched]>;
6431
6432   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6433           (ins _.RC:$src2, _.MemOp:$src3),
6434           OpcodeStr, "$src3, $src2", "$src2, $src3",
6435           (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6436           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6437
6438   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6439             (ins _.RC:$src2, _.ScalarMemOp:$src3),
6440             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6441             !strconcat("$src2, ${src3}", _.BroadcastStr ),
6442             (OpNode _.RC:$src2,
6443              _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6444              AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6445   }
6446 }
6447
6448 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6449                                  X86FoldableSchedWrite sched,
6450                                  X86VectorVTInfo _, string Suff> {
6451   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6452       Uses = [MXCSR] in
6453   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6454           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6455           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6456           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6457           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6458 }
6459
6460 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6461                                    SDNode OpNodeRnd, X86SchedWriteWidths sched,
6462                                    AVX512VLVectorVTInfo _, string Suff> {
6463   let Predicates = [HasAVX512] in {
6464     defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6465                                       _.info512, Suff>,
6466                   avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6467                                         _.info512, Suff>,
6468                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6469   }
6470   let Predicates = [HasVLX, HasAVX512] in {
6471     defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
6472                                     _.info256, Suff>,
6473                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6474     defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
6475                                     _.info128, Suff>,
6476                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6477   }
6478 }
6479
6480 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6481                               SDNode OpNodeRnd> {
6482     defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6483                                       SchedWriteFMA, avx512vl_f32_info, "PS">;
6484     defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6485                                       SchedWriteFMA, avx512vl_f64_info, "PD">,
6486                                       VEX_W;
6487 }
6488
6489 defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86any_Fmadd, X86FmaddRnd>;
6490 defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
6491 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
6492 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
6493 defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
6494 defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
6495
6496
6497 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6498                                X86FoldableSchedWrite sched,
6499                                X86VectorVTInfo _, string Suff> {
6500   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6501       Uses = [MXCSR], mayRaiseFPException = 1 in {
6502   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6503           (ins _.RC:$src2, _.RC:$src3),
6504           OpcodeStr, "$src3, $src2", "$src2, $src3",
6505           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
6506           vselect, 1>, AVX512FMA3Base, Sched<[sched]>;
6507
6508   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6509           (ins _.RC:$src2, _.MemOp:$src3),
6510           OpcodeStr, "$src3, $src2", "$src2, $src3",
6511           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6512           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6513
6514   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6515          (ins _.RC:$src2, _.ScalarMemOp:$src3),
6516          OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6517          "$src2, ${src3}"##_.BroadcastStr,
6518          (_.VT (OpNode _.RC:$src2,
6519                       (_.VT (_.BroadcastLdFrag addr:$src3)),
6520                       _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
6521          Sched<[sched.Folded, sched.ReadAfterFold]>;
6522   }
6523 }
6524
6525 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6526                                  X86FoldableSchedWrite sched,
6527                                  X86VectorVTInfo _, string Suff> {
6528   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6529       Uses = [MXCSR] in
6530   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6531           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6532           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6533           (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6534           1, 1, vselect, 1>,
6535           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6536 }
6537
6538 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6539                                    SDNode OpNodeRnd, X86SchedWriteWidths sched,
6540                                    AVX512VLVectorVTInfo _, string Suff> {
6541   let Predicates = [HasAVX512] in {
6542     defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6543                                       _.info512, Suff>,
6544                   avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6545                                         _.info512, Suff>,
6546                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6547   }
6548   let Predicates = [HasVLX, HasAVX512] in {
6549     defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
6550                                     _.info256, Suff>,
6551                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6552     defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
6553                                     _.info128, Suff>,
6554                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6555   }
6556 }
6557
6558 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6559                               SDNode OpNodeRnd > {
6560     defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6561                                       SchedWriteFMA, avx512vl_f32_info, "PS">;
6562     defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6563                                       SchedWriteFMA, avx512vl_f64_info, "PD">,
6564                                       VEX_W;
6565 }
6566
6567 defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86any_Fmadd, X86FmaddRnd>;
6568 defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
6569 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
6570 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
6571 defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
6572 defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
6573
6574 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6575                                X86FoldableSchedWrite sched,
6576                                X86VectorVTInfo _, string Suff> {
6577   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6578       Uses = [MXCSR], mayRaiseFPException = 1 in {
6579   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6580           (ins _.RC:$src2, _.RC:$src3),
6581           OpcodeStr, "$src3, $src2", "$src2, $src3",
6582           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
6583           AVX512FMA3Base, Sched<[sched]>;
6584
6585   // Pattern is in 312 order so that the load is in a different place from the
6586   // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
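  // For reference, the load sits in a different operand slot in each of the
  // three packed memory patterns defined in this file:
  //   213:  (OpNode $src2, $src1, (load $src3))   - load in slot 3
  //   231:  (OpNode $src2, (load $src3), $src1)   - load in slot 2
  //   132:  (OpNode (load $src3), $src1, $src2)   - load in slot 1 (312 order)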
6587   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6588           (ins _.RC:$src2, _.MemOp:$src3),
6589           OpcodeStr, "$src3, $src2", "$src2, $src3",
6590           (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6591           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6592
6593   // Pattern is in 312 order so that the load is in a different place from the
6594   // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6595   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6596          (ins _.RC:$src2, _.ScalarMemOp:$src3),
6597          OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6598          "$src2, ${src3}"##_.BroadcastStr,
6599          (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6600                        _.RC:$src1, _.RC:$src2)), 1, 0>,
6601          AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6602   }
6603 }
6604
6605 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6606                                  X86FoldableSchedWrite sched,
6607                                  X86VectorVTInfo _, string Suff> {
6608   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6609       Uses = [MXCSR] in
6610   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6611           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6612           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6613           (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6614           1, 1, vselect, 1>,
6615           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
6616 }
6617
6618 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6619                                    SDNode OpNodeRnd, X86SchedWriteWidths sched,
6620                                    AVX512VLVectorVTInfo _, string Suff> {
6621   let Predicates = [HasAVX512] in {
6622     defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6623                                       _.info512, Suff>,
6624                   avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6625                                         _.info512, Suff>,
6626                               EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6627   }
6628   let Predicates = [HasVLX, HasAVX512] in {
6629     defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
6630                                     _.info256, Suff>,
6631                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6632     defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
6633                                     _.info128, Suff>,
6634                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6635   }
6636 }
6637
6638 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6639                               SDNode OpNodeRnd > {
6640     defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6641                                       SchedWriteFMA, avx512vl_f32_info, "PS">;
6642     defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6643                                       SchedWriteFMA, avx512vl_f64_info, "PD">,
6644                                       VEX_W;
6645 }
6646
6647 defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86any_Fmadd, X86FmaddRnd>;
6648 defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
6649 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
6650 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
6651 defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
6652 defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
6653
6654 // Scalar FMA
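// Parameter sketch for avx512_fma3s_common: RHS_r, RHS_m and RHS_b are the
// result patterns attached to the isCodeGenOnly register, memory and rounding
// forms respectively, while the *_Int forms carry no patterns and are matched
// by avx512_scalar_fma_patterns further down. When MaskOnlyReg is set, the
// register and rounding codegen-only defs are emitted without patterns and
// only the memory form keeps RHS_m.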
6655 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6656                                dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6657 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6658   defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6659           (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6660           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6661           AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
6662
6663   let mayLoad = 1 in
6664   defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6665           (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6666           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6667           AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
6668
6669   let Uses = [MXCSR] in
6670   defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6671          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6672          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
6673          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
6674
6675   let isCodeGenOnly = 1, isCommutable = 1 in {
6676     def r     : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6677                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6678                      !strconcat(OpcodeStr,
6679                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6680                      !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
6681     def m     : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
6682                     (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6683                     !strconcat(OpcodeStr,
6684                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6685                     [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
6686
6687     let Uses = [MXCSR] in
6688     def rb    : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6689                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6690                      !strconcat(OpcodeStr,
6691                               "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
6692                      !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6693                      Sched<[SchedWriteFMA.Scl]>;
6694   }// isCodeGenOnly = 1
6695 }// Constraints = "$src1 = $dst"
6696 }
6697
6698 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6699                             string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
6700                             X86VectorVTInfo _, string SUFF> {
6701   let ExeDomain = _.ExeDomain in {
6702   defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6703                 // Operands for the intrinsic are in 123 order to preserve passthru
6704                 // semantics.
6705                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6706                          _.FRC:$src3))),
6707                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6708                          (_.ScalarLdFrag addr:$src3)))),
6709                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6710                          _.FRC:$src3, (i32 timm:$rc)))), 0>;
6711
6712   defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6713                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6714                                           _.FRC:$src1))),
6715                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6716                             (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6717                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6718                          _.FRC:$src1, (i32 timm:$rc)))), 1>;
6719
6720   // One pattern is in 312 order so that the load is in a different place from the
6721   // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6722   defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6723                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6724                          _.FRC:$src2))),
6725                 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6726                                  _.FRC:$src1, _.FRC:$src2))),
6727                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6728                          _.FRC:$src2, (i32 timm:$rc)))), 1>;
6729   }
6730 }
6731
6732 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6733                         string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
6734   let Predicates = [HasAVX512] in {
6735     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6736                                  OpNodeRnd, f32x_info, "SS">,
6737                                  EVEX_CD8<32, CD8VT1>, VEX_LIG;
6738     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6739                                  OpNodeRnd, f64x_info, "SD">,
6740                                  EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
6741   }
6742 }
6743
6744 defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86any_Fmadd, X86FmaddRnd>;
6745 defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
6746 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
6747 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
6748
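// avx512_scalar_fma_patterns maps scalar FMA dags whose accumulator/passthru
// is element 0 of an xmm vector onto the *_Int forms defined above. It covers
// the unmasked, merge-masked (X86selects falling back to the original element
// 0), zero-masked (ZeroFP fallback) and rounding (RndOp) variants, for both
// register and folded-load operands.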
6749 multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6750                                       string Suffix, SDNode Move,
6751                                       X86VectorVTInfo _, PatLeaf ZeroFP> {
6752   let Predicates = [HasAVX512] in {
6753     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6754                 (Op _.FRC:$src2,
6755                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6756                     _.FRC:$src3))))),
6757               (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6758                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6759                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6760
6761     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6762                 (Op _.FRC:$src2, _.FRC:$src3,
6763                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6764               (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6765                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6766                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6767
6768     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6769                 (Op _.FRC:$src2,
6770                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6771                     (_.ScalarLdFrag addr:$src3)))))),
6772               (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6773                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6774                addr:$src3)>;
6775
6776     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6777                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6778                     (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6779               (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6780                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6781                addr:$src3)>;
6782
6783     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6784                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6785                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6786               (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6787                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6788                addr:$src3)>;
6789
6790     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6791                (X86selects VK1WM:$mask,
6792                 (Op _.FRC:$src2,
6793                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6794                     _.FRC:$src3),
6795                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6796               (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6797                VR128X:$src1, VK1WM:$mask,
6798                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6799                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6800
6801     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6802                (X86selects VK1WM:$mask,
6803                 (Op _.FRC:$src2,
6804                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6805                     (_.ScalarLdFrag addr:$src3)),
6806                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6807               (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6808                VR128X:$src1, VK1WM:$mask,
6809                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6810
6811     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6812                (X86selects VK1WM:$mask,
6813                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6814                     (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6815                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6816               (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6817                VR128X:$src1, VK1WM:$mask,
6818                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6819
6820     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6821                (X86selects VK1WM:$mask,
6822                 (Op _.FRC:$src2, _.FRC:$src3,
6823                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6824                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6825               (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
6826                VR128X:$src1, VK1WM:$mask,
6827                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6828                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6829
6830     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6831                (X86selects VK1WM:$mask,
6832                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6833                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6834                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6835               (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
6836                VR128X:$src1, VK1WM:$mask,
6837                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6838
6839     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6840                (X86selects VK1WM:$mask,
6841                 (Op _.FRC:$src2,
6842                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6843                     _.FRC:$src3),
6844                 (_.EltVT ZeroFP)))))),
6845               (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
6846                VR128X:$src1, VK1WM:$mask,
6847                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6848                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6849
6850     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6851                (X86selects VK1WM:$mask,
6852                 (Op _.FRC:$src2, _.FRC:$src3,
6853                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6854                 (_.EltVT ZeroFP)))))),
6855               (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
6856                VR128X:$src1, VK1WM:$mask,
6857                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6858                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6859
6860     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6861                (X86selects VK1WM:$mask,
6862                 (Op _.FRC:$src2,
6863                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6864                     (_.ScalarLdFrag addr:$src3)),
6865                 (_.EltVT ZeroFP)))))),
6866               (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
6867                VR128X:$src1, VK1WM:$mask,
6868                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6869
6870     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6871                (X86selects VK1WM:$mask,
6872                 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6873                     _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6874                 (_.EltVT ZeroFP)))))),
6875               (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
6876                VR128X:$src1, VK1WM:$mask,
6877                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6878
6879     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6880                (X86selects VK1WM:$mask,
6881                 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6882                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6883                 (_.EltVT ZeroFP)))))),
6884               (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
6885                VR128X:$src1, VK1WM:$mask,
6886                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6887
6888     // Patterns with rounding mode.
6889     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6890                 (RndOp _.FRC:$src2,
6891                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6892                        _.FRC:$src3, (i32 timm:$rc)))))),
6893               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
6894                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6895                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6896
6897     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6898                 (RndOp _.FRC:$src2, _.FRC:$src3,
6899                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6900                        (i32 timm:$rc)))))),
6901               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
6902                VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6903                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6904
6905     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6906                (X86selects VK1WM:$mask,
6907                 (RndOp _.FRC:$src2,
6908                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6909                        _.FRC:$src3, (i32 timm:$rc)),
6910                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6911               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
6912                VR128X:$src1, VK1WM:$mask,
6913                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6914                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6915
6916     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6917                (X86selects VK1WM:$mask,
6918                 (RndOp _.FRC:$src2, _.FRC:$src3,
6919                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6920                        (i32 timm:$rc)),
6921                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6922               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
6923                VR128X:$src1, VK1WM:$mask,
6924                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6925                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6926
6927     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6928                (X86selects VK1WM:$mask,
6929                 (RndOp _.FRC:$src2,
6930                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6931                        _.FRC:$src3, (i32 timm:$rc)),
6932                 (_.EltVT ZeroFP)))))),
6933               (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
6934                VR128X:$src1, VK1WM:$mask,
6935                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6936                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6937
6938     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6939                (X86selects VK1WM:$mask,
6940                 (RndOp _.FRC:$src2, _.FRC:$src3,
6941                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6942                        (i32 timm:$rc)),
6943                 (_.EltVT ZeroFP)))))),
6944               (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
6945                VR128X:$src1, VK1WM:$mask,
6946                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6947                (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6948   }
6949 }
6950
6951 defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SS",
6952                                   X86Movss, v4f32x_info, fp32imm0>;
6953 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
6954                                   X86Movss, v4f32x_info, fp32imm0>;
6955 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
6956                                   X86Movss, v4f32x_info, fp32imm0>;
6957 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
6958                                   X86Movss, v4f32x_info, fp32imm0>;
6959
6960 defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SD",
6961                                   X86Movsd, v2f64x_info, fp64imm0>;
6962 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
6963                                   X86Movsd, v2f64x_info, fp64imm0>;
6964 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
6965                                   X86Movsd, v2f64x_info, fp64imm0>;
6966 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
6967                                   X86Movsd, v2f64x_info, fp64imm0>;
6968
6969 //===----------------------------------------------------------------------===//
6970 // AVX-512 IFMA - Packed Multiply of Unsigned 52-bit Integers and Add the Low/High 52-bit Products to Qword Accumulators
6971 //===----------------------------------------------------------------------===//
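// Semantics sketch, per 64-bit element (the accumulator is the tied
// $src1/dst operand):
//   product      = zext(a[51:0]) * zext(b[51:0])      // 104-bit result
//   VPMADD52LUQ: dst += zext(product[51:0])
//   VPMADD52HUQ: dst += zext(product[103:52])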
6972 let Constraints = "$src1 = $dst" in {
6973 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6974                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6975   // NOTE: The SDNode has the multiply operands first, with the add last.
6976   // This enables commuted load patterns to be autogenerated by tablegen.
6977   let ExeDomain = _.ExeDomain in {
6978   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6979           (ins _.RC:$src2, _.RC:$src3),
6980           OpcodeStr, "$src3, $src2", "$src2, $src3",
6981           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6982          AVX512FMA3Base, Sched<[sched]>;
6983
6984   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6985           (ins _.RC:$src2, _.MemOp:$src3),
6986           OpcodeStr, "$src3, $src2", "$src2, $src3",
6987           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
6988           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6989
6990   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6991             (ins _.RC:$src2, _.ScalarMemOp:$src3),
6992             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6993             !strconcat("$src2, ${src3}", _.BroadcastStr ),
6994             (OpNode _.RC:$src2,
6995                     (_.VT (_.BroadcastLdFrag addr:$src3)),
6996                     _.RC:$src1)>,
6997             AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6998   }
6999 }
7000 } // Constraints = "$src1 = $dst"
7001
7002 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7003                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7004   let Predicates = [HasIFMA] in {
7005     defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7006                       EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7007   }
7008   let Predicates = [HasVLX, HasIFMA] in {
7009     defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7010                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7011     defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7012                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7013   }
7014 }
7015
7016 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7017                                          SchedWriteVecIMul, avx512vl_i64_info>,
7018                                          VEX_W;
7019 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7020                                          SchedWriteVecIMul, avx512vl_i64_info>,
7021                                          VEX_W;
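// Usage sketch for the IFMA definitions above (illustrative only; the C code
// below assumes the standard <immintrin.h> AVX512IFMA intrinsics and is not
// generated from this file):
//
//   #include <immintrin.h>
//   // Per 64-bit lane: acc += low/high 52 bits of (lo52(b) * lo52(c)).
//   __m512i ifma52_demo(__m512i acc, __m512i b, __m512i c) {
//     acc = _mm512_madd52lo_epu64(acc, b, c); // VPMADD52LUQ
//     acc = _mm512_madd52hi_epu64(acc, b, c); // VPMADD52HUQ
//     return acc;
//   }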
7022
7023 //===----------------------------------------------------------------------===//
7024 // AVX-512  Scalar convert from sign integer to float/double
7025 //===----------------------------------------------------------------------===//
7026
7027 multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7028                     RegisterClass SrcRC, X86VectorVTInfo DstVT,
7029                     X86MemOperand x86memop, PatFrag ld_frag, string asm,
7030                     string mem, list<Register> _Uses = [MXCSR],
7031                     bit _mayRaiseFPException = 1> {
7032 let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7033     mayRaiseFPException = _mayRaiseFPException in {
7034   let hasSideEffects = 0, isCodeGenOnly = 1 in {
7035     def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7036               (ins DstVT.FRC:$src1, SrcRC:$src),
7037               !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7038               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7039     let mayLoad = 1 in
7040       def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7041               (ins DstVT.FRC:$src1, x86memop:$src),
7042               asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7043               EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7044   } // hasSideEffects = 0
7045   def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7046                 (ins DstVT.RC:$src1, SrcRC:$src2),
7047                 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7048                 [(set DstVT.RC:$dst,
7049                       (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7050                EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7051
7052   def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7053                 (ins DstVT.RC:$src1, x86memop:$src2),
7054                 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7055                 [(set DstVT.RC:$dst,
7056                       (OpNode (DstVT.VT DstVT.RC:$src1),
7057                                (ld_frag addr:$src2)))]>,
7058                 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7059 }
7060   def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7061                   (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7062                   DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
7063 }
7064
7065 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7066                                X86FoldableSchedWrite sched, RegisterClass SrcRC,
7067                                X86VectorVTInfo DstVT, string asm,
7068                                string mem> {
7069   let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7070   def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7071               (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7072               !strconcat(asm,
7073                   "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7074               [(set DstVT.RC:$dst,
7075                     (OpNode (DstVT.VT DstVT.RC:$src1),
7076                              SrcRC:$src2,
7077                              (i32 timm:$rc)))]>,
7078               EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7079   def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7080                   (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7081                   DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7082 }
7083
7084 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7085                                 X86FoldableSchedWrite sched,
7086                                 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7087                                 X86MemOperand x86memop, PatFrag ld_frag,
7088                                 string asm, string mem> {
7089   defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7090               avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7091                             ld_frag, asm, mem>, VEX_LIG;
7092 }
7093
7094 let Predicates = [HasAVX512] in {
7095 defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7096                                  WriteCvtI2SS, GR32,
7097                                  v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7098                                  XS, EVEX_CD8<32, CD8VT1>;
7099 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7100                                  WriteCvtI2SS, GR64,
7101                                  v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7102                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7103 defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7104                                  v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7105                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7106 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7107                                  WriteCvtI2SD, GR64,
7108                                  v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7109                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7110
7111 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7112               (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7113 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7114               (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7115
7116 def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7117           (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7118 def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7119           (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7120 def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7121           (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7122 def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7123           (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7124
7125 def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7126           (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7127 def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7128           (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7129 def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7130           (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7131 def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7132           (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7133
7134 defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7135                                   WriteCvtI2SS, GR32,
7136                                   v4f32x_info, i32mem, loadi32,
7137                                   "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7138 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7139                                   WriteCvtI2SS, GR64,
7140                                   v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7141                                   XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7142 defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7143                                   i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7144                                   XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7145 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7146                                   WriteCvtI2SD, GR64,
7147                                   v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7148                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7149
7150 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7151               (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7152 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7153               (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7154
7155 def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7156           (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7157 def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7158           (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7159 def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7160           (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7161 def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7162           (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7163
7164 def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7165           (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7166 def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7167           (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7168 def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7169           (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7170 def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7171           (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7172 }
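// Usage sketch for the scalar integer-to-FP conversions above (illustrative
// only; assumes the standard <immintrin.h> intrinsics and _MM_FROUND_* macros):
//
//   #include <immintrin.h>
//   float  si2ss(int x)                 { return (float)x;  }   // vcvtsi2ss
//   double usi2sd(unsigned long long x) { return (double)x; }   // vcvtusi2sd
//   // Static rounding selects the rrb_Int (EVEX.b + rounding-control) forms:
//   __m128 u64_to_ss_rz(__m128 a, unsigned long long x) {
//     return _mm_cvt_roundu64_ss(a, x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
//   }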
7173
7174 //===----------------------------------------------------------------------===//
7175 // AVX-512  Scalar convert from float/double to integer
7176 //===----------------------------------------------------------------------===//
7177
7178 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7179                                   X86VectorVTInfo DstVT, SDNode OpNode,
7180                                   SDNode OpNodeRnd,
7181                                   X86FoldableSchedWrite sched, string asm,
7182                                   string aliasStr> {
7183   let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7184     def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7185                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7186                 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7187                 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7188     let Uses = [MXCSR] in
7189     def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7190                  !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7191                  [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7192                  EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7193                  Sched<[sched]>;
7194     def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7195                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7196                 [(set DstVT.RC:$dst, (OpNode
7197                       (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
7198                 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7199   } // Predicates = [HasAVX512]
7200
7201   def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7202           (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7203   def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7204           (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7205   def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7206           (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7207                                           SrcVT.IntScalarMemOp:$src), 0, "att">;
7208 }
7209
7210 // Convert float/double to signed/unsigned int 32/64
7211 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7212                                    X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7213                                    XS, EVEX_CD8<32, CD8VT1>;
7214 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7215                                    X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7216                                    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7217 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7218                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7219                                    XS, EVEX_CD8<32, CD8VT1>;
7220 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7221                                    X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7222                                    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7223 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7224                                    X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7225                                    XD, EVEX_CD8<64, CD8VT1>;
7226 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7227                                    X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7228                                    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7229 defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7230                                    X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7231                                    XD, EVEX_CD8<64, CD8VT1>;
7232 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7233                                    X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7234                                    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
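// Usage sketch for the scalar FP-to-integer conversions above (illustrative
// only; standard <immintrin.h> intrinsics, not definitions from this file):
//
//   #include <immintrin.h>
//   int      ss2si(__m128 a)   { return _mm_cvtss_si32(a); }   // vcvtss2si
//   unsigned sd2usi(__m128d a) { return _mm_cvtsd_u32(a);  }   // vcvtsd2usi
//   // An explicit rounding mode selects the rrb_Int (EVEX_RC) forms:
//   int ss2si_rd(__m128 a) {
//     return _mm_cvt_roundss_si32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
//   }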
7235
7236 // Patterns used to match the vcvtsi2s{s,d} intrinsic sequences from clang,
7237 // which would otherwise produce unnecessary vmovs{s,d} instructions.
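// For reference, a minimal sketch of the kind of source that produces such a
// sequence (assuming the standard <immintrin.h> intrinsic; illustrative only):
//
//   #include <immintrin.h>
//   // Conceptually a scalar convert plus a movss-style blend into $dst; the
//   // patterns below fold both into a single vcvtsi2ss with $dst as src1.
//   __m128 set_low_from_int(__m128 a, int i) {
//     return _mm_cvtsi32_ss(a, i);   // -> VCVTSI2SSZrr_Int
//   }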
7238 let Predicates = [HasAVX512] in {
7239 def : Pat<(v4f32 (X86Movss
7240                    (v4f32 VR128X:$dst),
7241                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7242           (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7243
7244 def : Pat<(v4f32 (X86Movss
7245                    (v4f32 VR128X:$dst),
7246                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7247           (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7248
7249 def : Pat<(v4f32 (X86Movss
7250                    (v4f32 VR128X:$dst),
7251                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7252           (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7253
7254 def : Pat<(v4f32 (X86Movss
7255                    (v4f32 VR128X:$dst),
7256                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7257           (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7258
7259 def : Pat<(v2f64 (X86Movsd
7260                    (v2f64 VR128X:$dst),
7261                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7262           (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7263
7264 def : Pat<(v2f64 (X86Movsd
7265                    (v2f64 VR128X:$dst),
7266                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7267           (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7268
7269 def : Pat<(v2f64 (X86Movsd
7270                    (v2f64 VR128X:$dst),
7271                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7272           (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7273
7274 def : Pat<(v2f64 (X86Movsd
7275                    (v2f64 VR128X:$dst),
7276                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7277           (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7278
7279 def : Pat<(v4f32 (X86Movss
7280                    (v4f32 VR128X:$dst),
7281                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7282           (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7283
7284 def : Pat<(v4f32 (X86Movss
7285                    (v4f32 VR128X:$dst),
7286                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7287           (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7288
7289 def : Pat<(v4f32 (X86Movss
7290                    (v4f32 VR128X:$dst),
7291                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7292           (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7293
7294 def : Pat<(v4f32 (X86Movss
7295                    (v4f32 VR128X:$dst),
7296                    (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7297           (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7298
7299 def : Pat<(v2f64 (X86Movsd
7300                    (v2f64 VR128X:$dst),
7301                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7302           (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7303
7304 def : Pat<(v2f64 (X86Movsd
7305                    (v2f64 VR128X:$dst),
7306                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7307           (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7308
7309 def : Pat<(v2f64 (X86Movsd
7310                    (v2f64 VR128X:$dst),
7311                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7312           (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7313
7314 def : Pat<(v2f64 (X86Movsd
7315                    (v2f64 VR128X:$dst),
7316                    (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7317           (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7318 } // Predicates = [HasAVX512]
7319
7320 // Convert float/double to signed/unsigned int 32/64 with truncation
7321 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7322                             X86VectorVTInfo _DstRC, SDNode OpNode,
7323                             SDNode OpNodeInt, SDNode OpNodeSAE,
7324                             X86FoldableSchedWrite sched, string aliasStr>{
7325 let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
7326   let isCodeGenOnly = 1 in {
7327   def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7328               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7329               [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7330               EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7331   def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7332               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7333               [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7334               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7335   }
7336
7337   def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7338             !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7339            [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7340            EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7341   let Uses = [MXCSR] in
7342   def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7343             !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7344             [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7345                                   EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7346   def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7347               (ins _SrcRC.IntScalarMemOp:$src),
7348               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7349               [(set _DstRC.RC:$dst,
7350                 (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
7351               EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7352 } //HasAVX512
7353
7354   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7355           (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7356   def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7357           (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7358   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7359           (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7360                                           _SrcRC.IntScalarMemOp:$src), 0, "att">;
7361 }
7362
7363 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7364                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7365                         "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7366 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7367                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7368                         "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7369 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7370                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7371                         "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7372 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7373                         any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7374                         "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7375
7376 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7377                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7378                         "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7379 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7380                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7381                         "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
7382 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7383                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7384                         "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7385 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7386                         any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7387                         "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
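// Usage sketch for the truncating conversions above (illustrative only; a
// plain C cast truncates, so it maps onto these forms when compiling for
// AVX-512):
//
//   #include <immintrin.h>
//   int                ttss(float x)  { return (int)x; }                // vcvttss2si
//   unsigned long long ttsd(double x) { return (unsigned long long)x; } // vcvttsd2usi
//   // SAE form (rrb_Int): truncation is implied, {sae} only suppresses
//   // exception flags:
//   int ttss_sae(__m128 a) { return _mm_cvtt_roundss_si32(a, _MM_FROUND_NO_EXC); }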
7388
7389 //===----------------------------------------------------------------------===//
7390 // AVX-512  Convert from float to double and back
7391 //===----------------------------------------------------------------------===//
7392
7393 let Uses = [MXCSR], mayRaiseFPException = 1 in
7394 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7395                                 X86VectorVTInfo _Src, SDNode OpNode,
7396                                 X86FoldableSchedWrite sched> {
7397   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7398                          (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7399                          "$src2, $src1", "$src1, $src2",
7400                          (_.VT (OpNode (_.VT _.RC:$src1),
7401                                        (_Src.VT _Src.RC:$src2)))>,
7402                          EVEX_4V, VEX_LIG, Sched<[sched]>;
7403   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7404                          (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7405                          "$src2, $src1", "$src1, $src2",
7406                          (_.VT (OpNode (_.VT _.RC:$src1),
7407                                   (_Src.VT _Src.ScalarIntMemCPat:$src2)))>,
7408                          EVEX_4V, VEX_LIG,
7409                          Sched<[sched.Folded, sched.ReadAfterFold]>;
7410
7411   let isCodeGenOnly = 1, hasSideEffects = 0 in {
7412     def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7413                (ins _.FRC:$src1, _Src.FRC:$src2),
7414                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7415                EVEX_4V, VEX_LIG, Sched<[sched]>;
7416     let mayLoad = 1 in
7417     def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7418                (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7419                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7420                EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7421   }
7422 }
7423
7424 // Scalar Conversion with SAE - suppress all exceptions
7425 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7426                                     X86VectorVTInfo _Src, SDNode OpNodeSAE,
7427                                     X86FoldableSchedWrite sched> {
7428   let Uses = [MXCSR] in
7429   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7430                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7431                         "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7432                         (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7433                                          (_Src.VT _Src.RC:$src2)))>,
7434                         EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7435 }
7436
7437 // Scalar Conversion with rounding control (RC)
7438 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7439                                    X86VectorVTInfo _Src, SDNode OpNodeRnd,
7440                                    X86FoldableSchedWrite sched> {
7441   let Uses = [MXCSR] in
7442   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7443                         (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7444                         "$rc, $src2, $src1", "$src1, $src2, $rc",
7445                         (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7446                                          (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7447                         EVEX_4V, VEX_LIG, Sched<[sched]>,
7448                         EVEX_B, EVEX_RC;
7449 }
7450 multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
7451                                       SDNode OpNode, SDNode OpNodeRnd,
7452                                       X86FoldableSchedWrite sched,
7453                                       X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7454   let Predicates = [HasAVX512] in {
7455     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7456              avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7457                                OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
7458   }
7459 }
7460
7461 multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
7462                                       SDNode OpNode, SDNode OpNodeSAE,
7463                                       X86FoldableSchedWrite sched,
7464                                       X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7465   let Predicates = [HasAVX512] in {
7466     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7467              avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7468              EVEX_CD8<32, CD8VT1>, XS;
7469   }
7470 }
7471 defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
7472                                          X86froundsRnd, WriteCvtSD2SS, f64x_info,
7473                                          f32x_info>;
7474 defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
7475                                           X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7476                                           f64x_info>;
7477
7478 def : Pat<(f64 (any_fpextend FR32X:$src)),
7479           (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7480           Requires<[HasAVX512]>;
7481 def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7482           (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7483           Requires<[HasAVX512, OptForSize]>;
7484
7485 def : Pat<(f32 (any_fpround FR64X:$src)),
7486           (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7487            Requires<[HasAVX512]>;
7488
7489 def : Pat<(v4f32 (X86Movss
7490                    (v4f32 VR128X:$dst),
7491                    (v4f32 (scalar_to_vector
7492                      (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7493           (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7494           Requires<[HasAVX512]>;
7495
7496 def : Pat<(v2f64 (X86Movsd
7497                    (v2f64 VR128X:$dst),
7498                    (v2f64 (scalar_to_vector
7499                      (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7500           (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7501           Requires<[HasAVX512]>;
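// Usage sketch for the scalar float<->double conversions above (illustrative
// only; standard C casts and <immintrin.h> intrinsics):
//
//   #include <immintrin.h>
//   double ext(float x)  { return (double)x; }   // vcvtss2sd
//   float  rnd(double x) { return (float)x;  }   // vcvtsd2ss
//   // Register forms matched by the X86Movss/X86Movsd patterns above:
//   __m128  sd2ss_low(__m128 a, __m128d b) { return _mm_cvtsd_ss(a, b); }
//   __m128d ss2sd_low(__m128d a, __m128 b) { return _mm_cvtss_sd(a, b); }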
7502
7503 //===----------------------------------------------------------------------===//
7504 // AVX-512  Vector convert from signed/unsigned integer to float/double
7505 //          and from float/double to signed/unsigned integer
7506 //===----------------------------------------------------------------------===//
7507
7508 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7509                           X86VectorVTInfo _Src, SDNode OpNode,
7510                           X86FoldableSchedWrite sched,
7511                           string Broadcast = _.BroadcastStr,
7512                           string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7513                           RegisterClass MaskRC = _.KRCWM,
7514                           dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7515 let Uses = [MXCSR], mayRaiseFPException = 1 in {
7516   defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
7517                          (ins _Src.RC:$src),
7518                          (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7519                          (ins MaskRC:$mask, _Src.RC:$src),
7520                           OpcodeStr, "$src", "$src",
7521                          (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7522                          (vselect MaskRC:$mask,
7523                                   (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7524                                   _.RC:$src0),
7525                          vselect, "$src0 = $dst">,
7526                          EVEX, Sched<[sched]>;
7527
7528   defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
7529                          (ins MemOp:$src),
7530                          (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7531                          (ins MaskRC:$mask, MemOp:$src),
7532                          OpcodeStr#Alias, "$src", "$src",
7533                          LdDAG,
7534                          (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
7535                          vselect, "$src0 = $dst">,
7536                          EVEX, Sched<[sched.Folded]>;
7537
7538   defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
7539                          (ins _Src.ScalarMemOp:$src),
7540                          (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7541                          (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7542                          OpcodeStr,
7543                          "${src}"##Broadcast, "${src}"##Broadcast,
7544                          (_.VT (OpNode (_Src.VT
7545                                   (_Src.BroadcastLdFrag addr:$src))
7546                             )),
7547                          (vselect MaskRC:$mask,
7548                                   (_.VT
7549                                    (OpNode
7550                                     (_Src.VT
7551                                      (_Src.BroadcastLdFrag addr:$src)))),
7552                                   _.RC:$src0),
7553                          vselect, "$src0 = $dst">,
7554                          EVEX, EVEX_B, Sched<[sched.Folded]>;
7555   }
7556 }
7557 // Conversion with SAE - suppress all exceptions
7558 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7559                               X86VectorVTInfo _Src, SDNode OpNodeSAE,
7560                               X86FoldableSchedWrite sched> {
7561   let Uses = [MXCSR] in
7562   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7563                         (ins _Src.RC:$src), OpcodeStr,
7564                         "{sae}, $src", "$src, {sae}",
7565                         (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7566                         EVEX, EVEX_B, Sched<[sched]>;
7567 }
7568
7569 // Conversion with rounding control (RC)
7570 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7571                          X86VectorVTInfo _Src, SDNode OpNodeRnd,
7572                          X86FoldableSchedWrite sched> {
7573   let Uses = [MXCSR] in
7574   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7575                         (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7576                         "$rc, $src", "$src, $rc",
7577                         (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7578                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7579 }
7580
7581 // Similar to avx512_vcvt_fp, but uses an extload for the memory form.
7582 multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7583                                 X86VectorVTInfo _Src, SDNode OpNode,
7584                                 X86FoldableSchedWrite sched,
7585                                 string Broadcast = _.BroadcastStr,
7586                                 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7587                                 RegisterClass MaskRC = _.KRCWM>
7588   : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
7589                    MemOp, MaskRC,
7590                    (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7591
7592 // Extend Float to Double
7593 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
7594                            X86SchedWriteWidths sched> {
7595   let Predicates = [HasAVX512] in {
7596     defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
7597                             any_fpextend, sched.ZMM>,
7598              avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
7599                                 X86vfpextSAE, sched.ZMM>, EVEX_V512;
7600   }
7601   let Predicates = [HasVLX] in {
7602     defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
7603                                X86any_vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
7604     defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, any_fpextend,
7605                                sched.YMM>, EVEX_V256;
7606   }
7607 }
7608
7609 // Truncate Double to Float
7610 multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
7611   let Predicates = [HasAVX512] in {
7612     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86any_vfpround, sched.ZMM>,
7613              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
7614                                X86vfproundRnd, sched.ZMM>, EVEX_V512;
7615   }
7616   let Predicates = [HasVLX] in {
7617     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
7618                                null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
7619                                EVEX_V128;
7620     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86any_vfpround,
7621                                sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7622   }
7623
7624   def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7625                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7626   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7627                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7628                   VK2WM:$mask, VR128X:$src), 0, "att">;
7629   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|"
7630                   "$dst {${mask}} {z}, $src}",
7631                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7632                   VK2WM:$mask, VR128X:$src), 0, "att">;
7633   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7634                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7635   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
7636                   "$dst {${mask}}, ${src}{1to2}}",
7637                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7638                   VK2WM:$mask, f64mem:$src), 0, "att">;
7639   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7640                   "$dst {${mask}} {z}, ${src}{1to2}}",
7641                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7642                   VK2WM:$mask, f64mem:$src), 0, "att">;
7643
7644   def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7645                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7646   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7647                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7648                   VK4WM:$mask, VR256X:$src), 0, "att">;
7649   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
7650                   "$dst {${mask}} {z}, $src}",
7651                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7652                   VK4WM:$mask, VR256X:$src), 0, "att">;
7653   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7654                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7655   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
7656                   "$dst {${mask}}, ${src}{1to4}}",
7657                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7658                   VK4WM:$mask, f64mem:$src), 0, "att">;
7659   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7660                   "$dst {${mask}} {z}, ${src}{1to4}}",
7661                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7662                   VK4WM:$mask, f64mem:$src), 0, "att">;
7663 }
7664
7665 defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
7666                                   VEX_W, PD, EVEX_CD8<64, CD8VF>;
7667 defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
7668                                   PS, EVEX_CD8<32, CD8VH>;
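// Usage sketch for the 512-bit packed forms above (illustrative only; assumes
// the standard <immintrin.h> AVX-512 intrinsics):
//
//   #include <immintrin.h>
//   __m512d up(__m256 a)    { return _mm512_cvtps_pd(a); }   // vcvtps2pd
//   __m256  down(__m512d a) { return _mm512_cvtpd_ps(a); }   // vcvtpd2ps
//   __m256  down_rz(__m512d a) {
//     return _mm512_cvt_roundpd_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
//   }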
7669
7670 let Predicates = [HasVLX] in {
7671   // Special patterns to allow use of X86vmfpround for masking. Instruction
7672   // patterns have been disabled with null_frag.
7673   def : Pat<(X86any_vfpround (v2f64 VR128X:$src)),
7674             (VCVTPD2PSZ128rr VR128X:$src)>;
7675   def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
7676                           VK2WM:$mask),
7677             (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
7678   def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
7679                           VK2WM:$mask),
7680             (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
7681
7682   def : Pat<(X86any_vfpround (loadv2f64 addr:$src)),
7683             (VCVTPD2PSZ128rm addr:$src)>;
7684   def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
7685                           VK2WM:$mask),
7686             (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7687   def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
7688                           VK2WM:$mask),
7689             (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
7690
7691   def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
7692             (VCVTPD2PSZ128rmb addr:$src)>;
7693   def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
7694                           (v4f32 VR128X:$src0), VK2WM:$mask),
7695             (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7696   def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
7697                           v4f32x_info.ImmAllZerosV, VK2WM:$mask),
7698             (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
7699 }
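// Usage sketch for the masked 128-bit forms matched above (illustrative only;
// assumes the standard <immintrin.h> AVX512VL intrinsics):
//
//   #include <immintrin.h>
//   __m128 merge_round(__m128 src, __mmask8 k, __m128d a) {
//     return _mm_mask_cvtpd_ps(src, k, a);    // VCVTPD2PSZ128rrk
//   }
//   __m128 zero_round(__mmask8 k, __m128d a) {
//     return _mm_maskz_cvtpd_ps(k, a);        // VCVTPD2PSZ128rrkz
//   }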
7700
7701 // Convert Signed/Unsigned Doubleword to Double
7702 let Uses = []<Register>, mayRaiseFPException = 0 in
7703 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7704                            SDNode OpNode128, X86SchedWriteWidths sched> {
7705   // No rounding in this op
7706   let Predicates = [HasAVX512] in
7707     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
7708                             sched.ZMM>, EVEX_V512;
7709
7710   let Predicates = [HasVLX] in {
7711     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
7712                                OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
7713                                (v2f64 (OpNode128 (bc_v4i32
7714                                 (v2i64
7715                                  (scalar_to_vector (loadi64 addr:$src))))))>,
7716                                EVEX_V128;
7717     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
7718                                sched.YMM>, EVEX_V256;
7719   }
7720 }
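// Usage sketch for the doubleword-to-double conversions modeled above
// (illustrative only; standard <immintrin.h> intrinsics):
//
//   #include <immintrin.h>
//   __m512d s32_to_f64(__m256i a) { return _mm512_cvtepi32_pd(a); }  // vcvtdq2pd
//   __m512d u32_to_f64(__m256i a) { return _mm512_cvtepu32_pd(a); }  // vcvtudq2pd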
7721
7722 // Convert Signed/Unsigned Doubleword to Float
7723 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7724                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7725   let Predicates = [HasAVX512] in
7726     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
7727                             sched.ZMM>,
7728              avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
7729                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7730
7731   let Predicates = [HasVLX] in {
7732     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
7733                                sched.XMM>, EVEX_V128;
7734     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
7735                                sched.YMM>, EVEX_V256;
7736   }
7737 }
7738
7739 // Convert Float to Signed/Unsigned Doubleword with truncation
7740 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7741                             SDNode OpNodeSAE, X86SchedWriteWidths sched> {
7742   let Predicates = [HasAVX512] in {
7743     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7744                             sched.ZMM>,
7745              avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
7746                                 OpNodeSAE, sched.ZMM>, EVEX_V512;
7747   }
7748   let Predicates = [HasVLX] in {
7749     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7750                                sched.XMM>, EVEX_V128;
7751     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7752                                sched.YMM>, EVEX_V256;
7753   }
7754 }
7755
7756 // Convert Float to Signed/Unsigned Doubleword
7757 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7758                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7759   let Predicates = [HasAVX512] in {
7760     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7761                             sched.ZMM>,
7762              avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
7763                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
7764   }
7765   let Predicates = [HasVLX] in {
7766     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7767                                sched.XMM>, EVEX_V128;
7768     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7769                                sched.YMM>, EVEX_V256;
7770   }
7771 }
7772
7773 // Convert Double to Signed/Unsigned Doubleword with truncation
7774 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7775                             SDNode OpNodeSAE, X86SchedWriteWidths sched> {
7776   let Predicates = [HasAVX512] in {
7777     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7778                             sched.ZMM>,
7779              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
7780                                 OpNodeSAE, sched.ZMM>, EVEX_V512;
7781   }
7782   let Predicates = [HasVLX] in {
7783     // We need "x"/"y" suffixes in order to distinguish between the 128- and
7784     // 256-bit memory forms of these instructions in the asm parser. They have
7785     // the same dest type - 'v4i32x_info'. We also specify the broadcast string
7786     // explicitly for the same reason (see the usage sketch after this multiclass).
7787     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7788                                null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7789                                VK2WM>, EVEX_V128;
7790     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7791                                sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7792   }
7793
7794   def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7795                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
7796                   VR128X:$src), 0, "att">;
7797   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7798                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7799                   VK2WM:$mask, VR128X:$src), 0, "att">;
7800   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7801                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7802                   VK2WM:$mask, VR128X:$src), 0, "att">;
7803   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7804                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7805                   f64mem:$src), 0, "att">;
7806   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
7807                   "$dst {${mask}}, ${src}{1to2}}",
7808                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7809                   VK2WM:$mask, f64mem:$src), 0, "att">;
7810   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7811                   "$dst {${mask}} {z}, ${src}{1to2}}",
7812                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7813                   VK2WM:$mask, f64mem:$src), 0, "att">;
7814
7815   def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7816                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
7817                   VR256X:$src), 0, "att">;
7818   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7819                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7820                   VK4WM:$mask, VR256X:$src), 0, "att">;
7821   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7822                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7823                   VK4WM:$mask, VR256X:$src), 0, "att">;
7824   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7825                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7826                   f64mem:$src), 0, "att">;
7827   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
7828                   "$dst {${mask}}, ${src}{1to4}}",
7829                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7830                   VK4WM:$mask, f64mem:$src), 0, "att">;
7831   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7832                   "$dst {${mask}} {z}, ${src}{1to4}}",
7833                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7834                   VK4WM:$mask, f64mem:$src), 0, "att">;
7835 }
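// Usage sketch of why the "x"/"y" suffixes are needed (illustrative only): the
// 128-bit and 256-bit forms both write an XMM destination, so for a memory
// source only the mnemonic suffix tells the assembler the source width.
//
//   #include <immintrin.h>
//   __m128i t128(const __m128d *p) { return _mm_cvttpd_epi32(*p);    } // vcvttpd2dqx
//   __m128i t256(const __m256d *p) { return _mm256_cvttpd_epi32(*p); } // vcvttpd2dqy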
7836
7837 // Convert Double to Signed/Unsigned Doubleword
7838 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7839                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7840   let Predicates = [HasAVX512] in {
7841     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7842                             sched.ZMM>,
7843              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
7844                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7845   }
7846   let Predicates = [HasVLX] in {
7847     // We need "x"/"y" suffixes in order to distinguish between the 128- and
7848     // 256-bit memory forms of these instructions in the asm parser. They have
7849     // the same dest type - 'v4i32x_info'. We also specify the broadcast string
7850     // explicitly for the same reason.
7851     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7852                                null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7853                                VK2WM>, EVEX_V128;
7854     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7855                                sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7856   }
7857
7858   def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7859                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7860   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7861                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7862                   VK2WM:$mask, VR128X:$src), 0, "att">;
7863   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7864                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7865                   VK2WM:$mask, VR128X:$src), 0, "att">;
7866   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7867                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7868                   f64mem:$src), 0, "att">;
7869   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
7870                   "$dst {${mask}}, ${src}{1to2}}",
7871                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7872                   VK2WM:$mask, f64mem:$src), 0, "att">;
7873   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7874                   "$dst {${mask}} {z}, ${src}{1to2}}",
7875                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7876                   VK2WM:$mask, f64mem:$src), 0, "att">;
7877
7878   def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7879                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7880   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7881                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7882                   VK4WM:$mask, VR256X:$src), 0, "att">;
7883   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7884                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7885                   VK4WM:$mask, VR256X:$src), 0, "att">;
7886   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7887                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7888                   f64mem:$src), 0, "att">;
7889   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
7890                   "$dst {${mask}}, ${src}{1to4}}",
7891                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7892                   VK4WM:$mask, f64mem:$src), 0, "att">;
7893   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7894                   "$dst {${mask}} {z}, ${src}{1to4}}",
7895                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7896                   VK4WM:$mask, f64mem:$src), 0, "att">;
7897 }
7898
7899 // Convert Double to Signed/Unsigned Quadword
7900 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7901                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7902   let Predicates = [HasDQI] in {
7903     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7904                             sched.ZMM>,
7905              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
7906                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7907   }
7908   let Predicates = [HasDQI, HasVLX] in {
7909     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7910                                sched.XMM>, EVEX_V128;
7911     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7912                                sched.YMM>, EVEX_V256;
7913   }
7914 }
7915
7916 // Convert Double to Signed/Unsigned Quadword with truncation
7917 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7918                             SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7919   let Predicates = [HasDQI] in {
7920     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7921                             sched.ZMM>,
7922              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
7923                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
7924   }
7925   let Predicates = [HasDQI, HasVLX] in {
7926     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7927                                sched.XMM>, EVEX_V128;
7928     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7929                                sched.YMM>, EVEX_V256;
7930   }
7931 }
7932
7933 // Convert Signed/Unsigned Quadword to Double
7934 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7935                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7936   let Predicates = [HasDQI] in {
7937     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
7938                             sched.ZMM>,
7939              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
7940                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7941   }
7942   let Predicates = [HasDQI, HasVLX] in {
7943     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
7944                                sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
7945     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
7946                                sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
7947   }
7948 }
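// Usage sketch for the AVX512DQ quadword<->double conversions modeled above
// (illustrative only; standard <immintrin.h> intrinsics):
//
//   #include <immintrin.h>
//   __m512i pd2qq(__m512d a)   { return _mm512_cvtpd_epi64(a);  }   // vcvtpd2qq
//   __m512i ttpd2qq(__m512d a) { return _mm512_cvttpd_epi64(a); }   // vcvttpd2qq
//   __m512d qq2pd(__m512i a)   { return _mm512_cvtepi64_pd(a);  }   // vcvtqq2pd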
7949
7950 // Convert Float to Signed/Unsigned Quadword
7951 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7952                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7953   let Predicates = [HasDQI] in {
7954     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7955                             sched.ZMM>,
7956              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
7957                                OpNodeRnd, sched.ZMM>, EVEX_V512;
7958   }
7959   let Predicates = [HasDQI, HasVLX] in {
7960     // Explicitly specified broadcast string, since we take only 2 elements
7961     // from the v4f32x_info source (see the usage sketch after this multiclass).
7962     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
7963                                sched.XMM, "{1to2}", "", f64mem, VK2WM,
7964                                (v2i64 (OpNode (bc_v4f32
7965                                 (v2f64
7966                                  (scalar_to_vector (loadf64 addr:$src))))))>,
7967                                EVEX_V128;
7968     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7969                                sched.YMM>, EVEX_V256;
7970   }
7971 }
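// Usage sketch for the 128-bit form above, which converts only the low two
// floats of the source (illustrative only; assumes the standard <immintrin.h>
// AVX512DQ+VL intrinsic):
//
//   #include <immintrin.h>
//   __m128i lo2_ps2qq(__m128 a) { return _mm_cvtps_epi64(a); }   // vcvtps2qq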
7972
7973 // Convert Float to Signed/Unsigned Quadword with truncation
7974 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7975                             SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7976   let Predicates = [HasDQI] in {
7977     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
7978              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
7979                                 OpNodeRnd, sched.ZMM>, EVEX_V512;
7980   }
7981   let Predicates = [HasDQI, HasVLX] in {
7982     // The broadcast string is specified explicitly, since we take only 2
7983     // elements from the v4f32x_info source.
7984     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
7985                                sched.XMM, "{1to2}", "", f64mem, VK2WM,
7986                                (v2i64 (OpNode (bc_v4f32
7987                                 (v2f64
7988                                  (scalar_to_vector (loadf64 addr:$src))))))>,
7989                                EVEX_V128;
7990     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7991                                sched.YMM>, EVEX_V256;
7992   }
7993 }
7994
7995 // Convert Signed/Unsigned Quadword to Float
7996 multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7997                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7998   let Predicates = [HasDQI] in {
7999     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
8000                             sched.ZMM>,
8001              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
8002                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8003   }
8004   let Predicates = [HasDQI, HasVLX] in {
8005     // We need "x"/"y" suffixes to distinguish the 128-bit and 256-bit memory
8006     // forms of these instructions in the asm parser; both use the same dest
8007     // type - 'v4f32x_info'. The broadcast string is also specified explicitly
8008     // for the same reason (see the asm sketch after this multiclass).
8009     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
8010                                sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
8011                                EVEX_V128, NotEVEX2VEXConvertible;
8012     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
8013                                sched.YMM, "{1to4}", "{y}">, EVEX_V256,
8014                                NotEVEX2VEXConvertible;
8015   }
8016
8017   def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
8018                   (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8019                   VR128X:$src), 0, "att">;
8020   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8021                   (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8022                   VK2WM:$mask, VR128X:$src), 0, "att">;
8023   def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8024                   (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8025                   VK2WM:$mask, VR128X:$src), 0, "att">;
8026   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8027                   (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8028                   i64mem:$src), 0, "att">;
8029   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
8030                   "$dst {${mask}}, ${src}{1to2}}",
8031                   (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8032                   VK2WM:$mask, i64mem:$src), 0, "att">;
8033   def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8034                   "$dst {${mask}} {z}, ${src}{1to2}}",
8035                   (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8036                   VK2WM:$mask, i64mem:$src), 0, "att">;
8037
8038   def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
8039                   (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8040                   VR256X:$src), 0, "att">;
8041   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|"
8042                   "$dst {${mask}}, $src}",
8043                   (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8044                   VK4WM:$mask, VR256X:$src), 0, "att">;
8045   def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
8046                   "$dst {${mask}} {z}, $src}",
8047                   (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8048                   VK4WM:$mask, VR256X:$src), 0, "att">;
8049   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8050                   (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8051                   i64mem:$src), 0, "att">;
8052   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
8053                   "$dst {${mask}}, ${src}{1to4}}",
8054                   (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8055                   VK4WM:$mask, i64mem:$src), 0, "att">;
8056   def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8057                   "$dst {${mask}} {z}, ${src}{1to4}}",
8058                   (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8059                   VK4WM:$mask, i64mem:$src), 0, "att">;
8060 }
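// A minimal asm sketch (AT&T syntax; the registers and the address are made up
// for illustration) of what the "x"/"y" suffixes and the aliases above buy the
// asm parser: with a memory source the destination is %xmm either way, so only
// the suffix selects the source width.
//
//   vcvtqq2psx (%rax), %xmm0     // 128-bit memory source
//   vcvtqq2psy (%rax), %xmm0     // 256-bit memory source
//   vcvtqq2ps  %ymm1, %xmm0      // register forms are already unambiguous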
8061
8062 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, X86any_VSintToFP,
8063                                  SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8064
8065 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp,
8066                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8067                                 PS, EVEX_CD8<32, CD8VF>;
8068
8069 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8070                                 X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
8071                                 XS, EVEX_CD8<32, CD8VF>;
8072
8073 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8074                                  X86cvttp2siSAE, SchedWriteCvtPD2DQ>,
8075                                  PD, VEX_W, EVEX_CD8<64, CD8VF>;
8076
8077 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8078                                  X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS,
8079                                  EVEX_CD8<32, CD8VF>;
8080
8081 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8082                                  X86cvttp2uiSAE, SchedWriteCvtPD2DQ>,
8083                                  PS, VEX_W, EVEX_CD8<64, CD8VF>;
8084
8085 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8086                                   X86any_VUintToFP, SchedWriteCvtDQ2PD>, XS,
8087                                   EVEX_CD8<32, CD8VH>;
8088
8089 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8090                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
8091                                  EVEX_CD8<32, CD8VF>;
8092
8093 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
8094                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8095                                  EVEX_CD8<32, CD8VF>;
8096
8097 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
8098                                  X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8099                                  VEX_W, EVEX_CD8<64, CD8VF>;
8100
8101 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
8102                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8103                                  PS, EVEX_CD8<32, CD8VF>;
8104
8105 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
8106                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8107                                  PS, EVEX_CD8<64, CD8VF>;
8108
8109 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
8110                                  X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8111                                  PD, EVEX_CD8<64, CD8VF>;
8112
8113 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
8114                                  X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8115                                  EVEX_CD8<32, CD8VH>;
8116
8117 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
8118                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8119                                  PD, EVEX_CD8<64, CD8VF>;
8120
8121 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
8122                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8123                                  EVEX_CD8<32, CD8VH>;
8124
8125 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8126                                  X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W,
8127                                  PD, EVEX_CD8<64, CD8VF>;
8128
8129 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8130                                  X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD,
8131                                  EVEX_CD8<32, CD8VH>;
8132
8133 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8134                                  X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W,
8135                                  PD, EVEX_CD8<64, CD8VF>;
8136
8137 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8138                                  X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD,
8139                                  EVEX_CD8<32, CD8VH>;
8140
8141 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8142                             X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8143                             EVEX_CD8<64, CD8VF>;
8144
8145 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8146                             X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8147                             EVEX_CD8<64, CD8VF>;
8148
8149 defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp,
8150                             X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
8151                             EVEX_CD8<64, CD8VF>;
8152
8153 defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp,
8154                             X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
8155                             EVEX_CD8<64, CD8VF>;
8156
8157 let Predicates = [HasVLX] in {
8158   // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8159   // patterns have been disabled with null_frag.
8160   def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8161             (VCVTPD2DQZ128rr VR128X:$src)>;
8162   def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8163                           VK2WM:$mask),
8164             (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8165   def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8166                           VK2WM:$mask),
8167             (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8168
8169   def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8170             (VCVTPD2DQZ128rm addr:$src)>;
8171   def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8172                           VK2WM:$mask),
8173             (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8174   def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8175                           VK2WM:$mask),
8176             (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8177
8178   def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8179             (VCVTPD2DQZ128rmb addr:$src)>;
8180   def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8181                           (v4i32 VR128X:$src0), VK2WM:$mask),
8182             (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8183   def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8184                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8185             (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8186
8187   // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8188   // patterns have been disabled with null_frag.
8189   def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8190             (VCVTTPD2DQZ128rr VR128X:$src)>;
8191   def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8192                           VK2WM:$mask),
8193             (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8194   def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8195                           VK2WM:$mask),
8196             (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8197
8198   def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8199             (VCVTTPD2DQZ128rm addr:$src)>;
8200   def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8201                           VK2WM:$mask),
8202             (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8203   def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8204                           VK2WM:$mask),
8205             (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8206
8207   def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8208             (VCVTTPD2DQZ128rmb addr:$src)>;
8209   def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8210                           (v4i32 VR128X:$src0), VK2WM:$mask),
8211             (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8212   def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8213                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8214             (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8215
8216   // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8217   // patterns have been disabled with null_frag.
8218   def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8219             (VCVTPD2UDQZ128rr VR128X:$src)>;
8220   def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8221                            VK2WM:$mask),
8222             (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8223   def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8224                            VK2WM:$mask),
8225             (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8226
8227   def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8228             (VCVTPD2UDQZ128rm addr:$src)>;
8229   def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8230                            VK2WM:$mask),
8231             (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8232   def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8233                            VK2WM:$mask),
8234             (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8235
8236   def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8237             (VCVTPD2UDQZ128rmb addr:$src)>;
8238   def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8239                            (v4i32 VR128X:$src0), VK2WM:$mask),
8240             (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8241   def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8242                            v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8243             (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8244
8245   // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8246   // patterns have been disabled with null_frag.
8247   def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8248             (VCVTTPD2UDQZ128rr VR128X:$src)>;
8249   def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8250                           VK2WM:$mask),
8251             (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8252   def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8253                           VK2WM:$mask),
8254             (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8255
8256   def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
8257             (VCVTTPD2UDQZ128rm addr:$src)>;
8258   def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8259                           VK2WM:$mask),
8260             (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8261   def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8262                           VK2WM:$mask),
8263             (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8264
8265   def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
8266             (VCVTTPD2UDQZ128rmb addr:$src)>;
8267   def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8268                           (v4i32 VR128X:$src0), VK2WM:$mask),
8269             (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8270   def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8271                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8272             (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8273 }
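// A rough usage sketch for the masked patterns above, assuming the usual
// AVX512VL C intrinsics from <immintrin.h> (the helper names below are
// hypothetical, not part of this file). Source like this is the kind of code
// that ends up selecting the write-masked 128-bit forms:
//
//   #include <immintrin.h>
//   __m128i cvt_pd_dq_masked(__m128i src, __mmask8 k, __m128d a) {
//     return _mm_mask_cvtpd_epi32(src, k, a);   // VCVTPD2DQZ128rrk
//   }
//   __m128i cvtt_pd_udq_zeroed(__mmask8 k, __m128d a) {
//     return _mm_maskz_cvttpd_epu32(k, a);      // VCVTTPD2UDQZ128rrkz
//   }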
8274
8275 let Predicates = [HasDQI, HasVLX] in {
8276   def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8277             (VCVTPS2QQZ128rm addr:$src)>;
8278   def : Pat<(v2i64 (vselect VK2WM:$mask,
8279                             (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8280                             VR128X:$src0)),
8281             (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8282   def : Pat<(v2i64 (vselect VK2WM:$mask,
8283                             (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8284                             v2i64x_info.ImmAllZerosV)),
8285             (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8286
8287   def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8288             (VCVTPS2UQQZ128rm addr:$src)>;
8289   def : Pat<(v2i64 (vselect VK2WM:$mask,
8290                             (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8291                             VR128X:$src0)),
8292             (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8293   def : Pat<(v2i64 (vselect VK2WM:$mask,
8294                             (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8295                             v2i64x_info.ImmAllZerosV)),
8296             (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8297
8298   def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8299             (VCVTTPS2QQZ128rm addr:$src)>;
8300   def : Pat<(v2i64 (vselect VK2WM:$mask,
8301                             (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8302                             VR128X:$src0)),
8303             (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8304   def : Pat<(v2i64 (vselect VK2WM:$mask,
8305                             (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8306                             v2i64x_info.ImmAllZerosV)),
8307             (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8308
8309   def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8310             (VCVTTPS2UQQZ128rm addr:$src)>;
8311   def : Pat<(v2i64 (vselect VK2WM:$mask,
8312                             (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8313                             VR128X:$src0)),
8314             (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8315   def : Pat<(v2i64 (vselect VK2WM:$mask,
8316                             (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8317                             v2i64x_info.ImmAllZerosV)),
8318             (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8319 }
8320
8321 let Predicates = [HasVLX] in {
8322   def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8323             (VCVTDQ2PDZ128rm addr:$src)>;
8324   def : Pat<(v2f64 (vselect VK2WM:$mask,
8325                             (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8326                             VR128X:$src0)),
8327             (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8328   def : Pat<(v2f64 (vselect VK2WM:$mask,
8329                             (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8330                             v2f64x_info.ImmAllZerosV)),
8331             (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8332
8333   def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8334             (VCVTUDQ2PDZ128rm addr:$src)>;
8335   def : Pat<(v2f64 (vselect VK2WM:$mask,
8336                             (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8337                             VR128X:$src0)),
8338             (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8339   def : Pat<(v2f64 (vselect VK2WM:$mask,
8340                             (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8341                             v2f64x_info.ImmAllZerosV)),
8342             (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8343 }
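// A minimal sketch of the source-level code the X86vzload64 patterns above are
// meant to catch, assuming SSE2/AVX512VL intrinsics from <immintrin.h> (the
// helper name is hypothetical). A 64-bit load of two i32 elements followed by
// a convert should fold into the Z128 memory form:
//
//   #include <immintrin.h>
//   __m128d cvt_low2_i32_to_f64(const void *p) {
//     __m128i v = _mm_loadl_epi64((const __m128i *)p); // zero-extending 64-bit load
//     return _mm_cvtepi32_pd(v);                       // VCVTDQ2PDZ128rm candidate
//   }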
8344
8345 let Predicates = [HasDQI, HasVLX] in {
8346   // Special patterns to allow use of X86VMSintToFP for masking. Instruction
8347   // patterns have been disabled with null_frag.
8348   def : Pat<(v4f32 (X86any_VSintToFP (v2i64 VR128X:$src))),
8349             (VCVTQQ2PSZ128rr VR128X:$src)>;
8350   def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8351                            VK2WM:$mask),
8352             (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8353   def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8354                            VK2WM:$mask),
8355             (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8356
8357   def : Pat<(v4f32 (X86any_VSintToFP (loadv2i64 addr:$src))),
8358             (VCVTQQ2PSZ128rm addr:$src)>;
8359   def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8360                            VK2WM:$mask),
8361             (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8362   def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8363                            VK2WM:$mask),
8364             (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8365
8366   def : Pat<(v4f32 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
8367             (VCVTQQ2PSZ128rmb addr:$src)>;
8368   def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8369                            (v4f32 VR128X:$src0), VK2WM:$mask),
8370             (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8371   def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8372                            v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8373             (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8374
8375   // Special patterns to allow use of X86VMUintToFP for masking. Instruction
8376   // patterns have been disabled with null_frag.
8377   def : Pat<(v4f32 (X86any_VUintToFP (v2i64 VR128X:$src))),
8378             (VCVTUQQ2PSZ128rr VR128X:$src)>;
8379   def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8380                            VK2WM:$mask),
8381             (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8382   def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8383                            VK2WM:$mask),
8384             (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8385
8386   def : Pat<(v4f32 (X86any_VUintToFP (loadv2i64 addr:$src))),
8387             (VCVTUQQ2PSZ128rm addr:$src)>;
8388   def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8389                            VK2WM:$mask),
8390             (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8391   def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8392                            VK2WM:$mask),
8393             (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8394
8395   def : Pat<(v4f32 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
8396             (VCVTUQQ2PSZ128rmb addr:$src)>;
8397   def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8398                            (v4f32 VR128X:$src0), VK2WM:$mask),
8399             (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8400   def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8401                            v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8402             (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8403 }
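// A rough sketch of code that exercises the masked qq->ps patterns above,
// assuming the AVX512DQ+VL intrinsics from <immintrin.h> (helper names are
// hypothetical):
//
//   #include <immintrin.h>
//   __m128 cvt_i64_ps_masked(__m128 src, __mmask8 k, __m128i a) {
//     return _mm_mask_cvtepi64_ps(src, k, a);   // VCVTQQ2PSZ128rrk
//   }
//   __m128 cvt_u64_ps_zeroed(__mmask8 k, __m128i a) {
//     return _mm_maskz_cvtepu64_ps(k, a);       // VCVTUQQ2PSZ128rrkz
//   }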
8404
8405 //===----------------------------------------------------------------------===//
8406 // Half precision conversion instructions
8407 //===----------------------------------------------------------------------===//
8408
8409 let Uses = [MXCSR], mayRaiseFPException = 1 in
8410 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8411                            X86MemOperand x86memop, PatFrag ld_frag,
8412                            X86FoldableSchedWrite sched> {
8413   defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8414                             (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8415                             (X86cvtph2ps (_src.VT _src.RC:$src))>,
8416                             T8PD, Sched<[sched]>;
8417   defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8418                             (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8419                             (X86cvtph2ps (_src.VT
8420                                           (ld_frag addr:$src)))>,
8421                             T8PD, Sched<[sched.Folded]>;
8422 }
8423
8424 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8425                                X86FoldableSchedWrite sched> {
8426   let Uses = [MXCSR] in
8427   defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8428                              (ins _src.RC:$src), "vcvtph2ps",
8429                              "{sae}, $src", "$src, {sae}",
8430                              (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8431                              T8PD, EVEX_B, Sched<[sched]>;
8432 }
8433
8434 let Predicates = [HasAVX512] in
8435   defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load,
8436                                     WriteCvtPH2PSZ>,
8437                     avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8438                     EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8439
8440 let Predicates = [HasVLX] in {
8441   defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8442                        load, WriteCvtPH2PSY>, EVEX, EVEX_V256,
8443                        EVEX_CD8<32, CD8VH>;
8444   defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8445                        load, WriteCvtPH2PS>, EVEX, EVEX_V128,
8446                        EVEX_CD8<32, CD8VH>;
8447
8448   // Pattern match vcvtph2ps of a scalar i64 load.
8449   def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
8450             (VCVTPH2PSZ128rm addr:$src)>;
8451   def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
8452               (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8453             (VCVTPH2PSZ128rm addr:$src)>;
8454 }
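// A minimal sketch of the scalar-i64-load case matched above, assuming the
// F16C/AVX512VL intrinsics from <immintrin.h> (helper name hypothetical).
// Loading four f16 values as a single 64-bit scalar and widening them should
// fold into the VCVTPH2PSZ128rm form:
//
//   #include <immintrin.h>
//   __m128 load4_half_to_float(const void *p) {
//     __m128i h = _mm_loadl_epi64((const __m128i *)p); // 4 x f16 in the low 64 bits
//     return _mm_cvtph_ps(h);
//   }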
8455
8456 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8457                            X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8458 let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8459   def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8460              (ins _src.RC:$src1, i32u8imm:$src2),
8461              "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8462              [(set _dest.RC:$dst,
8463                    (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8464              Sched<[RR]>;
8465   let Constraints = "$src0 = $dst" in
8466   def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8467              (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8468              "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8469              [(set _dest.RC:$dst,
8470                    (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8471                                  _dest.RC:$src0, _src.KRCWM:$mask))]>,
8472              Sched<[RR]>, EVEX_K;
8473   def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8474              (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8475              "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8476              [(set _dest.RC:$dst,
8477                    (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8478                                  _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8479              Sched<[RR]>, EVEX_KZ;
8480   let hasSideEffects = 0, mayStore = 1 in {
8481     def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8482                (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8483                "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8484                Sched<[MR]>;
8485     def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
8486                (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8487                "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
8488                 EVEX_K, Sched<[MR]>, NotMemoryFoldable;
8489   }
8490 }
8491 }
8492
8493 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8494                                SchedWrite Sched> {
8495   let hasSideEffects = 0, Uses = [MXCSR] in
8496   defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
8497                    (outs _dest.RC:$dst),
8498                    (ins _src.RC:$src1, i32u8imm:$src2),
8499                    "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
8500                    EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
8501 }
8502
8503 let Predicates = [HasAVX512] in {
8504   defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
8505                                     WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
8506                     avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
8507                                         EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8508   let Predicates = [HasVLX] in {
8509     defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
8510                                          WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
8511                                          EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
8512     defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
8513                                          WriteCvtPS2PH, WriteCvtPS2PHSt>,
8514                                          EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
8515   }
8516
8517   def : Pat<(store (f64 (extractelt
8518                          (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
8519                          (iPTR 0))), addr:$dst),
8520             (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
8521   def : Pat<(store (i64 (extractelt
8522                          (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
8523                          (iPTR 0))), addr:$dst),
8524             (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
8525   def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
8526             (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
8527   def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
8528             (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
8529 }
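// A rough source-level counterpart of the store patterns above, assuming the
// F16C intrinsics from <immintrin.h> (helper name hypothetical). Converting
// four floats to f16 and storing the low 64 bits should be matched into a
// single VCVTPS2PHZ128mr:
//
//   #include <immintrin.h>
//   void store4_float_as_half(void *p, __m128 v) {
//     __m128i h = _mm_cvtps_ph(v, _MM_FROUND_CUR_DIRECTION); // imm 4: use MXCSR.RC
//     _mm_storel_epi64((__m128i *)p, h);                     // store 4 x f16
//   }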
8530
8531 // Patterns for matching conversions from float to half-float and vice versa.
8532 let Predicates = [HasVLX] in {
8533   // Use MXCSR.RC for rounding instead of explicitly specifying the default
8534   // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
8535   // configurations we support (the default). However, falling back to MXCSR is
8536   // more consistent with other instructions, which are always controlled by it.
8537   // The MXCSR-based rounding mode is encoded as 0b100, hence the immediate 4 below.
8538   def : Pat<(fp_to_f16 FR32X:$src),
8539             (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
8540               (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;
8541
8542   def : Pat<(f16_to_fp GR16:$src),
8543             (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
8544               (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >;
8545
8546   def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
8547             (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
8548               (v8i16 (VCVTPS2PHZ128rr
8549                (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
8550 }
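// For the scalar f16<->f32 patterns above, a rough C-level analogue is the
// F16C scalar conversion intrinsics from <immintrin.h> (shown only as an
// assumed illustration; with AVX512VL these correspond to the
// VCVTPS2PHZ128rr/VCVTPH2PSZ128rr forms with immediate 4, i.e. MXCSR-controlled
// rounding):
//
//   #include <immintrin.h>
//   unsigned short f32_to_f16(float f) { return _cvtss_sh(f, _MM_FROUND_CUR_DIRECTION); }
//   float          f16_to_f32(unsigned short h) { return _cvtsh_ss(h); }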
8551
8552 // Unordered/ordered scalar FP compare with SAE that sets EFLAGS
8553 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
8554                             string OpcodeStr, Domain d,
8555                             X86FoldableSchedWrite sched = WriteFCom> {
8556   let hasSideEffects = 0, Uses = [MXCSR] in
8557   def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
8558                   !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
8559                   EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
8560 }
8561
8562 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8563   defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
8564                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8565   defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
8566                                    AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8567   defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
8568                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8569   defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
8570                                    AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8571 }
8572
8573 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8574   defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
8575                                  "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8576                                  EVEX_CD8<32, CD8VT1>;
8577   defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
8578                                   "ucomisd", SSEPackedDouble>, PD, EVEX,
8579                                   VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8580   defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
8581                                  "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8582                                  EVEX_CD8<32, CD8VT1>;
8583   defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
8584                                  "comisd", SSEPackedDouble>, PD, EVEX,
8585                                   VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8586   let isCodeGenOnly = 1 in {
8587     defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
8588                           sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8589                           EVEX_CD8<32, CD8VT1>;
8590     defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
8591                           sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
8592                           VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8593
8594     defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
8595                           sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8596                           EVEX_CD8<32, CD8VT1>;
8597     defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
8598                           sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
8599                           VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8600   }
8601 }
8602
8603 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
8604 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8605                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8606   let Predicates = [HasAVX512], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
8607   defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8608                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8609                            "$src2, $src1", "$src1, $src2",
8610                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8611                            EVEX_4V, VEX_LIG, Sched<[sched]>;
8612   defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8613                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8614                          "$src2, $src1", "$src1, $src2",
8615                          (OpNode (_.VT _.RC:$src1),
8616                           _.ScalarIntMemCPat:$src2)>, EVEX_4V, VEX_LIG,
8617                           Sched<[sched.Folded, sched.ReadAfterFold]>;
8618 }
8619 }
8620
8621 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
8622                                f32x_info>, EVEX_CD8<32, CD8VT1>,
8623                                T8PD;
8624 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
8625                                f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
8626                                T8PD;
8627 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
8628                                  SchedWriteFRsqrt.Scl, f32x_info>,
8629                                  EVEX_CD8<32, CD8VT1>, T8PD;
8630 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
8631                                  SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
8632                                  EVEX_CD8<64, CD8VT1>, T8PD;
8633
8634 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
8635 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
8636                          X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8637   let ExeDomain = _.ExeDomain in {
8638   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8639                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
8640                          (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
8641                          Sched<[sched]>;
8642   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8643                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8644                          (OpNode (_.VT
8645                            (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
8646                          Sched<[sched.Folded, sched.ReadAfterFold]>;
8647   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8648                           (ins _.ScalarMemOp:$src), OpcodeStr,
8649                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8650                           (OpNode (_.VT
8651                             (_.BroadcastLdFrag addr:$src)))>,
8652                           EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8653   }
8654 }
8655
8656 let Uses = [MXCSR] in
8657 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
8658                                 X86SchedWriteWidths sched> {
8659   defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
8660                            v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
8661   defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
8662                            v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8663
8664   // Define only if AVX512VL feature is present.
8665   let Predicates = [HasVLX] in {
8666     defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8667                                 OpNode, sched.XMM, v4f32x_info>,
8668                                EVEX_V128, EVEX_CD8<32, CD8VF>;
8669     defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8670                                 OpNode, sched.YMM, v8f32x_info>,
8671                                EVEX_V256, EVEX_CD8<32, CD8VF>;
8672     defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8673                                 OpNode, sched.XMM, v2f64x_info>,
8674                                EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
8675     defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8676                                 OpNode, sched.YMM, v4f64x_info>,
8677                                EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
8678   }
8679 }
8680
8681 defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
8682 defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
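// A short usage sketch for the 14-bit approximation instructions defined above,
// assuming the AVX512F intrinsics from <immintrin.h> (helper names are
// hypothetical). VRCP14* / VRSQRT14* return estimates with relative error no
// worse than 2^-14, so a typical use is a fast estimate, optionally refined by
// a Newton-Raphson step:
//
//   #include <immintrin.h>
//   __m512 fast_recip(__m512 x) { return _mm512_rcp14_ps(x); }   // ~1/x
//   __m512 fast_rsqrt(__m512 x) { return _mm512_rsqrt14_ps(x); } // ~1/sqrt(x)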
8683
8684 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
8685 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
8686                          SDNode OpNode, SDNode OpNodeSAE,
8687                          X86FoldableSchedWrite sched> {
8688   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
8689   defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8690                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8691                            "$src2, $src1", "$src1, $src2",
8692                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8693                            Sched<[sched]>, SIMD_EXC;
8694
8695   defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8696                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8697                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
8698                             (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8699                             EVEX_B, Sched<[sched]>;
8700
8701   defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8702                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8703                          "$src2, $src1", "$src1, $src2",
8704                          (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>,
8705                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8706   }
8707 }
8708
8709 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8710                         SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
8711   defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
8712                            sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
8713   defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
8714                            sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
8715 }
8716
8717 let Predicates = [HasERI] in {
8718   defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
8719                                SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
8720   defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
8721                                SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
8722 }
8723
8724 defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
8725                               SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
8726 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
8727
8728 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8729                          SDNode OpNode, X86FoldableSchedWrite sched> {
8730   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8731   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8732                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
8733                          (OpNode (_.VT _.RC:$src))>,
8734                          Sched<[sched]>;
8735
8736   defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8737                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8738                          (OpNode (_.VT
8739                              (bitconvert (_.LdFrag addr:$src))))>,
8740                           Sched<[sched.Folded, sched.ReadAfterFold]>;
8741
8742   defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8743                          (ins _.ScalarMemOp:$src), OpcodeStr,
8744                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8745                          (OpNode (_.VT
8746                                   (_.BroadcastLdFrag addr:$src)))>,
8747                          EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8748   }
8749 }
8750 multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8751                          SDNode OpNode, X86FoldableSchedWrite sched> {
8752   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
8753   defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8754                         (ins _.RC:$src), OpcodeStr,
8755                         "{sae}, $src", "$src, {sae}",
8756                         (OpNode (_.VT _.RC:$src))>,
8757                         EVEX_B, Sched<[sched]>;
8758 }
8759
8760 multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
8761                        SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8762    defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
8763               avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
8764               T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
8765    defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
8766               avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
8767               T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8768 }
8769
8770 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
8771                                   SDNode OpNode, X86SchedWriteWidths sched> {
8772   // Define only if AVX512VL feature is present.
8773   let Predicates = [HasVLX] in {
8774     defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
8775                                 sched.XMM>,
8776                                 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
8777     defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
8778                                 sched.YMM>,
8779                                 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
8780     defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
8781                                 sched.XMM>,
8782                                 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8783     defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
8784                                 sched.YMM>,
8785                                 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8786   }
8787 }
8788
8789 let Predicates = [HasERI] in {
8790  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
8791                             SchedWriteFRsqrt>, EVEX;
8792  defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
8793                             SchedWriteFRcp>, EVEX;
8794  defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
8795                             SchedWriteFAdd>, EVEX;
8796 }
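// A brief usage sketch for the AVX512ER forms defined above, assuming the
// intrinsics from <immintrin.h> (only available when compiling with
// -mavx512er; helper names are hypothetical):
//
//   #include <immintrin.h>
//   __m512 rsqrt_est(__m512 x) { return _mm512_rsqrt28_ps(x); }  // vrsqrt28ps
//   __m512 recip_est(__m512 x) { return _mm512_rcp28_ps(x); }    // vrcp28ps
//   __m512 exp2_est(__m512 x)  { return _mm512_exp2a23_ps(x); }  // vexp2ps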
8797 defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
8798                             SchedWriteFRnd>,
8799                  avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
8800                                           SchedWriteFRnd>, EVEX;
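// A small usage sketch for vgetexpps, assuming the AVX512F intrinsic from
// <immintrin.h> (helper name hypothetical). vgetexp* extracts the unbiased
// exponent of each element as a floating-point value, e.g. getexp(8.0) == 3.0:
//
//   #include <immintrin.h>
//   __m512 exponents(__m512 x) { return _mm512_getexp_ps(x); }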
8801
8802 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
8803                                     X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8804   let ExeDomain = _.ExeDomain in
8805   defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8806                          (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
8807                          (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
8808                          EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8809 }
8810
8811 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
8812                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8813   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8814   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8815                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
8816                          (_.VT (any_fsqrt _.RC:$src))>, EVEX,
8817                          Sched<[sched]>;
8818   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8819                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8820                          (any_fsqrt (_.VT
8821                            (bitconvert (_.LdFrag addr:$src))))>, EVEX,
8822                            Sched<[sched.Folded, sched.ReadAfterFold]>;
8823   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8824                           (ins _.ScalarMemOp:$src), OpcodeStr,
8825                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8826                           (any_fsqrt (_.VT
8827                             (_.BroadcastLdFrag addr:$src)))>,
8828                           EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
8829   }
8830 }
8831
8832 let Uses = [MXCSR], mayRaiseFPException = 1 in
8833 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
8834                                   X86SchedWriteSizes sched> {
8835   defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8836                                 sched.PS.ZMM, v16f32_info>,
8837                                 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8838   defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8839                                 sched.PD.ZMM, v8f64_info>,
8840                                 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8841   // Define only if AVX512VL feature is present.
8842   let Predicates = [HasVLX] in {
8843     defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8844                                      sched.PS.XMM, v4f32x_info>,
8845                                      EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
8846     defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8847                                      sched.PS.YMM, v8f32x_info>,
8848                                      EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
8849     defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8850                                      sched.PD.XMM, v2f64x_info>,
8851                                      EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8852     defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8853                                      sched.PD.YMM, v4f64x_info>,
8854                                      EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8855   }
8856 }
8857
8858 let Uses = [MXCSR] in
8859 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
8860                                         X86SchedWriteSizes sched> {
8861   defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
8862                                       sched.PS.ZMM, v16f32_info>,
8863                                       EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8864   defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
8865                                       sched.PD.ZMM, v8f64_info>,
8866                                       EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8867 }
8868
8869 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
8870                               X86VectorVTInfo _, string Name> {
8871   let ExeDomain = _.ExeDomain in {
8872     defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8873                          (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8874                          "$src2, $src1", "$src1, $src2",
8875                          (X86fsqrts (_.VT _.RC:$src1),
8876                                     (_.VT _.RC:$src2))>,
8877                          Sched<[sched]>, SIMD_EXC;
8878     defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8879                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8880                          "$src2, $src1", "$src1, $src2",
8881                          (X86fsqrts (_.VT _.RC:$src1),
8882                                     _.ScalarIntMemCPat:$src2)>,
8883                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8884     let Uses = [MXCSR] in
8885     defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8886                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
8887                          "$rc, $src2, $src1", "$src1, $src2, $rc",
8888                          (X86fsqrtRnds (_.VT _.RC:$src1),
8889                                      (_.VT _.RC:$src2),
8890                                      (i32 timm:$rc))>,
8891                          EVEX_B, EVEX_RC, Sched<[sched]>;
8892
8893     let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
8894       def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8895                 (ins _.FRC:$src1, _.FRC:$src2),
8896                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8897                 Sched<[sched]>, SIMD_EXC;
8898       let mayLoad = 1 in
8899         def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8900                   (ins _.FRC:$src1, _.ScalarMemOp:$src2),
8901                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8902                   Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8903     }
8904   }
8905
8906   let Predicates = [HasAVX512] in {
8907     def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
8908               (!cast<Instruction>(Name#Zr)
8909                   (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
8910   }
8911
8912   let Predicates = [HasAVX512, OptForSize] in {
8913     def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
8914               (!cast<Instruction>(Name#Zm)
8915                   (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
8916   }
8917 }
8918
8919 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
8920                                   X86SchedWriteSizes sched> {
8921   defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
8922                         EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
8923   defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
8924                         EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
8925 }
8926
8927 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
8928              avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
8929
8930 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
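// A short sketch of the explicit-rounding (EVEX_RC) packed form defined above,
// assuming the AVX512F intrinsic from <immintrin.h> (helper name hypothetical):
//
//   #include <immintrin.h>
//   __m512d sqrt_rz(__m512d x) {
//     // round toward zero, exceptions suppressed
//     return _mm512_sqrt_round_pd(x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
//   }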
8931
8932 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
8933                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8934   let ExeDomain = _.ExeDomain in {
8935   defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8936                            (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
8937                            "$src3, $src2, $src1", "$src1, $src2, $src3",
8938                            (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
8939                            (i32 timm:$src3)))>,
8940                            Sched<[sched]>, SIMD_EXC;
8941
8942   let Uses = [MXCSR] in
8943   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8944                          (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
8945                          "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
8946                          (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
8947                          (i32 timm:$src3)))>, EVEX_B,
8948                          Sched<[sched]>;
8949
8950   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8951                          (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
8952                          OpcodeStr,
8953                          "$src3, $src2, $src1", "$src1, $src2, $src3",
8954                          (_.VT (X86RndScales _.RC:$src1,
8955                                 _.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>,
8956                          Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8957
8958   let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
8959     def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8960                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
8961                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
8962                []>, Sched<[sched]>, SIMD_EXC;
8963
8964     let mayLoad = 1 in
8965       def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8966                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
8967                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
8968                  []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8969   }
8970   }
8971
8972   let Predicates = [HasAVX512] in {
8973     def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
8974               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8975                _.FRC:$src1, timm:$src2))>;
8976   }
8977
8978   let Predicates = [HasAVX512, OptForSize] in {
8979     def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
8980               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8981                addr:$src1, timm:$src2))>;
8982   }
8983 }
8984
8985 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
8986                                            SchedWriteFRnd.Scl, f32x_info>,
8987                                            AVX512AIi8Base, EVEX_4V, VEX_LIG,
8988                                            EVEX_CD8<32, CD8VT1>;
8989
8990 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
8991                                            SchedWriteFRnd.Scl, f64x_info>,
8992                                            VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
8993                                            EVEX_CD8<64, CD8VT1>;
8994
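// For illustration (the helper name is only illustrative): VRNDSCALESS and
// VRNDSCALESD are exposed in C via _mm_roundscale_ss/_mm_roundscale_sd;
// imm8[1:0] selects the rounding mode and imm8[7:4] the number of fraction
// bits to preserve.
//
//   #include <immintrin.h>
//
//   // Floor of the low single-precision lane (imm8 = 0x01 selects round
//   // toward -inf with M = 0); the upper lanes are copied from a.
//   __m128 floor_low_ss(__m128 a, __m128 b) {
//     return _mm_roundscale_ss(a, b, 0x01);
//   }
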
8995 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
8996                                 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
8997                                 dag OutMask, Predicate BasePredicate> {
8998   let Predicates = [BasePredicate] in {
8999     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
9000                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9001                (extractelt _.VT:$dst, (iPTR 0))))),
9002               (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9003                _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9004
9005     def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
9006                (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9007                ZeroFP))),
9008               (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9009                OutMask, _.VT:$src2, _.VT:$src1)>;
9010   }
9011 }
9012
9013 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9014                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9015                             fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9016 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9017                             (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9018                             fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9019
9020
9021 //-------------------------------------------------
9022 // Integer truncate and extend operations
9023 //-------------------------------------------------
9024
9025 // PatFrags that contain a select and a truncate op. They take operands in the
9026 // same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
9027 // either to the multiclasses.
9028 def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9029                            (vselect node:$mask,
9030                                     (trunc node:$src), node:$src0)>;
9031 def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9032                             (vselect node:$mask,
9033                                      (X86vtruncs node:$src), node:$src0)>;
9034 def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9035                              (vselect node:$mask,
9036                                       (X86vtruncus node:$src), node:$src0)>;
9037
9038 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9039                               SDPatternOperator MaskNode,
9040                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9041                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9042   let ExeDomain = DestInfo.ExeDomain in {
9043   def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9044              (ins SrcInfo.RC:$src),
9045              OpcodeStr # "\t{$src, $dst|$dst, $src}",
9046              [(set DestInfo.RC:$dst,
9047                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9048              EVEX, Sched<[sched]>;
9049   let Constraints = "$src0 = $dst" in
9050   def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9051              (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9052              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9053              [(set DestInfo.RC:$dst,
9054                    (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9055                              (DestInfo.VT DestInfo.RC:$src0),
9056                              SrcInfo.KRCWM:$mask))]>,
9057              EVEX, EVEX_K, Sched<[sched]>;
9058   def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9059              (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9060              OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9061              [(set DestInfo.RC:$dst,
9062                    (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9063                              DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9064              EVEX, EVEX_KZ, Sched<[sched]>;
9065   }
9066
9067   let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9068     def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9069                (ins x86memop:$dst, SrcInfo.RC:$src),
9070                OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9071                EVEX, Sched<[sched.Folded]>;
9072
9073     def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9074                (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9075                OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9076                EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
9077   }//mayStore = 1, hasSideEffects = 0
9078 }
9079
9080 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9081                                     X86VectorVTInfo DestInfo,
9082                                     PatFrag truncFrag, PatFrag mtruncFrag,
9083                                     string Name> {
9084
9085   def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9086             (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
9087                                     addr:$dst, SrcInfo.RC:$src)>;
9088
9089   def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9090                         SrcInfo.KRCWM:$mask),
9091             (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
9092                             addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
9093 }
9094
9095 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9096                         SDNode OpNode256, SDNode OpNode512,
9097                         SDPatternOperator MaskNode128,
9098                         SDPatternOperator MaskNode256,
9099                         SDPatternOperator MaskNode512,
9100                         X86FoldableSchedWrite sched,
9101                         AVX512VLVectorVTInfo VTSrcInfo,
9102                         X86VectorVTInfo DestInfoZ128,
9103                         X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9104                         X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9105                         X86MemOperand x86memopZ, PatFrag truncFrag,
9106                         PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9107
9108   let Predicates = [HasVLX, prd] in {
9109     defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
9110                              VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9111                 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
9112                              truncFrag, mtruncFrag, NAME>, EVEX_V128;
9113
9114     defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
9115                              VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9116                 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
9117                              truncFrag, mtruncFrag, NAME>, EVEX_V256;
9118   }
9119   let Predicates = [prd] in
9120     defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
9121                              VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9122                 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
9123                              truncFrag, mtruncFrag, NAME>, EVEX_V512;
9124 }
9125
9126 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9127                            SDPatternOperator MaskNode,
9128                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9129                            PatFrag MaskedStoreNode, SDNode InVecNode,
9130                            SDPatternOperator InVecMaskNode> {
9131   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9132                           InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9133                           avx512vl_i64_info, v16i8x_info, v16i8x_info,
9134                           v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9135                           MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9136 }
9137
9138 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9139                            SDPatternOperator MaskNode,
9140                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9141                            PatFrag MaskedStoreNode, SDNode InVecNode,
9142                            SDPatternOperator InVecMaskNode> {
9143   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9144                           InVecMaskNode, InVecMaskNode, MaskNode, sched,
9145                           avx512vl_i64_info, v8i16x_info, v8i16x_info,
9146                           v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9147                           MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9148 }
9149
9150 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9151                            SDPatternOperator MaskNode,
9152                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9153                            PatFrag MaskedStoreNode, SDNode InVecNode,
9154                            SDPatternOperator InVecMaskNode> {
9155   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9156                           InVecMaskNode, MaskNode, MaskNode, sched,
9157                           avx512vl_i64_info, v4i32x_info, v4i32x_info,
9158                           v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9159                           MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9160 }
9161
9162 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9163                            SDPatternOperator MaskNode,
9164                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9165                            PatFrag MaskedStoreNode, SDNode InVecNode,
9166                            SDPatternOperator InVecMaskNode> {
9167   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9168                           InVecMaskNode, InVecMaskNode, MaskNode, sched,
9169                           avx512vl_i32_info, v16i8x_info, v16i8x_info,
9170                           v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9171                           MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
9172 }
9173
9174 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9175                            SDPatternOperator MaskNode,
9176                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9177                            PatFrag MaskedStoreNode, SDNode InVecNode,
9178                            SDPatternOperator InVecMaskNode> {
9179   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9180                           InVecMaskNode, MaskNode, MaskNode, sched,
9181                           avx512vl_i32_info, v8i16x_info, v8i16x_info,
9182                           v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9183                           MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
9184 }
9185
9186 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9187                            SDPatternOperator MaskNode,
9188                            X86FoldableSchedWrite sched, PatFrag StoreNode,
9189                            PatFrag MaskedStoreNode, SDNode InVecNode,
9190                            SDPatternOperator InVecMaskNode> {
9191   defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9192                           InVecMaskNode, MaskNode, MaskNode, sched,
9193                           avx512vl_i16_info, v16i8x_info, v16i8x_info,
9194                           v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9195                           MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
9196 }
9197
9198 defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   trunc, select_trunc,
9199                                   WriteShuffle256, truncstorevi8,
9200                                   masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9201 defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, select_truncs,
9202                                   WriteShuffle256, truncstore_s_vi8,
9203                                   masked_truncstore_s_vi8, X86vtruncs,
9204                                   X86vmtruncs>;
9205 defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
9206                                   select_truncus, WriteShuffle256,
9207                                   truncstore_us_vi8, masked_truncstore_us_vi8,
9208                                   X86vtruncus, X86vmtruncus>;
9209
9210 defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9211                                   WriteShuffle256, truncstorevi16,
9212                                   masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9213 defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
9214                                   WriteShuffle256, truncstore_s_vi16,
9215                                   masked_truncstore_s_vi16, X86vtruncs,
9216                                   X86vmtruncs>;
9217 defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9218                                   select_truncus, WriteShuffle256,
9219                                   truncstore_us_vi16, masked_truncstore_us_vi16,
9220                                   X86vtruncus, X86vmtruncus>;
9221
9222 defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9223                                   WriteShuffle256, truncstorevi32,
9224                                   masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9225 defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
9226                                   WriteShuffle256, truncstore_s_vi32,
9227                                   masked_truncstore_s_vi32, X86vtruncs,
9228                                   X86vmtruncs>;
9229 defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9230                                   select_truncus, WriteShuffle256,
9231                                   truncstore_us_vi32, masked_truncstore_us_vi32,
9232                                   X86vtruncus, X86vmtruncus>;
9233
9234 defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9235                                   WriteShuffle256, truncstorevi8,
9236                                   masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9237 defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9238                                   WriteShuffle256, truncstore_s_vi8,
9239                                   masked_truncstore_s_vi8, X86vtruncs,
9240                                   X86vmtruncs>;
9241 defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
9242                                   select_truncus, WriteShuffle256,
9243                                   truncstore_us_vi8, masked_truncstore_us_vi8,
9244                                   X86vtruncus, X86vmtruncus>;
9245
9246 defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9247                                   WriteShuffle256, truncstorevi16,
9248                                   masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9249 defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9250                                   WriteShuffle256, truncstore_s_vi16,
9251                                   masked_truncstore_s_vi16, X86vtruncs,
9252                                   X86vmtruncs>;
9253 defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
9254                                   select_truncus, WriteShuffle256,
9255                                   truncstore_us_vi16, masked_truncstore_us_vi16,
9256                                   X86vtruncus, X86vmtruncus>;
9257
9258 defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
9259                                   WriteShuffle256, truncstorevi8,
9260                                   masked_truncstorevi8, X86vtrunc,
9261                                   X86vmtrunc>;
9262 defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
9263                                   WriteShuffle256, truncstore_s_vi8,
9264                                   masked_truncstore_s_vi8, X86vtruncs,
9265                                   X86vmtruncs>;
9266 defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
9267                                   select_truncus, WriteShuffle256,
9268                                   truncstore_us_vi8, masked_truncstore_us_vi8,
9269                                   X86vtruncus, X86vmtruncus>;
9270
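// A rough C-level sketch of the VPMOV* truncate families above (helper names
// are only illustrative): they map to the AVX-512 down-convert intrinsics.
//
//   #include <immintrin.h>
//
//   // VPMOVQB: truncate eight 64-bit elements to bytes (low 64 bits of dst).
//   __m128i trunc_q_to_b(__m512i v)     { return _mm512_cvtepi64_epi8(v); }
//
//   // VPMOVSDW: truncate dwords to words with signed saturation.
//   __m256i trunc_d_to_w_sat(__m512i v) { return _mm512_cvtsepi32_epi16(v); }
//
//   // VPMOVQB mrk form: masked truncating store directly to memory.
//   void trunc_q_to_b_store(void *p, __mmask8 k, __m512i v) {
//     _mm512_mask_cvtepi64_storeu_epi8(p, k, v);
//   }
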
9271 let Predicates = [HasAVX512, NoVLX] in {
9272 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9273          (v8i16 (EXTRACT_SUBREG
9274                  (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9275                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
9276 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9277          (v4i32 (EXTRACT_SUBREG
9278                  (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9279                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
9280 }
9281
9282 let Predicates = [HasBWI, NoVLX] in {
9283 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9284          (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9285                                             VR256X:$src, sub_ymm))), sub_xmm))>;
9286 }
9287
9288 // Without BWI we can't use vXi16/vXi8 vselect, so we have to use vmtrunc nodes.
9289 multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
9290                            X86VectorVTInfo DestInfo,
9291                            X86VectorVTInfo SrcInfo> {
9292   def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9293                                  DestInfo.RC:$src0,
9294                                  SrcInfo.KRCWM:$mask)),
9295             (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
9296                                                  SrcInfo.KRCWM:$mask,
9297                                                  SrcInfo.RC:$src)>;
9298
9299   def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9300                                  DestInfo.ImmAllZerosV,
9301                                  SrcInfo.KRCWM:$mask)),
9302             (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
9303                                                   SrcInfo.RC:$src)>;
9304 }
9305
9306 let Predicates = [HasVLX] in {
9307 defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
9308 defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
9309 defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
9310 }
9311
9312 let Predicates = [HasAVX512] in {
9313 defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
9314 defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
9315 defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
9316
9317 defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
9318 defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
9319 defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
9320
9321 defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
9322 defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
9323 defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
9324 }
9325
9326 multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9327               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9328               X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9329   let ExeDomain = DestInfo.ExeDomain in {
9330   defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9331                     (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
9332                     (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9333                   EVEX, Sched<[sched]>;
9334
9335   defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9336                   (ins x86memop:$src), OpcodeStr ,"$src", "$src",
9337                   (DestInfo.VT (LdFrag addr:$src))>,
9338                 EVEX, Sched<[sched.Folded]>;
9339   }
9340 }
9341
9342 multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
9343           SDNode OpNode, SDNode InVecNode, string ExtTy,
9344           X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9345   let Predicates = [HasVLX, HasBWI] in {
9346     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
9347                     v16i8x_info, i64mem, LdFrag, InVecNode>,
9348                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9349
9350     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
9351                     v16i8x_info, i128mem, LdFrag, OpNode>,
9352                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9353   }
9354   let Predicates = [HasBWI] in {
9355     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
9356                     v32i8x_info, i256mem, LdFrag, OpNode>,
9357                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
9358   }
9359 }
9360
9361 multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
9362           SDNode OpNode, SDNode InVecNode, string ExtTy,
9363           X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9364   let Predicates = [HasVLX, HasAVX512] in {
9365     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9366                    v16i8x_info, i32mem, LdFrag, InVecNode>,
9367                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9368
9369     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9370                    v16i8x_info, i64mem, LdFrag, InVecNode>,
9371                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9372   }
9373   let Predicates = [HasAVX512] in {
9374     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9375                    v16i8x_info, i128mem, LdFrag, OpNode>,
9376                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
9377   }
9378 }
9379
9380 multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
9381           SDNode OpNode, SDNode InVecNode, string ExtTy,
9382           X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9383   let Predicates = [HasVLX, HasAVX512] in {
9384     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9385                    v16i8x_info, i16mem, LdFrag, InVecNode>,
9386                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
9387
9388     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9389                    v16i8x_info, i32mem, LdFrag, InVecNode>,
9390                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
9391   }
9392   let Predicates = [HasAVX512] in {
9393     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9394                    v16i8x_info, i64mem, LdFrag, InVecNode>,
9395                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
9396   }
9397 }
9398
9399 multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
9400          SDNode OpNode, SDNode InVecNode, string ExtTy,
9401          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9402   let Predicates = [HasVLX, HasAVX512] in {
9403     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9404                    v8i16x_info, i64mem, LdFrag, InVecNode>,
9405                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9406
9407     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9408                    v8i16x_info, i128mem, LdFrag, OpNode>,
9409                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9410   }
9411   let Predicates = [HasAVX512] in {
9412     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9413                    v16i16x_info, i256mem, LdFrag, OpNode>,
9414                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
9415   }
9416 }
9417
9418 multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
9419          SDNode OpNode, SDNode InVecNode, string ExtTy,
9420          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9421   let Predicates = [HasVLX, HasAVX512] in {
9422     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9423                    v8i16x_info, i32mem, LdFrag, InVecNode>,
9424                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9425
9426     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9427                    v8i16x_info, i64mem, LdFrag, InVecNode>,
9428                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9429   }
9430   let Predicates = [HasAVX512] in {
9431     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9432                    v8i16x_info, i128mem, LdFrag, OpNode>,
9433                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
9434   }
9435 }
9436
9437 multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
9438          SDNode OpNode, SDNode InVecNode, string ExtTy,
9439          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
9440
9441   let Predicates = [HasVLX, HasAVX512] in {
9442     defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9443                    v4i32x_info, i64mem, LdFrag, InVecNode>,
9444                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
9445
9446     defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9447                    v4i32x_info, i128mem, LdFrag, OpNode>,
9448                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
9449   }
9450   let Predicates = [HasAVX512] in {
9451     defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9452                    v8i32x_info, i256mem, LdFrag, OpNode>,
9453                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
9454   }
9455 }
9456
9457 defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
9458 defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
9459 defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
9460 defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
9461 defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
9462 defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
9463
9464 defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
9465 defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
9466 defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
9467 defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
9468 defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
9469 defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
9470
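// Roughly, in C (illustrative helper names only), the VPMOVZX*/VPMOVSX*
// extension families above correspond to the widening-convert intrinsics:
//
//   #include <immintrin.h>
//
//   // VPMOVZXBW (the 512-bit form requires AVX512BW): zero-extend bytes to words.
//   __m512i zext_b_to_w(__m256i v) { return _mm512_cvtepu8_epi16(v); }
//
//   // VPMOVSXWD: sign-extend words to dwords.
//   __m512i sext_w_to_d(__m256i v) { return _mm512_cvtepi16_epi32(v); }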
9471
9472 // Patterns for which we also need any-extend versions. aext_vector_inreg
9473 // is currently legalized to zext_vector_inreg.
9474 multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
9475   // 256-bit patterns
9476   let Predicates = [HasVLX, HasBWI] in {
9477     def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
9478               (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
9479   }
9480
9481   let Predicates = [HasVLX] in {
9482     def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
9483               (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
9484
9485     def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
9486               (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
9487   }
9488
9489   // 512-bit patterns
9490   let Predicates = [HasBWI] in {
9491     def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
9492               (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
9493   }
9494   let Predicates = [HasAVX512] in {
9495     def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
9496               (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
9497     def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
9498               (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
9499
9500     def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
9501               (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
9502
9503     def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
9504               (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
9505   }
9506 }
9507
9508 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
9509                                  SDNode InVecOp> :
9510     AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
9511   // 128-bit patterns
9512   let Predicates = [HasVLX, HasBWI] in {
9513   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9514             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9515   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9516             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9517   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9518             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9519   }
9520   let Predicates = [HasVLX] in {
9521   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9522             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9523   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9524             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9525
9526   def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
9527             (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
9528
9529   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9530             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9531   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9532             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9533   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9534             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9535
9536   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9537             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9538   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
9539             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9540
9541   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9542             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9543   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9544             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9545   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9546             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9547   }
9548   let Predicates = [HasVLX] in {
9549   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9550             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9551   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9552             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9553
9554   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9555             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9556   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9557             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9558
9559   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9560             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9561   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9562             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9563   }
9564   // 512-bit patterns
9565   let Predicates = [HasAVX512] in {
9566   def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9567             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
9568   }
9569 }
9570
9571 defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
9572 defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
9573
9574 // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
9575 // ext+trunc aggressively, making it impossible to legalize the DAG to this
9576 // pattern directly.
9577 let Predicates = [HasAVX512, NoBWI] in {
9578 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9579          (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
9580 def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
9581          (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
9582 }
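
// The two patterns above widen the v16i16 source with a zero-extend to
// v16i32 (VPMOVZXWD) and then truncate dword->byte with VPMOVDB.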
9583
9584 //===----------------------------------------------------------------------===//
9585 // GATHER - SCATTER Operations
9586
9587 // FIXME: Improve scheduling of gather/scatter instructions.
9588 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9589                          X86MemOperand memop, PatFrag GatherNode,
9590                          RegisterClass MaskRC = _.KRCWM> {
9591   let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
9592       ExeDomain = _.ExeDomain in
9593   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
9594             (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
9595             !strconcat(OpcodeStr#_.Suffix,
9596             "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
9597             [(set _.RC:$dst, MaskRC:$mask_wb,
9598               (GatherNode  (_.VT _.RC:$src1), MaskRC:$mask,
9599                      vectoraddr:$src2))]>, EVEX, EVEX_K,
9600              EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
9601 }
9602
9603 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
9604                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9605   defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
9606                                       vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
9607   defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
9608                                       vz512mem,  mgatherv8i64>, EVEX_V512, VEX_W;
9609 let Predicates = [HasVLX] in {
9610   defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
9611                               vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
9612   defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
9613                               vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
9614   defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
9615                               vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
9616   defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9617                               vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
9618 }
9619 }
9620
9621 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
9622                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9623   defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
9624                                        mgatherv16i32>, EVEX_V512;
9625   defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
9626                                        mgatherv8i64>, EVEX_V512;
9627 let Predicates = [HasVLX] in {
9628   defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
9629                                           vy256xmem, mgatherv8i32>, EVEX_V256;
9630   defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9631                                           vy128xmem, mgatherv4i64>, EVEX_V256;
9632   defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
9633                                           vx128xmem, mgatherv4i32>, EVEX_V128;
9634   defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9635                                           vx64xmem, mgatherv2i64, VK2WM>,
9636                                           EVEX_V128;
9637 }
9638 }
9639
9640
9641 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
9642                avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
9643
9644 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
9645                 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
9646
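// As a C-level illustration (helper name is only illustrative): a masked
// gather such as VGATHERDPS corresponds to _mm512_mask_i32gather_ps; the
// mask operand is also written back (the $mask_wb output above) as elements
// complete.
//
//   #include <immintrin.h>
//
//   // VGATHERDPS: gather 16 floats from base + 4*index under writemask k,
//   // merging unselected lanes from src.
//   __m512 gather_dps(__m512 src, __mmask16 k, __m512i idx, const float *base) {
//     return _mm512_mask_i32gather_ps(src, k, idx, base, 4);
//   }
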
9647 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9648                           X86MemOperand memop, PatFrag ScatterNode,
9649                           RegisterClass MaskRC = _.KRCWM> {
9650
9651 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
9652
9653   def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
9654             (ins memop:$dst, MaskRC:$mask, _.RC:$src),
9655             !strconcat(OpcodeStr#_.Suffix,
9656             "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
9657             [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
9658                                     MaskRC:$mask,  vectoraddr:$dst))]>,
9659             EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9660             Sched<[WriteStore]>;
9661 }
9662
9663 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
9664                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9665   defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
9666                                       vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
9667   defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
9668                                       vz512mem,  mscatterv8i64>, EVEX_V512, VEX_W;
9669 let Predicates = [HasVLX] in {
9670   defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
9671                               vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
9672   defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
9673                               vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
9674   defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
9675                               vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
9676   defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9677                               vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
9678 }
9679 }
9680
9681 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
9682                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9683   defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
9684                                        mscatterv16i32>, EVEX_V512;
9685   defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
9686                                        mscatterv8i64>, EVEX_V512;
9687 let Predicates = [HasVLX] in {
9688   defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
9689                                           vy256xmem, mscatterv8i32>, EVEX_V256;
9690   defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9691                                           vy128xmem, mscatterv4i64>, EVEX_V256;
9692   defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
9693                                           vx128xmem, mscatterv4i32>, EVEX_V128;
9694   defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9695                                           vx64xmem, mscatterv2i64, VK2WM>,
9696                                           EVEX_V128;
9697 }
9698 }
9699
9700 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
9701                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
9702
9703 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
9704                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
9705
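// As a C-level illustration (helper name is only illustrative): a masked
// scatter such as VSCATTERDPS corresponds to _mm512_mask_i32scatter_ps.
//
//   #include <immintrin.h>
//
//   // VSCATTERDPS: scatter 16 floats to base + 4*index under writemask k.
//   void scatter_dps(float *base, __mmask16 k, __m512i idx, __m512 v) {
//     _mm512_mask_i32scatter_ps(base, k, idx, v, 4);
//   }
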
9706 // Gather/scatter prefetch instructions (VGATHERPF/VSCATTERPF).
9707 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
9708                        RegisterClass KRC, X86MemOperand memop> {
9709   let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
9710   def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
9711             !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
9712             EVEX, EVEX_K, Sched<[WriteLoad]>;
9713 }
9714
9715 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
9716                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9717
9718 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
9719                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9720
9721 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
9722                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9723
9724 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
9725                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9726
9727 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
9728                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9729
9730 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
9731                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9732
9733 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
9734                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9735
9736 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
9737                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9738
9739 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
9740                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9741
9742 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
9743                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9744
9745 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
9746                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9747
9748 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
9749                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9750
9751 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
9752                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9753
9754 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
9755                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9756
9757 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
9758                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9759
9760 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
9761                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9762
9763 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
9764 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
9765                   !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
9766                   [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
9767                   EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
9768
9769 // Also need a pattern for anyextend.
9770 def : Pat<(Vec.VT (anyext Vec.KRC:$src)),
9771           (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>;
9772 }
9773
9774 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
9775                                  string OpcodeStr, Predicate prd> {
9776 let Predicates = [prd] in
9777   defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
9778
9779   let Predicates = [prd, HasVLX] in {
9780     defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
9781     defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
9782   }
9783 }
9784
9785 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
9786 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
9787 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
9788 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
9789
9790 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
9791     def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
9792                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
9793                         [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
9794                         EVEX, Sched<[WriteMove]>;
9795 }
9796
9797 // Use the 512-bit version to implement the 128/256-bit forms when VLX is not available (NoVLX).
9798 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
9799                                            X86VectorVTInfo _,
9800                                            string Name> {
9801
9802   def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
9803             (_.KVT (COPY_TO_REGCLASS
9804                      (!cast<Instruction>(Name#"Zrr")
9805                        (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
9806                                       _.RC:$src, _.SubRegIdx)),
9807                    _.KRC))>;
9808 }
9809
9810 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
9811                                    AVX512VLVectorVTInfo VTInfo, Predicate prd> {
9812   let Predicates = [prd] in
9813     defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
9814                                             EVEX_V512;
9815
9816   let Predicates = [prd, HasVLX] in {
9817     defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
9818                                               EVEX_V256;
9819     defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
9820                                                EVEX_V128;
9821   }
9822   let Predicates = [prd, NoVLX] in {
9823     defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
9824     defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
9825   }
9826 }
9827
9828 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
9829                                               avx512vl_i8_info, HasBWI>;
9830 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
9831                                               avx512vl_i16_info, HasBWI>, VEX_W;
9832 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
9833                                               avx512vl_i32_info, HasDQI>;
9834 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
9835                                               avx512vl_i64_info, HasDQI>, VEX_W;
9836
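// Roughly, in C (illustrative helper names only), the mask<->vector moves
// above are exposed as _mm512_movm_epi* and _mm512_movepi*_mask:
//
//   #include <immintrin.h>
//
//   // VPMOVM2B (AVX512BW): expand a bit mask into a vector of 0/-1 bytes.
//   __m512i mask_to_bytes(__mmask64 k)  { return _mm512_movm_epi8(k); }
//
//   // VPMOVD2M (AVX512DQ): collect the sign bits of the dwords into a mask.
//   __mmask16 dwords_to_mask(__m512i v) { return _mm512_movepi32_mask(v); }
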
9837 // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
9838 // is available, but BWI is not. We can't handle this in lowering because
9839 // a target-independent DAG combine likes to combine sext and trunc.
9840 let Predicates = [HasDQI, NoBWI] in {
9841   def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
9842             (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9843   def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
9844             (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9845
9846   def : Pat<(v16i8 (anyext (v16i1 VK16:$src))),
9847             (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9848   def : Pat<(v16i16 (anyext (v16i1 VK16:$src))),
9849             (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9850 }
9851
9852 let Predicates = [HasDQI, NoBWI, HasVLX] in {
9853   def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
9854             (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9855
9856   def : Pat<(v8i16 (anyext (v8i1 VK8:$src))),
9857             (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9858 }
9859
9860 //===----------------------------------------------------------------------===//
9861 // AVX-512 - COMPRESS and EXPAND
9862 //
9863
9864 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
9865                                  string OpcodeStr, X86FoldableSchedWrite sched> {
9866   defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
9867               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
9868               (null_frag)>, AVX5128IBase,
9869               Sched<[sched]>;
9870
9871   let mayStore = 1, hasSideEffects = 0 in
9872   def mr : AVX5128I<opc, MRMDestMem, (outs),
9873               (ins _.MemOp:$dst, _.RC:$src),
9874               OpcodeStr # "\t{$src, $dst|$dst, $src}",
9875               []>, EVEX_CD8<_.EltSize, CD8VT1>,
9876               Sched<[sched.Folded]>;
9877
9878   def mrk : AVX5128I<opc, MRMDestMem, (outs),
9879               (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
9880               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9881               []>,
9882               EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9883               Sched<[sched.Folded]>;
9884 }
9885
9886 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
9887   def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
9888             (!cast<Instruction>(Name#_.ZSuffix##mrk)
9889                             addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
9890
9891   def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
9892             (!cast<Instruction>(Name#_.ZSuffix##rrk)
9893                             _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
9894   def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
9895             (!cast<Instruction>(Name#_.ZSuffix##rrkz)
9896                             _.KRCWM:$mask, _.RC:$src)>;
9897 }
9898
9899 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
9900                                  X86FoldableSchedWrite sched,
9901                                  AVX512VLVectorVTInfo VTInfo,
9902                                  Predicate Pred = HasAVX512> {
9903   let Predicates = [Pred] in
9904   defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
9905            compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
9906
9907   let Predicates = [Pred, HasVLX] in {
9908     defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
9909                 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
9910     defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
9911                 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
9912   }
9913 }
9914
9915 // FIXME: Is there a better scheduler class for VPCOMPRESS?
9916 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
9917                                           avx512vl_i32_info>, EVEX, NotMemoryFoldable;
9918 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
9919                                           avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
9920 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
9921                                           avx512vl_f32_info>, EVEX, NotMemoryFoldable;
9922 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
9923                                           avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
9924
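// For illustration (helper names are only illustrative): the compress forms
// above surface as _mm512_mask_compress_* and _mm512_mask_compressstoreu_*.
//
//   #include <immintrin.h>
//
//   // VPCOMPRESSD rrk: pack the dwords selected by k into the low elements
//   // of the result, taking the remaining elements from src.
//   __m512i compress_d(__m512i src, __mmask16 k, __m512i a) {
//     return _mm512_mask_compress_epi32(src, k, a);
//   }
//
//   // VPCOMPRESSD mrk: compressing store of the selected dwords to memory.
//   void compress_store_d(int *p, __mmask16 k, __m512i a) {
//     _mm512_mask_compressstoreu_epi32(p, k, a);
//   }
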
9925 // expand
9926 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
9927                                  string OpcodeStr, X86FoldableSchedWrite sched> {
9928   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9929               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
9930               (null_frag)>, AVX5128IBase,
9931               Sched<[sched]>;
9932
9933   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9934               (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
9935               (null_frag)>,
9936             AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
9937             Sched<[sched.Folded, sched.ReadAfterFold]>;
9938 }
9939
9940 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
9941
9942   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
9943             (!cast<Instruction>(Name#_.ZSuffix##rmkz)
9944                                         _.KRCWM:$mask, addr:$src)>;
9945
9946   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
9947             (!cast<Instruction>(Name#_.ZSuffix##rmkz)
9948                                         _.KRCWM:$mask, addr:$src)>;
9949
9950   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
9951                                                (_.VT _.RC:$src0))),
9952             (!cast<Instruction>(Name#_.ZSuffix##rmk)
9953                             _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
9954
9955   def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
9956             (!cast<Instruction>(Name#_.ZSuffix##rrk)
9957                             _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
9958   def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
9959             (!cast<Instruction>(Name#_.ZSuffix##rrkz)
9960                             _.KRCWM:$mask, _.RC:$src)>;
9961 }
9962
9963 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
9964                                X86FoldableSchedWrite sched,
9965                                AVX512VLVectorVTInfo VTInfo,
9966                                Predicate Pred = HasAVX512> {
9967   let Predicates = [Pred] in
9968   defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
9969            expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
9970
9971   let Predicates = [Pred, HasVLX] in {
9972     defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
9973                 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
9974     defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
9975                 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
9976   }
9977 }
9978
9979 // FIXME: Is there a better scheduler class for VPEXPAND?
9980 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
9981                                       avx512vl_i32_info>, EVEX;
9982 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
9983                                       avx512vl_i64_info>, EVEX, VEX_W;
9984 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
9985                                       avx512vl_f32_info>, EVEX;
9986 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
9987                                       avx512vl_f64_info>, EVEX, VEX_W;
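// Illustration only: assuming the standard <immintrin.h> intrinsic names, the
// expand forms correspond to usage such as
//   __m512i r = _mm512_maskz_expand_epi32(k, v);        // vpexpandd, zeroing form
//   __m512i l = _mm512_maskz_expandloadu_epi32(k, ptr); // vpexpandd load form
// Consecutive source elements are scattered into the active (mask-selected)
// destination lanes; inactive lanes take the merge source or zero.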
9988
9989 // Handle instructions:  reg_vec1 = op(reg_vec, imm)
9990 //                                  op(mem_vec, imm)
9991 //                                  op(broadcast(eltVT), imm)
9992 // All instructions are created with FROUND_CURRENT.
9993 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
9994                                       X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9995   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9996   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9997                       (ins _.RC:$src1, i32u8imm:$src2),
9998                       OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
9999                       (OpNode (_.VT _.RC:$src1),
10000                               (i32 timm:$src2))>, Sched<[sched]>;
10001   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10002                     (ins _.MemOp:$src1, i32u8imm:$src2),
10003                     OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
10004                     (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10005                             (i32 timm:$src2))>,
10006                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10007   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10008                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10009                     OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
10010                     "${src1}"##_.BroadcastStr##", $src2",
10011                     (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10012                             (i32 timm:$src2))>, EVEX_B,
10013                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10014   }
10015 }
10016
10017 // Handle instructions:  reg_vec1 = op(reg_vec2, imm), {sae}
10018 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10019                                           SDNode OpNode, X86FoldableSchedWrite sched,
10020                                           X86VectorVTInfo _> {
10021   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10022   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10023                       (ins _.RC:$src1, i32u8imm:$src2),
10024                       OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
10025                       "$src1, {sae}, $src2",
10026                       (OpNode (_.VT _.RC:$src1),
10027                               (i32 timm:$src2))>,
10028                       EVEX_B, Sched<[sched]>;
10029 }
10030
10031 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10032             AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10033             SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10034   let Predicates = [prd] in {
10035     defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
10036                                            _.info512>,
10037                 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10038                                                sched.ZMM, _.info512>, EVEX_V512;
10039   }
10040   let Predicates = [prd, HasVLX] in {
10041     defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
10042                                            _.info128>, EVEX_V128;
10043     defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
10044                                            _.info256>, EVEX_V256;
10045   }
10046 }
10047
10048 // Handle instructions:  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10049 //                                  op(reg_vec2, mem_vec, imm)
10050 //                                  op(reg_vec2, broadcast(eltVT), imm)
10051 // All instructions are created with FROUND_CURRENT.
10052 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10053                                 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10054   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10055   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10056                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10057                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10058                       (OpNode (_.VT _.RC:$src1),
10059                               (_.VT _.RC:$src2),
10060                               (i32 timm:$src3))>,
10061                       Sched<[sched]>;
10062   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10063                     (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10064                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10065                     (OpNode (_.VT _.RC:$src1),
10066                             (_.VT (bitconvert (_.LdFrag addr:$src2))),
10067                             (i32 timm:$src3))>,
10068                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10069   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10070                     (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10071                     OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10072                     "$src1, ${src2}"##_.BroadcastStr##", $src3",
10073                     (OpNode (_.VT _.RC:$src1),
10074                             (_.VT (_.BroadcastLdFrag addr:$src2)),
10075                             (i32 timm:$src3))>, EVEX_B,
10076                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10077   }
10078 }
10079
10080 // Handle instructions:  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10081 //                                  op(reg_vec2, mem_vec, imm)
10082 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10083                               X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10084                               X86VectorVTInfo SrcInfo>{
10085   let ExeDomain = DestInfo.ExeDomain in {
10086   defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10087                   (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10088                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10089                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10090                                (SrcInfo.VT SrcInfo.RC:$src2),
10091                                (i8 timm:$src3)))>,
10092                   Sched<[sched]>;
10093   defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10094                 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10095                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10096                 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10097                              (SrcInfo.VT (bitconvert
10098                                                 (SrcInfo.LdFrag addr:$src2))),
10099                              (i8 timm:$src3)))>,
10100                 Sched<[sched.Folded, sched.ReadAfterFold]>;
10101   }
10102 }
10103
10104 // Handle instructions:  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10105 //                                  op(reg_vec2, mem_vec, imm)
10106 //                                  op(reg_vec2, broadcast(eltVT), imm)
10107 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10108                            X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10109   avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10110
10111   let ExeDomain = _.ExeDomain in
10112   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10113                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10114                     OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10115                     "$src1, ${src2}"##_.BroadcastStr##", $src3",
10116                     (OpNode (_.VT _.RC:$src1),
10117                             (_.VT (_.BroadcastLdFrag addr:$src2)),
10118                             (i8 timm:$src3))>, EVEX_B,
10119                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10120 }
10121
10122 // Handle scalar instructions:  reg_vec1 = op(reg_vec2, reg_vec3, imm)
10123 //                                         op(reg_vec2, mem_scalar, imm)
10124 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10125                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10126   let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10127   defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10128                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10129                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10130                       (OpNode (_.VT _.RC:$src1),
10131                               (_.VT _.RC:$src2),
10132                               (i32 timm:$src3))>,
10133                       Sched<[sched]>;
10134   defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10135                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10136                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10137                     (OpNode (_.VT _.RC:$src1),
10138                             (_.VT _.ScalarIntMemCPat:$src2),
10139                             (i32 timm:$src3))>,
10140                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10141   }
10142 }
10143
10144 // Handle instructions:  reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
10145 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10146                                     SDNode OpNode, X86FoldableSchedWrite sched,
10147                                     X86VectorVTInfo _> {
10148   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10149   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10150                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10151                       OpcodeStr, "$src3, {sae}, $src2, $src1",
10152                       "$src1, $src2, {sae}, $src3",
10153                       (OpNode (_.VT _.RC:$src1),
10154                               (_.VT _.RC:$src2),
10155                               (i32 timm:$src3))>,
10156                       EVEX_B, Sched<[sched]>;
10157 }
10158
10159 // Handle scalar instructions:  reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
10160 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10161                                     X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10162   let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10163   defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10164                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10165                       OpcodeStr, "$src3, {sae}, $src2, $src1",
10166                       "$src1, $src2, {sae}, $src3",
10167                       (OpNode (_.VT _.RC:$src1),
10168                               (_.VT _.RC:$src2),
10169                               (i32 timm:$src3))>,
10170                       EVEX_B, Sched<[sched]>;
10171 }
10172
10173 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10174             AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10175             SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10176   let Predicates = [prd] in {
10177     defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10178                 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10179                                   EVEX_V512;
10180
10181   }
10182   let Predicates = [prd, HasVLX] in {
10183     defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10184                                   EVEX_V128;
10185     defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10186                                   EVEX_V256;
10187   }
10188 }
10189
10190 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10191                    X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10192                    AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10193   let Predicates = [Pred] in {
10194     defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10195                            SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
10196   }
10197   let Predicates = [Pred, HasVLX] in {
10198     defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10199                            SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
10200     defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10201                            SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
10202   }
10203 }
10204
10205 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10206                                   bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10207                                   Predicate Pred = HasAVX512> {
10208   let Predicates = [Pred] in {
10209     defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10210                                 EVEX_V512;
10211   }
10212   let Predicates = [Pred, HasVLX] in {
10213     defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10214                                 EVEX_V128;
10215     defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10216                                 EVEX_V256;
10217   }
10218 }
10219
10220 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10221                   X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10222                   SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10223   let Predicates = [prd] in {
10224      defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10225               avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
10226   }
10227 }
10228
10229 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10230                     bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
10231                     SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10232   defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10233                             opcPs, OpNode, OpNodeSAE, sched, prd>,
10234                             EVEX_CD8<32, CD8VF>;
10235   defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10236                             opcPd, OpNode, OpNodeSAE, sched, prd>,
10237                             EVEX_CD8<64, CD8VF>, VEX_W;
10238 }
10239
10240 defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10241                               X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
10242                               AVX512AIi8Base, EVEX;
10243 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10244                               X86any_VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
10245                               AVX512AIi8Base, EVEX;
10246 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10247                               X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
10248                               AVX512AIi8Base, EVEX;
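// Illustration only: assuming the standard intrinsics, the immediate selects
// the rounding/extraction behaviour of these instructions, e.g.
//   __m512 f = _mm512_roundscale_ps(v, 0x01);  // vrndscaleps: round toward -inf
//   __m512 m = _mm512_getmant_ps(v, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
// (vreduceps is similarly exposed as _mm512_reduce_ps under AVX512DQ, not to be
// confused with the horizontal _mm512_reduce_add_ps helpers.)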
10249
10250 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10251                                                 0x50, X86VRange, X86VRangeSAE,
10252                                                 SchedWriteFAdd, HasDQI>,
10253       AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10254 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10255                                                 0x50, X86VRange, X86VRangeSAE,
10256                                                 SchedWriteFAdd, HasDQI>,
10257       AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10258
10259 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10260       f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10261       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10262 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10263       0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10264       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10265
10266 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10267       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10268       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10269 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10270       0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10271       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10272
10273 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10274       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10275       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10276 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10277       0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10278       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10279
10280 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10281                                           X86FoldableSchedWrite sched,
10282                                           X86VectorVTInfo _,
10283                                           X86VectorVTInfo CastInfo,
10284                                           string EVEX2VEXOvrd> {
10285   let ExeDomain = _.ExeDomain in {
10286   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10287                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10288                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10289                   (_.VT (bitconvert
10290                          (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10291                                                   (i8 timm:$src3)))))>,
10292                   Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
10293   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10294                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10295                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10296                 (_.VT
10297                  (bitconvert
10298                   (CastInfo.VT (X86Shuf128 _.RC:$src1,
10299                                            (CastInfo.LdFrag addr:$src2),
10300                                            (i8 timm:$src3)))))>,
10301                 Sched<[sched.Folded, sched.ReadAfterFold]>,
10302                 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
10303   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10304                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10305                     OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10306                     "$src1, ${src2}"##_.BroadcastStr##", $src3",
10307                     (_.VT
10308                      (bitconvert
10309                       (CastInfo.VT
10310                        (X86Shuf128 _.RC:$src1,
10311                                    (_.BroadcastLdFrag addr:$src2),
10312                                    (i8 timm:$src3)))))>, EVEX_B,
10313                     Sched<[sched.Folded, sched.ReadAfterFold]>;
10314   }
10315 }
10316
10317 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10318                                    AVX512VLVectorVTInfo _,
10319                                    AVX512VLVectorVTInfo CastInfo, bits<8> opc,
10320                                    string EVEX2VEXOvrd>{
10321   let Predicates = [HasAVX512] in
10322   defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10323                                           _.info512, CastInfo.info512, "">, EVEX_V512;
10324
10325   let Predicates = [HasAVX512, HasVLX] in
10326   defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10327                                              _.info256, CastInfo.info256,
10328                                              EVEX2VEXOvrd>, EVEX_V256;
10329 }
10330
10331 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10332       avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10333 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10334       avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10335 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
10336       avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10337 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
10338       avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
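// Illustration only: these shuffle whole 128-bit lanes selected by the
// immediate. Assuming the standard intrinsics:
//   __m512i r = _mm512_shuffle_i32x4(a, b, 0x44);  // vshufi32x4
//   // result lanes = { a.lane0, a.lane1, b.lane0, b.lane1 }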
10339
10340 let Predicates = [HasAVX512] in {
10341 // Provide a fallback in case the load node used in the broadcast patterns
10342 // above has additional users, which prevents those patterns from being
10343 // selected.
10344 def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
10345           (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10346                           (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10347                           0)>;
10348 def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
10349           (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10350                           (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10351                           0)>;
10352
10353 def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
10354           (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10355                           (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10356                           0)>;
10357 def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
10358           (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10359                           (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10360                           0)>;
10361
10362 def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
10363           (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10364                           (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10365                           0)>;
10366
10367 def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
10368           (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10369                           (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10370                           0)>;
10371 }
10372
10373 multiclass avx512_valign<bits<8> opc, string OpcodeStr,
10374                          X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10375   // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
10376   // instantiation of this class.
10377   let ExeDomain = _.ExeDomain in {
10378   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10379                   (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10380                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10381                   (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
10382                   Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
10383   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10384                 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10385                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10386                 (_.VT (X86VAlign _.RC:$src1,
10387                                  (bitconvert (_.LdFrag addr:$src2)),
10388                                  (i8 timm:$src3)))>,
10389                 Sched<[sched.Folded, sched.ReadAfterFold]>,
10390                 EVEX2VEXOverride<"VPALIGNRrmi">;
10391
10392   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10393                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10394                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10395                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
10396                    (X86VAlign _.RC:$src1,
10397                               (_.VT (_.BroadcastLdFrag addr:$src2)),
10398                               (i8 timm:$src3))>, EVEX_B,
10399                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10400   }
10401 }
10402
10403 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
10404                                 AVX512VLVectorVTInfo _> {
10405   let Predicates = [HasAVX512] in {
10406     defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
10407                                 AVX512AIi8Base, EVEX_4V, EVEX_V512;
10408   }
10409   let Predicates = [HasAVX512, HasVLX] in {
10410     defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
10411                                 AVX512AIi8Base, EVEX_4V, EVEX_V128;
10412     // We can't really override the 256-bit version so change it back to unset.
10413     let EVEX2VEXOverride = ? in
10414     defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
10415                                 AVX512AIi8Base, EVEX_4V, EVEX_V256;
10416   }
10417 }
10418
10419 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
10420                                    avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10421 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
10422                                    avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
10423                                    VEX_W;
10424
10425 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
10426                                          SchedWriteShuffle, avx512vl_i8_info,
10427                                          avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
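// Illustration only: valignd/valignq concatenate the two sources and shift
// right by whole elements, e.g. (assuming the standard intrinsics)
//   __m512i r = _mm512_alignr_epi32(a, b, 3);  // valignd: drop b's low 3 dwords
// whereas vpalignr shifts by bytes within each 128-bit lane.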
10428
10429 // Immediate-transform fragments to help convert a masked valignq into a
10430 // masked valignd, or valignq/valignd into vpalignr.
10431 def ValignqImm32XForm : SDNodeXForm<timm, [{
10432   return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
10433 }]>;
10434 def ValignqImm8XForm : SDNodeXForm<timm, [{
10435   return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
10436 }]>;
10437 def ValigndImm8XForm : SDNodeXForm<timm, [{
10438   return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
10439 }]>;
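// For example, a masked (valignq ..., 1), an alignment by one 64-bit element,
// becomes (valignd ..., 2) via ValignqImm32XForm (two 32-bit elements) or
// (vpalignr ..., 8) via ValignqImm8XForm (eight bytes); ValigndImm8XForm
// likewise scales a valignd immediate by 4 to get a byte count.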
10440
10441 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
10442                                         X86VectorVTInfo From, X86VectorVTInfo To,
10443                                         SDNodeXForm ImmXForm> {
10444   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10445                             (bitconvert
10446                              (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10447                                               timm:$src3))),
10448                             To.RC:$src0)),
10449             (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
10450                                                   To.RC:$src1, To.RC:$src2,
10451                                                   (ImmXForm timm:$src3))>;
10452
10453   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10454                             (bitconvert
10455                              (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10456                                               timm:$src3))),
10457                             To.ImmAllZerosV)),
10458             (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
10459                                                    To.RC:$src1, To.RC:$src2,
10460                                                    (ImmXForm timm:$src3))>;
10461
10462   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10463                             (bitconvert
10464                              (From.VT (OpNode From.RC:$src1,
10465                                               (From.LdFrag addr:$src2),
10466                                       timm:$src3))),
10467                             To.RC:$src0)),
10468             (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
10469                                                   To.RC:$src1, addr:$src2,
10470                                                   (ImmXForm timm:$src3))>;
10471
10472   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10473                             (bitconvert
10474                              (From.VT (OpNode From.RC:$src1,
10475                                               (From.LdFrag addr:$src2),
10476                                       timm:$src3))),
10477                             To.ImmAllZerosV)),
10478             (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
10479                                                    To.RC:$src1, addr:$src2,
10480                                                    (ImmXForm timm:$src3))>;
10481 }
10482
10483 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
10484                                            X86VectorVTInfo From,
10485                                            X86VectorVTInfo To,
10486                                            SDNodeXForm ImmXForm> :
10487       avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
10488   def : Pat<(From.VT (OpNode From.RC:$src1,
10489                              (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
10490                              timm:$src3)),
10491             (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
10492                                                   (ImmXForm timm:$src3))>;
10493
10494   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10495                             (bitconvert
10496                              (From.VT (OpNode From.RC:$src1,
10497                                       (bitconvert
10498                                        (To.VT (To.BroadcastLdFrag addr:$src2))),
10499                                       timm:$src3))),
10500                             To.RC:$src0)),
10501             (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
10502                                                    To.RC:$src1, addr:$src2,
10503                                                    (ImmXForm timm:$src3))>;
10504
10505   def : Pat<(To.VT (vselect To.KRCWM:$mask,
10506                             (bitconvert
10507                              (From.VT (OpNode From.RC:$src1,
10508                                       (bitconvert
10509                                        (To.VT (To.BroadcastLdFrag addr:$src2))),
10510                                       timm:$src3))),
10511                             To.ImmAllZerosV)),
10512             (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
10513                                                     To.RC:$src1, addr:$src2,
10514                                                     (ImmXForm timm:$src3))>;
10515 }
10516
10517 let Predicates = [HasAVX512] in {
10518   // For 512-bit we lower to the widest element type we can. So we only need
10519   // to handle converting valignq to valignd.
10520   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
10521                                          v16i32_info, ValignqImm32XForm>;
10522 }
10523
10524 let Predicates = [HasVLX] in {
10525   // For 128-bit we lower to the widest element type we can. So we only need
10526   // to handle converting valignq to valignd.
10527   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
10528                                          v4i32x_info, ValignqImm32XForm>;
10529   // For 256-bit we lower to the widest element type we can. So we only need
10530   // to handle converting valignq to valignd.
10531   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
10532                                          v8i32x_info, ValignqImm32XForm>;
10533 }
10534
10535 let Predicates = [HasVLX, HasBWI] in {
10536   // We can turn 128- and 256-bit VALIGND/VALIGNQ into VPALIGNR.
10537   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
10538                                       v16i8x_info, ValignqImm8XForm>;
10539   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
10540                                       v16i8x_info, ValigndImm8XForm>;
10541 }
10542
10543 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
10544                 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
10545                 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
10546
10547 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10548                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10549   let ExeDomain = _.ExeDomain in {
10550   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10551                     (ins _.RC:$src1), OpcodeStr,
10552                     "$src1", "$src1",
10553                     (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
10554                     Sched<[sched]>;
10555
10556   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10557                   (ins _.MemOp:$src1), OpcodeStr,
10558                   "$src1", "$src1",
10559                   (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
10560             EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
10561             Sched<[sched.Folded]>;
10562   }
10563 }
10564
10565 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
10566                             X86FoldableSchedWrite sched, X86VectorVTInfo _> :
10567            avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
10568   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10569                   (ins _.ScalarMemOp:$src1), OpcodeStr,
10570                   "${src1}"##_.BroadcastStr,
10571                   "${src1}"##_.BroadcastStr,
10572                   (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
10573              EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
10574              Sched<[sched.Folded]>;
10575 }
10576
10577 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10578                               X86SchedWriteWidths sched,
10579                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10580   let Predicates = [prd] in
10581     defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10582                              EVEX_V512;
10583
10584   let Predicates = [prd, HasVLX] in {
10585     defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10586                               EVEX_V256;
10587     defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
10588                               EVEX_V128;
10589   }
10590 }
10591
10592 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10593                                X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
10594                                Predicate prd> {
10595   let Predicates = [prd] in
10596     defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10597                               EVEX_V512;
10598
10599   let Predicates = [prd, HasVLX] in {
10600     defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10601                                  EVEX_V256;
10602     defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
10603                                  EVEX_V128;
10604   }
10605 }
10606
10607 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
10608                                  SDNode OpNode, X86SchedWriteWidths sched,
10609                                  Predicate prd> {
10610   defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
10611                                avx512vl_i64_info, prd>, VEX_W;
10612   defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
10613                                avx512vl_i32_info, prd>;
10614 }
10615
10616 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
10617                                  SDNode OpNode, X86SchedWriteWidths sched,
10618                                  Predicate prd> {
10619   defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
10620                               avx512vl_i16_info, prd>, VEX_WIG;
10621   defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
10622                               avx512vl_i8_info, prd>, VEX_WIG;
10623 }
10624
10625 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
10626                                   bits<8> opc_d, bits<8> opc_q,
10627                                   string OpcodeStr, SDNode OpNode,
10628                                   X86SchedWriteWidths sched> {
10629   defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
10630                                     HasAVX512>,
10631               avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
10632                                     HasBWI>;
10633 }
10634
10635 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
10636                                     SchedWriteVecALU>;
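// Illustration only (assuming the standard intrinsics):
//   __m512i a = _mm512_abs_epi32(v);               // vpabsd
//   __m512i b = _mm512_mask_abs_epi64(src, k, v);  // vpabsq, merge-masked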
10637
10638 // VPABS: Use the 512-bit version to implement 128/256-bit when VLX is unavailable.
10639 let Predicates = [HasAVX512, NoVLX] in {
10640   def : Pat<(v4i64 (abs VR256X:$src)),
10641             (EXTRACT_SUBREG
10642                 (VPABSQZrr
10643                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
10644              sub_ymm)>;
10645   def : Pat<(v2i64 (abs VR128X:$src)),
10646             (EXTRACT_SUBREG
10647                 (VPABSQZrr
10648                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
10649              sub_xmm)>;
10650 }
10651
10652 // Use the 512-bit version to implement the 128/256-bit forms.
10653 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
10654                                  AVX512VLVectorVTInfo _, Predicate prd> {
10655   let Predicates = [prd, NoVLX] in {
10656     def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
10657               (EXTRACT_SUBREG
10658                 (!cast<Instruction>(InstrStr # "Zrr")
10659                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10660                                  _.info256.RC:$src1,
10661                                  _.info256.SubRegIdx)),
10662               _.info256.SubRegIdx)>;
10663
10664     def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
10665               (EXTRACT_SUBREG
10666                 (!cast<Instruction>(InstrStr # "Zrr")
10667                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10668                                  _.info128.RC:$src1,
10669                                  _.info128.SubRegIdx)),
10670               _.info128.SubRegIdx)>;
10671   }
10672 }
10673
10674 defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
10675                                         SchedWriteVecIMul, HasCDI>;
10676
10677 // FIXME: Is there a better scheduler class for VPCONFLICT?
10678 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
10679                                         SchedWriteVecALU, HasCDI>;
10680
10681 // VPLZCNT: Use the 512-bit version to implement 128/256-bit when VLX is unavailable.
10682 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
10683 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
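// Illustration only (assuming the AVX512CD intrinsics):
//   __m512i lz = _mm512_lzcnt_epi32(v);     // vplzcntd: per-element leading zeros
//   __m512i cf = _mm512_conflict_epi32(v);  // vpconflictd: bitmask of preceding
//                                           // elements equal to each element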
10684
10685 //===---------------------------------------------------------------------===//
10686 // Counts number of ones - VPOPCNTD and VPOPCNTQ
10687 //===---------------------------------------------------------------------===//
10688
10689 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
10690 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
10691                                      SchedWriteVecALU, HasVPOPCNTDQ>;
10692
10693 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
10694 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
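// Illustration only (assuming the AVX512VPOPCNTDQ intrinsics):
//   __m512i c = _mm512_popcnt_epi32(v);  // vpopcntd: per-element population count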
10695
10696 //===---------------------------------------------------------------------===//
10697 // Replicate Single FP - MOVSHDUP and MOVSLDUP
10698 //===---------------------------------------------------------------------===//
10699
10700 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
10701                             X86SchedWriteWidths sched> {
10702   defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
10703                                       avx512vl_f32_info, HasAVX512>, XS;
10704 }
10705
10706 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
10707                                   SchedWriteFShuffle>;
10708 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
10709                                   SchedWriteFShuffle>;
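// Illustration only (assuming the standard intrinsics):
//   __m512 h = _mm512_movehdup_ps(v);  // vmovshdup: duplicate odd-index elements
//   __m512 l = _mm512_moveldup_ps(v);  // vmovsldup: duplicate even-index elements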
10710
10711 //===----------------------------------------------------------------------===//
10712 // AVX-512 - MOVDDUP
10713 //===----------------------------------------------------------------------===//
10714
10715 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
10716                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10717   let ExeDomain = _.ExeDomain in {
10718   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10719                    (ins _.RC:$src), OpcodeStr, "$src", "$src",
10720                    (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
10721                    Sched<[sched]>;
10722   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10723                  (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
10724                  (_.VT (_.BroadcastLdFrag addr:$src))>,
10725                  EVEX, EVEX_CD8<_.EltSize, CD8VH>,
10726                  Sched<[sched.Folded]>;
10727   }
10728 }
10729
10730 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
10731                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
10732   defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
10733                            VTInfo.info512>, EVEX_V512;
10734
10735   let Predicates = [HasAVX512, HasVLX] in {
10736     defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
10737                                 VTInfo.info256>, EVEX_V256;
10738     defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
10739                                    VTInfo.info128>, EVEX_V128;
10740   }
10741 }
10742
10743 multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
10744                           X86SchedWriteWidths sched> {
10745   defm NAME:      avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
10746                                         avx512vl_f64_info>, XD, VEX_W;
10747 }
10748
10749 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
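// Illustration only (assuming the standard intrinsics):
//   __m512d d = _mm512_movedup_pd(v);  // vmovddup: duplicate the even-index
//                                      // double in each 128-bit lane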
10750
10751 let Predicates = [HasVLX] in {
10752 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
10753           (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10754 def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
10755           (VMOVDDUPZ128rm addr:$src)>;
10756 def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
10757           (VMOVDDUPZ128rm addr:$src)>;
10758
10759 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10760                    (v2f64 VR128X:$src0)),
10761           (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
10762                            (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10763 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10764                    immAllZerosV),
10765           (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10766
10767 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
10768                    (v2f64 VR128X:$src0)),
10769           (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
10770 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
10771                    immAllZerosV),
10772           (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
10773
10774 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
10775                    (v2f64 VR128X:$src0)),
10776           (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
10777 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
10778                    immAllZerosV),
10779           (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
10780 }
10781
10782 //===----------------------------------------------------------------------===//
10783 // AVX-512 - Unpack Instructions
10784 //===----------------------------------------------------------------------===//
10785
10786 let Uses = []<Register>, mayRaiseFPException = 0 in {
10787 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
10788                                  SchedWriteFShuffleSizes, 0, 1>;
10789 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
10790                                  SchedWriteFShuffleSizes>;
10791 }
10792
10793 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
10794                                        SchedWriteShuffle, HasBWI>;
10795 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
10796                                        SchedWriteShuffle, HasBWI>;
10797 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
10798                                        SchedWriteShuffle, HasBWI>;
10799 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
10800                                        SchedWriteShuffle, HasBWI>;
10801
10802 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
10803                                        SchedWriteShuffle, HasAVX512>;
10804 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
10805                                        SchedWriteShuffle, HasAVX512>;
10806 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
10807                                         SchedWriteShuffle, HasAVX512>;
10808 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
10809                                         SchedWriteShuffle, HasAVX512>;
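// Illustration only: unpack interleaves the low or high halves of each 128-bit
// lane of the two sources. Assuming the standard intrinsics:
//   __m512i lo = _mm512_unpacklo_epi32(ai, bi);  // vpunpckldq
//   __m512  hi = _mm512_unpackhi_ps(af, bf);     // vunpckhps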
10810
10811 //===----------------------------------------------------------------------===//
10812 // AVX-512 - Extract & Insert Integer Instructions
10813 //===----------------------------------------------------------------------===//
10814
10815 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10816                                                             X86VectorVTInfo _> {
10817   def mr : AVX512Ii8<opc, MRMDestMem, (outs),
10818               (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10819               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10820               [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
10821                        addr:$dst)]>,
10822               EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
10823 }
10824
10825 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
10826   let Predicates = [HasBWI] in {
10827     def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
10828                   (ins _.RC:$src1, u8imm:$src2),
10829                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10830                   [(set GR32orGR64:$dst,
10831                         (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
10832                   EVEX, TAPD, Sched<[WriteVecExtract]>;
10833
10834     defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
10835   }
10836 }
10837
10838 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
10839   let Predicates = [HasBWI] in {
10840     def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
10841                   (ins _.RC:$src1, u8imm:$src2),
10842                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10843                   [(set GR32orGR64:$dst,
10844                         (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
10845                   EVEX, PD, Sched<[WriteVecExtract]>;
10846
10847     let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
10848     def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
10849                    (ins _.RC:$src1, u8imm:$src2),
10850                    OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
10851                    EVEX, TAPD, FoldGenData<NAME#rr>,
10852                    Sched<[WriteVecExtract]>;
10853
10854     defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
10855   }
10856 }
10857
10858 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
10859                                                             RegisterClass GRC> {
10860   let Predicates = [HasDQI] in {
10861     def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
10862                   (ins _.RC:$src1, u8imm:$src2),
10863                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10864                   [(set GRC:$dst,
10865                       (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
10866                   EVEX, TAPD, Sched<[WriteVecExtract]>;
10867
10868     def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
10869                 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10870                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10871                 [(store (extractelt (_.VT _.RC:$src1),
10872                                     imm:$src2),addr:$dst)]>,
10873                 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
10874                 Sched<[WriteVecExtractSt]>;
10875   }
10876 }
10877
10878 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
10879 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
10880 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
10881 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
10882
10883 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10884                                             X86VectorVTInfo _, PatFrag LdFrag> {
10885   def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
10886       (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
10887       OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10888       [(set _.RC:$dst,
10889           (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
10890       EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
10891 }
10892
10893 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
10894                                             X86VectorVTInfo _, PatFrag LdFrag> {
10895   let Predicates = [HasBWI] in {
10896     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10897         (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
10898         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10899         [(set _.RC:$dst,
10900             (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
10901         Sched<[WriteVecInsert]>;
10902
10903     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
10904   }
10905 }
10906
10907 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
10908                                          X86VectorVTInfo _, RegisterClass GRC> {
10909   let Predicates = [HasDQI] in {
10910     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10911         (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
10912         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10913         [(set _.RC:$dst,
10914             (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
10915         EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
10916
10917     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
10918                                     _.ScalarLdFrag>, TAPD;
10919   }
10920 }
10921
10922 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
10923                                      extloadi8>, TAPD, VEX_WIG;
10924 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
10925                                      extloadi16>, PD, VEX_WIG;
10926 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
10927 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
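// Illustration only: the familiar SSE4.1-style intrinsics can lower to these
// EVEX forms when the required features (BWI/DQI) are available, e.g.
//   int     e = _mm_extract_epi8(v, 3);     // vpextrb
//   __m128i r = _mm_insert_epi32(v, x, 1);  // vpinsrd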
10928
10929 //===----------------------------------------------------------------------===//
10930 // VSHUFPS - VSHUFPD Operations
10931 //===----------------------------------------------------------------------===//
10932
10933 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
10934                         AVX512VLVectorVTInfo VTInfo_FP>{
10935   defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
10936                                     SchedWriteFShuffle>,
10937                                     EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
10938                                     AVX512AIi8Base, EVEX_4V;
10939 }
10940
10941 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
10942 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
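// Illustration only (assuming the standard intrinsics):
//   __m512 r = _mm512_shuffle_ps(a, b, _MM_SHUFFLE(3, 2, 1, 0));  // vshufps
// Within each 128-bit lane, the two low result elements are selected from a
// and the two high result elements from b, per the immediate.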
10943
10944 //===----------------------------------------------------------------------===//
10945 // AVX-512 - Byte shift Left/Right
10946 //===----------------------------------------------------------------------===//
10947
10948 // FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
10949 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
10950                                Format MRMm, string OpcodeStr,
10951                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10952   def rr : AVX512<opc, MRMr,
10953              (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
10954              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
10955              [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
10956              Sched<[sched]>;
10957   def rm : AVX512<opc, MRMm,
10958            (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
10959            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
10960            [(set _.RC:$dst,(_.VT (OpNode
10961                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
10962                                  (i8 timm:$src2))))]>,
10963            Sched<[sched.Folded, sched.ReadAfterFold]>;
10964 }
10965
10966 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
10967                                    Format MRMm, string OpcodeStr,
10968                                    X86SchedWriteWidths sched, Predicate prd>{
10969   let Predicates = [prd] in
10970     defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
10971                                  sched.ZMM, v64i8_info>, EVEX_V512;
10972   let Predicates = [prd, HasVLX] in {
10973     defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
10974                                     sched.YMM, v32i8x_info>, EVEX_V256;
10975     defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
10976                                     sched.XMM, v16i8x_info>, EVEX_V128;
10977   }
10978 }
10979 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
10980                                        SchedWriteShuffle, HasBWI>,
10981                                        AVX512PDIi8Base, EVEX_4V, VEX_WIG;
10982 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
10983                                        SchedWriteShuffle, HasBWI>,
10984                                        AVX512PDIi8Base, EVEX_4V, VEX_WIG;
10985
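// VPSADBW sums absolute differences over groups of eight unsigned bytes,
// producing one 64-bit result per group; this is why the destination type
// (_dst, vXi64) below is wider than the source type (_src, vXi8).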
10986 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
10987                                 string OpcodeStr, X86FoldableSchedWrite sched,
10988                                 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
10989   let isCommutable = 1 in
10990   def rr : AVX512BI<opc, MRMSrcReg,
10991              (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
10992              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
10993              [(set _dst.RC:$dst,(_dst.VT
10994                                 (OpNode (_src.VT _src.RC:$src1),
10995                                         (_src.VT _src.RC:$src2))))]>,
10996              Sched<[sched]>;
10997   def rm : AVX512BI<opc, MRMSrcMem,
10998            (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
10999            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11000            [(set _dst.RC:$dst,(_dst.VT
11001                               (OpNode (_src.VT _src.RC:$src1),
11002                               (_src.VT (bitconvert
11003                                         (_src.LdFrag addr:$src2))))))]>,
11004            Sched<[sched.Folded, sched.ReadAfterFold]>;
11005 }
11006
11007 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11008                                     string OpcodeStr, X86SchedWriteWidths sched,
11009                                     Predicate prd> {
11010   let Predicates = [prd] in
11011     defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11012                                   v8i64_info, v64i8_info>, EVEX_V512;
11013   let Predicates = [prd, HasVLX] in {
11014     defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11015                                      v4i64x_info, v32i8x_info>, EVEX_V256;
11016     defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11017                                      v2i64x_info, v16i8x_info>, EVEX_V128;
11018   }
11019 }
11020
11021 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11022                                         SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11023
11024 // Transforms to swizzle an immediate to enable better matching when
11025 // the memory operand isn't in the right place.
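// For reference (illustrative only): the VPTERNLOG immediate is an 8-entry
// truth table indexed by (src1_bit << 2) | (src2_bit << 1) | src3_bit, so
// 0xF0 selects src1, 0xCC selects src2 and 0xAA selects src3. Reordering the
// operands just permutes the table bits; e.g. VPTERNLOG321_imm8 below maps
// 0xF0 (src1) to 0xAA (src3) and leaves 0xCC (src2) unchanged.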
11026 def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11027   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11028   uint8_t Imm = N->getZExtValue();
11029   // Swap bits 1/4 and 3/6.
11030   uint8_t NewImm = Imm & 0xa5;
11031   if (Imm & 0x02) NewImm |= 0x10;
11032   if (Imm & 0x10) NewImm |= 0x02;
11033   if (Imm & 0x08) NewImm |= 0x40;
11034   if (Imm & 0x40) NewImm |= 0x08;
11035   return getI8Imm(NewImm, SDLoc(N));
11036 }]>;
11037 def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11038   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
11039   uint8_t Imm = N->getZExtValue();
11040   // Swap bits 2/4 and 3/5.
11041   uint8_t NewImm = Imm & 0xc3;
11042   if (Imm & 0x04) NewImm |= 0x10;
11043   if (Imm & 0x10) NewImm |= 0x04;
11044   if (Imm & 0x08) NewImm |= 0x20;
11045   if (Imm & 0x20) NewImm |= 0x08;
11046   return getI8Imm(NewImm, SDLoc(N));
11047 }]>;
11048 def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11049   // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11050   uint8_t Imm = N->getZExtValue();
11051   // Swap bits 1/2 and 5/6.
11052   uint8_t NewImm = Imm & 0x99;
11053   if (Imm & 0x02) NewImm |= 0x04;
11054   if (Imm & 0x04) NewImm |= 0x02;
11055   if (Imm & 0x20) NewImm |= 0x40;
11056   if (Imm & 0x40) NewImm |= 0x20;
11057   return getI8Imm(NewImm, SDLoc(N));
11058 }]>;
11059 def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11060   // Convert a VPTERNLOG immediate by moving operand 0 to the end.
11061   uint8_t Imm = N->getZExtValue();
11062   // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11063   uint8_t NewImm = Imm & 0x81;
11064   if (Imm & 0x02) NewImm |= 0x04;
11065   if (Imm & 0x04) NewImm |= 0x10;
11066   if (Imm & 0x08) NewImm |= 0x40;
11067   if (Imm & 0x10) NewImm |= 0x02;
11068   if (Imm & 0x20) NewImm |= 0x08;
11069   if (Imm & 0x40) NewImm |= 0x20;
11070   return getI8Imm(NewImm, SDLoc(N));
11071 }]>;
11072 def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11073   // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11074   uint8_t Imm = N->getZExtValue();
11075   // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11076   uint8_t NewImm = Imm & 0x81;
11077   if (Imm & 0x02) NewImm |= 0x10;
11078   if (Imm & 0x04) NewImm |= 0x02;
11079   if (Imm & 0x08) NewImm |= 0x20;
11080   if (Imm & 0x10) NewImm |= 0x04;
11081   if (Imm & 0x20) NewImm |= 0x40;
11082   if (Imm & 0x40) NewImm |= 0x08;
11083   return getI8Imm(NewImm, SDLoc(N));
11084 }]>;
11085
11086 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11087                           X86FoldableSchedWrite sched, X86VectorVTInfo _,
11088                           string Name>{
11089   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11090   defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11091                       (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11092                       OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11093                       (OpNode (_.VT _.RC:$src1),
11094                               (_.VT _.RC:$src2),
11095                               (_.VT _.RC:$src3),
11096                               (i8 timm:$src4)), 1, 1>,
11097                       AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11098   defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11099                     (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11100                     OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11101                     (OpNode (_.VT _.RC:$src1),
11102                             (_.VT _.RC:$src2),
11103                             (_.VT (bitconvert (_.LdFrag addr:$src3))),
11104                             (i8 timm:$src4)), 1, 0>,
11105                     AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11106                     Sched<[sched.Folded, sched.ReadAfterFold]>;
11107   defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11108                     (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11109                     OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
11110                     "$src2, ${src3}"##_.BroadcastStr##", $src4",
11111                     (OpNode (_.VT _.RC:$src1),
11112                             (_.VT _.RC:$src2),
11113                             (_.VT (_.BroadcastLdFrag addr:$src3)),
11114                             (i8 timm:$src4)), 1, 0>, EVEX_B,
11115                     AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11116                     Sched<[sched.Folded, sched.ReadAfterFold]>;
11117   } // Constraints = "$src1 = $dst"
11118
11119   // Additional patterns for matching passthru operand in other positions.
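  // The merge-masked instructions tie the passthru value to $src1, so when the
  // passthru coincides with a different logical operand of the node the
  // operands are reordered into place and the immediate is rewritten using the
  // SDNodeXForms defined above.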
11120   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11121                    (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11122                    _.RC:$src1)),
11123             (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11124              _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11125   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11126                    (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11127                    _.RC:$src1)),
11128             (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11129              _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11130
11131   // Additional patterns for matching loads in other positions.
11132   def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
11133                           _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
11134             (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11135                                    addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11136   def : Pat<(_.VT (OpNode _.RC:$src1,
11137                           (bitconvert (_.LdFrag addr:$src3)),
11138                           _.RC:$src2, (i8 timm:$src4))),
11139             (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11140                                    addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11141
11142   // Additional patterns for matching zero masking with loads in other
11143   // positions.
11144   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11145                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11146                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11147                    _.ImmAllZerosV)),
11148             (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11149              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11150   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11151                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11152                     _.RC:$src2, (i8 timm:$src4)),
11153                    _.ImmAllZerosV)),
11154             (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11155              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11156
11157   // Additional patterns for matching masked loads with different
11158   // operand orders.
11159   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11160                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11161                     _.RC:$src2, (i8 timm:$src4)),
11162                    _.RC:$src1)),
11163             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11164              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11165   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11166                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11167                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11168                    _.RC:$src1)),
11169             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11170              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11171   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11172                    (OpNode _.RC:$src2, _.RC:$src1,
11173                     (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11174                    _.RC:$src1)),
11175             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11176              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11177   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11178                    (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11179                     _.RC:$src1, (i8 timm:$src4)),
11180                    _.RC:$src1)),
11181             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11182              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11183   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11184                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
11185                     _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11186                    _.RC:$src1)),
11187             (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11188              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11189
11190   // Additional patterns for matching broadcasts in other positions.
11191   def : Pat<(_.VT (OpNode (_.BroadcastLdFrag addr:$src3),
11192                           _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
11193             (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11194                                    addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11195   def : Pat<(_.VT (OpNode _.RC:$src1,
11196                           (_.BroadcastLdFrag addr:$src3),
11197                           _.RC:$src2, (i8 timm:$src4))),
11198             (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11199                                    addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11200
11201   // Additional patterns for matching zero masking with broadcasts in other
11202   // positions.
11203   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11204                    (OpNode (_.BroadcastLdFrag addr:$src3),
11205                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11206                    _.ImmAllZerosV)),
11207             (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11208              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11209              (VPTERNLOG321_imm8 timm:$src4))>;
11210   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11211                    (OpNode _.RC:$src1,
11212                     (_.BroadcastLdFrag addr:$src3),
11213                     _.RC:$src2, (i8 timm:$src4)),
11214                    _.ImmAllZerosV)),
11215             (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11216              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11217              (VPTERNLOG132_imm8 timm:$src4))>;
11218
11219   // Additional patterns for matching masked broadcasts with different
11220   // operand orders.
11221   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11222                    (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
11223                     _.RC:$src2, (i8 timm:$src4)),
11224                    _.RC:$src1)),
11225             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11226              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11227   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11228                    (OpNode (_.BroadcastLdFrag addr:$src3),
11229                     _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11230                    _.RC:$src1)),
11231             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11232              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11233   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11234                    (OpNode _.RC:$src2, _.RC:$src1,
11235                     (_.BroadcastLdFrag addr:$src3),
11236                     (i8 timm:$src4)), _.RC:$src1)),
11237             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11238              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11239   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11240                    (OpNode _.RC:$src2,
11241                     (_.BroadcastLdFrag addr:$src3),
11242                     _.RC:$src1, (i8 timm:$src4)),
11243                    _.RC:$src1)),
11244             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11245              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11246   def : Pat<(_.VT (vselect _.KRCWM:$mask,
11247                    (OpNode (_.BroadcastLdFrag addr:$src3),
11248                     _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11249                    _.RC:$src1)),
11250             (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11251              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11252 }
11253
11254 multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11255                                  AVX512VLVectorVTInfo _> {
11256   let Predicates = [HasAVX512] in
11257     defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11258                                _.info512, NAME>, EVEX_V512;
11259   let Predicates = [HasAVX512, HasVLX] in {
11260     defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11261                                _.info128, NAME>, EVEX_V128;
11262     defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11263                                _.info256, NAME>, EVEX_V256;
11264   }
11265 }
11266
11267 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11268                                         avx512vl_i32_info>;
11269 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11270                                         avx512vl_i64_info>, VEX_W;
11271
11272 // Patterns to use VPTERNLOG for vXi16/vXi8 vectors.
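// VPTERNLOG is a purely bitwise operation, so the element width encoded in the
// instruction does not affect the (unmasked) result; there are no byte/word
// forms, and the Q variants are simply reused for vXi8/vXi16 vectors here.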
11273 let Predicates = [HasVLX] in {
11274   def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
11275                                  (i8 timm:$src4))),
11276             (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
11277                                timm:$src4)>;
11278   def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
11279                                  (loadv16i8 addr:$src3), (i8 timm:$src4))),
11280             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11281                                timm:$src4)>;
11282   def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
11283                                  VR128X:$src1, (i8 timm:$src4))),
11284             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11285                                (VPTERNLOG321_imm8 timm:$src4))>;
11286   def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
11287                                  VR128X:$src2, (i8 timm:$src4))),
11288             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11289                                (VPTERNLOG132_imm8 timm:$src4))>;
11290
11291   def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
11292                                  (i8 timm:$src4))),
11293             (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
11294                                timm:$src4)>;
11295   def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
11296                                  (loadv8i16 addr:$src3), (i8 timm:$src4))),
11297             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11298                                timm:$src4)>;
11299   def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
11300                                  VR128X:$src1, (i8 timm:$src4))),
11301             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11302                                (VPTERNLOG321_imm8 timm:$src4))>;
11303   def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
11304                                  VR128X:$src2, (i8 timm:$src4))),
11305             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11306                                (VPTERNLOG132_imm8 timm:$src4))>;
11307
11308   def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
11309                                  (i8 timm:$src4))),
11310             (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
11311                                timm:$src4)>;
11312   def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
11313                                  (loadv32i8 addr:$src3), (i8 timm:$src4))),
11314             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11315                                timm:$src4)>;
11316   def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
11317                                  VR256X:$src1, (i8 timm:$src4))),
11318             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11319                                (VPTERNLOG321_imm8 timm:$src4))>;
11320   def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
11321                                  VR256X:$src2, (i8 timm:$src4))),
11322             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11323                                (VPTERNLOG132_imm8 timm:$src4))>;
11324
11325   def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
11326                                   (i8 timm:$src4))),
11327             (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
11328                                timm:$src4)>;
11329   def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
11330                                   (loadv16i16 addr:$src3), (i8 timm:$src4))),
11331             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11332                                timm:$src4)>;
11333   def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
11334                                   VR256X:$src1, (i8 timm:$src4))),
11335             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11336                                (VPTERNLOG321_imm8 timm:$src4))>;
11337   def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
11338                                   VR256X:$src2, (i8 timm:$src4))),
11339             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11340                                (VPTERNLOG132_imm8 timm:$src4))>;
11341 }
11342
11343 let Predicates = [HasAVX512] in {
11344   def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
11345                                  (i8 timm:$src4))),
11346             (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
11347                             timm:$src4)>;
11348   def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
11349                                  (loadv64i8 addr:$src3), (i8 timm:$src4))),
11350             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11351                             timm:$src4)>;
11352   def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
11353                                   VR512:$src1, (i8 timm:$src4))),
11354             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11355                             (VPTERNLOG321_imm8 timm:$src4))>;
11356   def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
11357                                  VR512:$src2, (i8 timm:$src4))),
11358             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11359                             (VPTERNLOG132_imm8 timm:$src4))>;
11360
11361   def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
11362                                   (i8 timm:$src4))),
11363             (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
11364                             timm:$src4)>;
11365   def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
11366                                   (loadv32i16 addr:$src3), (i8 timm:$src4))),
11367             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11368                             timm:$src4)>;
11369   def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
11370                                   VR512:$src1, (i8 timm:$src4))),
11371             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11372                             (VPTERNLOG321_imm8 timm:$src4))>;
11373   def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
11374                                  VR512:$src2, (i8 timm:$src4))),
11375             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11376                             (VPTERNLOG132_imm8 timm:$src4))>;
11377 }
11378
11379 // Patterns to implement vnot using vpternlog instead of creating all ones
11380 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11381 // so that the result depends only on the first source operand, but we use the
11382 // same register for all three operands to prevent a false dependency.
11383 // TODO: We should maybe have a more generalized algorithm for folding to
11384 // vpternlog.
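// Illustrative example (not part of the build): with the truth-table encoding
// imm8[(src1 << 2) | (src2 << 1) | src3], the value 15 (0x0F) is exactly
// NOT src1, so
//   vpternlogq $15, %zmm0, %zmm0, %zmm0
// computes a one-instruction bitwise NOT of ZMM0.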
11385 let Predicates = [HasAVX512] in {
11386   def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
11387             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11388   def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
11389             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11390   def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
11391             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11392   def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
11393             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11394 }
11395
11396 let Predicates = [HasAVX512, NoVLX] in {
11397   def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11398             (EXTRACT_SUBREG
11399              (VPTERNLOGQZrri
11400               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11401               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11402               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11403               (i8 15)), sub_xmm)>;
11404   def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11405             (EXTRACT_SUBREG
11406              (VPTERNLOGQZrri
11407               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11408               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11409               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11410               (i8 15)), sub_xmm)>;
11411   def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11412             (EXTRACT_SUBREG
11413              (VPTERNLOGQZrri
11414               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11415               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11416               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11417               (i8 15)), sub_xmm)>;
11418   def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11419             (EXTRACT_SUBREG
11420              (VPTERNLOGQZrri
11421               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11422               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11423               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11424               (i8 15)), sub_xmm)>;
11425
11426   def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11427             (EXTRACT_SUBREG
11428              (VPTERNLOGQZrri
11429               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11430               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11431               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11432               (i8 15)), sub_ymm)>;
11433   def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11434             (EXTRACT_SUBREG
11435              (VPTERNLOGQZrri
11436               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11437               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11438               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11439               (i8 15)), sub_ymm)>;
11440   def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11441             (EXTRACT_SUBREG
11442              (VPTERNLOGQZrri
11443               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11444               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11445               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11446               (i8 15)), sub_ymm)>;
11447   def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11448             (EXTRACT_SUBREG
11449              (VPTERNLOGQZrri
11450               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11451               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11452               (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11453               (i8 15)), sub_ymm)>;
11454 }
11455
11456 let Predicates = [HasVLX] in {
11457   def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11458             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11459   def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11460             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11461   def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11462             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11463   def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11464             (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11465
11466   def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11467             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11468   def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11469             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11470   def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11471             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11472   def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11473             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11474 }
11475
11476 //===----------------------------------------------------------------------===//
11477 // AVX-512 - FixupImm
11478 //===----------------------------------------------------------------------===//
11479
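// VFIXUPIMM treats its third source as a per-element integer lookup table
// (TblVT is the matching integer vector type) used to fix up special FP
// values; the MXCSR use and mayRaiseFPException flag below model the FP
// exceptions it may raise.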
11480 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
11481                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
11482                                   X86VectorVTInfo TblVT>{
11483   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
11484       Uses = [MXCSR], mayRaiseFPException = 1 in {
11485     defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11486                         (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11487                          OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11488                         (X86VFixupimm (_.VT _.RC:$src1),
11489                                       (_.VT _.RC:$src2),
11490                                       (TblVT.VT _.RC:$src3),
11491                                       (i32 timm:$src4))>, Sched<[sched]>;
11492     defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11493                       (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11494                       OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11495                       (X86VFixupimm (_.VT _.RC:$src1),
11496                                     (_.VT _.RC:$src2),
11497                                     (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
11498                                     (i32 timm:$src4))>,
11499                       Sched<[sched.Folded, sched.ReadAfterFold]>;
11500     defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11501                       (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11502                     OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
11503                     "$src2, ${src3}"##_.BroadcastStr##", $src4",
11504                       (X86VFixupimm (_.VT _.RC:$src1),
11505                                     (_.VT _.RC:$src2),
11506                                     (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
11507                                     (i32 timm:$src4))>,
11508                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11509   } // Constraints = "$src1 = $dst"
11510 }
11511
11512 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
11513                                       X86FoldableSchedWrite sched,
11514                                       X86VectorVTInfo _, X86VectorVTInfo TblVT>
11515   : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
11516 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
11517   defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11518                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11519                       OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
11520                       "$src2, $src3, {sae}, $src4",
11521                       (X86VFixupimmSAE (_.VT _.RC:$src1),
11522                                        (_.VT _.RC:$src2),
11523                                        (TblVT.VT _.RC:$src3),
11524                                        (i32 timm:$src4))>,
11525                       EVEX_B, Sched<[sched]>;
11526   }
11527 }
11528
11529 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
11530                                   X86FoldableSchedWrite sched, X86VectorVTInfo _,
11531                                   X86VectorVTInfo _src3VT> {
11532   let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
11533       ExeDomain = _.ExeDomain in {
11534     defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11535                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11536                       OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11537                       (X86VFixupimms (_.VT _.RC:$src1),
11538                                      (_.VT _.RC:$src2),
11539                                      (_src3VT.VT _src3VT.RC:$src3),
11540                                      (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
11541     let Uses = [MXCSR] in
11542     defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11543                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11544                       OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
11545                       "$src2, $src3, {sae}, $src4",
11546                       (X86VFixupimmSAEs (_.VT _.RC:$src1),
11547                                         (_.VT _.RC:$src2),
11548                                         (_src3VT.VT _src3VT.RC:$src3),
11549                                         (i32 timm:$src4))>,
11550                       EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11551     defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
11552                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11553                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11554                      (X86VFixupimms (_.VT _.RC:$src1),
11555                                     (_.VT _.RC:$src2),
11556                                     (_src3VT.VT (scalar_to_vector
11557                                               (_src3VT.ScalarLdFrag addr:$src3))),
11558                                     (i32 timm:$src4))>,
11559                      Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
11560   }
11561 }
11562
11563 multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
11564                                       AVX512VLVectorVTInfo _Vec,
11565                                       AVX512VLVectorVTInfo _Tbl> {
11566   let Predicates = [HasAVX512] in
11567     defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
11568                                 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
11569                                 EVEX_4V, EVEX_V512;
11570   let Predicates = [HasAVX512, HasVLX] in {
11571     defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
11572                             _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
11573                             EVEX_4V, EVEX_V128;
11574     defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
11575                             _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
11576                             EVEX_4V, EVEX_V256;
11577   }
11578 }
11579
11580 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11581                                            SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
11582                           AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11583 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11584                                            SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
11585                           AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11586 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
11587                          avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11588 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
11589                          avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
11590
11591 // Patterns used to select SSE scalar fp arithmetic instructions from
11592 // either:
11593 //
11594 // (1) a scalar fp operation followed by a blend
11595 //
11596 // The effect is that the backend no longer emits unnecessary vector
11597 // insert instructions immediately after SSE scalar fp instructions
11598 // like addss or mulss.
11599 //
11600 // For example, given the following code:
11601 //   __m128 foo(__m128 A, __m128 B) {
11602 //     A[0] += B[0];
11603 //     return A;
11604 //   }
11605 //
11606 // Previously we generated:
11607 //   addss %xmm0, %xmm1
11608 //   movss %xmm1, %xmm0
11609 //
11610 // We now generate:
11611 //   addss %xmm1, %xmm0
11612 //
11613 // (2) a vector packed single/double fp operation followed by a vector insert
11614 //
11615 // The effect is that the backend converts the packed fp instruction
11616 // followed by a vector insert into a single SSE scalar fp instruction.
11617 //
11618 // For example, given the following code:
11619 //   __m128 foo(__m128 A, __m128 B) {
11620 //     __m128 C = A + B;
11621 //     return (__m128) {C[0], A[1], A[2], A[3]};
11622 //   }
11623 //
11624 // Previously we generated:
11625 //   addps %xmm0, %xmm1
11626 //   movss %xmm1, %xmm0
11627 //
11628 // We now generate:
11629 //   addss %xmm1, %xmm0
11630
11631 // TODO: Some canonicalization in lowering would simplify the number of
11632 // patterns we have to try to match.
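// Note: the *_Int instructions operate on VR128X, so scalar FR32X/FR64X values
// from the matched IR are moved into vector registers with COPY_TO_REGCLASS in
// the output patterns below.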
11633 multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
11634                                            X86VectorVTInfo _, PatLeaf ZeroFP> {
11635   let Predicates = [HasAVX512] in {
11636     // extracted scalar math op with insert via movss
11637     def : Pat<(MoveNode
11638                (_.VT VR128X:$dst),
11639                (_.VT (scalar_to_vector
11640                       (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11641                           _.FRC:$src)))),
11642               (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
11643                (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
11644     def : Pat<(MoveNode
11645                (_.VT VR128X:$dst),
11646                (_.VT (scalar_to_vector
11647                       (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11648                           (_.ScalarLdFrag addr:$src))))),
11649               (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;
11650
11651     // extracted masked scalar math op with insert via movss
11652     def : Pat<(MoveNode (_.VT VR128X:$src1),
11653                (scalar_to_vector
11654                 (X86selects VK1WM:$mask,
11655                             (Op (_.EltVT
11656                                  (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11657                                 _.FRC:$src2),
11658                             _.FRC:$src0))),
11659               (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
11660                (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11661                VK1WM:$mask, _.VT:$src1,
11662                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11663     def : Pat<(MoveNode (_.VT VR128X:$src1),
11664                (scalar_to_vector
11665                 (X86selects VK1WM:$mask,
11666                             (Op (_.EltVT
11667                                  (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11668                                 (_.ScalarLdFrag addr:$src2)),
11669                             _.FRC:$src0))),
11670               (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
11671                (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11672                VK1WM:$mask, _.VT:$src1, addr:$src2)>;
11673
11674     // extracted zero-masked scalar math op with insert via movss
11675     def : Pat<(MoveNode (_.VT VR128X:$src1),
11676                (scalar_to_vector
11677                 (X86selects VK1WM:$mask,
11678                             (Op (_.EltVT
11679                                  (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11680                                 _.FRC:$src2), (_.EltVT ZeroFP)))),
11681               (!cast<Instruction>("V"#OpcPrefix#Zrr_Intkz)
11682                VK1WM:$mask, _.VT:$src1,
11683                (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11684     def : Pat<(MoveNode (_.VT VR128X:$src1),
11685                (scalar_to_vector
11686                 (X86selects VK1WM:$mask,
11687                             (Op (_.EltVT
11688                                  (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11689                                 (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
11690               (!cast<Instruction>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>;
11691   }
11692 }
11693
11694 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
11695 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
11696 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
11697 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
11698
11699 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
11700 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
11701 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
11702 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
11703
11704 multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
11705                                              SDNode Move, X86VectorVTInfo _> {
11706   let Predicates = [HasAVX512] in {
11707     def : Pat<(_.VT (Move _.VT:$dst,
11708                      (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
11709               (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
11710   }
11711 }
11712
11713 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
11714 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
11715
11716 //===----------------------------------------------------------------------===//
11717 // AES instructions
11718 //===----------------------------------------------------------------------===//
11719
11720 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
11721   let Predicates = [HasVLX, HasVAES] in {
11722     defm Z128 : AESI_binop_rm_int<Op, OpStr,
11723                                   !cast<Intrinsic>(IntPrefix),
11724                                   loadv2i64, 0, VR128X, i128mem>,
11725                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
11726     defm Z256 : AESI_binop_rm_int<Op, OpStr,
11727                                   !cast<Intrinsic>(IntPrefix##"_256"),
11728                                   loadv4i64, 0, VR256X, i256mem>,
11729                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
11730     }
11731     let Predicates = [HasAVX512, HasVAES] in
11732     defm Z    : AESI_binop_rm_int<Op, OpStr,
11733                                   !cast<Intrinsic>(IntPrefix##"_512"),
11734                                   loadv8i64, 0, VR512, i512mem>,
11735                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
11736 }
11737
11738 defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
11739 defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
11740 defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
11741 defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
11742
11743 //===----------------------------------------------------------------------===//
11744 // PCLMUL instructions - Carry-less multiplication
11745 //===----------------------------------------------------------------------===//
11746
11747 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
11748 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
11749                               EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
11750
11751 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
11752 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
11753                               EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
11754
11755 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
11756                                 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
11757                                 EVEX_CD8<64, CD8VF>, VEX_WIG;
11758 }
11759
11760 // Aliases
11761 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
11762 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
11763 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
11764
11765 //===----------------------------------------------------------------------===//
11766 // VBMI2
11767 //===----------------------------------------------------------------------===//
11768
11769 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
11770                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11771   let Constraints = "$src1 = $dst",
11772       ExeDomain   = VTI.ExeDomain in {
11773     defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11774                 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11775                 "$src3, $src2", "$src2, $src3",
11776                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
11777                 AVX512FMA3Base, Sched<[sched]>;
11778     defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11779                 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11780                 "$src3, $src2", "$src2, $src3",
11781                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11782                         (VTI.VT (VTI.LdFrag addr:$src3))))>,
11783                 AVX512FMA3Base,
11784                 Sched<[sched.Folded, sched.ReadAfterFold]>;
11785   }
11786 }
11787
11788 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11789                                X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
11790          : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
11791   let Constraints = "$src1 = $dst",
11792       ExeDomain   = VTI.ExeDomain in
11793   defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11794               (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
11795               "${src3}"##VTI.BroadcastStr##", $src2",
11796               "$src2, ${src3}"##VTI.BroadcastStr,
11797               (OpNode VTI.RC:$src1, VTI.RC:$src2,
11798                (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
11799               AVX512FMA3Base, EVEX_B,
11800               Sched<[sched.Folded, sched.ReadAfterFold]>;
11801 }
11802
11803 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
11804                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11805   let Predicates = [HasVBMI2] in
11806   defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11807                                    EVEX_V512;
11808   let Predicates = [HasVBMI2, HasVLX] in {
11809     defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11810                                    EVEX_V256;
11811     defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
11812                                    EVEX_V128;
11813   }
11814 }
11815
11816 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
11817                                       X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11818   let Predicates = [HasVBMI2] in
11819   defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11820                                     EVEX_V512;
11821   let Predicates = [HasVBMI2, HasVLX] in {
11822     defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11823                                     EVEX_V256;
11824     defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
11825                                     EVEX_V128;
11826   }
11827 }
11828 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
11829                            SDNode OpNode, X86SchedWriteWidths sched> {
11830   defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
11831              avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
11832   defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
11833              avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11834   defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
11835              avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
11836 }
11837
11838 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
11839                            SDNode OpNode, X86SchedWriteWidths sched> {
11840   defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
11841              avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
11842              VEX_W, EVEX_CD8<16, CD8VF>;
11843   defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
11844              OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11845   defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
11846              sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11847 }
11848
11849 // Concat & Shift
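// VPSHLD/VPSHRD concatenate corresponding elements of the two sources into a
// double-width value and shift it by an immediate, returning one half
// (a double-shift / funnel shift); the VPSHLDV/VPSHRDV forms take per-element
// shift counts from a third source instead of an immediate.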
11850 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
11851 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
11852 defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
11853 defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
11854
11855 // Compress
11856 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
11857                                          avx512vl_i8_info, HasVBMI2>, EVEX,
11858                                          NotMemoryFoldable;
11859 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
11860                                           avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
11861                                           NotMemoryFoldable;
11862 // Expand
11863 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
11864                                       avx512vl_i8_info, HasVBMI2>, EVEX;
11865 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
11866                                       avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
11867
11868 //===----------------------------------------------------------------------===//
11869 // VNNI
11870 //===----------------------------------------------------------------------===//
11871
11872 let Constraints = "$src1 = $dst" in
11873 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11874                     X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
11875                     bit IsCommutable> {
11876   let ExeDomain = VTI.ExeDomain in {
11877   defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11878                                    (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11879                                    "$src3, $src2", "$src2, $src3",
11880                                    (VTI.VT (OpNode VTI.RC:$src1,
11881                                             VTI.RC:$src2, VTI.RC:$src3)),
11882                                    IsCommutable, IsCommutable>,
11883                                    EVEX_4V, T8PD, Sched<[sched]>;
11884   defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11885                                    (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11886                                    "$src3, $src2", "$src2, $src3",
11887                                    (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11888                                             (VTI.VT (VTI.LdFrag addr:$src3))))>,
11889                                    EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
11890                                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11891   defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11892                                    (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
11893                                    OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
11894                                    "$src2, ${src3}"##VTI.BroadcastStr,
11895                                    (OpNode VTI.RC:$src1, VTI.RC:$src2,
11896                                     (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
11897                                    EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
11898                                    T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
11899   }
11900 }
11901
11902 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
11903                        X86SchedWriteWidths sched, bit IsCommutable> {
11904   let Predicates = [HasVNNI] in
11905   defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
11906                            IsCommutable>, EVEX_V512;
11907   let Predicates = [HasVNNI, HasVLX] in {
11908     defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
11909                            IsCommutable>, EVEX_V256;
11910     defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
11911                            IsCommutable>, EVEX_V128;
11912   }
11913 }
11914
11915 // FIXME: Is there a better scheduler class for VPDP?
11916 defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
11917 defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
11918 defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
11919 defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
11920
11921 def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
11922                              (X86vpmaddwd node:$lhs, node:$rhs), [{
11923   return N->hasOneUse();
11924 }]>;
11925
11926 // Patterns to match VPDPWSSD from existing instructions/intrinsics.
11927 let Predicates = [HasVNNI] in {
11928   def : Pat<(v16i32 (add VR512:$src1,
11929                          (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
11930             (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
11931   def : Pat<(v16i32 (add VR512:$src1,
11932                          (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
11933             (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
11934 }
11935 let Predicates = [HasVNNI,HasVLX] in {
11936   def : Pat<(v8i32 (add VR256X:$src1,
11937                         (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
11938             (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
11939   def : Pat<(v8i32 (add VR256X:$src1,
11940                         (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
11941             (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
11942   def : Pat<(v4i32 (add VR128X:$src1,
11943                         (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
11944             (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
11945   def : Pat<(v4i32 (add VR128X:$src1,
11946                         (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
11947             (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
11948 }
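
// A minimal C sketch of source these folds are meant to catch (illustrative
// only; the function name is made up, and it assumes AVX512BW and AVX512VNNI
// are enabled and that the vpmaddwd result has no other uses):
//   #include <immintrin.h>
//   __m512i dot_accumulate(__m512i acc, __m512i a, __m512i b) {
//     // vpmaddwd feeding an add -> folded into a single vpdpwssd
//     return _mm512_add_epi32(acc, _mm512_madd_epi16(a, b));
//   }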
11949
11950 //===----------------------------------------------------------------------===//
11951 // Bit Algorithms
11952 //===----------------------------------------------------------------------===//
11953
11954 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
11955 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
11956                                    avx512vl_i8_info, HasBITALG>;
11957 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
11958                                    avx512vl_i16_info, HasBITALG>, VEX_W;
11959
11960 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
11961 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
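
// Per-element population count; roughly (C-style pseudocode):
//   vpopcntb: for each byte i:  dst.u8[i]  = popcount(src.u8[i])
//   vpopcntw: for each word i:  dst.u16[i] = popcount(src.u16[i])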
11962
11963 def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
11964                                  (X86Vpshufbitqmb node:$src1, node:$src2), [{
11965   return N->hasOneUse();
11966 }]>;
11967
11968 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11969   defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
11970                                 (ins VTI.RC:$src1, VTI.RC:$src2),
11971                                 "vpshufbitqmb",
11972                                 "$src2, $src1", "$src1, $src2",
11973                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
11974                                 (VTI.VT VTI.RC:$src2)),
11975                                 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
11976                                 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
11977                                 Sched<[sched]>;
11978   defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
11979                                 (ins VTI.RC:$src1, VTI.MemOp:$src2),
11980                                 "vpshufbitqmb",
11981                                 "$src2, $src1", "$src1, $src2",
11982                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
11983                                 (VTI.VT (VTI.LdFrag addr:$src2))),
11984                                 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
11985                                 (VTI.VT (VTI.LdFrag addr:$src2)))>,
11986                                 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
11987                                 Sched<[sched.Folded, sched.ReadAfterFold]>;
11988 }
11989
11990 multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11991   let Predicates = [HasBITALG] in
11992   defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
11993   let Predicates = [HasBITALG, HasVLX] in {
11994     defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
11995     defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
11996   }
11997 }
11998
11999 // FIXME: Is there a better scheduler class for VPSHUFBITQMB?
12000 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
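
// Rough semantics, per 64-bit lane q (C-style pseudocode; the Intel SDM is
// authoritative): the low 6 bits of each byte of $src2 select a bit of the
// matching qword of $src1, and the gathered bits form the result mask.
//   for (int j = 0; j < 8; ++j) {
//     unsigned idx = src2.qword[q].byte[j] & 0x3f;
//     k.bit[8*q + j] = (src1.qword[q] >> idx) & 1;
//   }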
12001
12002 //===----------------------------------------------------------------------===//
12003 // GFNI
12004 //===----------------------------------------------------------------------===//
12005
12006 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12007                                    X86SchedWriteWidths sched> {
12008   let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12009   defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12010                                 EVEX_V512;
12011   let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12012     defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12013                                 EVEX_V256;
12014     defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12015                                 EVEX_V128;
12016   }
12017 }
12018
12019 defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12020                                           SchedWriteVecALU>,
12021                                           EVEX_CD8<8, CD8VF>, T8PD;
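
// vgf2p8mulb multiplies corresponding bytes in GF(2^8) using the AES field
// polynomial x^8 + x^4 + x^3 + x + 1 (0x11B). A scalar C reference sketch
// (helper name and code are illustrative only):
//   uint8_t gf2p8_mul(uint8_t a, uint8_t b) {
//     uint16_t p = 0;
//     for (int i = 0; i < 8; ++i)                  // carry-less multiply
//       if (b & (1u << i)) p ^= (uint16_t)a << i;
//     for (int i = 15; i >= 8; --i)                // reduce modulo 0x11B
//       if (p & (1u << i)) p ^= (uint16_t)(0x11B << (i - 8));
//     return (uint8_t)p;
//   }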
12022
12023 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12024                                       X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12025                                       X86VectorVTInfo BcstVTI>
12026            : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12027   let ExeDomain = VTI.ExeDomain in
12028   defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12029                 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
12030                 OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
12031                 "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
12032                 (OpNode (VTI.VT VTI.RC:$src1),
12033                  (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12034                  (i8 timm:$src3))>, EVEX_B,
12035                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12036 }
12037
12038 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12039                                      X86SchedWriteWidths sched> {
12040   let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12041   defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12042                                            v64i8_info, v8i64_info>, EVEX_V512;
12043   let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12044     defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12045                                            v32i8x_info, v4i64x_info>, EVEX_V256;
12046     defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12047                                            v16i8x_info, v2i64x_info>, EVEX_V128;
12048   }
12049 }
12050
12051 defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12052                          X86GF2P8affineinvqb, SchedWriteVecIMul>,
12053                          EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12054 defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12055                          X86GF2P8affineqb, SchedWriteVecIMul>,
12056                          EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
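
// High-level summary (bit/row ordering follows the Intel SDM): vgf2p8affineqb
// computes, for each byte x of the first source, the GF(2) affine transform
// A*x ^ b, where A is the 8x8 bit matrix in the matching 64-bit lane of the
// second source and b is the imm8; vgf2p8affineinvqb first replaces x by its
// multiplicative inverse in GF(2^8) (with 0 mapping to 0) and then applies
// the same transform. For illustration, C code using the GFNI intrinsic
//   __m512i r = _mm512_gf2p8affine_epi64_epi8(x, A, 0x1e);
// is expected to map to the 512-bit register form defined above.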
12057
12058
12059 //===----------------------------------------------------------------------===//
12060 // AVX5124FMAPS
12061 //===----------------------------------------------------------------------===//
12062
12063 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12064     Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
12065 defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12066                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12067                     "v4fmaddps", "$src3, $src2", "$src2, $src3",
12068                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12069                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12070
12071 defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12072                      (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12073                      "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12074                      []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12075                      Sched<[SchedWriteFMA.ZMM.Folded]>;
12076
12077 defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12078                     (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
12079                     "v4fmaddss", "$src3, $src2", "$src2, $src3",
12080                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12081                     Sched<[SchedWriteFMA.Scl.Folded]>;
12082
12083 defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12084                      (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12085                      "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12086                      []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12087                      Sched<[SchedWriteFMA.Scl.Folded]>;
12088 }
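
// These 4-iteration forms take a block of four sequentially-numbered vector
// registers as $src2 and a 128-bit memory operand holding four FP32 values.
// Roughly, for the unmasked packed form (C-style pseudocode; see the ISA
// reference for the exact definition):
//   for (int i = 0; i < 4; ++i)
//     dst.f32[j] += zmm[src2 + i].f32[j] * mem.f32[i];   // v4fmaddps
// v4fnmaddps subtracts the products instead, and the *ss forms update only
// the low element of the destination.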
12089
12090 //===----------------------------------------------------------------------===//
12091 // AVX5124VNNIW
12092 //===----------------------------------------------------------------------===//
12093
12094 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12095     Constraints = "$src1 = $dst" in {
12096 defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12097                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12098                      "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12099                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12100                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12101
12102 defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12103                      (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12104                      "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12105                      []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12106                      Sched<[SchedWriteFMA.ZMM.Folded]>;
12107 }
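
// Analogous 4-iteration form of vpdpwssd: $src2 names four sequentially-
// numbered ZMM registers and the 128-bit memory operand supplies one 16-bit
// word pair per iteration. Roughly, unmasked (C-style pseudocode, not
// authoritative; vp4dpwssds saturates the accumulation):
//   for (int i = 0; i < 4; ++i)
//     dst.i32[j] += (s16)zmm[src2 + i].w16[2j]   * (s16)mem.w16[2i]
//                 + (s16)zmm[src2 + i].w16[2j+1] * (s16)mem.w16[2i+1];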
12108
12109 let hasSideEffects = 0 in {
12110   let mayStore = 1, SchedRW = [WriteFStoreX] in
12111   def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12112   let mayLoad = 1, SchedRW = [WriteFLoadX] in
12113   def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12114 }
12115
12116 //===----------------------------------------------------------------------===//
12117 // VP2INTERSECT
12118 //===----------------------------------------------------------------------===//
12119
12120 multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12121   def rr : I<0x68, MRMSrcReg,
12122                   (outs _.KRPC:$dst),
12123                   (ins _.RC:$src1, _.RC:$src2),
12124                   !strconcat("vp2intersect", _.Suffix,
12125                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12126                   [(set _.KRPC:$dst, (X86vp2intersect
12127                             _.RC:$src1, (_.VT _.RC:$src2)))]>,
12128                   EVEX_4V, T8XD, Sched<[sched]>;
12129
12130   def rm : I<0x68, MRMSrcMem,
12131                   (outs _.KRPC:$dst),
12132                   (ins  _.RC:$src1, _.MemOp:$src2),
12133                   !strconcat("vp2intersect", _.Suffix,
12134                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12135                   [(set _.KRPC:$dst, (X86vp2intersect
12136                             _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12137                   EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
12138                   Sched<[sched.Folded, sched.ReadAfterFold]>;
12139
12140   def rmb : I<0x68, MRMSrcMem,
12141                   (outs _.KRPC:$dst),
12142                   (ins _.RC:$src1, _.ScalarMemOp:$src2),
12143                   !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12144                              ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12145                   [(set _.KRPC:$dst, (X86vp2intersect
12146                              _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12147                   EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12148                   Sched<[sched.Folded, sched.ReadAfterFold]>;
12149 }
12150
12151 multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12152   let Predicates  = [HasAVX512, HasVP2INTERSECT] in
12153     defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12154
12155   let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12156     defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12157     defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12158   }
12159 }
12160
12161 defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12162 defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;
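
// vp2intersect writes an even/odd pair of mask registers (hence KRPC): bit i
// of the first mask is set if element i of $src1 equals some element of
// $src2, and bit j of the second mask is set if element j of $src2 equals
// some element of $src1. Roughly (C-style pseudocode):
//   for (int i = 0; i < KL; ++i)
//     for (int j = 0; j < KL; ++j) {
//       bool match = (src1.elt[i] == src2.elt[j]);
//       k1.bit[i] |= match;
//       k2.bit[j] |= match;
//     }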
12163
12164 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12165                              X86SchedWriteWidths sched,
12166                              AVX512VLVectorVTInfo _SrcVTInfo,
12167                              AVX512VLVectorVTInfo _DstVTInfo,
12168                              SDNode OpNode, Predicate prd,
12169                              bit IsCommutable = 0> {
12170   let Predicates = [prd] in
12171     defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12172                                    _SrcVTInfo.info512, _DstVTInfo.info512,
12173                                    _SrcVTInfo.info512, IsCommutable>,
12174                                    EVEX_V512, EVEX_CD8<32, CD8VF>;
12175   let Predicates = [HasVLX, prd] in {
12176     defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12177                                       _SrcVTInfo.info256, _DstVTInfo.info256,
12178                                       _SrcVTInfo.info256, IsCommutable>,
12179                                      EVEX_V256, EVEX_CD8<32, CD8VF>;
12180     defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12181                                       _SrcVTInfo.info128, _DstVTInfo.info128,
12182                                       _SrcVTInfo.info128, IsCommutable>,
12183                                       EVEX_V128, EVEX_CD8<32, CD8VF>;
12184   }
12185 }
12186
12187 defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12188                                         SchedWriteCvtPD2PS, // FIXME: Should be SchedWriteCvtPS2BF
12189                                         avx512vl_f32_info, avx512vl_i16_info,
12190                                         X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
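
// vcvtne2ps2bf16 converts two packed-FP32 sources into one packed-BF16
// result using round-to-nearest-even. Per the ISA reference, the second
// source is expected to fill the low half of the destination and the first
// source the high half; with N BF16 result elements, roughly:
//   for (k = 0; k < N/2; ++k) dst.bf16[k]       = cvt_rne(src2.f32[k]);
//   for (k = 0; k < N/2; ++k) dst.bf16[N/2 + k] = cvt_rne(src1.f32[k]);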
12191
12192 // Truncate Float to BFloat16
12193 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12194                              X86SchedWriteWidths sched> {
12195   let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12196     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
12197                             X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12198   }
12199   let Predicates = [HasBF16, HasVLX] in {
12200     let Uses = []<Register>, mayRaiseFPException = 0 in {
12201     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
12202                                null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12203                                VK4WM>, EVEX_V128;
12204     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
12205                                X86cvtneps2bf16,
12206                                sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12207     }
12208
12209     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
12210                     (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12211                     VR128X:$src), 0>;
12212     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
12213                     (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12214                     f128mem:$src), 0, "intel">;
12215     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
12216                     (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12217                     VR256X:$src), 0>;
12218     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
12219                     (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12220                     f256mem:$src), 0, "intel">;
12221   }
12222 }
12223
12224 defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12225                                        SchedWriteCvtPD2PS>, T8XS,
12226                                        EVEX_CD8<32, CD8VF>;
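
// A BF16 value is the high half of the FP32 bit pattern after rounding to
// nearest-even. A scalar C sketch of the conversion (helper name is ours;
// NaN quieting and masking details omitted):
//   #include <stdint.h>
//   #include <string.h>
//   uint16_t fp32_to_bf16(float f) {
//     uint32_t u;
//     memcpy(&u, &f, sizeof u);
//     u += 0x7fffu + ((u >> 16) & 1);   // round to nearest, ties to even
//     return (uint16_t)(u >> 16);
//   }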
12227
12228 let Predicates = [HasBF16, HasVLX] in {
12229   // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12230   // patterns have been disabled with null_frag.
12231   def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12232             (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12233   def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
12234                               VK4WM:$mask),
12235             (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12236   def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
12237                               VK4WM:$mask),
12238             (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
12239
12240   def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12241             (VCVTNEPS2BF16Z128rm addr:$src)>;
12242   def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
12243                               VK4WM:$mask),
12244             (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12245   def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
12246                               VK4WM:$mask),
12247             (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
12248
12249   def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
12250                                      (X86VBroadcastld32 addr:$src)))),
12251             (VCVTNEPS2BF16Z128rmb addr:$src)>;
12252   def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12253                               (v8i16 VR128X:$src0), VK4WM:$mask),
12254             (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12255   def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12256                               v8i16x_info.ImmAllZerosV, VK4WM:$mask),
12257             (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
12258 }
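
// For example, a masked 128-bit conversion written with the AVX512_BF16+VL
// intrinsics (intrinsic and type names assumed here, not taken from this
// file), such as
//   __m128bh r = _mm_maskz_cvtneps_pbh(k, x);
// is expected to be selected through X86mcvtneps2bf16 and the *rrkz pattern
// above rather than through a plain-conversion pattern.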
12259
12260 let Constraints = "$src1 = $dst" in {
12261 multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12262                               X86FoldableSchedWrite sched,
12263                               X86VectorVTInfo _, X86VectorVTInfo src_v> {
12264   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12265                            (ins _.RC:$src2, _.RC:$src3),
12266                            OpcodeStr, "$src3, $src2", "$src2, $src3",
12267                            (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
12268                            EVEX_4V, Sched<[sched]>;
12269
12270   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12271                                (ins _.RC:$src2, _.MemOp:$src3),
12272                                OpcodeStr, "$src3, $src2", "$src2, $src3",
12273                                (_.VT (OpNode _.RC:$src1, _.RC:$src2,
12274                                (src_v.VT (bitconvert
12275                                (src_v.LdFrag addr:$src3)))))>, EVEX_4V,
12276                                Sched<[sched.Folded, sched.ReadAfterFold]>;
12277
12278   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12279                   (ins _.RC:$src2, _.ScalarMemOp:$src3),
12280                   OpcodeStr,
12281                   !strconcat("${src3}", _.BroadcastStr,", $src2"),
12282                   !strconcat("$src2, ${src3}", _.BroadcastStr),
12283                   (_.VT (OpNode _.RC:$src1, _.RC:$src2,
12284                   (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
12285                   EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
12286
12287 }
12288 } // Constraints = "$src1 = $dst"
12289
12290 multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12291                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
12292                                  AVX512VLVectorVTInfo src_v, Predicate prd> {
12293   let Predicates = [prd] in {
12294     defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
12295                                    src_v.info512>, EVEX_V512;
12296   }
12297   let Predicates = [HasVLX, prd] in {
12298     defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
12299                                    src_v.info256>, EVEX_V256;
12300     defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
12301                                    src_v.info128>, EVEX_V128;
12302   }
12303 }
12304
12305 defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
12306                                        avx512vl_f32_info, avx512vl_i32_info,
12307                                        HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
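
// Per 32-bit lane, vdpbf16ps accumulates a two-element BF16 dot product into
// an FP32 accumulator, roughly (C-style pseudocode; exact rounding/fusion
// behavior is defined by the ISA reference):
//   dst.f32[i] += to_f32(a.bf16[2i])   * to_f32(b.bf16[2i])
//              +  to_f32(a.bf16[2i+1]) * to_f32(b.bf16[2i+1]);
// where to_f32(x) places the BF16 bits in the upper half of an FP32.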