]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/lib/Target/X86/X86InstrAVX512.td
MFV r328251: 8652 Tautological comparisons with ZPROP_INVAL
[FreeBSD/FreeBSD.git] / contrib / llvm / lib / Target / X86 / X86InstrAVX512.td
1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file describes the X86 AVX512 instruction set, defining the
11 // instructions, and properties of the instructions which are needed for code
12 // generation, machine code emission, and analysis.
13 //
14 //===----------------------------------------------------------------------===//
15
16 // Group template arguments that can be derived from the vector type (EltNum x
17 // EltVT).  These are things like the register class for the writemask, etc.
18 // The idea is to pass one of these as the template argument rather than the
19 // individual arguments.
20 // The template is also used for scalar types; in this case numelts is 1.
21 class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
22                       string suffix = ""> {
23   RegisterClass RC = rc;
24   ValueType EltVT = eltvt;
25   int NumElts = numelts;
26
27   // Corresponding mask register class.
28   RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
29
30   // Corresponding write-mask register class.
31   RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
32
33   // The mask VT.
34   ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
35
36   // Suffix used in the instruction mnemonic.
37   string Suffix = suffix;
38
39   // VTName is the string name of the vector VT. For vector types it is
40   // v # NumElts # EltVT, so for a vector of 8 elements of i32 it is v8i32.
41   // Scalar types (NumElts = 1) are mapped to a vector name instead:
42   // v4 # EltVT for 32-bit elements and v2 # EltVT for 64-bit elements.
43   string VTName = "v" # !if (!eq (NumElts, 1),
44                         !if (!eq (EltVT.Size, 32), 4,
45                         !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
46
47   // The vector VT.
48   ValueType VT = !cast<ValueType>(VTName);
49
50   string EltTypeName = !cast<string>(EltVT);
51   // Size of the element type in bits as a string, e.g. "32" for v16i32.
52   string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
53   int EltSize = EltVT.Size;
54
55   // "i" for integer types and "f" for floating-point types
56   string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
57
58   // Size of VT in bits, e.g. 512 for v16i32.
59   int Size = VT.Size;
60
61   // The corresponding memory operand, e.g. i512mem for VR512.
62   X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
63   X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
64   // FP scalar memory operand for intrinsics - ssmem/sdmem.
      // Left unset (?) for element types other than f32/f64.
65   Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
66                            !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
67
68   // Load patterns
69   // Note: For 128/256/512-bit integer VT we choose loadv2i64/loadv4i64/
70   //       loadv8i64 due to load promotion during legalization
71   PatFrag LdFrag = !cast<PatFrag>("load" #
72                                   !if (!eq (TypeVariantName, "i"),
73                                        !if (!eq (Size, 128), "v2i64",
74                                        !if (!eq (Size, 256), "v4i64",
75                                        !if (!eq (Size, 512), "v8i64",
76                                             VTName))), VTName));
77
      // Same type selection as LdFrag, but using the "alignedload" fragments.
78   PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
79                                          !if (!eq (TypeVariantName, "i"),
80                                                !if (!eq (Size, 128), "v2i64",
81                                                !if (!eq (Size, 256), "v4i64",
82                                                !if (!eq (Size, 512), "v8i64",
83                                                    VTName))), VTName));
84
85   PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
86
      // sse_load_f32 / sse_load_f64 for FP element types; unset (?) otherwise.
87   ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
88                                           !cast<ComplexPattern>("sse_load_f32"),
89                                     !if (!eq (EltTypeName, "f64"),
90                                           !cast<ComplexPattern>("sse_load_f64"),
91                                     ?));
92
93   // The corresponding float type, e.g. v16f32 for v16i32
94   // Note: For EltSize < 32, FloatVT is illegal and TableGen
95   //       fails to compile, so we choose FloatVT = VT
96   ValueType FloatVT = !cast<ValueType>(
97                         !if (!eq (!srl(EltSize,5),0),
98                              VTName,
99                              !if (!eq(TypeVariantName, "i"),
100                                   "v" # NumElts # "f" # EltSize,
101                                   VTName)));
102
       // The corresponding integer type, e.g. v16i32 for v16f32.  As with
       // FloatVT, IntVT = VT when EltSize < 32.
103   ValueType IntVT = !cast<ValueType>(
104                         !if (!eq (!srl(EltSize,5),0),
105                              VTName,
106                              !if (!eq(TypeVariantName, "f"),
107                                   "v" # NumElts # "i" # EltSize,
108                                   VTName)));
109   // The string to specify embedded broadcast in assembly.
110   string BroadcastStr = "{1to" # NumElts # "}";
111
112   // 8-bit compressed displacement tuple/subvector format.  This is only
113   // defined for NumElts <= 8 (the check below is NumElts < 16); else unset.
114   CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
115                                !cast<CD8VForm>("CD8VT" # NumElts), ?);
116
       // sub_xmm for 128-bit types, sub_ymm for 256-bit types, unset otherwise.
117   SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
118                           !if (!eq (Size, 256), sub_ymm, ?));
119
       // Execution domain: packed single/double for f32/f64, packed int otherwise.
120   Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
121                      !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
122                      SSEPackedInt));
123
       // FR32X for f32 elements; FR64X for every other element type.
124   RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
125
126   // A vector type of the same width with element type i64. This is used to
127   // create patterns for logic ops.
128   ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");
129
130   // A vector type of the same width with element type i32.  This is used to
131   // create the canonical constant zero node ImmAllZerosV.
132   ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
133   dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
134
       // Width-dependent suffix: "Z128", "Z256", or "Z" for any other size.
135   string ZSuffix = !if (!eq (Size, 128), "Z128",
136                    !if (!eq (Size, 256), "Z256", "Z"));
137 }
138
// 512-bit vector types (RC = VR512).
139 def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
140 def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
141 def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
142 def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
143 def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
144 def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;
145
146 // "x" in v32i8x_info means RC = VR256X (VR128X for the 128-bit types below)
147 def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
148 def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
149 def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
150 def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
151 def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
152 def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;
153
// 128-bit vector types (RC = VR128X).
154 def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
155 def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
156 def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
157 def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
158 def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
159 def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;
160
161 // We map scalar types to the smallest (128-bit) vector type
162 // with the appropriate element type. This allows the same masking logic
// to be used.  Note that RC is GR32/GR64 for the integer scalars and VR128X
// for the FP scalars.
163 def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
164 def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
165 def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
166 def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
167
// Bundles the X86VectorVTInfo records of one element type at the three vector
// widths (512, 256 and 128 bits) so they can be passed as a single argument.
168 class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
169                            X86VectorVTInfo i128> {
170   X86VectorVTInfo info512 = i512;
171   X86VectorVTInfo info256 = i256;
172   X86VectorVTInfo info128 = i128;
173 }
174
// One AVX512VLVectorVTInfo per element type, in (512, 256, 128)-bit order.
175 def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
176                                              v16i8x_info>;
177 def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
178                                              v8i16x_info>;
179 def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
180                                              v4i32x_info>;
181 def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
182                                              v2i64x_info>;
183 def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
184                                              v4f32x_info>;
185 def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
186                                              v2f64x_info>;
187
// Type info for a mask (vXi1) type: its mask register class, its write-mask
// register class and its value type.
188 class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
189                        ValueType _vt> {
190   RegisterClass KRC = _krc;
191   RegisterClass KRCWM = _krcwm;
192   ValueType KVT = _vt;
193 }
194
// Mask type info for each mask width from 1 to 64 bits (VKn / VKnWM / vni1).
195 def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
196 def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
197 def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
198 def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
199 def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
200 def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
201 def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
202
203 // This multiclass generates the masking variants from the non-masking
204 // variant.  It only provides the assembly pieces for the masking variants.
205 // It assumes custom ISel patterns for masking which can be provided as
206 // template arguments.
// Three records are emitted: NAME (no mask), NAME#k (masked, EVEX_K) and
// NAME#kz (zero-masked, EVEX_KZ).  IsCommutable is applied to NAME and
// NAME#kz; IsKCommutable is applied to NAME#k.
207 multiclass AVX512_maskable_custom<bits<8> O, Format F,
208                                   dag Outs,
209                                   dag Ins, dag MaskingIns, dag ZeroMaskingIns,
210                                   string OpcodeStr,
211                                   string AttSrcAsm, string IntelSrcAsm,
212                                   list<dag> Pattern,
213                                   list<dag> MaskingPattern,
214                                   list<dag> ZeroMaskingPattern,
215                                   InstrItinClass itin,
216                                   string MaskingConstraint = "",
217                                   bit IsCommutable = 0,
218                                   bit IsKCommutable = 0> {
       // Unmasked variant.
219   let isCommutable = IsCommutable in
220     def NAME: AVX512<O, F, Outs, Ins,
221                        OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
222                                      "$dst, "#IntelSrcAsm#"}",
223                        Pattern, itin>;
224
225   // Prefer over VMOV*rrk Pat<>
226   let isCommutable = IsKCommutable in
227     def NAME#k: AVX512<O, F, Outs, MaskingIns,
228                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
229                                      "$dst {${mask}}, "#IntelSrcAsm#"}",
230                        MaskingPattern, itin>,
231               EVEX_K {
232       // In case of the 3src subclass this is overridden with a let.
233       string Constraints = MaskingConstraint;
234     }
235
236   // Zero mask does not add any restrictions to commute operands transformation.
237   // So, it is Ok to use IsCommutable instead of IsKCommutable.
238   let isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
239     def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
240                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
241                                      "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
242                        ZeroMaskingPattern,
243                        itin>,
244               EVEX_KZ;
245 }
246
247
248 // Common base class of AVX512_maskable and AVX512_maskable_3src.
// Wraps RHS and MaskingRHS into (set _.RC:$dst, ...) patterns for the
// unmasked and masked variants, and builds the zero-masking pattern as
// (Select mask, RHS, all-zeros).
249 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
250                                   dag Outs,
251                                   dag Ins, dag MaskingIns, dag ZeroMaskingIns,
252                                   string OpcodeStr,
253                                   string AttSrcAsm, string IntelSrcAsm,
254                                   dag RHS, dag MaskingRHS,
255                                   InstrItinClass itin,
256                                   SDNode Select = vselect,
257                                   string MaskingConstraint = "",
258                                   bit IsCommutable = 0,
259                                   bit IsKCommutable = 0> :
260   AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
261                          AttSrcAsm, IntelSrcAsm,
262                          [(set _.RC:$dst, RHS)],
263                          [(set _.RC:$dst, MaskingRHS)],
264                          [(set _.RC:$dst,
265                                (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
266                          itin, MaskingConstraint, IsCommutable,
267                          IsKCommutable>;
268
269 // This multiclass generates the unconditional/non-masking, the masking and
270 // the zero-masking variant of the vector instruction.  In the masking case, the
271 // preserved vector elements come from a new dummy input operand tied to $dst.
272 // This version uses a separate dag for non-masking and masking: RHS is the
// unmasked pattern, MaskRHS is used in both the masked and zero-masked ones.
273 multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
274                            dag Outs, dag Ins, string OpcodeStr,
275                            string AttSrcAsm, string IntelSrcAsm,
276                            dag RHS, dag MaskRHS,
277                            InstrItinClass itin,
278                            bit IsCommutable = 0, bit IsKCommutable = 0,
279                            SDNode Select = vselect> :
280    AVX512_maskable_custom<O, F, Outs, Ins,
281                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
282                           !con((ins _.KRCWM:$mask), Ins),
283                           OpcodeStr, AttSrcAsm, IntelSrcAsm,
284                           [(set _.RC:$dst, RHS)],
285                           [(set _.RC:$dst,
286                               (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
287                           [(set _.RC:$dst,
288                               (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
289                           itin, "$src0 = $dst", IsCommutable, IsKCommutable>;
290
291 // This multiclass generates the unconditional/non-masking, the masking and
292 // the zero-masking variant of the vector instruction.  In the masking case, the
293 // preserved vector elements come from a new dummy input operand tied to $dst.
// The masked inputs prepend ($src0, $mask) to Ins and the zero-masked inputs
// prepend ($mask); the masked pattern is (Select mask, RHS, $src0).
294 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
295                            dag Outs, dag Ins, string OpcodeStr,
296                            string AttSrcAsm, string IntelSrcAsm,
297                            dag RHS,
298                            InstrItinClass itin,
299                            bit IsCommutable = 0, bit IsKCommutable = 0,
300                            SDNode Select = vselect> :
301    AVX512_maskable_common<O, F, _, Outs, Ins,
302                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
303                           !con((ins _.KRCWM:$mask), Ins),
304                           OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
305                           (Select _.KRCWM:$mask, RHS, _.RC:$src0), itin,
306                           Select, "$src0 = $dst", IsCommutable, IsKCommutable>;
307
308 // This multiclass generates the unconditional/non-masking, the masking and
309 // the zero-masking variant of the scalar instruction.
// It forwards to AVX512_maskable with X86selects as the select node and
// IsKCommutable fixed to 0.
310 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
311                            dag Outs, dag Ins, string OpcodeStr,
312                            string AttSrcAsm, string IntelSrcAsm,
313                            dag RHS,
314                            InstrItinClass itin,
315                            bit IsCommutable = 0> :
316    AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
317                    RHS, itin, IsCommutable, 0, X86selects>;
318
319 // Similar to AVX512_maskable but in this case one of the source operands
320 // ($src1) is already tied to $dst so we just use that for the preserved
321 // vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
322 // $src1.
// The masked and zero-masked variants take the same inputs
// ($src1, $mask, NonTiedIns).  When MaskOnly is set, the unmasked pattern is
// replaced by null_frag.  The masking constraint passed down is empty.
323 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
324                                 dag Outs, dag NonTiedIns, string OpcodeStr,
325                                 string AttSrcAsm, string IntelSrcAsm,
326                                 dag RHS, InstrItinClass itin,
327                                 bit IsCommutable = 0,
328                                 bit IsKCommutable = 0,
329                                 SDNode Select = vselect,
330                                 bit MaskOnly = 0> :
331    AVX512_maskable_common<O, F, _, Outs,
332                           !con((ins _.RC:$src1), NonTiedIns),
333                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
334                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
335                           OpcodeStr, AttSrcAsm, IntelSrcAsm,
336                           !if(MaskOnly, (null_frag), RHS),
337                           (Select _.KRCWM:$mask, RHS, _.RC:$src1), itin,
338                           Select, "", IsCommutable, IsKCommutable>;
339
// Scalar counterpart of AVX512_maskable_3src: forwards every argument
// unchanged but uses X86selects as the select node.
340 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
341                                      dag Outs, dag NonTiedIns, string OpcodeStr,
342                                      string AttSrcAsm, string IntelSrcAsm,
343                                      dag RHS, InstrItinClass itin,
344                                      bit IsCommutable = 0,
345                                      bit IsKCommutable = 0,
346                                      bit MaskOnly = 0> :
347    AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
348                         IntelSrcAsm, RHS, itin, IsCommutable, IsKCommutable,
349                         X86selects, MaskOnly>;
350
// Generates the unmasked, masked and zero-masked variants, but only the
// unmasked one receives an ISel pattern (Pattern); the masked and zero-masked
// variants get empty pattern lists.
// NOTE(review): the name suggests the masked forms exist for the assembler
// only - confirm against the users of this multiclass.
351 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
352                                   dag Outs, dag Ins,
353                                   string OpcodeStr,
354                                   string AttSrcAsm, string IntelSrcAsm,
355                                   list<dag> Pattern,
356                                   InstrItinClass itin> :
357    AVX512_maskable_custom<O, F, Outs, Ins,
358                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
359                           !con((ins _.KRCWM:$mask), Ins),
360                           OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
361                           itin, "$src0 = $dst">;
362
363
364 // Instruction with mask that puts result in mask register,
365 // like "compare" and "vptest"
// Only NAME (unmasked) and NAME#k (masked, EVEX_K) are emitted; there is no
// zero-masking variant.  IsCommutable is applied to the unmasked def only.
366 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
367                                   dag Outs,
368                                   dag Ins, dag MaskingIns,
369                                   string OpcodeStr,
370                                   string AttSrcAsm, string IntelSrcAsm,
371                                   list<dag> Pattern,
372                                   list<dag> MaskingPattern,
373                                   InstrItinClass itin,
374                                   bit IsCommutable = 0> {
375     let isCommutable = IsCommutable in
376     def NAME: AVX512<O, F, Outs, Ins,
377                        OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
378                                      "$dst, "#IntelSrcAsm#"}",
379                        Pattern, itin>;
380
381     def NAME#k: AVX512<O, F, Outs, MaskingIns,
382                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
383                                      "$dst {${mask}}, "#IntelSrcAsm#"}",
384                        MaskingPattern, itin>, EVEX_K;
385 }
386
// Wraps RHS and MaskingRHS into patterns whose result is written to the mask
// register class (_.KRC:$dst) and forwards to AVX512_maskable_custom_cmp.
387 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
388                                   dag Outs,
389                                   dag Ins, dag MaskingIns,
390                                   string OpcodeStr,
391                                   string AttSrcAsm, string IntelSrcAsm,
392                                   dag RHS, dag MaskingRHS,
393                                   InstrItinClass itin,
394                                   bit IsCommutable = 0> :
395   AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
396                          AttSrcAsm, IntelSrcAsm,
397                          [(set _.KRC:$dst, RHS)],
398                          [(set _.KRC:$dst, MaskingRHS)], itin, IsCommutable>;
399
// Unmasked and masked variants of an instruction producing a mask result.
// The masked inputs prepend $mask to Ins, and the masked pattern is
// (and $mask, RHS).
400 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
401                            dag Outs, dag Ins, string OpcodeStr,
402                            string AttSrcAsm, string IntelSrcAsm,
403                            dag RHS, InstrItinClass itin,
404                            bit IsCommutable = 0> :
405    AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
406                           !con((ins _.KRCWM:$mask), Ins),
407                           OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
408                           (and _.KRCWM:$mask, RHS), itin, IsCommutable>;
409
// Same operand layout as AVX512_maskable_cmp (the masked inputs prepend
// $mask), but both variants are created with empty pattern lists.
410 multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
411                            dag Outs, dag Ins, string OpcodeStr,
412                            string AttSrcAsm, string IntelSrcAsm,
413                            InstrItinClass itin> :
414    AVX512_maskable_custom_cmp<O, F, Outs,
415                              Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
416                              AttSrcAsm, IntelSrcAsm, [],[], itin>;
417
418 // This multiclass generates the unconditional/non-masking, the masking and
419 // the zero-masking variant of the vector instruction.  In the masking case, the
420 // preserved vector elements come from a new dummy input operand tied to $dst.
// RHS is the unmasked pattern; MaskedRHS is used in both the masked and the
// zero-masked patterns.  IsKCommutable is not forwarded, so it keeps the
// default of AVX512_maskable_custom.
421 multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
422                            dag Outs, dag Ins, string OpcodeStr,
423                            string AttSrcAsm, string IntelSrcAsm,
424                            dag RHS, dag MaskedRHS,
425                            InstrItinClass itin,
426                            bit IsCommutable = 0, SDNode Select = vselect> :
427    AVX512_maskable_custom<O, F, Outs, Ins,
428                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
429                           !con((ins _.KRCWM:$mask), Ins),
430                           OpcodeStr, AttSrcAsm, IntelSrcAsm,
431                           [(set _.RC:$dst, RHS)],
432                           [(set _.RC:$dst,
433                                 (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
434                           [(set _.RC:$dst,
435                                 (Select _.KRCWM:$mask, MaskedRHS,
436                                         _.ImmAllZerosV))],
437                           itin, "$src0 = $dst", IsCommutable>;
438
439
440 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
441 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
442 // swizzled by ExecutionDepsFix to pxor.
443 // We set canFoldAsLoad because this can be converted to a constant-pool
444 // load of an all-zeros value if folding it would be beneficial.
// Both pseudos below take no inputs and produce a v16i32 constant in VR512:
// all-zeros (SET0) and all-ones (SETALLONES).
445 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
446     isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
447 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
448                [(set VR512:$dst, (v16i32 immAllZerosV))]>;
449 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
450                [(set VR512:$dst, (v16i32 immAllOnesV))]>;
451 }
452
453 // Alias instructions that allow VPTERNLOG to be used with a mask to create
454 // a mix of all ones and all zeros elements. This is done this way to force
455 // the same register to be used as input for all three sources.
// Each pseudo matches a vselect between all-ones and all-zeros, keyed on the
// mask: 16 x i32 with a VK16WM mask, or 8 x i64 with a VK8WM mask.
456 let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
457 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
458                                 (ins VK16WM:$mask), "",
459                            [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
460                                                       (v16i32 immAllOnesV),
461                                                       (v16i32 immAllZerosV)))]>;
462 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
463                                 (ins VK8WM:$mask), "",
464                 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
465                                            (bc_v8i64 (v16i32 immAllOnesV)),
466                                            (bc_v8i64 (v16i32 immAllZerosV))))]>;
467 }
468
// All-zeros pseudos for the 128-bit (VR128X, v4i32) and 256-bit (VR256X,
// v8i32) register classes.
469 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
470     isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
471 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
472                [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
473 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
474                [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
475 }
476
477 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
478 // This is expanded by ExpandPostRAPseudos.
// Scalar FP zero pseudos: FR32X matched against fp32imm0 and FR64X matched
// against fpimm0.
479 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
480     isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
481   def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
482                           [(set FR32X:$dst, fp32imm0)]>;
483   def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
484                           [(set FR64X:$dst, fpimm0)]>;
485 }
486
487 //===----------------------------------------------------------------------===//
488 // AVX-512 - VECTOR INSERT
489 //
490
491 // Supports two different pattern operators for mask and unmasked ops. Allows
492 // null_frag to be passed for one.
// Inserts a From-typed subvector ($src2) into a To-typed vector ($src1) at
// the position given by the immediate $src3.  Two forms are generated:
//   rr - the subvector comes from a register;
//   rm - the subvector is loaded from memory through From.LdFrag.
// The mnemonic is "vinsert" # From.EltTypeName # "x" # From.NumElts.
493 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
494                                   X86VectorVTInfo To,
495                                   SDPatternOperator vinsert_insert,
496                                   SDPatternOperator vinsert_for_mask,
497                                   OpndItins itins> {
498   let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
499     defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
500                    (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
501                    "vinsert" # From.EltTypeName # "x" # From.NumElts,
502                    "$src3, $src2, $src1", "$src1, $src2, $src3",
503                    (vinsert_insert:$src3 (To.VT To.RC:$src1),
504                                          (From.VT From.RC:$src2),
505                                          (iPTR imm)),
506                    (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
507                                            (From.VT From.RC:$src2),
508                                            (iPTR imm)), itins.rr>,
509                    AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;
510     let mayLoad = 1 in
511     defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
512                    (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
513                    "vinsert" # From.EltTypeName # "x" # From.NumElts,
514                    "$src3, $src2, $src1", "$src1, $src2, $src3",
515                    (vinsert_insert:$src3 (To.VT To.RC:$src1),
516                                (From.VT (bitconvert (From.LdFrag addr:$src2))),
517                                (iPTR imm)),
518                    (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
519                                (From.VT (bitconvert (From.LdFrag addr:$src2))),
520                                (iPTR imm)), itins.rm>, AVX512AIi8Base, EVEX_4V,
521                    EVEX_CD8<From.EltSize, From.CD8TupleForm>,
522                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
523   }
524 }
525
526 // Passes the same pattern operator for masked and unmasked ops, i.e.
// vinsert_insert is used for both operator arguments of
// vinsert_for_size_split.
527 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
528                             X86VectorVTInfo To,
529                             SDPatternOperator vinsert_insert,
530                             OpndItins itins> :
531   vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, itins>;
532
// Anonymous ISel patterns that select an existing instruction for
// vinsert_insert on the (From, To) type pair.  InstrStr names the instruction
// family; "rr" is appended for a register subvector and "rm" for one loaded
// through From.LdFrag.  The insert position is converted to the instruction
// immediate with INSERT_get_vinsert_imm.  All patterns are guarded by the
// predicate list p.
533 multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
534                        X86VectorVTInfo To, PatFrag vinsert_insert,
535                        SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
536   let Predicates = p in {
       // Register source.
537     def : Pat<(vinsert_insert:$ins
538                      (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
539               (To.VT (!cast<Instruction>(InstrStr#"rr")
540                      To.RC:$src1, From.RC:$src2,
541                      (INSERT_get_vinsert_imm To.RC:$ins)))>;
542
       // Memory source.
543     def : Pat<(vinsert_insert:$ins
544                   (To.VT To.RC:$src1),
545                   (From.VT (bitconvert (From.LdFrag addr:$src2))),
546                   (iPTR imm)),
547               (To.VT (!cast<Instruction>(InstrStr#"rm")
548                   To.RC:$src1, addr:$src2,
549                   (INSERT_get_vinsert_imm To.RC:$ins)))>;
550   }
551 }
552
// Instantiates the vinsert instruction variants for one element-type family
// (EltVT32 / EltVT64).  Opcode128 is used for the variants whose source is
// VR128X and Opcode256 for those whose source is VR256X:
//   32x4Z256 (HasVLX), 32x4Z, 64x4Z,
//   64x2Z256 (HasVLX + HasDQI), 64x2Z and 32x8Z (HasDQI).
// The DQI variants pass null_frag as the unmasked operator, so they get
// ISel patterns for the masked forms only.
553 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
554                             ValueType EltVT64, int Opcode256,
555                             OpndItins itins> {
556
557   let Predicates = [HasVLX] in
558     defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
559                                  X86VectorVTInfo< 4, EltVT32, VR128X>,
560                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
561                                  vinsert128_insert, itins>, EVEX_V256;
562
563   defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
564                                  X86VectorVTInfo< 4, EltVT32, VR128X>,
565                                  X86VectorVTInfo<16, EltVT32, VR512>,
566                                  vinsert128_insert, itins>, EVEX_V512;
567
568   defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
569                                  X86VectorVTInfo< 4, EltVT64, VR256X>,
570                                  X86VectorVTInfo< 8, EltVT64, VR512>,
571                                  vinsert256_insert, itins>, VEX_W, EVEX_V512;
572
573   // Even with DQI we'd like to only use these instructions for masking.
574   let Predicates = [HasVLX, HasDQI] in
575     defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
576                                    X86VectorVTInfo< 2, EltVT64, VR128X>,
577                                    X86VectorVTInfo< 4, EltVT64, VR256X>,
578                                    null_frag, vinsert128_insert, itins>,
579                                    VEX_W, EVEX_V256;
580
581   // Even with DQI we'd like to only use these instructions for masking.
582   let Predicates = [HasDQI] in {
583     defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
584                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
585                                  X86VectorVTInfo< 8, EltVT64, VR512>,
586                                  null_frag, vinsert128_insert, itins>,
587                                  VEX_W, EVEX_V512;
588
589     defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
590                                    X86VectorVTInfo< 8, EltVT32, VR256X>,
591                                    X86VectorVTInfo<16, EltVT32, VR512>,
592                                    null_frag, vinsert256_insert, itins>,
593                                    EVEX_V512;
594   }
595 }
596
597 // FIXME: Is there a better scheduler itinerary for VINSERTF/VINSERTI?
598 let Sched = WriteFShuffle256 in
599 def AVX512_VINSERTF : OpndItins<
600   IIC_SSE_SHUFP, IIC_SSE_SHUFP  // presumably the rr and rm classes - confirm against OpndItins
601 >;
602 let Sched = WriteShuffle256 in
603 def AVX512_VINSERTI : OpndItins<
604   IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
605 >;
606 // Args: 32-bit elt type, 128-bit-insert opcode, 64-bit elt type, 256-bit-insert opcode, itins.
607 defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, AVX512_VINSERTF>;
608 defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, AVX512_VINSERTI>;
609
610 // Codegen patterns for 64-bit element types, selected onto the 32x4 instructions.
611 // Even with AVX512DQ we'll still use these for unmasked operations.
612 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
613               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
614 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
615               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
616 // 64-bit element types, 128-bit insert into a 512-bit vector.
617 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
618               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
619 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
620               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
621 // 32-bit element types, 256-bit insert into a 512-bit vector (64x4 instructions).
622 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
623               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
624 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
625               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
626
627 // Codegen pattern with the alternative types: insert VEC128 into VEC256
628 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
629               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
630 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
631               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
632 // Codegen pattern with the alternative types: insert VEC128 into VEC512
633 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
634               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
635 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
636                vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
637 // Codegen pattern with the alternative types: insert VEC256 into VEC512
638 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
639               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
640 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
641               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
642 // Additional patterns for handling a bitcast between the vselect and the
643 // insert_subvector: the mask is applied on Cast.VT, the insert is done on To.VT.
644 multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
645                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
646                                  PatFrag vinsert_insert,
647                                  SDNodeXForm INSERT_get_vinsert_imm,
648                                  list<Predicate> p> {
649 let Predicates = p in {
650   def : Pat<(Cast.VT
651              (vselect Cast.KRCWM:$mask,
652                       (bitconvert
653                        (vinsert_insert:$ins (To.VT To.RC:$src1),
654                                             (From.VT From.RC:$src2),
655                                             (iPTR imm))),
656                       Cast.RC:$src0)),
657             (!cast<Instruction>(InstrStr#"rrk")
658              Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
659              (INSERT_get_vinsert_imm To.RC:$ins))>;
660   def : Pat<(Cast.VT
661              (vselect Cast.KRCWM:$mask,
662                       (bitconvert
663                        (vinsert_insert:$ins (To.VT To.RC:$src1),
664                                             (From.VT
665                                              (bitconvert
666                                               (From.LdFrag addr:$src2))),
667                                             (iPTR imm))),
668                       Cast.RC:$src0)),
669             (!cast<Instruction>(InstrStr#"rmk")
670              Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
671              (INSERT_get_vinsert_imm To.RC:$ins))>;
672   // Zero-masking forms (rrkz/rmkz): the vselect false operand is all zeros.
673   def : Pat<(Cast.VT
674              (vselect Cast.KRCWM:$mask,
675                       (bitconvert
676                        (vinsert_insert:$ins (To.VT To.RC:$src1),
677                                             (From.VT From.RC:$src2),
678                                             (iPTR imm))),
679                       Cast.ImmAllZerosV)),
680             (!cast<Instruction>(InstrStr#"rrkz")
681              Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
682              (INSERT_get_vinsert_imm To.RC:$ins))>;
683   def : Pat<(Cast.VT
684              (vselect Cast.KRCWM:$mask,
685                       (bitconvert
686                        (vinsert_insert:$ins (To.VT To.RC:$src1),
687                                             (From.VT
688                                              (bitconvert
689                                               (From.LdFrag addr:$src2))),
690                                             (iPTR imm))),
691                       Cast.ImmAllZerosV)),
692             (!cast<Instruction>(InstrStr#"rmkz")
693              Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
694              (INSERT_get_vinsert_imm To.RC:$ins))>;
695 }
696 }
697 // FP 128-bit inserts into 256-bit vectors, masked at the other element width.
698 defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
699                              v8f32x_info, vinsert128_insert,
700                              INSERT_get_vinsert128_imm, [HasVLX]>;
701 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
702                              v4f64x_info, vinsert128_insert,
703                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
704 // Integer 128-bit inserts into 256-bit vectors; every pattern selects a VINSERTI* form.
705 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
706                              v8i32x_info, vinsert128_insert,
707                              INSERT_get_vinsert128_imm, [HasVLX]>;
708 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
709                              v8i32x_info, vinsert128_insert,
710                              INSERT_get_vinsert128_imm, [HasVLX]>;
711 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
712                              v8i32x_info, vinsert128_insert,
713                              INSERT_get_vinsert128_imm, [HasVLX]>;
714 defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
715                              v4i64x_info, vinsert128_insert,
716                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
717 defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
718                              v4i64x_info, vinsert128_insert,
719                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
720 defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
721                              v4i64x_info, vinsert128_insert,
722                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
723 // FP 128-bit inserts into 512-bit vectors.
724 defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
725                              v16f32_info, vinsert128_insert,
726                              INSERT_get_vinsert128_imm, [HasAVX512]>;
727 defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
728                              v8f64_info, vinsert128_insert,
729                              INSERT_get_vinsert128_imm, [HasDQI]>;
730 // Integer 128-bit inserts into 512-bit vectors.
731 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
732                              v16i32_info, vinsert128_insert,
733                              INSERT_get_vinsert128_imm, [HasAVX512]>;
734 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
735                              v16i32_info, vinsert128_insert,
736                              INSERT_get_vinsert128_imm, [HasAVX512]>;
737 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
738                              v16i32_info, vinsert128_insert,
739                              INSERT_get_vinsert128_imm, [HasAVX512]>;
740 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
741                              v8i64_info, vinsert128_insert,
742                              INSERT_get_vinsert128_imm, [HasDQI]>;
743 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
744                              v8i64_info, vinsert128_insert,
745                              INSERT_get_vinsert128_imm, [HasDQI]>;
746 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
747                              v8i64_info, vinsert128_insert,
748                              INSERT_get_vinsert128_imm, [HasDQI]>;
749 // FP 256-bit inserts into 512-bit vectors.
750 defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
751                              v16f32_info, vinsert256_insert,
752                              INSERT_get_vinsert256_imm, [HasDQI]>;
753 defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
754                              v8f64_info, vinsert256_insert,
755                              INSERT_get_vinsert256_imm, [HasAVX512]>;
756 // Integer 256-bit inserts into 512-bit vectors.
757 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
758                              v16i32_info, vinsert256_insert,
759                              INSERT_get_vinsert256_imm, [HasDQI]>;
760 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
761                              v16i32_info, vinsert256_insert,
762                              INSERT_get_vinsert256_imm, [HasDQI]>;
763 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
764                              v16i32_info, vinsert256_insert,
765                              INSERT_get_vinsert256_imm, [HasDQI]>;
766 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
767                              v8i64_info, vinsert256_insert,
768                              INSERT_get_vinsert256_imm, [HasAVX512]>;
769 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
770                              v8i64_info, vinsert256_insert,
771                              INSERT_get_vinsert256_imm, [HasAVX512]>;
772 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
773                              v8i64_info, vinsert256_insert,
774                              INSERT_get_vinsert256_imm, [HasAVX512]>;
775
776 // vinsertps - insert f32 to XMM (EVEX forms: rr takes a VR128X source, rm loads an f32)
777 let ExeDomain = SSEPackedSingle in {
778 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
779       (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
780       "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
781       [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))],
782       IIC_SSE_INSERTPS_RR>, EVEX_4V, Sched<[WriteFShuffle]>;
783 def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
784       (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
785       "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
786       [(set VR128X:$dst, (X86insertps VR128X:$src1,
787                           (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
788                           imm:$src3))], IIC_SSE_INSERTPS_RM>, EVEX_4V,
789       EVEX_CD8<32, CD8VT1>, Sched<[WriteFShuffleLd, ReadAfterLd]>;
790 }
791
792 //===----------------------------------------------------------------------===//
793 // AVX-512 VECTOR EXTRACT
794 //---
795
796 // Supports two different pattern operators for mask and unmasked ops. Allows
797 // null_frag to be passed for one.
798 multiclass vextract_for_size_split<int Opcode,
799                                    X86VectorVTInfo From, X86VectorVTInfo To,
800                                    SDPatternOperator vextract_extract,
801                                    SDPatternOperator vextract_for_mask,
802                                    OpndItins itins> {
803   // rr: register form; AVX512_maskable_split receives both pattern operators.
804   let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
805     defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
806                 (ins From.RC:$src1, u8imm:$idx),
807                 "vextract" # To.EltTypeName # "x" # To.NumElts,
808                 "$idx, $src1", "$src1, $idx",
809                 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
810                 (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm)),
811                 itins.rr>, AVX512AIi8Base, EVEX, Sched<[itins.Sched]>;
812     // mr: unmasked store form. NOTE(review): scheduled via Sched.Folded/ReadAfterLd - confirm.
813     def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
814                     (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
815                     "vextract" # To.EltTypeName # "x" # To.NumElts #
816                         "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
817                     [(store (To.VT (vextract_extract:$idx
818                                     (From.VT From.RC:$src1), (iPTR imm))),
819                              addr:$dst)], itins.rm>, EVEX,
820                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
821     // mrk: masked store form; its pattern list is empty, so mayStore is set explicitly.
822     let mayStore = 1, hasSideEffects = 0 in
823     def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
824                     (ins To.MemOp:$dst, To.KRCWM:$mask,
825                                         From.RC:$src1, u8imm:$idx),
826                      "vextract" # To.EltTypeName # "x" # To.NumElts #
827                           "\t{$idx, $src1, $dst {${mask}}|"
828                           "$dst {${mask}}, $src1, $idx}",
829                     [], itins.rm>, EVEX_K, EVEX,
830                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
831   }
832 }
833
834 // Wrapper over vextract_for_size_split: same pattern operator for masked and unmasked ops.
835 multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
836                              X86VectorVTInfo To,
837                              SDPatternOperator vextract_extract,
838                              OpndItins itins> :
839   vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, itins>;
840
841 // Codegen patterns for the alternative types: map extracts onto InstrStr's rr and mr forms.
842 multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
843                 X86VectorVTInfo To, PatFrag vextract_extract,
844                 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
845   let Predicates = p in {
846      def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
847                (To.VT (!cast<Instruction>(InstrStr#"rr")
848                           From.RC:$src1,
849                           (EXTRACT_get_vextract_imm To.RC:$ext)))>;
850      def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
851                               (iPTR imm))), addr:$dst),
852                (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
853                 (EXTRACT_get_vextract_imm To.RC:$ext))>;
854   }
855 }
856 // Defines the 32x4, 64x4, 64x2 and 32x8 extract forms for one element domain.
857 multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
858                              ValueType EltVT64, int Opcode256,
859                              OpndItins itins> {
860   let Predicates = [HasAVX512] in {
861     defm NAME # "32x4Z" : vextract_for_size<Opcode128,
862                                    X86VectorVTInfo<16, EltVT32, VR512>,
863                                    X86VectorVTInfo< 4, EltVT32, VR128X>,
864                                    vextract128_extract, itins>,
865                                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
866     defm NAME # "64x4Z" : vextract_for_size<Opcode256,
867                                    X86VectorVTInfo< 8, EltVT64, VR512>,
868                                    X86VectorVTInfo< 4, EltVT64, VR256X>,
869                                    vextract256_extract, itins>,
870                                        VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
871   }
872   let Predicates = [HasVLX] in
873     defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
874                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
875                                  X86VectorVTInfo< 4, EltVT32, VR128X>,
876                                  vextract128_extract, itins>,
877                                      EVEX_V256, EVEX_CD8<32, CD8VT4>;
878   // The *_split forms below pass null_frag as the unmasked pattern operator.
879   // Even with DQI we'd like to only use these instructions for masking.
880   let Predicates = [HasVLX, HasDQI] in
881     defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
882                                  X86VectorVTInfo< 4, EltVT64, VR256X>,
883                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
884                                  null_frag, vextract128_extract, itins>,
885                                      VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
886
887   // Even with DQI we'd like to only use these instructions for masking.
888   let Predicates = [HasDQI] in {
889     defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
890                                  X86VectorVTInfo< 8, EltVT64, VR512>,
891                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
892                                  null_frag, vextract128_extract, itins>,
893                                      VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
894     defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
895                                  X86VectorVTInfo<16, EltVT32, VR512>,
896                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
897                                  null_frag, vextract256_extract, itins>,
898                                      EVEX_V512, EVEX_CD8<32, CD8VT8>;
899   }
900 }
901
902 // FIXME: Is there a better scheduler itinerary for VEXTRACTF/VEXTRACTI?
903 let Sched = WriteFShuffle256 in
904 def AVX512_VEXTRACTF : OpndItins<
905   IIC_SSE_SHUFP, IIC_SSE_SHUFP  // presumably the rr and rm classes - confirm against OpndItins
906 >;
907 let Sched = WriteShuffle256 in
908 def AVX512_VEXTRACTI : OpndItins<
909   IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
910 >;
911 // Args: 32-bit elt type, 128-bit-extract opcode, 64-bit elt type, 256-bit-extract opcode, itins.
912 defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, AVX512_VEXTRACTF>;
913 defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, AVX512_VEXTRACTI>;
914
915 // extract_subvector codegen patterns with the alternative types.
916 // Even with AVX512DQ we'll still use these for unmasked operations.
917 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
918           vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
919 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
920           vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
921 // 32-bit element types, 256-bit extract from a 512-bit vector (64x4 instructions).
922 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
923           vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
924 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
925           vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
926 // 64-bit element types, 128-bit extract from a 256-bit vector.
927 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
928           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
929 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
930           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
931
932 // Codegen pattern with the alternative types: extract VEC128 from VEC256
933 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
934           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
935 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
936           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
937
938 // Codegen pattern with the alternative types: extract VEC128 from VEC512
939 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
940                  vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
941 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
942                  vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
943 // Codegen pattern with the alternative types: extract VEC256 from VEC512
944 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
945                  vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
946 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
947                  vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
948
949
950 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
951 // smaller extract to enable EVEX->VEX.
952 let Predicates = [NoVLX] in {  // without VLX: VEXTRACT{I,F}128rr, index 1, on the sub_ymm half
953 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
954           (v2i64 (VEXTRACTI128rr
955                   (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
956                   (iPTR 1)))>;
957 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
958           (v2f64 (VEXTRACTF128rr
959                   (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
960                   (iPTR 1)))>;
961 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
962           (v4i32 (VEXTRACTI128rr
963                   (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
964                   (iPTR 1)))>;
965 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
966           (v4f32 (VEXTRACTF128rr
967                   (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
968                   (iPTR 1)))>;
969 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
970           (v8i16 (VEXTRACTI128rr
971                   (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
972                   (iPTR 1)))>;
973 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
974           (v16i8 (VEXTRACTI128rr
975                   (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
976                   (iPTR 1)))>;
977 }
978
979 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
980 // smaller extract to enable EVEX->VEX.
981 let Predicates = [HasVLX] in {  // with VLX: VEXTRACT{I,F}32x4Z256rr, index 1, on the sub_ymm half
982 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
983           (v2i64 (VEXTRACTI32x4Z256rr
984                   (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
985                   (iPTR 1)))>;
986 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
987           (v2f64 (VEXTRACTF32x4Z256rr
988                   (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
989                   (iPTR 1)))>;
990 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
991           (v4i32 (VEXTRACTI32x4Z256rr
992                   (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
993                   (iPTR 1)))>;
994 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
995           (v4f32 (VEXTRACTF32x4Z256rr
996                   (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
997                   (iPTR 1)))>;
998 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
999           (v8i16 (VEXTRACTI32x4Z256rr
1000                   (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
1001                   (iPTR 1)))>;
1002 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
1003           (v16i8 (VEXTRACTI32x4Z256rr
1004                   (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
1005                   (iPTR 1)))>;
1006 }
1007
1008
1009 // Additional patterns for handling a bitcast between the vselect and the
1010 // extract_subvector.
1011 multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
1012                                   X86VectorVTInfo To, X86VectorVTInfo Cast,
1013                                   PatFrag vextract_extract,
1014                                   SDNodeXForm EXTRACT_get_vextract_imm,
1015                                   list<Predicate> p> {
1016 let Predicates = p in {
1017   def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
1018                               (bitconvert
1019                                (To.VT (vextract_extract:$ext
1020                                        (From.VT From.RC:$src), (iPTR imm)))),
1021                               Cast.RC:$src0)),
1022             (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
1023                       Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
1024                       (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1025   // Zero-masking form (rrkz): the vselect false operand is all zeros.
1026   def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
1027                               (bitconvert
1028                                (To.VT (vextract_extract:$ext
1029                                        (From.VT From.RC:$src), (iPTR imm)))),
1030                               Cast.ImmAllZerosV)),
1031             (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
1032                       Cast.KRCWM:$mask, From.RC:$src,
1033                       (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1034 }
1035 }
1036 // FP 128-bit extracts from 256-bit vectors, masked at the other element width.
1037 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
1038                               v4f32x_info, vextract128_extract,
1039                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1040 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
1041                               v2f64x_info, vextract128_extract,
1042                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1043 // Integer 128-bit extracts from 256-bit vectors.
1044 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
1045                               v4i32x_info, vextract128_extract,
1046                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1047 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
1048                               v4i32x_info, vextract128_extract,
1049                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1050 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
1051                               v4i32x_info, vextract128_extract,
1052                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1053 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
1054                               v2i64x_info, vextract128_extract,
1055                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1056 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1057                               v2i64x_info, vextract128_extract,
1058                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1059 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1060                               v2i64x_info, vextract128_extract,
1061                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1062 // FP 128-bit extracts from 512-bit vectors.
1063 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1064                               v4f32x_info, vextract128_extract,
1065                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1066 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1067                               v2f64x_info, vextract128_extract,
1068                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1069 // Integer 128-bit extracts from 512-bit vectors.
1070 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1071                               v4i32x_info, vextract128_extract,
1072                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1073 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1074                               v4i32x_info, vextract128_extract,
1075                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1076 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1077                               v4i32x_info, vextract128_extract,
1078                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1079 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1080                               v2i64x_info, vextract128_extract,
1081                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1082 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1083                               v2i64x_info, vextract128_extract,
1084                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1085 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1086                               v2i64x_info, vextract128_extract,
1087                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1088 // FP 256-bit extracts from 512-bit vectors.
1089 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1090                               v8f32x_info, vextract256_extract,
1091                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1092 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1093                               v4f64x_info, vextract256_extract,
1094                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1095 // Integer 256-bit extracts from 512-bit vectors.
1096 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1097                               v8i32x_info, vextract256_extract,
1098                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1099 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1100                               v8i32x_info, vextract256_extract,
1101                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1102 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1103                               v8i32x_info, vextract256_extract,
1104                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1105 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1106                               v4i64x_info, vextract256_extract,
1107                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1108 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1109                               v4i64x_info, vextract256_extract,
1110                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1111 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1112                               v4i64x_info, vextract256_extract,
1113                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1114
1115 // vextractps - extract 32 bits from XMM, into a GR32 (rr) or to memory (mr)
1116 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
1117       (ins VR128X:$src1, u8imm:$src2),
1118       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1119       [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))],
1120       IIC_SSE_EXTRACTPS_RR>, EVEX, VEX_WIG, Sched<[WriteFShuffle]>;
1121 // NOTE(review): the store form below uses the WriteFShuffleLd class - confirm intended.
1122 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1123       (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1124       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1125       [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1126                           addr:$dst)], IIC_SSE_EXTRACTPS_RM>,
1127       EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteFShuffleLd]>;
1128
1129 //===---------------------------------------------------------------------===//
1130 // AVX-512 BROADCAST
1131 //---
1132 // broadcast with a scalar argument.
// Emits three anonymous patterns for an X86VBroadcast whose operand is a
// scalar in SrcInfo.FRC: unmasked, merge-masked (vselect against $src0) and
// zero-masked (vselect against ImmAllZerosV). Each one copies the scalar into
// SrcInfo.RC with COPY_TO_REGCLASS and selects the register-form instruction
// NAME#DestInfo.ZSuffix#{r,rk,rkz}, so those records must be defined by the
// same defm.
// Note: opc and OpcodeStr are accepted but not referenced in this body.
1133 multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
1134                             X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  // Unmasked broadcast.
1135   def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1136             (!cast<Instruction>(NAME#DestInfo.ZSuffix#r)
1137              (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
  // Merge-masking: unselected lanes come from $src0.
1138   def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1139                                   (X86VBroadcast SrcInfo.FRC:$src),
1140                                   DestInfo.RC:$src0)),
1141             (!cast<Instruction>(NAME#DestInfo.ZSuffix#rk)
1142              DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1143              (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
  // Zero-masking: unselected lanes are zero.
1144   def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1145                                   (X86VBroadcast SrcInfo.FRC:$src),
1146                                   DestInfo.ImmAllZerosV)),
1147             (!cast<Instruction>(NAME#DestInfo.ZSuffix#rkz)
1148              DestInfo.KRCWM:$mask, (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
1149 }
1150
1151 // Split version to allow mask and broadcast node to be different types. This
1152 // helps support the 32x2 broadcasts.
//
// Template arguments:
//   opc, OpcodeStr   - opcode byte and mnemonic forwarded to
//                      AVX512_maskable_split.
//   SchedRR, SchedRM - scheduling classes attached to the register ("r") and
//                      memory ("m") forms respectively.
//   MaskInfo         - type used for the result register class, the write-mask
//                      and the pass-through operand.
//   DestInfo         - type of the broadcast node itself; it is bitconverted
//                      to MaskInfo.VT in every pattern.
//   SrcInfo          - type of the source register / scalar memory operand.
//   UnmaskedOp       - node used only in the unmasked patterns (defaults to
//                      X86VBroadcast). The masked patterns always use
//                      X86VBroadcast.
//
// Besides the "r" and "m" instruction families, three anonymous patterns fold
// a scalar load wrapped in scalar_to_vector into the memory form (unmasked,
// merge-masked, zero-masked). All three name the instruction through
// MaskInfo.ZSuffix so that they agree with each other.
1153 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1154                                      SchedWrite SchedRR, SchedWrite SchedRM,
1155                                      X86VectorVTInfo MaskInfo,
1156                                      X86VectorVTInfo DestInfo,
1157                                      X86VectorVTInfo SrcInfo,
1158                                      SDPatternOperator UnmaskedOp = X86VBroadcast> {
1159   let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
  // Register source form.
1160   defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
1161                    (outs MaskInfo.RC:$dst),
1162                    (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
1163                    (MaskInfo.VT
1164                     (bitconvert
1165                      (DestInfo.VT
1166                       (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
1167                    (MaskInfo.VT
1168                     (bitconvert
1169                      (DestInfo.VT
1170                       (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1171                    NoItinerary>, T8PD, EVEX, Sched<[SchedRR]>;
  // Scalar memory source form.
1172   let mayLoad = 1 in
1173   defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
1174                    (outs MaskInfo.RC:$dst),
1175                    (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
1176                    (MaskInfo.VT
1177                     (bitconvert
1178                      (DestInfo.VT (UnmaskedOp
1179                                    (SrcInfo.ScalarLdFrag addr:$src))))),
1180                    (MaskInfo.VT
1181                     (bitconvert
1182                      (DestInfo.VT (X86VBroadcast
1183                                    (SrcInfo.ScalarLdFrag addr:$src))))),
1184                    NoItinerary>, T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
1185                    Sched<[SchedRM]>;
1186   }
1187
  // Fold broadcast(scalar_to_vector(load)) into the memory form: unmasked.
1188   def : Pat<(MaskInfo.VT
1189              (bitconvert
1190               (DestInfo.VT (UnmaskedOp
1191                             (SrcInfo.VT (scalar_to_vector
1192                                          (SrcInfo.ScalarLdFrag addr:$src))))))),
1193             (!cast<Instruction>(NAME#MaskInfo.ZSuffix#m) addr:$src)>;
  // Same fold, merge-masked against $src0.
1194   def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
1195                           (bitconvert
1196                            (DestInfo.VT
1197                             (X86VBroadcast
1198                              (SrcInfo.VT (scalar_to_vector
1199                                           (SrcInfo.ScalarLdFrag addr:$src)))))),
1200                           MaskInfo.RC:$src0)),
1201             (!cast<Instruction>(NAME#MaskInfo.ZSuffix#mk)
1202              MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
  // Same fold, zero-masked.
1203   def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
1204                           (bitconvert
1205                            (DestInfo.VT
1206                             (X86VBroadcast
1207                              (SrcInfo.VT (scalar_to_vector
1208                                           (SrcInfo.ScalarLdFrag addr:$src)))))),
1209                           MaskInfo.ImmAllZerosV)),
1210             (!cast<Instruction>(NAME#MaskInfo.ZSuffix#mkz)
1211              MaskInfo.KRCWM:$mask, addr:$src)>;
1212 }
1213
1214 // Helper multiclass to force mask and broadcast result to same type.
// Forwards to avx512_broadcast_rm_split with DestInfo supplied for both the
// MaskInfo and DestInfo arguments; UnmaskedOp keeps its default.
1215 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
1216                                SchedWrite SchedRR, SchedWrite SchedRM,
1217                                X86VectorVTInfo DestInfo,
1218                                X86VectorVTInfo SrcInfo> :
1219   avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
1220                             DestInfo, DestInfo, SrcInfo>;
1221
// Floating-point scalar broadcast for the "sd" flavour. Defines a 512-bit
// variant (Z, under HasAVX512) and a 256-bit variant (Z256, under HasVLX);
// no 128-bit variant is defined here. Every variant takes its source from
// _.info128 and combines avx512_broadcast_rm (instructions plus load-folding
// patterns) with avx512_broadcast_scalar (patterns for an FRC scalar source).
1222 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1223                                                        AVX512VLVectorVTInfo _> {
1224   let Predicates = [HasAVX512] in
1225     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256, 
1226                                   WriteFShuffle256Ld, _.info512, _.info128>,
1227               avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
1228                                       EVEX_V512;
1229
1230   let Predicates = [HasVLX] in {
1231     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1232                                      WriteFShuffle256Ld, _.info256, _.info128>,
1233                  avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
1234                                          EVEX_V256;
1235   }
1236 }
1237
// Floating-point scalar broadcast for the "ss" flavour. Same structure as
// avx512_fp_broadcast_sd, with an additional 128-bit variant (Z128) under
// HasVLX. The source is always _.info128.
// NOTE(review): Z128 is given the WriteFShuffle256 / WriteFShuffle256Ld
// scheduling classes, whereas the integer broadcast multiclasses in this file
// use the non-256 classes for their 128-bit variants -- confirm this is
// intended.
1238 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1239                                                        AVX512VLVectorVTInfo _> {
1240   let Predicates = [HasAVX512] in
1241     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1242                                   WriteFShuffle256Ld, _.info512, _.info128>,
1243               avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
1244                                EVEX_V512;
1245
1246   let Predicates = [HasVLX] in {
1247     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1248                                      WriteFShuffle256Ld, _.info256, _.info128>,
1249                  avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
1250                              EVEX_V256;
1251     defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1252                                      WriteFShuffle256Ld, _.info128, _.info128>,
1253                  avx512_broadcast_scalar<opc, OpcodeStr, _.info128, _.info128>,
1254                              EVEX_V128;
1255   }
1256 }
// Concrete scalar FP broadcasts: vbroadcastss (opcode 0x18, f32 element info)
// and vbroadcastsd (opcode 0x19, f64 element info, with VEX_W).
1257 defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1258                                        avx512vl_f32_info>;
1259 defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1260                                        avx512vl_f64_info>, VEX_W;
1261
// Select the 512-bit vbroadcast_ss / vbroadcast_sd intrinsics, applied to an
// address, directly to the memory forms of the broadcast instructions.
// NOTE(review): these patterns are not wrapped in a `let Predicates` block.
1262 def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
1263           (VBROADCASTSSZm addr:$src)>;
1264 def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
1265           (VBROADCASTSDZm addr:$src)>;
1266
// Integer broadcast from a general-purpose register. Defines the maskable
// "r" family via AVX512_maskable: the input operand is SrcRC:$src, the
// mnemonic is "vpbroadcast" followed by _.Suffix, and the selection pattern is
// OpNode applied directly to the SrcRC register.
1267 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1268                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1269                                     RegisterClass SrcRC> {
1270   let ExeDomain = _.ExeDomain in
1271   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1272                          (ins SrcRC:$src),
1273                          "vpbroadcast"##_.Suffix, "$src", "$src",
1274                          (_.VT (OpNode SrcRC:$src)), NoItinerary>, T8PD, EVEX,
1275                          Sched<[SchedRR]>;
1276 }
1277
// Integer broadcast from a GPR for the variants instantiated with GR8 / GR16
// sources. The instruction itself is declared with a GR32:$src operand and
// empty pattern lists (AVX512_maskable_custom); selection is done by the three
// separate patterns below, which widen the SrcRC value to i32 by inserting it
// into an IMPLICIT_DEF at sub-register index Subreg.
//
// Name is the fully qualified instruction name prefix; the patterns look up
// Name#r, Name#rk and Name#rkz, so it must match the records produced by the
// enclosing defm.
1278 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1279                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1280                                     RegisterClass SrcRC, SubRegIndex Subreg> {
1281   let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1282   defm r : AVX512_maskable_custom<opc, MRMSrcReg,
1283                         (outs _.RC:$dst), (ins GR32:$src),
1284                         !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1285                         !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1286                         "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
1287                         NoItinerary, "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1288
  // Unmasked.
1289   def : Pat <(_.VT (OpNode SrcRC:$src)),
1290              (!cast<Instruction>(Name#r)
1291               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1292
  // Merge-masked against $src0.
1293   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1294              (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
1295               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1296
  // Zero-masked.
1297   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1298              (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
1299               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1300 }
1301
// Vector-length expansion of avx512_int_broadcastbw_reg. Z (512-bit) requires
// prd; Z256 and Z128 require prd plus HasVLX. The Name string is extended
// with the same Z / Z256 / Z128 suffix as the defm so the inner patterns can
// resolve the instruction records. Z and Z256 use WriteShuffle256, Z128 uses
// WriteShuffle.
1302 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1303                       AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1304                       RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1305   let Predicates = [prd] in
1306     defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1307               OpNode, SrcRC, Subreg>, EVEX_V512;
1308   let Predicates = [prd, HasVLX] in {
1309     defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1310               _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1311     defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1312               _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1313   }
1314 }
1315
// Vector-length expansion of avx512_int_broadcast_reg. Z (512-bit) requires
// prd; Z256 and Z128 require prd plus HasVLX. Z and Z256 use WriteShuffle256,
// Z128 uses WriteShuffle.
1316 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1317                                        SDPatternOperator OpNode,
1318                                        RegisterClass SrcRC, Predicate prd> {
1319   let Predicates = [prd] in
1320     defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1321                                       SrcRC>, EVEX_V512;
1322   let Predicates = [prd, HasVLX] in {
1323     defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1324                                          SrcRC>, EVEX_V256;
1325     defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1326                                          SrcRC>, EVEX_V128;
1327   }
1328 }
1329
// GPR-source integer broadcasts. The byte (0x7A) and word (0x7B) forms take
// GR8 / GR16 through the sub_8bit / sub_16bit indices and require HasBWI; the
// string argument must repeat the defm name because it is used to look up the
// generated records. The dword and qword forms share opcode 0x7C, take
// GR32 / GR64 directly and require HasAVX512; the qword form adds VEX_W.
1330 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1331                        avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1332 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1333                        avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1334                        HasBWI>;
1335 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1336                                                  X86VBroadcast, GR32, HasAVX512>;
1337 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1338                                                  X86VBroadcast, GR64, HasAVX512>, VEX_W;
1339
// Zero-extend a mask register to a 512-bit integer vector by zero-masked
// broadcasting the constant 1 (materialized with MOV32ri / MOV64ri), so lanes
// whose mask bit is set become 1 and the rest become 0.
// NOTE(review): these patterns are not wrapped in a `let Predicates` block.
1340 def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
1341            (VPBROADCASTDrZrkz VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
1342 def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
1343            (VPBROADCASTQrZrkz VK8WM:$mask, (i64 (MOV64ri 0x1)))>;
1344
1345 // Provide aliases for broadcasts whose source is a wider vector register;
1346 // the pattern automatically does the extract.
// The single pattern matches X86VBroadcast of a SrcInfo-typed register and
// selects the register form NAME#DestInfo.ZSuffix#"r", feeding it the sub_xmm
// sub-register of the source via EXTRACT_SUBREG.
1347 multiclass avx512_int_broadcast_rm_lowering<X86VectorVTInfo DestInfo,
1348                                             X86VectorVTInfo SrcInfo> {
1349   def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
1350             (!cast<Instruction>(NAME#DestInfo.ZSuffix#"r")
1351                 (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>;
1352 }
1353
// Integer element broadcast from an XMM register or scalar memory, expanded
// over vector lengths. Z (512-bit) requires prd; Z256 and Z128 require prd
// plus HasVLX. In addition to the instructions, lowering patterns are added
// for wider vector sources:
//   Z     - source given as a 256-bit vector,
//   Z_Alt - source given as a 512-bit vector (a separate defm, see the
//           comment below),
//   Z256  - source given as a 256-bit vector.
// Z128 gets no extra lowering pattern.
1354 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1355                                         AVX512VLVectorVTInfo _, Predicate prd> {
1356   let Predicates = [prd] in {
1357     defm Z :   avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1358                                    WriteShuffle256Ld, _.info512, _.info128>,
1359                avx512_int_broadcast_rm_lowering<_.info512, _.info256>,
1360                                   EVEX_V512;
1361     // Defined separately to avoid redefinition.
1362     defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>;
1363   }
1364   let Predicates = [prd, HasVLX] in {
1365     defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1366                                     WriteShuffle256Ld, _.info256, _.info128>,
1367                 avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
1368                                  EVEX_V256;
1369     defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
1370                                     WriteShuffleLd, _.info128, _.info128>,
1371                                  EVEX_V128;
1372   }
1373 }
1374
// Vector/memory-source integer broadcasts: byte (0x78) and word (0x79)
// require HasBWI; dword (0x58) and qword (0x59, with VEX_W) require HasAVX512.
1375 defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1376                                            avx512vl_i8_info, HasBWI>;
1377 defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1378                                            avx512vl_i16_info, HasBWI>;
1379 defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1380                                            avx512vl_i32_info, HasAVX512>;
1381 defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1382                                            avx512vl_i64_info, HasAVX512>, VEX_W;
1383
// Subvector broadcast from memory. Defines the maskable "rm" family whose
// pattern is X86SubVBroadcast of a loaded _Src-typed vector (the load result
// is bitconverted to _Src.VT), producing a _Dst-typed result. Only a memory
// form is defined; it is scheduled as WriteShuffleLd.
1384 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1385                           X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1386   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1387                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1388                            (_Dst.VT (X86SubVBroadcast
1389                              (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))),
1390                            NoItinerary>, AVX5128IBase, EVEX,
1391                            Sched<[WriteShuffleLd]>;
1392 }
1393
1394 // This should be used for the AVX512DQ broadcast instructions. It disables
1395 // the unmasked patterns so that we only use the DQ instructions when masking
1396 // is requested.
// Implemented with AVX512_maskable_split: null_frag is passed as the first of
// the two pattern arguments and the X86SubVBroadcast-of-load pattern as the
// second. hasSideEffects and mayLoad are set explicitly on the defm.
1397 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1398                           X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1399   let hasSideEffects = 0, mayLoad = 1 in
1400   defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1401                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1402                            (null_frag),
1403                            (_Dst.VT (X86SubVBroadcast
1404                              (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))),
1405                             NoItinerary>, AVX5128IBase, EVEX,
1406                             Sched<[WriteShuffleLd]>;
1407 }
1408
// Fold a zero-extending vector load (X86vzload) feeding a 512-bit qword
// broadcast into the memory form of vpbroadcastq.
1409 let Predicates = [HasAVX512] in {
1410   // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
1411   def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
1412             (VPBROADCASTQZm addr:$src)>;
1413 }
1414
// 128-bit and 256-bit counterparts of the X86vzload qword-broadcast fold,
// available with HasVLX.
1415 let Predicates = [HasVLX] in {
1416   // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
1417   def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
1418             (VPBROADCASTQZ128m addr:$src)>;
1419   def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
1420             (VPBROADCASTQZ256m addr:$src)>;
1421 }
// Word broadcasts whose scalar operand is an i32 load truncated to i16. Both
// a plain i32 load and a zextloadi16 are matched, for the 128-bit and 256-bit
// destination types, and folded into the memory form of vpbroadcastw.
1422 let Predicates = [HasVLX, HasBWI] in {
1423   // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
1424   // This means we'll encounter truncated i32 loads; match that here.
1425   def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1426             (VPBROADCASTWZ128m addr:$src)>;
1427   def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1428             (VPBROADCASTWZ256m addr:$src)>;
1429   def : Pat<(v8i16 (X86VBroadcast
1430               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1431             (VPBROADCASTWZ128m addr:$src)>;
1432   def : Pat<(v16i16 (X86VBroadcast
1433               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1434             (VPBROADCASTWZ256m addr:$src)>;
1435 }
1436
1437 //===----------------------------------------------------------------------===//
1438 // AVX-512 BROADCAST SUBVECTORS
1439 //
1440
// 512-bit subvector broadcasts from memory. The 32x4 forms (0x5a integer,
// 0x1a FP) broadcast a 4 x 32-bit source; the 64x4 forms (0x5b integer,
// 0x1b FP) broadcast a 4 x 64-bit source and add VEX_W. All use CD8VT4 with
// the matching element size.
// NOTE(review): these defms are not inside a `let Predicates` block here.
1441 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1442                        v16i32_info, v4i32x_info>,
1443                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1444 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1445                        v16f32_info, v4f32x_info>,
1446                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1447 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1448                        v8i64_info, v4i64x_info>, VEX_W,
1449                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
1450 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1451                        v8f64_info, v4f64x_info>, VEX_W,
1452                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
1453
// Extra selection patterns for 512-bit subvector broadcasts under HasAVX512.
// They map element types that have no dedicated instruction in this block
// onto the 64x4 / 32x4 memory forms, and provide register-source fallbacks
// built from a 256-bit insert.
1454 let Predicates = [HasAVX512] in {
// 256-bit loads of other element types reuse the 64x4 memory forms.
1455 def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
1456           (VBROADCASTF64X4rm addr:$src)>;
1457 def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
1458           (VBROADCASTI64X4rm addr:$src)>;
1459 def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
1460           (VBROADCASTI64X4rm addr:$src)>;
1461 def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
1462           (VBROADCASTI64X4rm addr:$src)>;
1463
1464 // Provide fallback in case the load node that is used in the patterns above
1465 // is used by additional users, which prevents the pattern selection.
// Each fallback places the 256-bit source in the sub_ymm part of an
// IMPLICIT_DEF 512-bit value and then inserts the same source at index 1.
1466 def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
1467           (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1468                            (v4f64 VR256X:$src), 1)>;
1469 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
1470           (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1471                            (v8f32 VR256X:$src), 1)>;
1472 def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
1473           (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1474                            (v4i64 VR256X:$src), 1)>;
1475 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
1476           (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1477                            (v8i32 VR256X:$src), 1)>;
1478 def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
1479           (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1480                            (v16i16 VR256X:$src), 1)>;
1481 def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
1482           (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1483                            (v32i8 VR256X:$src), 1)>;
1484
// 128-bit loads of other element types reuse the 32x4 memory forms.
1485 def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1486           (VBROADCASTF32X4rm addr:$src)>;
1487 def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1488           (VBROADCASTI32X4rm addr:$src)>;
1489 def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
1490           (VBROADCASTI32X4rm addr:$src)>;
1491 def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
1492           (VBROADCASTI32X4rm addr:$src)>;
1493 }
1494
// 256-bit subvector broadcasts available with HasVLX: the 32x4 instructions
// themselves, patterns mapping other element types onto them, and
// register-source fallbacks built from a 128-bit insert.
1495 let Predicates = [HasVLX] in {
1496 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1497                            v8i32x_info, v4i32x_info>,
1498                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
1499 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1500                            v8f32x_info, v4f32x_info>,
1501                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
1502
// 128-bit loads of other element types reuse the 32x4 memory forms.
1503 def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1504           (VBROADCASTF32X4Z256rm addr:$src)>;
1505 def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1506           (VBROADCASTI32X4Z256rm addr:$src)>;
1507 def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
1508           (VBROADCASTI32X4Z256rm addr:$src)>;
1509 def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
1510           (VBROADCASTI32X4Z256rm addr:$src)>;
1511
1512 // Provide fallback in case the load node that is used in the patterns above
1513 // is used by additional users, which prevents the pattern selection.
// Each fallback places the 128-bit source in the sub_xmm part of an
// IMPLICIT_DEF 256-bit value and then inserts the same source at index 1.
1514 def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
1515           (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1516                               (v2f64 VR128X:$src), 1)>;
1517 def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
1518           (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1519                               (v4f32 VR128X:$src), 1)>;
1520 def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
1521           (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1522                               (v2i64 VR128X:$src), 1)>;
1523 def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
1524           (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1525                               (v4i32 VR128X:$src), 1)>;
1526 def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
1527           (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1528                               (v8i16 VR128X:$src), 1)>;
1529 def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
1530           (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1531                               (v16i8 VR128X:$src), 1)>;
1532 }
1533
// 64x2 subvector broadcasts to a 256-bit destination, available with HasVLX
// and HasDQI. Only masked selection patterns are generated (see
// avx512_subvec_broadcast_rm_dq).
// NOTE(review): the record names end in "Z128" although the destination info
// is 256-bit and the encoding is EVEX_V256 -- presumably the suffix refers to
// the 128-bit source; confirm before relying on the name.
1534 let Predicates = [HasVLX, HasDQI] in {
1535 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1536                            v4i64x_info, v2i64x_info>, VEX_W,
1537                            EVEX_V256, EVEX_CD8<64, CD8VT2>;
1538 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1539                            v4f64x_info, v2f64x_info>, VEX_W,
1540                            EVEX_V256, EVEX_CD8<64, CD8VT2>;
1541 }
1542
// 512-bit DQ subvector broadcasts: 64x2 (0x5a integer / 0x1a FP, VEX_W,
// CD8VT2) and 32x8 (0x5b integer / 0x1b FP, CD8VT8). All are built with
// avx512_subvec_broadcast_rm_dq, so only masked patterns are generated.
1543 let Predicates = [HasDQI] in {
1544 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1545                        v8i64_info, v2i64x_info>, VEX_W,
1546                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1547 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1548                        v16i32_info, v8i32x_info>,
1549                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1550 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1551                        v8f64_info, v2f64x_info>, VEX_W,
1552                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1553 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1554                        v16f32_info, v8f32x_info>,
1555                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1556 }
1557
// 32x2 broadcasts for the 512-bit (HasDQI) and 256-bit (HasDQI + HasVLX)
// vector lengths. Uses the split broadcast multiclass so that the mask /
// result type (_Dst.infoN) can differ from the type of the broadcast node
// (_Src.infoN); the source operand is always _Src.info128. null_frag is
// passed as UnmaskedOp.
1558 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1559                          AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
1560   let Predicates = [HasDQI] in
1561     defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1562                                           WriteShuffle256Ld, _Dst.info512,
1563                                           _Src.info512, _Src.info128, null_frag>,
1564                                           EVEX_V512;
1565   let Predicates = [HasDQI, HasVLX] in
1566     defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1567                                           WriteShuffle256Ld, _Dst.info256,
1568                                           _Src.info256, _Src.info128, null_frag>,
1569                                           EVEX_V256;
1570 }
1571
// Extends avx512_common_broadcast_32x2 with a 128-bit variant (Z128, under
// HasDQI + HasVLX). It is used below only for the integer instruction.
1572 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1573                          AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1574   avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1575
1576   let Predicates = [HasDQI, HasVLX] in
1577     defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
1578                                           WriteShuffleLd, _Dst.info128,
1579                                           _Src.info128, _Src.info128, null_frag>,
1580                                           EVEX_V128;
1581 }
1582
// vbroadcasti32x2 (0x59) gets 512/256/128-bit variants; vbroadcastf32x2
// (0x19) gets only 512/256-bit variants. The 32-bit element info is the
// mask/result type and the 64-bit element info is the broadcast node type.
1583 defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1584                                           avx512vl_i32_info, avx512vl_i64_info>;
1585 defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1586                                           avx512vl_f32_info, avx512vl_f64_info>;
1587
// 256-bit FP broadcasts whose source is itself a 256-bit vector register:
// take the sub_xmm sub-register and use the register form of the scalar
// broadcast.
1588 let Predicates = [HasVLX] in {
1589 def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
1590           (VBROADCASTSSZ256r (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
1591 def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
1592           (VBROADCASTSDZ256r (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
1593 }
1594
// 512-bit FP broadcasts whose source is a 512-bit or 256-bit vector register:
// take the sub_xmm sub-register and use the register form of the scalar
// broadcast.
// NOTE(review): unlike the 256-bit patterns, these are not wrapped in a
// `let Predicates` block.
1595 def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
1596           (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
1597 def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
1598           (VBROADCASTSSZr (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
1599
1600 def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
1601           (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
1602 def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
1603           (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
1604
1605 //===----------------------------------------------------------------------===//
1606 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
1607 //---
// Broadcast of a mask register into a vector register. Defines a single
// register form "rr" that takes KRC:$src and produces _.RC:$dst, selected
// from X86VBroadcastm. No masked or memory variants are defined here.
1608 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1609                                   X86VectorVTInfo _, RegisterClass KRC> {
1610   def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1611                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1612                   [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))],
1613                   IIC_SSE_PSHUF_RI>, EVEX, Sched<[WriteShuffle]>;
1614 }
1615
// Vector-length expansion of avx512_mask_broadcastm: Z (512-bit) requires
// HasCDI; Z256 and Z128 require HasCDI plus HasVLX. The same mask register
// class KRC is used for all three lengths.
1616 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1617                                  AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1618   let Predicates = [HasCDI] in
1619     defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1620   let Predicates = [HasCDI, HasVLX] in {
1621     defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1622     defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1623   }
1624 }
1625
// Mask-to-vector broadcasts: vpbroadcastmw2d (0x3A) takes a VK16 mask into
// i32 elements; vpbroadcastmb2q (0x2A, VEX_W) takes a VK8 mask into i64
// elements.
1626 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1627                                                avx512vl_i32_info, VK16>;
1628 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1629                                                avx512vl_i64_info, VK8>, VEX_W;
1630
1631 //===----------------------------------------------------------------------===//
1632 // -- VPERMI2 - 3 source operands form --
1633
// Itinerary / scheduling bundles used by the VPERMI2 definitions below. The
// FP bundle is scheduled as WriteFShuffle256 and the integer bundle as
// WriteShuffle256; the two template arguments are the itinerary classes that
// the multiclasses read back as itins.rr and itins.rm.
1634 let Sched = WriteFShuffle256 in
1635 def AVX512_PERM2_F : OpndItins<
1636   IIC_SSE_SHUFP, IIC_SSE_SHUFP
1637 >;
1638
1639 let Sched = WriteShuffle256 in
1640 def AVX512_PERM2_I : OpndItins<
1641   IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
1642 >;
1643
// Three-source permute (VPERMI2 family): register ("rr") and full-vector
// memory ("rm") forms. $src1 is tied to $dst by the Constraints string; the
// explicit inputs are $src2 and $src3, and the pattern is X86VPermi2X over
// all three. The memory form loads $src3 and bitconverts it to _.VT.
1644 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, OpndItins itins,
1645                          X86VectorVTInfo _> {
1646 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1647   // The index operand in the pattern should really be an integer type. However,
1648   // if we do that and it happens to come from a bitcast, then it becomes
1649   // difficult to find the bitcast needed to convert the index to the
1650   // destination type for the passthru since it will be folded with the bitcast
1651   // of the index operand.
1652   defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1653           (ins _.RC:$src2, _.RC:$src3),
1654           OpcodeStr, "$src3, $src2", "$src2, $src3",
1655           (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3)),
1656           itins.rr, 1>, EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>;
1657
1658   defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1659             (ins _.RC:$src2, _.MemOp:$src3),
1660             OpcodeStr, "$src3, $src2", "$src2, $src3",
1661             (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,
1662                    (_.VT (bitconvert (_.LdFrag addr:$src3))))), itins.rm, 1>,
1663             EVEX_4V, AVX5128IBase, Sched<[itins.Sched.Folded, ReadAfterLd]>;
1664   }
1665 }
1666
// Embedded-broadcast memory form ("rmb") of the three-source permute. $src3
// is a scalar memory operand that is broadcast (X86VBroadcast of a scalar
// load) before being used; the assembly strings append _.BroadcastStr to the
// memory operand and the encoding adds EVEX_B. $src1 is tied to $dst.
1667 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, OpndItins itins,
1668                             X86VectorVTInfo _> {
1669   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1670   defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1671               (ins _.RC:$src2, _.ScalarMemOp:$src3),
1672               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1673               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1674               (_.VT (X86VPermi2X _.RC:$src1,
1675                _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
1676               itins.rm, 1>, AVX5128IBase, EVEX_4V, EVEX_B,
1677               Sched<[itins.Sched.Folded, ReadAfterLd]>;
1678 }
1679
// Vector-length expansion of the three-source permute including the
// embedded-broadcast form. The 512-bit records use the bare NAME; the 128-bit
// and 256-bit records append "128" / "256" and require HasVLX.
// NOTE(review): no predicate is set here for the 512-bit variant.
1680 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr, OpndItins itins,
1681                                AVX512VLVectorVTInfo VTInfo> {
1682   defm NAME: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>,
1683             avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
1684   let Predicates = [HasVLX] in {
1685   defm NAME#128: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>,
1686                  avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
1687   defm NAME#256: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>,
1688                  avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
1689   }
1690 }
1691
// Variant of avx512_perm_i_sizes that instantiates only avx512_perm_i (no
// avx512_perm_i_mb broadcast form) and gates every width on the feature
// predicate Prd: 512-bit needs [Prd], 128/256-bit need [Prd, HasVLX].
1692 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1693                                   OpndItins itins,
1694                                   AVX512VLVectorVTInfo VTInfo,
1695                                   Predicate Prd> {
1696   let Predicates = [Prd] in
1697   defm NAME: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
1698   let Predicates = [Prd, HasVLX] in {
1699   defm NAME#128: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
1700   defm NAME#256: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>,  EVEX_V256;
1701   }
1702 }
1703
// VPERMI2* instantiations.
//  - D/Q/PS/PD go through avx512_perm_i_sizes (includes broadcast forms).
//  - W/B go through avx512_perm_i_sizes_bw, gated on HasBWI / HasVBMI.
//  - Q, W and PD add VEX_W; each entry sets EVEX_CD8 to its element size.
1704 defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", AVX512_PERM2_I,
1705                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1706 defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", AVX512_PERM2_I,
1707                   avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1708 defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", AVX512_PERM2_I,
1709                   avx512vl_i16_info, HasBWI>,
1710                   VEX_W, EVEX_CD8<16, CD8VF>;
1711 defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", AVX512_PERM2_I,
1712                   avx512vl_i8_info, HasVBMI>,
1713                   EVEX_CD8<8, CD8VF>;
1714 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", AVX512_PERM2_F,
1715                   avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
1716 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", AVX512_PERM2_F,
1717                   avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1718
1719 // VPERMT2
// Register (rr) and full-vector memory (rm) forms of a VPERMT2* instruction.
// `_` describes the data vector type; IdxVT describes the type of the index
// operand $src2, so the index register class (IdxVT.RC) may differ from the
// data register class (_.RC). $src1 is tied to $dst.
1720 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr, OpndItins itins,
1721                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1722 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
     // rr: all three sources are registers.
1723   defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1724           (ins IdxVT.RC:$src2, _.RC:$src3),
1725           OpcodeStr, "$src3, $src2", "$src2, $src3",
1726           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)),
1727           itins.rr, 1>, EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>;
1728
     // rm: $src3 is a full-width vector load (_.LdFrag) bitconverted as needed.
1729   defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1730             (ins IdxVT.RC:$src2, _.MemOp:$src3),
1731             OpcodeStr, "$src3, $src2", "$src2, $src3",
1732             (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1733                    (bitconvert (_.LdFrag addr:$src3)))), itins.rm, 1>,
1734             EVEX_4V, AVX5128IBase, Sched<[itins.Sched.Folded, ReadAfterLd]>;
1735   }
1736 }
// Memory-broadcast ("rmb") form of a VPERMT2* instruction. $src3 is a scalar
// memory operand loaded with _.ScalarLdFrag and splatted via X86VBroadcast;
// the index operand $src2 uses IdxVT.RC. $src1 is tied to $dst and the
// encoding carries EVEX_B.
1737 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, OpndItins itins,
1738                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1739   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1740   defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1741               (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
                 // Operand strings in both operand orders, with the broadcast
                 // decoration (_.BroadcastStr) appended to $src3.
1742               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1743               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1744               (_.VT (X86VPermt2 _.RC:$src1,
1745                IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
1746               itins.rm, 1>, AVX5128IBase, EVEX_4V, EVEX_B,
1747               Sched<[itins.Sched.Folded, ReadAfterLd]>;
1748 }
1749
// Instantiates avx512_perm_t and avx512_perm_t_mb for all three widths.
// VTInfo supplies the data vector types and ShuffleMask supplies the matching
// index vector types (passed as IdxVT). The 512-bit form gets no extra
// predicate here; the 128/256-bit forms require HasVLX.
1750 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr, OpndItins itins,
1751                                AVX512VLVectorVTInfo VTInfo,
1752                                AVX512VLVectorVTInfo ShuffleMask> {
1753   defm NAME: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info512,
1754                               ShuffleMask.info512>,
1755             avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info512,
1756                               ShuffleMask.info512>, EVEX_V512;
1757   let Predicates = [HasVLX] in {
1758   defm NAME#128: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info128,
1759                               ShuffleMask.info128>,
1760                  avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info128,
1761                               ShuffleMask.info128>, EVEX_V128;
1762   defm NAME#256: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info256,
1763                               ShuffleMask.info256>,
1764                  avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info256,
1765                               ShuffleMask.info256>, EVEX_V256;
1766   }
1767 }
1768
// Variant of avx512_perm_t_sizes that instantiates only avx512_perm_t (no
// broadcast form) and gates every width on Prd: 512-bit needs [Prd],
// 128/256-bit need [Prd, HasVLX]. Idx supplies the index vector types.
1769 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr, OpndItins itins,
1770                                  AVX512VLVectorVTInfo VTInfo,
1771                                  AVX512VLVectorVTInfo Idx,
1772                                  Predicate Prd> {
1773   let Predicates = [Prd] in
1774   defm NAME: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info512,
1775                            Idx.info512>, EVEX_V512;
1776   let Predicates = [Prd, HasVLX] in {
1777   defm NAME#128: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info128,
1778                                Idx.info128>, EVEX_V128;
1779   defm NAME#256: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info256,
1780                                Idx.info256>, EVEX_V256;
1781   }
1782 }
1783
// VPERMT2* instantiations. The second type-info argument is the index type:
// the integer variants index with their own type, while PS/PD index with the
// same-width integer infos (i32 / i64).
//  - D/Q/PS/PD use avx512_perm_t_sizes (includes broadcast forms).
//  - W/B use avx512_perm_t_sizes_bw, gated on HasBWI / HasVBMI.
1784 defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", AVX512_PERM2_I,
1785                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1786 defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", AVX512_PERM2_I,
1787                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1788 defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", AVX512_PERM2_I,
1789                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1790                   VEX_W, EVEX_CD8<16, CD8VF>;
1791 defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", AVX512_PERM2_I,
1792                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1793                   EVEX_CD8<8, CD8VF>;
1794 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", AVX512_PERM2_F,
1795                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1796 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", AVX512_PERM2_F,
1797                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1798
1799 //===----------------------------------------------------------------------===//
1800 // AVX-512 - BLEND using mask
1801 //
1802
// Itinerary bundle for the floating-point mask blends (VBLENDMPS/PD below):
// scheduled as WriteFVarBlend, with the F32P ALU rr/rm itinerary classes.
1803 let Sched = WriteFVarBlend in
1804 def AVX512_BLENDM : OpndItins<
1805   IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
1806 >;
1807
// Itinerary bundle for the integer mask blends (VPBLENDM* below): scheduled
// as WriteVarBlend, with the packed integer ALU rr/rm itinerary classes.
1808 let Sched = WriteVarBlend in
1809 def AVX512_PBLENDM : OpndItins<
1810   IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
1811 >;
1812
// Register and full-vector memory forms of a mask-blend instruction for
// vector type `_`:
//   rr / rm     - no mask operand
//   rrk / rmk   - write-mask operand $mask (EVEX_K)
//   rrkz / rmkz - write-mask operand with the {z} decoration (EVEX_KZ)
// Every record has an empty pattern list ([]), so no selection patterns are
// attached here; hasSideEffects is cleared explicitly for that reason.
1813 multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, OpndItins itins,
1814                             X86VectorVTInfo _> {
1815   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1816   def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1817              (ins _.RC:$src1, _.RC:$src2),
1818              !strconcat(OpcodeStr,
1819              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1820              [], itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
1821   def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1822              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1823              !strconcat(OpcodeStr,
1824              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1825              [], itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
1826   def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1827              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1828              !strconcat(OpcodeStr,
1829              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1830              [], itins.rr>, EVEX_4V, EVEX_KZ, Sched<[itins.Sched]>;
     // Memory forms: mayLoad must be stated because there is no pattern to
     // infer it from. They additionally carry EVEX_CD8 sized by _.EltSize.
1831   let mayLoad = 1 in {
1832   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1833              (ins _.RC:$src1, _.MemOp:$src2),
1834              !strconcat(OpcodeStr,
1835              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1836              [], itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1837              Sched<[itins.Sched.Folded, ReadAfterLd]>;
1838   def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1839              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1840              !strconcat(OpcodeStr,
1841              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1842              [], itins.rm>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1843              Sched<[itins.Sched.Folded, ReadAfterLd]>;
1844   def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1845              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1846              !strconcat(OpcodeStr,
1847              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1848              [], itins.rm>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1849              Sched<[itins.Sched.Folded, ReadAfterLd]>;
1850   }
1851   }
1852 }
// Memory-broadcast forms of a mask-blend instruction for vector type `_`:
//   rmbk - write-mask operand $mask (EVEX_K)
//   rmb  - no mask operand
// $src2 is a scalar memory operand printed with the broadcast decoration
// (_.BroadcastStr); both records carry EVEX_B. The pattern lists are empty,
// so mayLoad / hasSideEffects are stated explicitly.
1853 multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, OpndItins itins,
1854                                 X86VectorVTInfo _> {
     // ExeDomain is set here as well so that the broadcast forms report the same
     // execution domain as the rr/rm forms defined by avx512_blendmask.
1855   let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
1856   def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1857       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1858        !strconcat(OpcodeStr,
1859             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1860             "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
1861       [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1862       Sched<[itins.Sched.Folded, ReadAfterLd]>;
1863
1864   def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1865       (ins _.RC:$src1, _.ScalarMemOp:$src2),
1866        !strconcat(OpcodeStr,
1867             "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1868             "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
1869       [], itins.rm>,  EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1870       Sched<[itins.Sched.Folded, ReadAfterLd]>;
1871   }
1872 }
1873
// Instantiates the mask-blend forms (avx512_blendmask plus the broadcast
// forms from avx512_blendmask_rmb) for all three widths of VTInfo:
//   Z    - 512-bit (no extra predicate set here)
//   Z256 - 256-bit, requires HasVLX
//   Z128 - 128-bit, requires HasVLX
1874 multiclass blendmask_dq <bits<8> opc, string OpcodeStr, OpndItins itins,
1875                                  AVX512VLVectorVTInfo VTInfo> {
1876   defm Z : avx512_blendmask      <opc, OpcodeStr, itins, VTInfo.info512>,
1877            avx512_blendmask_rmb  <opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
1878
1879   let Predicates = [HasVLX] in {
1880     defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>,
1881                 avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
1882     defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>,
1883                 avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
1884   }
1885 }
1886
// Byte/word counterpart of blendmask_dq: instantiates only avx512_blendmask
// (no broadcast forms) and requires HasBWI for every width, plus HasVLX for
// the 256-bit and 128-bit forms.
1887 multiclass blendmask_bw <bits<8> opc, string OpcodeStr, OpndItins itins,
1888                          AVX512VLVectorVTInfo VTInfo> {
1889   let Predicates = [HasBWI] in
1890     defm Z : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
1891
1892   let Predicates = [HasBWI, HasVLX] in {
1893     defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
1894     defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
1895   }
1896 }
1897
1898
// Mask-blend instantiations. PS/PD use the FP itinerary (AVX512_BLENDM), the
// integer variants use AVX512_PBLENDM. D/Q/PS/PD go through blendmask_dq,
// B/W through blendmask_bw. The PD, Q and W variants add VEX_W.
1899 defm VBLENDMPS : blendmask_dq <0x65, "vblendmps", AVX512_BLENDM, avx512vl_f32_info>;
1900 defm VBLENDMPD : blendmask_dq <0x65, "vblendmpd", AVX512_BLENDM, avx512vl_f64_info>, VEX_W;
1901 defm VPBLENDMD : blendmask_dq <0x64, "vpblendmd", AVX512_PBLENDM, avx512vl_i32_info>;
1902 defm VPBLENDMQ : blendmask_dq <0x64, "vpblendmq", AVX512_PBLENDM, avx512vl_i64_info>, VEX_W;
1903 defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", AVX512_PBLENDM, avx512vl_i8_info>;
1904 defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", AVX512_PBLENDM, avx512vl_i16_info>, VEX_W;
1905
1906
1907 //===----------------------------------------------------------------------===//
1908 // Compare Instructions
1909 //===----------------------------------------------------------------------===//
1910
1911 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
1912
// Scalar floating-point compare into a mask register (opcode 0xC2) for the
// scalar type info `_`.
//   OpNode    - compare node used by the normal forms
//   OpNodeRnd - compare node taking an extra rounding-control operand; used
//               by the {sae} form with FROUND_NO_EXC
// Forms defined:
//   rr_Int / rm_Int / rrb_Int - condition code printed as part of the
//                               mnemonic ("vcmp${cc}" # Suffix)
//   rri_alt / rmi_alt / rrb_alt - assembler-only forms taking the condition
//                               as an explicit u8imm operand
//   rr / rm                   - codegen-only forms on _.FRC operands
// Every form writes its result to _.KRC.
1913 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
1914                              OpndItins itins> {
1915   defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1916                       (outs _.KRC:$dst),
1917                       (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
1918                       "vcmp${cc}"#_.Suffix,
1919                       "$src2, $src1", "$src1, $src2",
1920                       (OpNode (_.VT _.RC:$src1),
1921                               (_.VT _.RC:$src2),
1922                               imm:$cc), itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
1923   let mayLoad = 1 in
1924   defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
1925                     (outs _.KRC:$dst),
1926                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
1927                     "vcmp${cc}"#_.Suffix,
1928                     "$src2, $src1", "$src1, $src2",
1929                     (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
1930                         imm:$cc), itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
1931                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
1932
     // {sae} form: register operands only, encoded with EVEX_B.
1933   defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1934                      (outs _.KRC:$dst),
1935                      (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
1936                      "vcmp${cc}"#_.Suffix,
1937                      "{sae}, $src2, $src1", "$src1, $src2, {sae}",
1938                      (OpNodeRnd (_.VT _.RC:$src1),
1939                                 (_.VT _.RC:$src2),
1940                                 imm:$cc,
1941                                 (i32 FROUND_NO_EXC)), itins.rr>,
1942                      EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
1943   // Accept explicit immediate argument form instead of comparison code.
1944   let isAsmParserOnly = 1, hasSideEffects = 0 in {
       // Uses _.KRC like every other form in this multiclass instead of a
       // hard-coded VK1; for a scalar type info (NumElts = 1) _.KRC is VK1.
1945     defm  rri_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
1946                         (outs _.KRC:$dst),
1947                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1948                         "vcmp"#_.Suffix,
1949                         "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>, EVEX_4V,
1950                         Sched<[itins.Sched]>;
1951   let mayLoad = 1 in
1952     defm  rmi_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
1953                         (outs _.KRC:$dst),
1954                         (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
1955                         "vcmp"#_.Suffix,
1956                         "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>,
1957                         EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
1958                         Sched<[itins.Sched.Folded, ReadAfterLd]>;
1959
1960     defm  rrb_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
1961                        (outs _.KRC:$dst),
1962                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1963                        "vcmp"#_.Suffix,
1964                        "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", itins.rr>,
1965                        EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
1966   }// let isAsmParserOnly = 1, hasSideEffects = 0
1967
     // Codegen-only forms operating on the scalar FP register class (_.FRC)
     // and, for rm, a plain scalar load (_.ScalarLdFrag).
1968   let isCodeGenOnly = 1 in {
1969     let isCommutable = 1 in
1970     def rr : AVX512Ii8<0xC2, MRMSrcReg,
1971                 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
1972                 !strconcat("vcmp${cc}", _.Suffix,
1973                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1974                 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1975                                           _.FRC:$src2,
1976                                           imm:$cc))],
1977                 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
1978     def rm : AVX512Ii8<0xC2, MRMSrcMem,
1979               (outs _.KRC:$dst),
1980               (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
1981               !strconcat("vcmp${cc}", _.Suffix,
1982                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1983               [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1984                                         (_.ScalarLdFrag addr:$src2),
1985                                         imm:$cc))],
1986               itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
1987               Sched<[itins.Sched.Folded, ReadAfterLd]>;
1988   }
1989 }
1990
// Scalar compare instantiations, both requiring HasAVX512. The single
// precision variant uses f32x_info in the SSEPackedSingle domain; the double
// precision variant uses f64x_info in the SSEPackedDouble domain and adds
// VEX_W. Both share the X86cmpms / X86cmpmsRnd nodes.
1991 let Predicates = [HasAVX512] in {
1992   let ExeDomain = SSEPackedSingle in
1993   defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
1994                                    SSE_ALU_F32S>, AVX512XSIi8Base;
1995   let ExeDomain = SSEPackedDouble in
1996   defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
1997                                    SSE_ALU_F64S>, AVX512XDIi8Base, VEX_W;
1998 }
1999
// Packed integer compare into a mask register for vector type `_`, with a
// fixed predicate given by OpNode (no condition-code operand).
//   rr / rm   - unmasked register / full-vector-load forms
//   rrk / rmk - masked forms; the pattern ANDs $mask with the compare result
// IsCommutable is applied to the two register forms only.
2000 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
2001               OpndItins itins, X86VectorVTInfo _, bit IsCommutable> {
2002   let isCommutable = IsCommutable in
2003   def rr : AVX512BI<opc, MRMSrcReg,
2004              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2005              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2006              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
2007              itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
2008   def rm : AVX512BI<opc, MRMSrcMem,
2009              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2010              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2011              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2012                                      (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
2013              itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2014   let isCommutable = IsCommutable in
2015   def rrk : AVX512BI<opc, MRMSrcReg,
2016               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2017               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2018                           "$dst {${mask}}, $src1, $src2}"),
2019               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2020                                    (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
2021               itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
2022   def rmk : AVX512BI<opc, MRMSrcMem,
2023               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2024               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2025                           "$dst {${mask}}, $src1, $src2}"),
2026               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2027                                    (OpNode (_.VT _.RC:$src1),
2028                                        (_.VT (bitconvert
2029                                               (_.LdFrag addr:$src2))))))],
2030               itins.rm>, EVEX_4V, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2031 }
2032
// Extends avx512_icmp_packed (inherited, so rr/rm/rrk/rmk are defined too)
// with memory-broadcast forms:
//   rmb  - unmasked; $src2 is a scalar load splatted via X86VBroadcast
//   rmbk - masked; the pattern ANDs $mask with the compare result
// Both broadcast forms carry EVEX_B.
2033 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
2034               OpndItins itins,  X86VectorVTInfo _, bit IsCommutable> :
2035            avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, _, IsCommutable> {
2036   def rmb : AVX512BI<opc, MRMSrcMem,
2037               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2038               !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2039                                     "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2040               [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2041                               (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
2042               itins.rm>, EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2043   def rmbk : AVX512BI<opc, MRMSrcMem,
2044                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2045                                        _.ScalarMemOp:$src2),
2046                !strconcat(OpcodeStr,
2047                           "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2048                           "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2049                [(set _.KRC:$dst, (and _.KRCWM:$mask,
2050                                       (OpNode (_.VT _.RC:$src1),
2051                                         (X86VBroadcast
2052                                           (_.ScalarLdFrag addr:$src2)))))],
2053                itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2054                Sched<[itins.Sched.Folded, ReadAfterLd]>;
2055 }
2056
// Instantiates avx512_icmp_packed (no broadcast forms) for all three widths
// of VTInfo. The 512-bit form (Z) requires [prd]; the 256-bit (Z256) and
// 128-bit (Z128) forms require [prd, HasVLX]. IsCommutable defaults to 0.
2057 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
2058                                  OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2059                                  Predicate prd, bit IsCommutable = 0> {
2060   let Predicates = [prd] in
2061   defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info512,
2062                               IsCommutable>, EVEX_V512;
2063
2064   let Predicates = [prd, HasVLX] in {
2065     defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info256,
2066                                    IsCommutable>, EVEX_V256;
2067     defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info128,
2068                                    IsCommutable>, EVEX_V128;
2069   }
2070 }
2071
// Same as avx512_icmp_packed_vl but instantiates avx512_icmp_packed_rmb, so
// each width also gets the memory-broadcast forms. Z requires [prd];
// Z256 / Z128 require [prd, HasVLX]. IsCommutable defaults to 0.
2072 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2073                                      SDNode OpNode, OpndItins itins,
2074                                      AVX512VLVectorVTInfo VTInfo,
2075                                      Predicate prd, bit IsCommutable = 0> {
2076   let Predicates = [prd] in
2077   defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512,
2078                                   IsCommutable>, EVEX_V512;
2079
2080   let Predicates = [prd, HasVLX] in {
2081     defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info256,
2082                                        IsCommutable>, EVEX_V256;
2083     defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info128,
2084                                        IsCommutable>, EVEX_V128;
2085   }
2086 }
2087
2088 // FIXME: Is there a better scheduler itinerary for VPCMP?
// Fixed-predicate packed integer compares (all currently use SSE_ALU_F32P).
//  - The EQ variants pass IsCommutable = 1; the GT variants leave the
//    default of 0.
//  - B/W use avx512_icmp_packed_vl (no broadcast forms), gated on HasBWI.
//  - D/Q use avx512_icmp_packed_rmb_vl (with broadcast forms), gated on
//    HasAVX512; the Q variants are additionally T8PD and VEX_W.
2089 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
2090                       SSE_ALU_F32P, avx512vl_i8_info, HasBWI, 1>,
2091                 EVEX_CD8<8, CD8VF>, VEX_WIG;
2092
2093 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
2094                       SSE_ALU_F32P, avx512vl_i16_info, HasBWI, 1>,
2095                 EVEX_CD8<16, CD8VF>, VEX_WIG;
2096
2097 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
2098                       SSE_ALU_F32P, avx512vl_i32_info, HasAVX512, 1>,
2099                 EVEX_CD8<32, CD8VF>;
2100
2101 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
2102                       SSE_ALU_F32P, avx512vl_i64_info, HasAVX512, 1>,
2103                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2104
2105 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
2106                       SSE_ALU_F32P, avx512vl_i8_info, HasBWI>,
2107                 EVEX_CD8<8, CD8VF>, VEX_WIG;
2108
2109 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
2110                       SSE_ALU_F32P, avx512vl_i16_info, HasBWI>,
2111                 EVEX_CD8<16, CD8VF>, VEX_WIG;
2112
2113 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
2114                       SSE_ALU_F32P, avx512vl_i32_info, HasAVX512>,
2115                 EVEX_CD8<32, CD8VF>;
2116
2117 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
2118                       SSE_ALU_F32P, avx512vl_i64_info, HasAVX512>,
2119                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2120
2121 // Transform that rewrites a VPCMP condition-code immediate for swapped
2122 // operands, so that a memory operand appearing first can still be matched.
// Only the low three bits of the immediate are used. The ordered predicates
// are exchanged (LT <-> NLE, LE <-> NLT); EQ, FALSE, NE and TRUE are returned
// unchanged. All eight 3-bit values have a case, so the default is
// unreachable.
2123 def CommutePCMPCC : SDNodeXForm<imm, [{
2124   uint8_t Imm = N->getZExtValue() & 0x7;
2125   switch (Imm) {
2126   default: llvm_unreachable("Unreachable!");
2127   case 0x01: Imm = 0x06; break; // LT  -> NLE
2128   case 0x02: Imm = 0x05; break; // LE  -> NLT
2129   case 0x05: Imm = 0x02; break; // NLT -> LE
2130   case 0x06: Imm = 0x01; break; // NLE -> LT
2131   case 0x00: // EQ
2132   case 0x03: // FALSE
2133   case 0x04: // NE
2134   case 0x07: // TRUE
2135     break;
2136   }
2137   return getI8Imm(Imm, SDLoc(N));
2138 }]>;
2139
// Packed integer compare with a condition-code operand, writing a mask
// register, for vector type `_`.
//   rri / rmi           - unmasked register / full-vector-load forms; the
//                         condition code is printed inside the mnemonic
//                         ("vpcmp${cc}" # Suffix)
//   rrik / rmik         - masked forms; the pattern ANDs $mask with the result
//   *_alt               - assembler-only forms that take the condition as an
//                         explicit u8imm operand and have no patterns
// Two extra patterns at the end select the memory forms when the load is the
// FIRST compare operand, fixing up the immediate with CommutePCMPCC.
2140 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
2141                           OpndItins itins, X86VectorVTInfo _> {
2142   let isCommutable = 1 in
2143   def rri : AVX512AIi8<opc, MRMSrcReg,
2144              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
2145              !strconcat("vpcmp${cc}", Suffix,
2146                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2147              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2148                                        imm:$cc))],
2149              itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
2150   def rmi : AVX512AIi8<opc, MRMSrcMem,
2151              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
2152              !strconcat("vpcmp${cc}", Suffix,
2153                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2154              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2155                               (_.VT (bitconvert (_.LdFrag addr:$src2))),
2156                               imm:$cc))],
2157              itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2158   let isCommutable = 1 in
2159   def rrik : AVX512AIi8<opc, MRMSrcReg,
2160               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2161                                       AVX512ICC:$cc),
2162               !strconcat("vpcmp${cc}", Suffix,
2163                          "\t{$src2, $src1, $dst {${mask}}|",
2164                          "$dst {${mask}}, $src1, $src2}"),
2165               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2166                                   (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2167                                           imm:$cc)))],
2168               itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
2169   def rmik : AVX512AIi8<opc, MRMSrcMem,
2170               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2171                                     AVX512ICC:$cc),
2172               !strconcat("vpcmp${cc}", Suffix,
2173                          "\t{$src2, $src1, $dst {${mask}}|",
2174                          "$dst {${mask}}, $src1, $src2}"),
2175               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2176                                    (OpNode (_.VT _.RC:$src1),
2177                                       (_.VT (bitconvert (_.LdFrag addr:$src2))),
2178                                       imm:$cc)))],
2179               itins.rm>, EVEX_4V, EVEX_K,
2180               Sched<[itins.Sched.Folded, ReadAfterLd]>;
2181
2182   // Accept explicit immediate argument form instead of comparison code.
     // These records have empty pattern lists, so hasSideEffects / mayLoad are
     // stated explicitly.
2183   let isAsmParserOnly = 1, hasSideEffects = 0 in {
2184     def rri_alt : AVX512AIi8<opc, MRMSrcReg,
2185                (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2186                !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2187                           "$dst, $src1, $src2, $cc}"),
2188                [], itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
2189     let mayLoad = 1 in
2190     def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
2191                (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2192                !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2193                           "$dst, $src1, $src2, $cc}"),
2194                [], itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2195     def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
2196                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2197                                        u8imm:$cc),
2198                !strconcat("vpcmp", Suffix,
2199                           "\t{$cc, $src2, $src1, $dst {${mask}}|",
2200                           "$dst {${mask}}, $src1, $src2, $cc}"),
2201                [], itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
2202     let mayLoad = 1 in
2203     def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
2204                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2205                                        u8imm:$cc),
2206                !strconcat("vpcmp", Suffix,
2207                           "\t{$cc, $src2, $src1, $dst {${mask}}|",
2208                           "$dst {${mask}}, $src1, $src2, $cc}"),
2209                [], itins.rm>, EVEX_4V, EVEX_K,
2210                Sched<[itins.Sched.Folded, ReadAfterLd]>;
2211   }
2212
     // Load in the first operand position: select rmi with the register as
     // $src1 and the condition code rewritten by CommutePCMPCC. The target
     // instruction is looked up by name (NAME # _.ZSuffix # "rmi").
2213   def : Pat<(OpNode (bitconvert (_.LdFrag addr:$src2)),
2214                     (_.VT _.RC:$src1), imm:$cc),
2215             (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2216                                                       (CommutePCMPCC imm:$cc))>;
2217
     // Masked counterpart of the pattern above, selecting rmik.
2218   def : Pat<(and _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src2)),
2219                                         (_.VT _.RC:$src1), imm:$cc)),
2220             (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
2221                                                        _.RC:$src1, addr:$src2,
2222                                                        (CommutePCMPCC imm:$cc))>;
2223 }
2224
// Extends avx512_icmp_cc (inherited, so all of its forms are defined too)
// with memory-broadcast forms:
//   rmib / rmibk         - unmasked / masked; $src2 is a scalar load splatted
//                          via X86VBroadcast, encoded with EVEX_B
//   rmib_alt / rmibk_alt - assembler-only forms taking the condition as an
//                          explicit u8imm operand, with no patterns
// Two extra patterns select the broadcast forms when the broadcast load is the
// FIRST compare operand, fixing up the immediate with CommutePCMPCC.
2225 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
2226                               OpndItins itins, X86VectorVTInfo _> :
2227            avx512_icmp_cc<opc, Suffix, OpNode, itins, _> {
2228   def rmib : AVX512AIi8<opc, MRMSrcMem,
2229              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2230                                      AVX512ICC:$cc),
2231              !strconcat("vpcmp${cc}", Suffix,
2232                         "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2233                         "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2234              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2235                                (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2236                                imm:$cc))],
2237              itins.rm>, EVEX_4V, EVEX_B,
2238              Sched<[itins.Sched.Folded, ReadAfterLd]>;
2239   def rmibk : AVX512AIi8<opc, MRMSrcMem,
2240               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2241                                        _.ScalarMemOp:$src2, AVX512ICC:$cc),
2242               !strconcat("vpcmp${cc}", Suffix,
2243                        "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2244                        "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2245               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2246                                   (OpNode (_.VT _.RC:$src1),
2247                                     (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2248                                     imm:$cc)))],
2249               itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2250               Sched<[itins.Sched.Folded, ReadAfterLd]>;
2251
2252   // Accept explicit immediate argument form instead of comparison code.
2253   let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
2254     def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
2255                (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2256                                        u8imm:$cc),
2257                !strconcat("vpcmp", Suffix,
2258                    "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2259                    "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2260                [], itins.rm>, EVEX_4V, EVEX_B,
2261                Sched<[itins.Sched.Folded, ReadAfterLd]>;
2262     def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
2263                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2264                                        _.ScalarMemOp:$src2, u8imm:$cc),
2265                !strconcat("vpcmp", Suffix,
2266                   "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2267                   "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2268                [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2269                Sched<[itins.Sched.Folded, ReadAfterLd]>;
2270   }
2271
     // Broadcast load in the first operand position: select rmib with the
     // register as $src1 and the condition code rewritten by CommutePCMPCC.
2272   def : Pat<(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2273                     (_.VT _.RC:$src1), imm:$cc),
2274             (!cast<Instruction>(NAME#_.ZSuffix#"rmib") _.RC:$src1, addr:$src2,
2275                                                        (CommutePCMPCC imm:$cc))>;
2276
     // Masked counterpart of the pattern above, selecting rmibk.
2277   def : Pat<(and _.KRCWM:$mask, (OpNode (X86VBroadcast
2278                                          (_.ScalarLdFrag addr:$src2)),
2279                                         (_.VT _.RC:$src1), imm:$cc)),
2280             (!cast<Instruction>(NAME#_.ZSuffix#"rmibk") _.KRCWM:$mask,
2281                                                        _.RC:$src1, addr:$src2,
2282                                                        (CommutePCMPCC imm:$cc))>;
2283 }
2284
// Instantiates avx512_icmp_cc at each vector width described by VTInfo.
// The 512-bit variant (Z) requires only `prd`; the 256-bit (Z256) and 128-bit
// (Z128) variants additionally require HasVLX.
2285 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
2286                              OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2287                              Predicate prd> {
2288   let Predicates = [prd] in
2289   defm Z : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info512>,
2290                           EVEX_V512;
2291
2292   let Predicates = [prd, HasVLX] in {
2293     defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info256>,
2294                                EVEX_V256;
2295     defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info128>,
2296                                EVEX_V128;
2297   }
2298 }
2299
// Same width fan-out as avx512_icmp_cc_vl, but instantiates
// avx512_icmp_cc_rmb, which adds the broadcast-memory (rmib*) forms.
// Z requires `prd`; Z256/Z128 additionally require HasVLX.
2300 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
2301                                  OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2302                                  Predicate prd> {
2303   let Predicates = [prd] in
2304   defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info512>,
2305            EVEX_V512;
2306
2307   let Predicates = [prd, HasVLX] in {
2308     defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info256>,
2309                 EVEX_V256;
2310     defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info128>,
2311                 EVEX_V128;
2312   }
2313 }
2314
// Integer compare-into-mask instantiations.
// Byte/word forms use avx512_icmp_cc_vl and are predicated on HasBWI;
// dword/qword forms use avx512_icmp_cc_rmb_vl and are predicated on HasAVX512.
// The X86cmpm variants use opcode 0x3F (b/w) or 0x1F (d/q); the X86cmpmu
// ("u"-suffixed) variants use 0x3E or 0x1E. Word and qword forms add VEX_W.
2315 // FIXME: Is there a better scheduler itinerary for VPCMP/VPCMPU?
2316 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, SSE_ALU_F32P,
2317                                 avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
2318 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, SSE_ALU_F32P,
2319                                  avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
2320
2321 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, SSE_ALU_F32P,
2322                                 avx512vl_i16_info, HasBWI>,
2323                                 VEX_W, EVEX_CD8<16, CD8VF>;
2324 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, SSE_ALU_F32P,
2325                                  avx512vl_i16_info, HasBWI>,
2326                                  VEX_W, EVEX_CD8<16, CD8VF>;
2327
2328 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, SSE_ALU_F32P,
2329                                     avx512vl_i32_info, HasAVX512>,
2330                                     EVEX_CD8<32, CD8VF>;
2331 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, SSE_ALU_F32P,
2332                                      avx512vl_i32_info, HasAVX512>,
2333                                      EVEX_CD8<32, CD8VF>;
2334
2335 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, SSE_ALU_F32P,
2336                                     avx512vl_i64_info, HasAVX512>,
2337                                     VEX_W, EVEX_CD8<64, CD8VF>;
2338 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, SSE_ALU_F32P,
2339                                      avx512vl_i64_info, HasAVX512>,
2340                                      VEX_W, EVEX_CD8<64, CD8VF>;
2341
2342
// Packed FP compare-into-mask forms (opcode 0xC2) for one vector type `_`:
//   rri  - register / register
//   rmi  - register / full-vector memory operand
//   rmbi - register / broadcast scalar memory operand (EVEX_B)
// plus *_alt assembler-only variants that take the predicate as an explicit
// u8imm operand, and patterns that select the memory forms when the load
// appears as the first X86cmpm operand and the condition code matches
// CommutableCMPCC.
2343 multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> {
// NOTE(review): the trailing `1` passed to AVX512_maskable_cmp below is
// presumably its commutable flag - confirm against that multiclass.
2344   defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2345                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
2346                    "vcmp${cc}"#_.Suffix,
2347                    "$src2, $src1", "$src1, $src2",
2348                    (X86cmpm (_.VT _.RC:$src1),
2349                          (_.VT _.RC:$src2),
2350                            imm:$cc), itins.rr, 1>,
2351                    Sched<[itins.Sched]>;
2352
2353   defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2354                 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
2355                 "vcmp${cc}"#_.Suffix,
2356                 "$src2, $src1", "$src1, $src2",
2357                 (X86cmpm (_.VT _.RC:$src1),
2358                         (_.VT (bitconvert (_.LdFrag addr:$src2))),
2359                         imm:$cc), itins.rm>,
2360                 Sched<[itins.Sched.Folded, ReadAfterLd]>;
2361
2362   defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2363                 (outs _.KRC:$dst),
2364                 (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
2365                 "vcmp${cc}"#_.Suffix,
2366                 "${src2}"##_.BroadcastStr##", $src1",
2367                 "$src1, ${src2}"##_.BroadcastStr,
2368                 (X86cmpm (_.VT _.RC:$src1),
2369                         (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
2370                         imm:$cc), itins.rm>,
2371                 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2372   // Accept explicit immediate argument form instead of comparison code.
2373   let isAsmParserOnly = 1, hasSideEffects = 0 in {
2374     defm  rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2375                          (outs _.KRC:$dst),
2376                          (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2377                          "vcmp"#_.Suffix,
2378                          "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>,
2379                          Sched<[itins.Sched]>;
2380
2381     let mayLoad = 1 in {
2382       defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2383                              (outs _.KRC:$dst),
2384                              (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2385                              "vcmp"#_.Suffix,
2386                              "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>,
2387                              Sched<[itins.Sched.Folded, ReadAfterLd]>;
2388
2389       defm  rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2390                          (outs _.KRC:$dst),
2391                          (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2392                          "vcmp"#_.Suffix,
2393                          "$cc, ${src2}"##_.BroadcastStr##", $src1",
2394                          "$src1, ${src2}"##_.BroadcastStr##", $cc", itins.rm>,
2395                          EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2396     }
2397   }
2398
2399   // Patterns for selecting with loads in other operand.
// Each pattern matches the load (or broadcast load) as the FIRST X86cmpm
// operand and emits the rmi/rmbi instruction (or its masked "k" form) with
// the register as $src1 and the address as $src2; the immediate is passed
// through unchanged.
2400   def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2401                      CommutableCMPCC:$cc),
2402             (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2403                                                       imm:$cc)>;
2404
2405   def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
2406                                          (_.VT _.RC:$src1),
2407                                          CommutableCMPCC:$cc)),
2408             (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
2409                                                        _.RC:$src1, addr:$src2,
2410                                                        imm:$cc)>;
2411
2412   def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2413                      (_.VT _.RC:$src1), CommutableCMPCC:$cc),
2414             (!cast<Instruction>(NAME#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2415                                                        imm:$cc)>;
2416
2417   def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
2418                                           (_.ScalarLdFrag addr:$src2)),
2419                                          (_.VT _.RC:$src1),
2420                                          CommutableCMPCC:$cc)),
2421             (!cast<Instruction>(NAME#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2422                                                         _.RC:$src1, addr:$src2,
2423                                                         imm:$cc)>;
2424 }
2425
// Register/register packed FP compare forms carrying the {sae} modifier
// (opcode 0xC2, EVEX_B). `rrib` matches X86cmpmRnd with FROUND_NO_EXC;
// `rrib_alt` is the assembler-only variant taking an explicit u8imm predicate.
2426 multiclass avx512_vcmp_sae<OpndItins itins, X86VectorVTInfo _> {
2427   // comparison code form (VCMP[EQ/LT/LE/...])
2428   defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2429                      (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2430                      "vcmp${cc}"#_.Suffix,
2431                      "{sae}, $src2, $src1", "$src1, $src2, {sae}",
2432                      (X86cmpmRnd (_.VT _.RC:$src1),
2433                                     (_.VT _.RC:$src2),
2434                                     imm:$cc,
2435                                 (i32 FROUND_NO_EXC)), itins.rr>,
2436                      EVEX_B, Sched<[itins.Sched]>;
2437
2438   let isAsmParserOnly = 1, hasSideEffects = 0 in {
2439     defm  rrib_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2440                          (outs _.KRC:$dst),
2441                          (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2442                          "vcmp"#_.Suffix,
2443                          "$cc, {sae}, $src2, $src1",
2444                          "$src1, $src2, {sae}, $cc", itins.rr>,
2445                          EVEX_B, Sched<[itins.Sched]>;
2446    }
2447 }
2448
// Width fan-out for packed FP compares. The 512-bit variant (Z, HasAVX512)
// gets both the common forms and the {sae} forms; the 128-/256-bit variants
// (HasAVX512 + HasVLX) get only the common forms.
2449 multiclass avx512_vcmp<OpndItins itins, AVX512VLVectorVTInfo _> {
2450   let Predicates = [HasAVX512] in {
2451     defm Z    : avx512_vcmp_common<itins, _.info512>,
2452                 avx512_vcmp_sae<itins, _.info512>, EVEX_V512;
2453
2454   }
2455   let Predicates = [HasAVX512,HasVLX] in {
2456    defm Z128 : avx512_vcmp_common<itins, _.info128>, EVEX_V128;
2457    defm Z256 : avx512_vcmp_common<itins, _.info256>, EVEX_V256;
2458   }
2459 }
2460
// Packed FP compare instantiations: VCMPPD uses the f64 type info with
// AVX512PDIi8Base, EVEX_CD8<64, CD8VF> and VEX_W; VCMPPS uses the f32 type
// info with AVX512PSIi8Base and EVEX_CD8<32, CD8VF>.
2461 defm VCMPPD : avx512_vcmp<SSE_ALU_F64P, avx512vl_f64_info>,
2462                           AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2463 defm VCMPPS : avx512_vcmp<SSE_ALU_F32P, avx512vl_f32_info>,
2464                           AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2465
2466
2467 // Patterns to select fp compares with load as first operand.
// Scalar counterparts of the commuted-load patterns: when the load is the
// first X86cmpms operand and the condition code matches CommutableCMPCC, the
// memory form (VCMPSDZrm / VCMPSSZrm) is selected with the register as $src1
// and the immediate passed through unchanged.
2468 let Predicates = [HasAVX512] in {
2469   def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2470                             CommutableCMPCC:$cc)),
2471             (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;
2472
2473   def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2474                             CommutableCMPCC:$cc)),
2475             (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
2476 }
2477
2478 // ----------------------------------------------------------------
2479 // FPClass
2480 //handle fpclass instruction  mask =  op(reg_scalar,imm)
2481 //                                    op(mem_scalar,imm)
// Scalar fpclass instruction forms for one scalar type `_`, all predicated on
// `prd` and placed in `_.ExeDomain`:
//   rr  / rrk - register source, unmasked / write-masked
//   rm  / rmk - scalar memory source, unmasked / write-masked
// $src2 is the i32u8imm class-selection immediate handed to OpNode.
// The masked forms match (and mask, fpclass(...)): a write-mask clears the
// result bits it does not enable, so the mask is combined with `and`, in line
// with the other masked compare patterns in this file.
2482 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
2483                                  OpndItins itins,  X86VectorVTInfo _,
2484                                  Predicate prd> {
2485   let Predicates = [prd], ExeDomain = _.ExeDomain in {
2486       def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2487                       (ins _.RC:$src1, i32u8imm:$src2),
2488                       OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2489                       [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
2490                               (i32 imm:$src2)))], itins.rr>,
2491                       Sched<[itins.Sched]>;
2492       def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2493                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2494                       OpcodeStr##_.Suffix#
2495                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2496                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2497                                       (OpNode (_.VT _.RC:$src1),
2498                                       (i32 imm:$src2))))], itins.rr>,
2499                       EVEX_K, Sched<[itins.Sched]>;
2500     def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2501                     (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2502                     OpcodeStr##_.Suffix##
2503                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2504                     [(set _.KRC:$dst,
2505                           (OpNode _.ScalarIntMemCPat:$src1,
2506                                   (i32 imm:$src2)))], itins.rm>,
2507                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
2508     def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2509                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2510                     OpcodeStr##_.Suffix##
2511                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2512                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
2513                         (OpNode _.ScalarIntMemCPat:$src1,
2514                             (i32 imm:$src2))))], itins.rm>,
2515                     EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2516   }
2517 }
2518
2519 // Handle vector fpclass instructions: mask = fpclass(reg_vec, imm)
2520 //                                            fpclass(mem_vec, imm)
2521 //                                            fpclass(broadcast(eltVt), imm)
// Vector fpclass instruction forms for one vector type `_`, placed in
// `_.ExeDomain`:
//   rr  / rrk  - register source, unmasked / write-masked
//   rm  / rmk  - full-vector memory source; `mem` is appended to the mnemonic
//   rmb / rmbk - broadcast scalar memory source (EVEX_B); `broadcast` is
//                appended to the mnemonic
// $src2 is the i32u8imm class-selection immediate handed to OpNode.
// The masked forms match (and mask, fpclass(...)): a write-mask clears the
// result bits it does not enable, so the mask is combined with `and`, in line
// with the other masked compare patterns in this file.
2522 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
2523                                  OpndItins itins, X86VectorVTInfo _,
2524                                  string mem, string broadcast>{
2525   let ExeDomain = _.ExeDomain in {
2526   def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2527                       (ins _.RC:$src1, i32u8imm:$src2),
2528                       OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2529                       [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
2530                                        (i32 imm:$src2)))], itins.rr>,
2531                       Sched<[itins.Sched]>;
2532   def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2533                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2534                       OpcodeStr##_.Suffix#
2535                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2536                       [(set _.KRC:$dst,(and _.KRCWM:$mask,
2537                                        (OpNode (_.VT _.RC:$src1),
2538                                        (i32 imm:$src2))))], itins.rr>,
2539                       EVEX_K, Sched<[itins.Sched]>;
2540   def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2541                     (ins _.MemOp:$src1, i32u8imm:$src2),
2542                     OpcodeStr##_.Suffix##mem#
2543                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2544                     [(set _.KRC:$dst,(OpNode
2545                                      (_.VT (bitconvert (_.LdFrag addr:$src1))),
2546                                      (i32 imm:$src2)))], itins.rm>,
2547                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
2548   def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2549                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2550                     OpcodeStr##_.Suffix##mem#
2551                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2552                     [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
2553                                   (_.VT (bitconvert (_.LdFrag addr:$src1))),
2554                                   (i32 imm:$src2))))], itins.rm>,
2555                     EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2556   def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2557                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2558                     OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2559                                       _.BroadcastStr##", $dst|$dst, ${src1}"
2560                                                   ##_.BroadcastStr##", $src2}",
2561                     [(set _.KRC:$dst,(OpNode
2562                                      (_.VT (X86VBroadcast
2563                                            (_.ScalarLdFrag addr:$src1))),
2564                                      (i32 imm:$src2)))], itins.rm>,
2565                     EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2566   def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2567                     (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2568                     OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2569                           _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
2570                                                    _.BroadcastStr##", $src2}",
2571                     [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode
2572                                      (_.VT (X86VBroadcast
2573                                            (_.ScalarLdFrag addr:$src1))),
2574                                      (i32 imm:$src2))))], itins.rm>,
2575                     EVEX_B, EVEX_K,  Sched<[itins.Sched.Folded, ReadAfterLd]>;
2576   }
2577 }
2578
// Width fan-out for vector fpclass. Passes a per-width memory-form mnemonic
// suffix to avx512_vector_fpclass: "{z}" for 512-bit, "{x}" for 128-bit and
// "{y}" for 256-bit. Z requires `prd`; Z128/Z256 additionally require HasVLX.
2579 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2580                                      bits<8> opc, SDNode OpNode,
2581                                      OpndItins itins, Predicate prd,
2582                                      string broadcast>{
2583   let Predicates = [prd] in {
2584     defm Z    : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
2585                                       _.info512, "{z}", broadcast>, EVEX_V512;
2586   }
2587   let Predicates = [prd, HasVLX] in {
2588     defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
2589                                       _.info128, "{x}", broadcast>, EVEX_V128;
2590     defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
2591                                       _.info256, "{y}", broadcast>, EVEX_V256;
2592   }
2593 }
2594
// Instantiates every fpclass flavour for one mnemonic:
//   PS / PD - vector forms (opcVec, VecOpNode) over the f32 / f64 vector type
//             infos, with broadcast mnemonic suffixes "{l}" / "{q}"
//   SS / SD - scalar forms (opcScalar, ScalarOpNode) over f32x_info/f64x_info
// The 64-bit element variants (PD, SD) add VEX_W.
2595 // FIXME: Is there a better scheduler itinerary for VFPCLASS?
2596 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2597              bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{
2598   defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2599                                       VecOpNode, SSE_ALU_F32P, prd, "{l}">,
2600                                       EVEX_CD8<32, CD8VF>;
2601   defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2602                                       VecOpNode, SSE_ALU_F64P, prd, "{q}">,
2603                                       EVEX_CD8<64, CD8VF> , VEX_W;
2604   defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
2605                                   SSE_ALU_F32S, f32x_info, prd>,
2606                                   EVEX_CD8<32, CD8VT1>;
2607   defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
2608                                   SSE_ALU_F64S, f64x_info, prd>,
2609                                   EVEX_CD8<64, CD8VT1>, VEX_W;
2610 }
2611
// VFPCLASS{PS,PD,SS,SD}: vector opcode 0x66, scalar opcode 0x67, all
// predicated on HasDQI.
2612 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
2613                                       X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX;
2614
2615 //-----------------------------------------------------------------
2616 // Mask register copy, including
2617 // - copy between mask registers
2618 // - load/store mask registers
2619 // - copy from GPR to mask register and vice versa
2620 //
// Mask-register move forms for one mask register class KRC:
//   kk - mask register to mask register (no selection pattern)
//   km - load of type `vvt` from `x86memop` into a mask register
//   mk - store of a mask register to `x86memop`
// NOTE(review): the brace-less `let` below applies only to `kk`, so `km` and
// `mk` receive no SchedRW annotation from this multiclass.
2621 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2622                          string OpcodeStr, RegisterClass KRC,
2623                          ValueType vvt, X86MemOperand x86memop> {
2624   let hasSideEffects = 0, SchedRW = [WriteMove] in
2625   def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2626              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
2627              IIC_SSE_MOVDQ>;
2628   def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2629              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2630              [(set KRC:$dst, (vvt (load addr:$src)))], IIC_SSE_MOVDQ>;
2631   def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2632              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2633              [(store KRC:$src, addr:$dst)], IIC_SSE_MOVDQ>;
2634 }
2635
// Moves between a general-purpose register class GRC and a mask register
// class KRC:
//   kr - GRC source, KRC destination
//   rk - KRC source, GRC destination
// Neither form has a selection pattern; both are marked hasSideEffects = 0
// and scheduled as WriteMove.
2636 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2637                              string OpcodeStr,
2638                              RegisterClass KRC, RegisterClass GRC> {
2639   let hasSideEffects = 0 in {
2640     def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2641                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
2642                IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
2643     def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2644                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
2645                IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
2646   }
2647 }
2648
// KMOV{B,W,D,Q} instantiations. KMOVB requires HasDQI, KMOVW requires
// HasAVX512, KMOVD/KMOVQ require HasBWI. For B and W the mask/memory forms and
// the GPR forms share one prefix (PD / PS) and are defined by a single defm;
// for D and Q the GPR forms use the XD prefix while the mask/memory forms use
// PD / PS with VEX_W, so each is split across two defms. Only KMOVQ uses GR64
// for its GPR forms; the others use GR32.
2649 let Predicates = [HasDQI] in
2650   defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2651                avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2652                VEX, PD;
2653
2654 let Predicates = [HasAVX512] in
2655   defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2656                avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2657                VEX, PS;
2658
2659 let Predicates = [HasBWI] in {
2660   defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2661                VEX, PD, VEX_W;
2662   defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2663                VEX, XD;
2664   defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2665                VEX, PS, VEX_W;
2666   defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2667                VEX, XD, VEX_W;
2668 }
2669
2670 // GR from/to mask register
// 16- and 8-bit bitconverts go through a 32-bit register: the GPR value is
// inserted into an undefined i32 (INSERT_SUBREG of IMPLICIT_DEF) before being
// copied to the mask class, and the reverse direction copies the mask to GR32
// and extracts the low sub-register.
2671 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2672           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2673 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2674           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2675
2676 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2677           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2678 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2679           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2680
// Extending a 16-bit mask to i32: zext uses KMOVWrk, anyext is a plain
// register-class copy.
2681 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2682           (KMOVWrk VK16:$src)>;
2683 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2684           (COPY_TO_REGCLASS VK16:$src, GR32)>;
2685
// Extending an 8-bit mask to i32: with DQI, zext uses KMOVBrk; without DQI it
// copies to GR32 and zero-extends the low byte with MOVZX32rr8.
2686 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2687           (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit))>, Requires<[NoDQI]>;
2688 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2689           (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2690 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2691           (COPY_TO_REGCLASS VK8:$src, GR32)>;
2692
// 32- and 64-bit bitconverts are direct register-class copies.
2693 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2694           (COPY_TO_REGCLASS GR32:$src, VK32)>;
2695 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2696           (COPY_TO_REGCLASS VK32:$src, GR32)>;
2697 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2698           (COPY_TO_REGCLASS GR64:$src, VK64)>;
2699 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2700           (COPY_TO_REGCLASS VK64:$src, GR64)>;
2701
2702 // Load/store kreg
// With DQI: VK1/VK2/VK4 stores copy the mask to VK8 and use KMOVBmk; v2i1 and
// v4i1 loads use KMOVBkm and copy the result to the narrower mask class.
2703 let Predicates = [HasDQI] in {
2704   def : Pat<(store VK4:$src, addr:$dst),
2705             (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
2706   def : Pat<(store VK2:$src, addr:$dst),
2707             (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
2708   def : Pat<(store VK1:$src, addr:$dst),
2709             (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
2710
2711   def : Pat<(v2i1 (load addr:$src)),
2712             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2713   def : Pat<(v4i1 (load addr:$src)),
2714             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2715 }
// Without DQI: stores of VK1/VK2/VK4/VK8 copy the mask to GR32 and store its
// low byte with MOV8mr; loads of v8i1/v2i1/v4i1 use MOVZX32rm8 and copy the
// result into the mask class.
2716 let Predicates = [HasAVX512, NoDQI] in {
2717   def : Pat<(store VK1:$src, addr:$dst),
2718             (MOV8mr addr:$dst,
2719              (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)),
2720               sub_8bit)))>;
2721   def : Pat<(store VK2:$src, addr:$dst),
2722             (MOV8mr addr:$dst,
2723              (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK2:$src, GR32)),
2724               sub_8bit)))>;
2725   def : Pat<(store VK4:$src, addr:$dst),
2726             (MOV8mr addr:$dst,
2727              (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK4:$src, GR32)),
2728               sub_8bit)))>;
2729   def : Pat<(store VK8:$src, addr:$dst),
2730             (MOV8mr addr:$dst,
2731              (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)),
2732               sub_8bit)))>;
2733
2734   def : Pat<(v8i1 (load addr:$src)),
2735             (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2736   def : Pat<(v2i1 (load addr:$src)),
2737             (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK2)>;
2738   def : Pat<(v4i1 (load addr:$src)),
2739             (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK4)>;
2740 }
2741
// Independent of DQI: v1i1 loads, and v8i1 bitconverts of an i8 load, use
// MOVZX32rm8 followed by a copy into the mask class.
2742 let Predicates = [HasAVX512] in {
2743   def : Pat<(v1i1 (load addr:$src)),
2744             (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK1)>;
2745   def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2746             (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2747 }
2748
// GPR <-> mask copies for scalar_to_vector and element-0 extraction, all
// predicated on HasAVX512.
2749 let Predicates = [HasAVX512] in {
// For one mask class/type pair: scalar_to_vector from GR32 and X86kextract of
// element 0 are plain register-class copies; scalar_to_vector from GR8 first
// inserts the byte into an undefined i32.
2750   multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2751     def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2752               (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2753
2754     def : Pat<(i32 (X86kextract maskRC:$src, (iPTR 0))),
2755               (COPY_TO_REGCLASS maskRC:$src, GR32)>;
2756
2757     def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2758               (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2759   }
2760
2761   defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2762   defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2763   defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2764   defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2765   defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2766   defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2767   defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
2768
// A kshiftl-by-15 / kshiftr-by-15 pair applied to scalar_to_vector of a GR8 is
// selected as: AND the (i32-widened) byte with 1, move it to a mask register
// with KMOVWkr, then copy to the requested mask class (VK1, VK16 or VK8).
2769   def : Pat<(X86kshiftr  (X86kshiftl (v1i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
2770           (COPY_TO_REGCLASS
2771                 (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
2772                                             GR8:$src, sub_8bit), (i32 1))), VK1)>;
2773   def : Pat<(X86kshiftr  (X86kshiftl (v16i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
2774             (COPY_TO_REGCLASS
2775                 (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
2776                                             GR8:$src, sub_8bit), (i32 1))), VK16)>;
2777   def : Pat<(X86kshiftr  (X86kshiftl (v8i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
2778          (COPY_TO_REGCLASS
2779           (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
2780                                             GR8:$src, sub_8bit), (i32 1))), VK8)>;
2781
2782 }
2783
2784 // Mask unary operation
2785 // - KNOT
// One register-to-register unary mask instruction `rr` on register class KRC,
// selected for (OpNode KRC:$src) and predicated on `prd`.
2786 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2787                             RegisterClass KRC, SDPatternOperator OpNode,
2788                             OpndItins itins, Predicate prd> {
2789   let Predicates = [prd] in
2790     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2791                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2792                [(set KRC:$dst, (OpNode KRC:$src))], itins.rr>,
2793                Sched<[itins.Sched]>;
2794 }
2795
// Instantiates a unary mask operation for all four mask widths by appending
// "b"/"w"/"d"/"q" to the mnemonic:
//   B - VK8,  HasDQI,    PD        W - VK16, HasAVX512, PS
//   D - VK32, HasBWI,    PD+VEX_W  Q - VK64, HasBWI,    PS+VEX_W
2796 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2797                                 SDPatternOperator OpNode, OpndItins itins> {
2798   defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2799                             itins, HasDQI>, VEX, PD;
2800   defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2801                             itins, HasAVX512>, VEX, PS;
2802   defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2803                             itins, HasBWI>, VEX, PD, VEX_W;
2804   defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2805                             itins, HasBWI>, VEX, PS, VEX_W;
2806 }
2807
// KNOT{B,W,D,Q}: opcode 0x44, selected for `vnot`.
2808 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SSE_BIT_ITINS_P>;
2809
2810 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
// The narrower-mask vnot patterns below copy the operand to VK16, apply
// KNOTWrr, and copy the result back to the original mask class.
// NOTE(review): the brace-less `let` covers only the VK8 pattern; the VK4 and
// VK2 patterns carry no predicate.
2811 let Predicates = [HasAVX512, NoDQI] in
2812 def : Pat<(vnot VK8:$src),
2813           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2814
2815 def : Pat<(vnot VK4:$src),
2816           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2817 def : Pat<(vnot VK2:$src),
2818           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2819
2820 // Mask binary operation
2821 // - KAND, KANDN, KOR, KXNOR, KXOR
// One register-register binary mask instruction `rr` on register class KRC,
// selected for (OpNode KRC:$src1, KRC:$src2). `prd` supplies the predicate and
// `IsCommutable` is forwarded to the instruction's isCommutable field.
2822 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2823                            RegisterClass KRC, SDPatternOperator OpNode,
2824                            OpndItins itins, Predicate prd, bit IsCommutable> {
2825   let Predicates = [prd], isCommutable = IsCommutable in
2826     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2827                !strconcat(OpcodeStr,
2828                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2829                [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
2830                Sched<[itins.Sched]>;
2831 }
2832
// Instantiates a binary mask operation for all four mask widths by appending
// "b"/"w"/"d"/"q" to the mnemonic. All forms are VEX_4V + VEX_L.
//   B - VK8,  HasDQI, PD          W - VK16, prdW,   PS
//   D - VK32, HasBWI, VEX_W + PD  Q - VK64, HasBWI, VEX_W + PS
// `prdW` defaults to HasAVX512 and lets a caller tighten the predicate of the
// 16-bit form only.
2833 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2834                                SDPatternOperator OpNode, OpndItins itins,
2835                                bit IsCommutable, Predicate prdW = HasAVX512> {
2836   defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2837                              itins, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
2838   defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2839                              itins, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
2840   defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2841                              itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
2842   defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2843                              itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
2844 }
2845
// Pattern fragments for and-not and xnor, written with 'not':
//   andn(a, b) = and(not(a), b)
//   xnor(a, b) = not(xor(a, b))
def andn : PatFrag<(ops node:$i0, node:$i1),
                   (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1),
                   (not (xor node:$i0, node:$i1))>;
// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1),
                    (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1),
                    (vnot (xor node:$i0, node:$i1))>;

// Mask logic and add instructions. Template arguments are: opcode, mnemonic
// stem, selection node, itineraries, commutable flag. KANDN is the only
// non-commutable entry; KADD passes HasDQI as the predicate of its W form.
defm KAND  : avx512_mask_binop_all<0x41, "kand", and, SSE_BIT_ITINS_P, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor", or, SSE_BIT_ITINS_P, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SSE_BIT_ITINS_P, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor", xor, SSE_BIT_ITINS_P, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SSE_BIT_ITINS_P, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd", add, SSE_BIT_ITINS_P, 1,
                                   HasDQI>;
2858
// Lowers a logic operation on mask types narrower than 16 bits onto the
// 16-bit instruction Inst: both operands are copied into VK16, Inst is
// applied, and the result is copied back into the register class of the
// original mask type.
//   VOpNode - node matched for the VK2, VK4 and VK8 patterns.
//   OpNode  - node matched for the VK1 pattern.
//   Inst    - the 16-bit mask instruction to emit (e.g. KANDWrr).
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
                            Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway.
  // Each result is copied back to the class matching its operands (the VK2
  // and VK4 patterns previously named VK1 here).
  def : Pat<(OpNode VK1:$src1, VK1:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK1:$src1, VK16),
                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK2:$src1, VK16),
                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK4:$src1, VK16),
                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
2883
// Narrow-mask lowering for each logic operation. Arguments are: the node
// matched for VK2/VK4/VK8, the node matched for VK1, and the 16-bit
// instruction that implements the operation.
defm : avx512_binop_pat<and, and, KANDWrr>;
defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
defm : avx512_binop_pat<or, or, KORWrr>;
defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
defm : avx512_binop_pat<xor, xor, KXORWrr>;
2889
// Mask unpacking
//
// Defines the 'rr' form of kunpck<Suffix> (opcode 0x4b) on the destination
// class KRC, plus a pattern that selects it for a concat_vectors of two
// KRCSrc masks producing VT. The instruction itself has no pattern; the
// separate Pat passes the concat_vectors operands in swapped order ($src2
// first, then $src1), each copied into KRC.
multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
                             RegisterClass KRCSrc, OpndItins itins, Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in {
      def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
                 (ins KRC:$src1, KRC:$src2),
                 !strconcat("kunpck", Suffix,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [], itins.rr>,
               VEX_4V, VEX_L, Sched<[itins.Sched]>;
    }

    def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
              (!cast<Instruction>(NAME##rr)
                 (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
                 (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
  }
}
2906
// Unpack instantiations: two VK8 -> VK16 (HasAVX512), two VK16 -> VK32 and
// two VK32 -> VK64 (both HasBWI).
defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, SSE_UNPCK,
                                  HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, SSE_UNPCK,
                                  HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, SSE_UNPCK,
                                  HasBWI>, PS, VEX_W;
2910
// Mask bit testing
//
// Defines a two-operand 'rr' form with no register output: the instruction
// is declared as defining EFLAGS, and its pattern sets EFLAGS from
// OpNode($src1, $src2) on the mask class KRC.
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, OpndItins itins, Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in {
    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
               OpcodeStr # "\t{$src2, $src1|$src1, $src2}",
               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
             Sched<[itins.Sched]>;
  }
}
2920
// Expands a mask test operation into its four width variants:
//   B - VK8,  HasDQI
//   W - VK16, prdW (HasAVX512 by default)
//   Q - VK64, HasBWI
//   D - VK32, HasBWI
// The width letter is appended to OpcodeStr; the variants differ in the
// prefix (PD/PS) and in VEX_W.
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                OpndItins itins, Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                              itins, HasDQI>,
           VEX, PD;
  defm W : avx512_mask_testop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                              itins, prdW>,
           VEX, PS;
  defm Q : avx512_mask_testop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                              itins, HasBWI>,
           VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                              itins, HasBWI>,
           VEX, PD, VEX_W;
}
2932
// KORTEST (0x98) keeps the default HasAVX512 predicate for its W form;
// KTEST (0x99) passes HasDQI for it.
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SSE_PTEST>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SSE_PTEST,
                                    HasDQI>;
2935
// Mask shift
//
// Defines the 'ri' form: a mask register shifted by an 8-bit immediate.
// The pattern is $dst = OpNode($src, (i8 imm)) on the mask class KRC, and
// the definition sits under a HasAVX512 predicate list.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, OpndItins itins> {
  let Predicates = [HasAVX512] in {
    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
                 OpcodeStr # "\t{$imm, $src, $dst|$dst, $src, $imm}",
                 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))],
                 itins.rr>,
             Sched<[itins.Sched]>;
  }
}
2946
// Expands a mask shift into its four width variants. W and B use opc1, Q
// and D use opc2; within each pair the first carries VEX_W and the second
// does not. B is instantiated under a HasDQI let, Q and D under a HasBWI let.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, OpndItins itins> {
  defm W : avx512_mask_shiftop<opc1, OpcodeStr # "w", VK16, OpNode, itins>,
           VEX, TAPD, VEX_W;

  let Predicates = [HasDQI] in {
    defm B : avx512_mask_shiftop<opc1, OpcodeStr # "b", VK8, OpNode, itins>,
             VEX, TAPD;
  }

  let Predicates = [HasBWI] in {
    defm Q : avx512_mask_shiftop<opc2, OpcodeStr # "q", VK64, OpNode, itins>,
             VEX, TAPD, VEX_W;
    defm D : avx512_mask_shiftop<opc2, OpcodeStr # "d", VK32, OpNode, itins>,
             VEX, TAPD;
  }
}
2961
// Left and right mask shifts. The first opcode is used by the W/B forms and
// the second by the Q/D forms.
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl,
                                     SSE_PSHUF>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr,
                                     SSE_PSHUF>;
2964
// Lowers a v8i32 compare producing v8i1 onto the 512-bit instruction named
// InstStr: each 256-bit operand is inserted into the sub_ymm part of an
// otherwise undefined v16i32 register, the Z-suffixed instruction is used,
// and its result is copied to VK8.
multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr> {
  // Unmasked compare -> <InstStr>Zrr.
  def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr##Zrr)
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
              VK8)>;

  // Compare and-ed with a VK8 mask -> <InstStr>Zrrk, with the mask copied
  // into VK16.
  def : Pat<(v8i1 (and VK8:$mask,
                       (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr##Zrrk)
                (COPY_TO_REGCLASS VK8:$mask, VK16),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
              VK8)>;
}
2980
// Same widening scheme as axv512_icmp_packed_no_vlx_lowering, for compares
// that carry an immediate condition code ($cc). The 256-bit and 512-bit
// value types come from the AVX512VLVectorVTInfo argument '_'.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
                                                 AVX512VLVectorVTInfo _> {
  // Unmasked compare -> <InstStr>Zrri.
  def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1),
                          (_.info256.VT VR256X:$src2), imm:$cc)),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr##Zrri)
                (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
                imm:$cc),
              VK8)>;

  // Compare and-ed with a VK8 mask -> <InstStr>Zrrik, with the mask copied
  // into VK16.
  def : Pat<(v8i1 (and VK8:$mask,
                       (OpNode (_.info256.VT VR256X:$src1),
                               (_.info256.VT VR256X:$src2), imm:$cc))),
            (COPY_TO_REGCLASS
              (!cast<Instruction>(InstStr##Zrrik)
                (COPY_TO_REGCLASS VK8:$mask, VK16),
                (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
                imm:$cc),
              VK8)>;
}
2997
// 256-bit compare lowering used when AVX512 is available but VLX is not.
let Predicates = [HasAVX512, NoVLX] in {
  // Integer greater-than / equal compares on v8i32.
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD">;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD">;

  // Compares that carry a condition-code immediate.
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS",
                                               avx512vl_f32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD",
                                               avx512vl_i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD",
                                               avx512vl_i32_info>;
}
3006
// Mask setting all 0s or 1s
//
// Defines a pseudo instruction with no inputs that produces the constant
// Val, typed as VT, in the mask class KRC. It is marked rematerializable
// and as cheap as a move, and uses the WriteZero scheduling class.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512], isReMaterializable = 1, isAsCheapAsAMove = 1,
      isPseudo = 1, SchedRW = [WriteZero] in
    def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                   [(set KRC:$dst, (VT Val))]>;
}
3015
// Instantiates the mask-setting pseudo for the 16-, 32- and 64-bit mask
// classes (suffixes W, D and Q). There is no 8-bit variant here.
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}
3021
// All-zeros and all-ones mask pseudos.
defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;

// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// The v1i1 through v8i1 constants are produced by the 16-bit pseudo and
// copied into the narrower register class.
let Predicates = [HasAVX512] in {
  // All zeros.
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;

  // All ones.
  def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
3036
// Patterns for kmask insert_subvector/extract_subvector to/from index=0
//
// Both directions become a plain register-class copy between the narrow
// class subRC (type subVT) and the wide class RC (type VT):
//   - extracting subVT from index 0 of a VT value, and
//   - inserting a subVT value at index 0 of an undef VT value.
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  // Extract at index 0: wide -> narrow.
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  // Insert into undef at index 0: narrow -> wide.
  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
// Instantiate the index-0 insert/extract lowering for every
// (narrow, wide) pair of mask types, grouped by the narrow type.

// v1i1 sub-vector.
defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;

// v2i1 sub-vector.
defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;

// v4i1 sub-vector.
defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;

// v8i1 sub-vector.
defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;

// v16i1 sub-vector.
defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

// v32i1 sub-vector.
defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;

3072
3073 //===----------------------------------------------------------------------===//
3074 // AVX-512 - Aligned and unaligned load and store
3075 //
3076
3077
// Defines the load-direction forms of a vector move for one vector type '_':
//   rr   - register move, no pattern
//   rrkz - zero-masking register move
//   rm   - load (pattern omitted when NoRMPattern is set)
//   rrk  - merge-masking register move ($src0 is tied to $dst)
//   rmk  - merge-masking load          ($src0 is tied to $dst)
//   rmkz - zero-masking load
// followed by patterns that select the masked-load fragment 'mload' onto the
// rmkz/rmk forms.
//   ld_frag    - load fragment used by the rm/rmk/rmkz patterns.
//   SelectOprr - select node used by the rrk/rrkz patterns (vselect unless
//                the caller overrides it); the memory forms always use
//                vselect.
multiclass avx512_load<bits<8> opc, string OpcodeStr, MoveLoadStoreItins itins,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
    def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}", [],
                      _.ExeDomain, itins.rr>,
             EVEX, Sched<[WriteMove]>;

    def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.RC:$src),
                        OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                                    "${dst} {${mask}} {z}, $src}",
                        [(set _.RC:$dst,
                              (_.VT (SelectOprr _.KRCWM:$mask,
                                                (_.VT _.RC:$src),
                                                _.ImmAllZerosV)))],
                        _.ExeDomain, itins.rr>,
               EVEX, EVEX_KZ, Sched<[WriteMove]>;

    let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
    def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      !if(NoRMPattern, [],
                          [(set _.RC:$dst,
                                (_.VT (bitconvert (ld_frag addr:$src))))]),
                      _.ExeDomain, itins.rm>,
             EVEX, Sched<[WriteLoad]>;

    // Merge-masking forms: the pass-through operand $src0 shares the
    // destination register.
    let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
      def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                         (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                         OpcodeStr # "\t{$src1, ${dst} {${mask}}|" #
                                     "${dst} {${mask}}, $src1}",
                         [(set _.RC:$dst,
                               (_.VT (SelectOprr _.KRCWM:$mask,
                                                 (_.VT _.RC:$src1),
                                                 (_.VT _.RC:$src0))))],
                         _.ExeDomain, itins.rr>,
                EVEX, EVEX_K, Sched<[WriteMove]>;

      def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                         (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                         OpcodeStr # "\t{$src1, ${dst} {${mask}}|" #
                                     "${dst} {${mask}}, $src1}",
                         [(set _.RC:$dst,
                               (_.VT (vselect _.KRCWM:$mask,
                                              (_.VT (bitconvert (ld_frag addr:$src1))),
                                              (_.VT _.RC:$src0))))],
                         _.ExeDomain, itins.rm>,
                EVEX, EVEX_K, Sched<[WriteLoad]>;
    }

    def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.MemOp:$src),
                        !strconcat(OpcodeStr,
                                   "\t{$src, ${dst} {${mask}} {z}|",
                                   "${dst} {${mask}} {z}, $src}"),
                        [(set _.RC:$dst,
                              (_.VT (vselect _.KRCWM:$mask,
                                             (_.VT (bitconvert (ld_frag addr:$src))),
                                             _.ImmAllZerosV)))],
                        _.ExeDomain, itins.rm>,
               EVEX, EVEX_KZ, Sched<[WriteLoad]>;
  }

  // Masked loads whose pass-through is undef or all zeros use the
  // zero-masking load.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  // Masked loads with a register pass-through use the merge-masking load.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(NAME#_.ZSuffix##rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}
3140
// Aligned-load variants for the three vector lengths of '_'. The 512-bit
// form (Z) requires only prd; the 256- and 128-bit forms (Z256, Z128) also
// require HasVLX. Each length uses its own aligned load fragment and its own
// aligned masked-load fragment, with the SSE_MOVA itineraries.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info512,
                         _.info512.AlignedLdFrag, masked_load_aligned512>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info256,
                            _.info256.AlignedLdFrag, masked_load_aligned256>,
                EVEX_V256;
    defm Z128 : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info128,
                            _.info128.AlignedLdFrag, masked_load_aligned128>,
                EVEX_V128;
  }
}
3158
// Unaligned-load variants for the three vector lengths of '_'. The 512-bit
// form (Z) requires only prd; Z256 and Z128 also require HasVLX. All lengths
// share masked_load_unaligned and the SSE_MOVU itineraries. NoRMPattern and
// SelectOprr are forwarded unchanged to avx512_load.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _,
                          Predicate prd,
                          bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in {
    defm Z : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info512,
                         _.info512.LdFrag, masked_load_unaligned,
                         NoRMPattern, SelectOprr>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info256,
                            _.info256.LdFrag, masked_load_unaligned,
                            NoRMPattern, SelectOprr>,
                EVEX_V256;
    defm Z128 : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info128,
                            _.info128.LdFrag, masked_load_unaligned,
                            NoRMPattern, SelectOprr>,
                EVEX_V128;
  }
}
3178
// Defines the store-direction forms of a vector move for one vector type '_':
//   rr_REV / rrk_REV / rrkz_REV - register-to-register forms using the
//       MRMDestReg encoding. They have no pattern, print with a ".s"
//       mnemonic suffix, and are tied via FoldGenData to the forms named
//       Name#rr / Name#rrk / Name#rrkz.
//   mr  - store (pattern omitted when NoMRPattern is set)
//   mrk - masked store, no pattern on the instruction itself
// followed by a pattern selecting the masked-store fragment 'mstore' onto
// the mrk form.
multiclass avx512_store<bits<8> opc, string OpcodeStr, MoveLoadStoreItins itins,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        string Name, bit NoMRPattern = 0> {
  let hasSideEffects = 0 in {
    def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                          !strconcat(OpcodeStr, ".s\t{$src, $dst|$dst, $src}"),
                          [], _.ExeDomain, itins.rr>,
                 EVEX, FoldGenData<Name#rr>, Sched<[WriteMove]>;

    def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                           (ins _.KRCWM:$mask, _.RC:$src),
                           !strconcat(OpcodeStr,
                                      ".s\t{$src, ${dst} {${mask}}|",
                                      "${dst} {${mask}}, $src}"),
                           [], _.ExeDomain, itins.rr>,
                  EVEX, EVEX_K, FoldGenData<Name#rrk>, Sched<[WriteMove]>;

    def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                            (ins _.KRCWM:$mask, _.RC:$src),
                            !strconcat(OpcodeStr,
                                       ".s\t{$src, ${dst} {${mask}} {z}|",
                                       "${dst} {${mask}} {z}, $src}"),
                            [], _.ExeDomain, itins.rr>,
                   EVEX, EVEX_KZ, FoldGenData<Name#rrkz>, Sched<[WriteMove]>;
  }

  // The let below has no braces, so it covers only 'mr'.
  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    OpcodeStr # "\t{$src, $dst|$dst, $src}",
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain, itins.mr>,
           EVEX, Sched<[WriteStore]>;

  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     !strconcat(OpcodeStr,
                       "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
                     [], _.ExeDomain, itins.mr>,
            EVEX, EVEX_K, Sched<[WriteStore]>;

  // Masked store -> mrk.
  def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
           (!cast<Instruction>(NAME#_.ZSuffix##mrk) addr:$ptr,
                                                    _.KRCWM:$mask, _.RC:$src)>;
}
3216
3217
// Unaligned-store variants for the three vector lengths of '_'. Z requires
// only prd; Z256 and Z128 also require HasVLX. All lengths use the 'store'
// fragment, masked_store_unaligned and the SSE_MOVU itineraries. Name is
// extended with the length suffix before being passed to avx512_store.
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _, Predicate prd,
                            string Name, bit NoMRPattern = 0> {
  let Predicates = [prd] in {
    defm Z : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info512, store,
                          masked_store_unaligned, Name#Z, NoMRPattern>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info256, store,
                             masked_store_unaligned, Name#Z256, NoMRPattern>,
                EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info128, store,
                             masked_store_unaligned, Name#Z128, NoMRPattern>,
                EVEX_V128;
  }
}
3234
// Aligned-store variants for the three vector lengths of '_'. Z requires
// only prd; Z256 and Z128 also require HasVLX. All lengths use the
// 'alignedstore' fragment and the SSE_MOVA itineraries, with a
// length-specific aligned masked-store fragment.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _,  Predicate prd,
                                  string Name> {
  let Predicates = [prd] in {
    defm Z : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info512, alignedstore,
                          masked_store_aligned512, Name#Z>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info256, alignedstore,
                             masked_store_aligned256, Name#Z256>,
                EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info128, alignedstore,
                             masked_store_aligned128, Name#Z128>,
                EVEX_V128;
  }
}
3249
// Aligned floating-point moves: load opcode 0x28, store opcode 0x29.
defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512>,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, "VMOVAPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512>,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, "VMOVAPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned floating-point moves: load opcode 0x10, store opcode 0x11.
// null_frag is passed as the select node for the register masked forms.
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               "VMOVUPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               "VMOVUPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Aligned integer moves: load opcode 0x6F, store opcode 0x7F.
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, "VMOVDQA32">,
                 PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, "VMOVDQA64">,
                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Unaligned byte/word moves need HasBWI. The unmasked load and store
// patterns are disabled for them (NoRMPattern / NoMRPattern = 1).
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 1>,
                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
                                HasBWI, "VMOVDQU8", 1>,
                XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 1>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
                                 HasBWI, "VMOVDQU16", 1>,
                 XD, VEX_W, EVEX_CD8<16, CD8VF>;

// Unaligned dword/qword moves; null_frag is passed as the select node for
// the register masked forms.
defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
                                 HasAVX512, "VMOVDQU32">,
                 XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
                                 HasAVX512, "VMOVDQU64">,
                 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3307
// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.

// Load pseudos (128- and 256-bit, aligned and unaligned). They have no
// pattern and no assembly string.
let isReMaterializable = 1, canFoldAsLoad = 1, isPseudo = 1,
    SchedRW = [WriteLoad], mayLoad = 1, hasSideEffects = 0 in {
  def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst),
                              (ins f128mem:$src), "", [], IIC_SSE_MOVA_P_RM>;
  def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst),
                              (ins f256mem:$src), "", [], IIC_SSE_MOVA_P_RM>;
  def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst),
                              (ins f128mem:$src), "", [], IIC_SSE_MOVA_P_RM>;
  def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst),
                              (ins f256mem:$src), "", [], IIC_SSE_MOVA_P_RM>;
}

// Matching store pseudos.
let isPseudo = 1, SchedRW = [WriteStore], mayStore = 1, hasSideEffects = 0 in {
  def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs),
                              (ins f128mem:$dst, VR128X:$src), "", [],
                              IIC_SSE_MOVA_P_MR>;
  def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs),
                              (ins f256mem:$dst, VR256X:$src), "", [],
                              IIC_SSE_MOVA_P_MR>;
  def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs),
                              (ins f128mem:$dst, VR128X:$src), "", [],
                              IIC_SSE_MOVA_P_MR>;
  def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs),
                              (ins f256mem:$dst, VR256X:$src), "", [],
                              IIC_SSE_MOVA_P_MR>;
}
3333
// A select that yields zero where the mask is set and $src elsewhere is
// implemented as a zero-masking move under the inverted mask. For v8i64 the
// mask is inverted with KNOTW after being copied into VK16, then copied back
// to VK8.
def : Pat<(v8i64 (vselect VK8WM:$mask,
                          (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz
            (COPY_TO_REGCLASS
              (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)), VK8),
            VR512:$src)>;

// For v16i32 the mask is already 16 bits wide, so KNOTW is applied directly.
def : Pat<(v16i32 (vselect VK16WM:$mask,
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
                          (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3353
// Implements a vselect on the Narrow vector type with the masked move
// InstrStr on the Wide type: the narrow operands are inserted into
// otherwise undefined wide registers, the mask is copied into the wide
// write-mask class, and the narrow result is extracted from the wide result.
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
  // select(mask, src1, src0) -> merge-masking move (rrk).
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.RC:$src0)),
            (EXTRACT_SUBREG
              (Wide.VT
                (!cast<Instruction>(InstrStr#"rrk")
                  (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
                  (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
                  (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
              Narrow.SubRegIdx)>;

  // select(mask, src1, zero) -> zero-masking move (rrkz).
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
            (EXTRACT_SUBREG
              (Wide.VT
                (!cast<Instruction>(InstrStr#"rrkz")
                  (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
                  (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
              Narrow.SubRegIdx)>;
}
3375
// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
  defm : mask_move_lowering<"VMOVAPSZ",   v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
}
3382
// Stores of byte- and word-element vectors are selected onto the 32-bit
// element integer moves: VMOVDQA32 for aligned stores, VMOVDQU32 otherwise.
let Predicates = [HasAVX512] in {
  // 512-bit store.
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA32Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA32Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU32Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU32Zmr addr:$dst, VR512:$src)>;
}

// The 128- and 256-bit equivalents require VLX.
let Predicates = [HasVLX] in {
  // 128-bit store.
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA32Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA32Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU32Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU32Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA32Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA32Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU32Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU32Z256mr addr:$dst, VR256X:$src)>;
}
3416
// Folds a masked select whose selected value is the index-0 subvector of a
// wider register into a single masked move InstrStr.
//   From - type of the wide source vector.
//   To   - type of the extracted subvector.
//   Cast - type the subvector is bitconverted to; the select and its mask
//          operate on this type.
// The subvector is taken with EXTRACT_SUBREG at To.SubRegIdx.
multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
                                   X86VectorVTInfo To, X86VectorVTInfo Cast> {
  // Register pass-through -> merge-masking move (rrk).
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                                (To.VT (extract_subvector
                                         (From.VT From.RC:$src), (iPTR 0)))),
                              To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                       Cast.RC:$src0, Cast.KRCWM:$mask,
                       (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;

  // All-zeros pass-through -> zero-masking move (rrkz).
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                                (To.VT (extract_subvector
                                         (From.VT From.RC:$src), (iPTR 0)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                       Cast.KRCWM:$mask,
                       (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;
}
3437
3438
// Instantiations of masked_move_for_extract, guarded by HasVLX. The template
// arguments are, in order: instruction name prefix, source vector info (From),
// extracted subvector info (To) and vselect result info (Cast). For every
// From/To pair both element widths of the masked move are covered (the
// 64-bit and 32-bit integer moves, or the PD and PS floating-point moves),
// chosen by the element type of Cast.
3439 let Predicates = [HasVLX] in {
3440 // A masked extract from the first 128-bits of a 256-bit vector can be
3441 // implemented with masked move.
3442 defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info,  v2i64x_info, v2i64x_info>;
3443 defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info,  v4i32x_info, v2i64x_info>;
3444 defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>;
3445 defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info,  v16i8x_info, v2i64x_info>;
3446 defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info,  v2i64x_info, v4i32x_info>;
3447 defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info,  v4i32x_info, v4i32x_info>;
3448 defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>;
3449 defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info,  v16i8x_info, v4i32x_info>;
3450 defm : masked_move_for_extract<"VMOVAPDZ128",   v4f64x_info,  v2f64x_info, v2f64x_info>;
3451 defm : masked_move_for_extract<"VMOVAPDZ128",   v8f32x_info,  v4f32x_info, v2f64x_info>;
3452 defm : masked_move_for_extract<"VMOVAPSZ128",   v4f64x_info,  v2f64x_info, v4f32x_info>;
3453 defm : masked_move_for_extract<"VMOVAPSZ128",   v8f32x_info,  v4f32x_info, v4f32x_info>;
3454
3455 // A masked extract from the first 128-bits of a 512-bit vector can be
3456 // implemented with masked move.
3457 defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info,  v2i64x_info, v2i64x_info>;
3458 defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>;
3459 defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>;
3460 defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info,  v16i8x_info, v2i64x_info>;
3461 defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info,  v2i64x_info, v4i32x_info>;
3462 defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>;
3463 defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>;
3464 defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info,  v16i8x_info, v4i32x_info>;
3465 defm : masked_move_for_extract<"VMOVAPDZ128",   v8f64_info,  v2f64x_info, v2f64x_info>;
3466 defm : masked_move_for_extract<"VMOVAPDZ128",   v16f32_info, v4f32x_info, v2f64x_info>;
3467 defm : masked_move_for_extract<"VMOVAPSZ128",   v8f64_info,  v2f64x_info, v4f32x_info>;
3468 defm : masked_move_for_extract<"VMOVAPSZ128",   v16f32_info, v4f32x_info, v4f32x_info>;
3469
3470 // A masked extract from the first 256-bits of a 512-bit vector can be
3471 // implemented with masked move.
3472 defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info,  v4i64x_info,  v4i64x_info>;
3473 defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info,  v4i64x_info>;
3474 defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>;
3475 defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info,  v32i8x_info,  v4i64x_info>;
3476 defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info,  v4i64x_info,  v8i32x_info>;
3477 defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info,  v8i32x_info>;
3478 defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>;
3479 defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info,  v32i8x_info,  v8i32x_info>;
3480 defm : masked_move_for_extract<"VMOVAPDZ256",   v8f64_info,  v4f64x_info,  v4f64x_info>;
3481 defm : masked_move_for_extract<"VMOVAPDZ256",   v16f32_info, v8f32x_info,  v4f64x_info>;
3482 defm : masked_move_for_extract<"VMOVAPSZ256",   v8f64_info,  v4f64x_info,  v8f32x_info>;
3483 defm : masked_move_for_extract<"VMOVAPSZ256",   v16f32_info, v8f32x_info,  v8f32x_info>;
3484 }
3485
3486 // Move Int Doubleword to Packed Double Int
3487 //
// EVEX-encoded vmovd/vmovq definitions, all in the SSEPackedInt execution
// domain:
//   VMOVDI2PDIZrr/rm  - GR32 or 32-bit memory -> v4i32 in VR128X
//                       (scalar_to_vector).
//   VMOV64toPQIZrr/rm - GR64 or 64-bit memory -> v2i64 in VR128X. The rm form
//                       has an empty pattern list, so mayLoad and
//                       hasSideEffects are given explicitly.
//   VMOV64toSDZrr/rm  - GR64 or 64-bit memory -> FR64X via bitconvert.
//   VMOVSDto64Zrr/mr  - FR64X -> GR64 or 64-bit memory via bitconvert.
// The four FR64X definitions are marked isCodeGenOnly.
3488 let ExeDomain = SSEPackedInt in {
3489 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3490                       "vmovd\t{$src, $dst|$dst, $src}",
3491                       [(set VR128X:$dst,
3492                         (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
3493                         EVEX, Sched<[WriteMove]>;
3494 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3495                       "vmovd\t{$src, $dst|$dst, $src}",
3496                       [(set VR128X:$dst,
3497                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
3498                       IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>;
3499 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3500                       "vmovq\t{$src, $dst|$dst, $src}",
3501                         [(set VR128X:$dst,
3502                           (v2i64 (scalar_to_vector GR64:$src)))],
3503                           IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
3504 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3505 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3506                       (ins i64mem:$src),
3507                       "vmovq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>,
3508                       EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteLoad]>;
3509 let isCodeGenOnly = 1 in {
3510 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3511                        "vmovq\t{$src, $dst|$dst, $src}",
3512                        [(set FR64X:$dst, (bitconvert GR64:$src))],
3513                        IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
     // Unlike its siblings, this load form is declared with AVX512XSI and
     // opcode 0x7E, and carries no itinerary argument.
3514 def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
3515                       "vmovq\t{$src, $dst|$dst, $src}",
3516                       [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
3517                       EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>;
3518 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3519                          "vmovq\t{$src, $dst|$dst, $src}",
3520                          [(set GR64:$dst, (bitconvert FR64X:$src))],
3521                          IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
3522 def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
3523                          "vmovq\t{$src, $dst|$dst, $src}",
3524                          [(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
3525                          IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
3526                          EVEX_CD8<64, CD8VT1>;
3527 }
3528 } // ExeDomain = SSEPackedInt
3529
3530 // Move Int Doubleword to Single Scalar
3531 //
// Codegen-only vmovd forms that reinterpret a 32-bit integer (from GR32 or
// from a 32-bit load) as a scalar in FR32X via bitconvert.
3532 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3533 def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3534                       "vmovd\t{$src, $dst|$dst, $src}",
3535                       [(set FR32X:$dst, (bitconvert GR32:$src))],
3536                       IIC_SSE_MOVDQ>, EVEX, Sched<[WriteMove]>;
3537
3538 def VMOVDI2SSZrm  : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
3539                       "vmovd\t{$src, $dst|$dst, $src}",
3540                       [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
3541                       IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>;
3542 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3543
3544 // Move doubleword from xmm register to r/m32
3545 //
// Both forms match extraction of element 0 of a v4i32 held in VR128X: the rr
// form writes it to GR32, the mr form stores it to a 32-bit memory location.
3546 let ExeDomain = SSEPackedInt in {
3547 def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3548                        "vmovd\t{$src, $dst|$dst, $src}",
3549                        [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3550                                         (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
3551                        EVEX, Sched<[WriteMove]>;
3552 def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3553                        (ins i32mem:$dst, VR128X:$src),
3554                        "vmovd\t{$src, $dst|$dst, $src}",
3555                        [(store (i32 (extractelt (v4i32 VR128X:$src),
3556                                      (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
3557                        EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteStore]>;
3558 } // ExeDomain = SSEPackedInt
3559
3560 // Move quadword from xmm1 register to r/m64
3561 //
// vmovq forms taking the low 64 bits of an XMM register:
//   VMOVPQIto64Zrr - opcode 0x7E; element 0 of a v2i64 -> GR64.
//   VMOVPQIto64Zmr - opcode 0x7E memory form; no pattern, codegen-only with
//                    ForceDisassemble set.
//   VMOVPQI2QIZmr  - opcode 0xD6; stores element 0 of a v2i64 to memory.
//   VMOVPQI2QIZrr  - opcode 0xD6 register form; no pattern, printed as
//                    "vmovq.s".
// The first three are restricted to HasAVX512 together with In64BitMode.
3562 let ExeDomain = SSEPackedInt in {
3563 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3564                       "vmovq\t{$src, $dst|$dst, $src}",
3565                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3566                                                    (iPTR 0)))],
3567                       IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, Sched<[WriteMove]>,
3568                       Requires<[HasAVX512, In64BitMode]>;
3569
3570 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3571 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3572                       "vmovq\t{$src, $dst|$dst, $src}",
3573                       [], IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, Sched<[WriteStore]>,
3574                       Requires<[HasAVX512, In64BitMode]>;
3575
3576 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3577                       (ins i64mem:$dst, VR128X:$src),
3578                       "vmovq\t{$src, $dst|$dst, $src}",
3579                       [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3580                               addr:$dst)], IIC_SSE_MOVDQ>,
3581                       EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3582                       Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
3583
3584 let hasSideEffects = 0 in
3585 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3586                              (ins VR128X:$src),
3587                              "vmovq.s\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>,
3588                              EVEX, VEX_W, Sched<[WriteMove]>;
3589 } // ExeDomain = SSEPackedInt
3590
3591 // Move Scalar Single to Double Int
3592 //
// Codegen-only vmovd forms that reinterpret a scalar held in FR32X as a
// 32-bit integer via bitconvert: the rr form writes GR32, the mr form stores
// the i32 to memory.
3593 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3594 def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3595                       (ins FR32X:$src),
3596                       "vmovd\t{$src, $dst|$dst, $src}",
3597                       [(set GR32:$dst, (bitconvert FR32X:$src))],
3598                       IIC_SSE_MOVD_ToGP>, EVEX, Sched<[WriteMove]>;
3599 def VMOVSS2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3600                       (ins i32mem:$dst, FR32X:$src),
3601                       "vmovd\t{$src, $dst|$dst, $src}",
3602                       [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
3603                       IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteStore]>;
3604 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3605
3606 // Move Quadword Int to Packed Quadword Int
3607 //
// Load form only: a 64-bit integer load placed into a v2i64 in VR128X via
// scalar_to_vector. Declared with AVX512XSI and opcode 0x7E; no itinerary
// argument is passed.
3608 let ExeDomain = SSEPackedInt in {
3609 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3610                       (ins i64mem:$src),
3611                       "vmovq\t{$src, $dst|$dst, $src}",
3612                       [(set VR128X:$dst,
3613                         (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3614                       EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>;
3615 } // ExeDomain = SSEPackedInt
3616
3617 //===----------------------------------------------------------------------===//
3618 // AVX-512  MOVSS, MOVSD
3619 //===----------------------------------------------------------------------===//
3620
// Defines the EVEX scalar move family for one scalar type.
//
//   asm    - mnemonic used in the assembly strings.
//   OpNode - DAG node matched by the register-register forms.
//   _      - X86VectorVTInfo supplying register classes, value type, memory
//            operand, load fragment and execution domain.
//
// Generated records: rr, rrkz, rrk (register forms, opcode 0x10), rm, rmk,
// rmkz (load forms, opcode 0x10) and mr, mrk (store forms, opcode 0x11).
3621 multiclass avx512_move_scalar<string asm, SDNode OpNode,
3622                               X86VectorVTInfo _> {
       // Unmasked register form: $dst = OpNode($src1, $src2).
3623   def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3624              (ins _.RC:$src1, _.RC:$src2),
3625              !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3626              [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3627              _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, Sched<[WriteMove]>;
       // Zero-masked register form: X86selects between the move result and
       // the all-zeros vector.
3628   def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3629               (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3630               !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3631               "$dst {${mask}} {z}, $src1, $src2}"),
3632               [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3633                                       (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3634                                       _.ImmAllZerosV)))],
3635               _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_KZ, Sched<[WriteMove]>;
       // Merge-masked register form: $src0 is the pass-through value and is
       // tied to $dst.
3636   let Constraints = "$src0 = $dst"  in
3637   def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3638              (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3639              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3640              "$dst {${mask}}, $src1, $src2}"),
3641              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3642                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3643                                      (_.VT _.RC:$src0))))],
3644              _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_K, Sched<[WriteMove]>;
       // Unmasked scalar load into the scalar FP register class (_.FRC).
3645   let canFoldAsLoad = 1, isReMaterializable = 1 in
3646   def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3647              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3648              [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3649              _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, Sched<[WriteLoad]>;
       // Masked load forms. They have no patterns, so mayLoad and
       // hasSideEffects are stated explicitly.
3650   let mayLoad = 1, hasSideEffects = 0 in {
3651     let Constraints = "$src0 = $dst" in
3652     def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3653                (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3654                !strconcat(asm, "\t{$src, $dst {${mask}}|",
3655                "$dst {${mask}}, $src}"),
3656                [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_K, Sched<[WriteLoad]>;
3657     def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3658                (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3659                !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3660                "$dst {${mask}} {z}, $src}"),
3661                [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_KZ, Sched<[WriteLoad]>;
3662   }
       // Unmasked scalar store from _.FRC.
3663   def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3664              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3665              [(store _.FRC:$src, addr:$dst)],  _.ExeDomain, IIC_SSE_MOV_S_MR>,
3666              EVEX, Sched<[WriteStore]>;
       // Masked scalar store; no pattern. The mask operand is spelled VK1WM
       // directly rather than _.KRCWM.
3667   let mayStore = 1, hasSideEffects = 0 in
3668   def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3669               (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
3670               !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3671               [], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K, Sched<[WriteStore]>;
3672 }
3673
// Instantiate the scalar move family for single precision (vmovss, X86Movss,
// f32x_info, XS prefix, 32-bit CD8 element) and double precision (vmovsd,
// X86Movsd, f64x_info, XD prefix, VEX_W, 64-bit CD8 element). Both are
// VEX_LIG.
3674 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
3675                                   VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
3676
3677 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
3678                                   VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
3679
3680
// Folds a scalar select feeding a scalar move into the masked move itself.
//
// Both patterns match OpNode($src0, scalar_to_vector(X86selects(cond, $src1,
// F))) where cond is built from bit 0 of a GR32 mask (trunc to i8, and with
// 1, scalar_to_vector). The GR32 is copied to VK1WM for the instruction.
//
//   InstrStr - instruction name prefix; rrk / rrkz is appended.
//   OpNode   - the scalar move node.
//   ZeroFP   - PatLeaf matching the floating-point zero used as F in the
//              zero-masking pattern.
//   _        - vector type info (VT, EltVT, RC, FRC).
3681 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3682                                        PatLeaf ZeroFP, X86VectorVTInfo _> {
3683
     // F is a register ($src2): select rrk with $src2 as the first
     // (pass-through) operand.
3684 def : Pat<(_.VT (OpNode _.RC:$src0,
3685                         (_.VT (scalar_to_vector
3686                                   (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
3687                                                        (_.EltVT _.FRC:$src1),
3688                                                        (_.EltVT _.FRC:$src2))))))),
3689           (!cast<Instruction>(InstrStr#rrk)
3690                         (COPY_TO_REGCLASS _.FRC:$src2, _.RC),
3691                         (COPY_TO_REGCLASS GR32:$mask, VK1WM),
3692                         (_.VT _.RC:$src0),
3693                         (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
3694
     // F is ZeroFP: select the zero-masking rrkz form, which needs no
     // pass-through operand.
3695 def : Pat<(_.VT (OpNode _.RC:$src0,
3696                         (_.VT (scalar_to_vector
3697                                   (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
3698                                                        (_.EltVT _.FRC:$src1),
3699                                                        (_.EltVT ZeroFP))))))),
3700           (!cast<Instruction>(InstrStr#rrkz)
3701                         (COPY_TO_REGCLASS GR32:$mask, VK1WM),
3702                         (_.VT _.RC:$src0),
3703                         (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
3704 }
3705
// Selects the masked scalar store (InstrStr # mrk) for a 512-bit masked_store
// whose stored value is a 128-bit vector widened through two
// insert_subvector-into-undef steps (128 -> 256 -> 512, both at index 0).
//
//   InstrStr - instruction name prefix; mrk is appended.
//   _        - AVX512VLVectorVTInfo giving the 128/256/512-bit type infos.
//   Mask     - dag matched as the masked_store mask. The result dag refers to
//              MaskRC:$mask, so this dag has to bind an operand named $mask.
//   MaskRC   - register class of $mask; it is copied to VK1WM.
3706 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3707                                         dag Mask, RegisterClass MaskRC> {
3708
3709 def : Pat<(masked_store addr:$dst, Mask,
3710              (_.info512.VT (insert_subvector undef,
3711                                (_.info256.VT (insert_subvector undef,
3712                                                  (_.info128.VT _.info128.RC:$src),
3713                                                  (iPTR 0))),
3714                                (iPTR 0)))),
3715           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3716                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3717                       (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
3718
3719 }
3720
// Variant of the masked scalar store lowering for mask registers narrower
// than 32 bits. The matched dag is the same widened masked_store; the
// difference is that $mask is first placed into an undefined i32 with
// INSERT_SUBREG at `subreg`, and that i32 is then copied to VK1WM.
//
//   InstrStr - instruction name prefix; mrk is appended.
//   _        - AVX512VLVectorVTInfo giving the 128/256/512-bit type infos.
//   Mask     - dag matched as the masked_store mask; has to bind $mask.
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index at which $mask sits inside the i32.
3721 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
3722                                                AVX512VLVectorVTInfo _,
3723                                                dag Mask, RegisterClass MaskRC,
3724                                                SubRegIndex subreg> {
3725
3726 def : Pat<(masked_store addr:$dst, Mask,
3727              (_.info512.VT (insert_subvector undef,
3728                                (_.info256.VT (insert_subvector undef,
3729                                                  (_.info128.VT _.info128.RC:$src),
3730                                                  (iPTR 0))),
3731                                (iPTR 0)))),
3732           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3733                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3734                       (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
3735
3736 }
3737
// Selects the masked scalar loads for the low 128 bits (extract_subvector at
// index 0) of a 512-bit masked_load.
//
//   InstrStr - instruction name prefix; rmkz / rmk is appended.
//   _        - AVX512VLVectorVTInfo giving the 128/256/512-bit type infos.
//   Mask     - dag matched as the masked_load mask. The result dag refers to
//              MaskRC:$mask, so this dag has to bind an operand named $mask.
//   MaskRC   - register class of $mask; it is copied to VK1WM.
3738 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3739                                        dag Mask, RegisterClass MaskRC> {
3740
     // Pass-through is all zeros: use the zero-masking load.
3741 def : Pat<(_.info128.VT (extract_subvector
3742                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
3743                                         (_.info512.VT (bitconvert
3744                                                        (v16i32 immAllZerosV))))),
3745                            (iPTR 0))),
3746           (!cast<Instruction>(InstrStr#rmkz)
3747                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3748                       addr:$srcAddr)>;
3749
     // Pass-through is X86vzmovl of a 128-bit $src widened to 512 bits through
     // undef: use the merge-masking load with $src as the pass-through.
3750 def : Pat<(_.info128.VT (extract_subvector
3751                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3752                       (_.info512.VT (insert_subvector undef,
3753                             (_.info256.VT (insert_subvector undef,
3754                                   (_.info128.VT (X86vzmovl _.info128.RC:$src)),
3755                                   (iPTR 0))),
3756                             (iPTR 0))))),
3757                 (iPTR 0))),
3758           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
3759                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3760                       addr:$srcAddr)>;
3761
3762 }
3763
// Variant of the masked scalar load lowering for mask registers narrower than
// 32 bits. The matched dags are the same two masked_load shapes (all-zeros
// pass-through -> rmkz, X86vzmovl pass-through -> rmk); the difference is
// that $mask is first placed into an undefined i32 with INSERT_SUBREG at
// `subreg`, and that i32 is then copied to VK1WM.
//
//   InstrStr - instruction name prefix; rmkz / rmk is appended.
//   _        - AVX512VLVectorVTInfo giving the 128/256/512-bit type infos.
//   Mask     - dag matched as the masked_load mask; has to bind $mask.
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index at which $mask sits inside the i32.
3764 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
3765                                               AVX512VLVectorVTInfo _,
3766                                               dag Mask, RegisterClass MaskRC,
3767                                               SubRegIndex subreg> {
3768
3769 def : Pat<(_.info128.VT (extract_subvector
3770                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
3771                                         (_.info512.VT (bitconvert
3772                                                        (v16i32 immAllZerosV))))),
3773                            (iPTR 0))),
3774           (!cast<Instruction>(InstrStr#rmkz)
3775                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3776                       addr:$srcAddr)>;
3777
3778 def : Pat<(_.info128.VT (extract_subvector
3779                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3780                       (_.info512.VT (insert_subvector undef,
3781                             (_.info256.VT (insert_subvector undef,
3782                                   (_.info128.VT (X86vzmovl _.info128.RC:$src)),
3783                                   (iPTR 0))),
3784                             (iPTR 0))))),
3785                 (iPTR 0))),
3786           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
3787                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3788                       addr:$srcAddr)>;
3789
3790 }
3791
// Instantiations of the scalar move/store/load lowering multiclasses.
// The move lowering is instantiated for VMOVSSZ (v4f32) and VMOVSDZ (v2f64).
// For stores and loads, each instantiation supplies the mask dag that binds
// $mask, always an `and` of the register with 1:
//   VMOVSSZ, GR32 mask - and i32, trunc to i16, bitconvert to v16i1.
//   VMOVSSZ, GR16 mask - and i16, bitconvert to v16i1 (sub_16bit variant).
//   VMOVSDZ, GR8 mask  - and i8, bitconvert to v8i1 (sub_8bit variant).
3792 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
3793 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
3794
3795 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
3796                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
3797 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
3798                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
3799 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
3800                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
3801
3802 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
3803                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
3804 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
3805                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
3806 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
3807                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
3808
// Scalar f32/f64 selects implemented with the merge-masked scalar moves
// VMOVSSZrrk / VMOVSDZrrk. In every pattern the false value ($src2) is
// copied to VR128X and passed as the first operand, an IMPLICIT_DEF vector
// is passed as the third operand, the true value ($src1) is copied to VR128X
// and passed last, and the result is copied back to FR32X / FR64X.
//
// Two mask shapes are handled per type: a GR8 whose bit 0 is the condition
// (inserted into an undefined i32 at sub_8bit, then copied to VK1WM), and a
// mask that is already in VK1WM.
3809 def : Pat<(f32 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
3810                            (f32 FR32X:$src1), (f32 FR32X:$src2))),
3811           (COPY_TO_REGCLASS
3812             (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
3813                         (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF),
3814                           GR8:$mask, sub_8bit)), VK1WM),
3815             (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
3816             FR32X)>;
3817
3818 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
3819           (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
3820            VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
3821            (COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
3822
3823 def : Pat<(f64 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
3824                            (f64 FR64X:$src1), (f64 FR64X:$src2))),
3825           (COPY_TO_REGCLASS
3826             (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
3827                         (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF),
3828                           GR8:$mask, sub_8bit)), VK1WM),
3829             (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
3830             FR64X)>;
3831
3832 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
3833           (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
3834            VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
3835            (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
3836
// Lowers the int_x86_avx512_mask_store_ss intrinsic to VMOVSSZmrk. The GR8
// mask is inserted into an undefined i32 at sub_8bit and copied to VK1WM; the
// VR128X source is copied to FR32X for the store operand.
3837 def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
3838           (VMOVSSZmrk addr:$dst, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM),
3839            (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
3840
// Register-register vmovss/vmovsd definitions using opcode 0x11 with the
// MRMDestReg form (the pattern-bearing register forms in this file use 0x10
// with MRMSrcReg). They carry no selection patterns, are printed with a ".s"
// mnemonic suffix, and each names its 0x10 counterpart through FoldGenData.
// Unmasked, merge-masked (k, with $src0 tied to $dst) and zero-masked (kz)
// versions are provided for both precisions.
3841 let hasSideEffects = 0 in {
3842   def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3843                            (ins VR128X:$src1, VR128X:$src2),
3844                            "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3845                            [], IIC_SSE_MOV_S_RR>, XS, EVEX_4V, VEX_LIG,
3846                            FoldGenData<"VMOVSSZrr">, Sched<[WriteMove]>;
3847
3848 let Constraints = "$src0 = $dst" in
3849   def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3850                              (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
3851                                                    VR128X:$src1, VR128X:$src2),
3852                              "vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
3853                                         "$dst {${mask}}, $src1, $src2}",
3854                              [], IIC_SSE_MOV_S_RR>, EVEX_K, XS, EVEX_4V, VEX_LIG,
3855                              FoldGenData<"VMOVSSZrrk">, Sched<[WriteMove]>;
3856
3857   def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3858                          (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
3859                          "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
3860                                     "$dst {${mask}} {z}, $src1, $src2}",
3861                          [], IIC_SSE_MOV_S_RR>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
3862                          FoldGenData<"VMOVSSZrrkz">, Sched<[WriteMove]>;
3863
3864   def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3865                            (ins VR128X:$src1, VR128X:$src2),
3866                            "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3867                            [], IIC_SSE_MOV_S_RR>, XD, EVEX_4V, VEX_LIG, VEX_W,
3868                            FoldGenData<"VMOVSDZrr">, Sched<[WriteMove]>;
3869
3870 let Constraints = "$src0 = $dst" in
3871   def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3872                              (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
3873                                                    VR128X:$src1, VR128X:$src2),
3874                              "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
3875                                         "$dst {${mask}}, $src1, $src2}",
3876                              [], IIC_SSE_MOV_S_RR>, EVEX_K, XD, EVEX_4V, VEX_LIG,
3877                              VEX_W, FoldGenData<"VMOVSDZrrk">, Sched<[WriteMove]>;
3878
3879   def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3880                               (ins f64x_info.KRCWM:$mask, VR128X:$src1,
3881                                                           VR128X:$src2),
3882                               "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
3883                                          "$dst {${mask}} {z}, $src1, $src2}",
3884                               [], IIC_SSE_MOV_S_RR>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
3885                               VEX_W, FoldGenData<"VMOVSDZrrkz">, Sched<[WriteMove]>;
3886 }
3887
3888 let Predicates = [HasAVX512] in {
// Zero-extending moves of the low element of a 128-bit vector (X86vzmovl).
// Each pattern merges the low element of the source into an all-zeros vector
// (AVX512_128_SET0) using VMOVSSZrr / VMOVSDZrr. The patterns are given an
// AddedComplexity of 15.
3889   let AddedComplexity = 15 in {
3890   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
3891             (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
3892   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
3893             (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
       // The FR64X scalar is copied into VR128X, the register class of the
       // VMOVSDZrr operands, so the value is not restricted to the narrower
       // VR128 class before it reaches the EVEX instruction.
3894   def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
3895             (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
3896                        (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
3897   }
3898
3899   // Move low f32 and clear high bits.
       // For 256-bit and 512-bit X86vzmovl (v8f32, v8i32, v16f32, v16i32) the
       // low 128 bits are taken with EXTRACT_SUBREG sub_xmm, merged into an
       // all-zeros vector with VMOVSSZrr, and the 128-bit result is wrapped
       // back to the wide type with SUBREG_TO_REG.
3900   def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
3901             (SUBREG_TO_REG (i32 0),
3902              (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
3903               (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
3904   def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
3905             (SUBREG_TO_REG (i32 0),
3906              (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
3907               (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
3908   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
3909             (SUBREG_TO_REG (i32 0),
3910              (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
3911               (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), sub_xmm)>;
3912   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
3913             (SUBREG_TO_REG (i32 0),
3914              (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
3915               (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;
3916
// Zero-extending scalar load patterns, given an AddedComplexity of 20.
// A scalar load that is zero-extended into a vector (X86vzmovl of a
// scalar_to_vector load, X86vzmovl of a full vector load, or X86vzload) is
// selected as a single scalar load instruction. For 128-bit results the
// scalar-register result of VMOVSSZrm / VMOVSDZrm is moved to VR128X with
// COPY_TO_REGCLASS; for 256-bit and 512-bit results the 128-bit load is
// wrapped with SUBREG_TO_REG at sub_xmm.
3917   let AddedComplexity = 20 in {
3918   // MOVSSrm zeros the high parts of the register; represent this
3919   // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
3920   def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
3921             (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
3922   def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
3923             (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
3924   def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
3925             (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
3926   def : Pat<(v4f32 (X86vzload addr:$src)),
3927             (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
3928
3929   // MOVSDrm zeros the high parts of the register; represent this
3930   // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
3931   def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
3932             (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
3933   def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
3934             (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
3935   def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
3936             (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
3937   def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
3938             (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
3939   def : Pat<(v2f64 (X86vzload addr:$src)),
3940             (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
3941
3942   // Represent the same patterns above but in the form they appear for
3943   // 256-bit types
       // The integer (v8i32) case uses the vmovd load VMOVDI2PDIZrm; the
       // floating-point cases use VMOVSSZrm / VMOVSDZrm.
3944   def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
3945                    (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
3946             (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
3947   def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
3948                    (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
3949             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
3950   def : Pat<(v8f32 (X86vzload addr:$src)),
3951             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
3952   def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
3953                    (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
3954             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
3955   def : Pat<(v4f64 (X86vzload addr:$src)),
3956             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
3957
3958   // Represent the same patterns above but in the form they appear for
3959   // 512-bit types
3960   def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
3961                    (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
3962             (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
3963   def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
3964                    (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
3965             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
3966   def : Pat<(v16f32 (X86vzload addr:$src)),
3967             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
3968   def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
3969                    (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
3970             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
3971   def : Pat<(v8f64 (X86vzload addr:$src)),
3972             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
3973   }
3974   def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
3975                    (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
3976             (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
3977
3978   // Move low f64 and clear high bits.
3979   def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
3980             (SUBREG_TO_REG (i32 0),
3981              (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
3982                        (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
3983   def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
3984             (SUBREG_TO_REG (i32 0),
3985              (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
3986                        (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;
3987
3988   def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
3989             (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
3990                        (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
3991   def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
3992             (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
3993                        (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;
3994
3995   // Extract and store.
3996   def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
3997                    addr:$dst),
3998             (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
3999
4000   // Shuffle with VMOVSS
4001   def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
4002             (VMOVSSZrr (v4i32 VR128X:$src1), VR128X:$src2)>;
4003
4004   def : Pat<(v4f32 (X86Movss VR128X:$src1, (scalar_to_vector FR32X:$src2))),
4005             (VMOVSSZrr VR128X:$src1,
4006                        (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
4007
4008   // Shuffle with VMOVSD
4009   def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
4010             (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
4011
4012   def : Pat<(v2f64 (X86Movsd VR128X:$src1, (scalar_to_vector FR64X:$src2))),
4013             (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
4014
4015   def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
4016             (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
4017   def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
4018             (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
4019 }
4020
// VMOVZPQILo2PQIZrr: register-to-register "vmovq" (opcode 0x7E, MRMSrcReg
// form, EVEX + VEX_W) on VR128X. Its inline pattern selects it for
// X86vzmovl applied to a v2i64 register value.
// NOTE(review): AddedComplexity = 15 is presumably there to make this pattern
// win over more generic X86vzmovl patterns - confirm against the isel tables.
4021 let AddedComplexity = 15 in
4022 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4023                                 (ins VR128X:$src),
4024                                 "vmovq\t{$src, $dst|$dst, $src}",
4025                                 [(set VR128X:$dst, (v2i64 (X86vzmovl
4026                                                    (v2i64 VR128X:$src))))],
4027                                 IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
4028
// Selection patterns (guarded by HasAVX512) that map X86vzmovl / X86vzload
// nodes onto the EVEX movd/movq instructions. For 256- and 512-bit result
// types the 128-bit instruction is wrapped in SUBREG_TO_REG ... sub_xmm.
4029 let Predicates = [HasAVX512] in {
       // Source is a general-purpose register (GR32 / GR64).
4030   let AddedComplexity = 15 in {
4031     def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4032               (VMOVDI2PDIZrr GR32:$src)>;
4033
4034     def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4035               (VMOV64toPQIZrr GR64:$src)>;
4036
4037     def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
4038                                  (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
4039               (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
4040
4041     def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
4042                                  (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
4043               (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
4044   }
4045   // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
       // Source is memory (or, for the v2f64 X86vzmovl case, a VR128X register).
4046   let AddedComplexity = 20 in {
4047     def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
4048               (VMOVDI2PDIZrm addr:$src)>;
4049     def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
4050               (VMOVDI2PDIZrm addr:$src)>;
4051     def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
4052               (VMOVDI2PDIZrm addr:$src)>;
4053     def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
4054               (VMOVDI2PDIZrm addr:$src)>;
4055     def : Pat<(v4i32 (X86vzload addr:$src)),
4056               (VMOVDI2PDIZrm addr:$src)>;
4057     def : Pat<(v8i32 (X86vzload addr:$src)),
4058               (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4059     def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
4060               (VMOVQI2PQIZrm addr:$src)>;
4061     def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4062               (VMOVZPQILo2PQIZrr VR128X:$src)>;
4063     def : Pat<(v2i64 (X86vzload addr:$src)),
4064               (VMOVQI2PQIZrm addr:$src)>;
4065     def : Pat<(v4i64 (X86vzload addr:$src)),
4066               (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
4067   }
4068
4069   // Use regular 128-bit instructions to match 256-bit and 512-bit
       // scalar_to_vec+zext from a GR32 source.
4070   def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
4071                                (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
4072             (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
4073   def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
4074                                 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
4075             (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
4076
4077   // Use regular 128-bit load instructions to match 512-bit X86vzload.
4078   def : Pat<(v16i32 (X86vzload addr:$src)),
4079             (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4080   def : Pat<(v8i64 (X86vzload addr:$src)),
4081             (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
4082 }
4083 //===----------------------------------------------------------------------===//
4084 // AVX-512 - Non-temporals
4085 //===----------------------------------------------------------------------===//
// Non-temporal loads ("vmovntdqa", opcode 0x2A, MRMSrcMem) for 512-, 256- and
// 128-bit destination registers. All three records are declared with an empty
// pattern list, so they carry no inline selection pattern. The 256- and
// 128-bit forms are additionally guarded by HasVLX.
4086 let SchedRW = [WriteLoad] in {
4087   def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4088                         (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4089                         [], SSEPackedInt>, EVEX, T8PD, EVEX_V512,
4090                         EVEX_CD8<64, CD8VF>;
4091
4092   let Predicates = [HasVLX] in {
4093     def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4094                          (ins i256mem:$src),
4095                          "vmovntdqa\t{$src, $dst|$dst, $src}",
4096                          [], SSEPackedInt>, EVEX, T8PD, EVEX_V256,
4097                          EVEX_CD8<64, CD8VF>;
4098
4099     def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4100                         (ins i128mem:$src),
4101                         "vmovntdqa\t{$src, $dst|$dst, $src}",
4102                         [], SSEPackedInt>, EVEX, T8PD, EVEX_V128,
4103                         EVEX_CD8<64, CD8VF>;
4104   }
4105 }
4106
// avx512_movnt - non-temporal store for a single vector type.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   _         - X86VectorVTInfo supplying RC, VT, MemOp, ExeDomain, EltSize.
//   st_frag   - store fragment used in the pattern (default:
//               alignednontemporalstore).
//   itin      - itinerary class (default: IIC_SSE_MOVNT).
// Defines one record, `mr` (MRMDestMem), whose pattern is
// (st_frag (_.VT _.RC:$src), addr:$dst). It is given SchedRW = [WriteStore]
// and AddedComplexity = 400.
4107 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4108                         PatFrag st_frag = alignednontemporalstore,
4109                         InstrItinClass itin = IIC_SSE_MOVNT> {
4110   let SchedRW = [WriteStore], AddedComplexity = 400 in
4111   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4112                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4113                     [(st_frag (_.VT _.RC:$src), addr:$dst)],
4114                     _.ExeDomain, itin>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4115 }
4116
// avx512_movnt_vl - instantiates avx512_movnt for all three vector lengths of
// VTInfo: Z (info512, HasAVX512), and Z256 / Z128 (info256 / info128,
// HasAVX512 + HasVLX), tagging each with the matching EVEX_V* length.
4117 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4118                                                   AVX512VLVectorVTInfo VTInfo> {
4119   let Predicates = [HasAVX512] in
4120     defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
4121
4122   let Predicates = [HasAVX512, HasVLX] in {
4123     defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
4124     defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
4125   }
4126 }
4127
// Non-temporal store instantiations: integer (i64 element info), packed
// double (VEX_W set) and packed single. VMOVNTPD and VMOVNTPS share opcode
// 0x2B and differ in prefix class (PD vs PS) and VEX_W.
4128 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>, PD;
4129 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>, PD, VEX_W;
4130 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>, PS;
4131
// Extra 512-bit non-temporal patterns (HasAVX512, AddedComplexity = 400).
4132 let Predicates = [HasAVX512], AddedComplexity = 400 in {
       // Stores of the remaining 512-bit integer types (i32/i16/i8 elements)
       // all select VMOVNTDQZmr.
4133   def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4134             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4135   def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4136             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4137   def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4138             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4139
       // Aligned non-temporal loads of v8f64 / v16f32 / v8i64 all select
       // VMOVNTDQAZrm.
4140   def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4141             (VMOVNTDQAZrm addr:$src)>;
4142   def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4143             (VMOVNTDQAZrm addr:$src)>;
4144   def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4145             (VMOVNTDQAZrm addr:$src)>;
4146 }
4147
// Extra 256-bit and 128-bit non-temporal patterns (HasVLX,
// AddedComplexity = 400): integer stores with i32/i16/i8 elements select
// VMOVNTDQZ256mr / VMOVNTDQZ128mr, and aligned non-temporal loads of the
// f64 / f32 / i64 vector types select VMOVNTDQAZ256rm / VMOVNTDQAZ128rm.
4148 let Predicates = [HasVLX], AddedComplexity = 400 in {
       // 256-bit stores.
4149   def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4150             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4151   def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4152             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4153   def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4154             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4155
       // 256-bit loads.
4156   def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4157             (VMOVNTDQAZ256rm addr:$src)>;
4158   def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4159             (VMOVNTDQAZ256rm addr:$src)>;
4160   def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4161             (VMOVNTDQAZ256rm addr:$src)>;
4162
       // 128-bit stores.
4163   def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4164             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4165   def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4166             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4167   def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4168             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4169
       // 128-bit loads.
4170   def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4171             (VMOVNTDQAZ128rm addr:$src)>;
4172   def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4173             (VMOVNTDQAZ128rm addr:$src)>;
4174   def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4175             (VMOVNTDQAZ128rm addr:$src)>;
4176 }
4177
4178 //===----------------------------------------------------------------------===//
4179 // AVX-512 - Integer arithmetic
4180 //
// avx512_binop_rm - two-operand integer operation for one vector type.
//   opc          - opcode byte.
//   OpcodeStr    - assembly mnemonic.
//   OpNode       - SDNode matched by the patterns.
//   _            - X86VectorVTInfo for the operand/result type.
//   itins        - OpndItins supplying rr/rm itineraries and the Sched class.
//   IsCommutable - forwarded to the rr form only (default 0).
// Defines, via AVX512_maskable:
//   rr - both operands in registers.
//   rm - second operand is a bitconverted full-vector load (_.LdFrag).
4181 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4182                            X86VectorVTInfo _, OpndItins itins,
4183                            bit IsCommutable = 0> {
4184   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4185                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4186                     "$src2, $src1", "$src1, $src2",
4187                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4188                     itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V,
4189                     Sched<[itins.Sched]>;
4190
4191   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4192                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4193                   "$src2, $src1", "$src1, $src2",
4194                   (_.VT (OpNode _.RC:$src1,
4195                                 (bitconvert (_.LdFrag addr:$src2)))),
4196                   itins.rm>, AVX512BIBase, EVEX_4V,
4197                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
4198 }
4199
// avx512_binop_rmb - extends avx512_binop_rm (rr + rm) with an `rmb` form
// whose second operand is a scalar load (_.ScalarLdFrag) broadcast with
// X86VBroadcast. The assembly operand gets _.BroadcastStr appended and the
// record is tagged EVEX_B. Parameters are the same as avx512_binop_rm.
4200 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4201                             X86VectorVTInfo _, OpndItins itins,
4202                             bit IsCommutable = 0> :
4203            avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
4204   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4205                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4206                   "${src2}"##_.BroadcastStr##", $src1",
4207                   "$src1, ${src2}"##_.BroadcastStr,
4208                   (_.VT (OpNode _.RC:$src1,
4209                                 (X86VBroadcast
4210                                     (_.ScalarLdFrag addr:$src2)))),
4211                   itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B,
4212                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
4213 }
4214
// avx512_binop_rm_vl - instantiates avx512_binop_rm (no broadcast form) for
// the three vector lengths of VTInfo. `prd` is the feature predicate for the
// 512-bit form (Z); the 256/128-bit forms (Z256/Z128) also require HasVLX.
4215 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4216                               AVX512VLVectorVTInfo VTInfo, OpndItins itins,
4217                               Predicate prd, bit IsCommutable = 0> {
4218   let Predicates = [prd] in
4219     defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
4220                              IsCommutable>, EVEX_V512;
4221
4222   let Predicates = [prd, HasVLX] in {
4223     defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
4224                              IsCommutable>, EVEX_V256;
4225     defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
4226                              IsCommutable>, EVEX_V128;
4227   }
4228 }
4229
// avx512_binop_rmb_vl - same as avx512_binop_rm_vl, but built on
// avx512_binop_rmb so each vector length also gets the broadcast (rmb) form.
4230 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4231                                AVX512VLVectorVTInfo VTInfo, OpndItins itins,
4232                                Predicate prd, bit IsCommutable = 0> {
4233   let Predicates = [prd] in
4234     defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
4235                              IsCommutable>, EVEX_V512;
4236
4237   let Predicates = [prd, HasVLX] in {
4238     defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
4239                              IsCommutable>, EVEX_V256;
4240     defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
4241                              IsCommutable>, EVEX_V128;
4242   }
4243 }
4244
// avx512_binop_rm_vl_q - 64-bit element wrapper: avx512_binop_rmb_vl over
// avx512vl_i64_info, tagged VEX_W and EVEX_CD8<64, CD8VF>.
4245 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4246                                 OpndItins itins, Predicate prd,
4247                                 bit IsCommutable = 0> {
4248   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4249                                itins, prd, IsCommutable>,
4250                                VEX_W, EVEX_CD8<64, CD8VF>;
4251 }
4252
// avx512_binop_rm_vl_d - 32-bit element wrapper: avx512_binop_rmb_vl over
// avx512vl_i32_info, tagged EVEX_CD8<32, CD8VF> (no VEX_W).
4253 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4254                                 OpndItins itins, Predicate prd,
4255                                 bit IsCommutable = 0> {
4256   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4257                                itins, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4258 }
4259
// avx512_binop_rm_vl_w - 16-bit element wrapper: avx512_binop_rm_vl (rr/rm
// only, no broadcast form) over avx512vl_i16_info, tagged
// EVEX_CD8<16, CD8VF> and VEX_WIG.
4260 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4261                                 OpndItins itins, Predicate prd,
4262                                 bit IsCommutable = 0> {
4263   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4264                               itins, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4265                               VEX_WIG;
4266 }
4267
// avx512_binop_rm_vl_b - 8-bit element wrapper: avx512_binop_rm_vl (rr/rm
// only, no broadcast form) over avx512vl_i8_info, tagged
// EVEX_CD8<8, CD8VF> and VEX_WIG.
4268 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4269                                 OpndItins itins, Predicate prd,
4270                                 bit IsCommutable = 0> {
4271   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4272                               itins, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4273                               VEX_WIG;
4274 }
4275
// avx512_binop_rm_vl_dq - defines both the dword (D) and qword (Q) variants
// of an operation. OpcodeStr is the mnemonic stem; "d" / "q" is appended.
// opc_d and opc_q are the respective opcode bytes.
4276 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4277                                  SDNode OpNode, OpndItins itins, Predicate prd,
4278                                  bit IsCommutable = 0> {
4279   defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, itins, prd,
4280                                    IsCommutable>;
4281
4282   defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, itins, prd,
4283                                    IsCommutable>;
4284 }
4285
// avx512_binop_rm_vl_bw - defines both the byte (B) and word (W) variants of
// an operation. OpcodeStr is the mnemonic stem; "b" / "w" is appended.
// opc_b and opc_w are the respective opcode bytes.
4286 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4287                                  SDNode OpNode, OpndItins itins, Predicate prd,
4288                                  bit IsCommutable = 0> {
4289   defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, itins, prd,
4290                                    IsCommutable>;
4291
4292   defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, itins, prd,
4293                                    IsCommutable>;
4294 }
4295
// avx512_binop_rm_vl_all - defines all four element widths of an operation.
// Opcode arguments are ordered byte, word, dword, qword. The D/Q variants are
// predicated on HasAVX512 and the B/W variants on HasBWI.
4296 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4297                                   bits<8> opc_d, bits<8> opc_q,
4298                                   string OpcodeStr, SDNode OpNode,
4299                                   OpndItins itins, bit IsCommutable = 0> {
4300   defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4301                                     itins, HasAVX512, IsCommutable>,
4302               avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4303                                     itins, HasBWI, IsCommutable>;
4304 }
4305
// avx512_binop_rm2 - two-operand operation whose source and destination
// vector types may differ.
//   _Src   - type info for both source operands.
//   _Dst   - type info for the result (also passed to AVX512_maskable).
//   _Brdct - type info used for the broadcast memory form.
// Defines:
//   rr  - both sources in registers.
//   rm  - second source is a bitconverted full-vector load (_Src.LdFrag).
//   rmb - second source is a scalar load broadcast as _Brdct.VT and then
//         bitconverted; tagged EVEX_B.
// IsCommutable is forwarded to the rr form only.
4306 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
4307                             SDNode OpNode,X86VectorVTInfo _Src,
4308                             X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4309                             bit IsCommutable = 0> {
4310   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4311                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4312                             "$src2, $src1","$src1, $src2",
4313                             (_Dst.VT (OpNode
4314                                          (_Src.VT _Src.RC:$src1),
4315                                          (_Src.VT _Src.RC:$src2))),
4316                             itins.rr, IsCommutable>,
4317                             AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;
4318   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4319                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4320                         "$src2, $src1", "$src1, $src2",
4321                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4322                                       (bitconvert (_Src.LdFrag addr:$src2)))),
4323                         itins.rm>, AVX512BIBase, EVEX_4V,
4324                         Sched<[itins.Sched.Folded, ReadAfterLd]>;
4325
4326   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4327                     (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4328                     OpcodeStr,
4329                     "${src2}"##_Brdct.BroadcastStr##", $src1",
4330                      "$src1, ${src2}"##_Brdct.BroadcastStr,
4331                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4332                                  (_Brdct.VT (X86VBroadcast
4333                                           (_Brdct.ScalarLdFrag addr:$src2)))))),
4334                     itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B,
4335                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
4336 }
4337
// Integer add / subtract / multiply / average instantiations.
// For avx512_binop_rm_vl_all the opcode order is byte, word, dword, qword.
// The last numeric argument is IsCommutable: 1 for the add, multiply and
// average forms, 0 for the subtract forms.
4338 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4339                                     SSE_INTALU_ITINS_P, 1>;
4340 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4341                                     SSE_INTALU_ITINS_P, 0>;
4342 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
4343                                     SSE_INTALU_ITINS_P, HasBWI, 1>;
4344 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
4345                                     SSE_INTALU_ITINS_P, HasBWI, 0>;
4346 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
4347                                      SSE_INTALU_ITINS_P, HasBWI, 1>;
4348 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
4349                                      SSE_INTALU_ITINS_P, HasBWI, 0>;
4350 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4351                                     SSE_INTMUL_ITINS_P, HasAVX512, 1>, T8PD;
4352 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4353                                     SSE_INTMUL_ITINS_P, HasBWI, 1>;
4354 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4355                                     SSE_INTMUL_ITINS_P, HasDQI, 1>, T8PD;
4356 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTMUL_ITINS_P,
4357                                     HasBWI, 1>;
4358 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P,
4359                                      HasBWI, 1>;
4360 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_ITINS_P,
4361                                       HasBWI, 1>, T8PD;
4362 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4363                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
4364
// avx512_binop_all - instantiates avx512_binop_rm2 for the 512/256/128-bit
// lengths of _SrcVTInfo / _DstVTInfo. The broadcast type is always the i64
// vector of the matching length (v8i64_info / v4i64x_info / v2i64x_info),
// and every form is tagged EVEX_CD8<64, CD8VF> and VEX_W. `prd` guards the
// 512-bit form; the 256/128-bit forms also require HasVLX.
4365 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
4366                             AVX512VLVectorVTInfo _SrcVTInfo, AVX512VLVectorVTInfo _DstVTInfo,
4367                             SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
4368   let Predicates = [prd] in
4369     defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
4370                                  _SrcVTInfo.info512, _DstVTInfo.info512,
4371                                  v8i64_info, IsCommutable>,
4372                                   EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4373   let Predicates = [HasVLX, prd] in {
4374     defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
4375                                       _SrcVTInfo.info256, _DstVTInfo.info256,
4376                                       v4i64x_info, IsCommutable>,
4377                                       EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4378     defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
4379                                       _SrcVTInfo.info128, _DstVTInfo.info128,
4380                                       v2i64x_info, IsCommutable>,
4381                                      EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
4382   }
4383 }
4384
// Instantiations of avx512_binop_all. VPMULDQ / VPMULUDQ take i32 vector
// sources and produce i64 vector results (commutable, HasAVX512).
// VPMULTISHIFTQB takes and produces i8 vectors (not commutable, HasVBMI).
4385 defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTMUL_ITINS_P,
4386                                 avx512vl_i32_info, avx512vl_i64_info,
4387                                 X86pmuldq, HasAVX512, 1>,T8PD;
4388 defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
4389                                 avx512vl_i32_info, avx512vl_i64_info,
4390                                 X86pmuludq, HasAVX512, 1>;
4391 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SSE_INTALU_ITINS_P,
4392                                 avx512vl_i8_info, avx512vl_i8_info,
4393                                 X86multishift, HasVBMI, 0>, T8PD;
4394
// avx512_packs_rmb - broadcast-memory form only (`rmb`) of a pack-style
// operation: the second source is a scalar load broadcast as _Src.VT and
// bitconverted, the result has type _Dst.VT. Tagged EVEX_B with the
// compressed-displacement element size taken from _Src.EltSize.
4395 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4396                             X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4397                             OpndItins itins> {
4398   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4399                     (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4400                     OpcodeStr,
4401                     "${src2}"##_Src.BroadcastStr##", $src1",
4402                      "$src1, ${src2}"##_Src.BroadcastStr,
4403                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4404                                  (_Src.VT (X86VBroadcast
4405                                           (_Src.ScalarLdFrag addr:$src2)))))),
4406                     itins.rm>, EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4407                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
4408 }
4409
// avx512_packs_rm - register (`rr`) and full-vector memory (`rm`) forms of an
// operation that reads two _Src-typed vectors and produces a _Dst-typed
// vector. IsCommutable is forwarded to the rr form only. Unlike
// avx512_binop_rm2 these records do not add AVX512BIBase themselves; the
// encoding class is attached where the multiclass is instantiated.
4410 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4411                             SDNode OpNode,X86VectorVTInfo _Src,
4412                             X86VectorVTInfo _Dst, OpndItins itins,
4413                             bit IsCommutable = 0> {
4414   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4415                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4416                             "$src2, $src1","$src1, $src2",
4417                             (_Dst.VT (OpNode
4418                                          (_Src.VT _Src.RC:$src1),
4419                                          (_Src.VT _Src.RC:$src2))),
4420                             itins.rr, IsCommutable>,
4421                             EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[itins.Sched]>;
4422   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4423                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4424                         "$src2, $src1", "$src1, $src2",
4425                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4426                                       (bitconvert (_Src.LdFrag addr:$src2)))), itins.rm>,
4427                          EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4428                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
4429 }
4430
// avx512_packs_all_i32_i16 - pack i32 vectors into i16 vectors at all three
// vector lengths. Each length combines avx512_packs_rm (rr/rm) with
// avx512_packs_rmb (broadcast form). The 512-bit form requires HasBWI; the
// 256/128-bit forms require HasBWI and HasVLX. Uses the SSE_PACK itineraries.
4431 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4432                                     SDNode OpNode> {
4433   let Predicates = [HasBWI] in
4434   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4435                                  v32i16_info, SSE_PACK>,
4436                 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4437                                  v32i16_info, SSE_PACK>, EVEX_V512;
4438   let Predicates = [HasBWI, HasVLX] in {
4439     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4440                                      v16i16x_info, SSE_PACK>,
4441                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4442                                      v16i16x_info, SSE_PACK>, EVEX_V256;
4443     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4444                                      v8i16x_info, SSE_PACK>,
4445                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4446                                      v8i16x_info, SSE_PACK>, EVEX_V128;
4447   }
4448 }
// avx512_packs_all_i16_i8 - pack i16 vectors into i8 vectors at all three
// vector lengths. Only the rr/rm forms (avx512_packs_rm) are defined; there
// is no broadcast form here. All forms are tagged VEX_WIG. Predicates match
// avx512_packs_all_i32_i16 (HasBWI, plus HasVLX for 256/128-bit).
4449 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4450                             SDNode OpNode> {
4451   let Predicates = [HasBWI] in
4452   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info,
4453                                 v64i8_info, SSE_PACK>, EVEX_V512, VEX_WIG;
4454   let Predicates = [HasBWI, HasVLX] in {
4455     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4456                                     v32i8x_info, SSE_PACK>, EVEX_V256, VEX_WIG;
4457     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4458                                     v16i8x_info, SSE_PACK>, EVEX_V128, VEX_WIG;
4459   }
4460 }
4461
// avx512_vpmadd - multiply-add style operation built on avx512_packs_rm
// (rr/rm forms, no broadcast form) with the SSE_PMADD itineraries. _Src and
// _Dst give the source and result type families; IsCommutable is forwarded.
// The 512-bit form requires HasBWI; 256/128-bit forms also require HasVLX.
4462 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4463                             SDNode OpNode, AVX512VLVectorVTInfo _Src,
4464                             AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4465   let Predicates = [HasBWI] in
4466   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4467                                 _Dst.info512, SSE_PMADD, IsCommutable>, EVEX_V512;
4468   let Predicates = [HasBWI, HasVLX] in {
4469     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4470                                      _Dst.info256, SSE_PMADD, IsCommutable>, EVEX_V256;
4471     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4472                                      _Dst.info128, SSE_PMADD, IsCommutable>, EVEX_V128;
4473   }
4474 }
4475
// Pack instantiations (signed / unsigned saturation nodes X86Packss /
// X86Packus) for dword->word and word->byte.
// NOTE(review): VPACKUSDW uses AVX5128IBase where its siblings use
// AVX512BIBase - presumably a distinct class defined elsewhere that selects
// a different opcode map; confirm it is not a typo.
4476 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4477 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4478 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4479 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4480
     // Multiply-add instantiations: VPMADDUBSW (i8 -> i16, IsCommutable left at
     // its default of 0) and VPMADDWD (i16 -> i32, commutable).
4481 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4482                      avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
4483 defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4484                      avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
4485
// Signed / unsigned integer max and min for every element width; all are
// marked commutable. Byte and word forms require HasBWI, dword/qword forms
// HasAVX512. In the *_dq instantiations the same opcode byte is passed for
// both D and Q; the Q form is distinguished by the VEX_W that
// avx512_binop_rm_vl_q adds.
4486 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4487                                      SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4488 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4489                                      SSE_INTALU_ITINS_P, HasBWI, 1>;
4490 defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax,
4491                                      SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4492
4493 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4494                                      SSE_INTALU_ITINS_P, HasBWI, 1>;
4495 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4496                                      SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4497 defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax,
4498                                      SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4499
4500 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4501                                      SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4502 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4503                                      SSE_INTALU_ITINS_P, HasBWI, 1>;
4504 defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin,
4505                                      SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4506
4507 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4508                                      SSE_INTALU_ITINS_P, HasBWI, 1>;
4509 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4510                                      SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4511 defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
4512                                      SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4513
4514 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Each narrow operand is inserted into an undefined v8i64 register
// (IMPLICIT_DEF + INSERT_SUBREG), the multiply is done with VPMULLQZrr, and
// the low sub_ymm / sub_xmm part of the result is extracted. The upper lanes
// of the wide result are never read.
4515 let Predicates = [HasDQI, NoVLX] in {
4516   def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4517             (EXTRACT_SUBREG
4518                 (VPMULLQZrr
4519                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4520                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4521              sub_ymm)>;
4522
4523   def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4524             (EXTRACT_SUBREG
4525                 (VPMULLQZrr
4526                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4527                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4528              sub_xmm)>;
4529 }
4547
// Selection patterns that implement OpNode on v4i64 and v2i64 with a wider
// instruction.
//   Instr  - instruction applied to the widened (v8i64) operands.
//   OpNode - DAG node being matched on the narrow vectors.
// Each operand is inserted into an undefined v8i64 register via
// IMPLICIT_DEF + INSERT_SUBREG, Instr is applied, and the matching low
// subregister (sub_ymm or sub_xmm) of the result is extracted.
4548 multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
  // 256-bit case: v4i64 in VR256X.
4549   def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
4550             (EXTRACT_SUBREG
4551                 (Instr
4552                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4553                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4554              sub_ymm)>;
4555
  // 128-bit case: v2i64 in VR128X.
4556   def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
4557             (EXTRACT_SUBREG
4558                 (Instr
4559                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4560                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4561              sub_xmm)>;
4562 }
4563
// With AVX512 but without VLX, select 64-bit-element umax/umin/smax/smin on
// v4i64 and v2i64 through the 512-bit register-register instructions.
4564 let Predicates = [HasAVX512, NoVLX] in {
4565   defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
4566   defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
4567   defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
4568   defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
4569 }
4570
4571 //===----------------------------------------------------------------------===//
4572 // AVX-512  Logical Instructions
4573 //===----------------------------------------------------------------------===//
4574
4575 // OpNodeMsk is the OpNode to use when element size is important. OpNode will
4576 // be set to null_frag for 32-bit elements.
// Register-register (rr) and register-memory (rm) forms of a vector logic
// instruction, both built with AVX512_maskable_logic.
//   opc / OpcodeStr - opcode byte and mnemonic.
//   OpNode          - node used in the _.i64VT pattern over bitconverted
//                     operands.
//   OpNodeMsk       - node used in the second pattern, whose i64 result is
//                     bitconverted back to _.VT.
//   itins           - itinerary / scheduling info (rr, rm, Sched).
//   _               - vector type info.
//   IsCommutable    - forwarded to the rr form only; rm does not pass it.
// NOTE(review): presumably AVX512_maskable_logic uses the first pattern for
// the unmasked instruction and the second for the masked variants -- confirm
// against its definition.
4577 multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
4578                            SDPatternOperator OpNode,
4579                            SDNode OpNodeMsk, OpndItins itins, X86VectorVTInfo _,
4580                            bit IsCommutable = 0> {
4581   let hasSideEffects = 0 in
4582   defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
4583                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4584                     "$src2, $src1", "$src1, $src2",
4585                     (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
4586                                      (bitconvert (_.VT _.RC:$src2)))),
4587                     (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
4588                                                           _.RC:$src2)))),
4589                     itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V,
4590                     Sched<[itins.Sched]>;
4591
  // Memory form: $src2 is loaded with _.LdFrag and bitconverted.
4592   let hasSideEffects = 0, mayLoad = 1 in
4593   defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
4594                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4595                   "$src2, $src1", "$src1, $src2",
4596                   (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
4597                                    (bitconvert (_.LdFrag addr:$src2)))),
4598                   (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
4599                                      (bitconvert (_.LdFrag addr:$src2)))))),
4600                   itins.rm>, AVX512BIBase, EVEX_4V,
4601                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
4602 }
4603
4604 // OpNodeMsk is the OpNode to use where element size is important. So use
4605 // for all of the broadcast patterns.
// Extends avx512_logic_rm (rr, rm) with the broadcast-memory form rmb.
// In rmb, $src2 is a scalar memory operand loaded with _.ScalarLdFrag and
// expanded with X86VBroadcast; the assembly operand carries _.BroadcastStr
// and the instruction is tagged EVEX_B. Both rmb patterns use OpNodeMsk;
// OpNode is only forwarded to the inherited rr/rm forms.
4606 multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
4607                             SDPatternOperator OpNode,
4608                             SDNode OpNodeMsk, OpndItins itins, X86VectorVTInfo _,
4609                             bit IsCommutable = 0> :
4610            avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, itins, _,
4611                            IsCommutable> {
4612   defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
4613                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4614                   "${src2}"##_.BroadcastStr##", $src1",
4615                   "$src1, ${src2}"##_.BroadcastStr,
4616                   (_.i64VT (OpNodeMsk _.RC:$src1,
4617                                    (bitconvert
4618                                     (_.VT (X86VBroadcast
4619                                             (_.ScalarLdFrag addr:$src2)))))),
4620                   (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
4621                                      (bitconvert
4622                                       (_.VT (X86VBroadcast
4623                                              (_.ScalarLdFrag addr:$src2)))))))),
4624                   itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B,
4625                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
4626 }
4627
// Instantiates avx512_logic_rmb for every vector length in VTInfo:
//   Z    - info512, predicated on HasAVX512, tagged EVEX_V512.
//   Z256 - info256, predicated on HasAVX512 and HasVLX, tagged EVEX_V256.
//   Z128 - info128, predicated on HasAVX512 and HasVLX, tagged EVEX_V128.
// All other arguments are forwarded unchanged.
4628 multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
4629                                SDPatternOperator OpNode,
4630                                SDNode OpNodeMsk, OpndItins itins,
4631                                AVX512VLVectorVTInfo VTInfo,
4632                                bit IsCommutable = 0> {
4633   let Predicates = [HasAVX512] in
4634     defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
4635                               VTInfo.info512, IsCommutable>, EVEX_V512;
4636
4637   let Predicates = [HasAVX512, HasVLX] in {
4638     defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
4639                                  VTInfo.info256, IsCommutable>, EVEX_V256;
4640     defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
4641                                  VTInfo.info128, IsCommutable>, EVEX_V128;
4642   }
4643 }
4644
// Creates the quadword (Q) and doubleword (D) variants of a logic
// instruction by appending "q" / "d" to OpcodeStr.
//   Q - uses avx512vl_i64_info and passes OpNode as both OpNode and
//       OpNodeMsk; tagged VEX_W.
//   D - uses avx512vl_i32_info and passes null_frag as OpNode, so the real
//       node is supplied only as OpNodeMsk.
4645 multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4646                                  SDNode OpNode, OpndItins itins,
4647                                  bit IsCommutable = 0> {
4648   defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, itins,
4649                                avx512vl_i64_info, IsCommutable>,
4650                                VEX_W, EVEX_CD8<64, CD8VF>;
4651   defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, itins,
4652                                avx512vl_i32_info, IsCommutable>,
4653                                EVEX_CD8<32, CD8VF>;
4654 }
4655
// Integer logic instruction families. The D and Q variants share one opcode
// byte. and / or / xor pass IsCommutable = 1; VPANDN omits it and therefore
// gets the default of 0.
4656 defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, SSE_BIT_ITINS_P, 1>;
4657 defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, SSE_BIT_ITINS_P, 1>;
4658 defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, SSE_BIT_ITINS_P, 1>;
4659 defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, SSE_BIT_ITINS_P>;
4660
4661 //===----------------------------------------------------------------------===//
4662 // AVX-512  FP arithmetic
4663 //===----------------------------------------------------------------------===//
// Scalar FP binary operation: four forms sharing one opcode.
//   rr_Int / rm_Int - operate on the vector register class _.RC through
//                     AVX512_maskable_scalar; pattern is VecNode with an
//                     explicit (i32 FROUND_CURRENT) operand.
//   rr / rm         - codegen-only forms on the scalar class _.FRC using
//                     OpNode, predicated on HasAVX512.
// IsCommutable is applied only to the rr form.
4664 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
4665                          SDNode OpNode, SDNode VecNode, OpndItins itins,
4666                          bit IsCommutable> {
4667   let ExeDomain = _.ExeDomain in {
4668   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4669                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4670                            "$src2, $src1", "$src1, $src2",
4671                            (_.VT (VecNode _.RC:$src1, _.RC:$src2,
4672                                           (i32 FROUND_CURRENT))),
4673                            itins.rr>, Sched<[itins.Sched]>;
4674
  // Memory operand is matched through the _.ScalarIntMemCPat complex pattern.
4675   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
4676                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
4677                          "$src2, $src1", "$src1, $src2",
4678                          (_.VT (VecNode _.RC:$src1,
4679                                         _.ScalarIntMemCPat:$src2,
4680                                         (i32 FROUND_CURRENT))),
4681                          itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
4682   let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
4683   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
4684                          (ins _.FRC:$src1, _.FRC:$src2),
4685                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4686                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
4687                           itins.rr>, Sched<[itins.Sched]> {
4688     let isCommutable = IsCommutable;
4689   }
4690   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
4691                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
4692                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4693                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
4694                          (_.ScalarLdFrag addr:$src2)))], itins.rm>,
4695                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
4696   }
4697   }
4698 }
4699
// Adds the rrb_Int form of a scalar FP operation: a register-register
// instruction with an extra AVX512RC:$rc operand that is passed to VecNode as
// (i32 imm:$rc). The instruction is tagged EVEX_B and EVEX_RC, and
// IsCommutable is forwarded to AVX512_maskable_scalar.
4700 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
4701                          SDNode VecNode, OpndItins itins, bit IsCommutable = 0> {
4702   let ExeDomain = _.ExeDomain in
4703   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4704                           (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
4705                           "$rc, $src2, $src1", "$src1, $src2, $rc",
4706                           (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
4707                           (i32 imm:$rc)), itins.rr, IsCommutable>,
4708                           EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
4709 }
// Scalar FP binary operation with a {sae} variant.
//   rr_Int / rm_Int - vector-register-class forms; VecNode is used with two
//                     operands only (no rounding-mode operand).
//   rr / rm         - codegen-only forms on _.FRC using OpNode, predicated on
//                     HasAVX512; IsCommutable is applied to rr only.
//   rrb_Int         - register form printed with "{sae}"; pattern is SaeNode
//                     with (i32 FROUND_NO_EXC), tagged EVEX_B.
4710 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
4711                                 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
4712                                 OpndItins itins, bit IsCommutable> {
4713   let ExeDomain = _.ExeDomain in {
4714   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4715                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4716                            "$src2, $src1", "$src1, $src2",
4717                            (_.VT (VecNode _.RC:$src1, _.RC:$src2)),
4718                            itins.rr>, Sched<[itins.Sched]>;
4719
4720   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
4721                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
4722                          "$src2, $src1", "$src1, $src2",
4723                          (_.VT (VecNode _.RC:$src1,
4724                                         _.ScalarIntMemCPat:$src2)),
4725                          itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
4726
4727   let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
4728   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
4729                          (ins _.FRC:$src1, _.FRC:$src2),
4730                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4731                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
4732                           itins.rr>, Sched<[itins.Sched]> {
4733     let isCommutable = IsCommutable;
4734   }
4735   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
4736                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
4737                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4738                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
4739                          (_.ScalarLdFrag addr:$src2)))], itins.rm>,
4740                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
4741   }
4742
4743   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4744                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4745                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
4746                             (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
4747                             (i32 FROUND_NO_EXC)), itins.rr>, EVEX_B,
4748                             Sched<[itins.Sched]>;
4749   }
4750 }
4751
// Single- and double-precision scalar variants of a binary FP operation with
// embedded-rounding support. Each variant combines avx512_fp_scalar and
// avx512_fp_scalar_round.
//   SSZ - "ss" suffix, f32x_info, itins.s; tagged XS, EVEX_CD8<32, CD8VT1>.
//   SDZ - "sd" suffix, f64x_info, itins.d; tagged XD, VEX_W,
//         EVEX_CD8<64, CD8VT1>.
4752 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
4753                                   SDNode VecNode,
4754                                   SizeItins itins, bit IsCommutable> {
4755   defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
4756                               itins.s, IsCommutable>,
4757              avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
4758                               itins.s, IsCommutable>,
4759                               XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
4760   defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
4761                               itins.d,                  IsCommutable>,
4762              avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
4763                               itins.d, IsCommutable>,
4764                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
4765 }
4766
// Single- and double-precision scalar variants of a binary FP operation with
// a {sae} form, both built from avx512_fp_scalar_sae.
//   SSZ - "ss" suffix, f32x_info, itins.s; tagged XS, EVEX_CD8<32, CD8VT1>.
//   SDZ - "sd" suffix, f64x_info, itins.d; tagged XD, VEX_W,
//         EVEX_CD8<64, CD8VT1>.
4767 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
4768                                   SDNode VecNode, SDNode SaeNode,
4769                                   SizeItins itins, bit IsCommutable> {
4770   defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
4771                               VecNode, SaeNode, itins.s, IsCommutable>,
4772                               XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
4773   defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
4774                               VecNode, SaeNode, itins.d, IsCommutable>,
4775                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
4776 }
// Scalar FP arithmetic instruction families. Add and mul are instantiated as
// commutable (last argument 1); sub, div, min and max are not (0).
// Add/mul/sub/div use the embedded-rounding multiclass; min/max use the
// {sae} multiclass.
4777 defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, SSE_ALU_ITINS_S, 1>;
4778 defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, SSE_MUL_ITINS_S, 1>;
4779 defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, SSE_ALU_ITINS_S, 0>;
4780 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, SSE_DIV_ITINS_S, 0>;
4781 defm VMIN : avx512_binop_s_sae  <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
4782                                  SSE_ALU_ITINS_S, 0>;
4783 defm VMAX : avx512_binop_s_sae  <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
4784                                  SSE_ALU_ITINS_S, 0>;
4785
4786 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
4787 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
// Codegen-only scalar rr / rm forms on _.FRC for a commutable OpNode,
// predicated on HasAVX512. The rr form is unconditionally marked
// isCommutable = 1; the rm form loads $src2 with _.ScalarLdFrag.
// The multiclass name is spelled "comutable" (sic); instantiations rely on
// that spelling.
4788 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
4789                           X86VectorVTInfo _, SDNode OpNode, OpndItins itins> {
4790   let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
4791   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
4792                          (ins _.FRC:$src1, _.FRC:$src2),
4793                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4794                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
4795                           itins.rr>, Sched<[itins.Sched]> {
4796     let isCommutable = 1;
4797   }
4798   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
4799                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
4800                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4801                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
4802                          (_.ScalarLdFrag addr:$src2)))], itins.rm>,
4803                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
4804   }
4805 }
// Commutable scalar min/max variants built on X86fminc / X86fmaxc. They
// reuse the opcodes (0x5D / 0x5F) and mnemonics of the VMIN / VMAX scalar
// instructions; the "ss" forms use f32x_info with XS, the "sd" forms use
// f64x_info with XD and VEX_W.
4806 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
4807                                 SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG,
4808                                 EVEX_CD8<32, CD8VT1>;
4809
4810 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
4811                                 SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
4812                                 EVEX_CD8<64, CD8VT1>;
4813
4814 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
4815                                 SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG,
4816                                 EVEX_CD8<32, CD8VT1>;
4817
4818 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
4819                                 SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
4820                                 EVEX_CD8<64, CD8VT1>;
4821
// Packed FP binary operation in three forms, all built with AVX512_maskable
// and using the mnemonic OpcodeStr followed by _.Suffix:
//   rr  - register-register; receives IsCommutable.
//   rm  - register-memory; $src2 loaded with _.LdFrag.
//   rmb - register-broadcast; $src2 is a scalar load (_.ScalarLdFrag)
//         expanded with X86VBroadcast, tagged EVEX_B.
// hasSideEffects = 0 applies to all three; mayLoad = 1 to rm and rmb.
4822 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
4823                             X86VectorVTInfo _, OpndItins itins,
4824                             bit IsCommutable> {
4825   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
4826   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4827                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
4828                   "$src2, $src1", "$src1, $src2",
4829                   (_.VT (OpNode _.RC:$src1, _.RC:$src2)), itins.rr,
4830                   IsCommutable>, EVEX_4V, Sched<[itins.Sched]>;
4831   let mayLoad = 1 in {
4832     defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4833                     (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
4834                     "$src2, $src1", "$src1, $src2",
4835                     (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), itins.rm>,
4836                     EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
4837     defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4838                      (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
4839                      "${src2}"##_.BroadcastStr##", $src1",
4840                      "$src1, ${src2}"##_.BroadcastStr,
4841                      (OpNode  _.RC:$src1, (_.VT (X86VBroadcast
4842                                                 (_.ScalarLdFrag addr:$src2)))),
4843                      itins.rm>, EVEX_4V, EVEX_B,
4844                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
4845     }
4846   }
4847 }
4848
// Packed FP register-register form with an explicit rounding-control operand
// (rrb): AVX512RC:$rc is passed to OpNodeRnd as (i32 imm:$rc). Tagged EVEX_B
// and EVEX_RC.
4849 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
4850                                   OpndItins itins, X86VectorVTInfo _> {
4851   let ExeDomain = _.ExeDomain in
4852   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4853                   (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
4854                   "$rc, $src2, $src1", "$src1, $src2, $rc",
4855                   (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc))), itins.rr>,
4856                   EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
4857 }
4858
// Packed FP register-register form printed with "{sae}" (rrb): OpNodeRnd is
// matched with a fixed (i32 FROUND_NO_EXC) operand and there is no $rc
// operand. Tagged EVEX_B but not EVEX_RC.
4859 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
4860                                 OpndItins itins, X86VectorVTInfo _> {
4861   let ExeDomain = _.ExeDomain in
4862   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4863                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
4864                   "{sae}, $src2, $src1", "$src1, $src2, {sae}",
4865                   (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC))), itins.rr>,
4866                   EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
4867 }
4868
// Instantiates avx512_fp_packed for every packed FP type and vector length.
//   prd - predicate required for the 512-bit forms; the 128/256-bit forms
//         additionally require HasVLX.
// PS* variants use the f32 infos with itins.s and PS; PD* variants use the
// f64 infos with itins.d, PD and VEX_W.
4869 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
4870                              Predicate prd, SizeItins itins,
4871                              bit IsCommutable = 0> {
4872   let Predicates = [prd] in {
4873   defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
4874                               itins.s, IsCommutable>, EVEX_V512, PS,
4875                               EVEX_CD8<32, CD8VF>;
4876   defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
4877                               itins.d, IsCommutable>, EVEX_V512, PD, VEX_W,
4878                               EVEX_CD8<64, CD8VF>;
4879   }
4880
4881     // Define only if AVX512VL feature is present.
4882   let Predicates = [prd, HasVLX] in {
4883     defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
4884                                    itins.s, IsCommutable>, EVEX_V128, PS,
4885                                    EVEX_CD8<32, CD8VF>;
4886     defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
4887                                    itins.s, IsCommutable>, EVEX_V256, PS,
4888                                    EVEX_CD8<32, CD8VF>;
4889     defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
4890                                    itins.d, IsCommutable>, EVEX_V128, PD, VEX_W,
4891                                    EVEX_CD8<64, CD8VF>;
4892     defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
4893                                    itins.d, IsCommutable>, EVEX_V256, PD, VEX_W,
4894                                    EVEX_CD8<64, CD8VF>;
4895   }
4896 }
4897
// 512-bit packed single (PSZ) and double (PDZ) instantiations of
// avx512_fp_round_packed. Only the 512-bit vector infos are instantiated
// here; no 128/256-bit variants are defined.
4898 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
4899                                    SizeItins itins> {
4900   defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, itins.s, v16f32_info>,
4901                               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
4902   defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, itins.d, v8f64_info>,
4903                               EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
4904 }
4905
// 512-bit packed single (PSZ) and double (PDZ) instantiations of
// avx512_fp_sae_packed. Only the 512-bit vector infos are instantiated here;
// no 128/256-bit variants are defined.
4906 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
4907                                  SizeItins itins> {
4908   defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, itins.s, v16f32_info>,
4909                               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
4910   defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, itins.d, v8f64_info>,
4911                               EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
4912 }
4913
// Packed FP arithmetic instruction families.
// Add/mul/sub/div combine the plain packed forms with embedded-rounding
// forms; min/max combine them with {sae} forms. Add and mul are commutable
// (1); sub and div use the default (0); min and max pass 0 explicitly.
4914 defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
4915                               SSE_ALU_ITINS_P, 1>,
4916             avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SSE_ALU_ITINS_P>;
4917 defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
4918                               SSE_MUL_ITINS_P, 1>,
4919             avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SSE_MUL_ITINS_P>;
4920 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, SSE_ALU_ITINS_P>,
4921             avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SSE_ALU_ITINS_P>;
4922 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, SSE_DIV_ITINS_P>,
4923             avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SSE_DIV_ITINS_P>;
4924 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
4925                               SSE_ALU_ITINS_P, 0>,
4926             avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SSE_ALU_ITINS_P>;
4927 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
4928                               SSE_ALU_ITINS_P, 0>,
4929             avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SSE_ALU_ITINS_P>;
// Commutable min/max variants (X86fminc / X86fmaxc): same opcodes and
// mnemonics as VMIN / VMAX, marked codegen-only and commutable.
4930 let isCodeGenOnly = 1 in {
4931   defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
4932                                  SSE_ALU_ITINS_P, 1>;
4933   defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
4934                                  SSE_ALU_ITINS_P, 1>;
4935 }
// FP-domain logic instructions: predicated on HasDQI and defined with
// null_frag as OpNode, so these definitions carry no selection pattern of
// their own. VANDN is the only non-commutable one.
4936 defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
4937                                SSE_ALU_ITINS_P, 1>;
4938 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
4939                                SSE_ALU_ITINS_P, 0>;
4940 defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
4941                                SSE_ALU_ITINS_P, 1>;
4942 defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
4943                                SSE_ALU_ITINS_P, 1>;
4944
4945 // Patterns catch floating point selects with bitcasted integer logic ops.
// Selection patterns that map FP-typed results of integer logic ops onto the
// instructions named by InstrStr.
//   InstrStr - instruction name prefix; the suffixes rrk, rrkz, rmk, rmkz,
//              rmb, rmbk and rmbkz are appended and resolved with !cast.
//   OpNode   - integer logic node, matched on _.i64VT.
//   _        - FP vector type info (the vselect result type is _.VT).
//   prd      - predicate guarding all patterns.
// Masked forms match a vselect on _.KRCWM:$mask whose false operand is either
// _.RC:$src0 (merge, "k" suffix) or _.ImmAllZerosV (zeroing, "kz" suffix).
4946 multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
4947                                       X86VectorVTInfo _, Predicate prd> {
4948 let Predicates = [prd] in {
4949   // Masked register-register logical operations.
4950   def : Pat<(_.VT (vselect _.KRCWM:$mask,
4951                    (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
4952                    _.RC:$src0)),
4953             (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
4954              _.RC:$src1, _.RC:$src2)>;
4955   def : Pat<(_.VT (vselect _.KRCWM:$mask,
4956                    (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
4957                    _.ImmAllZerosV)),
4958             (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
4959              _.RC:$src2)>;
4960   // Masked register-memory logical operations.
4961   def : Pat<(_.VT (vselect _.KRCWM:$mask,
4962                    (bitconvert (_.i64VT (OpNode _.RC:$src1,
4963                                          (load addr:$src2)))),
4964                    _.RC:$src0)),
4965             (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
4966              _.RC:$src1, addr:$src2)>;
4967   def : Pat<(_.VT (vselect _.KRCWM:$mask,
4968                    (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
4969                    _.ImmAllZerosV)),
4970             (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
4971              addr:$src2)>;
4972   // Register-broadcast logical operations.
  // The first pattern is unmasked; the following two are the merge- and
  // zero-masked broadcast forms.
4973   def : Pat<(_.i64VT (OpNode _.RC:$src1,
4974                       (bitconvert (_.VT (X86VBroadcast
4975                                          (_.ScalarLdFrag addr:$src2)))))),
4976             (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
4977   def : Pat<(_.VT (vselect _.KRCWM:$mask,
4978                    (bitconvert
4979                     (_.i64VT (OpNode _.RC:$src1,
4980                               (bitconvert (_.VT
4981                                            (X86VBroadcast
4982                                             (_.ScalarLdFrag addr:$src2))))))),
4983                    _.RC:$src0)),
4984             (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
4985              _.RC:$src1, addr:$src2)>;
4986   def : Pat<(_.VT (vselect _.KRCWM:$mask,
4987                    (bitconvert
4988                     (_.i64VT (OpNode _.RC:$src1,
4989                               (bitconvert (_.VT
4990                                            (X86VBroadcast
4991                                             (_.ScalarLdFrag addr:$src2))))))),
4992                    _.ImmAllZerosV)),
4993             (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
4994              _.RC:$src1, addr:$src2)>;
4995 }
4996 }
4997
// Instantiates avx512_fp_logical_lowering for all six FP vector types.
// f32 vectors map to the "D" instruction variants and f64 vectors to the "Q"
// variants; the Z128 / Z256 forms are predicated on HasVLX and the 512-bit Z
// forms on HasAVX512.
4998 multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
4999   defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
5000   defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
5001   defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
5002   defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
5003   defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
5004   defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
5005 }
5006
// Emit the FP-typed logic lowering patterns for each integer logic
// instruction family (VPAND, VPOR, VPXOR, VPANDN).
5007 defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
5008 defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
5009 defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
5010 defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
5011
// Scalar f64 / f32 logic nodes (X86fand, X86for, X86fxor, X86fandn) are
// selected to the 128-bit packed PD / PS instructions: both scalar operands
// are copied to VR128X, the packed instruction is applied, and the result is
// copied back to FR64X / FR32X. Requires both VLX and DQI.
5012 let Predicates = [HasVLX,HasDQI] in {
5013   // Use packed logical operations for scalar ops.
5014   def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
5015             (COPY_TO_REGCLASS (VANDPDZ128rr
5016                                (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5017                                (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5018   def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
5019             (COPY_TO_REGCLASS (VORPDZ128rr
5020                                (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5021                                (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5022   def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
5023             (COPY_TO_REGCLASS (VXORPDZ128rr
5024                                (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5025                                (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5026   def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
5027             (COPY_TO_REGCLASS (VANDNPDZ128rr
5028                                (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5029                                (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5030
  // Same four operations for f32, using the PS instructions.
5031   def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
5032             (COPY_TO_REGCLASS (VANDPSZ128rr
5033                                (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5034                                (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5035   def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
5036             (COPY_TO_REGCLASS (VORPSZ128rr
5037                                (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5038                                (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5039   def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
5040             (COPY_TO_REGCLASS (VXORPSZ128rr
5041                                (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5042                                (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5043   def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
5044             (COPY_TO_REGCLASS (VANDNPSZ128rr
5045                                (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5046                                (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5047 }
5048
// Packed forms of an FP operation whose node takes a trailing rounding-mode
// operand; every pattern here fixes that operand to (i32 FROUND_CURRENT).
//   rr  - register-register.
//   rm  - register-memory; $src2 loaded with _.LdFrag.
//   rmb - register-broadcast; $src2 is a scalar load (_.ScalarLdFrag)
//         expanded with X86VBroadcast, tagged EVEX_B.
// The mnemonic is OpcodeStr followed by _.Suffix.
5049 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5050                               OpndItins itins, X86VectorVTInfo _> {
5051   let ExeDomain = _.ExeDomain in {
5052   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5053                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5054                   "$src2, $src1", "$src1, $src2",
5055                   (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT))),
5056                   itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
5057   defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5058                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5059                   "$src2, $src1", "$src1, $src2",
5060                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT)),
5061                   itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
5062   defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5063                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5064                    "${src2}"##_.BroadcastStr##", $src1",
5065                    "$src1, ${src2}"##_.BroadcastStr,
5066                    (OpNode  _.RC:$src1, (_.VT (X86VBroadcast
5067                                               (_.ScalarLdFrag addr:$src2))),
5068                                               (i32 FROUND_CURRENT)), itins.rm>,
5069                    EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
5070   }
5071 }
5072
// Scalar forms of a scalef-style FP operation for one scalar type info `_`.
// Both forms are built on AVX512_maskable_scalar, use the mnemonic
// OpcodeStr # _.Suffix, and pass (i32 FROUND_CURRENT) as the third operand
// of OpNode:
//   rr - both sources in registers.
//   rm - second source is an _.IntScalarMemOp matched through the
//        _.ScalarIntMemCPat complex pattern.
// Unlike the packed multiclass, no EVEX_4V is attached here; the
// instantiation site supplies it.
5073 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5074                                    OpndItins itins, X86VectorVTInfo _> {
5075   let ExeDomain = _.ExeDomain in {
5076   defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5077                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5078                   "$src2, $src1", "$src1, $src2",
5079                   (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT))), itins.rr>,
5080                   Sched<[itins.Sched]>;
5081   defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5082                   (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
5083                   "$src2, $src1", "$src1, $src2",
5084                   (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2,
5085                           (i32 FROUND_CURRENT)), itins.rm>,
5086                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
5087   }
5088 }
5089
// Instantiates every width of a scalef-style operation.
//   opc / OpNode           - opcode and DAG node for the packed forms.
//   opcScaler / OpNodeScal - opcode and DAG node for the scalar forms.
// PSZ/PDZ (512-bit) combine avx512_fp_scalef_p with avx512_fp_round_packed;
// SSZ128/SDZ128 combine avx512_fp_scalef_scalar with avx512_fp_scalar_round
// (which receives the mnemonic with "ss"/"sd" already appended).
// The 128/256-bit packed forms use avx512_fp_scalef_p only and are guarded
// by HasVLX. The 64-bit element variants additionally carry VEX_W.
5090 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode, SDNode OpNodeScal> {
5091   defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v16f32_info>,
5092              avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v16f32_info>,
5093                               EVEX_V512, EVEX_CD8<32, CD8VF>;
5094   defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v8f64_info>,
5095              avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v8f64_info>,
5096                               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5097   defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, SSE_ALU_F32S, f32x_info>,
5098                 avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, SSE_ALU_ITINS_S.s>,
5099                               EVEX_4V,EVEX_CD8<32, CD8VT1>;
5100   defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, SSE_ALU_F64S, f64x_info>,
5101                 avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, SSE_ALU_ITINS_S.d>,
5102                               EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
5103
5104   // Define only if AVX512VL feature is present.
5105   let Predicates = [HasVLX] in {
5106     defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v4f32x_info>,
5107                                    EVEX_V128, EVEX_CD8<32, CD8VF>;
5108     defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v8f32x_info>,
5109                                    EVEX_V256, EVEX_CD8<32, CD8VF>;
5110     defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v2f64x_info>,
5111                                    EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5112     defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v4f64x_info>,
5113                                    EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
5114   }
5115 }
// VSCALEF: packed forms use opcode 0x2C with X86scalef, scalar forms use
// opcode 0x2D with X86scalefs; all are in the T8PD opcode map/prefix class.
5116 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs>, T8PD;
5117
5118 //===----------------------------------------------------------------------===//
5119 // AVX-512  VPTESTM instructions
5120 //===----------------------------------------------------------------------===//
5121
// Mask-producing test instruction forms for one vector type `_`, built on
// AVX512_maskable_cmp with a _.KRC destination:
//   rr - both sources in registers; only this form is marked isCommutable
//        (the brace-less `let` applies to the single following defm).
//   rm - second source loaded via _.LdFrag and bitconverted to _.VT.
// The rm form also sets the EVEX_CD8 attribute from _.EltSize.
5122 multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode,
5123                          OpndItins itins, X86VectorVTInfo _> {
5124   let ExeDomain = _.ExeDomain in {
5125   let isCommutable = 1 in
5126   defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5127                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5128                       "$src2, $src1", "$src1, $src2",
5129                    (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>,
5130                    EVEX_4V, Sched<[itins.Sched]>;
5131   defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5132                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5133                        "$src2, $src1", "$src1, $src2",
5134                    (OpNode (_.VT _.RC:$src1),
5135                     (_.VT (bitconvert (_.LdFrag addr:$src2)))), itins.rm>,
5136                    EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5137                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
5138   }
5139 }
5140
// Broadcast-memory form (rmb) of the mask-producing test instruction: the
// second source is a scalar load (_.ScalarLdFrag) wrapped in X86VBroadcast.
// Encoded with EVEX_B and printed with _.BroadcastStr after the operand.
5141 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5142                             OpndItins itins, X86VectorVTInfo _> {
5143   let ExeDomain = _.ExeDomain in
5144   defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5145                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5146                     "${src2}"##_.BroadcastStr##", $src1",
5147                     "$src1, ${src2}"##_.BroadcastStr,
5148                     (OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast
5149                                                 (_.ScalarLdFrag addr:$src2)))),
5150                     itins.rm>, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5151                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
5152 }
5153
5154 // Use 512bit version to implement 128/256 bit in case NoVLX.
// The pattern widens both narrow sources to ExtendInfo.VT by inserting them
// at _.SubRegIdx into an IMPLICIT_DEF, runs the instruction named
// NAME # Suffix # "Zrr", and copies the resulting mask to _.KRC.
//   ExtendInfo - type info of the wide (512-bit) instruction actually used.
//   _          - type info of the narrow operation being matched.
//   Suffix     - element-size letter pasted into the instruction name.
// NOTE(review): the widened lanes come from IMPLICIT_DEF, so mask bits above
// _.NumElts are presumably unspecified - confirm consumers ignore them.
5155 multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo,
5156                                   X86VectorVTInfo _, string Suffix> {
5157     def : Pat<(_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
5158               (_.KVT (COPY_TO_REGCLASS
5159                        (!cast<Instruction>(NAME # Suffix # "Zrr")
5160                          (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5161                                         _.RC:$src1, _.SubRegIdx),
5162                          (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5163                                         _.RC:$src2, _.SubRegIdx)),
5164                      _.KRC))>;
5165 }
5166
// Instantiates the rr/rm/rmb test forms at all three vector widths for a
// dword/qword element type described by `_`:
//   Z           - 512-bit, requires HasAVX512.
//   Z256, Z128  - 256/128-bit, require HasAVX512 and HasVLX.
//   Z256_Alt,
//   Z128_Alt    - pattern-only fallbacks under NoVLX that widen to the
//                 512-bit instruction (see avx512_vptest_lowering). Suffix
//                 selects which 512-bit instruction name is pasted together.
5167 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5168                                   OpndItins itins, AVX512VLVectorVTInfo _,
5169                                   string Suffix> {
5170   let Predicates  = [HasAVX512] in
5171   defm Z : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info512>,
5172            avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
5173
5174   let Predicates = [HasAVX512, HasVLX] in {
5175   defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info256>,
5176               avx512_vptest_mb<opc, OpcodeStr, OpNode,itins,  _.info256>, EVEX_V256;
5177   defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info128>,
5178               avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
5179   }
5180   let Predicates = [HasAVX512, NoVLX] in {
5181   defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, Suffix>;
5182   defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128, Suffix>;
5183   }
5184 }
5185
// Dword ("d") and qword ("q") variants of a test instruction. Both share
// `opc`; the qword variant additionally carries VEX_W. The "D"/"Q" strings
// are the Suffix used by the NoVLX fallback patterns.
5186 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
5187                             OpndItins itins> {
5188   defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, itins,
5189                                  avx512vl_i32_info, "D">;
5190   defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, itins,
5191                                  avx512vl_i64_info, "Q">, VEX_W;
5192 }
5193
// Word ("w") and byte ("b") variants of a test instruction. Only rr/rm forms
// are produced (avx512_vptest); there is no broadcast form here.
//   WZ, BZ                       - 512-bit, require HasBWI.
//   WZ256/WZ128, BZ256/BZ128     - 256/128-bit, require HasVLX and HasBWI.
//   *_Alt                        - pattern-only fallbacks used when VLX is
//                                  absent; they widen to the 512-bit WZ/BZ
//                                  instruction (see avx512_vptest_lowering).
// The word variants carry VEX_W; the byte variants do not.
5194 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5195                                  SDNode OpNode, OpndItins itins> {
5196   let Predicates = [HasBWI] in {
5197   defm WZ:    avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v32i16_info>,
5198               EVEX_V512, VEX_W;
5199   defm BZ:    avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v64i8_info>,
5200               EVEX_V512;
5201   }
5202   let Predicates = [HasVLX, HasBWI] in {
5203
5204   defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v16i16x_info>,
5205               EVEX_V256, VEX_W;
5206   defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v8i16x_info>,
5207               EVEX_V128, VEX_W;
5208   defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v32i8x_info>,
5209               EVEX_V256;
5210   defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v16i8x_info>,
5211               EVEX_V128;
5212   }
5213
       // The fallback patterns emit the 512-bit BZ/WZ instructions defined
       // above under HasBWI, so they must require HasBWI as well (not merely
       // HasAVX512); otherwise a pattern could select an instruction whose own
       // predicate is not satisfied.
5214   let Predicates = [HasBWI, NoVLX] in {
5215   defm BZ256_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v32i8x_info, "B">;
5216   defm BZ128_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v16i8x_info, "B">;
5217   defm WZ256_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v16i16x_info, "W">;
5218   defm WZ128_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v8i16x_info, "W">;
5219   }
5220 }
5221
// Convenience multiclass that inherits both the word/byte forms (opcode
// opc_wb) and the dword/qword forms (opcode opc_dq) of a test instruction.
5222 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5223                                    SDNode OpNode, OpndItins itins> :
5224   avx512_vptest_wb <opc_wb, OpcodeStr, OpNode, itins>,
5225   avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, itins>;
5226
// VPTESTM and VPTESTNM share opcodes 0x26 (byte/word) and 0x27 (dword/qword);
// they are distinguished by the T8PD vs. T8XS prefix class and by the DAG
// node they match (X86testm vs. X86testnm).
5227 defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm,
5228                                          SSE_BIT_ITINS_P>, T8PD;
5229 defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm,
5230                                          SSE_BIT_ITINS_P>, T8XS;
5231
5232
5233 //===----------------------------------------------------------------------===//
5234 // AVX-512  Shift instructions
5235 //===----------------------------------------------------------------------===//
// Shift/rotate-by-immediate forms for one vector type `_`, built on
// AVX512_maskable with a u8imm count matched as (i8 imm):
//   ri - vector source in a register (instruction format ImmFormR).
//   mi - vector source loaded via _.LdFrag and bitconverted to _.VT
//        (instruction format ImmFormM).
// The ri/mi forms use different Format arguments, passed in separately by the
// caller as ImmFormR and ImmFormM.
5236 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5237                             string OpcodeStr, SDNode OpNode, OpndItins itins,
5238                             X86VectorVTInfo _> {
5239   let ExeDomain = _.ExeDomain in {
5240   defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5241                    (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5242                       "$src2, $src1", "$src1, $src2",
5243                    (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
5244                    itins.rr>, Sched<[itins.Sched]>;
5245   defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5246                    (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5247                        "$src2, $src1", "$src1, $src2",
5248                    (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
5249                           (i8 imm:$src2))),
5250                    itins.rm>, Sched<[itins.Sched.Folded]>;
5251   }
5252 }
5253
// Broadcast-memory shift/rotate-by-immediate form (mbi): the vector source is
// a scalar load (_.ScalarLdFrag) wrapped in X86VBroadcast, encoded with
// EVEX_B, and the count is a u8imm matched as (i8 imm).
5254 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5255                              string OpcodeStr, SDNode OpNode, OpndItins itins,
5256                              X86VectorVTInfo _> {
5257   let ExeDomain = _.ExeDomain in
5258   defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5259                    (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5260       "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
5261      (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))),
5262      itins.rm>, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
5263 }
5264
// Shift forms whose count comes from a 128-bit vector operand, regardless of
// the width of the shifted vector `_`:
//   rr - count in a VR128X register, typed as SrcVT.
//   rm - count loaded with loadv2i64 from an i128mem operand and passed
//        through bc_frag.
//   SrcVT   - value type given to the register count operand.
//   bc_frag - bitconvert fragment applied to the loaded count.
5265 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5266                             OpndItins itins, ValueType SrcVT, PatFrag bc_frag,
5267                             X86VectorVTInfo _> {
5268    // src2 is always 128-bit
5269   let ExeDomain = _.ExeDomain in {
5270   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5271                    (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5272                       "$src2, $src1", "$src1, $src2",
5273                    (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2))),
5274                    itins.rr>, AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;
5275   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5276                    (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5277                        "$src2, $src1", "$src1, $src2",
5278                    (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2)))),
5279                    itins.rm>, AVX512BIBase,
5280                    EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
5281   }
5282 }
5283
// Instantiates avx512_shift_rrm at 512/256/128 bits for the element type in
// VTInfo. The 512-bit form requires `prd`; the narrower forms require `prd`
// plus HasVLX. Because the memory count operand is always 128 bits, the
// EVEX_CD8 vector form differs per width: CD8VQ for 512-bit, CD8VH for
// 256-bit and CD8VF for 128-bit.
5284 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5285                               OpndItins itins, ValueType SrcVT, PatFrag bc_frag,
5286                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
5287   let Predicates = [prd] in
5288   defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
5289                             VTInfo.info512>, EVEX_V512,
5290                             EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5291   let Predicates = [prd, HasVLX] in {
5292   defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
5293                             VTInfo.info256>, EVEX_V256,
5294                             EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5295   defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
5296                             VTInfo.info128>, EVEX_V128,
5297                             EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5298   }
5299 }
5300
// Dword, qword and word variants of a shift-by-xmm-count instruction, each
// with its own opcode (opcd/opcq/opcw). D and Q require HasAVX512; W requires
// HasBWI. Only the Q variant carries VEX_W.
// NOTE(review): D and Q pair SrcVT with the same-typed bitconvert fragment
// (v4i32/bc_v4i32, v2i64/bc_v2i64), but W pairs v8i16 with bc_v2i64 - confirm
// this is intended and that the rm pattern can still match word shifts.
5301 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5302                               string OpcodeStr, SDNode OpNode,
5303                               OpndItins itins> {
5304   defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, itins, v4i32,
5305                               bc_v4i32, avx512vl_i32_info, HasAVX512>;
5306   defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, itins, v2i64,
5307                               bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W;
5308   defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, itins, v8i16,
5309                               bc_v2i64, avx512vl_i16_info, HasBWI>;
5310 }
5311
// Instantiates the immediate-count forms (ri, mi via avx512_shift_rmi and
// mbi via avx512_shift_rmbi) at 512/256/128 bits for the element type in
// VTInfo. The 512-bit form requires HasAVX512; the narrower forms require
// HasAVX512 and HasVLX.
5312 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5313                                   string OpcodeStr, SDNode OpNode,
5314                                   OpndItins itins, AVX512VLVectorVTInfo VTInfo> {
5315   let Predicates = [HasAVX512] in
5316   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, itins,
5317                               VTInfo.info512>,
5318              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
5319                               VTInfo.info512>, EVEX_V512;
5320   let Predicates = [HasAVX512, HasVLX] in {
5321   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, itins,
5322                               VTInfo.info256>,
5323              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
5324                               VTInfo.info256>, EVEX_V256;
5325   defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5326                               itins, VTInfo.info128>,
5327              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
5328                               VTInfo.info128>, EVEX_V128;
5329   }
5330 }
5331
// Word-element immediate-count shift forms (ri/mi only; no broadcast form).
// The 512-bit form requires HasBWI; the 256/128-bit forms require HasVLX and
// HasBWI. All widths are marked VEX_WIG.
5332 multiclass avx512_shift_rmi_w<bits<8> opcw,
5333                                  Format ImmFormR, Format ImmFormM,
5334                                  string OpcodeStr, SDNode OpNode,
5335                                  OpndItins itins> {
5336   let Predicates = [HasBWI] in
5337   defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5338                                itins, v32i16_info>, EVEX_V512, VEX_WIG;
5339   let Predicates = [HasVLX, HasBWI] in {
5340   defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5341                                itins, v16i16x_info>, EVEX_V256, VEX_WIG;
5342   defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5343                                itins, v8i16x_info>, EVEX_V128, VEX_WIG;
5344   }
5345 }
5346
// Dword ("d") and qword ("q") immediate-count shift/rotate variants, with
// separate opcodes opcd/opcq. Each sets EVEX_CD8 for its element size; the
// qword variant additionally carries VEX_W.
5347 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5348                                  Format ImmFormR, Format ImmFormM,
5349                                  string OpcodeStr, SDNode OpNode, OpndItins itins> {
5350   defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5351                                  itins, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5352   defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5353                                  itins, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
5354 }
5355
// Immediate-count shifts and rotates. Instructions that share an opcode byte
// are told apart by their Format (MRM0..MRM6 r/m). VPSRL and VPSLL use
// 0x72 (d) / 0x73 (q) / 0x71 (w); VPSRA, VPROR and VPROL pass 0x72 for both
// the d and q variants. The rotates have no word form.
5356 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5357                                  SSE_INTSHIFT_P>,
5358              avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5359                                 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
5360
5361 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5362                                  SSE_INTSHIFT_P>,
5363              avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5364                                 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
5365
5366 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5367                                  SSE_INTSHIFT_P>,
5368              avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5369                                 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
5370
5371 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5372                                  SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
5373 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5374                                  SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
5375
// Shifts whose count comes from a 128-bit vector operand (opcodes given in
// d, q, w order). These reuse the VPSLL/VPSRA/VPSRL defm prefixes.
5376 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, SSE_INTSHIFT_P>;
5377 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, SSE_INTSHIFT_P>;
5378 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, SSE_INTSHIFT_P>;
5379
5380 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Each pattern inserts the narrow source into an IMPLICIT_DEF v8i64 at
// sub_ymm/sub_xmm, applies the 512-bit VPSRAQZrr/VPSRAQZri, and extracts the
// same subregister from the result. The count operand (xmm register or
// immediate) is passed through unchanged.
5381 let Predicates = [HasAVX512, NoVLX] in {
5382   def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5383             (EXTRACT_SUBREG (v8i64
5384               (VPSRAQZrr
5385                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5386                  VR128X:$src2)), sub_ymm)>;
5387
5388   def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5389             (EXTRACT_SUBREG (v8i64
5390               (VPSRAQZrr
5391                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5392                  VR128X:$src2)), sub_xmm)>;
5393
5394   def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
5395             (EXTRACT_SUBREG (v8i64
5396               (VPSRAQZri
5397                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5398                  imm:$src2)), sub_ymm)>;
5399
5400   def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
5401             (EXTRACT_SUBREG (v8i64
5402               (VPSRAQZri
5403                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5404                  imm:$src2)), sub_xmm)>;
5405 }
5406
5407 //===-------------------------------------------------------------------===//
5408 // Variable Bit Shifts
5409 //===-------------------------------------------------------------------===//
// Variable (per-element count) shift/rotate forms for one vector type `_`,
// where the count operand is a full vector of the same type as the data:
//   rr - count vector in a register.
//   rm - count vector loaded via _.LdFrag and bitconverted to _.VT.
// Both forms are built on AVX512_maskable and tagged AVX5128IBase, EVEX_4V.
5410 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5411                             OpndItins itins, X86VectorVTInfo _> {
5412   let ExeDomain = _.ExeDomain in {
5413   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5414                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5415                       "$src2, $src1", "$src1, $src2",
5416                    (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))),
5417                    itins.rr>, AVX5128IBase, EVEX_4V,
5418                    Sched<[itins.Sched]>;
5419   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5420                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5421                        "$src2, $src1", "$src1, $src2",
5422                    (_.VT (OpNode _.RC:$src1,
5423                    (_.VT (bitconvert (_.LdFrag addr:$src2))))),
5424                    itins.rm>, AVX5128IBase, EVEX_4V,
5425                    EVEX_CD8<_.EltSize, CD8VF>,
5426                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
5427   }
5428 }
5429
// Broadcast-memory form (rmb) of a variable shift/rotate: the count vector is
// a scalar load (_.ScalarLdFrag) wrapped in X86VBroadcast, encoded with
// EVEX_B and printed with _.BroadcastStr after the memory operand.
5430 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5431                                OpndItins itins, X86VectorVTInfo _> {
5432   let ExeDomain = _.ExeDomain in
5433   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5434                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5435                     "${src2}"##_.BroadcastStr##", $src1",
5436                     "$src1, ${src2}"##_.BroadcastStr,
5437                     (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5438                                                 (_.ScalarLdFrag addr:$src2))))),
5439                     itins.rm>, AVX5128IBase, EVEX_B,
5440                     EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5441                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
5442 }
5443
// Instantiates the rr/rm/rmb variable-shift forms at 512/256/128 bits for
// the element type in `_`. The 512-bit form requires HasAVX512; the narrower
// forms require HasAVX512 and HasVLX.
5444 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5445                                   OpndItins itins, AVX512VLVectorVTInfo _> {
5446   let Predicates  = [HasAVX512] in
5447   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
5448            avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
5449
5450   let Predicates = [HasAVX512, HasVLX] in {
5451   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
5452               avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
5453   defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>,
5454               avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
5455   }
5456 }
5457
// Dword ("d") and qword ("q") variants of a variable shift/rotate. Both share
// `opc`; the qword variant additionally carries VEX_W.
5458 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
5459                                  SDNode OpNode, OpndItins itins> {
5460   defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, itins,
5461                                  avx512vl_i32_info>;
5462   defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, itins,
5463                                  avx512vl_i64_info>, VEX_W;
5464 }
5465
5466 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Emits two patterns (256-bit and 128-bit) under predicate list `p`. Each
// inserts both narrow operands into IMPLICIT_DEF 512-bit vectors at
// sub_ymm/sub_xmm, applies the instruction named OpcodeStr # "Zrr", and
// extracts the same subregister from the result.
//   _         - the 128/256/512 type-info triple for the element type.
//   OpcodeStr - instruction-name prefix (e.g. the defm name), not a mnemonic.
//   p         - predicates guarding the patterns.
5467 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
5468                                      SDNode OpNode, list<Predicate> p> {
5469   let Predicates = p in {
5470   def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
5471                                   (_.info256.VT _.info256.RC:$src2))),
5472             (EXTRACT_SUBREG
5473                 (!cast<Instruction>(OpcodeStr#"Zrr")
5474                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5475                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5476              sub_ymm)>;
5477
5478   def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
5479                                   (_.info128.VT _.info128.RC:$src2))),
5480             (EXTRACT_SUBREG
5481                 (!cast<Instruction>(OpcodeStr#"Zrr")
5482                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5483                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5484              sub_xmm)>;
5485   }
5486 }
// Word-element variable shift forms (rr/rm only; no broadcast form). The
// 512-bit form requires HasBWI; the 256/128-bit forms require HasVLX and
// HasBWI. All widths carry VEX_W. OpcodeStr is used as given (the caller
// passes the full mnemonic including the "w").
5487 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
5488                               SDNode OpNode, OpndItins itins> {
5489   let Predicates = [HasBWI] in
5490   defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i16_info>,
5491               EVEX_V512, VEX_W;
5492   let Predicates = [HasVLX, HasBWI] in {
5493
5494   defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v16i16x_info>,
5495               EVEX_V256, VEX_W;
5496   defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v8i16x_info>,
5497               EVEX_V128, VEX_W;
5498   }
5499 }
5500
// Variable shifts: d/q forms plus a word form with its own opcode, matched
// directly against the generic shl/sra/srl nodes.
5501 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SSE_INTSHIFT_P>,
5502               avx512_var_shift_w<0x12, "vpsllvw", shl, SSE_INTSHIFT_P>;
5503
5504 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SSE_INTSHIFT_P>,
5505               avx512_var_shift_w<0x11, "vpsravw", sra, SSE_INTSHIFT_P>;
5506
5507 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SSE_INTSHIFT_P>,
5508               avx512_var_shift_w<0x10, "vpsrlvw", srl, SSE_INTSHIFT_P>;
5509
// Variable rotates: d/q forms only, matched against rotr/rotl.
5510 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SSE_INTSHIFT_P>;
5511 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SSE_INTSHIFT_P>;
5512
// NoVLX fallbacks: implement the 128/256-bit operations with the 512-bit
// instruction. The qword arithmetic shift needs only HasAVX512; the word
// shifts need HasBWI.
5513 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
5514 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
5515 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
5516 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
5517
5518 // Special handling for VPSRAV intrinsics.
// Selection patterns mapping the X86vsrav node onto already-defined
// instructions named InstrStr # _.ZSuffix # <form>, under predicate list `p`.
// Six patterns are emitted, covering register (rr*) and full-vector memory
// (rm*) counts in three masking flavours:
//   rr   / rm    - unmasked.
//   rrk  / rmk   - vselect with pass-through $src0 (merge masking).
//   rrkz / rmkz  - vselect with _.ImmAllZerosV (zero masking).
// The mask is matched as _.KRCWM and passed to the instruction as _.KRC.
5519 multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
5520                                          list<Predicate> p> {
5521   let Predicates = p in {
5522     def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
5523               (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
5524                _.RC:$src2)>;
5525     def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
5526               (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
5527                _.RC:$src1, addr:$src2)>;
5528     def : Pat<(_.VT (vselect _.KRCWM:$mask,
5529                      (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
5530               (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
5531                _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
5532     def : Pat<(_.VT (vselect _.KRCWM:$mask,
5533                      (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
5534                      _.RC:$src0)),
5535               (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
5536                _.KRC:$mask, _.RC:$src1, addr:$src2)>;
5537     def : Pat<(_.VT (vselect _.KRCWM:$mask,
5538                      (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
5539               (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
5540                _.RC:$src1, _.RC:$src2)>;
5541     def : Pat<(_.VT (vselect _.KRCWM:$mask,
5542                      (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
5543                      _.ImmAllZerosV)),
5544               (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
5545                _.RC:$src1, addr:$src2)>;
5546   }
5547 }
5548
// Extends avx512_var_shift_int_lowering (inherited, so its six patterns are
// emitted too) with three broadcast-memory patterns, where the count is a
// scalar load wrapped in X86VBroadcast:
//   rmb   - unmasked.
//   rmbk  - vselect with pass-through $src0 (merge masking).
//   rmbkz - vselect with _.ImmAllZerosV (zero masking).
5549 multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
5550                                          list<Predicate> p> :
5551            avx512_var_shift_int_lowering<InstrStr, _, p> {
5552   let Predicates = p in {
5553     def : Pat<(_.VT (X86vsrav _.RC:$src1,
5554                      (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
5555               (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
5556                _.RC:$src1, addr:$src2)>;
5557     def : Pat<(_.VT (vselect _.KRCWM:$mask,
5558                      (X86vsrav _.RC:$src1,
5559                       (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
5560                      _.RC:$src0)),
5561               (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
5562                _.KRC:$mask, _.RC:$src1, addr:$src2)>;
5563     def : Pat<(_.VT (vselect _.KRCWM:$mask,
5564                      (X86vsrav _.RC:$src1,
5565                       (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
5566                      _.ImmAllZerosV)),
5567               (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
5568                _.RC:$src1, addr:$src2)>;
5569   }
5570 }
5571
// X86vsrav lowering per type. The word forms use the variant without
// broadcast patterns; the dword/qword forms use the _mb variant. The 128/256-
// bit instantiations are guarded by HasVLX (plus HasBWI for words), the
// 512-bit ones by HasBWI (words) or HasAVX512 (dwords/qwords).
5572 defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
5573 defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
5574 defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
5575 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
5576 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
5577 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
5578 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
5579 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
5580 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
5581
5582
5583 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Each pattern inserts the narrow operand(s) into an IMPLICIT_DEF 512-bit
// vector at sub_xmm/sub_ymm, applies the 512-bit rotate, and extracts the
// same subregister. The first four patterns handle variable rotates (rotl
// -> VPROLVQ/VPROLVD, both operands widened); the last four handle
// immediate rotates (X86vrotli -> VPROLQ/VPROLD, immediate passed through).
5584 let Predicates = [HasAVX512, NoVLX] in {
5585   def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5586             (EXTRACT_SUBREG (v8i64
5587               (VPROLVQZrr
5588                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5589                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5590                        sub_xmm)>;
5591   def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5592             (EXTRACT_SUBREG (v8i64
5593               (VPROLVQZrr
5594                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5595                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
5596                        sub_ymm)>;
5597
5598   def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
5599             (EXTRACT_SUBREG (v16i32
5600               (VPROLVDZrr
5601                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5602                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5603                         sub_xmm)>;
5604   def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
5605             (EXTRACT_SUBREG (v16i32
5606               (VPROLVDZrr
5607                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5608                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
5609                         sub_ymm)>;
5610
5611   def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
5612             (EXTRACT_SUBREG (v8i64
5613               (VPROLQZri
5614                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5615                         imm:$src2)), sub_xmm)>;
5616   def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
5617             (EXTRACT_SUBREG (v8i64
5618               (VPROLQZri
5619                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5620                        imm:$src2)), sub_ymm)>;
5621
5622   def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
5623             (EXTRACT_SUBREG (v16i32
5624               (VPROLDZri
5625                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5626                         imm:$src2)), sub_xmm)>;
5627   def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
5628             (EXTRACT_SUBREG (v16i32
5629               (VPROLDZri
5630                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5631                         imm:$src2)), sub_ymm)>;
5632 }
5633
5634 // Use the 512-bit VPRORV (variable count) / VPROR (immediate count) instructions to implement v2i64/v4i64 + v4i32/v8i32 rotate-right in case NoVLX.
//
// Every pattern below has the same shape:
//   1. INSERT_SUBREG places each 128/256-bit source into the sub_xmm/sub_ymm
//      part of an IMPLICIT_DEF (undefined) 512-bit value.
//   2. The 512-bit instruction (VPRORV{Q,D}Zrr or VPROR{Q,D}Zri) is applied.
//   3. EXTRACT_SUBREG returns only the sub_xmm/sub_ymm part of the result,
//      so the elements computed from the undefined upper part are not used.
5635 let Predicates = [HasAVX512, NoVLX] in {
     // Variable rotate count (rotr), 64-bit elements.
5636   def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5637             (EXTRACT_SUBREG (v8i64
5638               (VPRORVQZrr
5639                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5640                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5641                        sub_xmm)>;
5642   def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5643             (EXTRACT_SUBREG (v8i64
5644               (VPRORVQZrr
5645                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5646                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
5647                        sub_ymm)>;
5648
     // Variable rotate count (rotr), 32-bit elements.
5649   def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
5650             (EXTRACT_SUBREG (v16i32
5651               (VPRORVDZrr
5652                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5653                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5654                         sub_xmm)>;
5655   def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
5656             (EXTRACT_SUBREG (v16i32
5657               (VPRORVDZrr
5658                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5659                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
5660                         sub_ymm)>;
5661
     // Immediate rotate count (X86vrotri with an i8 immediate), 64-bit elements.
5662   def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
5663             (EXTRACT_SUBREG (v8i64
5664               (VPRORQZri
5665                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5666                         imm:$src2)), sub_xmm)>;
5667   def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
5668             (EXTRACT_SUBREG (v8i64
5669               (VPRORQZri
5670                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5671                        imm:$src2)), sub_ymm)>;
5672
     // Immediate rotate count (X86vrotri with an i8 immediate), 32-bit elements.
5673   def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
5674             (EXTRACT_SUBREG (v16i32
5675               (VPRORDZri
5676                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5677                         imm:$src2)), sub_xmm)>;
5678   def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
5679             (EXTRACT_SUBREG (v16i32
5680               (VPRORDZri
5681                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5682                         imm:$src2)), sub_ymm)>;
5683 }
5684
5685 //===-------------------------------------------------------------------===//
5686 // 1-src variable permutation VPERMW/D/Q
5687 //===-------------------------------------------------------------------===//
// Defines the 512-bit (Z) and 256-bit (Z256) variants of a variable permute
// by reusing avx512_var_shift together with avx512_var_shift_mb.
//   opc / OpcodeStr / OpNode / itins : forwarded unchanged to both multiclasses.
//   _ : AVX512VLVectorVTInfo; info512 is used for Z, info256 for Z256.
// Z requires HasAVX512; Z256 additionally requires HasVLX.
// No 128-bit (Z128) variant is defined by this multiclass.
5688 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5689                                  OpndItins itins, AVX512VLVectorVTInfo _> {
5690   let Predicates  = [HasAVX512] in
5691   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
5692            avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
5693
5694   let Predicates = [HasAVX512, HasVLX] in
5695   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
5696               avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
5697 }
5698
// Defines the 512-bit (Z) and 256-bit (Z256) variants of an immediate-controlled
// permute by reusing avx512_shift_rmi together with avx512_shift_rmbi.
//   ImmFormR / ImmFormM : instruction formats; avx512_shift_rmi receives both,
//                         avx512_shift_rmbi receives only ImmFormM.
//   VTInfo : AVX512VLVectorVTInfo; info512 is used for Z, info256 for Z256.
// Z requires HasAVX512; Z256 additionally requires HasVLX.
// No 128-bit (Z128) variant is defined by this multiclass.
5699 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5700                                  string OpcodeStr, SDNode OpNode,
5701                                  OpndItins itins, AVX512VLVectorVTInfo VTInfo> {
5702   let Predicates = [HasAVX512] in
5703   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5704                               itins, VTInfo.info512>,
5705              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
5706                                itins, VTInfo.info512>, EVEX_V512;
5707   let Predicates = [HasAVX512, HasVLX] in
5708   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5709                               itins, VTInfo.info256>,
5710              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
5711                                itins, VTInfo.info256>, EVEX_V256;
5712 }
5713
// Defines the 512-bit (Z), 256-bit (Z256) and 128-bit (Z128) variants of a
// variable permute using avx512_var_shift only (avx512_var_shift_mb is not
// used here, unlike in avx512_vperm_dq_sizes).
//   prd : feature predicate required by all three variants.
//   _   : AVX512VLVectorVTInfo supplying info512/info256/info128.
// Z requires [prd]; Z256 and Z128 require [HasVLX, prd].
5714 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
5715                               Predicate prd, SDNode OpNode,
5716                               OpndItins itins, AVX512VLVectorVTInfo _> {
5717   let Predicates = [prd] in
5718   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
5719               EVEX_V512 ;
5720   let Predicates = [HasVLX, prd] in {
5721   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
5722               EVEX_V256 ;
5723   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>,
5724               EVEX_V128 ;
5725   }
5726 }
5727
// Word and byte variable permutes (X86VPermv) via avx512_vperm_bw.
// Both use opcode 0x8D; VPERMW is predicated on HasBWI and carries VEX_W,
// VPERMB is predicated on HasVBMI and does not.
5728 defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
5729                                AVX2_PERMV_I, avx512vl_i16_info>, VEX_W;
5730 defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
5731                                AVX2_PERMV_I, avx512vl_i8_info>;
5732
// Dword/qword variable permutes (X86VPermv) via avx512_vperm_dq_sizes, which
// defines the Z and Z256 variants only.
// Integer forms use opcode 0x36 with AVX2_PERMV_I; FP forms use opcode 0x16
// with AVX2_PERMV_F. The 64-bit element forms (VPERMQ, VPERMPD) add VEX_W.
5733 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
5734                                     AVX2_PERMV_I, avx512vl_i32_info>;
5735 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
5736                                     AVX2_PERMV_I, avx512vl_i64_info>, VEX_W;
5737 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
5738                                      AVX2_PERMV_F, avx512vl_f32_info>;
5739 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
5740                                      AVX2_PERMV_F, avx512vl_f64_info>, VEX_W;
5741
// Immediate-controlled qword permutes (X86VPermi) via avx512_vpermi_dq_sizes.
// VPERMQ uses opcode 0x00 and VPERMPD opcode 0x01, both with the
// MRMSrcReg/MRMSrcMem formats, AVX512AIi8Base, EVEX_CD8<64, CD8VF> and VEX_W.
// These reuse the VPERMQ/VPERMPD defm prefixes of the variable (X86VPermv) forms.
// NOTE(review): the generated record names are presumably kept distinct by
// the suffixes the underlying multiclasses append -- confirm.
5742 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
5743                              X86VPermi, AVX2_PERMV_I, avx512vl_i64_info>,
5744                              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
5745 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
5746                              X86VPermi, AVX2_PERMV_F, avx512vl_f64_info>,
5747                              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
5748 //===----------------------------------------------------------------------===//
5749 // AVX-512 - VPERMIL
5750 //===----------------------------------------------------------------------===//
5751
// Variable-control VPERMIL for one vector width, built on AVX512_maskable.
//   OpcVar : opcode of the variable-control form.
//   OpNode : selection DAG node matched by all three variants.
//   _      : type info of the data vector ($src1 and the result).
//   Ctrl   : type info of the control vector ($src2).
// Variants:
//   rr  - control vector in a register (Ctrl.RC).
//   rm  - control vector loaded from memory with Ctrl.LdFrag and bitconverted
//         to Ctrl.VT.
//   rmb - control scalar loaded with Ctrl.ScalarLdFrag and broadcast
//         (X86VBroadcast) to Ctrl.VT; marked EVEX_B.
5752 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
5753                              OpndItins itins, X86VectorVTInfo _,
5754                              X86VectorVTInfo Ctrl> {
5755   defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
5756                   (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
5757                   "$src2, $src1", "$src1, $src2",
5758                   (_.VT (OpNode _.RC:$src1,
5759                                (Ctrl.VT Ctrl.RC:$src2))), itins.rr>,
5760                   T8PD, EVEX_4V, Sched<[itins.Sched]>;
5761   defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
5762                   (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
5763                   "$src2, $src1", "$src1, $src2",
5764                   (_.VT (OpNode
5765                            _.RC:$src1,
5766                            (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2))))),
5767                   itins.rm>, T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5768                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
     // Note: the memory operand type and the broadcast string come from `_`
     // (the data type info), while the load fragment comes from Ctrl.
5769   defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
5770                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5771                    "${src2}"##_.BroadcastStr##", $src1",
5772                    "$src1, ${src2}"##_.BroadcastStr,
5773                    (_.VT (OpNode
5774                             _.RC:$src1,
5775                             (Ctrl.VT (X86VBroadcast
5776                                        (Ctrl.ScalarLdFrag addr:$src2))))),
5777                    itins.rm>, T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
5778                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
5779 }
5780
// Instantiates avx512_permil_vec for all three vector widths, always with the
// X86VPermilpv node.
//   _    : AVX512VLVectorVTInfo for the data vector.
//   Ctrl : AVX512VLVectorVTInfo for the control vector.
// Z (512-bit) requires HasAVX512; Z128 and Z256 additionally require HasVLX.
5781 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
5782                                     OpndItins itins, AVX512VLVectorVTInfo _,
5783                                     AVX512VLVectorVTInfo Ctrl> {
5784   let Predicates = [HasAVX512] in {
5785     defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
5786                                   _.info512, Ctrl.info512>, EVEX_V512;
5787   }
5788   let Predicates = [HasAVX512, HasVLX] in {
5789     defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
5790                                   _.info128, Ctrl.info128>, EVEX_V128;
5791     defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
5792                                   _.info256, Ctrl.info256>, EVEX_V256;
5793   }
5794 }
5795
// Top-level VPERMIL multiclass: defines both control flavours under NAME.
//   OpcImm : opcode of the immediate-control form (X86VPermilpi), generated by
//            avx512_shift_rmi_sizes with MRMSrcReg/MRMSrcMem formats.
//   OpcVar : opcode of the variable-control form, generated by
//            avx512_permil_vec_common.
//   _ / Ctrl : data and control vector type info.
// Both flavours use the AVX_VPERMILV itineraries.
5796 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
5797                          AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
5798   defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, AVX_VPERMILV, _, Ctrl>;
5799   defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
5800                                     X86VPermilpi, AVX_VPERMILV, _>,
5801                     EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
5802 }
5803
// VPERMILPS: immediate opcode 0x04, variable opcode 0x0C, f32 data with i32
// control vectors, SSEPackedSingle domain.
// VPERMILPD: immediate opcode 0x05, variable opcode 0x0D, f64 data with i64
// control vectors, SSEPackedDouble domain, plus VEX_W.
5804 let ExeDomain = SSEPackedSingle in
5805 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
5806                                avx512vl_i32_info>;
5807 let ExeDomain = SSEPackedDouble in
5808 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
5809                                avx512vl_i64_info>, VEX_W;
5810
5811 //===----------------------------------------------------------------------===//
5812 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
5813 //===----------------------------------------------------------------------===//
5814
// Immediate-controlled shuffles. All three share opcode 0x70 and the SSE_PSHUF
// itineraries; they differ in the base class given here (AVX512BIi8Base /
// AVX512XSIi8Base / AVX512XDIi8Base).
// VPSHUFD goes through avx512_shift_rmi_sizes on i32 vectors; the word forms
// go through avx512_shift_rmi_w.
// NOTE(review): the defm prefixes are VPSHUFH/VPSHUFL although the mnemonics
// are vpshufhw/vpshuflw; presumably avx512_shift_rmi_w appends the "W" to the
// record names -- confirm against its definition.
5815 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
5816                              X86PShufd, SSE_PSHUF, avx512vl_i32_info>,
5817                              EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
5818 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
5819                                   X86PShufhw, SSE_PSHUF>, EVEX, AVX512XSIi8Base;
5820 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
5821                                   X86PShuflw, SSE_PSHUF>, EVEX, AVX512XDIi8Base;
5822
// Byte-shuffle multiclass: defines Z, Z256 and Z128 via avx512_var_shift on
// the fixed byte vector types v64i8_info, v32i8x_info and v16i8x_info.
// Z requires HasBWI; Z256 and Z128 require [HasVLX, HasBWI].
5823 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5824                                OpndItins itins> {
5825   let Predicates = [HasBWI] in
5826   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, itins, v64i8_info>, EVEX_V512;
5827
5828   let Predicates = [HasVLX, HasBWI] in {
5829   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i8x_info>, EVEX_V256;
5830   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v16i8x_info>, EVEX_V128;
5831   }
5832 }
5833
// VPSHUFB: opcode 0x00, X86pshufb node, SSE_PSHUFB itineraries, VEX_WIG.
5834 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, SSE_PSHUFB>, VEX_WIG;
5835
5836 //===----------------------------------------------------------------------===//
5837 // Move Low to High and High to Low packed FP Instructions
5838 //===----------------------------------------------------------------------===//
// EVEX register-register forms of VMOVLHPS (opcode 0x16) and VMOVHLPS
// (opcode 0x12) on VR128X. Each selects the corresponding X86Movlhps /
// X86Movhlps node with a v4f32 result and uses the IIC_SSE_MOV_LH itinerary.
5839 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
5840           (ins VR128X:$src1, VR128X:$src2),
5841           "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5842           [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
5843            IIC_SSE_MOV_LH>, EVEX_4V;
5844 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
5845           (ins VR128X:$src1, VR128X:$src2),
5846           "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5847           [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
5848           IIC_SSE_MOV_LH>, EVEX_4V;
5849
5850 //===----------------------------------------------------------------------===//
5851 // VMOVHPS/PD VMOVLPS Instructions
5852 // All patterns were taken from the SSE implementation.
5853 //===----------------------------------------------------------------------===//
// Load form ("rm") shared by VMOVHPS/VMOVHPD/VMOVLPS/VMOVLPD.
//   opc / OpcodeStr : opcode and mnemonic.
//   OpNode : DAG node combining $src1 with the loaded value.
//   _      : vector type info supplying RC, VT and ExeDomain.
// The memory operand is always f64mem. The pattern loads an f64 (loadf64),
// turns it into a v2f64 with scalar_to_vector, bitconverts that to _.VT and
// passes it as the second operand of OpNode.
5854 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
5855                                   X86VectorVTInfo _> {
5856   let ExeDomain = _.ExeDomain in
5857   def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
5858                   (ins _.RC:$src1, f64mem:$src2),
5859                   !strconcat(OpcodeStr,
5860                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5861                   [(set _.RC:$dst,
5862                      (OpNode _.RC:$src1,
5863                        (_.VT (bitconvert
5864                          (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))],
5865                   IIC_SSE_MOV_LH>, EVEX_4V;
5866 }
5867
// 128-bit load forms. High-half moves use opcode 0x16, low-half moves 0x12.
// The PS forms use v4f32x_info with EVEX_CD8<32, CD8VT2> and the PS prefix;
// the PD forms use v2f64x_info with EVEX_CD8<64, CD8VT1>, the PD prefix and
// VEX_W. Nodes matched: X86Movlhps (VMOVHPS), X86Unpckl (VMOVHPD),
// X86Movlps (VMOVLPS), X86Movlpd (VMOVLPD).
5868 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
5869                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
5870 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
5871                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
5872 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
5873                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
5874 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
5875                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
5876
// Additional selection patterns that map other DAG shapes of a 64-bit load
// onto the VMOV{H,L}P{S,D}Z128rm instructions (HasAVX512 only).
5877 let Predicates = [HasAVX512] in {
5878   // VMOVHPS patterns
     // The 64-bit value is loaded as an integer (loadi64 via scalar_to_vector,
     // or X86vzload) and bitcast to v4f32.
5879   def : Pat<(X86Movlhps VR128X:$src1,
5880                (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
5881           (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
5882   def : Pat<(X86Movlhps VR128X:$src1,
5883                (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
5884           (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
5885   // VMOVHPD patterns
     // Integer 64-bit load bitcast to v2f64 and combined with X86Unpckl.
5886   def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
5887                     (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
5888            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
5889   // VMOVLPS patterns
5890   def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
5891           (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
5892   // VMOVLPD patterns
     // Both an X86Movlpd with a plain load and an X86Movsd with a
     // scalar_to_vector f64 load select VMOVLPDZ128rm.
5893   def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
5894           (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
5895   def : Pat<(v2f64 (X86Movsd VR128X:$src1,
5896                            (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
5897           (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
5898 }
5899
// Store forms (MRMDestMem) of VMOVHPS/VMOVHPD (opcode 0x17) and
// VMOVLPS/VMOVLPD (opcode 0x13). Each stores one f64 to an f64mem operand.
// The high forms store element 0 of X86Unpckh($src, $src); the low forms
// store element 0 of $src itself. The PS forms view the v4f32 source as
// v2f64 via bc_v2f64. The PD forms additionally carry VEX_W.
5900 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
5901                        (ins f64mem:$dst, VR128X:$src),
5902                        "vmovhps\t{$src, $dst|$dst, $src}",
5903                        [(store (f64 (extractelt
5904                                      (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
5905                                                 (bc_v2f64 (v4f32 VR128X:$src))),
5906                                      (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
5907                        EVEX, EVEX_CD8<32, CD8VT2>;
5908 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
5909                        (ins f64mem:$dst, VR128X:$src),
5910                        "vmovhpd\t{$src, $dst|$dst, $src}",
5911                        [(store (f64 (extractelt
5912                                      (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
5913                                      (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
5914                        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
5915 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
5916                        (ins f64mem:$dst, VR128X:$src),
5917                        "vmovlps\t{$src, $dst|$dst, $src}",
5918                        [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
5919                                      (iPTR 0))), addr:$dst)],
5920                                      IIC_SSE_MOV_LH>,
5921                        EVEX, EVEX_CD8<32, CD8VT2>;
5922 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
5923                        (ins f64mem:$dst, VR128X:$src),
5924                        "vmovlpd\t{$src, $dst|$dst, $src}",
5925                        [(store (f64 (extractelt (v2f64 VR128X:$src),
5926                                      (iPTR 0))), addr:$dst)],
5927                                      IIC_SSE_MOV_LH>,
5928                        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
5929
// Additional selection patterns that map other DAG shapes onto the
// VMOV{H,L}P{S,D}Z128mr store instructions (HasAVX512 only).
5930 let Predicates = [HasAVX512] in {
5931   // VMOVHPD patterns
     // Storing element 0 of X86VPermilpi($src, 1) selects VMOVHPDZ128mr.
5932   def : Pat<(store (f64 (extractelt
5933                            (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
5934                            (iPTR 0))), addr:$dst),
5935            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
5936   // VMOVLPS patterns
     // Load / X86Movlps / store where the load and the store use the same
     // address operand ($src1) is selected as a single VMOVLPSZ128mr.
5937   def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
5938                    addr:$src1),
5939             (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
5940   // VMOVLPD patterns
     // Same shape as the VMOVLPS pattern, for v2f64 / X86Movlpd.
5941   def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
5942                    addr:$src1),
5943             (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
5944 }
5945 //===----------------------------------------------------------------------===//
5946 // FMA - Fused Multiply Operations
5947 //
5948
// Packed FMA, "213" form, for one vector width: register (r), full-vector
// memory (m) and broadcast-memory (mb) variants built on AVX512_maskable_3src.
//   OpNode : DAG node for the FMA flavour.
//   _      : vector type info.
//   Suff   : not referenced in this multiclass body.
// $src1 is tied to $dst by the Constraints string. The selection patterns
// pass the operands to OpNode in the order ($src2, $src1, $src3), with $src3
// being the register, the loaded vector, or the broadcast scalar.
// NOTE(review): $src1 is absent from the explicit (ins ...) lists; presumably
// AVX512_maskable_3src adds it. The meaning of the trailing "1, 1" / "1, 0"
// template arguments is defined by AVX512_maskable_3src, outside this chunk.
5949 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5950                                X86VectorVTInfo _, string Suff> {
5951   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5952   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
5953           (ins _.RC:$src2, _.RC:$src3),
5954           OpcodeStr, "$src3, $src2", "$src2, $src3",
5955           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), NoItinerary, 1, 1>,
5956           AVX512FMA3Base, Sched<[WriteFMA]>;
5957
5958   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
5959           (ins _.RC:$src2, _.MemOp:$src3),
5960           OpcodeStr, "$src3, $src2", "$src2, $src3",
5961           (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
5962           NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
5963
5964   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
5965             (ins _.RC:$src2, _.ScalarMemOp:$src3),
5966             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
5967             !strconcat("$src2, ${src3}", _.BroadcastStr ),
5968             (OpNode _.RC:$src2,
5969              _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))),
5970              NoItinerary, 1, 0>, AVX512FMA3Base, EVEX_B,
5971              Sched<[WriteFMALd, ReadAfterLd]>;
5972   }
5973 }
5974
// Packed FMA, "213" form, register variant with an explicit rounding-control
// operand (rb). Takes an extra AVX512RC:$rc input that is passed to OpNode as
// (i32 imm:$rc) after the three vector operands ($src2, $src1, $src3), and is
// marked EVEX_B and EVEX_RC. $src1 is tied to $dst.
// Suff is not referenced in this multiclass body.
5975 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
5976                                  X86VectorVTInfo _, string Suff> {
5977   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
5978   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
5979           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
5980           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
5981           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))),
5982           NoItinerary, 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
5983 }
5984
// Instantiates the "213" packed FMA for all vector widths.
//   OpNode    : node used by the r/m/mb variants (avx512_fma3p_213_rm).
//   OpNodeRnd : node used by the rounding variant (avx512_fma3_213_round).
// Z (512-bit, HasAVX512) gets both the r/m/mb variants and the rounding
// variant; Z256 and Z128 ([HasVLX, HasAVX512]) get only the r/m/mb variants.
// Each width sets EVEX_CD8 from its own element size with CD8VF.
5985 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
5986                                    SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
5987                                    string Suff> {
5988   let Predicates = [HasAVX512] in {
5989     defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
5990                   avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512,
5991                       Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
5992   }
5993   let Predicates = [HasVLX, HasAVX512] in {
5994     defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
5995                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
5996     defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
5997                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
5998   }
5999 }
6000
// Defines the single-precision (PS) and double-precision (PD) "213" packed FMA
// families. The mnemonic is OpcodeStr with "ps"/"pd" appended; the PD family
// additionally carries VEX_W.
6001 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6002                               SDNode OpNodeRnd > {
6003     defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6004                                       avx512vl_f32_info, "PS">;
6005     defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6006                                       avx512vl_f64_info, "PD">, VEX_W;
6007 }
6008
// "213" packed FMA instruction families: opcode, mnemonic stem, and the
// non-rounding / rounding DAG nodes for each flavour.
6009 defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
6010 defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
6011 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
6012 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
6013 defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
6014 defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
6015
6016
// Packed FMA, "231" form, for one vector width: register (r), full-vector
// memory (m) and broadcast-memory (mb) variants built on AVX512_maskable_3src.
//   OpNode : DAG node for the FMA flavour.
//   _      : vector type info.
//   Suff   : not referenced in this multiclass body.
// $src1 is tied to $dst by the Constraints string. The selection patterns
// pass the operands to OpNode in the order ($src2, $src3, $src1), with $src3
// being the register, the loaded vector, or the broadcast scalar.
// Only the register variant passes the extra "vselect, 1" template arguments.
// NOTE(review): the meaning of the trailing template arguments is defined by
// AVX512_maskable_3src, outside this chunk.
6017 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6018                                X86VectorVTInfo _, string Suff> {
6019   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6020   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6021           (ins _.RC:$src2, _.RC:$src3),
6022           OpcodeStr, "$src3, $src2", "$src2, $src3",
6023           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), NoItinerary, 1, 1,
6024           vselect, 1>, AVX512FMA3Base, Sched<[WriteFMA]>;
6025
6026   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6027           (ins _.RC:$src2, _.MemOp:$src3),
6028           OpcodeStr, "$src3, $src2", "$src2, $src3",
6029           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6030           NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
6031
6032   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6033          (ins _.RC:$src2, _.ScalarMemOp:$src3),
6034          OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6035          "$src2, ${src3}"##_.BroadcastStr,
6036          (_.VT (OpNode _.RC:$src2,
6037                       (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
6038                       _.RC:$src1)), NoItinerary, 1, 0>, AVX512FMA3Base, EVEX_B,
6039          Sched<[WriteFMALd, ReadAfterLd]>;
6040   }
6041 }
6042
// Packed FMA, "231" form, register variant with an explicit rounding-control
// operand (rb). Takes an extra AVX512RC:$rc input that is passed to OpNode as
// (i32 imm:$rc) after the three vector operands ($src2, $src3, $src1), and is
// marked EVEX_B and EVEX_RC. $src1 is tied to $dst.
// Suff is not referenced in this multiclass body.
6043 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6044                                  X86VectorVTInfo _, string Suff> {
6045   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6046   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6047           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6048           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6049           (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
6050           NoItinerary, 1, 1, vselect, 1>,
6051           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
6052 }
6053
// Instantiates the "231" packed FMA for all vector widths.
//   OpNode    : node used by the r/m/mb variants (avx512_fma3p_231_rm).
//   OpNodeRnd : node used by the rounding variant (avx512_fma3_231_round).
// Z (512-bit, HasAVX512) gets both the r/m/mb variants and the rounding
// variant; Z256 and Z128 ([HasVLX, HasAVX512]) get only the r/m/mb variants.
// Each width sets EVEX_CD8 from its own element size with CD8VF.
6054 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6055                                    SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6056                                    string Suff> {
6057   let Predicates = [HasAVX512] in {
6058     defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6059                   avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6060                       Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6061   }
6062   let Predicates = [HasVLX, HasAVX512] in {
6063     defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
6064                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6065     defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
6066                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6067   }
6068 }
6069
// Defines the single-precision (PS) and double-precision (PD) "231" packed FMA
// families. The mnemonic is OpcodeStr with "ps"/"pd" appended; the PD family
// additionally carries VEX_W.
6070 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6071                               SDNode OpNodeRnd > {
6072     defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6073                                       avx512vl_f32_info, "PS">;
6074     defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6075                                       avx512vl_f64_info, "PD">, VEX_W;
6076 }
6077
// "231" packed FMA instruction families: opcode, mnemonic stem, and the
// non-rounding / rounding DAG nodes for each flavour.
6078 defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
6079 defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
6080 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
6081 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
6082 defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
6083 defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
6084
// Packed FMA, "132" form, for one vector width: register (r), full-vector
// memory (m) and broadcast-memory (mb) variants built on AVX512_maskable_3src.
//   OpNode : DAG node for the FMA flavour.
//   _      : vector type info.
//   Suff   : not referenced in this multiclass body.
// $src1 is tied to $dst by the Constraints string. The register pattern passes
// the operands to OpNode as ($src1, $src3, $src2); the memory patterns pass
// them as ($src3, $src1, $src2), i.e. with the two multiplicands swapped.
// Only the register variant passes the extra "vselect, 1" template arguments.
// NOTE(review): the meaning of the trailing template arguments is defined by
// AVX512_maskable_3src, outside this chunk.
6085 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6086                                X86VectorVTInfo _, string Suff> {
6087   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6088   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6089           (ins _.RC:$src2, _.RC:$src3),
6090           OpcodeStr, "$src3, $src2", "$src2, $src3",
6091           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), NoItinerary,
6092           1, 1, vselect, 1>, AVX512FMA3Base, Sched<[WriteFMA]>;
6093
6094   // The pattern is written in 312 order so that the load is in a different
6095   // place from the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6096   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6097           (ins _.RC:$src2, _.MemOp:$src3),
6098           OpcodeStr, "$src3, $src2", "$src2, $src3",
6099           (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6100           NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
6101
6102   // The pattern is written in 312 order so that the load is in a different
6103   // place from the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6104   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6105          (ins _.RC:$src2, _.ScalarMemOp:$src3),
6106          OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6107          "$src2, ${src3}"##_.BroadcastStr,
6108          (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
6109                        _.RC:$src1, _.RC:$src2)), NoItinerary, 1, 0>,
6110          AVX512FMA3Base, EVEX_B, Sched<[WriteFMALd, ReadAfterLd]>;
6111   }
6112 }
6113
// Packed FMA, "132" form, register variant with an explicit rounding-control
// operand (rb). Takes an extra AVX512RC:$rc input that is passed to OpNode as
// (i32 imm:$rc) after the three vector operands ($src1, $src3, $src2), and is
// marked EVEX_B and EVEX_RC. $src1 is tied to $dst.
// Suff is not referenced in this multiclass body.
6114 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6115                                  X86VectorVTInfo _, string Suff> {
6116   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6117   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6118           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6119           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6120           (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
6121           NoItinerary, 1, 1, vselect, 1>,
6122           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
6123 }
6124
// Instantiates the "132" packed FMA for all vector widths.
//   OpNode    : node used by the r/m/mb variants (avx512_fma3p_132_rm).
//   OpNodeRnd : node used by the rounding variant (avx512_fma3_132_round).
// Z (512-bit, HasAVX512) gets both the r/m/mb variants and the rounding
// variant; Z256 and Z128 ([HasVLX, HasAVX512]) get only the r/m/mb variants.
// Each width sets EVEX_CD8 from its own element size with CD8VF.
6125 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6126                                    SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6127                                    string Suff> {
6128   let Predicates = [HasAVX512] in {
6129     defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6130                   avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6131                       Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6132   }
6133   let Predicates = [HasVLX, HasAVX512] in {
6134     defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
6135                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6136     defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
6137                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6138   }
6139 }
6140
// Defines the single-precision (PS) and double-precision (PD) "132" packed FMA
// families. The mnemonic is OpcodeStr with "ps"/"pd" appended; the PD family
// additionally carries VEX_W.
6141 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6142                               SDNode OpNodeRnd > {
6143     defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6144                                       avx512vl_f32_info, "PS">;
6145     defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6146                                       avx512vl_f64_info, "PD">, VEX_W;
6147 }
6148
// "132" packed FMA instruction families: opcode, mnemonic stem, and the
// non-rounding / rounding DAG nodes for each flavour.
6149 defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
6150 defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
6151 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
6152 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
6153 defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
6154 defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
6155
6156 // Scalar FMA
// Common body for one scalar FMA opcode. The caller supplies the selection
// DAGs; this multiclass only decides which instruction each one is attached to.
//   RHS_VEC_r / RHS_VEC_m / RHS_VEC_rb : patterns for the vector-register
//       ("_Int") variants: register, memory, and register with rounding control.
//   RHS_r / RHS_m : patterns for the FRC (scalar register class) variants.
//   MaskOnlyReg   : when set, the FRC register variant "r" gets an empty
//       pattern list instead of [RHS_r]. It is not forwarded to the _Int
//       variants here.
// $src1 is tied to $dst for every definition in this multiclass.
6157 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6158                                dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
6159                                dag RHS_r, dag RHS_m, bit MaskOnlyReg> {
6160 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6161   defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6162           (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6163           "$src3, $src2", "$src2, $src3", RHS_VEC_r, NoItinerary, 1, 1>,
6164           AVX512FMA3Base, Sched<[WriteFMA]>;
6165
6166   defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6167           (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6168           "$src3, $src2", "$src2, $src3", RHS_VEC_m, NoItinerary, 1, 1>,
6169           AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
6170
6171   defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6172          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6173          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb,
6174          NoItinerary, 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC,
6175          Sched<[WriteFMA]>;
6176
     // FRC-based variants: code-generation only and marked commutable. Unlike
     // the _Int variants, $src1 is listed explicitly in (ins ...).
6177   let isCodeGenOnly = 1, isCommutable = 1 in {
6178     def r     : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6179                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6180                      !strconcat(OpcodeStr,
6181                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6182                      !if(MaskOnlyReg, [], [RHS_r])>, Sched<[WriteFMA]>;
6183     def m     : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
6184                     (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6185                     !strconcat(OpcodeStr,
6186                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6187                     [RHS_m]>, Sched<[WriteFMALd, ReadAfterLd]>;
6188   }// isCodeGenOnly = 1, isCommutable = 1
6189 }// Constraints = "$src1 = $dst"
6190 }
6191
// avx512_fma3s_all - Instantiates avx512_fma3s_common three times, producing
// the 213, 231 and 132 forms (NAME#213#SUFF#Z, NAME#231#SUFF#Z,
// NAME#132#SUFF#Z) inside the element type's ExeDomain.
//   - OpNodes1 / OpNodeRnds1 build the vector patterns of the 213 form;
//     OpNodes1 also builds the memory vector pattern of the 132 form.
//   - OpNodes3 / OpNodeRnds3 build the vector patterns of the 231 form.
//   - OpNode builds the _.FRC patterns of all three forms.
//   - The 132 form passes null_frag for its register and rounding vector
//     patterns.
// The trailing 0/1 argument is avx512_fma3s_common's MaskOnlyReg bit: 0 for the
// 213 form, 1 for the 231 and 132 forms.
6192 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6193                             string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
6194                             SDNode OpNodeRnds1, SDNode OpNodes3,
6195                             SDNode OpNodeRnds3, X86VectorVTInfo _,
6196                             string SUFF> {
6197   let ExeDomain = _.ExeDomain in {
6198   defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6199                 // Operands for the intrinsic are in 123 order to preserve
6200                 // passthru semantics.
6201                 (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2, _.RC:$src3)),
6202                 (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2,
6203                          _.ScalarIntMemCPat:$src3)),
6204                 (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
6205                          (i32 imm:$rc))),
6206                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6207                          _.FRC:$src3))),
6208                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6209                          (_.ScalarLdFrag addr:$src3)))), 0>;
6210
6211   defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6212                 (_.VT (OpNodes3 _.RC:$src2, _.RC:$src3, _.RC:$src1)),
6213                 (_.VT (OpNodes3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
6214                               _.RC:$src1)),
6215                 (_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
6216                                   (i32 imm:$rc))),
6217                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6218                                           _.FRC:$src1))),
6219                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6220                             (_.ScalarLdFrag addr:$src3), _.FRC:$src1))), 1>;
6221
6222   // One pattern is in 312 order so that the load is in a different place from
6223   // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6224   defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6225                 (null_frag),
6226                 (_.VT (OpNodes1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
6227                               _.RC:$src2)),
6228                 (null_frag),
6229                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6230                          _.FRC:$src2))),
6231                 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6232                                  _.FRC:$src1, _.FRC:$src2))), 1>;
6233   }
6234 }
6235
// avx512_fma3s - Under HasAVX512, instantiates avx512_fma3s_all twice:
//   - f32x_info with suffix "SS", tagged EVEX_CD8<32, CD8VT1>, VEX_LIG
//   - f64x_info with suffix "SD", tagged EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W
// All node and opcode parameters are forwarded unchanged.
6236 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6237                         string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
6238                         SDNode OpNodeRnds1, SDNode OpNodes3,
6239                         SDNode OpNodeRnds3> {
6240   let Predicates = [HasAVX512] in {
6241     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6242                                  OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
6243                                  f32x_info, "SS">,
6244                                  EVEX_CD8<32, CD8VT1>, VEX_LIG;
6245     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6246                                  OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
6247                                  f64x_info, "SD">,
6248                                  EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
6249   }
6250 }
6251
// Scalar FMA instruction families. The three opcode bytes are given in
// 213, 231, 132 order (avx512_fma3s's opc213 / opc231 / opc132 parameters),
// followed by the mnemonic stem and the five SDNodes.
6252 defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86Fmadds1,
6253                             X86FmaddRnds1, X86Fmadds3, X86FmaddRnds3>;
6254 defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86Fmsubs1,
6255                             X86FmsubRnds1, X86Fmsubs3, X86FmsubRnds3>;
6256 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86Fnmadds1,
6257                             X86FnmaddRnds1, X86Fnmadds3, X86FnmaddRnds3>;
6258 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86Fnmsubs1,
6259                             X86FnmsubRnds1, X86Fnmsubs3, X86FnmsubRnds3>;
6260
6261 //===----------------------------------------------------------------------===//
6262 // AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
6263 //===----------------------------------------------------------------------===//
// avx512_pmadd52_rm - Register (r), full-vector memory (m) and broadcast
// memory (mb, EVEX_B) forms of one 52-bit multiply-add opcode. $src1 is tied
// to $dst and is passed as the last (add) operand of OpNode in every pattern.
// Only the r form passes the trailing "1, 1" arguments to
// AVX512_maskable_3src (presumably commutability flags -- confirm against the
// AVX512_maskable_3src definition).
6264 let Constraints = "$src1 = $dst" in {
6265 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6266                              OpndItins itins, X86VectorVTInfo _> {
6267   // NOTE: The SDNode have the multiply operands first with the add last.
6268   // This enables commuted load patterns to be autogenerated by tablegen.
6269   let ExeDomain = _.ExeDomain in {
6270   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6271           (ins _.RC:$src2, _.RC:$src3),
6272           OpcodeStr, "$src3, $src2", "$src2, $src3",
6273           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), itins.rr, 1, 1>,
6274          AVX512FMA3Base, Sched<[itins.Sched]>;
6275
6276   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6277           (ins _.RC:$src2, _.MemOp:$src3),
6278           OpcodeStr, "$src3, $src2", "$src2, $src3",
6279           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6280           itins.rm>, AVX512FMA3Base, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6281
6282   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6283             (ins _.RC:$src2, _.ScalarMemOp:$src3),
6284             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6285             !strconcat("$src2, ${src3}", _.BroadcastStr ),
6286             (OpNode _.RC:$src2,
6287                     (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
6288                     _.RC:$src1), itins.rm>,
6289             AVX512FMA3Base, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6290   }
6291 }
6292 } // Constraints = "$src1 = $dst"
6293
// avx512_pmadd52_common - Instantiates avx512_pmadd52_rm for each vector width
// of the given AVX512VLVectorVTInfo:
//   Z          : _.info512, EVEX_V512, requires HasIFMA
//   Z256, Z128 : _.info256 / _.info128, EVEX_V256 / EVEX_V128, require
//                HasVLX and HasIFMA
// Each instance uses a full-vector (CD8VF) EVEX_CD8 based on its element size.
6294 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6295                                  OpndItins itins, AVX512VLVectorVTInfo _> {
6296   let Predicates = [HasIFMA] in {
6297     defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info512>,
6298                       EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6299   }
6300   let Predicates = [HasVLX, HasIFMA] in {
6301     defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info256>,
6302                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6303     defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info128>,
6304                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6305   }
6306 }
6307
// The two IFMA instruction families (opcodes 0xb4 and 0xb5), both over
// avx512vl_i64_info with the SSE_PMADD itineraries and VEX_W set.
6308 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
6309                                   SSE_PMADD, avx512vl_i64_info>, VEX_W;
6310 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
6311                                   SSE_PMADD, avx512vl_i64_info>, VEX_W;
6312
6313 //===----------------------------------------------------------------------===//
6314 // AVX-512  Scalar convert from signed integer to float/double
6315 //===----------------------------------------------------------------------===//
6316
// avx512_vcvtsi - Scalar integer-to-FP convert without explicit rounding
// control. Defines four EVEX_4V records:
//   rr / rm         : forms on DstVT.FRC with empty pattern lists
//                     (hasSideEffects = 0; rm is additionally mayLoad).
//   rr_Int / rm_Int : isCodeGenOnly forms on DstVT.RC whose patterns select
//                     OpNode with (i32 FROUND_CURRENT) as the rounding operand;
//                     rm_Int loads its integer source through ld_frag.
6317 multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, OpndItins itins,
6318                     RegisterClass SrcRC, X86VectorVTInfo DstVT,
6319                     X86MemOperand x86memop, PatFrag ld_frag, string asm> {
6320   let hasSideEffects = 0 in {
6321     def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
6322               (ins DstVT.FRC:$src1, SrcRC:$src),
6323               !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [],
6324               itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
6325     let mayLoad = 1 in
6326       def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
6327               (ins DstVT.FRC:$src1, x86memop:$src),
6328               !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [],
6329               itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6330   } // hasSideEffects = 0
6331   let isCodeGenOnly = 1 in {
6332     def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6333                   (ins DstVT.RC:$src1, SrcRC:$src2),
6334                   !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6335                   [(set DstVT.RC:$dst,
6336                         (OpNode (DstVT.VT DstVT.RC:$src1),
6337                                  SrcRC:$src2,
6338                                  (i32 FROUND_CURRENT)))], itins.rr>,
6339                  EVEX_4V, Sched<[itins.Sched]>;
6340
6341     def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
6342                   (ins DstVT.RC:$src1, x86memop:$src2),
6343                   !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6344                   [(set DstVT.RC:$dst,
6345                         (OpNode (DstVT.VT DstVT.RC:$src1),
6346                                  (ld_frag addr:$src2),
6347                                  (i32 FROUND_CURRENT)))], itins.rm>,
6348                   EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6349   }//isCodeGenOnly = 1
6350 }
6351
// avx512_vcvtsi_round - Adds the rrb_Int record: a register form on DstVT.RC
// that takes an explicit rounding-control operand (AVX512RC:$rc), forwards it
// to OpNode as (i32 imm:$rc), and is tagged EVEX_4V, EVEX_B, EVEX_RC.
6352 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, OpndItins itins,
6353                     RegisterClass SrcRC, X86VectorVTInfo DstVT, string asm> {
6354   def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6355               (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
6356               !strconcat(asm,
6357                   "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
6358               [(set DstVT.RC:$dst,
6359                     (OpNode (DstVT.VT DstVT.RC:$src1),
6360                              SrcRC:$src2,
6361                              (i32 imm:$rc)))], itins.rr>,
6362               EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
6363 }
6364
// avx512_vcvtsi_common - Combines avx512_vcvtsi_round (rrb_Int) and
// avx512_vcvtsi (rr, rm, rr_Int, rm_Int) under NAME, all tagged VEX_LIG.
6365 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, OpndItins itins,
6366                     RegisterClass SrcRC, X86VectorVTInfo DstVT,
6367                     X86MemOperand x86memop, PatFrag ld_frag, string asm> {
6368   defm NAME : avx512_vcvtsi_round<opc, OpNode, itins, SrcRC, DstVT, asm>,
6369               avx512_vcvtsi<opc, OpNode, itins, SrcRC, DstVT, x86memop,
6370                             ld_frag, asm>, VEX_LIG;
6371 }
6372
// Scalar signed/unsigned integer to float/double conversions (HasAVX512):
// instruction definitions, mnemonic aliases without the {l} size suffix, and
// selection patterns for sint_to_fp / uint_to_fp from memory and from GPRs.
6373 let Predicates = [HasAVX512] in {
// Signed conversions, opcode 0x2A. The "64" records take GR64/i64mem sources
// and add VEX_W.
6374 defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR32,
6375                                  v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
6376                                  XS, EVEX_CD8<32, CD8VT1>;
6377 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR64,
6378                                  v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
6379                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
6380 defm VCVTSI2SDZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR32,
6381                                  v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
6382                                  XD, EVEX_CD8<32, CD8VT1>;
6383 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR64,
6384                                  v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
6385                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;
6386
// Suffix-less mnemonics resolve to the 32-bit memory forms.
// NOTE(review): the vcvtsi2ss alias names FR64X operands although VCVTSI2SSZ is
// built from v4f32x_info -- confirm this register class is intended.
6387 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
6388               (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6389 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
6390               (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6391
// sint_to_fp of a loaded integer: select the rm form with an IMPLICIT_DEF
// first source operand.
6392 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
6393           (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6394 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
6395           (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6396 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
6397           (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6398 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
6399           (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6400
// sint_to_fp of a GPR: select the rr form with an IMPLICIT_DEF first source
// operand.
6401 def : Pat<(f32 (sint_to_fp GR32:$src)),
6402           (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
6403 def : Pat<(f32 (sint_to_fp GR64:$src)),
6404           (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
6405 def : Pat<(f64 (sint_to_fp GR32:$src)),
6406           (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
6407 def : Pat<(f64 (sint_to_fp GR64:$src)),
6408           (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
6409
// Unsigned conversions, opcode 0x7B.
6410 defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR32,
6411                                   v4f32x_info, i32mem, loadi32,
6412                                   "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
6413 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR64,
6414                                   v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
6415                                   XS, VEX_W, EVEX_CD8<64, CD8VT1>;
// VCVTUSI2SDZ instantiates avx512_vcvtsi directly (not avx512_vcvtsi_common),
// so it has no rrb_Int rounding-control record and spells out VEX_LIG itself.
// NOTE(review): presumably because a 32-bit integer converts to f64 exactly,
// making rounding control unnecessary -- confirm.
6416 defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR32, v2f64x_info,
6417                                   i32mem, loadi32, "cvtusi2sd{l}">,
6418                                   XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
6419 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR64,
6420                                   v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
6421                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
6422
// Suffix-less mnemonics resolve to the 32-bit memory forms.
// NOTE(review): as with vcvtsi2ss above, the vcvtusi2ss alias names FR64X
// operands for a v4f32x_info record -- confirm.
6423 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
6424               (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6425 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
6426               (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6427
// uint_to_fp of a loaded integer: select the rm form.
6428 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
6429           (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6430 def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
6431           (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6432 def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
6433           (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6434 def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
6435           (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6436
// uint_to_fp of a GPR: select the rr form.
6437 def : Pat<(f32 (uint_to_fp GR32:$src)),
6438           (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
6439 def : Pat<(f32 (uint_to_fp GR64:$src)),
6440           (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
6441 def : Pat<(f64 (uint_to_fp GR32:$src)),
6442           (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
6443 def : Pat<(f64 (uint_to_fp GR64:$src)),
6444           (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
6445 }
6446
6447 //===----------------------------------------------------------------------===//
6448 // AVX-512  Scalar convert from float/double to integer
6449 //===----------------------------------------------------------------------===//
6450
// avx512_cvt_s_int_round - Scalar FP-to-integer convert on the vector register
// class (SrcVT.RC -> DstVT.RC), under HasAVX512. All records are EVEX, VEX_LIG.
//   rr_Int  : register source, OpNode with (i32 FROUND_CURRENT).
//   rrb_Int : register source plus explicit AVX512RC:$rc, forwarded to OpNode
//             as (i32 imm:$rc); tagged EVEX_B, EVEX_RC.
//   rm_Int  : memory source matched through SrcVT.ScalarIntMemCPat, OpNode
//             with (i32 FROUND_CURRENT).
6451 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
6452                                   X86VectorVTInfo DstVT, SDNode OpNode,
6453                                   OpndItins itins, string asm> {
6454   let Predicates = [HasAVX512] in {
6455     def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
6456                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6457                 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))],
6458                 itins.rr>, EVEX, VEX_LIG, Sched<[itins.Sched]>;
6459     def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
6460                  !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
6461                  [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))],
6462                  itins.rr>, EVEX, VEX_LIG, EVEX_B, EVEX_RC,
6463                  Sched<[itins.Sched]>;
6464     def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
6465                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6466                 [(set DstVT.RC:$dst, (OpNode
6467                       (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
6468                       (i32 FROUND_CURRENT)))], itins.rm>,
6469                 EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6470   } // Predicates = [HasAVX512]
6471 }
6472
6473 // Convert float/double to signed/unsigned int 32/64
// Signed conversions use opcode 0x2D with X86cvts2si; unsigned conversions use
// opcode 0x79 with X86cvts2usi. Float sources use the XS prefix and
// EVEX_CD8<32, CD8VT1>; double sources use XD and EVEX_CD8<64, CD8VT1>. The
// records with a 64-bit destination (i64x_info) add VEX_W.
6474 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
6475                                    X86cvts2si, SSE_CVT_SS2SI_32, "cvtss2si">,
6476                                    XS, EVEX_CD8<32, CD8VT1>;
6477 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
6478                                    X86cvts2si, SSE_CVT_SS2SI_64, "cvtss2si">,
6479                                    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
6480 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info,
6481                                    X86cvts2usi, SSE_CVT_SS2SI_32, "cvtss2usi">,
6482                                    XS, EVEX_CD8<32, CD8VT1>;
6483 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info,
6484                                    X86cvts2usi, SSE_CVT_SS2SI_64, "cvtss2usi">,
6485                                    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
6486 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
6487                                    X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si">,
6488                                    XD, EVEX_CD8<64, CD8VT1>;
6489 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
6490                                    X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si">,
6491                                    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
6492 defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info,
6493                                    X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi">,
6494                                    XD, EVEX_CD8<64, CD8VT1>;
6495 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info,
6496                                    X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi">,
6497                                    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
6498
6499 // The SSE versions of these instructions are disabled for AVX512.
6500 // Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
// Each intrinsic gets a register pattern (-> rr_Int) and a folded-load pattern
// matched through sse_load_f32 / sse_load_f64 (-> rm_Int).
6501 let Predicates = [HasAVX512] in {
6502   def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
6503             (VCVTSS2SIZrr_Int VR128X:$src)>;
6504   def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
6505             (VCVTSS2SIZrm_Int sse_load_f32:$src)>;
6506   def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
6507             (VCVTSS2SI64Zrr_Int VR128X:$src)>;
6508   def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
6509             (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>;
6510   def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
6511             (VCVTSD2SIZrr_Int VR128X:$src)>;
6512   def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
6513             (VCVTSD2SIZrm_Int sse_load_f64:$src)>;
6514   def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
6515             (VCVTSD2SI64Zrr_Int VR128X:$src)>;
6516   def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
6517             (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>;
6518 } // HasAVX512
6519
// Map the SSE/SSE2 cvtsi2ss / cvtsi642ss / cvtsi2sd / cvtsi642sd intrinsics and
// the AVX-512 cvtusi2sd intrinsic to the corresponding _Int records. Register
// sources select rr_Int; loadi32 / loadi64 sources select rm_Int.
6520 let Predicates = [HasAVX512] in {
6521   def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, GR32:$src2),
6522             (VCVTSI2SSZrr_Int VR128X:$src1, GR32:$src2)>;
6523   def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, (loadi32 addr:$src2)),
6524             (VCVTSI2SSZrm_Int VR128X:$src1, addr:$src2)>;
6525   def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, GR64:$src2),
6526             (VCVTSI642SSZrr_Int VR128X:$src1, GR64:$src2)>;
6527   def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, (loadi64 addr:$src2)),
6528             (VCVTSI642SSZrm_Int VR128X:$src1, addr:$src2)>;
6529   def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, GR32:$src2),
6530             (VCVTSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
6531   def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, (loadi32 addr:$src2)),
6532             (VCVTSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
6533   def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, GR64:$src2),
6534             (VCVTSI642SDZrr_Int VR128X:$src1, GR64:$src2)>;
6535   def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, (loadi64 addr:$src2)),
6536             (VCVTSI642SDZrm_Int VR128X:$src1, addr:$src2)>;
6537   def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, GR32:$src2),
6538             (VCVTUSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
6539   def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, (loadi32 addr:$src2)),
6540             (VCVTUSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
6541 } // Predicates = [HasAVX512]
6542
6543 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
6544 // which produce unnecessary vmovs{s,d} instructions.
// Each pattern folds X86Movss / X86Movsd of (scalar_to_vector (sint_to_fp GPR))
// into $dst directly into the matching rr_Int convert, with $dst as the
// pass-through vector operand.
6545 let Predicates = [HasAVX512] in {
6546 def : Pat<(v4f32 (X86Movss
6547                    (v4f32 VR128X:$dst),
6548                    (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
6549           (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
6550
6551 def : Pat<(v4f32 (X86Movss
6552                    (v4f32 VR128X:$dst),
6553                    (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
6554           (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
6555
6556 def : Pat<(v2f64 (X86Movsd
6557                    (v2f64 VR128X:$dst),
6558                    (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
6559           (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
6560
6561 def : Pat<(v2f64 (X86Movsd
6562                    (v2f64 VR128X:$dst),
6563                    (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
6564           (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
6565 } // Predicates = [HasAVX512]
6566
6567 // Convert float/double to signed/unsigned int 32/64 with truncation
// avx512_cvt_s_all - Under HasAVX512, defines for one truncating convert:
//   rr / rm  : forms on _SrcRC.FRC / scalar memory, selected via OpNode.
//   rrb      : {sae} register form with an empty pattern list (EVEX_B).
//   aliases  : three InstAliases whose mnemonic is asm # aliasStr, mapping to
//              rr, rrb and rm respectively.
//   rr_Int   : isCodeGenOnly form on _SrcRC.RC, OpNodeRnd with FROUND_CURRENT.
//   rrb_Int  : isCodeGenOnly {sae} form, OpNodeRnd with FROUND_NO_EXC (EVEX_B).
//   rm_Int   : isCodeGenOnly memory form with an empty pattern list
//              (mayLoad = 1, hasSideEffects = 0).
// NOTE(review): rr, rrb and rm are not tagged VEX_LIG, whereas the _Int records
// are -- confirm whether that difference is intentional.
6568 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
6569                             X86VectorVTInfo _DstRC, SDNode OpNode,
6570                             SDNode OpNodeRnd, OpndItins itins, string aliasStr>{
6571 let Predicates = [HasAVX512] in {
6572   def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
6573               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6574               [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))], itins.rr>,
6575               EVEX, Sched<[itins.Sched]>;
6576   let hasSideEffects = 0 in
6577   def rrb : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
6578                 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
6579                 [], itins.rr>, EVEX, EVEX_B, Sched<[itins.Sched]>;
6580   def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
6581               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6582               [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))],
6583               itins.rm>, EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6584
// Size-suffixed mnemonics (asm # aliasStr) for the rr, rrb and rm records.
// NOTE(review): the rrb alias string uses "\t\t" where the other two use a
// single "\t" -- confirm this is harmless or intended.
6585   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6586           (!cast<Instruction>(NAME # "rr") _DstRC.RC:$dst, _SrcRC.FRC:$src), 0>;
6587   def : InstAlias<asm # aliasStr # "\t\t{{sae}, $src, $dst|$dst, $src, {sae}}",
6588           (!cast<Instruction>(NAME # "rrb") _DstRC.RC:$dst, _SrcRC.FRC:$src), 0>;
6589   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6590           (!cast<Instruction>(NAME # "rm") _DstRC.RC:$dst,
6591                                           _SrcRC.ScalarMemOp:$src), 0>;
6592
6593   let isCodeGenOnly = 1 in {
6594     def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
6595               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6596              [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
6597                                    (i32 FROUND_CURRENT)))], itins.rr>,
6598              EVEX, VEX_LIG, Sched<[itins.Sched]>;
6599     def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
6600               !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
6601               [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
6602                                     (i32 FROUND_NO_EXC)))], itins.rr>,
6603                                     EVEX,VEX_LIG , EVEX_B, Sched<[itins.Sched]>;
6604     let mayLoad = 1, hasSideEffects = 0 in
6605       def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
6606                   (ins _SrcRC.IntScalarMemOp:$src),
6607                   !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6608                   [], itins.rm>, EVEX, VEX_LIG,
6609                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
6610   } // isCodeGenOnly = 1
6611 } //HasAVX512
6612 }
6613
6614
// Truncating scalar FP-to-integer conversions. Signed records use opcode 0x2C
// with fp_to_sint / X86cvtts2IntRnd; unsigned records use opcode 0x78 with
// fp_to_uint / X86cvtts2UIntRnd. aliasStr is "{l}" for 32-bit destinations and
// "{q}" for 64-bit destinations; the 64-bit records also add VEX_W.
6615 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
6616                         fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_32, "{l}">,
6617                         XS, EVEX_CD8<32, CD8VT1>;
6618 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
6619                         fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_64, "{q}">,
6620                         VEX_W, XS, EVEX_CD8<32, CD8VT1>;
6621 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
6622                         fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{l}">,
6623                         XD, EVEX_CD8<64, CD8VT1>;
6624 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
6625                         fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{q}">,
6626                         VEX_W, XD, EVEX_CD8<64, CD8VT1>;
6627
6628 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
6629                         fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_32, "{l}">,
6630                         XS, EVEX_CD8<32, CD8VT1>;
6631 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
6632                         fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_64, "{q}">,
6633                         XS,VEX_W, EVEX_CD8<32, CD8VT1>;
6634 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
6635                         fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{l}">,
6636                         XD, EVEX_CD8<64, CD8VT1>;
6637 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
6638                         fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{q}">,
6639                         XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Map the SSE/SSE2 truncating-convert intrinsics to the AVX-512 _Int records.
// Register sources select rr_Int. Memory sources are matched through
// sse_load_f32 / sse_load_f64, while the selected rm_Int operand is written as
// ssmem:$src / sdmem:$src.
// NOTE(review): the non-truncating cvtss2si/cvtsd2si patterns earlier in this
// file reuse sse_load_f32:$src / sse_load_f64:$src in the output instead --
// confirm the two spellings are equivalent here.
6640 let Predicates = [HasAVX512] in {
6641   def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
6642             (VCVTTSS2SIZrr_Int VR128X:$src)>;
6643   def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
6644             (VCVTTSS2SIZrm_Int ssmem:$src)>;
6645   def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
6646             (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
6647   def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
6648             (VCVTTSS2SI64Zrm_Int ssmem:$src)>;
6649   def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
6650             (VCVTTSD2SIZrr_Int VR128X:$src)>;
6651   def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
6652             (VCVTTSD2SIZrm_Int sdmem:$src)>;
6653   def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
6654             (VCVTTSD2SI64Zrr_Int VR128X:$src)>;
6655   def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
6656             (VCVTTSD2SI64Zrm_Int sdmem:$src)>;
6657 } // HasAVX512
6658
6659 //===----------------------------------------------------------------------===//
6660 // AVX-512  Convert from float to double and back
6661 //===----------------------------------------------------------------------===//
6662
// avx512_cvt_fp_scalar - Scalar FP-to-FP convert between the _Src and _ types.
// All records are EVEX_4V, VEX_LIG.
//   rr_Int / rm_Int : maskable forms on _.RC; the pattern applies OpNode to
//                     $src1, the source ($src2, register or ScalarIntMemCPat
//                     memory) and (i32 FROUND_CURRENT).
//   rr / rm         : isCodeGenOnly forms on _.FRC with empty pattern lists
//                     (hasSideEffects = 0; rm is additionally mayLoad).
6663 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6664                          X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins> {
6665   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6666                          (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
6667                          "$src2, $src1", "$src1, $src2",
6668                          (_.VT (OpNode (_.VT _.RC:$src1),
6669                                        (_Src.VT _Src.RC:$src2),
6670                                        (i32 FROUND_CURRENT))), itins.rr>,
6671                          EVEX_4V, VEX_LIG, Sched<[itins.Sched]>;
6672   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6673                          (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
6674                          "$src2, $src1", "$src1, $src2",
6675                          (_.VT (OpNode (_.VT _.RC:$src1),
6676                                   (_Src.VT _Src.ScalarIntMemCPat:$src2),
6677                                   (i32 FROUND_CURRENT))), itins.rm>,
6678                          EVEX_4V, VEX_LIG,
6679                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
6680
6681   let isCodeGenOnly = 1, hasSideEffects = 0 in {
6682     def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
6683                (ins _.FRC:$src1, _Src.FRC:$src2),
6684                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
6685                itins.rr>, EVEX_4V, VEX_LIG, Sched<[itins.Sched]>;
6686     let mayLoad = 1 in
6687     def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
6688                (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
6689                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
6690                itins.rm>, EVEX_4V, VEX_LIG,
6691                Sched<[itins.Sched.Folded, ReadAfterLd]>;
6692   }
6693 }
6694
6695 // Scalar Conversion with SAE - suppress all exceptions
// Defines rrb_Int: a maskable register form printed with "{sae}", whose
// pattern passes (i32 FROUND_NO_EXC) to OpNodeRnd; tagged EVEX_4V, VEX_LIG,
// EVEX_B.
6696 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6697                          X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> {
6698   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6699                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
6700                         "{sae}, $src2, $src1", "$src1, $src2, {sae}",
6701                         (_.VT (OpNodeRnd (_.VT _.RC:$src1),
6702                                          (_Src.VT _Src.RC:$src2),
6703                                          (i32 FROUND_NO_EXC))), itins.rr>,
6704                         EVEX_4V, VEX_LIG, EVEX_B, Sched<[itins.Sched]>;
6705 }
6706
6707 // Scalar Conversion with rounding control (RC): register-only rrb_Int form taking an AVX512RC:$rc operand.
6708 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6709                          X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> {
6710   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6711                         (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
6712                         "$rc, $src2, $src1", "$src1, $src2, $rc",
6713                         (_.VT (OpNodeRnd (_.VT _.RC:$src1),
6714                                          (_Src.VT _Src.RC:$src2), (i32 imm:$rc))),
6715                                          itins.rr>,
6716                         EVEX_4V, VEX_LIG, Sched<[itins.Sched]>,
6717                         EVEX_B, EVEX_RC;
6718 }
6719 multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
6720                                   SDNode OpNodeRnd, OpndItins itins,
6721                                   X86VectorVTInfo _src, X86VectorVTInfo _dst> {
6722   let Predicates = [HasAVX512] in { // Z = base scalar forms + the rounding-control (RC) form.
6723     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>, // helpers take (_dst, _src)
6724              avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
6725                                OpNodeRnd, itins>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
6726   }
6727 }
6728
6729 multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
6730                                     SDNode OpNodeRnd, OpndItins itins,
6731                                     X86VectorVTInfo _src, X86VectorVTInfo _dst> {
6732   let Predicates = [HasAVX512] in { // Z = base scalar forms + the {sae} form (no RC form here).
6733     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>, // helpers take (_dst, _src)
6734              avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
6735              EVEX_CD8<32, CD8VT1>, XS;
6736   }
6737 }
6738 defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
6739                                          X86froundRnd, SSE_CVT_SD2SS, f64x_info,
6740                                          f32x_info>, NotMemoryFoldable;
6741 defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
6742                                           X86fpextRnd, SSE_CVT_SS2SD, f32x_info,
6743                                           f64x_info>, NotMemoryFoldable;
6744
6745 def : Pat<(f64 (fpextend FR32X:$src)),
6746           (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
6747           Requires<[HasAVX512]>;
6748 def : Pat<(f64 (fpextend (loadf32 addr:$src))),
6749           (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
6750           Requires<[HasAVX512]>;
6751
6752 def : Pat<(f64 (extloadf32 addr:$src)),
6753           (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
6754       Requires<[HasAVX512, OptForSize]>;
6755
6756 def : Pat<(f64 (extloadf32 addr:$src)),
6757           (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
6758           Requires<[HasAVX512, OptForSpeed]>;
6759
6760 def : Pat<(f32 (fpround FR64X:$src)),
6761           (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
6762            Requires<[HasAVX512]>;
6763
6764 def : Pat<(v4f32 (X86Movss
6765                    (v4f32 VR128X:$dst),
6766                    (v4f32 (scalar_to_vector
6767                      (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
6768           (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
6769           Requires<[HasAVX512]>;
6770
6771 def : Pat<(v2f64 (X86Movsd
6772                    (v2f64 VR128X:$dst),
6773                    (v2f64 (scalar_to_vector
6774                      (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
6775           (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
6776           Requires<[HasAVX512]>;
6777
6778 //===----------------------------------------------------------------------===//
6779 // AVX-512  Vector convert from signed/unsigned integer to float/double
6780 //          and from float/double to signed/unsigned integer
6781 //===----------------------------------------------------------------------===//
6782
6783 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6784                          X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins,
6785                          string Broadcast = _.BroadcastStr,
6786                          string Alias = "", X86MemOperand MemOp = _Src.MemOp> {
6787   // rr: register source.
6788   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6789                          (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
6790                          (_.VT (OpNode (_Src.VT _Src.RC:$src))), itins.rr>,
6791                          EVEX, Sched<[itins.Sched]>;
6792   // rm: vector load through MemOp; only this form appends Alias to the mnemonic.
6793   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6794                          (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
6795                          (_.VT (OpNode (_Src.VT
6796                              (bitconvert (_Src.LdFrag addr:$src))))), itins.rm>,
6797                          EVEX, Sched<[itins.Sched.Folded]>;
6798   // rmb: scalar load broadcast to the source vector (EVEX_B); Broadcast is appended to the operand text.
6799   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6800                          (ins _Src.ScalarMemOp:$src), OpcodeStr,
6801                          "${src}"##Broadcast, "${src}"##Broadcast,
6802                          (_.VT (OpNode (_Src.VT
6803                                   (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
6804                             )), itins.rm>, EVEX, EVEX_B,
6805                          Sched<[itins.Sched.Folded]>;
6806 }
6807 // Conversion with SAE (suppress all exceptions): register-only rrb form, matched on FROUND_NO_EXC.
6808 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6809                               X86VectorVTInfo _Src, SDNode OpNodeRnd,
6810                               OpndItins itins> {
6811   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6812                         (ins _Src.RC:$src), OpcodeStr,
6813                         "{sae}, $src", "$src, {sae}",
6814                         (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
6815                                (i32 FROUND_NO_EXC))), itins.rr>,
6816                         EVEX, EVEX_B, Sched<[itins.Sched]>;
6817 }
6818
6819 // Conversion with rounding control (RC): register-only rrb form taking an AVX512RC:$rc operand.
6820 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6821                          X86VectorVTInfo _Src, SDNode OpNodeRnd,
6822                          OpndItins itins> {
6823   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6824                         (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
6825                         "$rc, $src", "$src, $rc",
6826                         (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc))),
6827                         itins.rr>, EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
6828 }
6829
6830 // Extend Float to Double. The 512-bit form also gets an {sae} variant; Z128 overrides the broadcast string and memory operand.
6831 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
6832                            OpndItins itins> {
6833   let Predicates = [HasAVX512] in {
6834     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
6835                             fpextend, itins>,
6836              avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
6837                                 X86vfpextRnd, itins>, EVEX_V512;
6838   }
6839   let Predicates = [HasVLX] in {
6840     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
6841                                X86vfpext, itins, "{1to2}", "", f64mem>, EVEX_V128;
6842     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
6843                                itins>, EVEX_V256;
6844   }
6845 }
6846
6847 // Truncate Double to Float. Z128 and Z256 share the v4f32x_info dest, so their rm forms get "{x}"/"{y}" suffixes.
6848 multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, OpndItins itins> {
6849   let Predicates = [HasAVX512] in {
6850     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, itins>,
6851              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
6852                                X86vfproundRnd, itins>, EVEX_V512;
6853   }
6854   let Predicates = [HasVLX] in {
6855     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
6856                                X86vfpround, itins, "{1to2}", "{x}">, EVEX_V128;
6857     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
6858                                itins, "{1to4}", "{y}">, EVEX_V256;
6859     // Explicit "x"/"y"-suffixed mnemonic aliases for both the rr and rm forms.
6860     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
6861                     (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
6862     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
6863                     (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
6864     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
6865                     (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
6866     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
6867                     (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
6868   }
6869 }
6870
6871 defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SSE_CVT_PD2PS>,
6872                                   VEX_W, PD, EVEX_CD8<64, CD8VF>;
6873 defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SSE_CVT_PS2PD>,
6874                                   PS, EVEX_CD8<32, CD8VH>;
6875 // Fold a v8f32 extending load into VCVTPS2PDZrm; gated on HasAVX512 like the instruction it selects.
6876 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
6877             (VCVTPS2PDZrm addr:$src)>, Requires<[HasAVX512]>;
6878
6879 let Predicates = [HasVLX] in {
6880   let AddedComplexity = 15 in { // Select X86vzmovl of the 128-bit pd->ps result as the plain convert (rr and rm).
6881     def : Pat<(X86vzmovl (v2f64 (bitconvert
6882                                  (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
6883               (VCVTPD2PSZ128rr VR128X:$src)>;
6884     def : Pat<(X86vzmovl (v2f64 (bitconvert
6885                                  (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
6886               (VCVTPD2PSZ128rm addr:$src)>;
6887   } // Below: extending loads select the ps->pd memory forms directly.
6888   def : Pat<(v2f64 (extloadv2f32 addr:$src)),
6889               (VCVTPS2PDZ128rm addr:$src)>;
6890   def : Pat<(v4f64 (extloadv4f32 addr:$src)),
6891               (VCVTPS2PDZ256rm addr:$src)>;
6892 }
6893
6894 // Convert Signed/Unsigned Doubleword to Double
6895 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
6896                            SDNode OpNode128, OpndItins itins> {
6897   // No rounding in this op, so no rounding-control/{sae} (rrb) form is defined.
6898   let Predicates = [HasAVX512] in
6899     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
6900                             itins>, EVEX_V512;
6901   // Z128 uses its own node (OpNode128) plus an explicit "{1to2}" broadcast string and i64mem operand.
6902   let Predicates = [HasVLX] in {
6903     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
6904                                OpNode128, itins, "{1to2}", "", i64mem>, EVEX_V128;
6905     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
6906                                itins>, EVEX_V256;
6907   }
6908 }
6909
6910 // Convert Signed/Unsigned Doubleword to Float (the 512-bit form also gets a rounding-control variant)
6911 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
6912                            SDNode OpNodeRnd, OpndItins itins> {
6913   let Predicates = [HasAVX512] in
6914     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
6915                             itins>,
6916              avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
6917                                OpNodeRnd, itins>, EVEX_V512;
6918
6919   let Predicates = [HasVLX] in {
6920     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
6921                                itins>, EVEX_V128;
6922     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
6923                                itins>, EVEX_V256;
6924   }
6925 }
6926
6927 // Convert Float to Signed/Unsigned Doubleword with truncation (the 512-bit form also gets an {sae} variant)
6928 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
6929                             SDNode OpNodeRnd, OpndItins itins> {
6930   let Predicates = [HasAVX512] in {
6931     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
6932                             itins>,
6933              avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
6934                                 OpNodeRnd, itins>, EVEX_V512;
6935   }
6936   let Predicates = [HasVLX] in {
6937     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
6938                                itins>, EVEX_V128;
6939     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
6940                                itins>, EVEX_V256;
6941   }
6942 }
6943
6944 // Convert Float to Signed/Unsigned Doubleword (the 512-bit form also gets a rounding-control variant)
6945 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
6946                            SDNode OpNodeRnd, OpndItins itins> {
6947   let Predicates = [HasAVX512] in {
6948     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
6949                             itins>,
6950              avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
6951                                 OpNodeRnd, itins>, EVEX_V512;
6952   }
6953   let Predicates = [HasVLX] in {
6954     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
6955                                itins>, EVEX_V128;
6956     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
6957                                itins>, EVEX_V256;
6958   }
6959 }
6960
6961 // Convert Double to Signed/Unsigned Doubleword with truncation (the 512-bit form also gets an {sae} variant)
6962 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
6963                             SDNode OpNode128, SDNode OpNodeRnd,
6964                             OpndItins itins> {
6965   let Predicates = [HasAVX512] in {
6966     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
6967                             itins>,
6968              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
6969                                 OpNodeRnd, itins>, EVEX_V512;
6970   }
6971   let Predicates = [HasVLX] in {
6972     // we need "x"/"y" suffixes in order to distinguish between 128 and 256
6973     // memory forms of these instructions in Asm Parser. They have the same
6974     // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
6975     // due to the same reason.
6976     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
6977                                OpNode128, itins, "{1to2}", "{x}">, EVEX_V128;
6978     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
6979                                itins, "{1to4}", "{y}">, EVEX_V256;
6980     // Suffixed-mnemonic aliases. The sources are f64 vectors, so the rm aliases name f128mem/f256mem.
6981     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
6982                     (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
6983     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
6984                     (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
6985     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
6986                     (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
6987     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
6988                     (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
6989   }
6990 }
6991
6992 // Convert Double to Signed/Unsigned Doubleword (the 512-bit form also gets a rounding-control variant)
6993 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
6994                            SDNode OpNodeRnd, OpndItins itins> {
6995   let Predicates = [HasAVX512] in {
6996     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
6997                             itins>,
6998              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
6999                                OpNodeRnd, itins>, EVEX_V512;
7000   }
7001   let Predicates = [HasVLX] in {
7002     // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7003     // memory forms of these instructions in Asm Parser. They have the same
7004     // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7005     // due to the same reason.
7006     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
7007                                itins, "{1to2}", "{x}">, EVEX_V128;
7008     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7009                                itins, "{1to4}", "{y}">, EVEX_V256;
7010     // Explicit "x"/"y"-suffixed mnemonic aliases for both the rr and rm forms.
7011     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7012                     (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7013     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7014                     (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
7015     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7016                     (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7017     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7018                     (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
7019   }
7020 }
7021
7022 // Convert Double to Signed/Unsigned Quadword (DQI; the 512-bit form also gets a rounding-control variant)
7023 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7024                            SDNode OpNodeRnd, OpndItins itins> {
7025   let Predicates = [HasDQI] in {
7026     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7027                             itins>,
7028              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
7029                                OpNodeRnd,itins>, EVEX_V512;
7030   }
7031   let Predicates = [HasDQI, HasVLX] in {
7032     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7033                                itins>, EVEX_V128;
7034     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7035                                itins>, EVEX_V256;
7036   }
7037 }
7038
7039 // Convert Double to Signed/Unsigned Quadword with truncation (DQI; the 512-bit form also gets an {sae} variant)
7040 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7041                             SDNode OpNodeRnd, OpndItins itins> {
7042   let Predicates = [HasDQI] in {
7043     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7044                             itins>,
7045              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
7046                                 OpNodeRnd, itins>, EVEX_V512;
7047   }
7048   let Predicates = [HasDQI, HasVLX] in {
7049     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7050                                itins>, EVEX_V128;
7051     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7052                                itins>, EVEX_V256;
7053   }
7054 }
7055
7056 // Convert Signed/Unsigned Quadword to Double (DQI; the 512-bit form also gets a rounding-control variant)
7057 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7058                            SDNode OpNodeRnd, OpndItins itins> {
7059   let Predicates = [HasDQI] in {
7060     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
7061                             itins>,
7062              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
7063                                OpNodeRnd, itins>, EVEX_V512;
7064   }
7065   let Predicates = [HasDQI, HasVLX] in {
7066     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
7067                                itins>, EVEX_V128;
7068     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
7069                                itins>, EVEX_V256;
7070   }
7071 }
7072
7073 // Convert Float to Signed/Unsigned Quadword (DQI; the 512-bit form also gets a rounding-control variant)
7074 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7075                            SDNode OpNodeRnd, OpndItins itins> {
7076   let Predicates = [HasDQI] in {
7077     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7078                             itins>,
7079              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
7080                                OpNodeRnd, itins>, EVEX_V512;
7081   }
7082   let Predicates = [HasDQI, HasVLX] in {
7083     // Explicitly specified broadcast string, since we take only 2 elements
7084     // from v4f32x_info source
7085     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
7086                                itins, "{1to2}", "", f64mem>, EVEX_V128;
7087     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7088                                itins>, EVEX_V256;
7089   }
7090 }
7091
7092 // Convert Float to Signed/Unsigned Quadword with truncation (DQI; the 512-bit form also gets an {sae} variant)
7093 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7094                             SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> {
7095   let Predicates = [HasDQI] in {
7096     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7097                             itins>,
7098              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
7099                                 OpNodeRnd, itins>, EVEX_V512;
7100   }
7101   let Predicates = [HasDQI, HasVLX] in {
7102     // Explicitly specified broadcast string, since we take only 2 elements
7103     // from v4f32x_info source
7104     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode128,
7105                                itins, "{1to2}", "", f64mem>, EVEX_V128;
7106     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7107                                itins>, EVEX_V256;
7108   }
7109 }
7110
7111 // Convert Signed/Unsigned Quadword to Float (DQI; the 512-bit form also gets a rounding-control variant)
7112 multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7113                            SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> {
7114   let Predicates = [HasDQI] in {
7115     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
7116                             itins>,
7117              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
7118                                OpNodeRnd, itins>, EVEX_V512;
7119   }
7120   let Predicates = [HasDQI, HasVLX] in {
7121     // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7122     // memory forms of these instructions in Asm Parser. They have the same
7123     // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
7124     // due to the same reason.
7125     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
7126                                itins, "{1to2}", "{x}">, EVEX_V128;
7127     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
7128                                itins, "{1to4}", "{y}">, EVEX_V256;
7129     // Explicit "x"/"y"-suffixed mnemonic aliases for both the rr and rm forms.
7130     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7131                     (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7132     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7133                     (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
7134     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7135                     (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7136     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7137                     (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
7138   }
7139 }
7140
7141 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
7142                                  SSE_CVT_I2PD>, XS, EVEX_CD8<32, CD8VH>;
7143
7144 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
7145                                 X86VSintToFpRnd, SSE_CVT_I2PS>,
7146                                 PS, EVEX_CD8<32, CD8VF>;
7147
7148 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint,
7149                                 X86cvttp2siRnd, SSE_CVT_PS2I>,
7150                                 XS, EVEX_CD8<32, CD8VF>;
7151
7152 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttp2si,
7153                                  X86cvttp2siRnd, SSE_CVT_PD2I>,
7154                                  PD, VEX_W, EVEX_CD8<64, CD8VF>;
7155
7156 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,
7157                                  X86cvttp2uiRnd, SSE_CVT_PS2I>, PS,
7158                                  EVEX_CD8<32, CD8VF>;
7159
7160 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
7161                                  X86cvttp2ui, X86cvttp2uiRnd, SSE_CVT_PD2I>,
7162                                  PS, VEX_W, EVEX_CD8<64, CD8VF>;
7163
7164 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
7165                                   X86VUintToFP, SSE_CVT_I2PD>, XS,
7166                                   EVEX_CD8<32, CD8VH>;
7167
7168 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
7169                                  X86VUintToFpRnd, SSE_CVT_I2PS>, XD,
7170                                  EVEX_CD8<32, CD8VF>;
7171
7172 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
7173                                  X86cvtp2IntRnd, SSE_CVT_PS2I>, PD,
7174                                  EVEX_CD8<32, CD8VF>;
7175
7176 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
7177                                  X86cvtp2IntRnd, SSE_CVT_PD2I>, XD,
7178                                  VEX_W, EVEX_CD8<64, CD8VF>;
7179
7180 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
7181                                  X86cvtp2UIntRnd, SSE_CVT_PS2I>,
7182                                  PS, EVEX_CD8<32, CD8VF>;
7183
7184 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
7185                                  X86cvtp2UIntRnd, SSE_CVT_PD2I>, VEX_W,
7186                                  PS, EVEX_CD8<64, CD8VF>;
7187
7188 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
7189                                  X86cvtp2IntRnd, SSE_CVT_PD2I>, VEX_W,
7190                                  PD, EVEX_CD8<64, CD8VF>;
7191
7192 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
7193                                  X86cvtp2IntRnd, SSE_CVT_PS2I>, PD,
7194                                  EVEX_CD8<32, CD8VH>;
7195
7196 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
7197                                  X86cvtp2UIntRnd, SSE_CVT_PD2I>, VEX_W,
7198                                  PD, EVEX_CD8<64, CD8VF>;
7199
7200 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
7201                                  X86cvtp2UIntRnd, SSE_CVT_PS2I>, PD,
7202                                  EVEX_CD8<32, CD8VH>;
7203
7204 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint,
7205                                  X86cvttp2siRnd, SSE_CVT_PD2I>, VEX_W,
7206                                  PD, EVEX_CD8<64, CD8VF>;
7207
7208 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86cvttp2si,
7209                                  X86cvttp2siRnd, SSE_CVT_PS2I>, PD,
7210                                  EVEX_CD8<32, CD8VH>;
7211
7212 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint,
7213                                  X86cvttp2uiRnd, SSE_CVT_PD2I>, VEX_W,
7214                                  PD, EVEX_CD8<64, CD8VF>;
7215
7216 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86cvttp2ui,
7217                                  X86cvttp2uiRnd, SSE_CVT_PS2I>, PD,
7218                                  EVEX_CD8<32, CD8VH>;
7219
7220 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
7221                             X86VSintToFpRnd, SSE_CVT_I2PD>, VEX_W, XS,
7222                             EVEX_CD8<64, CD8VF>;
7223
7224 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
7225                             X86VUintToFpRnd, SSE_CVT_I2PD>, VEX_W, XS,
7226                             EVEX_CD8<64, CD8VF>;
7227
7228 defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
7229                             X86VSintToFpRnd, SSE_CVT_I2PS>, VEX_W, PS,
7230                             EVEX_CD8<64, CD8VF>;
7231
7232 defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
7233                             X86VUintToFpRnd, SSE_CVT_I2PS>, VEX_W, XD,
7234                             EVEX_CD8<64, CD8VF>;
7235 // No VLX: use the Z (512-bit) instruction. INSERT_SUBREG the source into an IMPLICIT_DEF, then EXTRACT_SUBREG.
7236 let Predicates = [HasAVX512, NoVLX] in {
7237 def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
7238           (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
7239            (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
7240                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
7241
7242 def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
7243           (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
7244            (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
7245                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
7246
7247 def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
7248           (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
7249            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7250                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;
7251
7252 def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
7253           (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
7254            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
7255                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
7256
7257 def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
7258           (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
7259            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
7260                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
7261
7262 def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
7263           (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
7264            (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
7265                                  VR128X:$src1, sub_xmm)))), sub_ymm)>;
7266
7267 def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
7268           (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
7269            (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
7270                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;
7271 }
7272 // VLX: select X86vzmovl of a 128-bit pd->dq/udq result as the plain convert; fold 64-bit loads into (u)dq2pd.
7273 let Predicates = [HasAVX512, HasVLX] in {
7274   let AddedComplexity = 15 in { // NOTE(review): the unsigned (UDQ) variants below have rr patterns only.
7275     def : Pat<(X86vzmovl (v2i64 (bitconvert
7276                                 (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
7277               (VCVTPD2DQZ128rr VR128X:$src)>;
7278     def : Pat<(X86vzmovl (v2i64 (bitconvert
7279                                 (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
7280               (VCVTPD2DQZ128rm addr:$src)>;
7281     def : Pat<(X86vzmovl (v2i64 (bitconvert
7282                                  (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
7283               (VCVTPD2UDQZ128rr VR128X:$src)>;
7284     def : Pat<(X86vzmovl (v2i64 (bitconvert
7285                                 (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
7286               (VCVTTPD2DQZ128rr VR128X:$src)>;
7287     def : Pat<(X86vzmovl (v2i64 (bitconvert
7288                                 (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
7289               (VCVTTPD2DQZ128rm addr:$src)>;
7290     def : Pat<(X86vzmovl (v2i64 (bitconvert
7291                                  (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
7292               (VCVTTPD2UDQZ128rr VR128X:$src)>;
7293   }
7294   // An i64 load (scalar_to_vector of loadi64, or X86vzload) feeding the v4i32 source selects the Z128rm form.
7295   def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
7296             (VCVTDQ2PDZ128rm addr:$src)>;
7297   def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
7298             (VCVTDQ2PDZ128rm addr:$src)>;
7299
7300   def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
7301             (VCVTUDQ2PDZ128rm addr:$src)>;
7302   def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
7303             (VCVTUDQ2PDZ128rm addr:$src)>;
7304 }
7305
// Select the 512-bit memory forms directly for an fpround of a loaded v8f64
// (VCVTPD2PSZrm) and for an extending load of v8f32 to v8f64 (VCVTPS2PDZrm).
7306 let Predicates = [HasAVX512] in {
7307   def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
7308             (VCVTPD2PSZrm addr:$src)>;
7309   def : Pat<(v8f64 (extloadv8f32 addr:$src)),
7310             (VCVTPS2PDZrm addr:$src)>;
7311 }
7312
// With DQI and VLX: a v2i64 -> v4f32 conversion (signed or unsigned) whose
// result is bitcast to v2f64 and wrapped in X86vzmovl is selected as the bare
// 128-bit conversion instruction. AddedComplexity = 15 is attached.
// NOTE(review): presumably sound because the instruction already zeroes the
// upper half of the destination - confirm against the ISA manual.
7313 let Predicates = [HasDQI, HasVLX] in {
7314   let AddedComplexity = 15 in {
7315     def : Pat<(X86vzmovl (v2f64 (bitconvert
7316                                 (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
7317               (VCVTQQ2PSZ128rr VR128X:$src)>;
7318     def : Pat<(X86vzmovl (v2f64 (bitconvert
7319                                 (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
7320               (VCVTUQQ2PSZ128rr VR128X:$src)>;
7321   }
7322 }
7323
// With DQI but without VLX, conversions between FP and 64-bit integer
// vectors on 128/256-bit operands are selected to the wide Z register forms.
// Every pattern follows the same scheme: INSERT_SUBREG the source into an
// IMPLICIT_DEF wider register, run the Z instruction, then EXTRACT_SUBREG the
// low sub_xmm / sub_ymm part that holds the result.
7324 let Predicates = [HasDQI, NoVLX] in {
// FP -> signed i64 (truncating).
7325 def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
7326           (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
7327            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7328                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
7329
7330 def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
7331           (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
7332            (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
7333                                   VR128X:$src1, sub_xmm)))), sub_ymm)>;
7334
7335 def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
7336           (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
7337            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7338                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
7339
// FP -> unsigned i64 (truncating).
7340 def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
7341           (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
7342            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7343                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
7344
7345 def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
7346           (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
7347            (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
7348                                   VR128X:$src1, sub_xmm)))), sub_ymm)>;
7349
7350 def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
7351           (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
7352            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7353                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
7354
// Signed i64 -> FP.
7355 def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
7356           (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
7357            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7358                                   VR256X:$src1, sub_ymm)))), sub_xmm)>;
7359
7360 def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
7361           (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
7362            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7363                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
7364
7365 def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
7366           (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
7367            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7368                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
7369
// Unsigned i64 -> FP.
7370 def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
7371           (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
7372            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7373                                   VR256X:$src1, sub_ymm)))), sub_xmm)>;
7374
7375 def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
7376           (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
7377            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7378                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
7379
7380 def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
7381           (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
7382            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7383                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
7384 }
7385
7386 //===----------------------------------------------------------------------===//
7387 // Half precision conversion instructions
7388 //===----------------------------------------------------------------------===//
7389
// Defines the maskable register (rr) and memory (rm) forms of vcvtph2ps
// (opcode 0x13), both matching the X86cvtph2ps node.
//   _dest    - vector type info of the result (also drives the masking).
//   _src     - vector type info of the source operand.
//   x86memop - memory operand class used by the rm form.
//   ld_frag  - load fragment whose result is bitconverted to _src.VT in rm.
//   itins    - itineraries (rr / rm) and scheduling class.
7390 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7391                            X86MemOperand x86memop, PatFrag ld_frag,
7392                            OpndItins itins> {
7393   defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
7394                             (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
7395                             (X86cvtph2ps (_src.VT _src.RC:$src)),itins.rr>,
7396                             T8PD, Sched<[itins.Sched]>;
7397   defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
7398                             (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
7399                             (X86cvtph2ps (_src.VT
7400                                           (bitconvert
7401                                            (ld_frag addr:$src)))), itins.rm>,
7402                             T8PD, Sched<[itins.Sched.Folded]>;
7403 }
7404
// Defines the {sae} register form (rrb) of vcvtph2ps. It matches
// X86cvtph2psRnd with FROUND_NO_EXC and is marked EVEX_B.
7405 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7406                                OpndItins itins> {
7407   defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
7408                              (ins _src.RC:$src), "vcvtph2ps",
7409                              "{sae}, $src", "$src, {sae}",
7410                              (X86cvtph2psRnd (_src.VT _src.RC:$src),
7411                                              (i32 FROUND_NO_EXC)), itins.rr>,
7412                              T8PD, EVEX_B, Sched<[itins.Sched]>;
7413 }
7414
// 512-bit vcvtph2ps: v16i16 source to v16f32 result, with a 256-bit memory
// operand. Combines the plain rr/rm forms with the {sae} rrb form.
7415 let Predicates = [HasAVX512] in
7416   defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
7417                                     SSE_CVT_PH2PS>,
7418                     avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, SSE_CVT_PH2PS>,
7419                     EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
7420
// 256-bit and 128-bit vcvtph2ps under HasVLX, plus patterns that fold a
// 64-bit scalar load into the 128-bit memory form. No {sae} form is
// instantiated for these widths here.
7421 let Predicates = [HasVLX] in {
7422   defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
7423                        loadv2i64, SSE_CVT_PH2PS>, EVEX, EVEX_V256,
7424                        EVEX_CD8<32, CD8VH>;
       // The 128-bit form is given an f64mem memory operand.
7425   defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
7426                        loadv2i64, SSE_CVT_PH2PS>, EVEX, EVEX_V128,
7427                        EVEX_CD8<32, CD8VH>;
7428
7429   // Pattern match vcvtph2ps of a scalar i64 load.
7430   def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
7431             (VCVTPH2PSZ128rm addr:$src)>;
7432   def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
7433             (VCVTPH2PSZ128rm addr:$src)>;
7434   def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
7435               (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
7436             (VCVTPH2PSZ128rm addr:$src)>;
7437 }
7438
// Defines vcvtps2ph (opcode 0x1D) in three forms:
//   rr  - maskable register form matching X86cvtps2ph with an i32 immediate.
//   mr  - store form, no selection pattern ([]).
//   mrk - masked store form (EVEX_K), no selection pattern ([]).
// The store forms are flagged mayStore = 1 / hasSideEffects = 0 explicitly
// because they carry no pattern to infer that from.
//   _dest    - vector type info of the half-precision result.
//   _src     - vector type info of the f32 source.
//   x86memop - memory operand class of the store destination.
//   itins    - itineraries (rr / rm) and scheduling class.
7439 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7440                            X86MemOperand x86memop, OpndItins itins> {
7441   defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
7442                    (ins _src.RC:$src1, i32u8imm:$src2),
7443                    "vcvtps2ph", "$src2, $src1", "$src1, $src2",
7444                    (X86cvtps2ph (_src.VT _src.RC:$src1),
7445                                 (i32 imm:$src2)),
7446                    itins.rr, 0, 0>, AVX512AIi8Base, Sched<[itins.Sched]>;
7447   let hasSideEffects = 0, mayStore = 1 in {
7448     def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
7449                (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
7450                "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7451                [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
7452     def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
7453                (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
7454                "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
7455                 [], itins.rm>, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
7456   }
7457 }
7458
// Defines the {sae} register form (rrb) of vcvtps2ph. It is built with
// AVX512_maskable_in_asm and an empty pattern list, so it has no selection
// pattern of its own; hasSideEffects = 0 is therefore set explicitly.
7459 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7460                                OpndItins itins> {
7461   let hasSideEffects = 0 in
7462   defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
7463                    (outs _dest.RC:$dst),
7464                    (ins _src.RC:$src1, i32u8imm:$src2),
7465                    "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2",
7466                    [], itins.rr>, EVEX_B, AVX512AIi8Base, Sched<[itins.Sched]>;
7467 }
7468
// EVEX vcvtps2ph instantiations and the patterns that fold a store of the
// conversion result into the memory-destination (mr) forms.
// The 512-bit instruction is available with HasAVX512; the 256/128-bit
// instructions are defined under HasVLX.
7469 let Predicates = [HasAVX512] in {
7470   defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
7471                                     SSE_CVT_PS2PH>,
7472                     avx512_cvtps2ph_sae<v16i16x_info, v16f32_info,
7473                                         SSE_CVT_PS2PH>, EVEX, EVEX_V512,
7474                                         EVEX_CD8<32, CD8VH>;
7475   let Predicates = [HasVLX] in {
7476     defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
7477                                          SSE_CVT_PS2PH>, EVEX, EVEX_V256,
7478                                          EVEX_CD8<32, CD8VH>;
7479     defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
7480                                          SSE_CVT_PS2PH>, EVEX, EVEX_V128,
7481                                          EVEX_CD8<32, CD8VH>;
7482   }
7483
       // The Z128/Z256 store forms are only defined under HasVLX (see the
       // defms above), so the patterns selecting them must carry the same
       // predicate rather than inheriting the outer HasAVX512; otherwise they
       // could be selected on a subtarget that lacks those instructions.
       // The first two patterns store the low 64 bits of the 128-bit result
       // (extracted as f64 or i64 element 0).
       let Predicates = [HasVLX] in {
7484   def : Pat<(store (f64 (extractelt
7485                          (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
7486                          (iPTR 0))), addr:$dst),
7487             (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
7488   def : Pat<(store (i64 (extractelt
7489                          (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
7490                          (iPTR 0))), addr:$dst),
7491             (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
7492   def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
7493             (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
       }
       // 512-bit store form: HasAVX512 from the enclosing let is sufficient.
7494   def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
7495             (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
7496 }
7497
7498 // Patterns for matching conversions from float to half-float and vice versa.
// Scalar values are moved into / out of VR128X with COPY_TO_REGCLASS and the
// 128-bit vcvtps2ph / vcvtph2ps register forms do the actual conversion.
7499 let Predicates = [HasVLX] in {
7500   // Use MXCSR.RC for rounding instead of explicitly specifying the default
7501   // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
7502   // configurations we support (the default). However, falling back to MXCSR is
7503   // more consistent with other instructions, which are always controlled by it.
7504   // It's encoded as 0b100.
       // f32 -> i16: convert, move the vector's low dword to a GPR
       // (VMOVPDI2DIZrr) and take its 16-bit subregister.
7505   def : Pat<(fp_to_f16 FR32X:$src),
7506             (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (VCVTPS2PHZ128rr
7507               (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), sub_16bit))>;
7508
       // i16 -> f32: the GR16 source is widened with MOVSX32rr16 before being
       // copied into a vector register.
7509   def : Pat<(f16_to_fp GR16:$src),
7510             (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
7511               (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)), FR32X)) >;
7512
       // Round trip f32 -> f16 -> f32 stays in vector registers; no GPR move.
7513   def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
7514             (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
7515               (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), FR32X)) >;
7516 }
7517
7518 //  Unordered/Ordered scalar fp compare with SAE and set EFLAGS
//  Defines only the register-register {sae} form (rrb), marked EVEX_B. It has
//  no selection pattern ([]), hence the explicit hasSideEffects = 0.
//    opc       - opcode byte.
//    _         - vector type info supplying the register class.
//    OpcodeStr - mnemonic.
//    itins     - itinerary and scheduling class.
7519 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
7520                             string OpcodeStr, OpndItins itins> {
7521   let hasSideEffects = 0 in
7522   def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
7523                   !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
7524                   [], itins.rr>, EVEX, EVEX_B, VEX_LIG, EVEX_V128,
7525                   Sched<[itins.Sched]>;
7526 }
7527
// {sae} forms of the scalar compares. All four define EFLAGS. Opcode 0x2E is
// used for the unordered (vucomis*) and 0x2F for the ordered (vcomis*)
// variants; the double-precision ones add VEX_W.
7528 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
7529   defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSE_COMIS>,
7530                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
7531   defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSE_COMIS>,
7532                                    AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
7533   defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSE_COMIS>,
7534                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
7535   defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSE_COMIS>,
7536                                    AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
7537 }
7538
// EVEX-encoded scalar compares built from the sse12_ord_cmp and
// sse12_ord_cmp_int multiclasses (defined outside this excerpt). All of them
// define EFLAGS.
7539 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
       // Unordered compares on FR32X / FR64X, matched through X86cmp.
7540   defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
7541                                  "ucomiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
7542                                  EVEX_CD8<32, CD8VT1>;
7543   defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
7544                                   "ucomisd", SSE_COMIS>, PD, EVEX,
7545                                   VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
       // Ordered compares on FR32X / FR64X get no selection pattern: the
       // Pattern field is forced to the empty list and `undef` is passed as
       // the node.
7546   let Pattern = []<dag> in {
7547     defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
7548                                    "comiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
7549                                    EVEX_CD8<32, CD8VT1>;
7550     defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
7551                                    "comisd", SSE_COMIS>, PD, EVEX,
7552                                     VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
7553   }
       // VR128X variants matched through X86ucomi / X86comi; code-gen only.
7554   let isCodeGenOnly = 1 in {
7555     defm Int_VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
7556                               sse_load_f32, "ucomiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
7557                               EVEX_CD8<32, CD8VT1>;
7558     defm Int_VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
7559                               sse_load_f64, "ucomisd", SSE_COMIS>, PD, EVEX,
7560                               VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
7561
7562     defm Int_VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
7563                               sse_load_f32, "comiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
7564                               EVEX_CD8<32, CD8VT1>;
7565     defm Int_VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
7566                               sse_load_f64, "comisd", SSE_COMIS>, PD, EVEX,
7567                               VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
7568   }
7569 }
7570
7571 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
/// Defines the maskable scalar register (rr) and memory (rm) forms of a
/// two-source operation under HasAVX512. The rm form takes its second source
/// through _.IntScalarMemOp / _.ScalarIntMemCPat.
///   opc       - opcode byte.
///   OpcodeStr - mnemonic.
///   OpNode    - SDNode matched by both forms.
///   itins     - itineraries (rr / rm) and scheduling class.
///   _         - vector type info (register class, VT, execution domain).
7572 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
7573                          OpndItins itins, X86VectorVTInfo _> {
7574   let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
7575   defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7576                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7577                            "$src2, $src1", "$src1, $src2",
7578                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>,
7579                            EVEX_4V, Sched<[itins.Sched]>;
7580   defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7581                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
7582                          "$src2, $src1", "$src1, $src2",
7583                          (OpNode (_.VT _.RC:$src1),
7584                           _.ScalarIntMemCPat:$src2), itins.rm>, EVEX_4V,
7585                           Sched<[itins.Sched.Folded, ReadAfterLd]>;
7586 }
7587 }
7588
// Scalar rcp14 (opcode 0x4D) and rsqrt14 (opcode 0x4F) in single and double
// precision. The double-precision variants add VEX_W and use a 64-bit CD8
// element size. All four are tagged NotMemoryFoldable.
7589 defm VRCP14SS   : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SSE_RCPS, f32x_info>,
7590                   EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
7591 defm VRCP14SD   : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SSE_RCPS, f64x_info>,
7592                   VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
7593 defm VRSQRT14SS   : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, SSE_RSQRTSS, f32x_info>,
7594                   EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
7595 defm VRSQRT14SD   : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, SSE_RSQRTSS, f64x_info>,
7596                   VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
7597
7598 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
/// Defines three maskable forms of a packed unary operation:
///   r  - register source.
///   m  - full-vector memory source (load bitconverted to _.FloatVT).
///   mb - broadcast of a scalar memory source (EVEX_B); the assembly operand
///        is "${src}" followed by _.BroadcastStr.
/// Parameters: opc (opcode byte), OpcodeStr (mnemonic), OpNode (matched
/// SDNode), itins (itineraries / scheduling class), _ (vector type info).
7599 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
7600                          OpndItins itins, X86VectorVTInfo _> {
7601   let ExeDomain = _.ExeDomain in {
7602   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7603                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
7604                          (_.FloatVT (OpNode _.RC:$src)), itins.rr>, EVEX, T8PD,
7605                          Sched<[itins.Sched]>;
7606   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7607                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
7608                          (OpNode (_.FloatVT
7609                            (bitconvert (_.LdFrag addr:$src)))), itins.rm>, EVEX, T8PD,
7610                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
7611   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7612                           (ins _.ScalarMemOp:$src), OpcodeStr,
7613                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
7614                           (OpNode (_.FloatVT
7615                             (X86VBroadcast (_.ScalarLdFrag addr:$src)))), itins.rm>,
7616                           EVEX, T8PD, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
7617   }
7618 }
7619
// Instantiates avx512_fp14_p for every vector width. The 512-bit PS/PD forms
// are defined with no predicate override here; the 128/256-bit forms are
// defined under HasVLX. "ps" / "pd" is appended to OpcodeStr, and itins.s /
// itins.d supply the single / double precision itineraries.
7620 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
7621                                 SizeItins itins> {
7622   defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, itins.s,
7623                            v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
7624   defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, itins.d,
7625                            v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
7626
7627   // Define only if AVX512VL feature is present.
7628   let Predicates = [HasVLX] in {
7629     defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
7630                                 OpNode, itins.s, v4f32x_info>,
7631                                EVEX_V128, EVEX_CD8<32, CD8VF>;
7632     defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
7633                                 OpNode, itins.s, v8f32x_info>,
7634                                EVEX_V256, EVEX_CD8<32, CD8VF>;
7635     defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
7636                                 OpNode, itins.d, v2f64x_info>,
7637                                EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
7638     defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
7639                                 OpNode, itins.d, v4f64x_info>,
7640                                EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
7641   }
7642 }
7643
// Packed rsqrt14 (opcode 0x4E) and rcp14 (opcode 0x4C) at all vector widths.
7644 defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SSE_RSQRT_P>;
7645 defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SSE_RCP_P>;
7646
7647 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
/// Defines three maskable scalar forms of a two-source operation whose node
/// takes an extra i32 rounding operand:
///   r  - register sources, FROUND_CURRENT.
///   rb - register sources with {sae}: FROUND_NO_EXC, EVEX_B.
///   m  - second source from memory (_.IntScalarMemOp), FROUND_CURRENT.
/// Parameters: opc (opcode byte), OpcodeStr (mnemonic), _ (vector type
/// info), OpNode (matched SDNode), itins (itineraries / scheduling class).
7648 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
7649                          SDNode OpNode, OpndItins itins> {
7650   let ExeDomain = _.ExeDomain in {
7651   defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7652                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7653                            "$src2, $src1", "$src1, $src2",
7654                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7655                            (i32 FROUND_CURRENT)), itins.rr>,
7656                            Sched<[itins.Sched]>;
7657
       // rb is a register-register form (MRMSrcReg, unfolded Sched), so it
       // uses the rr itinerary, as avx512_fp28_p_round does for its rb form.
7658   defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7659                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7660                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7661                             (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7662                             (i32 FROUND_NO_EXC)), itins.rr>, EVEX_B,
7663                             Sched<[itins.Sched]>;
7664
7665   defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7666                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
7667                          "$src2, $src1", "$src1, $src2",
7668                          (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
7669                          (i32 FROUND_CURRENT)), itins.rm>,
7670                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
7671   }
7672 }
7673
// Instantiates avx512_fp28_s for single (SS) and double (SD) precision,
// appending "ss" / "sd" to OpcodeStr. The SD variant adds VEX_W.
7674 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
7675                         SizeItins itins> {
7676   defm SS : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, itins.s>,
7677               EVEX_CD8<32, CD8VT1>;
7678   defm SD : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, itins.d>,
7679               EVEX_CD8<64, CD8VT1>, VEX_W;
7680 }
7681
// Scalar rcp28 (opcode 0xCB) and rsqrt28 (opcode 0xCD), guarded by HasERI.
7682 let Predicates = [HasERI] in {
7683   defm VRCP28   : avx512_eri_s<0xCB, "vrcp28",   X86rcp28s, SSE_RCP_S>,
7684                               T8PD, EVEX_4V;
7685   defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, SSE_RSQRT_S>,
7686                               T8PD, EVEX_4V;
7687 }
7688
// Scalar vgetexp (opcode 0x43) reuses the avx512_eri_s shape but sits outside
// the HasERI block above, so it does not receive the HasERI predicate.
7689 defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, SSE_ALU_ITINS_S>,
7690                              T8PD, EVEX_4V;
7691 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
/// Defines three maskable forms of a packed unary operation whose node takes
/// an extra i32 rounding operand; all three pass FROUND_CURRENT:
///   r  - register source.
///   m  - full-vector memory source (load bitconverted to _.FloatVT).
///   mb - broadcast of a scalar memory source (EVEX_B).
/// Parameters: opc (opcode byte), OpcodeStr (mnemonic), _ (vector type
/// info), OpNode (matched SDNode), itins (itineraries / scheduling class).
7692
7693 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7694                          SDNode OpNode, OpndItins itins> {
7695   let ExeDomain = _.ExeDomain in {
7696   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7697                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
7698                          (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT)),
7699                          itins.rr>, Sched<[itins.Sched]>;
7700
7701   defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7702                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
7703                          (OpNode (_.FloatVT
7704                              (bitconvert (_.LdFrag addr:$src))),
7705                           (i32 FROUND_CURRENT)), itins.rm>,
7706                           Sched<[itins.Sched.Folded, ReadAfterLd]>;
7707
7708   defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7709                          (ins _.ScalarMemOp:$src), OpcodeStr,
7710                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
7711                          (OpNode (_.FloatVT
7712                                   (X86VBroadcast (_.ScalarLdFrag addr:$src))),
7713                                  (i32 FROUND_CURRENT)), itins.rm>, EVEX_B,
7714                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
7715   }
7716 }
// Defines the {sae} register form (rb) of a packed unary operation: the node
// is matched with FROUND_NO_EXC and the instruction is marked EVEX_B.
7717 multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7718                          SDNode OpNode, OpndItins itins> {
7719   let ExeDomain = _.ExeDomain in
7720   defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7721                         (ins _.RC:$src), OpcodeStr,
7722                         "{sae}, $src", "$src, {sae}",
7723                         (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC)),
7724                         itins.rr>, EVEX_B, Sched<[itins.Sched]>;
7725 }
7726
// 512-bit packed instantiation: PS (v16f32) and PD (v8f64) each combine the
// r/m/mb forms of avx512_fp28_p with the {sae} rb form of
// avx512_fp28_p_round. The PD variant adds VEX_W.
7727 multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
7728                        SizeItins itins> {
7729    defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, itins.s>,
7730              avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, itins.s>,
7731              T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
7732    defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, itins.d>,
7733              avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, itins.d>,
7734              T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
7735 }
7736
// 128/256-bit packed instantiations of avx512_fp28_p (r/m/mb forms only; no
// {sae} form), defined under HasVLX.
7737 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
7738                                   SDNode OpNode, SizeItins itins> {
7739   // Define only if AVX512VL feature is present.
7740   let Predicates = [HasVLX] in {
7741     defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, itins.s>,
7742                                      EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
7743     defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, itins.s>,
7744                                      EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
7745     defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, itins.d>,
7746                                      EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
7747     defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, itins.d>,
7748                                      EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
7749   }
7750 }
// Packed 512-bit rsqrt28 (0xCC), rcp28 (0xCA) and exp2 (0xC8), guarded by
// HasERI.
7751 let Predicates = [HasERI] in {
7752
7753  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SSE_RSQRT_P>, EVEX;
7754  defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, SSE_RCP_P>, EVEX;
7755  defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, SSE_ALU_ITINS_P>, EVEX;
7756 }
// Packed vgetexp (opcode 0x42): the 512-bit forms come from avx512_eri and
// the 128/256-bit forms from avx512_fp_unaryop_packed. It sits outside the
// HasERI block above, so it does not receive the HasERI predicate.
7757 defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SSE_ALU_ITINS_P>,
7758                  avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
7759                                           SSE_ALU_ITINS_P>, EVEX;
7760
// Defines the embedded-rounding register form (rb) of packed sqrt: it takes
// an extra AVX512RC:$rc operand, matches X86fsqrtRnd with that immediate,
// and is marked EVEX_B and EVEX_RC.
7761 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, OpndItins itins,
7762                                     X86VectorVTInfo _>{
7763   let ExeDomain = _.ExeDomain in
7764   defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7765                          (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
7766                          (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc))), itins.rr>,
7767                          EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
7768 }
7769
// Defines the maskable packed sqrt forms matching the generic fsqrt node:
//   r  - register source.
//   m  - full-vector memory source (load bitconverted to _.FloatVT).
//   mb - broadcast of a scalar memory source (EVEX_B).
// Parameters: opc (opcode byte), OpcodeStr (mnemonic), itins (itineraries /
// scheduling class), _ (vector type info).
7770 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, OpndItins itins,
7771                               X86VectorVTInfo _>{
7772   let ExeDomain = _.ExeDomain in {
7773   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7774                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
7775                          (_.FloatVT (fsqrt _.RC:$src)), itins.rr>, EVEX,
7776                          Sched<[itins.Sched]>;
7777   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7778                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
7779                          (fsqrt (_.FloatVT
7780                            (bitconvert (_.LdFrag addr:$src)))), itins.rm>, EVEX,
7781                            Sched<[itins.Sched.Folded, ReadAfterLd]>;
7782   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7783                           (ins _.ScalarMemOp:$src), OpcodeStr,
7784                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
7785                           (fsqrt (_.FloatVT
7786                             (X86VBroadcast (_.ScalarLdFrag addr:$src)))), itins.rm>,
7787                           EVEX, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
7788   }
7789 }
7790
// Instantiates avx512_sqrt_packed for every vector width and precision.
// The 512-bit forms are defined with no predicate override here; the
// 128/256-bit forms are defined under HasVLX. The PD variants add VEX_W and
// use the PD prefix.
7791 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr> {
7792   defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), SSE_SQRTPS, v16f32_info>,
7793                                 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
7794   defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), SSE_SQRTPD, v8f64_info>,
7795                                 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
7796   // Define only if AVX512VL feature is present.
7797   let Predicates = [HasVLX] in {
7798     defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
7799                                      SSE_SQRTPS, v4f32x_info>,
7800                                      EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
7801     defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
7802                                      SSE_SQRTPS, v8f32x_info>,
7803                                      EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
7804     defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
7805                                      SSE_SQRTPD, v2f64x_info>,
7806                                      EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
7807     defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
7808                                      SSE_SQRTPD, v4f64x_info>,
7809                                      EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
7810   }
7811 }
7812
// Instantiates the embedded-rounding sqrt form for the 512-bit PS and PD
// types only; no 128/256-bit rounding forms are defined here.
7813 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr> {
7814   defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), SSE_SQRTPS,
7815                                 v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
7816   defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), SSE_SQRTPD,
7817                                 v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
7818 }
7819
// Defines the scalar sqrt instructions and the patterns that select them.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   itins     - itineraries (rr / rm) and scheduling class.
//   _         - vector type info (RC, FRC, VT, EltVT, memory operands).
//   SUFF      - suffix ("SS" / "SD" at the visible call sites) used to build
//               the instruction record names NAME#SUFF#Z... in the patterns.
//   Intr      - intrinsic matched onto the _Int forms.
// Instruction forms:
//   r_Int / m_Int - maskable vector-register forms matching X86fsqrtRnds
//                   with FROUND_CURRENT (register / memory second source).
//   rb_Int        - embedded-rounding form with an AVX512RC:$rc operand
//                   (EVEX_B, EVEX_RC).
//   r / m         - isCodeGenOnly forms on the scalar FRC register class with
//                   empty pattern lists; they are selected by the explicit
//                   Pat records below.
7820 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, OpndItins itins,
7821                               X86VectorVTInfo _, string SUFF, Intrinsic Intr> {
7822   let ExeDomain = _.ExeDomain in {
7823   defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7824                          (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7825                          "$src2, $src1", "$src1, $src2",
7826                          (X86fsqrtRnds (_.VT _.RC:$src1),
7827                                     (_.VT _.RC:$src2),
7828                                     (i32 FROUND_CURRENT)), itins.rr>,
7829                          Sched<[itins.Sched]>;
7830   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7831                        (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
7832                        "$src2, $src1", "$src1, $src2",
7833                        (X86fsqrtRnds (_.VT _.RC:$src1),
7834                                   _.ScalarIntMemCPat:$src2,
7835                                   (i32 FROUND_CURRENT)), itins.rm>,
7836                        Sched<[itins.Sched.Folded, ReadAfterLd]>;
7837   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7838                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
7839                          "$rc, $src2, $src1", "$src1, $src2, $rc",
7840                          (X86fsqrtRnds (_.VT _.RC:$src1),
7841                                      (_.VT _.RC:$src2),
7842                                      (i32 imm:$rc)), itins.rr>,
7843                          EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
7844
7845   let isCodeGenOnly = 1, hasSideEffects = 0 in {
7846     def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7847                (ins _.FRC:$src1, _.FRC:$src2),
7848                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], itins.rr>,
7849                Sched<[itins.Sched]>;
7850     let mayLoad = 1 in
7851       def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7852                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
7853                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], itins.rm>,
7854                  Sched<[itins.Sched.Folded, ReadAfterLd]>;
7855   }
7856   }
7857
7858 let Predicates = [HasAVX512] in {
       // Generic scalar fsqrt: the first (pass-through) source of the r form
       // is left undefined.
7859   def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
7860             (!cast<Instruction>(NAME#SUFF#Zr)
7861                 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
7862
       // Intrinsic on a vector register: the same register is used for both
       // sources of the r_Int form.
7863    def : Pat<(Intr VR128X:$src),
7864              (!cast<Instruction>(NAME#SUFF#Zr_Int) VR128X:$src,
7865                                  VR128X:$src)>;
7866 }
7867
     // The load-folding patterns are only enabled with OptForSize.
7868 let Predicates = [HasAVX512, OptForSize] in {
7869   def : Pat<(_.EltVT (fsqrt (load addr:$src))),
7870             (!cast<Instruction>(NAME#SUFF#Zm)
7871                 (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
7872
7873   def : Pat<(Intr _.ScalarIntMemCPat:$src2),
7874             (!cast<Instruction>(NAME#SUFF#Zm_Int)
7875                   (_.VT (IMPLICIT_DEF)), addr:$src2)>;
7876 }
7877
7878 }
7879
// Instantiates avx512_sqrt_scalar twice for a given opcode/mnemonic:
//   SSZ - OpcodeStr#"ss" on f32x_info with SSE_SQRTPS itineraries and the
//         int_x86_sse_sqrt_ss intrinsic (XS, EVEX_CD8<32, CD8VT1>).
//   SDZ - OpcodeStr#"sd" on f64x_info with SSE_SQRTPD itineraries and the
//         int_x86_sse2_sqrt_sd intrinsic (XD, VEX_W, EVEX_CD8<64, CD8VT1>).
// Both are EVEX_4V and are tagged NotMemoryFoldable.
7880 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
7881   defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", SSE_SQRTPS, f32x_info, "SS",
7882                         int_x86_sse_sqrt_ss>,
7883                         EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable;
7884   defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", SSE_SQRTPD, f64x_info, "SD",
7885                         int_x86_sse2_sqrt_sd>,
7886                         EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W,
7887                         NotMemoryFoldable;
7888 }
7889
// VSQRT instantiations, all with opcode 0x51 and mnemonic prefix "vsqrt":
// first the packed forms together with their rounding variants, then the
// scalar forms, which are additionally tagged VEX_LIG.
7890 defm VSQRT   : avx512_sqrt_packed_all<0x51, "vsqrt">,
7891                avx512_sqrt_packed_all_round<0x51, "vsqrt">;
7892
7893 defm VSQRT   : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
7894
// Scalar round-scale with an 8-bit immediate ($src3). Defines:
//   r_Int  - maskable register form, pattern X86RndScales.
//   rb_Int - maskable register form printed with "{sae}", pattern
//            X86RndScalesRnd with FROUND_NO_EXC, tagged EVEX_B.
//   m_Int  - maskable memory form, pattern X86RndScales on
//            _.ScalarIntMemCPat.
//   r, m   - isCodeGenOnly forms on _.FRC with empty pattern lists, used by
//            the Pat records below for ffloor/fceil/ftrunc/frint/fnearbyint.
7895 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
7896                                   OpndItins itins, X86VectorVTInfo _> {
7897   let ExeDomain = _.ExeDomain in {
7898   defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7899                            (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
7900                            "$src3, $src2, $src1", "$src1, $src2, $src3",
7901                            (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7902                            (i32 imm:$src3))), itins.rr>,
7903                            Sched<[itins.Sched]>;
7904
7905   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7906                          (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
7907                          "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
7908                          (_.VT (X86RndScalesRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7909                          (i32 imm:$src3), (i32 FROUND_NO_EXC))), itins.rr>, EVEX_B,
7910                          Sched<[itins.Sched]>;
7911
7912   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7913                          (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
7914                          OpcodeStr,
7915                          "$src3, $src2, $src1", "$src1, $src2, $src3",
7916                          (_.VT (X86RndScales _.RC:$src1,
7917                                 _.ScalarIntMemCPat:$src2, (i32 imm:$src3))), itins.rm>,
7918                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
7919
       // Pattern-less forms on the scalar FP register class; the memory form
       // is additionally marked mayLoad.
7920   let isCodeGenOnly = 1, hasSideEffects = 0 in {
7921     def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7922                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
7923                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7924                [], itins.rr>, Sched<[itins.Sched]>;
7925
7926     let mayLoad = 1 in
7927       def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7928                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
7929                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7930                  [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
7931   }
7932   }
7933
       // Generic rounding nodes on a scalar register select the `r` form with
       // an IMPLICIT_DEF first source and a fixed immediate:
       //   ffloor -> 0x9, fceil -> 0xa, ftrunc -> 0xb, frint -> 0x4,
       //   fnearbyint -> 0xc.
       // NOTE(review): the immediates presumably follow the rounding-control
       // imm8 encoding of the instruction - confirm against the Intel SDM.
7934   let Predicates = [HasAVX512] in {
7935     def : Pat<(ffloor _.FRC:$src),
7936               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
7937                _.FRC:$src, (i32 0x9)))>;
7938     def : Pat<(fceil _.FRC:$src),
7939               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
7940                _.FRC:$src, (i32 0xa)))>;
7941     def : Pat<(ftrunc _.FRC:$src),
7942               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
7943                _.FRC:$src, (i32 0xb)))>;
7944     def : Pat<(frint _.FRC:$src),
7945               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
7946                _.FRC:$src, (i32 0x4)))>;
7947     def : Pat<(fnearbyint _.FRC:$src),
7948               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
7949                _.FRC:$src, (i32 0xc)))>;
7950   }
7951
       // The same five selections with the operand loaded through
       // _.ScalarLdFrag; they use the `m` form with the same immediates and
       // are enabled only when OptForSize also holds.
7952   let Predicates = [HasAVX512, OptForSize] in {
7953     def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
7954               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
7955                addr:$src, (i32 0x9)))>;
7956     def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
7957               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
7958                addr:$src, (i32 0xa)))>;
7959     def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
7960               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
7961                addr:$src, (i32 0xb)))>;
7962     def : Pat<(frint (_.ScalarLdFrag addr:$src)),
7963               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
7964                addr:$src, (i32 0x4)))>;
7965     def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
7966               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
7967                addr:$src, (i32 0xc)))>;
7968   }
7969 }
7970
// Scalar round-scale instantiations:
//   VRNDSCALESS - opcode 0x0A, f32x_info, SSE_ALU_F32S, EVEX_CD8<32, CD8VT1>.
//   VRNDSCALESD - opcode 0x0B, f64x_info, SSE_ALU_F64S, EVEX_CD8<64, CD8VT1>,
//                 additionally VEX_W.
// Both use AVX512AIi8Base and EVEX_4V.
7971 defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", SSE_ALU_F32S,
7972                       f32x_info>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>;
7973
7974 defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", SSE_ALU_F64S,
7975                       f64x_info>, VEX_W, AVX512AIi8Base, EVEX_4V,
7976                       EVEX_CD8<64, CD8VT1>;
7977
7978 //-------------------------------------------------
7979 // Integer truncate and extend operations
7980 //-------------------------------------------------
7981
// Itinerary/scheduling bundles passed as the `itins` argument to the
// extend and truncate multiclasses below. Both records currently carry the
// same values: IIC_SSE_PSHUF_RI / IIC_SSE_PSHUF_MI with Sched overridden to
// WriteShuffle256.
7982 let Sched = WriteShuffle256 in
7983 def AVX512_EXTEND : OpndItins<
7984   IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
7985 >;
7986
7987 let Sched = WriteShuffle256 in
7988 def AVX512_TRUNCATE : OpndItins<
7989   IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
7990 >;
7991
// Instruction definitions shared by every truncate variant, for one
// source/destination type pair:
//   rr  - maskable register form (MRMDestReg) whose pattern applies OpNode to
//         a SrcInfo.VT value and yields DestInfo.VT.
//   mr  - store form to x86memop:$dst, no pattern attached here.
//   mrk - store form with a SrcInfo.KRCWM:$mask operand (EVEX_K), no pattern
//         attached here.
// The store forms are flagged mayStore = 1, mayLoad = 1, hasSideEffects = 0.
7992 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7993                               OpndItins itins, X86VectorVTInfo SrcInfo,
7994                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
7995   let ExeDomain = DestInfo.ExeDomain in
7996   defm rr  : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
7997                       (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
7998                       (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
7999                       itins.rr>, EVEX, T8XS, Sched<[itins.Sched]>;
8000
8001   let mayStore = 1, mayLoad = 1, hasSideEffects = 0,
8002       ExeDomain = DestInfo.ExeDomain in {
8003     def mr : AVX512XS8I<opc, MRMDestMem, (outs),
8004                (ins x86memop:$dst, SrcInfo.RC:$src),
8005                OpcodeStr # "\t{$src, $dst|$dst, $src}",
8006                [], itins.rm>, EVEX, Sched<[itins.Sched.Folded]>;
8007
8008     def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
8009                (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
8010                OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8011                [], itins.rm>, EVEX, EVEX_K, Sched<[itins.Sched.Folded]>;
8012   }//mayStore = 1, mayLoad = 1, hasSideEffects = 0
8013 }
8014
// Selection patterns for the store forms of a truncate instruction:
//   truncFrag  (value, addr)       -> NAME#SrcInfo.ZSuffix##mr
//   mtruncFrag (addr, mask, value) -> NAME#SrcInfo.ZSuffix##mrk
// The instruction records are looked up by name, so this multiclass relies on
// defs with those names existing for the same NAME.
// DestInfo is accepted but not referenced in the body.
8015 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
8016                                     X86VectorVTInfo DestInfo,
8017                                     PatFrag truncFrag, PatFrag mtruncFrag > {
8018
8019   def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
8020             (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr)
8021                                     addr:$dst, SrcInfo.RC:$src)>;
8022
8023   def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
8024                                                (SrcInfo.VT SrcInfo.RC:$src)),
8025             (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk)
8026                             addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
8027 }
8028
// Expands one truncate instruction into its three vector-length variants,
// each combining avx512_trunc_common (instructions) with
// avx512_trunc_mr_lowering (store patterns):
//   Z128, Z256 - require [HasVLX, prd]; sources VTSrcInfo.info128/info256.
//   Z          - requires [prd]; source VTSrcInfo.info512.
// The destination type info and memory operand are supplied per length by
// the caller. `prd` defaults to HasAVX512.
8029 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode,
8030          OpndItins itins, AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
8031          X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
8032          X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
8033          X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag,
8034                                                      Predicate prd = HasAVX512>{
8035
8036   let Predicates = [HasVLX, prd] in {
8037     defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode, itins,
8038                              VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
8039                 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
8040                              truncFrag, mtruncFrag>, EVEX_V128;
8041
8042     defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode, itins,
8043                              VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
8044                 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
8045                              truncFrag, mtruncFrag>, EVEX_V256;
8046   }
8047   let Predicates = [prd] in
8048     defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode, itins,
8049                              VTSrcInfo.info512, DestInfoZ, x86memopZ>,
8050                 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
8051                              truncFrag, mtruncFrag>, EVEX_V512;
8052 }
8053
// Truncate with avx512vl_i64_info sources and v16i8x_info destinations at
// every vector length. Memory operands for Z128/Z256/Z are
// i16mem/i32mem/i64mem; encoding uses EVEX_CD8<8, CD8VO>.
8054 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
8055                            OpndItins itins, PatFrag StoreNode,
8056                            PatFrag MaskedStoreNode> {
8057   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i64_info,
8058                v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem,
8059                StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
8060 }
8061
// Truncate with avx512vl_i64_info sources and v8i16x_info destinations at
// every vector length. Memory operands for Z128/Z256/Z are
// i32mem/i64mem/i128mem; encoding uses EVEX_CD8<16, CD8VQ>.
8062 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
8063                            OpndItins itins, PatFrag StoreNode,
8064                            PatFrag MaskedStoreNode> {
8065   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i64_info,
8066                v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem,
8067                StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
8068 }
8069
// Truncate with avx512vl_i64_info sources. Destinations for Z128/Z256/Z are
// v4i32x_info/v4i32x_info/v8i32x_info and memory operands are
// i64mem/i128mem/i256mem; encoding uses EVEX_CD8<32, CD8VH>.
8070 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
8071                            OpndItins itins, PatFrag StoreNode,
8072                            PatFrag MaskedStoreNode> {
8073   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i64_info,
8074                v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem,
8075                StoreNode, MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
8076 }
8077
// Truncate with avx512vl_i32_info sources and v16i8x_info destinations at
// every vector length. Memory operands for Z128/Z256/Z are
// i32mem/i64mem/i128mem; encoding uses EVEX_CD8<8, CD8VQ>.
8078 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
8079                            OpndItins itins, PatFrag StoreNode,
8080                            PatFrag MaskedStoreNode> {
8081   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i32_info,
8082                v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem,
8083                StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
8084 }
8085
// Truncate with avx512vl_i32_info sources. Destinations for Z128/Z256/Z are
// v8i16x_info/v8i16x_info/v16i16x_info and memory operands are
// i64mem/i128mem/i256mem; encoding uses EVEX_CD8<16, CD8VH>.
8086 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
8087                            OpndItins itins, PatFrag StoreNode,
8088                            PatFrag MaskedStoreNode> {
8089   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i32_info,
8090               v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem,
8091               StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
8092 }
8093
// Truncate with avx512vl_i16_info sources. Destinations for Z128/Z256/Z are
// v16i8x_info/v16i8x_info/v32i8x_info and memory operands are
// i64mem/i128mem/i256mem; encoding uses EVEX_CD8<16, CD8VH>.
// Unlike the other truncate wrappers, this one passes HasBWI as the `prd`
// argument of avx512_trunc instead of relying on the HasAVX512 default.
8094 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
8095                            OpndItins itins, PatFrag StoreNode,
8096                            PatFrag MaskedStoreNode> {
8097   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i16_info,
8098               v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem,
8099               StoreNode, MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
8100 }
8101
// Truncate instruction instantiations. Each source/destination pairing comes
// in three flavours that differ in the selection node and in the store
// fragments passed in:
//   VPMOV<xy>   - X86vtrunc,   truncstorevi*,     masked_truncstorevi*
//   VPMOVS<xy>  - X86vtruncs,  truncstore_s_vi*,  masked_truncstore_s_vi*
//   VPMOVUS<xy> - X86vtruncus, truncstore_us_vi*, masked_truncstore_us_vi*
// All of them use the AVX512_TRUNCATE itineraries.
8102 defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   X86vtrunc, AVX512_TRUNCATE,
8103                                   truncstorevi8, masked_truncstorevi8>;
8104 defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, AVX512_TRUNCATE,
8105                                   truncstore_s_vi8, masked_truncstore_s_vi8>;
8106 defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, AVX512_TRUNCATE,
8107                                   truncstore_us_vi8, masked_truncstore_us_vi8>;
8108
8109 defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw",   X86vtrunc, AVX512_TRUNCATE,
8110                                   truncstorevi16, masked_truncstorevi16>;
8111 defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, AVX512_TRUNCATE,
8112                                   truncstore_s_vi16, masked_truncstore_s_vi16>;
8113 defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, AVX512_TRUNCATE,
8114                                   truncstore_us_vi16, masked_truncstore_us_vi16>;
8115
8116 defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd",   X86vtrunc, AVX512_TRUNCATE,
8117                                   truncstorevi32, masked_truncstorevi32>;
8118 defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, AVX512_TRUNCATE,
8119                                   truncstore_s_vi32, masked_truncstore_s_vi32>;
8120 defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, AVX512_TRUNCATE,
8121                                   truncstore_us_vi32, masked_truncstore_us_vi32>;
8122
8123 defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc, AVX512_TRUNCATE,
8124                                   truncstorevi8, masked_truncstorevi8>;
8125 defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb",   X86vtruncs, AVX512_TRUNCATE,
8126                                   truncstore_s_vi8, masked_truncstore_s_vi8>;
8127 defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus, AVX512_TRUNCATE,
8128                                   truncstore_us_vi8, masked_truncstore_us_vi8>;
8129
8130 defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc, AVX512_TRUNCATE,
8131                                   truncstorevi16, masked_truncstorevi16>;
8132 defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw",   X86vtruncs, AVX512_TRUNCATE,
8133                                   truncstore_s_vi16, masked_truncstore_s_vi16>;
8134 defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw",  X86vtruncus, AVX512_TRUNCATE,
8135                                   truncstore_us_vi16, masked_truncstore_us_vi16>;
8136
8137 defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc, AVX512_TRUNCATE,
8138                                   truncstorevi8, masked_truncstorevi8>;
8139 defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb",   X86vtruncs, AVX512_TRUNCATE,
8140                                   truncstore_s_vi8, masked_truncstore_s_vi8>;
8141 defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb",  X86vtruncus, AVX512_TRUNCATE,
8142                                   truncstore_us_vi8, masked_truncstore_us_vi8>;
8143
// X86vtrunc of a 256-bit source when VLX is not available (NoVLX): the source
// is placed into the sub_ymm part of an otherwise undefined 512-bit register
// (INSERT_SUBREG on IMPLICIT_DEF), the 512-bit Zrr instruction is used, and
// the sub_xmm part of its result is extracted.
// Covers v8i32 -> v8i16 (VPMOVDWZrr) and v4i64 -> v4i32 (VPMOVQDZrr).
8144 let Predicates = [HasAVX512, NoVLX] in {
8145 def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
8146          (v8i16 (EXTRACT_SUBREG
8147                  (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8148                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
8149 def: Pat<(v4i32 (X86vtrunc (v4i64 VR256X:$src))),
8150          (v4i32 (EXTRACT_SUBREG
8151                  (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8152                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
8153 }
8154
     // Same widening scheme for v16i16 -> v16i8 through VPMOVWBZrr, which
     // requires HasBWI instead of HasAVX512.
8155 let Predicates = [HasBWI, NoVLX] in {
8156 def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))),
8157          (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
8158                                             VR256X:$src, sub_ymm))), sub_xmm))>;
8159 }
8160
// Instruction definitions shared by every extend variant, for one
// destination/source type pair:
//   rr - maskable register form; pattern applies OpNode to a SrcInfo.VT value
//        and yields DestInfo.VT.
//   rm - maskable memory form; pattern is LdFrag applied to addr:$src,
//        yielding DestInfo.VT.
8161 multiclass avx512_extend_common<bits<8> opc, string OpcodeStr, OpndItins itins,
8162               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
8163               X86MemOperand x86memop, PatFrag LdFrag, SDPatternOperator OpNode>{
8164   let ExeDomain = DestInfo.ExeDomain in {
8165   defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
8166                     (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
8167                     (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))), itins.rr>,
8168                   EVEX, Sched<[itins.Sched]>;
8169
8170   defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
8171                   (ins x86memop:$src), OpcodeStr ,"$src", "$src",
8172                   (DestInfo.VT (LdFrag addr:$src)), itins.rm>,
8173                 EVEX, Sched<[itins.Sched.Folded]>;
8174   }
8175 }
8176
// Extend with v16i8x_info/v32i8x_info sources and i16-element destinations.
//   Z128 - v8i16x_info  <- v16i8x_info, i64mem,  uses InVecNode.
//   Z256 - v16i16x_info <- v16i8x_info, i128mem, uses OpNode.
//   Z    - v32i16_info  <- v32i8x_info, i256mem, uses OpNode.
// Z128/Z256 require [HasVLX, HasBWI]; Z requires [HasBWI].
// LdFrag defaults to the PatFrag named ExtTy#"extloadvi8".
8177 multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
8178           SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8179           OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
8180   let Predicates = [HasVLX, HasBWI] in {
8181     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v8i16x_info,
8182                     v16i8x_info, i64mem, LdFrag, InVecNode>,
8183                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
8184
8185     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v16i16x_info,
8186                     v16i8x_info, i128mem, LdFrag, OpNode>,
8187                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
8188   }
8189   let Predicates = [HasBWI] in {
8190     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v32i16_info,
8191                     v32i8x_info, i256mem, LdFrag, OpNode>,
8192                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
8193   }
8194 }
8195
// Extend with v16i8x_info sources and i32-element destinations.
//   Z128 - v4i32x_info <- v16i8x_info, i32mem,  uses InVecNode.
//   Z256 - v8i32x_info <- v16i8x_info, i64mem,  uses OpNode.
//   Z    - v16i32_info <- v16i8x_info, i128mem, uses OpNode.
// Z128/Z256 require [HasVLX, HasAVX512]; Z requires [HasAVX512].
// LdFrag defaults to the PatFrag named ExtTy#"extloadvi8".
8196 multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
8197           SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8198           OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
8199   let Predicates = [HasVLX, HasAVX512] in {
8200     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v4i32x_info,
8201                    v16i8x_info, i32mem, LdFrag, InVecNode>,
8202                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
8203
8204     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v8i32x_info,
8205                    v16i8x_info, i64mem, LdFrag, OpNode>,
8206                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
8207   }
8208   let Predicates = [HasAVX512] in {
8209     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v16i32_info,
8210                    v16i8x_info, i128mem, LdFrag, OpNode>,
8211                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
8212   }
8213 }
8214
// Extend with v16i8x_info sources and i64-element destinations.
//   Z128 - v2i64x_info <- v16i8x_info, i16mem, uses InVecNode.
//   Z256 - v4i64x_info <- v16i8x_info, i32mem, uses OpNode.
//   Z    - v8i64_info  <- v16i8x_info, i64mem, uses OpNode.
// Z128/Z256 require [HasVLX, HasAVX512]; Z requires [HasAVX512].
// LdFrag defaults to the PatFrag named ExtTy#"extloadvi8".
8215 multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
8216           SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8217           OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
8218   let Predicates = [HasVLX, HasAVX512] in {
8219     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
8220                    v16i8x_info, i16mem, LdFrag, InVecNode>,
8221                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
8222
8223     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
8224                    v16i8x_info, i32mem, LdFrag, OpNode>,
8225                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
8226   }
8227   let Predicates = [HasAVX512] in {
8228     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
8229                    v16i8x_info, i64mem, LdFrag, OpNode>,
8230                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
8231   }
8232 }
8233
// Extend with v8i16x_info/v16i16x_info sources and i32-element destinations.
//   Z128 - v4i32x_info <- v8i16x_info,  i64mem,  uses InVecNode.
//   Z256 - v8i32x_info <- v8i16x_info,  i128mem, uses OpNode.
//   Z    - v16i32_info <- v16i16x_info, i256mem, uses OpNode.
// Z128/Z256 require [HasVLX, HasAVX512]; Z requires [HasAVX512].
// LdFrag defaults to the PatFrag named ExtTy#"extloadvi16".
8234 multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
8235          SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8236          OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
8237   let Predicates = [HasVLX, HasAVX512] in {
8238     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v4i32x_info,
8239                    v8i16x_info, i64mem, LdFrag, InVecNode>,
8240                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
8241
8242     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v8i32x_info,
8243                    v8i16x_info, i128mem, LdFrag, OpNode>,
8244                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
8245   }
8246   let Predicates = [HasAVX512] in {
8247     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v16i32_info,
8248                    v16i16x_info, i256mem, LdFrag, OpNode>,
8249                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
8250   }
8251 }
8252
// Extend with v8i16x_info sources and i64-element destinations.
//   Z128 - v2i64x_info <- v8i16x_info, i32mem,  uses InVecNode.
//   Z256 - v4i64x_info <- v8i16x_info, i64mem,  uses OpNode.
//   Z    - v8i64_info  <- v8i16x_info, i128mem, uses OpNode.
// Z128/Z256 require [HasVLX, HasAVX512]; Z requires [HasAVX512].
// LdFrag defaults to the PatFrag named ExtTy#"extloadvi16".
8253 multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
8254          SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8255          OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
8256   let Predicates = [HasVLX, HasAVX512] in {
8257     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
8258                    v8i16x_info, i32mem, LdFrag, InVecNode>,
8259                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
8260
8261     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
8262                    v8i16x_info, i64mem, LdFrag, OpNode>,
8263                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
8264   }
8265   let Predicates = [HasAVX512] in {
8266     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
8267                    v8i16x_info, i128mem, LdFrag, OpNode>,
8268                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
8269   }
8270 }
8271
// Extend with v4i32x_info/v8i32x_info sources and i64-element destinations.
//   Z128 - v2i64x_info <- v4i32x_info, i64mem,  uses InVecNode.
//   Z256 - v4i64x_info <- v4i32x_info, i128mem, uses OpNode.
//   Z    - v8i64_info  <- v8i32x_info, i256mem, uses OpNode.
// Z128/Z256 require [HasVLX, HasAVX512]; Z requires [HasAVX512].
// LdFrag defaults to the PatFrag named ExtTy#"extloadvi32".
// Unlike the byte/word source variants, these defs do not carry VEX_WIG.
8272 multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
8273          SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8274          OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
8275
8276   let Predicates = [HasVLX, HasAVX512] in {
8277     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
8278                    v4i32x_info, i64mem, LdFrag, InVecNode>,
8279                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
8280
8281     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
8282                    v4i32x_info, i128mem, LdFrag, OpNode>,
8283                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
8284   }
8285   let Predicates = [HasAVX512] in {
8286     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
8287                    v8i32x_info, i256mem, LdFrag, OpNode>,
8288                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
8289   }
8290 }
8291
// Extend instruction instantiations, all using the AVX512_EXTEND itineraries.
//   VPMOVZX* - opcodes 0x30-0x35, X86vzext / zext_invec, ExtTy "z".
//   VPMOVSX* - opcodes 0x20-0x25, X86vsext / sext_invec, ExtTy "s".
// ExtTy selects the default load fragment inside each avx512_extend_* class.
8292 defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8293 defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8294 defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8295 defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8296 defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8297 defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8298
8299 defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8300 defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8301 defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8302 defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8303 defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8304 defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8305
8306
// Additional load-folding patterns for the extend instructions. Every pattern
// matches an extend node applied to some form of load (full vector load,
// scalar_to_vector of a scalar load, vzmovl / vzload fragments) and selects
// the corresponding memory-source instruction, looked up by name as
// OpcPrefix#<BW|BD|BQ|WD|WQ|DQ>Z<128|256|>rm.
//   OpcPrefix - instruction name prefix, pasted in front of the suffixes above.
//   ExtOp     - node matched by the 256-bit and 512-bit patterns.
//   InVecOp   - node matched by the 128-bit patterns.
//   ExtLoad16 - load fragment used only by the 128-bit BQ scalar_to_vector
//               pattern.
8307 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
8308                                  SDNode InVecOp, PatFrag ExtLoad16> {
8309   // 128-bit patterns
8310   let Predicates = [HasVLX, HasBWI] in {
8311   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8312             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8313   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
8314             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8315   def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
8316             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8317   def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
8318             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8319   def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
8320             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8321   }
       // Remaining 128-bit forms (BD, BQ, WD, WQ, DQ) only require HasVLX.
8322   let Predicates = [HasVLX] in {
8323   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
8324             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
8325   def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
8326             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
8327   def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
8328             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
8329   def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
8330             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
8331
8332   def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
8333             (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8334   def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
8335             (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8336   def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
8337             (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8338   def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
8339             (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8340
8341   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8342             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8343   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
8344             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8345   def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
8346             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8347   def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
8348             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8349   def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
8350             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8351
8352   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
8353             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8354   def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
8355             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8356   def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
8357             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8358   def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
8359             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8360
8361   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8362             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8363   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
8364             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8365   def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
8366             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8367   def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
8368             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8369   def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
8370             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8371   }
8372   // 256-bit patterns
8373   let Predicates = [HasVLX, HasBWI] in {
8374   def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8375             (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
8376   def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
8377             (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
8378   def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
8379             (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
8380   }
       // Remaining 256-bit forms (BD, BQ, WD, WQ, DQ) only require HasVLX.
8381   let Predicates = [HasVLX] in {
8382   def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8383             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8384   def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
8385             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8386   def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
8387             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8388   def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8389             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8390
8391   def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
8392             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8393   def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
8394             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8395   def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
8396             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8397   def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8398             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8399
8400   def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
8401             (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
8402   def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
8403             (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
8404   def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
8405             (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
8406
8407   def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8408             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8409   def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
8410             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8411   def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
8412             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8413   def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
8414             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8415
8416   def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
8417             (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
8418   def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
8419             (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
8420   def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
8421             (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
8422   }
8423   // 512-bit patterns
8424   let Predicates = [HasBWI] in {
8425   def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
8426             (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
8427   }
       // Remaining 512-bit forms (BD, BQ, WD, WQ, DQ) only require HasAVX512.
8428   let Predicates = [HasAVX512] in {
8429   def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8430             (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
8431
8432   def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8433             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
8434   def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8435             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
8436
8437   def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
8438             (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
8439
8440   def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
8441             (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
8442
8443   def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
8444             (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
8445   }
8446 }
8447
// Instantiate the extend-of-load folding patterns once for the VPMOVSX
// family and once for the VPMOVZX family. Each instantiation supplies the
// opcode-name prefix plus the extension nodes / load fragment to match.
8448 defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec, extloadi32i16>;
8449 defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec, loadi16_anyext>;
8450
8451 //===----------------------------------------------------------------------===//
8452 // GATHER - SCATTER Operations
8453
8454 // FIXME: Improve scheduling of gather/scatter instructions.
// Defines the `rm` record of a masked gather.
//   opc        - opcode byte.
//   OpcodeStr  - mnemonic stem; _.Suffix is appended to it.
//   _          - vector type info describing the gathered data.
//   memop      - memory operand class used for $src2.
//   GatherNode - pattern fragment the instruction is selected from.
//   MaskRC     - mask register class; defaults to the write-mask class of _.
// The record has two results ($dst and the written-back mask $mask_wb).
// $src1 is tied to $dst, $mask is tied to $mask_wb, and $dst is marked
// early-clobber.
8455 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8456                          X86MemOperand memop, PatFrag GatherNode,
8457                          RegisterClass MaskRC = _.KRCWM> {
8458   let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
8459       ExeDomain = _.ExeDomain in
8460   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
8461             (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
8462             !strconcat(OpcodeStr#_.Suffix,
8463             "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
8464             [(set _.RC:$dst, MaskRC:$mask_wb,
8465               (GatherNode  (_.VT _.RC:$src1), MaskRC:$mask,
8466                      vectoraddr:$src2))]>, EVEX, EVEX_K,
8467              EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
8468 }
8469
// Instantiates avx512_gather for every vector width of a type family, in a
// dword-index flavour (opcode dopc, mnemonic suffix "d") and a qword-index
// flavour (opcode qopc, mnemonic suffix "q"). Every record is marked VEX_W.
// The 512-bit records get no extra predicate here; the 256- and 128-bit
// records additionally require HasVLX.
//   SUFF - text inserted into the record names (NAME # D|Q # SUFF # Z...).
8470 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
8471                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8472   defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
8473                                       vy512mem, mgatherv8i32>, EVEX_V512, VEX_W;
8474   defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
8475                                       vz512mem,  mgatherv8i64>, EVEX_V512, VEX_W;
8476 let Predicates = [HasVLX] in {
8477   defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
8478                               vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
8479   defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
8480                               vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
8481   defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
8482                               vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
8483   defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
8484                               vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
8485 }
8486 }
8487
// Counterpart of avx512_gather_q_pd without VEX_W. The dword-index records
// use the type info matching their EVEX vector length. The qword-index
// records use the next narrower type info (info256 for Z, info128 for Z256).
// The Z128 qword-index record stays on info128 but overrides the mask
// register class with VK2WM.
// Presumably the narrower data type reflects that qword indices address
// half as many 32-bit elements - confirm against the ISA reference.
8488 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
8489                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8490   defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
8491                                        mgatherv16i32>, EVEX_V512;
8492   defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256xmem,
8493                                        mgatherv8i64>, EVEX_V512;
8494 let Predicates = [HasVLX] in {
8495   defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
8496                                           vy256xmem, mgatherv8i32>, EVEX_V256;
8497   defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
8498                                           vy128xmem, mgatherv4i64>, EVEX_V256;
8499   defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
8500                                           vx128xmem, mgatherv4i32>, EVEX_V128;
8501   defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
8502                                           vx64xmem, mgatherv2i64, VK2WM>,
8503                                           EVEX_V128;
8504 }
8505 }
8506
8507
// Gather instruction records. Each defm combines the VEX_W family
// (f64 / i64 data) with the non-VEX_W family (f32 / i32 data).
// Floating-point gathers use opcodes 0x92 (dword index) / 0x93 (qword index).
8508 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
8509                avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
8510
// Integer gathers use opcodes 0x90 (dword index) / 0x91 (qword index).
8511 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
8512                 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
8513
// Defines the `mr` record of a masked scatter.
//   opc         - opcode byte.
//   OpcodeStr   - mnemonic stem; _.Suffix is appended to it.
//   _           - vector type info describing the stored data.
//   memop       - memory operand class used for $dst.
//   ScatterNode - pattern fragment the instruction is selected from.
// The only result is the written-back mask $mask_wb, which is tied to $mask.
// The mask register class is always _.KRCWM; unlike avx512_gather there is
// no parameter to override it.
8514 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8515                           X86MemOperand memop, PatFrag ScatterNode> {
8516
8517 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
8518
8519   def mr  : AVX5128I<opc, MRMDestMem, (outs _.KRCWM:$mask_wb),
8520             (ins memop:$dst, _.KRCWM:$mask, _.RC:$src),
8521             !strconcat(OpcodeStr#_.Suffix,
8522             "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
8523             [(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src),
8524                                      _.KRCWM:$mask,  vectoraddr:$dst))]>,
8525             EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
8526             Sched<[WriteStore]>;
8527 }
8528
// Instantiates avx512_scatter for every vector width of a type family, in a
// dword-index flavour (opcode dopc, mnemonic suffix "d") and a qword-index
// flavour (opcode qopc, mnemonic suffix "q"). Every record is marked VEX_W.
// The 512-bit records get no extra predicate here; the 256- and 128-bit
// records additionally require HasVLX.
//   SUFF - text inserted into the record names (NAME # D|Q # SUFF # Z...).
8529 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
8530                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8531   defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
8532                                       vy512mem, mscatterv8i32>, EVEX_V512, VEX_W;
8533   defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
8534                                       vz512mem,  mscatterv8i64>, EVEX_V512, VEX_W;
8535 let Predicates = [HasVLX] in {
8536   defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
8537                               vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
8538   defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
8539                               vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
8540   defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
8541                               vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
8542   defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
8543                               vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
8544 }
8545 }
8546
// Counterpart of avx512_scatter_q_pd without VEX_W. The dword-index records
// use the type info matching their EVEX vector length. The qword-index
// records use the next narrower type info (info256 for Z, info128 for Z256);
// the Z128 qword-index record stays on info128 with the vx64xmem operand.
// NOTE(review): the matching gather record (avx512_gather_d_ps, Q...Z128)
// passes an explicit VK2WM mask class, whereas this record uses the default
// write-mask class of info128 together with mscatterv2i64 - confirm that the
// asymmetry is intended.
8547 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
8548                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8549   defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
8550                                        mscatterv16i32>, EVEX_V512;
8551   defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256xmem,
8552                                        mscatterv8i64>, EVEX_V512;
8553 let Predicates = [HasVLX] in {
8554   defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
8555                                           vy256xmem, mscatterv8i32>, EVEX_V256;
8556   defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
8557                                           vy128xmem, mscatterv4i64>, EVEX_V256;
8558   defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
8559                                           vx128xmem, mscatterv4i32>, EVEX_V128;
8560   defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
8561                                           vx64xmem, mscatterv2i64>, EVEX_V128;
8562 }
8563 }
8564
// Scatter instruction records. Each defm combines the VEX_W family
// (f64 / i64 data) with the non-VEX_W family (f32 / i32 data).
// Floating-point scatters use opcodes 0xA2 (dword index) / 0xA3 (qword index).
8565 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
8566                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
8567
// Integer scatters use opcodes 0xA0 (dword index) / 0xA1 (qword index).
8568 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
8569                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
8570
8571 // prefetch
// Defines the `m` record of a gather/scatter prefetch. The record has no
// results and no selection pattern; it takes a mask register and a
// vector-indexed memory operand, requires HasPFI and is flagged
// hasSideEffects.
//   opc       - opcode byte.
//   F         - instruction format (an MRMNm form selecting the operation).
//   OpcodeStr - full mnemonic.
//   KRC       - register class of the $mask operand.
//   memop     - memory operand class of $src.
8572 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
8573                        RegisterClass KRC, X86MemOperand memop> {
8574   let Predicates = [HasPFI], hasSideEffects = 1 in
8575   def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
8576             !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"),
8577             [], IIC_SSE_PREFETCH>, EVEX, EVEX_K, Sched<[WriteLoad]>;
8578 }
8579
// Prefetch records, all EVEX_V512. In the definitions below:
//   - dword-index forms (..D..) use opcode 0xC6, qword-index forms (..Q..)
//     use opcode 0xC7;
//   - the format selects the operation: MRM1m = gather pf0, MRM2m = gather
//     pf1, MRM5m = scatter pf0, MRM6m = scatter pf1;
//   - the ..PD forms carry VEX_W, the ..PS forms do not.
8580 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
8581                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8582
8583 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
8584                      VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8585
8586 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
8587                      VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8588
8589 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
8590                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
8591
8592 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
8593                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8594
8595 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
8596                      VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8597
8598 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
8599                      VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8600
8601 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
8602                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
8603
8604 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
8605                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8606
8607 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
8608                      VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8609
8610 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
8611                      VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8612
8613 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
8614                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
8615
8616 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
8617                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8618
8619 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
8620                      VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8621
8622 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
8623                      VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8624
8625 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
8626                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
8627
// Defines the `rr` record converting a mask register into a vector register
// of type Vec. It is selected from X86vsext applied to the mask.
//   opc       - opcode byte.
//   Vec       - destination vector type info; its KRC is the source class.
//   OpcodeStr - mnemonic stem; Vec.Suffix is appended to it.
8628 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
8629 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
8630                   !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
8631                   [(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))],
8632                   IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
8633 }
8634
// Instantiates cvt_by_vec_width for all three vector widths of VTInfo.
// The 512-bit record requires `prd`; the 256- and 128-bit records require
// `prd` and HasVLX.
8635 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
8636                                  string OpcodeStr, Predicate prd> {
8637 let Predicates = [prd] in
8638   defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
8639
8640   let Predicates = [prd, HasVLX] in {
8641     defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
8642     defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
8643   }
8644 }
8645
// Mask-to-vector conversions. Byte/word element forms share opcode 0x28 and
// require HasBWI; dword/qword element forms share opcode 0x38 and require
// HasDQI. Within each pair the wider element form carries VEX_W.
8646 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
8647 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
8648 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
8649 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
8650
// Defines the `rr` record converting a vector register of type _ into a mask
// register. It is selected from X86cvt2mask applied to the vector.
//   opc       - opcode byte.
//   _         - source vector type info; its KRC is the destination class.
//   OpcodeStr - full mnemonic (no suffix is appended here).
8651 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
8652     def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
8653                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
8654                         [(set _.KRC:$dst, (X86cvt2mask (_.VT _.RC:$src)))],
8655                         IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
8656 }
8657
8658 // Use 512bit version to implement 128/256 bit in case NoVLX.
// The pattern inserts the narrow source into an otherwise IMPLICIT_DEF
// vector of type ExtendInfo.VT (at sub-register index _.SubRegIdx), feeds it
// to the NAME#"Zrr" instruction and copies the result into the mask register
// class _.KRC of the narrow type.
//   ExtendInfo - type info of the wide vector the source is inserted into.
//   _          - type info of the narrow source vector.
8659 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
8660                                                             X86VectorVTInfo _> {
8661
8662   def : Pat<(_.KVT (X86cvt2mask (_.VT _.RC:$src))),
8663             (_.KVT (COPY_TO_REGCLASS
8664                      (!cast<Instruction>(NAME#"Zrr")
8665                        (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
8666                                       _.RC:$src, _.SubRegIdx)),
8667                    _.KRC))>;
8668 }
8669
// Vector-to-mask conversion for all three vector widths of VTInfo:
//   - Z (512-bit) instruction under [prd];
//   - Z256 / Z128 instructions under [prd, HasVLX];
//   - under [prd, NoVLX], selection patterns only (Z256_Alt / Z128_Alt) that
//     widen the 256-/128-bit source to info512.
8670 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
8671                                    AVX512VLVectorVTInfo VTInfo, Predicate prd> {
8672   let Predicates = [prd] in
8673     defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
8674                                             EVEX_V512;
8675
8676   let Predicates = [prd, HasVLX] in {
8677     defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
8678                                               EVEX_V256;
8679     defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
8680                                                EVEX_V128;
8681   }
8682   let Predicates = [prd, NoVLX] in {
8683     defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256>;
8684     defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128>;
8685   }
8686 }
8687
// Vector-to-mask conversions. Byte/word element forms share opcode 0x29 and
// require HasBWI; dword/qword element forms share opcode 0x39 and require
// HasDQI. Within each pair the wider element form carries VEX_W.
8688 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
8689                                               avx512vl_i8_info, HasBWI>;
8690 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
8691                                               avx512vl_i16_info, HasBWI>, VEX_W;
8692 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
8693                                               avx512vl_i32_info, HasDQI>;
8694 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
8695                                               avx512vl_i64_info, HasDQI>, VEX_W;
8696
8697 //===----------------------------------------------------------------------===//
8698 // AVX-512 - COMPRESS and EXPAND
8699 //
8700
8701 // FIXME: Is there a better scheduler itinerary for VPCOMPRESS/VPEXPAND?
// Both itinerary bundles below are identical: they share the Sched value set
// by the enclosing `let` and the same reg-reg / reg-mem itinerary classes.
8702 let Sched = WriteShuffle256 in {
8703 def AVX512_COMPRESS : OpndItins<
8704   IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
8705 >;
8706 def AVX512_EXPAND : OpndItins<
8707   IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
8708 >;
8709 }
8710
// Defines the compress records for one vector width:
//   rr  - register form, built through AVX512_maskable from X86compress.
//   mr  - store to memory without a mask operand; no selection pattern.
//   mrk - store to memory under write-mask $mask (EVEX_K); no selection
//         pattern on the record itself.
// The brace-less `let mayStore = 1, hasSideEffects = 0 in` applies to `mr`
// only.
// NOTE(review): `mrk` sets neither flag explicitly - presumably they are
// inferred from the compressing-store pattern that targets it
// (compress_by_vec_width_lowering); confirm.
8711 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
8712                                  string OpcodeStr, OpndItins itins> {
8713   defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
8714               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
8715               (_.VT (X86compress _.RC:$src1)), itins.rr>, AVX5128IBase,
8716               Sched<[itins.Sched]>;
8717
8718   let mayStore = 1, hasSideEffects = 0 in
8719   def mr : AVX5128I<opc, MRMDestMem, (outs),
8720               (ins _.MemOp:$dst, _.RC:$src),
8721               OpcodeStr # "\t{$src, $dst|$dst, $src}",
8722               []>, EVEX_CD8<_.EltSize, CD8VT1>,
8723               Sched<[itins.Sched.Folded]>;
8724
8725   def mrk : AVX5128I<opc, MRMDestMem, (outs),
8726               (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
8727               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8728               []>,
8729               EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
8730               Sched<[itins.Sched.Folded]>;
8731 }
8732
// Selection pattern mapping a masked compressing store
// (X86mCompressingStore) of a vector of type _ onto the `mrk` record named
// NAME # _.ZSuffix # mrk.
8733 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _ > {
8734   def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
8735                                                (_.VT _.RC:$src)),
8736             (!cast<Instruction>(NAME#_.ZSuffix##mrk)
8737                             addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
8738 }
8739
// Instantiates the compress records and their compressing-store pattern for
// all three vector widths of VTInfo. The 512-bit set requires `Pred`
// (HasAVX512 by default); the 256- and 128-bit sets require `Pred` and
// HasVLX.
8740 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
8741                                  OpndItins itins,
8742                                  AVX512VLVectorVTInfo VTInfo,
8743                                  Predicate Pred = HasAVX512> {
8744   let Predicates = [Pred] in
8745   defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, itins>,
8746            compress_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
8747
8748   let Predicates = [Pred, HasVLX] in {
8749     defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, itins>,
8750                 compress_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
8751     defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, itins>,
8752                 compress_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
8753   }
8754 }
8755
// Compress instruction records. Integer forms use opcode 0x8B, FP forms use
// opcode 0x8A; the 64-bit element forms carry VEX_W. All use the default
// predicate of compress_by_elt_width.
8756 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", AVX512_COMPRESS,
8757                                           avx512vl_i32_info>, EVEX;
8758 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", AVX512_COMPRESS,
8759                                           avx512vl_i64_info>, EVEX, VEX_W;
8760 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", AVX512_COMPRESS,
8761                                           avx512vl_f32_info>, EVEX;
8762 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", AVX512_COMPRESS,
8763                                           avx512vl_f64_info>, EVEX, VEX_W;
8764
8765 // expand
// Defines the expand records for one vector width, both built through
// AVX512_maskable from X86expand:
//   rr - source is a register.
//   rm - source is a full-vector load (_.LdFrag, bitconverted to _.VT).
8766 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
8767                                  string OpcodeStr, OpndItins itins> {
8768   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8769               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
8770               (_.VT (X86expand _.RC:$src1)), itins.rr>, AVX5128IBase,
8771               Sched<[itins.Sched]>;
8772
8773   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8774               (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
8775               (_.VT (X86expand (_.VT (bitconvert
8776                                       (_.LdFrag addr:$src1))))), itins.rm>,
8777             AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
8778             Sched<[itins.Sched.Folded, ReadAfterLd]>;
8779 }
8780
// Selection patterns for a masked expanding load (X86mExpandingLoad):
//   - pass-through operand is undef -> the `rmkz` record (mask, address);
//   - pass-through operand is $src0 -> the `rmk` record ($src0, mask,
//     address).
// The rmk / rmkz records are not defined in this multiclass; presumably they
// are produced by AVX512_maskable for the `rm` form - confirm.
8781 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _ > {
8782
8783   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
8784             (!cast<Instruction>(NAME#_.ZSuffix##rmkz)
8785                                         _.KRCWM:$mask, addr:$src)>;
8786
8787   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
8788                                                (_.VT _.RC:$src0))),
8789             (!cast<Instruction>(NAME#_.ZSuffix##rmk)
8790                             _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
8791 }
8792
// Instantiates the expand records and their expanding-load patterns for all
// three vector widths of VTInfo. The 512-bit set requires `Pred` (HasAVX512
// by default); the 256- and 128-bit sets require `Pred` and HasVLX.
8793 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
8794                                OpndItins itins,
8795                                AVX512VLVectorVTInfo VTInfo,
8796                                Predicate Pred = HasAVX512> {
8797   let Predicates = [Pred] in
8798   defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, itins>,
8799            expand_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
8800
8801   let Predicates = [Pred, HasVLX] in {
8802     defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, itins>,
8803                 expand_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
8804     defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, itins>,
8805                 expand_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
8806   }
8807 }
8808
// Expand instruction records. Integer forms use opcode 0x89, FP forms use
// opcode 0x88; the 64-bit element forms carry VEX_W. All use the default
// predicate of expand_by_elt_width.
8809 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", AVX512_EXPAND,
8810                                       avx512vl_i32_info>, EVEX;
8811 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", AVX512_EXPAND,
8812                                       avx512vl_i64_info>, EVEX, VEX_W;
8813 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", AVX512_EXPAND,
8814                                       avx512vl_f32_info>, EVEX;
8815 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", AVX512_EXPAND,
8816                                       avx512vl_f64_info>, EVEX, VEX_W;
8817
8818 // Handle instruction  reg_vec1 = op(reg_vec,imm)            (rri)
8819 //                                op(mem_vec,imm)            (rmi)
8820 //                                op(broadcast(eltVt),imm)   (rmbi, EVEX_B)
8821 // All instructions are created with FROUND_CURRENT.
// The mnemonic of every form is OpcodeStr with _.Suffix appended; the
// immediate is an i32u8imm operand matched as (i32 imm).
8822 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
8823                                       OpndItins itins, X86VectorVTInfo _> {
8824   let ExeDomain = _.ExeDomain in {
8825   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8826                       (ins _.RC:$src1, i32u8imm:$src2),
8827                       OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
8828                       (OpNode (_.VT _.RC:$src1),
8829                               (i32 imm:$src2)), itins.rr>, Sched<[itins.Sched]>;
8830   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8831                     (ins _.MemOp:$src1, i32u8imm:$src2),
8832                     OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
8833                     (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
8834                             (i32 imm:$src2)), itins.rm>,
8835                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
8836   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8837                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
8838                     OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
8839                     "${src1}"##_.BroadcastStr##", $src2",
8840                     (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
8841                             (i32 imm:$src2)), itins.rm>, EVEX_B,
8842                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
8843   }
8844 }
8845
8846 // Handle instruction  reg_vec1 = op(reg_vec2,imm),{sae}
// Register-only `rrib` form of the unary immediate operation. The pattern
// passes FROUND_NO_EXC as an extra operand to OpNode, the assembly string
// contains {sae}, and the record is marked EVEX_B.
8847 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
8848                                           SDNode OpNode, OpndItins itins,
8849                                           X86VectorVTInfo _> {
8850   let ExeDomain = _.ExeDomain in
8851   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8852                       (ins _.RC:$src1, i32u8imm:$src2),
8853                       OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
8854                       "$src1, {sae}, $src2",
8855                       (OpNode (_.VT _.RC:$src1),
8856                               (i32 imm:$src2),
8857                               (i32 FROUND_NO_EXC)), itins.rr>,
8858                       EVEX_B, Sched<[itins.Sched]>;
8859 }
8860
// Instantiates the unary packed-immediate records for all three vector
// widths. Only the 512-bit set (under `prd`) also gets the {sae} form, built
// from OpNodeRnd; the 128- and 256-bit sets (under `prd` and HasVLX) get the
// plain forms built from OpNode.
8861 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
8862             AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
8863             SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
8864   let Predicates = [prd] in {
8865     defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
8866                                            _.info512>,
8867                 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
8868                                                itins, _.info512>, EVEX_V512;
8869   }
8870   let Predicates = [prd, HasVLX] in {
8871     defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
8872                                            _.info128>, EVEX_V128;
8873     defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
8874                                            _.info256>, EVEX_V256;
8875   }
8876 }
8877
8878 // Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)          (rri)
8879 //                                op(reg_vec2,mem_vec,imm)           (rmi)
8880 //                                op(reg_vec2,broadcast(eltVt),imm)  (rmbi, EVEX_B)
8881 // All instructions are created with FROUND_CURRENT.
// OpcodeStr is used as the mnemonic unchanged (no suffix is appended here);
// the immediate is an i32u8imm operand matched as (i32 imm).
8882 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
8883                                 OpndItins itins, X86VectorVTInfo _>{
8884   let ExeDomain = _.ExeDomain in {
8885   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8886                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
8887                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8888                       (OpNode (_.VT _.RC:$src1),
8889                               (_.VT _.RC:$src2),
8890                               (i32 imm:$src3)), itins.rr>,
8891                       Sched<[itins.Sched]>;
8892   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8893                     (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
8894                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8895                     (OpNode (_.VT _.RC:$src1),
8896                             (_.VT (bitconvert (_.LdFrag addr:$src2))),
8897                             (i32 imm:$src3)), itins.rm>,
8898                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
8899   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8900                     (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
8901                     OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
8902                     "$src1, ${src2}"##_.BroadcastStr##", $src3",
8903                     (OpNode (_.VT _.RC:$src1),
8904                             (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
8905                             (i32 imm:$src3)), itins.rm>, EVEX_B,
8906                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
8907   }
8908 }
8909
8910 // Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)   (rri)
8911 //                                op(reg_vec2,mem_vec,imm)    (rmi)
// The result type (DestInfo) and the type of both sources (SrcInfo) are
// separate parameters. The immediate is a u8imm operand matched as (i8 imm).
8912 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
8913                               OpndItins itins, X86VectorVTInfo DestInfo,
8914                               X86VectorVTInfo SrcInfo>{
8915   let ExeDomain = DestInfo.ExeDomain in {
8916   defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
8917                   (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
8918                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8919                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
8920                                (SrcInfo.VT SrcInfo.RC:$src2),
8921                                (i8 imm:$src3))), itins.rr>,
8922                   Sched<[itins.Sched]>;
8923   defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
8924                 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
8925                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8926                 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
8927                              (SrcInfo.VT (bitconvert
8928                                                 (SrcInfo.LdFrag addr:$src2))),
8929                              (i8 imm:$src3))), itins.rm>,
8930                 Sched<[itins.Sched.Folded, ReadAfterLd]>;
8931   }
8932 }
8933
8934 // Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)          (rri, inherited)
8935 //                                op(reg_vec2,mem_vec,imm)           (rmi, inherited)
8936 //                                op(reg_vec2,broadcast(eltVt),imm)  (rmbi, EVEX_B)
// Inherits rri / rmi from avx512_3Op_rm_imm8 with the same type info used
// for result and sources, and adds the broadcast-from-scalar-memory form.
8937 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
8938                            OpndItins itins, X86VectorVTInfo _>:
8939   avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, itins, _, _>{
8940
8941   let ExeDomain = _.ExeDomain in
8942   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8943                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
8944                     OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
8945                     "$src1, ${src2}"##_.BroadcastStr##", $src3",
8946                     (OpNode (_.VT _.RC:$src1),
8947                             (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
8948                             (i8 imm:$src3)), itins.rm>, EVEX_B,
8949                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
8950 }
8951
8952 // Handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)    (rri)
8953 //                                       op(reg_vec2,mem_scalar,imm)  (rmi)
// Both forms are built through AVX512_maskable_scalar. In the memory form
// the second source is a scalar load wrapped in scalar_to_vector.
8954 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
8955                                 OpndItins itins, X86VectorVTInfo _> {
8956   let ExeDomain = _.ExeDomain in {
8957   defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8958                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
8959                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8960                       (OpNode (_.VT _.RC:$src1),
8961                               (_.VT _.RC:$src2),
8962                               (i32 imm:$src3)), itins.rr>,
8963                       Sched<[itins.Sched]>;
8964   defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8965                     (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
8966                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8967                     (OpNode (_.VT _.RC:$src1),
8968                             (_.VT (scalar_to_vector
8969                                       (_.ScalarLdFrag addr:$src2))),
8970                             (i32 imm:$src3)), itins.rm>,
8971                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
8972   }
8973 }
8974
8975 // Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Register-only `rrib` form of the two-source immediate operation. The
// pattern passes FROUND_NO_EXC as an extra operand to OpNode, the assembly
// string contains {sae}, and the record is marked EVEX_B.
8976 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
8977                                     SDNode OpNode, OpndItins itins,
8978                                     X86VectorVTInfo _> {
8979   let ExeDomain = _.ExeDomain in
8980   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8981                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
8982                       OpcodeStr, "$src3, {sae}, $src2, $src1",
8983                       "$src1, $src2, {sae}, $src3",
8984                       (OpNode (_.VT _.RC:$src1),
8985                               (_.VT _.RC:$src2),
8986                               (i32 imm:$src3),
8987                               (i32 FROUND_NO_EXC)), itins.rr>,
8988                       EVEX_B, Sched<[itins.Sched]>;
8989 }
8990
8991 // Handle scalar instruction: reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
// Scalar counterpart of the packed {sae} form: one register-only record
// (NAME#rrib) built on AVX512_maskable_scalar, with (i32 FROUND_NO_EXC)
// appended to the OpNode operands and EVEX_B set.
8992 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
8993                                     OpndItins itins, X86VectorVTInfo _> {
8994   let ExeDomain = _.ExeDomain in
8995   defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8996                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
8997                       OpcodeStr, "$src3, {sae}, $src2, $src1",
8998                       "$src1, $src2, {sae}, $src3",
8999                       (OpNode (_.VT _.RC:$src1),
9000                               (_.VT _.RC:$src2),
9001                               (i32 imm:$src3),
9002                               (i32 FROUND_NO_EXC)), itins.rr>,
9003                       EVEX_B, Sched<[itins.Sched]>;
9004 }
9005
// Instantiates a packed two-source FP-with-immediate operation for all three
// vector widths of the AVX512VLVectorVTInfo:
//   Z    (512-bit, requires prd): regular forms via OpNode plus the {sae}
//        form via OpNodeRnd;
//   Z128 / Z256 (require prd and HasVLX): regular forms only - no {sae}
//        variant is instantiated for these widths.
9006 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
9007             AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
9008             SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
9009   let Predicates = [prd] in {
9010     defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info512>,
9011                 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, itins, _.info512>,
9012                                   EVEX_V512;
9013
9014   }
9015   let Predicates = [prd, HasVLX] in {
9016     defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info128>,
9017                                   EVEX_V128;
9018     defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info256>,
9019                                   EVEX_V256;
9020   }
9021 }
9022
// Width-expansion wrapper around avx512_3Op_rm_imm8 for operations whose
// destination and source vector types may differ (DestInfo vs. SrcInfo).
// Z needs Pred (HasBWI unless overridden); Z128/Z256 additionally need HasVLX.
// Every record is tagged AVX512AIi8Base and EVEX_4V.
9023 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
9024                    OpndItins itins, AVX512VLVectorVTInfo DestInfo,
9025                    AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
9026   let Predicates = [Pred] in {
9027     defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info512,
9028                            SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
9029   }
9030   let Predicates = [Pred, HasVLX] in {
9031     defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info128,
9032                            SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
9033     defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins,  DestInfo.info256,
9034                            SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
9035   }
9036 }
9037
// Width-expansion wrapper around avx512_3Op_imm8 using a single vector-type
// family. Z needs Pred (HasAVX512 unless overridden); Z128/Z256 additionally
// need HasVLX. Unlike avx512_common_3Op_rm_imm8, no encoding-base classes are
// attached here; the instantiating defm is expected to add them.
9038 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
9039                                   bits<8> opc, SDNode OpNode, OpndItins itins,
9040                                   Predicate Pred = HasAVX512> {
9041   let Predicates = [Pred] in {
9042     defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
9043   }
9044   let Predicates = [Pred, HasVLX] in {
9045     defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
9046     defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
9047   }
9048 }
9049
// Scalar FP-with-immediate operation: under predicate prd, emits one record
// family named Z128 that combines the regular rri/rmi forms (OpNode) with the
// {sae} rrib form (OpNodeRnd). Takes a single X86VectorVTInfo rather than a
// width family, so no other widths are produced.
9050 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
9051                   X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
9052                   SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
9053   let Predicates = [prd] in {
9054      defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, itins, _>,
9055                  avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, itins, _>;
9056   }
9057 }
9058
// Instantiates a unary packed FP-with-immediate operation for both element
// types: PS (f32, opcode opcPs, itins.s, 32-bit CD8VF) and PD (f64, opcode
// opcPd, itins.d, 64-bit CD8VF, VEX_W).
9059 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
9060                     bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
9061                     SDNode OpNodeRnd, SizeItins itins, Predicate prd>{
9062   defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
9063                             opcPs, OpNode, OpNodeRnd, itins.s, prd>,
9064                             EVEX_CD8<32, CD8VF>;
9065   defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
9066                             opcPd, OpNode, OpNodeRnd, itins.d, prd>,
9067                             EVEX_CD8<64, CD8VF>, VEX_W;
9068 }
9069
// Packed unary FP-with-immediate instruction families (PS and PD variants).
// VREDUCE is gated on HasDQI; VRNDSCALE and VGETMANT on HasAVX512.
// VREDUCE and VGETMANT share one opcode for PS and PD; VRNDSCALE uses
// 0x08 (PS) and 0x09 (PD).
9070 defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
9071                               X86VReduce, X86VReduceRnd, SSE_ALU_ITINS_P, HasDQI>,
9072                               AVX512AIi8Base, EVEX;
9073 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
9074                               X86VRndScale, X86VRndScaleRnd, SSE_ALU_ITINS_P, HasAVX512>,
9075                               AVX512AIi8Base, EVEX;
9076 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
9077                               X86VGetMant, X86VGetMantRnd, SSE_ALU_ITINS_P, HasAVX512>,
9078                               AVX512AIi8Base, EVEX;
9079
// VRANGE family, all gated on HasDQI. Packed forms (opcode 0x50) cover all
// vector widths; scalar forms (opcode 0x51) are VEX_LIG with a CD8VT1 tuple.
// The f64 variants add VEX_W.
9080 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
9081                                                 0x50, X86VRange, X86VRangeRnd,
9082                                                 SSE_ALU_F64P, HasDQI>,
9083       AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
9084 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
9085                                                 0x50, X86VRange, X86VRangeRnd,
9086                                                 SSE_ALU_F32P, HasDQI>,
9087       AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
9088
9089 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
9090       f64x_info, 0x51, X86Ranges, X86RangesRnd, SSE_ALU_F64S, HasDQI>,
9091       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
9092 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
9093       0x51, X86Ranges, X86RangesRnd, SSE_ALU_F32S, HasDQI>,
9094       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
9095
// Scalar VREDUCE forms (opcode 0x57), gated on HasDQI; SD adds VEX_W.
9096 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
9097       0x57, X86Reduces, X86ReducesRnd, SSE_ALU_F64S, HasDQI>,
9098       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
9099 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
9100       0x57, X86Reduces, X86ReducesRnd, SSE_ALU_F32S, HasDQI>,
9101       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
9102
// Scalar VGETMANT forms (opcode 0x27), gated on HasAVX512; SD adds VEX_W.
9103 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
9104       0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F64S, HasAVX512>,
9105       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
9106 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
9107       0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F32S, HasAVX512>,
9108       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
9109
// Select the generic rounding nodes on 512-bit vectors as VRNDSCALE with a
// fixed immediate:
//   ffloor -> 0x9, fnearbyint -> 0xC, fceil -> 0xA, frint -> 0x4, ftrunc -> 0xB
// NOTE(review): the immediates presumably follow the VRNDSCALE imm8
// rounding-control layout - confirm against the Intel SDM before changing.
9110 let Predicates = [HasAVX512] in {
9111 def : Pat<(v16f32 (ffloor VR512:$src)),
9112           (VRNDSCALEPSZrri VR512:$src, (i32 0x9))>;
9113 def : Pat<(v16f32 (fnearbyint VR512:$src)),
9114           (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
9115 def : Pat<(v16f32 (fceil VR512:$src)),
9116           (VRNDSCALEPSZrri VR512:$src, (i32 0xA))>;
9117 def : Pat<(v16f32 (frint VR512:$src)),
9118           (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
9119 def : Pat<(v16f32 (ftrunc VR512:$src)),
9120           (VRNDSCALEPSZrri VR512:$src, (i32 0xB))>;
9121
// Same mapping for v8f64 using the PD instruction.
9122 def : Pat<(v8f64 (ffloor VR512:$src)),
9123           (VRNDSCALEPDZrri VR512:$src, (i32 0x9))>;
9124 def : Pat<(v8f64 (fnearbyint VR512:$src)),
9125           (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
9126 def : Pat<(v8f64 (fceil VR512:$src)),
9127           (VRNDSCALEPDZrri VR512:$src, (i32 0xA))>;
9128 def : Pat<(v8f64 (frint VR512:$src)),
9129           (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
9130 def : Pat<(v8f64 (ftrunc VR512:$src)),
9131           (VRNDSCALEPDZrri VR512:$src, (i32 0xB))>;
9132 }
9133
// 128-bit and 256-bit counterparts of the 512-bit rounding patterns, available
// with HasVLX. The node-to-immediate mapping is identical for every type:
//   ffloor -> 0x9, fnearbyint -> 0xC, fceil -> 0xA, frint -> 0x4, ftrunc -> 0xB
9134 let Predicates = [HasVLX] in {
// v4f32
9135 def : Pat<(v4f32 (ffloor VR128X:$src)),
9136           (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x9))>;
9137 def : Pat<(v4f32 (fnearbyint VR128X:$src)),
9138           (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xC))>;
9139 def : Pat<(v4f32 (fceil VR128X:$src)),
9140           (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xA))>;
9141 def : Pat<(v4f32 (frint VR128X:$src)),
9142           (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x4))>;
9143 def : Pat<(v4f32 (ftrunc VR128X:$src)),
9144           (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xB))>;
9145
// v2f64
9146 def : Pat<(v2f64 (ffloor VR128X:$src)),
9147           (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x9))>;
9148 def : Pat<(v2f64 (fnearbyint VR128X:$src)),
9149           (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xC))>;
9150 def : Pat<(v2f64 (fceil VR128X:$src)),
9151           (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xA))>;
9152 def : Pat<(v2f64 (frint VR128X:$src)),
9153           (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x4))>;
9154 def : Pat<(v2f64 (ftrunc VR128X:$src)),
9155           (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xB))>;
9156
// v8f32
9157 def : Pat<(v8f32 (ffloor VR256X:$src)),
9158           (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x9))>;
9159 def : Pat<(v8f32 (fnearbyint VR256X:$src)),
9160           (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xC))>;
9161 def : Pat<(v8f32 (fceil VR256X:$src)),
9162           (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xA))>;
9163 def : Pat<(v8f32 (frint VR256X:$src)),
9164           (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x4))>;
9165 def : Pat<(v8f32 (ftrunc VR256X:$src)),
9166           (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xB))>;
9167
// v4f64
9168 def : Pat<(v4f64 (ffloor VR256X:$src)),
9169           (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x9))>;
9170 def : Pat<(v4f64 (fnearbyint VR256X:$src)),
9171           (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xC))>;
9172 def : Pat<(v4f64 (fceil VR256X:$src)),
9173           (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xA))>;
9174 def : Pat<(v4f64 (frint VR256X:$src)),
9175           (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x4))>;
9176 def : Pat<(v4f64 (ftrunc VR256X:$src)),
9177           (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xB))>;
9178 }
9179
// Instantiates an X86Shuf128-based three-operand immediate shuffle for the
// 512-bit (Z, HasAVX512) and 256-bit (Z256, HasAVX512 + HasVLX) widths.
// No 128-bit (Z128) variant is emitted by this multiclass.
9180 multiclass avx512_shuff_packed_128<string OpcodeStr, OpndItins itins,
9181                                    AVX512VLVectorVTInfo _, bits<8> opc>{
9182   let Predicates = [HasAVX512] in {
9183     defm Z    : avx512_3Op_imm8<opc, OpcodeStr, X86Shuf128, itins, _.info512>, EVEX_V512;
9184
9185   }
9186   let Predicates = [HasAVX512, HasVLX] in {
9187      defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, X86Shuf128, itins, _.info256>, EVEX_V256;
9188   }
9189 }
9190
// 128-bit-granular shuffles. FP variants use opcode 0x23, integer variants
// 0x43; the 64-bit element variants add VEX_W.
9191 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", SSE_SHUFP,
9192       avx512vl_f32_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
9193 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", SSE_SHUFP,
9194       avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
9195 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", SSE_SHUFP,
9196       avx512vl_i32_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
9197 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", SSE_SHUFP,
9198       avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
9199
9200 let Predicates = [HasAVX512] in {
9201 // Provide fallback in case the load node that is used in the broadcast
9202 // patterns above is used by additional users, which prevents the pattern
9203 // selection.
// Each pattern broadcasts a 128-bit register to 512 bits by placing it in the
// sub_xmm lane of an IMPLICIT_DEF 512-bit value, feeding that value as both
// shuffle sources, and using shuffle immediate 0.
9204 def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
9205           (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9206                           (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9207                           0)>;
9208 def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
9209           (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9210                           (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9211                           0)>;
9212
9213 def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
9214           (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9215                           (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9216                           0)>;
9217 def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
9218           (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9219                           (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9220                           0)>;
9221
// The i16 and i8 element types reuse the 32-bit integer shuffle instruction.
9222 def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
9223           (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9224                           (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9225                           0)>;
9226
9227 def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
9228           (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9229                           (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9230                           0)>;
9231 }
9232
// VALIGN instruction family: opcode 0x03, X86VAlign node, all vector widths
// via avx512_common_3Op_imm8 (default predicate), tagged AVX512AIi8Base and
// EVEX_4V. The element-size-specific encoding bits are added by the caller.
9233 multiclass avx512_valign<string OpcodeStr, OpndItins itins,
9234                          AVX512VLVectorVTInfo VTInfo_I> {
9235   defm NAME:       avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign, itins>,
9236                            AVX512AIi8Base, EVEX_4V;
9237 }
9238
// Dword and qword element-aligned VALIGN; the qword form adds VEX_W.
9239 defm VALIGND: avx512_valign<"valignd", SSE_PALIGN, avx512vl_i32_info>,
9240                                                   EVEX_CD8<32, CD8VF>;
9241 defm VALIGNQ: avx512_valign<"valignq", SSE_PALIGN, avx512vl_i64_info>,
9242                                                   EVEX_CD8<64, CD8VF>, VEX_W;
9243
// Byte-granular VPALIGNR (opcode 0x0F); source and destination are both i8
// vectors, and the multiclass default predicate (HasBWI) applies.
9244 defm VPALIGNR:   avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", SSE_PALIGN,
9245                                           avx512vl_i8_info, avx512vl_i8_info>,
9246                 EVEX_CD8<8, CD8VF>;
9247
9248 // Fragments to help convert valignq into masked valignd. Or valignq/valignd
9249 // into vpalignr.
// Each transform rescales the shift immediate to the narrower element size
// and rebuilds it with getI8Imm:
//   ValignqImm32XForm: qword count -> dword count (x2)
//   ValignqImm8XForm:  qword count -> byte count  (x8)
//   ValigndImm8XForm:  dword count -> byte count  (x4)
9250 def ValignqImm32XForm : SDNodeXForm<imm, [{
9251   return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
9252 }]>;
9253 def ValignqImm8XForm : SDNodeXForm<imm, [{
9254   return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
9255 }]>;
9256 def ValigndImm8XForm : SDNodeXForm<imm, [{
9257   return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
9258 }]>;
9259
// Patterns that select a masked align performed in one element type (From)
// but selected (vselect) in another (To) as the To-typed instruction named
// by OpcodeStr. ImmXForm rescales the immediate from From's element size to
// To's. Four cases are covered: register / memory second source, each with
// merge masking ($src0 passthrough) and zero masking (To.ImmAllZerosV).
9260 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
9261                                         X86VectorVTInfo From, X86VectorVTInfo To,
9262                                         SDNodeXForm ImmXForm> {
// Register source, merge masking -> <OpcodeStr>rrik.
9263   def : Pat<(To.VT (vselect To.KRCWM:$mask,
9264                             (bitconvert
9265                              (From.VT (OpNode From.RC:$src1, From.RC:$src2,
9266                                               imm:$src3))),
9267                             To.RC:$src0)),
9268             (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
9269                                                   To.RC:$src1, To.RC:$src2,
9270                                                   (ImmXForm imm:$src3))>;
9271
// Register source, zero masking -> <OpcodeStr>rrikz.
9272   def : Pat<(To.VT (vselect To.KRCWM:$mask,
9273                             (bitconvert
9274                              (From.VT (OpNode From.RC:$src1, From.RC:$src2,
9275                                               imm:$src3))),
9276                             To.ImmAllZerosV)),
9277             (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
9278                                                    To.RC:$src1, To.RC:$src2,
9279                                                    (ImmXForm imm:$src3))>;
9280
// Memory source (full-vector load), merge masking -> <OpcodeStr>rmik.
9281   def : Pat<(To.VT (vselect To.KRCWM:$mask,
9282                             (bitconvert
9283                              (From.VT (OpNode From.RC:$src1,
9284                                       (bitconvert (To.LdFrag addr:$src2)),
9285                                       imm:$src3))),
9286                             To.RC:$src0)),
9287             (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
9288                                                   To.RC:$src1, addr:$src2,
9289                                                   (ImmXForm imm:$src3))>;
9290
// Memory source (full-vector load), zero masking -> <OpcodeStr>rmikz.
9291   def : Pat<(To.VT (vselect To.KRCWM:$mask,
9292                             (bitconvert
9293                              (From.VT (OpNode From.RC:$src1,
9294                                       (bitconvert (To.LdFrag addr:$src2)),
9295                                       imm:$src3))),
9296                             To.ImmAllZerosV)),
9297             (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
9298                                                    To.RC:$src1, addr:$src2,
9299                                                    (ImmXForm imm:$src3))>;
9300 }
9301
// Extends avx512_vpalign_mask_lowering with patterns whose second source is a
// To-typed scalar broadcast load (X86VBroadcast of To.ScalarLdFrag). Adds the
// unmasked (rmbi), merge-masked (rmbik) and zero-masked (rmbikz) selections.
9302 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
9303                                            X86VectorVTInfo From,
9304                                            X86VectorVTInfo To,
9305                                            SDNodeXForm ImmXForm> :
9306       avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
// Unmasked: the result type is From.VT, but the To-typed instruction is used.
9307   def : Pat<(From.VT (OpNode From.RC:$src1,
9308                              (bitconvert (To.VT (X86VBroadcast
9309                                                 (To.ScalarLdFrag addr:$src2)))),
9310                              imm:$src3)),
9311             (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
9312                                                   (ImmXForm imm:$src3))>;
9313
// Broadcast source, merge masking.
9314   def : Pat<(To.VT (vselect To.KRCWM:$mask,
9315                             (bitconvert
9316                              (From.VT (OpNode From.RC:$src1,
9317                                       (bitconvert
9318                                        (To.VT (X86VBroadcast
9319                                                (To.ScalarLdFrag addr:$src2)))),
9320                                       imm:$src3))),
9321                             To.RC:$src0)),
9322             (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
9323                                                    To.RC:$src1, addr:$src2,
9324                                                    (ImmXForm imm:$src3))>;
9325
// Broadcast source, zero masking.
9326   def : Pat<(To.VT (vselect To.KRCWM:$mask,
9327                             (bitconvert
9328                              (From.VT (OpNode From.RC:$src1,
9329                                       (bitconvert
9330                                        (To.VT (X86VBroadcast
9331                                                (To.ScalarLdFrag addr:$src2)))),
9332                                       imm:$src3))),
9333                             To.ImmAllZerosV)),
9334             (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
9335                                                     To.RC:$src1, addr:$src2,
9336                                                     (ImmXForm imm:$src3))>;
9337 }
9338
9339 let Predicates = [HasAVX512] in {
9340   // For 512-bit we lower to the widest element type we can. So we only need
9341   // to handle converting valignq to valignd.
  // From = v8i64, To = v16i32; the immediate is doubled by ValignqImm32XForm.
9342   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
9343                                          v16i32_info, ValignqImm32XForm>;
9344 }
9345
// 128-bit and 256-bit versions of the valignq -> valignd conversion; both use
// ValignqImm32XForm to rescale the immediate.
9346 let Predicates = [HasVLX] in {
9347   // For 128-bit we lower to the widest element type we can. So we only need
9348   // to handle converting valignq to valignd.
9349   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
9350                                          v4i32x_info, ValignqImm32XForm>;
9351   // For 256-bit we lower to the widest element type we can. So we only need
9352   // to handle converting valignq to valignd.
9353   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
9354                                          v8i32x_info, ValignqImm32XForm>;
9355 }
9356
9357 let Predicates = [HasVLX, HasBWI] in {
9358   // We can turn 128-bit VALIGND/VALIGNQ into VPALIGNR. Only the VPALIGNRZ128
  // form is instantiated here; the immediate is rescaled to a byte count
  // (x8 for qword, x4 for dword elements).
  // NOTE(review): no 256-bit lowering is provided, presumably because
  // 256-bit VPALIGNR operates per 128-bit lane - confirm before extending.
9359   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
9360                                       v16i8x_info, ValignqImm8XForm>;
9361   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
9362                                       v16i8x_info, ValigndImm8XForm>;
9363 }
9364
// VDBPSADBW (opcode 0x42): destination vectors are i16, sources are i8.
// Uses the multiclass default predicate (HasBWI).
9365 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
9366                 SSE_INTMUL_ITINS_P, avx512vl_i16_info, avx512vl_i8_info>,
9367                 EVEX_CD8<8, CD8VF>;
9368
// Unary vector operation with a register form (rr) and a full-vector memory
// form (rm). Both are AVX512_maskable records in the type's ExeDomain and are
// tagged EVEX + AVX5128IBase; the memory form also sets the CD8VF tuple from
// the element size and uses the folded-load scheduling class.
9369 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
9370                            OpndItins itins, X86VectorVTInfo _> {
9371   let ExeDomain = _.ExeDomain in {
9372   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9373                     (ins _.RC:$src1), OpcodeStr,
9374                     "$src1", "$src1",
9375                     (_.VT (OpNode _.RC:$src1)), itins.rr>, EVEX, AVX5128IBase,
9376                     Sched<[itins.Sched]>;
9377
9378   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9379                   (ins _.MemOp:$src1), OpcodeStr,
9380                   "$src1", "$src1",
9381                   (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1)))), itins.rm>,
9382             EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
9383             Sched<[itins.Sched.Folded]>;
9384   }
9385 }
9386
// Extends avx512_unary_rm with a broadcast memory form (rmb): the operand is
// a scalar load broadcast with X86VBroadcast, the assembly operand gets the
// type's BroadcastStr suffix, and EVEX_B is set.
9387 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9388                             OpndItins itins, X86VectorVTInfo _> :
9389            avx512_unary_rm<opc, OpcodeStr, OpNode, itins, _> {
9390   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9391                   (ins _.ScalarMemOp:$src1), OpcodeStr,
9392                   "${src1}"##_.BroadcastStr,
9393                   "${src1}"##_.BroadcastStr,
9394                   (_.VT (OpNode (X86VBroadcast
9395                                     (_.ScalarLdFrag addr:$src1)))), itins.rm>,
9396              EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
9397              Sched<[itins.Sched.Folded]>;
9398 }
9399
// Instantiates avx512_unary_rm (rr + rm, no broadcast form) for all three
// vector widths: Z requires prd; Z256 and Z128 require prd and HasVLX.
9400 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
9401                               OpndItins itins, AVX512VLVectorVTInfo VTInfo,
9402                               Predicate prd> {
9403   let Predicates = [prd] in
9404     defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
9405                              EVEX_V512;
9406
9407   let Predicates = [prd, HasVLX] in {
9408     defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
9409                               EVEX_V256;
9410     defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info128>,
9411                               EVEX_V128;
9412   }
9413 }
9414
// Same width expansion as avx512_unary_rm_vl, but built on avx512_unary_rmb
// so each width also gets the broadcast memory form.
9415 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
9416                                OpndItins itins, AVX512VLVectorVTInfo VTInfo,
9417                                Predicate prd> {
9418   let Predicates = [prd] in
9419     defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
9420                               EVEX_V512;
9421
9422   let Predicates = [prd, HasVLX] in {
9423     defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
9424                                  EVEX_V256;
9425     defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info128>,
9426                                  EVEX_V128;
9427   }
9428 }
9429
// Dword/qword element variants of a unary operation, with broadcast forms.
// Q appends "q" to the mnemonic, uses opc_q and i64 vectors, and adds VEX_W;
// D appends "d", uses opc_d and i32 vectors.
9430 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
9431                                  SDNode OpNode, OpndItins itins, Predicate prd> {
9432   defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, itins,
9433                                avx512vl_i64_info, prd>, VEX_W;
9434   defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, itins,
9435                                avx512vl_i32_info, prd>;
9436 }
9437
// Byte/word element variants of a unary operation. These use
// avx512_unary_rm_vl, so no broadcast memory form is emitted; both are
// tagged VEX_WIG.
9438 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
9439                                  SDNode OpNode, OpndItins itins, Predicate prd> {
9440   defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, itins,
9441                               avx512vl_i16_info, prd>, VEX_WIG;
9442   defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, itins,
9443                               avx512vl_i8_info, prd>, VEX_WIG;
9444 }
9445
// Instantiates a unary operation for every integer element size: the D/Q
// variants are gated on HasAVX512 and the B/W variants on HasBWI.
9446 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
9447                                   bits<8> opc_d, bits<8> opc_q,
9448                                   string OpcodeStr, SDNode OpNode,
9449                                   OpndItins itins> {
9450   defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, itins,
9451                                     HasAVX512>,
9452               avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, itins,
9453                                     HasBWI>;
9454 }
9455
// VPABS{B,W,D,Q}: opcodes 0x1C (b), 0x1D (w), 0x1E (d), 0x1F (q), selected
// from the generic abs node.
9456 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs, SSE_PABS>;
9457
9458 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// The narrow source is inserted into an IMPLICIT_DEF v8i64, VPABSQZrr is run
// on the widened value, and the original-width subregister is extracted.
// Only the qword element types (v4i64, v2i64) are handled here.
9459 let Predicates = [HasAVX512, NoVLX] in {
9460   def : Pat<(v4i64 (abs VR256X:$src)),
9461             (EXTRACT_SUBREG
9462                 (VPABSQZrr
9463                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
9464              sub_ymm)>;
9465   def : Pat<(v2i64 (abs VR128X:$src)),
9466             (EXTRACT_SUBREG
9467                 (VPABSQZrr
9468                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
9469              sub_xmm)>;
9470 }
9471
9472 // Use 512bit version to implement 128/256 bit.
// Active when prd holds but VLX is unavailable (NoVLX). For both the 256-bit
// and 128-bit types, the source is inserted into an IMPLICIT_DEF 512-bit
// value at the type's SubRegIdx, the instruction named InstrStr#"Zrr" is
// applied, and the same subregister is extracted as the result.
9473 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
9474                                  AVX512VLVectorVTInfo _, Predicate prd> {
9475   let Predicates = [prd, NoVLX] in {
9476     def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
9477               (EXTRACT_SUBREG
9478                 (!cast<Instruction>(InstrStr # "Zrr")
9479                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
9480                                  _.info256.RC:$src1,
9481                                  _.info256.SubRegIdx)),
9482               _.info256.SubRegIdx)>;
9483
9484     def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
9485               (EXTRACT_SUBREG
9486                 (!cast<Instruction>(InstrStr # "Zrr")
9487                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
9488                                  _.info128.RC:$src1,
9489                                  _.info128.SubRegIdx)),
9490               _.info128.SubRegIdx)>;
9491   }
9492 }
9493
// VPLZCNT{D,Q} (opcode 0x44, ctlz node) and VPCONFLICT{D,Q} (opcode 0xC4,
// X86Conflict node); both families are gated on HasCDI.
9494 // FIXME: Is there a better scheduler itinerary for VPLZCNT?
9495 defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
9496                                         SSE_INTALU_ITINS_P, HasCDI>;
9497
9498 // FIXME: Is there a better scheduler itinerary for VPCONFLICT?
9499 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
9500                                         SSE_INTALU_ITINS_P, HasCDI>;
9501
9502 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
9503 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
9504 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
9505
9506 //===---------------------------------------------------------------------===//
9507 // Counts number of ones - VPOPCNTD and VPOPCNTQ
9508 //===---------------------------------------------------------------------===//
9509
// VPOPCNT{D,Q} (opcode 0x55, ctpop node), gated on HasVPOPCNTDQ, plus the
// widen-to-512-bit lowering of the 128/256-bit types when VLX is unavailable.
9510 // FIXME: Is there a better scheduler itinerary for VPOPCNTD/VPOPCNTQ?
9511 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
9512                                      SSE_INTALU_ITINS_P, HasVPOPCNTDQ>;
9513
9514 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
9515 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
9516
9517 //===---------------------------------------------------------------------===//
9518 // Replicate Single FP - MOVSHDUP and MOVSLDUP
9519 //===---------------------------------------------------------------------===//
// Unary f32 replicate operation for all vector widths (HasAVX512), without a
// broadcast memory form; records carry the XS prefix class.
9520 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
9521                             OpndItins itins> {
9522   defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, itins,
9523                                       avx512vl_f32_info, HasAVX512>, XS;
9524 }
9525
// VMOVSHDUP uses opcode 0x16 and VMOVSLDUP opcode 0x12.
9526 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup, SSE_MOVDDUP>;
9527 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, SSE_MOVDDUP>;
9528
9529 //===----------------------------------------------------------------------===//
9530 // AVX-512 - MOVDDUP
9531 //===----------------------------------------------------------------------===//
9532
// 128-bit MOVDDUP forms. Unlike avx512_unary_rm, the memory form takes a
// scalar memory operand: the value is loaded with _.ScalarLdFrag, wrapped in
// scalar_to_vector, and the CD8 tuple is CD8VH rather than CD8VF.
9533 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
9534                               OpndItins itins, X86VectorVTInfo _> {
9535   let ExeDomain = _.ExeDomain in {
9536   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9537                    (ins _.RC:$src), OpcodeStr, "$src", "$src",
9538                    (_.VT (OpNode (_.VT _.RC:$src))), itins.rr>, EVEX,
9539                    Sched<[itins.Sched]>;
9540   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9541                  (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
9542                  (_.VT (OpNode (_.VT (scalar_to_vector
9543                                        (_.ScalarLdFrag addr:$src))))),
9544                  itins.rm>, EVEX, EVEX_CD8<_.EltSize, CD8VH>,
9545                  Sched<[itins.Sched.Folded]>;
9546   }
9547 }
9548
// Instantiates MOVDDUP for all three vector widths of VTInfo.
//   Z / Z256 - generic unary rr/rm forms selected from OpNode. The node is
//              taken from the template argument rather than hard-coded, so the
//              argument is actually honored.
//   Z128     - dedicated 128-bit forms (scalar memory operand), selected from
//              X86VBroadcast rather than OpNode.
// Z is emitted without a Predicates wrapper; Z256/Z128 require HasAVX512 and
// HasVLX.
9549 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9550                                  OpndItins itins, AVX512VLVectorVTInfo VTInfo> {
9551
9552   defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info512>, EVEX_V512;
9553
9554   let Predicates = [HasAVX512, HasVLX] in {
9555     defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
9556                                EVEX_V256;
9557     defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, itins, VTInfo.info128>,
9558                                   EVEX_V128;
9559   }
9560 }
9561
// MOVDDUP on f64 vectors for all widths; records carry the XD prefix class
// and VEX_W.
9562 multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
9563                           OpndItins itins> {
9564   defm NAME:      avx512_movddup_common<opc, OpcodeStr, OpNode, itins,
9565                                         avx512vl_f64_info>, XD, VEX_W;
9566 }
9567
// VMOVDDUP: opcode 0x12, selected from the X86Movddup node.
9568 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SSE_MOVDDUP>;
9569
// Select v2f64 X86VBroadcast as the 128-bit VMOVDDUP. Sources covered: a
// scalar f64 load, an f64 register (copied to VR128X), and a v2f64 load.
// Each source also has merge-masked (k) and zero-masked (kz) variants keyed
// on a VK2WM mask.
9570 let Predicates = [HasVLX] in {
// Unmasked forms.
9571 def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
9572           (VMOVDDUPZ128rm addr:$src)>;
9573 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
9574           (VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
9575 def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9576           (VMOVDDUPZ128rm addr:$src)>;
9577
// Masked, f64 register source.
9578 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
9579                    (v2f64 VR128X:$src0)),
9580           (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
9581                            (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
9582 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
9583                    (bitconvert (v4i32 immAllZerosV))),
9584           (VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
9585
// Masked, scalar f64 load source.
9586 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
9587                    (v2f64 VR128X:$src0)),
9588           (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9589 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
9590                    (bitconvert (v4i32 immAllZerosV))),
9591           (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
9592
// Masked, v2f64 load source.
9593 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9594                    (v2f64 VR128X:$src0)),
9595           (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9596 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9597                    (bitconvert (v4i32 immAllZerosV))),
9598           (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
9599 }
9600
9601 //===----------------------------------------------------------------------===//
9602 // AVX-512 - Unpack Instructions
9603 //===----------------------------------------------------------------------===//
// Floating-point unpack high (0x15) / low (0x14), predicated on HasAVX512.
9604 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
9605                                  SSE_ALU_ITINS_S>;
9606 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
9607                                  SSE_ALU_ITINS_S>;
9608
// Integer byte and word unpacks, predicated on HasBWI.
9609 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
9610                                        SSE_INTALU_ITINS_P, HasBWI>;
9611 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
9612                                        SSE_INTALU_ITINS_P, HasBWI>;
9613 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
9614                                        SSE_INTALU_ITINS_P, HasBWI>;
9615 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
9616                                        SSE_INTALU_ITINS_P, HasBWI>;
9617
// Integer dword and qword unpacks, predicated on HasAVX512.
9618 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
9619                                        SSE_INTALU_ITINS_P, HasAVX512>;
9620 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
9621                                        SSE_INTALU_ITINS_P, HasAVX512>;
9622 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
9623                                        SSE_INTALU_ITINS_P, HasAVX512>;
9624 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
9625                                        SSE_INTALU_ITINS_P, HasAVX512>;
9626
9627 //===----------------------------------------------------------------------===//
9628 // AVX-512 - Extract & Insert Integer Instructions
9629 //===----------------------------------------------------------------------===//
9630
// Memory-destination form (mr) shared by the byte and word element extracts.
// The pattern stores OpNode(src1, imm src2), truncated to _.EltVT, to $dst.
9631 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
9632                                                             X86VectorVTInfo _> {
9633   def mr : AVX512Ii8<opc, MRMDestMem, (outs),
9634               (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
9635               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9636               [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
9637                        addr:$dst)]>,
9638               EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd]>;
9639 }
9640
// Byte element extract (HasBWI). Defines the register form (rr, opcode 0x14,
// result in GR32orGR64 selected from X86pextrb) and pulls in the memory form
// from avx512_extract_elt_bw_m with the same opcode.
9641 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
9642   let Predicates = [HasBWI] in {
9643     def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
9644                   (ins _.RC:$src1, u8imm:$src2),
9645                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9646                   [(set GR32orGR64:$dst,
9647                         (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
9648                   EVEX, TAPD, Sched<[WriteShuffle]>;
9649
9650     defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
9651   }
9652 }
9653
// Word element extract (HasBWI). Defines:
//   rr     - opcode 0xC5, MRMSrcReg, selected from X86pextrw.
//   rr_REV - opcode 0x15, MRMDestReg, ".s" mnemonic suffix, no selection
//            pattern; linked to rr through FoldGenData<NAME#rr>.
//   mr     - memory form from avx512_extract_elt_bw_m, opcode 0x15.
9654 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
9655   let Predicates = [HasBWI] in {
9656     def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
9657                   (ins _.RC:$src1, u8imm:$src2),
9658                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9659                   [(set GR32orGR64:$dst,
9660                         (X86pextrw (_.VT _.RC:$src1), imm:$src2))],
9661                   IIC_SSE_PEXTRW>, EVEX, PD, Sched<[WriteShuffle]>;
9662
9663     let hasSideEffects = 0 in
9664     def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
9665                    (ins _.RC:$src1, u8imm:$src2),
9666                    OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
9667                    IIC_SSE_PEXTRW>, EVEX, TAPD, FoldGenData<NAME#rr>,
9668                    Sched<[WriteShuffle]>;
9669
9670     defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
9671   }
9672 }
9673
// Dword/qword element extract (HasDQI), opcode 0x16 for both forms.
//   rr - extractelt(src1, imm) into the general-purpose register class GRC.
//   mr - extractelt(src1, imm) stored directly to memory.
9674 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
9675                                                             RegisterClass GRC> {
9676   let Predicates = [HasDQI] in {
9677     def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
9678                   (ins _.RC:$src1, u8imm:$src2),
9679                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9680                   [(set GRC:$dst,
9681                       (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
9682                   EVEX, TAPD, Sched<[WriteShuffle]>;
9683
9684     def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
9685                 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
9686                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9687                 [(store (extractelt (_.VT _.RC:$src1),
9688                                     imm:$src2),addr:$dst)]>,
9689                 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
9690                 Sched<[WriteShuffleLd]>;
9691   }
9692 }
9693
// EVEX element extracts for v16i8, v8i16, v4i32 and v2i64. The dword and
// qword variants share a multiclass and differ by GR32/GR64 and VEX_W.
9694 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
9695 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
9696 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
9697 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
9698
// Memory-source form (rm) shared by the element inserts. The pattern passes
// $src1, the scalar loaded from $src2 through LdFrag, and the immediate $src3
// to OpNode and writes the resulting vector to $dst.
9699 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
9700                                             X86VectorVTInfo _, PatFrag LdFrag> {
9701   def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
9702       (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
9703       OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9704       [(set _.RC:$dst,
9705           (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
9706       EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd, ReadAfterLd]>;
9707 }
9708
// Byte/word element insert (HasBWI). Defines the register form (rr), whose
// scalar source is a GR32orGR64 register, and pulls in the memory form from
// avx512_insert_elt_m using the caller-supplied LdFrag.
9709 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9710                                             X86VectorVTInfo _, PatFrag LdFrag> {
9711   let Predicates = [HasBWI] in {
9712     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
9713         (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
9714         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9715         [(set _.RC:$dst,
9716             (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
9717         Sched<[WriteShuffle]>;
9718
9719     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
9720   }
9721 }
9722
// Dword/qword element insert (HasDQI). The register form (rr) takes its
// scalar from register class GRC and is selected from insertelt; the memory
// form comes from avx512_insert_elt_m with insertelt and _.ScalarLdFrag.
9723 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
9724                                          X86VectorVTInfo _, RegisterClass GRC> {
9725   let Predicates = [HasDQI] in {
9726     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
9727         (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
9728         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9729         [(set _.RC:$dst,
9730             (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
9731         EVEX_4V, TAPD, Sched<[WriteShuffle]>;
9732
9733     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
9734                                     _.ScalarLdFrag>, TAPD;
9735   }
9736 }
9737
// EVEX element inserts for v16i8, v8i16, v4i32 and v2i64. The byte and word
// variants load through extloadi8 / extloadi16. VPINSRD and VPINSRQ share
// opcode 0x22 and differ by GR32/GR64 and VEX_W.
9738 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
9739                                      extloadi8>, TAPD, VEX_WIG;
9740 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
9741                                      extloadi16>, PD, VEX_WIG;
9742 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
9743 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
9744
9745 //===----------------------------------------------------------------------===//
9746 // VSHUFPS - VSHUFPD Operations
9747 //===----------------------------------------------------------------------===//
9748
// VSHUFPS/VSHUFPD wrapper: instantiates avx512_common_3Op_imm8 with opcode
// 0xC6 and the X86Shufp node for the floating-point type family VTInfo_FP.
// NOTE(review): VTInfo_I is accepted but never referenced in the body.
9749 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
9750                                                 AVX512VLVectorVTInfo VTInfo_FP>{
9751   defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
9752                         SSE_SHUFP>, EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
9753                         AVX512AIi8Base, EVEX_4V;
9754 }
9755
// Single-precision (PS) and double-precision (PD, VEX_W) shuffles.
9756 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
9757 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
9758
9759 //===----------------------------------------------------------------------===//
9760 // AVX-512 - Byte shift Left/Right
9761 //===----------------------------------------------------------------------===//
9762
// Itinerary bundle used by the byte-shift instructions below. Both OpndItins
// arguments are IIC_SSE_INTSHDQ_P_RI and the Sched field is set to
// WriteVecShift.
9763 let Sched = WriteVecShift in
9764 def AVX512_BYTESHIFT : OpndItins<
9765   IIC_SSE_INTSHDQ_P_RI, IIC_SSE_INTSHDQ_P_RI
9766 >;
9767
// Shift-by-immediate on a whole vector, for one vector width.
//   rr - OpNode(src1 register, i8 imm), encoded with format MRMr.
//   rm - OpNode(vector loaded from src1 and bitconverted to _.VT, i8 imm),
//        encoded with format MRMm.
9768 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
9769                                Format MRMm, string OpcodeStr,
9770                                OpndItins itins, X86VectorVTInfo _>{
9771   def rr : AVX512<opc, MRMr,
9772              (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
9773              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9774              [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))],
9775              itins.rr>, Sched<[itins.Sched]>;
9776   def rm : AVX512<opc, MRMm,
9777            (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
9778            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9779            [(set _.RC:$dst,(_.VT (OpNode
9780                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
9781                                  (i8 imm:$src2))))], itins.rm>,
9782            Sched<[itins.Sched.Folded, ReadAfterLd]>;
9783 }
9784
// Instantiates avx512_shift_packed for the three byte-vector widths:
// Z (v64i8, requires prd), Z256 (v32i8) and Z128 (v16i8), the latter two
// requiring prd and HasVLX.
9785 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
9786                                    Format MRMm, string OpcodeStr,
9787                                    OpndItins itins, Predicate prd>{
9788   let Predicates = [prd] in
9789     defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
9790                                  OpcodeStr, itins, v64i8_info>, EVEX_V512;
9791   let Predicates = [prd, HasVLX] in {
9792     defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
9793                                     OpcodeStr, itins, v32i8x_info>, EVEX_V256;
9794     defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
9795                                     OpcodeStr, itins, v16i8x_info>, EVEX_V128;
9796   }
9797 }
// Byte shifts left and right. Both use opcode 0x73 and are told apart by the
// ModRM format: MRM7r/MRM7m for VPSLLDQ, MRM3r/MRM3m for VPSRLDQ.
9798 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
9799                                        AVX512_BYTESHIFT, HasBWI>, AVX512PDIi8Base,
9800                                        EVEX_4V, VEX_WIG;
9801 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
9802                                        AVX512_BYTESHIFT, HasBWI>, AVX512PDIi8Base,
9803                                        EVEX_4V, VEX_WIG;
9804
9805
// Two-source operation whose result type (_dst) differs from its source type
// (_src), for one vector width.
//   rr - both sources in registers.
//   rm - second source loaded from memory and bitconverted to _src.VT.
9806 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
9807                                 string OpcodeStr, OpndItins itins,
9808                                 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
9809   def rr : AVX512BI<opc, MRMSrcReg,
9810              (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
9811              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9812              [(set _dst.RC:$dst,(_dst.VT
9813                                 (OpNode (_src.VT _src.RC:$src1),
9814                                         (_src.VT _src.RC:$src2))))], itins.rr>,
9815              Sched<[itins.Sched]>;
9816   def rm : AVX512BI<opc, MRMSrcMem,
9817            (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
9818            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9819            [(set _dst.RC:$dst,(_dst.VT
9820                               (OpNode (_src.VT _src.RC:$src1),
9821                               (_src.VT (bitconvert
9822                                         (_src.LdFrag addr:$src2))))))], itins.rm>,
9823            Sched<[itins.Sched.Folded, ReadAfterLd]>;
9824 }
9825
// Instantiates avx512_psadbw_packed for the three widths, pairing an i64
// result vector with an i8 source vector of the same total size:
// v8i64/v64i8 (Z), v4i64/v32i8 (Z256) and v2i64/v16i8 (Z128). Z requires prd;
// Z256 and Z128 require prd and HasVLX.
9826 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
9827                                     string OpcodeStr, OpndItins itins,
9828                                     Predicate prd> {
9829   let Predicates = [prd] in
9830     defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v8i64_info,
9831                                   v64i8_info>, EVEX_V512;
9832   let Predicates = [prd, HasVLX] in {
9833     defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v4i64x_info,
9834                                     v32i8x_info>, EVEX_V256;
9835     defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v2i64x_info,
9836                                     v16i8x_info>, EVEX_V128;
9837   }
9838 }
9839
// EVEX VPSADBW: opcode 0xf6, X86psadbw node, predicated on HasBWI.
9840 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
9841                                         SSE_MPSADBW_ITINS, HasBWI>, EVEX_4V, VEX_WIG;
9842
9843 // Transforms to swizzle an immediate to enable better matching when
9844 // memory operand isn't in the right place.
9845 def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
9846   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
9847   uint8_t Imm = N->getZExtValue();
9848   // Swap bits 1/4 and 3/6.
  // The 0xa5 mask keeps bits 0, 2, 5 and 7, which the swap leaves in place.
9849   uint8_t NewImm = Imm & 0xa5;
9850   if (Imm & 0x02) NewImm |= 0x10;
9851   if (Imm & 0x10) NewImm |= 0x02;
9852   if (Imm & 0x08) NewImm |= 0x40;
9853   if (Imm & 0x40) NewImm |= 0x08;
9854   return getI8Imm(NewImm, SDLoc(N));
9855 }]>;
9856 def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
9857   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
9858   uint8_t Imm = N->getZExtValue();
9859   // Swap bits 2/4 and 3/5.
  // The 0xc3 mask keeps bits 0, 1, 6 and 7, which the swap leaves in place.
9860   uint8_t NewImm = Imm & 0xc3;
9861   if (Imm & 0x04) NewImm |= 0x10;
9862   if (Imm & 0x10) NewImm |= 0x04;
9863   if (Imm & 0x08) NewImm |= 0x20;
9864   if (Imm & 0x20) NewImm |= 0x08;
9865   return getI8Imm(NewImm, SDLoc(N));
9866 }]>;
9867 def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
9868   // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
9869   uint8_t Imm = N->getZExtValue();
9870   // Swap bits 1/2 and 5/6.
  // The 0x99 mask keeps bits 0, 3, 4 and 7, which the swap leaves in place.
9871   uint8_t NewImm = Imm & 0x99;
9872   if (Imm & 0x02) NewImm |= 0x04;
9873   if (Imm & 0x04) NewImm |= 0x02;
9874   if (Imm & 0x20) NewImm |= 0x40;
9875   if (Imm & 0x40) NewImm |= 0x20;
9876   return getI8Imm(NewImm, SDLoc(N));
9877 }]>;
// Rewrites a VPTERNLOG truth-table immediate taken from a node whose operands
// appear in (src2, src3, src1) order so that it is valid for the instruction,
// which takes them in (src1, src2, src3) order.
//
// Bit i of the immediate is the result for operand bits (a, b, c) with
// i = 4*a + 2*b + c. Entry (a, b, c) of the node's table therefore has to land
// at entry (c, a, b) of the instruction's table. Example: 0xF0 ("result is
// operand 0", i.e. src2 in the node) must become 0xCC ("result is src2" in the
// instruction).
9878 def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
9879   // Convert a VPTERNLOG immediate for operands given as (src2, src3, src1).
9880   uint8_t Imm = N->getZExtValue();
9881   // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  // Bits 0 and 7 (all operands 0 / all operands 1) never move.
9882   uint8_t NewImm = Imm & 0x81;
9883   if (Imm & 0x02) NewImm |= 0x10;
9884   if (Imm & 0x04) NewImm |= 0x02;
9885   if (Imm & 0x08) NewImm |= 0x20;
9886   if (Imm & 0x10) NewImm |= 0x04;
9887   if (Imm & 0x20) NewImm |= 0x40;
9888   if (Imm & 0x40) NewImm |= 0x08;
9889   return getI8Imm(NewImm, SDLoc(N));
9890 }]>;
// Rewrites a VPTERNLOG truth-table immediate taken from a node whose operands
// appear in (src3, src1, src2) order so that it is valid for the instruction,
// which takes them in (src1, src2, src3) order.
//
// Bit i of the immediate is the result for operand bits (a, b, c) with
// i = 4*a + 2*b + c. Entry (a, b, c) of the node's table therefore has to land
// at entry (b, c, a) of the instruction's table. Example: 0xF0 ("result is
// operand 0", i.e. src3 in the node) must become 0xAA ("result is src3" in the
// instruction).
9891 def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
9892   // Convert a VPTERNLOG immediate for operands given as (src3, src1, src2).
9893   uint8_t Imm = N->getZExtValue();
9894   // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  // Bits 0 and 7 (all operands 0 / all operands 1) never move.
9895   uint8_t NewImm = Imm & 0x81;
9896   if (Imm & 0x02) NewImm |= 0x04;
9897   if (Imm & 0x04) NewImm |= 0x10;
9898   if (Imm & 0x08) NewImm |= 0x40;
9899   if (Imm & 0x10) NewImm |= 0x02;
9900   if (Imm & 0x20) NewImm |= 0x08;
9901   if (Imm & 0x40) NewImm |= 0x20;
9902   return getI8Imm(NewImm, SDLoc(N));
9903 }]>;
9904
// Defines the VPTERNLOG register (rri), full-vector memory (rmi) and
// broadcast-memory (rmbi) forms through AVX512_maskable_3src, with $src1 tied
// to $dst. These are followed by extra selection patterns for nodes whose
// operands are not in the (src1, src2, src3) order the instructions take.
// Each extra pattern passes the immediate through the VPTERNLOG<order>_imm8
// transform, where <order> spells the operand order seen in the matched node
// (for example 321 for (src3, src2, src1)).
9905 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
9906                           OpndItins itins, X86VectorVTInfo _>{
9907   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
9908   defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
9909                       (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
9910                       OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
9911                       (OpNode (_.VT _.RC:$src1),
9912                               (_.VT _.RC:$src2),
9913                               (_.VT _.RC:$src3),
9914                               (i8 imm:$src4)), itins.rr, 1, 1>,
9915                       AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;
9916   defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
9917                     (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
9918                     OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
9919                     (OpNode (_.VT _.RC:$src1),
9920                             (_.VT _.RC:$src2),
9921                             (_.VT (bitconvert (_.LdFrag addr:$src3))),
9922                             (i8 imm:$src4)), itins.rm, 1, 0>,
9923                     AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
9924                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
9925   defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
9926                     (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
9927                     OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
9928                     "$src2, ${src3}"##_.BroadcastStr##", $src4",
9929                     (OpNode (_.VT _.RC:$src1),
9930                             (_.VT _.RC:$src2),
9931                             (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
9932                             (i8 imm:$src4)), itins.rm, 1, 0>, EVEX_B,
9933                     AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
9934                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
9935   }// Constraints = "$src1 = $dst"
9936
9937   // Additional patterns for matching passthru operand in other positions.
9938   def : Pat<(_.VT (vselect _.KRCWM:$mask,
9939                    (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
9940                    _.RC:$src1)),
9941             (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
9942              _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
9943   def : Pat<(_.VT (vselect _.KRCWM:$mask,
9944                    (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
9945                    _.RC:$src1)),
9946             (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
9947              _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
9948
9949   // Additional patterns for matching loads in other positions.
9950   def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
9951                           _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
9952             (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
9953                                    addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
9954   def : Pat<(_.VT (OpNode _.RC:$src1,
9955                           (bitconvert (_.LdFrag addr:$src3)),
9956                           _.RC:$src2, (i8 imm:$src4))),
9957             (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
9958                                    addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
9959
9960   // Additional patterns for matching zero masking with loads in other
9961   // positions.
9962   def : Pat<(_.VT (vselect _.KRCWM:$mask,
9963                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
9964                     _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
9965                    _.ImmAllZerosV)),
9966             (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
9967              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
9968   def : Pat<(_.VT (vselect _.KRCWM:$mask,
9969                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
9970                     _.RC:$src2, (i8 imm:$src4)),
9971                    _.ImmAllZerosV)),
9972             (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
9973              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
9974
9975   // Additional patterns for matching masked loads with different
9976   // operand orders.
9977   def : Pat<(_.VT (vselect _.KRCWM:$mask,
9978                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
9979                     _.RC:$src2, (i8 imm:$src4)),
9980                    _.RC:$src1)),
9981             (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
9982              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
9983   def : Pat<(_.VT (vselect _.KRCWM:$mask,
9984                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
9985                     _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
9986                    _.RC:$src1)),
9987             (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
9988              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
9989   def : Pat<(_.VT (vselect _.KRCWM:$mask,
9990                    (OpNode _.RC:$src2, _.RC:$src1,
9991                     (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
9992                    _.RC:$src1)),
9993             (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
9994              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
9995   def : Pat<(_.VT (vselect _.KRCWM:$mask,
9996                    (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
9997                     _.RC:$src1, (i8 imm:$src4)),
9998                    _.RC:$src1)),
9999             (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10000              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
10001   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10002                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
10003                     _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
10004                    _.RC:$src1)),
10005             (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10006              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
10007
10008   // Additional patterns for matching broadcasts in other positions.
10009   def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10010                           _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
10011             (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
10012                                    addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10013   def : Pat<(_.VT (OpNode _.RC:$src1,
10014                           (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10015                           _.RC:$src2, (i8 imm:$src4))),
10016             (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
10017                                    addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10018
10019   // Additional patterns for matching zero masking with broadcasts in other
10020   // positions.
10021   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10022                    (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10023                     _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10024                    _.ImmAllZerosV)),
10025             (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
10026              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
10027              (VPTERNLOG321_imm8 imm:$src4))>;
10028   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10029                    (OpNode _.RC:$src1,
10030                     (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10031                     _.RC:$src2, (i8 imm:$src4)),
10032                    _.ImmAllZerosV)),
10033             (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
10034              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
10035              (VPTERNLOG132_imm8 imm:$src4))>;
10036
10037   // Additional patterns for matching masked broadcasts with different
10038   // operand orders.
10039   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10040                    (OpNode _.RC:$src1,
10041                     (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10042                     _.RC:$src2, (i8 imm:$src4)),
10043                    _.RC:$src1)),
10044             (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10045              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10046   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10047                    (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10048                     _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10049                    _.RC:$src1)),
10050             (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10051              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10052   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10053                    (OpNode _.RC:$src2, _.RC:$src1,
10054                     (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10055                     (i8 imm:$src4)), _.RC:$src1)),
10056             (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10057              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
10058   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10059                    (OpNode _.RC:$src2,
10060                     (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10061                     _.RC:$src1, (i8 imm:$src4)),
10062                    _.RC:$src1)),
10063             (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10064              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
10065   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10066                    (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10067                     _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
10068                    _.RC:$src1)),
10069             (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10070              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
10071 }
10072
// Instantiates avx512_ternlog (opcode 0x25, X86vpternlog node) for the three
// vector widths of the given type family: Z under HasAVX512, Z128 and Z256
// under HasAVX512 and HasVLX.
10073 multiclass avx512_common_ternlog<string OpcodeStr, OpndItins itins,
10074                                  AVX512VLVectorVTInfo _> {
10075   let Predicates = [HasAVX512] in
10076     defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info512>, EVEX_V512;
10077   let Predicates = [HasAVX512, HasVLX] in {
10078     defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info128>, EVEX_V128;
10079     defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info256>, EVEX_V256;
10080   }
10081 }
10082
// Dword (i32) and qword (i64, VEX_W) element variants of VPTERNLOG.
10083 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SSE_INTALU_ITINS_P,
10084                                         avx512vl_i32_info>;
10085 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SSE_INTALU_ITINS_P,
10086                                         avx512vl_i64_info>, VEX_W;
10087
10088 //===----------------------------------------------------------------------===//
10089 // AVX-512 - FixupImm
10090 //===----------------------------------------------------------------------===//
10091
// Packed FIXUPIMM forms for one vector width, with $src1 tied to $dst and the
// rounding argument fixed to FROUND_CURRENT. The third source is typed as the
// integer vector type _.IntVT.
//   rri  - third source in a register.
//   rmi  - third source loaded as a full vector.
//   rmbi - third source is a broadcast scalar load (EVEX_B).
10092 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
10093                                   OpndItins itins, X86VectorVTInfo _>{
10094   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
10095     defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
10096                         (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10097                          OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10098                         (OpNode (_.VT _.RC:$src1),
10099                                 (_.VT _.RC:$src2),
10100                                 (_.IntVT _.RC:$src3),
10101                                 (i32 imm:$src4),
10102                                 (i32 FROUND_CURRENT)), itins.rr>, Sched<[itins.Sched]>;
10103     defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10104                       (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
10105                       OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10106                       (OpNode (_.VT _.RC:$src1),
10107                               (_.VT _.RC:$src2),
10108                               (_.IntVT (bitconvert (_.LdFrag addr:$src3))),
10109                               (i32 imm:$src4),
10110                               (i32 FROUND_CURRENT)), itins.rm>,
10111                       Sched<[itins.Sched.Folded, ReadAfterLd]>;
10112     defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10113                       (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
10114                     OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
10115                     "$src2, ${src3}"##_.BroadcastStr##", $src4",
10116                       (OpNode (_.VT _.RC:$src1),
10117                               (_.VT _.RC:$src2),
10118                               (_.IntVT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
10119                               (i32 imm:$src4),
10120                               (i32 FROUND_CURRENT)), itins.rm>,
10121                     EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
10122   } // Constraints = "$src1 = $dst"
10123 }
10124
// Packed FIXUPIMM register form with the {sae} modifier (rrib). It is the same
// node as the rri form but with the rounding argument set to FROUND_NO_EXC and
// EVEX_B applied.
10125 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
10126                                       SDNode OpNode, OpndItins itins,
10127                                       X86VectorVTInfo _>{
10128 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
10129   defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
10130                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10131                       OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
10132                       "$src2, $src3, {sae}, $src4",
10133                       (OpNode (_.VT _.RC:$src1),
10134                                 (_.VT _.RC:$src2),
10135                                 (_.IntVT _.RC:$src3),
10136                                 (i32 imm:$src4),
10137                                 (i32 FROUND_NO_EXC)), itins.rr>,
10138                       EVEX_B, Sched<[itins.Sched]>;
10139   }
10140 }
10141
// Scalar FIXUPIMM forms (HasAVX512), with $src1 tied to $dst. The third source
// is typed through the separate _src3VT info.
//   rri  - third source in a register, FROUND_CURRENT.
//   rrib - third source in a register, {sae} form (FROUND_NO_EXC, EVEX_B).
//   rmi  - third source is a scalar load wrapped in scalar_to_vector.
10142 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
10143                                   OpndItins itins, X86VectorVTInfo _,
10144                                   X86VectorVTInfo _src3VT> {
10145   let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
10146       ExeDomain = _.ExeDomain in {
10147     defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10148                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10149                       OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10150                       (OpNode (_.VT _.RC:$src1),
10151                               (_.VT _.RC:$src2),
10152                               (_src3VT.VT _src3VT.RC:$src3),
10153                               (i32 imm:$src4),
10154                               (i32 FROUND_CURRENT)), itins.rr>, Sched<[itins.Sched]>;
    // rrib has no memory operand, so it uses the register itinerary and the
    // unfolded scheduling class, like rri above.
10155     defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10156                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10157                       OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
10158                       "$src2, $src3, {sae}, $src4",
10159                       (OpNode (_.VT _.RC:$src1),
10160                               (_.VT _.RC:$src2),
10161                               (_src3VT.VT _src3VT.RC:$src3),
10162                               (i32 imm:$src4),
10163                               (i32 FROUND_NO_EXC)), itins.rr>,
10164                       EVEX_B, Sched<[itins.Sched]>;
10165     defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10166                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
10167                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10168                      (OpNode (_.VT _.RC:$src1),
10169                              (_.VT _.RC:$src2),
10170                              (_src3VT.VT (scalar_to_vector
10171                                        (_src3VT.ScalarLdFrag addr:$src3))),
10172                              (i32 imm:$src4),
10173                              (i32 FROUND_CURRENT)), itins.rm>,
10174                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
10175   }
10176 }
10177
// Instantiates the packed VFIXUPIMM records (opcode 0x54, X86VFixupimm) for
// each vector width in _Vec. Z (512-bit, HasAVX512) combines the regular
// forms with the {sae} form; Z128 and Z256 additionally require HasVLX and
// get only the regular forms.
10178 multiclass avx512_fixupimm_packed_all<OpndItins itins, AVX512VLVectorVTInfo _Vec> {
10179   let Predicates = [HasAVX512] in
10180     defm Z    : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
10181                                        _Vec.info512>,
10182                 avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, itins,
10183                                 _Vec.info512>, AVX512AIi8Base, EVEX_4V, EVEX_V512;
10184   let Predicates = [HasAVX512, HasVLX] in {
10185     defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
10186                             _Vec.info128>, AVX512AIi8Base, EVEX_4V, EVEX_V128;
10187     defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
10188                             _Vec.info256>, AVX512AIi8Base, EVEX_4V, EVEX_V256;
10189   }
10190 }
10191
// VFIXUPIMM instantiations. The scalar SS/SD records use opcode 0x55 and pass
// an integer vector info (v4i32x / v2i64x) as the third-operand type; the
// packed PS/PD records go through avx512_fixupimm_packed_all. The
// double-precision variants add VEX_W.
10192 defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
10193                                           SSE_ALU_F32S, f32x_info, v4i32x_info>,
10194                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10195 defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
10196                                           SSE_ALU_F64S, f64x_info, v2i64x_info>,
10197                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10198 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SSE_ALU_F32P, avx512vl_f32_info>,
10199                          EVEX_CD8<32, CD8VF>;
10200 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SSE_ALU_F64P, avx512vl_f64_info>,
10201                          EVEX_CD8<64, CD8VF>, VEX_W;
10202
10203
10204
10205 // Patterns used to select SSE scalar fp arithmetic instructions from
10206 // either:
10207 //
10208 // (1) a scalar fp operation followed by a blend
10209 //
10210 // The effect is that the backend no longer emits unnecessary vector
10211 // insert instructions immediately after SSE scalar fp instructions
10212 // like addss or mulss.
10213 //
10214 // For example, given the following code:
10215 //   __m128 foo(__m128 A, __m128 B) {
10216 //     A[0] += B[0];
10217 //     return A;
10218 //   }
10219 //
10220 // Previously we generated:
10221 //   addss %xmm0, %xmm1
10222 //   movss %xmm1, %xmm0
10223 //
10224 // We now generate:
10225 //   addss %xmm1, %xmm0
10226 //
10227 // (2) a vector packed single/double fp operation followed by a vector insert
10228 //
10229 // The effect is that the backend converts the packed fp instruction
10230 // followed by a vector insert into a single SSE scalar fp instruction.
10231 //
10232 // For example, given the following code:
10233 //   __m128 foo(__m128 A, __m128 B) {
10234 //     __m128 C = A + B;
10235 //     return (__m128) {C[0], A[1], A[2], A[3]};
10236 //   }
10237 //
10238 // Previously we generated:
10239 //   addps %xmm0, %xmm1
10240 //   movss %xmm1, %xmm0
10241 //
10242 // We now generate:
10243 //   addss %xmm1, %xmm0
10244
10245 // TODO: Some canonicalization in lowering would simplify the number of
10246 // patterns we have to try to match.
// Selection patterns (HasAVX512) for an f32 operation `Op` whose result is
// merged into element 0 of a v4f32 through X86Movss. The unmasked patterns
// select "V" # OpcPrefix # "SSZrr_Int"; the X86selects pattern selects the
// masked "V" # OpcPrefix # "SSZrr_Intk" record.
10247 multiclass AVX512_scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
10248   let Predicates = [HasAVX512] in {
10249     // extracted scalar math op with insert via movss
10250     def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector
10251           (Op (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
10252           FR32X:$src))))),
10253       (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
10254           (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
10255
10256     // vector math op with insert via movss
10257     def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst),
10258           (Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)))),
10259       (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
10260
10261     // extracted masked scalar math op with insert via movss
          // ($src0 is the value chosen by X86selects when the mask is clear; it
          // is passed as the first operand of the masked instruction.)
10262     def : Pat<(X86Movss (v4f32 VR128X:$src1),
10263                (scalar_to_vector
10264                 (X86selects VK1WM:$mask,
10265                             (Op (f32 (extractelt (v4f32 VR128X:$src1), (iPTR 0))),
10266                                 FR32X:$src2),
10267                             FR32X:$src0))),
10268       (!cast<I>("V"#OpcPrefix#SSZrr_Intk) (COPY_TO_REGCLASS FR32X:$src0, VR128X),
10269           VK1WM:$mask, v4f32:$src1,
10270           (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
10271   }
10272 }
10273
// Instantiate the f32 patterns for fadd, fsub, fmul and fdiv.
10274 defm : AVX512_scalar_math_f32_patterns<fadd, "ADD">;
10275 defm : AVX512_scalar_math_f32_patterns<fsub, "SUB">;
10276 defm : AVX512_scalar_math_f32_patterns<fmul, "MUL">;
10277 defm : AVX512_scalar_math_f32_patterns<fdiv, "DIV">;
10278
// Selection patterns (HasAVX512) for an f64 operation `Op` whose result is
// merged into element 0 of a v2f64 through X86Movsd. The unmasked patterns
// select "V" # OpcPrefix # "SDZrr_Int"; the X86selects pattern selects the
// masked "V" # OpcPrefix # "SDZrr_Intk" record.
10279 multiclass AVX512_scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
10280   let Predicates = [HasAVX512] in {
10281     // extracted scalar math op with insert via movsd
10282     def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector
10283           (Op (f64 (extractelt (v2f64 VR128X:$dst), (iPTR 0))),
10284           FR64X:$src))))),
10285       (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
10286           (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
10287
10288     // vector math op with insert via movsd
10289     def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst),
10290           (Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)))),
10291       (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
10292
10293     // extracted masked scalar math op with insert via movsd
10294     def : Pat<(X86Movsd (v2f64 VR128X:$src1),
10295                (scalar_to_vector
10296                 (X86selects VK1WM:$mask,
10297                             (Op (f64 (extractelt (v2f64 VR128X:$src1), (iPTR 0))),
10298                                 FR64X:$src2),
10299                             FR64X:$src0))),
10300       (!cast<I>("V"#OpcPrefix#SDZrr_Intk) (COPY_TO_REGCLASS FR64X:$src0, VR128X),
10301           VK1WM:$mask, v2f64:$src1,
10302           (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
10303   }
10304 }
10305
// Instantiate the f64 patterns for fadd, fsub, fmul and fdiv.
10306 defm : AVX512_scalar_math_f64_patterns<fadd, "ADD">;
10307 defm : AVX512_scalar_math_f64_patterns<fsub, "SUB">;
10308 defm : AVX512_scalar_math_f64_patterns<fmul, "MUL">;
10309 defm : AVX512_scalar_math_f64_patterns<fdiv, "DIV">;
10310
10311 //===----------------------------------------------------------------------===//
10312 // AES instructions
10313 //===----------------------------------------------------------------------===//
10314
// EVEX-encoded AES records built on AESI_binop_rm_int. IntPrefix names the
// intrinsic used for the 128-bit form; "_256" and "_512" are appended to it
// for the wider forms. Z128/Z256 require HasVLX + HasVAES, Z requires
// HasAVX512 + HasVAES.
10315 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
10316   let Predicates = [HasVLX, HasVAES] in {
10317     defm Z128 : AESI_binop_rm_int<Op, OpStr,
10318                                   !cast<Intrinsic>(IntPrefix),
10319                                   loadv2i64, 0, VR128X, i128mem>,
10320                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
10321     defm Z256 : AESI_binop_rm_int<Op, OpStr,
10322                                   !cast<Intrinsic>(IntPrefix##"_256"),
10323                                   loadv4i64, 0, VR256X, i256mem>,
10324                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
10325     }
10326     let Predicates = [HasAVX512, HasVAES] in
10327     defm Z    : AESI_binop_rm_int<Op, OpStr,
10328                                   !cast<Intrinsic>(IntPrefix##"_512"),
10329                                   loadv8i64, 0, VR512, i512mem>,
10330                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
10331 }
10332
// One instantiation per AES mnemonic (opcodes 0xDC-0xDF), each bound to the
// int_x86_aesni_* intrinsic name prefix of the same operation.
10333 defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
10334 defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
10335 defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
10336 defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
10337
10338 //===----------------------------------------------------------------------===//
10339 // PCLMUL instructions - Carry less multiplication
10340 //===----------------------------------------------------------------------===//
10341
// 512-bit EVEX form: requires HasAVX512 and HasVPCLMULQDQ.
10342 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
10343 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
10344                               EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
10345
// 128-bit and 256-bit EVEX forms: require HasVLX and HasVPCLMULQDQ.
10346 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
10347 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
10348                               EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
10349
10350 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
10351                                 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
10352                                 EVEX_CD8<64, CD8VF>, VEX_WIG;
10353 }
10354
10355 // Aliases for the three EVEX-encoded VPCLMULQDQ records (Z, Z128, Z256).
10356 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
10357 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
10358 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
10359
10360 //===----------------------------------------------------------------------===//
10361 // VBMI2
10362 //===----------------------------------------------------------------------===//
10363
// Three-source maskable VBMI2 shift with $src1 tied to $dst. Defines the
// register form `r` and the full-vector memory form `m`, whose third source
// is a bitconverted VTI.LdFrag load.
10364 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
10365                               OpndItins itins, X86VectorVTInfo VTI> {
10366   let Constraints = "$src1 = $dst",
10367       ExeDomain   = VTI.ExeDomain in {
10368     defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
10369                 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
10370                 "$src3, $src2", "$src2, $src3",
10371                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3)),
10372                 itins.rr>, AVX512FMA3Base, Sched<[itins.Sched]>;
10373     defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10374                 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
10375                 "$src3, $src2", "$src2, $src3",
10376                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
10377                         (VTI.VT (bitconvert (VTI.LdFrag addr:$src3))))),
10378                 itins.rm>, AVX512FMA3Base,
10379                 Sched<[itins.Sched.Folded, ReadAfterLd]>;
10380   }
10381 }
10382
// Extends VBMI2_shift_var_rm with `mb`, the form whose third source is a
// scalar load broadcast through X86VBroadcast. The assembly string appends
// VTI.BroadcastStr to $src3 and the record sets EVEX_B.
10383 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
10384                                OpndItins itins, X86VectorVTInfo VTI>
10385          : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI> {
10386   let Constraints = "$src1 = $dst",
10387       ExeDomain   = VTI.ExeDomain in
10388   defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10389               (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
10390               "${src3}"##VTI.BroadcastStr##", $src2",
10391               "$src2, ${src3}"##VTI.BroadcastStr,
10392               (OpNode VTI.RC:$src1, VTI.RC:$src2,
10393                (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3)))),
10394               itins.rm>, AVX512FMA3Base, EVEX_B,
10395               Sched<[itins.Sched.Folded, ReadAfterLd]>;
10396 }
10397
// Instantiates VBMI2_shift_var_rm (no broadcast form) at 512, 256 and 128
// bits. Z requires HasVBMI2; Z256 and Z128 additionally require HasVLX.
10398 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
10399                                      OpndItins itins, AVX512VLVectorVTInfo VTI> {
10400   let Predicates = [HasVBMI2] in
10401   defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info512>, EVEX_V512;
10402   let Predicates = [HasVBMI2, HasVLX] in {
10403     defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info256>, EVEX_V256;
10404     defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info128>, EVEX_V128;
10405   }
10406 }
10407
// Instantiates VBMI2_shift_var_rmb (register, memory and broadcast forms) at
// 512, 256 and 128 bits. Z requires HasVBMI2; Z256 and Z128 additionally
// require HasVLX.
10408 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
10409                                       OpndItins itins, AVX512VLVectorVTInfo VTI> {
10410   let Predicates = [HasVBMI2] in
10411   defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info512>, EVEX_V512;
10412   let Predicates = [HasVBMI2, HasVLX] in {
10413     defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info256>, EVEX_V256;
10414     defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info128>, EVEX_V128;
10415   }
10416 }
// Word/dword/qword variants of a variable-count VBMI2 shift. W uses opcode
// wOp and the non-broadcast multiclass; D and Q share opcode dqOp and include
// the broadcast form. W and Q set VEX_W. The mnemonic is Prefix followed by
// "w", "d" or "q".
10417 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
10418                            SDNode OpNode, OpndItins itins> {
10419   defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, itins,
10420              avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
10421   defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, itins,
10422              avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10423   defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, itins,
10424              avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
10425 }
10426
// Word/dword/qword variants of an immediate-count VBMI2 shift, all gated on
// HasVBMI2. W goes through avx512_common_3Op_rm_imm8 with opcode wOp; D and Q
// go through avx512_common_3Op_imm8 with opcode dqOp. W and Q set VEX_W.
10427 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
10428                            SDNode OpNode, OpndItins itins> {
10429   defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", itins,
10430              avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
10431              VEX_W, EVEX_CD8<16, CD8VF>;
10432   defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
10433              OpNode, itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10434   defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
10435              itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10436 }
10437
10438 // Concat & Shift: variable-count (V suffix) and immediate-count forms.
10439 defm VPSHLDV     : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SSE_INTMUL_ITINS_P>;
10440 defm VPSHRDV     : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SSE_INTMUL_ITINS_P>;
10441 defm VPSHLD      : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SSE_INTMUL_ITINS_P>;
10442 defm VPSHRD      : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SSE_INTMUL_ITINS_P>;
10443
10444 // Compress: byte and word element widths (opcode 0x63); the word form adds VEX_W.
10445 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", AVX512_COMPRESS,
10446                                          avx512vl_i8_info, HasVBMI2>, EVEX;
10447 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", AVX512_COMPRESS,
10448                                           avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
10449 // Expand: byte and word element widths (opcode 0x62); the word form adds VEX_W.
10450 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", AVX512_EXPAND,
10451                                       avx512vl_i8_info, HasVBMI2>, EVEX;
10452 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", AVX512_EXPAND,
10453                                       avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
10454
10455 //===----------------------------------------------------------------------===//
10456 // VNNI
10457 //===----------------------------------------------------------------------===//
10458
// Three-source maskable VNNI operation with $src1 tied to $dst. Defines the
// register form `r`, the full-vector memory form `m`, and the broadcast
// memory form `mb` (X86VBroadcast of a scalar load, EVEX_B). The memory forms
// use EVEX_CD8<32, CD8VF>.
10459 let Constraints = "$src1 = $dst" in
10460 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
10461                     OpndItins itins, X86VectorVTInfo VTI> {
10462   defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
10463                                    (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
10464                                    "$src3, $src2", "$src2, $src3",
10465                                    (VTI.VT (OpNode VTI.RC:$src1,
10466                                             VTI.RC:$src2, VTI.RC:$src3)),
10467                                    itins.rr>, EVEX_4V, T8PD, Sched<[itins.Sched]>;
10468   defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10469                                    (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
10470                                    "$src3, $src2", "$src2, $src3",
10471                                    (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
10472                                             (VTI.VT (bitconvert
10473                                                      (VTI.LdFrag addr:$src3))))),
10474                                    itins.rm>, EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
10475                                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
10476   defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10477                                    (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
10478                                    OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
10479                                    "$src2, ${src3}"##VTI.BroadcastStr,
10480                                    (OpNode VTI.RC:$src1, VTI.RC:$src2,
10481                                     (VTI.VT (X86VBroadcast
10482                                              (VTI.ScalarLdFrag addr:$src3)))),
10483                                    itins.rm>, EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
10484                                    T8PD, Sched<[itins.Sched.Folded, ReadAfterLd]>;
10485 }
10486
// Instantiates VNNI_rmb on 32-bit integer vectors at 512, 256 and 128 bits.
// Z requires HasVNNI; Z256 and Z128 additionally require HasVLX.
10487 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode, OpndItins itins> {
10488   let Predicates = [HasVNNI] in
10489   defm Z      :   VNNI_rmb<Op, OpStr, OpNode, itins, v16i32_info>, EVEX_V512;
10490   let Predicates = [HasVNNI, HasVLX] in {
10491     defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, itins, v8i32x_info>, EVEX_V256;
10492     defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, itins, v4i32x_info>, EVEX_V128;
10493   }
10494 }
10495
// The four VNNI instructions occupy opcodes 0x50-0x53 and currently all use
// the SSE_PMADD itinerary.
10496 // FIXME: Is there a better scheduler itinerary for VPDP?
10497 defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SSE_PMADD>;
10498 defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SSE_PMADD>;
10499 defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SSE_PMADD>;
10500 defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SSE_PMADD>;
10501
10502 //===----------------------------------------------------------------------===//
10503 // Bit Algorithms
10504 //===----------------------------------------------------------------------===//
10505
// Byte and word population count, selected from ctpop and gated on HasBITALG.
// Both use opcode 0x54; the word form adds VEX_W.
10506 // FIXME: Is there a better scheduler itinerary for VPOPCNTB/VPOPCNTW?
10507 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SSE_INTALU_ITINS_P,
10508                                    avx512vl_i8_info, HasBITALG>;
10509 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SSE_INTALU_ITINS_P,
10510                                    avx512vl_i16_info, HasBITALG>, VEX_W;
10511
// Extra ctpop lowering patterns for the two records defined above.
10512 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
10513 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
10514
// vpshufbitqmb (opcode 0x8F) as a maskable compare-style record: the result
// is written to a mask register (VTI.KRC). Defines the register form `rr`
// and the memory form `rm`, whose second source is a bitconverted
// VTI.LdFrag load.
10515 multiclass VPSHUFBITQMB_rm<OpndItins itins, X86VectorVTInfo VTI> {
10516   defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
10517                                 (ins VTI.RC:$src1, VTI.RC:$src2),
10518                                 "vpshufbitqmb",
10519                                 "$src2, $src1", "$src1, $src2",
10520                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
10521                                 (VTI.VT VTI.RC:$src2)), itins.rr>, EVEX_4V, T8PD,
10522                                 Sched<[itins.Sched]>;
10523   defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
10524                                 (ins VTI.RC:$src1, VTI.MemOp:$src2),
10525                                 "vpshufbitqmb",
10526                                 "$src2, $src1", "$src1, $src2",
10527                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
10528                                 (VTI.VT (bitconvert (VTI.LdFrag addr:$src2)))),
10529                                 itins.rm>, EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
10530                                 Sched<[itins.Sched.Folded, ReadAfterLd]>;
10531 }
10532
// Instantiates VPSHUFBITQMB_rm at 512, 256 and 128 bits. Z requires
// HasBITALG; Z256 and Z128 additionally require HasVLX.
10533 multiclass VPSHUFBITQMB_common<OpndItins itins, AVX512VLVectorVTInfo VTI> {
10534   let Predicates = [HasBITALG] in
10535   defm Z      : VPSHUFBITQMB_rm<itins, VTI.info512>, EVEX_V512;
10536   let Predicates = [HasBITALG, HasVLX] in {
10537     defm Z256 : VPSHUFBITQMB_rm<itins, VTI.info256>, EVEX_V256;
10538     defm Z128 : VPSHUFBITQMB_rm<itins, VTI.info128>, EVEX_V128;
10539   }
10540 }
10541
// Instantiated on byte vectors; SSE_INTMUL_ITINS_P is a placeholder itinerary.
10542 // FIXME: Is there a better scheduler itinerary for VPSHUFBITQMB?
10543 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SSE_INTMUL_ITINS_P, avx512vl_i8_info>;
10544
10545 //===----------------------------------------------------------------------===//
10546 // GFNI
10547 //===----------------------------------------------------------------------===//
10548
// EVEX-encoded GF2P8MULB on byte vectors via avx512_binop_rm, with the final
// template argument set to 1 for every width.
// NOTE(review): that argument is presumably the commutable flag - confirm
// against the avx512_binop_rm definition.
// Z requires HasGFNI + HasAVX512 + HasBWI; Z256 and Z128 require
// HasGFNI + HasVLX + HasBWI.
10549 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode> {
10550   let Predicates = [HasGFNI, HasAVX512, HasBWI] in
10551   defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info,
10552                                 SSE_INTALU_ITINS_P, 1>, EVEX_V512;
10553   let Predicates = [HasGFNI, HasVLX, HasBWI] in {
10554     defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info,
10555                                 SSE_INTALU_ITINS_P, 1>, EVEX_V256;
10556     defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info,
10557                                 SSE_INTALU_ITINS_P, 1>, EVEX_V128;
10558   }
10559 }
10560
// vgf2p8mulb: opcode 0xCF with the T8PD prefix/map mixin.
10561 defm GF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb>,
10562                  EVEX_CD8<8, CD8VF>, T8PD;
10563
// Adds the broadcast-memory form `rmbi` on top of whatever
// avx512_3Op_rm_imm8 defines for VTI. The second source is a loadi64 scalar
// broadcast as BcstVTI.VT and then bitconverted for OpNode, so the assembly
// string uses BcstVTI.BroadcastStr rather than VTI's. EVEX_B is set.
10564 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
10565                                       OpndItins itins, X86VectorVTInfo VTI,
10566                                       X86VectorVTInfo BcstVTI>
10567            : avx512_3Op_rm_imm8<Op, OpStr, OpNode, itins, VTI, VTI> {
10568   let ExeDomain = VTI.ExeDomain in
10569   defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10570                 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
10571                 OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
10572                 "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
10573                 (OpNode (VTI.VT VTI.RC:$src1),
10574                  (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
10575                  (i8 imm:$src3)), itins.rm>, EVEX_B,
10576                  Sched<[itins.Sched.Folded, ReadAfterLd]>;
10577 }
10578
// Instantiates GF2P8AFFINE_avx512_rmb_imm at 512, 256 and 128 bits, pairing
// each byte vector type with the i64 vector type of the same width for the
// broadcast form. Z requires HasGFNI + HasAVX512 + HasBWI; Z256 and Z128
// require HasGFNI + HasVLX + HasBWI.
10579 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
10580                                      OpndItins itins> {
10581   let Predicates = [HasGFNI, HasAVX512, HasBWI] in
10582   defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins, v64i8_info,
10583                                            v8i64_info>, EVEX_V512;
10584   let Predicates = [HasGFNI, HasVLX, HasBWI] in {
10585     defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins, v32i8x_info,
10586                                            v4i64x_info>, EVEX_V256;
10587     defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins, v16i8x_info,
10588                                            v2i64x_info>, EVEX_V128;
10589   }
10590 }
10591
// The two GF(2^8) affine instructions: opcode 0xCF for the inverse variant
// and 0xCE for the plain variant. Both set VEX_W and use AVX512AIi8Base.
10592 defm GF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
10593                         X86GF2P8affineinvqb, SSE_INTMUL_ITINS_P>,
10594                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
10595 defm GF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
10596                         X86GF2P8affineqb, SSE_INTMUL_ITINS_P>,
10597                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
10598