]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/lib/Target/X86/X86InstrAVX512.td
Merge libc++ trunk r321414 to contrib/libc++.
[FreeBSD/FreeBSD.git] / contrib / llvm / lib / Target / X86 / X86InstrAVX512.td
1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file describes the X86 AVX512 instruction set, defining the
11 // instructions, and properties of the instructions which are needed for code
12 // generation, machine code emission, and analysis.
13 //
14 //===----------------------------------------------------------------------===//
15
16 // Group template arguments that can be derived from the vector type (EltNum x
17 // EltVT).  These are things like the register class for the writemask, etc.
18 // The idea is to pass one of these as the template argument rather than the
19 // individual arguments.
20 // The template is also used for scalar types, in this case numelts is 1.
// Template arguments:
//   numelts - number of elements (1 for the scalar records).
//   eltvt   - element value type.
//   rc      - register class stored in RC.
//   suffix  - mnemonic suffix stored in Suffix (defaults to "").
21 class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
22                       string suffix = ""> {
  // Plain copies of the template arguments.
23   RegisterClass RC = rc;
24   ValueType EltVT = eltvt;
25   int NumElts = numelts;
26
27   // Corresponding mask register class.
28   RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
29
30   // Corresponding write-mask register class.
31   RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
32
33   // The mask VT.
34   ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
35
36   // Suffix used in the instruction mnemonic.
37   string Suffix = suffix;
38
39   // VTName is a string name for vector VT. For vector types it will be
40   // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
41   // It is a little bit complex for scalar types, where NumElts = 1.
42   // In this case we build v4f32 or v2f64
  // (i.e. 4 elements for 32-bit and 2 elements for 64-bit element types; any
  // other element size keeps NumElts).
43   string VTName = "v" # !if (!eq (NumElts, 1),
44                         !if (!eq (EltVT.Size, 32), 4,
45                         !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
46
47   // The vector VT.
48   ValueType VT = !cast<ValueType>(VTName);
49
50   string EltTypeName = !cast<string>(EltVT);
51   // Size of the element type in bits, e.g. 32 for v16i32.
  // EltSizeName is that size as a string, obtained by removing "i" and "f"
  // from EltTypeName; EltSize is the same value as an int.
52   string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
53   int EltSize = EltVT.Size;
54
55   // "i" for integer types and "f" for floating-point types
56   string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
57
58   // Size of RC in bits, e.g. 512 for VR512.
59   int Size = VT.Size;
60
61   // The corresponding memory operand, e.g. i512mem for VR512.
  // ScalarMemOp is the single-element counterpart, named EltVT # "mem".
62   X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
63   X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
64   // FP scalar memory operand for intrinsics - ssmem/sdmem.
  // Left unset (?) for element types other than f32/f64.
65   Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
66                            !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
67
68   // Load patterns
69   // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
70   //       due to load promotion during legalization
  //       (512-bit integer VTs likewise use loadv8i64; all other cases use
  //       "load" # VTName).
71   PatFrag LdFrag = !cast<PatFrag>("load" #
72                                   !if (!eq (TypeVariantName, "i"),
73                                        !if (!eq (Size, 128), "v2i64",
74                                        !if (!eq (Size, 256), "v4i64",
75                                        !if (!eq (Size, 512), "v8i64",
76                                             VTName))), VTName));
77
  // Same selection as LdFrag, but with the "alignedload" prefix.
78   PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
79                                          !if (!eq (TypeVariantName, "i"),
80                                                !if (!eq (Size, 128), "v2i64",
81                                                !if (!eq (Size, 256), "v4i64",
82                                                !if (!eq (Size, 512), "v8i64",
83                                                    VTName))), VTName));
84
  // Load fragment for a single element, named "load" # EltVT.
85   PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
86
  // sse_load_f32 / sse_load_f64 for f32 / f64 elements; unset (?) otherwise.
87   ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
88                                           !cast<ComplexPattern>("sse_load_f32"),
89                                     !if (!eq (EltTypeName, "f64"),
90                                           !cast<ComplexPattern>("sse_load_f64"),
91                                     ?));
92
93   // The corresponding float type, e.g. v16f32 for v16i32
94   // Note: For EltSize < 32, FloatVT is illegal and TableGen
95   //       fails to compile, so we choose FloatVT = VT
96   ValueType FloatVT = !cast<ValueType>(
97                         !if (!eq (!srl(EltSize,5),0),
98                              VTName,
99                              !if (!eq(TypeVariantName, "i"),
100                                   "v" # NumElts # "f" # EltSize,
101                                   VTName)));
102
  // The corresponding integer type, e.g. v16i32 for v16f32.  As for FloatVT,
  // VT itself is used when EltSize < 32.
103   ValueType IntVT = !cast<ValueType>(
104                         !if (!eq (!srl(EltSize,5),0),
105                              VTName,
106                              !if (!eq(TypeVariantName, "f"),
107                                   "v" # NumElts # "i" # EltSize,
108                                   VTName)));
109   // The string to specify embedded broadcast in assembly.
110   string BroadcastStr = "{1to" # NumElts # "}";
111
112   // 8-bit compressed displacement tuple/subvector format.  This is only
113   // defined for NumElts <= 8.
  // (The test below accepts any NumElts < 16 and looks up CD8VT<NumElts>;
  // for larger NumElts the field is left unset.)
114   CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
115                                !cast<CD8VForm>("CD8VT" # NumElts), ?);
116
  // sub_xmm for 128-bit types, sub_ymm for 256-bit types, unset otherwise.
117   SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
118                           !if (!eq (Size, 256), sub_ymm, ?));
119
  // SSEPackedSingle for f32, SSEPackedDouble for f64, SSEPackedInt for every
  // other element type.
120   Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
121                      !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
122                      SSEPackedInt));
123
  // FR32X for f32 elements; FR64X for every other element type.
124   RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
125
126   // A vector type of the same width with element type i64. This is used to
127   // create patterns for logic ops.
128   ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");
129
130   // A vector type of the same width with element type i32.  This is used to
131   // create the canonical constant zero node ImmAllZerosV.
132   ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
133   dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
134
  // "Z128" / "Z256" for 128- / 256-bit types, "Z" for any other size.
135   string ZSuffix = !if (!eq (Size, 128), "Z128",
136                    !if (!eq (Size, 256), "Z256", "Z"));
137 }
138
// 512-bit vector types (RC = VR512).
139 def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
140 def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
141 def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
142 def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
143 def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
144 def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;
145
146 // "x" in v32i8x_info means RC = VR256X
// (and RC = VR128X for the 128-bit records further below).
147 def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
148 def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
149 def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
150 def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
151 def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
152 def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;
153
// 128-bit vector types (RC = VR128X).
154 def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
155 def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
156 def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
157 def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
158 def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
159 def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;
160
161 // We map scalar types to the smallest (128-bit) vector type
162 // with the appropriate element type. This allows the same masking logic to be
// used.  The integer scalars use GR32/GR64 as RC, the FP scalars use VR128X.
163 def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
164 def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
165 def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
166 def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
167
// Bundles three X86VectorVTInfo records so they can be passed around as a
// single template argument; the arguments are stored unchanged in info512,
// info256 and info128.
168 class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
169                            X86VectorVTInfo i128> {
170   X86VectorVTInfo info512 = i512;
171   X86VectorVTInfo info256 = i256;
172   X86VectorVTInfo info128 = i128;
173 }
174
// One AVX512VLVectorVTInfo record per element type, grouping the VR512,
// VR256X and VR128X records of that element type.
175 def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
176                                              v16i8x_info>;
177 def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
178                                              v8i16x_info>;
179 def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
180                                              v4i32x_info>;
181 def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
182                                              v2i64x_info>;
183 def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
184                                              v4f32x_info>;
185 def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
186                                              v2f64x_info>;
187
// Describes a mask vector type: its register class (KRC), its write-mask
// register class (KRCWM) and its value type (KVT).  The arguments are stored
// unchanged.
188 class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
189                        ValueType _vt> {
190   RegisterClass KRC = _krc;
191   RegisterClass KRCWM = _krcwm;
192   ValueType KVT = _vt;
193 }
194
// X86KVectorVTInfo records for the v1i1 ... v64i1 mask types, each pairing
// VK<N> with VK<N>WM.
195 def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
196 def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
197 def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
198 def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
199 def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
200 def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
201 def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
202
203 // This multiclass generates the masking variants from the non-masking
204 // variant.  It only provides the assembly pieces for the masking variants.
205 // It assumes custom ISel patterns for masking which can be provided as
206 // template arguments.
//
// Three records are defined:
//   NAME    - unmasked form, built from Ins and Pattern.
//   NAME#k  - masked form, built from MaskingIns and MaskingPattern; the
//             assembly string appends "{${mask}}" to $dst.
//   NAME#kz - zero-masked form, built from ZeroMaskingIns and
//             ZeroMaskingPattern; the assembly string appends
//             "{${mask}} {z}" to $dst.
// AttSrcAsm / IntelSrcAsm are the source-operand parts of the AT&T and Intel
// assembly strings respectively.
207 multiclass AVX512_maskable_custom<bits<8> O, Format F,
208                                   dag Outs,
209                                   dag Ins, dag MaskingIns, dag ZeroMaskingIns,
210                                   string OpcodeStr,
211                                   string AttSrcAsm, string IntelSrcAsm,
212                                   list<dag> Pattern,
213                                   list<dag> MaskingPattern,
214                                   list<dag> ZeroMaskingPattern,
215                                   InstrItinClass itin,
216                                   string MaskingConstraint = "",
217                                   bit IsCommutable = 0,
218                                   bit IsKCommutable = 0> {
219   let isCommutable = IsCommutable in
220     def NAME: AVX512<O, F, Outs, Ins,
221                        OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
222                                      "$dst, "#IntelSrcAsm#"}",
223                        Pattern, itin>;
224
225   // Prefer over VMOV*rrk Pat<>
  // The masked form takes its commutability from IsKCommutable, not
  // IsCommutable.
226   let isCommutable = IsKCommutable in
227     def NAME#k: AVX512<O, F, Outs, MaskingIns,
228                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
229                                      "$dst {${mask}}, "#IntelSrcAsm#"}",
230                        MaskingPattern, itin>,
231               EVEX_K {
232       // In case of the 3src subclass this is overridden with a let.
233       string Constraints = MaskingConstraint;
234     }
235
236   // Zero mask does not add any restrictions to commute operands transformation.
237   // So, it is Ok to use IsCommutable instead of IsKCommutable.
238   let isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
239     def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
240                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
241                                      "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
242                        ZeroMaskingPattern,
243                        itin>,
244               EVEX_KZ;
245 }
246
247
248 // Common base class of AVX512_maskable and AVX512_maskable_3src.
// Wraps AVX512_maskable_custom and builds its three pattern lists:
//   unmasked    - (set _.RC:$dst, RHS)
//   masked      - (set _.RC:$dst, MaskingRHS)
//   zero-masked - (set _.RC:$dst, (Select $mask, RHS, _.ImmAllZerosV))
// Select defaults to vselect.
249 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
250                                   dag Outs,
251                                   dag Ins, dag MaskingIns, dag ZeroMaskingIns,
252                                   string OpcodeStr,
253                                   string AttSrcAsm, string IntelSrcAsm,
254                                   dag RHS, dag MaskingRHS,
255                                   InstrItinClass itin,
256                                   SDNode Select = vselect,
257                                   string MaskingConstraint = "",
258                                   bit IsCommutable = 0,
259                                   bit IsKCommutable = 0> :
260   AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
261                          AttSrcAsm, IntelSrcAsm,
262                          [(set _.RC:$dst, RHS)],
263                          [(set _.RC:$dst, MaskingRHS)],
264                          [(set _.RC:$dst,
265                                (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
266                          itin, MaskingConstraint, IsCommutable,
267                          IsKCommutable>;
268
269 // This multiclass generates the unconditional/non-masking, the masking and
270 // the zero-masking variant of the vector instruction.  In the masking case, the
271 // preserved vector elements come from a new dummy input operand tied to $dst.
272 // This version uses a separate dag for non-masking and masking.
// RHS is used for the unmasked pattern only; MaskRHS is used for both the
// masked and the zero-masked pattern.  The masked form gets ($src0, $mask)
// prepended to Ins and the constraint "$src0 = $dst"; the zero-masked form
// gets only $mask prepended.
273 multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
274                            dag Outs, dag Ins, string OpcodeStr,
275                            string AttSrcAsm, string IntelSrcAsm,
276                            dag RHS, dag MaskRHS,
277                            InstrItinClass itin,
278                            bit IsCommutable = 0, bit IsKCommutable = 0,
279                            SDNode Select = vselect> :
280    AVX512_maskable_custom<O, F, Outs, Ins,
281                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
282                           !con((ins _.KRCWM:$mask), Ins),
283                           OpcodeStr, AttSrcAsm, IntelSrcAsm,
284                           [(set _.RC:$dst, RHS)],
285                           [(set _.RC:$dst,
286                               (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
287                           [(set _.RC:$dst,
288                               (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
289                           itin, "$src0 = $dst", IsCommutable, IsKCommutable>;
290
291 // This multiclass generates the unconditional/non-masking, the masking and
292 // the zero-masking variant of the vector instruction.  In the masking case, the
293 // preserved vector elements come from a new dummy input operand tied to $dst.
// The same RHS dag is used for all three variants: the masked pattern is
// (Select $mask, RHS, $src0) with the constraint "$src0 = $dst", and the
// zero-masked pattern is supplied by AVX512_maskable_common.
294 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
295                            dag Outs, dag Ins, string OpcodeStr,
296                            string AttSrcAsm, string IntelSrcAsm,
297                            dag RHS,
298                            InstrItinClass itin,
299                            bit IsCommutable = 0, bit IsKCommutable = 0,
300                            SDNode Select = vselect> :
301    AVX512_maskable_common<O, F, _, Outs, Ins,
302                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
303                           !con((ins _.KRCWM:$mask), Ins),
304                           OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
305                           (Select _.KRCWM:$mask, RHS, _.RC:$src0), itin,
306                           Select, "$src0 = $dst", IsCommutable, IsKCommutable>;
307
308 // This multiclass generates the unconditional/non-masking, the masking and
309 // the zero-masking variant of the scalar instruction.
// It forwards to AVX512_maskable with IsKCommutable fixed to 0 and X86selects
// as the select node.
310 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
311                            dag Outs, dag Ins, string OpcodeStr,
312                            string AttSrcAsm, string IntelSrcAsm,
313                            dag RHS,
314                            InstrItinClass itin,
315                            bit IsCommutable = 0> :
316    AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
317                    RHS, itin, IsCommutable, 0, X86selects>;
318
319 // Similar to AVX512_maskable but in this case one of the source operands
320 // ($src1) is already tied to $dst so we just use that for the preserved
321 // vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
322 // $src1.
// $src1 is prepended to NonTiedIns for the unmasked form, and ($src1, $mask)
// for both the masked and the zero-masked form.  An empty MaskingConstraint
// is passed down from here.  When MaskOnly is set, the unmasked pattern is
// built from null_frag instead of RHS.
323 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
324                                 dag Outs, dag NonTiedIns, string OpcodeStr,
325                                 string AttSrcAsm, string IntelSrcAsm,
326                                 dag RHS, InstrItinClass itin,
327                                 bit IsCommutable = 0,
328                                 bit IsKCommutable = 0,
329                                 SDNode Select = vselect,
330                                 bit MaskOnly = 0> :
331    AVX512_maskable_common<O, F, _, Outs,
332                           !con((ins _.RC:$src1), NonTiedIns),
333                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
334                           !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
335                           OpcodeStr, AttSrcAsm, IntelSrcAsm,
336                           !if(MaskOnly, (null_frag), RHS),
337                           (Select _.KRCWM:$mask, RHS, _.RC:$src1), itin,
338                           Select, "", IsCommutable, IsKCommutable>;
339
// Scalar flavour of AVX512_maskable_3src: forwards every argument unchanged
// and passes X86selects as the select node.
340 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
341                                      dag Outs, dag NonTiedIns, string OpcodeStr,
342                                      string AttSrcAsm, string IntelSrcAsm,
343                                      dag RHS, InstrItinClass itin,
344                                      bit IsCommutable = 0,
345                                      bit IsKCommutable = 0,
346                                      bit MaskOnly = 0> :
347    AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
348                         IntelSrcAsm, RHS, itin, IsCommutable, IsKCommutable,
349                         X86selects, MaskOnly>;
350
// Generates the unmasked, masked and zero-masked records with the caller's
// Pattern on the unmasked form only; the masked and zero-masked forms get
// empty pattern lists.  The masked form adds ($src0, $mask) to Ins with the
// constraint "$src0 = $dst"; the zero-masked form adds only $mask.
351 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
352                                   dag Outs, dag Ins,
353                                   string OpcodeStr,
354                                   string AttSrcAsm, string IntelSrcAsm,
355                                   list<dag> Pattern,
356                                   InstrItinClass itin> :
357    AVX512_maskable_custom<O, F, Outs, Ins,
358                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
359                           !con((ins _.KRCWM:$mask), Ins),
360                           OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
361                           itin, "$src0 = $dst">;
362
363
364 // Instruction with mask that puts result in mask register,
365 // like "compare" and "vptest"
// Only two records are defined, NAME (unmasked) and NAME#k (masked); there
// is no zero-masked form.  The "let isCommutable" below covers the unmasked
// record only.
366 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
367                                   dag Outs,
368                                   dag Ins, dag MaskingIns,
369                                   string OpcodeStr,
370                                   string AttSrcAsm, string IntelSrcAsm,
371                                   list<dag> Pattern,
372                                   list<dag> MaskingPattern,
373                                   InstrItinClass itin,
374                                   bit IsCommutable = 0> {
375     let isCommutable = IsCommutable in
376     def NAME: AVX512<O, F, Outs, Ins,
377                        OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
378                                      "$dst, "#IntelSrcAsm#"}",
379                        Pattern, itin>;
380
381     def NAME#k: AVX512<O, F, Outs, MaskingIns,
382                        OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
383                                      "$dst {${mask}}, "#IntelSrcAsm#"}",
384                        MaskingPattern, itin>, EVEX_K;
385 }
386
// Wraps AVX512_maskable_custom_cmp and builds its two pattern lists from RHS
// and MaskingRHS; both patterns set _.KRC:$dst, i.e. the result is a mask.
387 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
388                                   dag Outs,
389                                   dag Ins, dag MaskingIns,
390                                   string OpcodeStr,
391                                   string AttSrcAsm, string IntelSrcAsm,
392                                   dag RHS, dag MaskingRHS,
393                                   InstrItinClass itin,
394                                   bit IsCommutable = 0> :
395   AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
396                          AttSrcAsm, IntelSrcAsm,
397                          [(set _.KRC:$dst, RHS)],
398                          [(set _.KRC:$dst, MaskingRHS)], itin, IsCommutable>;
399
// Mask-producing instruction with an unmasked and a masked form.  The masked
// form prepends $mask to Ins and its pattern is (and $mask, RHS).
400 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
401                            dag Outs, dag Ins, string OpcodeStr,
402                            string AttSrcAsm, string IntelSrcAsm,
403                            dag RHS, InstrItinClass itin,
404                            bit IsCommutable = 0> :
405    AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
406                           !con((ins _.KRCWM:$mask), Ins),
407                           OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
408                           (and _.KRCWM:$mask, RHS), itin, IsCommutable>;
409
// Pattern-less variant of AVX512_maskable_cmp: defines the unmasked and the
// masked record (with $mask prepended to Ins), both with empty pattern lists.
410 multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
411                            dag Outs, dag Ins, string OpcodeStr,
412                            string AttSrcAsm, string IntelSrcAsm,
413                            InstrItinClass itin> :
414    AVX512_maskable_custom_cmp<O, F, Outs,
415                              Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
416                              AttSrcAsm, IntelSrcAsm, [],[], itin>;
417
418 // This multiclass generates the unconditional/non-masking, the masking and
419 // the zero-masking variant of the vector instruction.  In the masking case, the
420 // preserved vector elements come from a new dummy input operand tied to $dst.
// Unlike AVX512_maskable, the masked and zero-masked patterns are built from
// a separate MaskedRHS dag; RHS is used for the unmasked pattern only.
// IsKCommutable is not forwarded, so the masked form keeps the default of 0.
421 multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
422                            dag Outs, dag Ins, string OpcodeStr,
423                            string AttSrcAsm, string IntelSrcAsm,
424                            dag RHS, dag MaskedRHS,
425                            InstrItinClass itin,
426                            bit IsCommutable = 0, SDNode Select = vselect> :
427    AVX512_maskable_custom<O, F, Outs, Ins,
428                           !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
429                           !con((ins _.KRCWM:$mask), Ins),
430                           OpcodeStr, AttSrcAsm, IntelSrcAsm,
431                           [(set _.RC:$dst, RHS)],
432                           [(set _.RC:$dst,
433                                 (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
434                           [(set _.RC:$dst,
435                                 (Select _.KRCWM:$mask, MaskedRHS,
436                                         _.ImmAllZerosV))],
437                           itin, "$src0 = $dst", IsCommutable>;
438
439
440 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
441 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
442 // swizzled by ExecutionDepsFix to pxor.
443 // We set canFoldAsLoad because this can be converted to a constant-pool
444 // load of an all-zeros value if folding it would be beneficial.
// AVX512_512_SETALLONES is defined under the same flags and matches the
// v16i32 all-ones vector instead.
445 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
446     isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
447 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
448                [(set VR512:$dst, (v16i32 immAllZerosV))]>;
449 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
450                [(set VR512:$dst, (v16i32 immAllOnesV))]>;
451 }
452
453 // Alias instructions that allow VPTERNLOG to be used with a mask to create
454 // a mix of all ones and all zeros elements. This is done this way to force
455 // the same register to be used as input for all three sources.
// The _32 pseudo takes a VK16WM mask and selects between the v16i32 all-ones
// and all-zeros vectors; the _64 pseudo takes a VK8WM mask and selects
// between the same constants bitcast to v8i64.
456 let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
457 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
458                                 (ins VK16WM:$mask), "",
459                            [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
460                                                       (v16i32 immAllOnesV),
461                                                       (v16i32 immAllZerosV)))]>;
462 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
463                                 (ins VK8WM:$mask), "",
464                 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
465                                            (bc_v8i64 (v16i32 immAllOnesV)),
466                                            (bc_v8i64 (v16i32 immAllZerosV))))]>;
467 }
468
// All-zeros pseudos for VR128X (v4i32) and VR256X (v8i32), defined with the
// same flags as AVX512_512_SET0.
469 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
470     isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
471 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
472                [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
473 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
474                [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
475 }
476
477 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
478 // This is expanded by ExpandPostRAPseudos.
// The SS pseudo matches fp32imm0 into FR32X; the SD pseudo matches fpimm0
// into FR64X.
479 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
480     isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
481   def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
482                           [(set FR32X:$dst, fp32imm0)]>;
483   def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
484                           [(set FR64X:$dst, fpimm0)]>;
485 }
486
487 //===----------------------------------------------------------------------===//
488 // AVX-512 - VECTOR INSERT
489 //
490
491 // Supports two different pattern operators for mask and unmasked ops. Allows
492 // null_frag to be passed for one.
// Defines a register form (rr) and a memory form (rm), each expanded through
// AVX512_maskable_split.  From describes the inserted subvector and To the
// destination vector.  The mnemonic is
// "vinsert" # From.EltTypeName # "x" # From.NumElts.
// vinsert_insert is used for the unmasked pattern and vinsert_for_mask for
// the masked patterns.
493 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
494                                   X86VectorVTInfo To,
495                                   SDPatternOperator vinsert_insert,
496                                   SDPatternOperator vinsert_for_mask,
497                                   OpndItins itins> {
498   let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
499     defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
500                    (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
501                    "vinsert" # From.EltTypeName # "x" # From.NumElts,
502                    "$src3, $src2, $src1", "$src1, $src2, $src3",
503                    (vinsert_insert:$src3 (To.VT To.RC:$src1),
504                                          (From.VT From.RC:$src2),
505                                          (iPTR imm)),
506                    (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
507                                            (From.VT From.RC:$src2),
508                                            (iPTR imm)), itins.rr>,
509                    AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;
    // Memory form: $src2 is loaded through From.LdFrag and bitconverted to
    // From.VT.
510     let mayLoad = 1 in
511     defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
512                    (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
513                    "vinsert" # From.EltTypeName # "x" # From.NumElts,
514                    "$src3, $src2, $src1", "$src1, $src2, $src3",
515                    (vinsert_insert:$src3 (To.VT To.RC:$src1),
516                                (From.VT (bitconvert (From.LdFrag addr:$src2))),
517                                (iPTR imm)),
518                    (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
519                                (From.VT (bitconvert (From.LdFrag addr:$src2))),
520                                (iPTR imm)), itins.rm>, AVX512AIi8Base, EVEX_4V,
521                    EVEX_CD8<From.EltSize, From.CD8TupleForm>,
522                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
523   }
524 }
525
526 // Passes the same pattern operator for masked and unmasked ops.
// (vinsert_insert is forwarded to vinsert_for_size_split for both operator
// arguments.)
527 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
528                             X86VectorVTInfo To,
529                             SDPatternOperator vinsert_insert,
530                             OpndItins itins> :
531   vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, itins>;
532
// Adds two anonymous selection patterns, guarded by the predicate list p,
// that map vinsert_insert on the From/To types onto existing instructions:
// InstrStr#"rr" when the subvector is in a register and InstrStr#"rm" when it
// is a bitconverted From.LdFrag load.  The instruction's immediate operand is
// produced by applying INSERT_get_vinsert_imm to the matched $ins node.
533 multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
534                        X86VectorVTInfo To, PatFrag vinsert_insert,
535                        SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
536   let Predicates = p in {
    // Register source.
537     def : Pat<(vinsert_insert:$ins
538                      (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
539               (To.VT (!cast<Instruction>(InstrStr#"rr")
540                      To.RC:$src1, From.RC:$src2,
541                      (INSERT_get_vinsert_imm To.RC:$ins)))>;
542
    // Memory source.
543     def : Pat<(vinsert_insert:$ins
544                   (To.VT To.RC:$src1),
545                   (From.VT (bitconvert (From.LdFrag addr:$src2))),
546                   (iPTR imm)),
547               (To.VT (!cast<Instruction>(InstrStr#"rm")
548                   To.RC:$src1, addr:$src2,
549                   (INSERT_get_vinsert_imm To.RC:$ins)))>;
550   }
551 }
552
// Instantiates every vinsert size variant for one element-type family
// (EltVT32 / EltVT64).  Opcode128 is used for the variants inserting a
// 128-bit subvector (32x4, 64x2) and Opcode256 for those inserting a 256-bit
// subvector (64x4, 32x8).  The defined suffixes and their predicates are:
//   32x4Z256 [HasVLX], 32x4Z, 64x4Z,
//   64x2Z256 [HasVLX, HasDQI], 64x2Z [HasDQI], 32x8Z [HasDQI].
553 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
554                             ValueType EltVT64, int Opcode256,
555                             OpndItins itins> {
556
557   let Predicates = [HasVLX] in
558     defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
559                                  X86VectorVTInfo< 4, EltVT32, VR128X>,
560                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
561                                  vinsert128_insert, itins>, EVEX_V256;
562
563   defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
564                                  X86VectorVTInfo< 4, EltVT32, VR128X>,
565                                  X86VectorVTInfo<16, EltVT32, VR512>,
566                                  vinsert128_insert, itins>, EVEX_V512;
567
568   defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
569                                  X86VectorVTInfo< 4, EltVT64, VR256X>,
570                                  X86VectorVTInfo< 8, EltVT64, VR512>,
571                                  vinsert256_insert, itins>, VEX_W, EVEX_V512;
572
573   // Even with DQI we'd like to only use these instructions for masking.
  // (null_frag is passed as the unmasked operator, so only the masked
  // patterns are generated.)
574   let Predicates = [HasVLX, HasDQI] in
575     defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
576                                    X86VectorVTInfo< 2, EltVT64, VR128X>,
577                                    X86VectorVTInfo< 4, EltVT64, VR256X>,
578                                    null_frag, vinsert128_insert, itins>,
579                                    VEX_W, EVEX_V256;
580
581   // Even with DQI we'd like to only use these instructions for masking.
582   let Predicates = [HasDQI] in {
583     defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
584                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
585                                  X86VectorVTInfo< 8, EltVT64, VR512>,
586                                  null_frag, vinsert128_insert, itins>,
587                                  VEX_W, EVEX_V512;
588
589     defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
590                                    X86VectorVTInfo< 8, EltVT32, VR256X>,
591                                    X86VectorVTInfo<16, EltVT32, VR512>,
592                                    null_frag, vinsert256_insert, itins>,
593                                    EVEX_V512;
594   }
595 }
596
597 // FIXME: Is there a better scheduler itinerary for VINSERTF/VINSERTI?
// Itinerary bundle handed to vinsert_for_type for the floating-point inserts.
// Both itinerary slots are IIC_SSE_SHUFP (presumably the register and memory
// forms, in that order -- confirm against OpndItins).
598 let Sched = WriteFShuffle256 in
599 def AVX512_VINSERTF : OpndItins<
600   IIC_SSE_SHUFP, IIC_SSE_SHUFP
601 >;
// Itinerary bundle handed to vinsert_for_type for the integer inserts.
602 let Sched = WriteShuffle256 in
603 def AVX512_VINSERTI : OpndItins<
604   IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
605 >;
606
// Instantiate the insert families: floating point uses opcodes 0x18 (128-bit
// subvector) / 0x1a (256-bit subvector), integer uses 0x38 / 0x3a.
607 defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, AVX512_VINSERTF>;
608 defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, AVX512_VINSERTI>;
609
610 // Codegen pattern with the alternative types,
611 // Even with AVX512DQ we'll still use these for unmasked operations.
// Each line below maps an unmasked insert of <From> into <To> onto the named
// instruction's "rr"/"rm" forms, regardless of that instruction's own
// element width.
// 64-bit elements, 128 -> 256, handled by the 32x4 instruction.
612 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
613               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
614 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
615               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
616
// 64-bit elements, 128 -> 512, handled by the 32x4 instruction.
617 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
618               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
619 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
620               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
621
// 32-bit elements, 256 -> 512, handled by the 64x4 instruction.
622 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
623               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
624 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
625               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
626
627 // Codegen pattern with the alternative types insert VEC128 into VEC256
628 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
629               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
630 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
631               vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
632 // Codegen pattern with the alternative types insert VEC128 into VEC512
633 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
634               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
635 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
636                vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
637 // Codegen pattern with the alternative types insert VEC256 into VEC512
638 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
639               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
640 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
641               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
642
643
// Patterns for a masked subvector insert where a bitconvert sits between the
// vselect and the insert.
//   InstrStr  - base name of the instruction; "rrk"/"rmk"/"rrkz"/"rmkz" are
//               appended to pick the masked form.
//   From, To  - subvector and full-vector types as seen by the insert node.
//   Cast      - type of the vselect (and of the mask / pass-through operand).
//   vinsert_insert         - fragment matching the insert node.
//   INSERT_get_vinsert_imm - transform producing the instruction immediate
//                            from the matched $ins node.
//   p         - predicates guarding all four patterns.
644 multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
645                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
646                                  PatFrag vinsert_insert,
647                                  SDNodeXForm INSERT_get_vinsert_imm,
648                                  list<Predicate> p> {
649 let Predicates = p in {
  // Merge-masking, register subvector: unselected lanes come from $src0.
650   def : Pat<(Cast.VT
651              (vselect Cast.KRCWM:$mask,
652                       (bitconvert
653                        (vinsert_insert:$ins (To.VT To.RC:$src1),
654                                             (From.VT From.RC:$src2),
655                                             (iPTR imm))),
656                       Cast.RC:$src0)),
657             (!cast<Instruction>(InstrStr#"rrk")
658              Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
659              (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Merge-masking, subvector loaded from memory (load folded into "rmk").
660   def : Pat<(Cast.VT
661              (vselect Cast.KRCWM:$mask,
662                       (bitconvert
663                        (vinsert_insert:$ins (To.VT To.RC:$src1),
664                                             (From.VT
665                                              (bitconvert
666                                               (From.LdFrag addr:$src2))),
667                                             (iPTR imm))),
668                       Cast.RC:$src0)),
669             (!cast<Instruction>(InstrStr#"rmk")
670              Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
671              (INSERT_get_vinsert_imm To.RC:$ins))>;
672
  // Zero-masking, register subvector: unselected lanes are all-zeros.
673   def : Pat<(Cast.VT
674              (vselect Cast.KRCWM:$mask,
675                       (bitconvert
676                        (vinsert_insert:$ins (To.VT To.RC:$src1),
677                                             (From.VT From.RC:$src2),
678                                             (iPTR imm))),
679                       Cast.ImmAllZerosV)),
680             (!cast<Instruction>(InstrStr#"rrkz")
681              Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
682              (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero-masking, subvector loaded from memory (load folded into "rmkz").
683   def : Pat<(Cast.VT
684              (vselect Cast.KRCWM:$mask,
685                       (bitconvert
686                        (vinsert_insert:$ins (To.VT To.RC:$src1),
687                                             (From.VT
688                                              (bitconvert
689                                               (From.LdFrag addr:$src2))),
690                                             (iPTR imm))),
691                       Cast.ImmAllZerosV)),
692             (!cast<Instruction>(InstrStr#"rmkz")
693              Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
694              (INSERT_get_vinsert_imm To.RC:$ins))>;
695 }
696 }
697
// Masked-insert-through-bitcast patterns, 128 -> 256 bits.
// Template arguments are <instruction, From, To, Cast, fragment, imm xform,
// predicates>: the insert is matched on From/To, the mask applies to Cast.
// Floating point: f64 insert masked as f32, and f32 insert masked as f64.
698 defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
699                              v8f32x_info, vinsert128_insert,
700                              INSERT_get_vinsert128_imm, [HasVLX]>;
701 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
702                              v4f64x_info, vinsert128_insert,
703                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
704
// Integer inserts of i64/i16/i8 elements masked at 32-bit granularity.
705 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
706                              v8i32x_info, vinsert128_insert,
707                              INSERT_get_vinsert128_imm, [HasVLX]>;
708 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
709                              v8i32x_info, vinsert128_insert,
710                              INSERT_get_vinsert128_imm, [HasVLX]>;
711 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
712                              v8i32x_info, vinsert128_insert,
713                              INSERT_get_vinsert128_imm, [HasVLX]>;
// Integer inserts of i32/i16/i8 elements masked at 64-bit granularity
// (128 -> 256 bits). These are integer vectors, so they select the
// integer-domain VINSERTI64x2Z256 rather than the FP-domain
// VINSERTF64x2Z256, in line with the 512-bit VINSERTI64x2Z patterns for the
// same element types.
714 defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
715                              v4i64x_info, vinsert128_insert,
716                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
717 defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
718                              v4i64x_info, vinsert128_insert,
719                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
720 defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
721                              v4i64x_info, vinsert128_insert,
722                              INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
723
// Masked-insert-through-bitcast patterns into 512-bit vectors.
// Template arguments are <instruction, From, To, Cast, fragment, imm xform,
// predicates>: the insert is matched on From/To, the mask applies to Cast.
// 128 -> 512, floating point.
724 defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
725                              v16f32_info, vinsert128_insert,
726                              INSERT_get_vinsert128_imm, [HasAVX512]>;
727 defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
728                              v8f64_info, vinsert128_insert,
729                              INSERT_get_vinsert128_imm, [HasDQI]>;
730
// 128 -> 512, integer: 32-bit masking first, then 64-bit masking (DQI).
731 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
732                              v16i32_info, vinsert128_insert,
733                              INSERT_get_vinsert128_imm, [HasAVX512]>;
734 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
735                              v16i32_info, vinsert128_insert,
736                              INSERT_get_vinsert128_imm, [HasAVX512]>;
737 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
738                              v16i32_info, vinsert128_insert,
739                              INSERT_get_vinsert128_imm, [HasAVX512]>;
740 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
741                              v8i64_info, vinsert128_insert,
742                              INSERT_get_vinsert128_imm, [HasDQI]>;
743 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
744                              v8i64_info, vinsert128_insert,
745                              INSERT_get_vinsert128_imm, [HasDQI]>;
746 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
747                              v8i64_info, vinsert128_insert,
748                              INSERT_get_vinsert128_imm, [HasDQI]>;
749
// 256 -> 512, floating point.
750 defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
751                              v16f32_info, vinsert256_insert,
752                              INSERT_get_vinsert256_imm, [HasDQI]>;
753 defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
754                              v8f64_info, vinsert256_insert,
755                              INSERT_get_vinsert256_imm, [HasAVX512]>;
756
// 256 -> 512, integer: 32-bit masking (DQI) first, then 64-bit masking.
757 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
758                              v16i32_info, vinsert256_insert,
759                              INSERT_get_vinsert256_imm, [HasDQI]>;
760 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
761                              v16i32_info, vinsert256_insert,
762                              INSERT_get_vinsert256_imm, [HasDQI]>;
763 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
764                              v16i32_info, vinsert256_insert,
765                              INSERT_get_vinsert256_imm, [HasDQI]>;
766 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
767                              v8i64_info, vinsert256_insert,
768                              INSERT_get_vinsert256_imm, [HasAVX512]>;
769 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
770                              v8i64_info, vinsert256_insert,
771                              INSERT_get_vinsert256_imm, [HasAVX512]>;
772 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
773                              v8i64_info, vinsert256_insert,
774                              INSERT_get_vinsert256_imm, [HasAVX512]>;
775
776 // vinsertps - insert f32 to XMM
// EVEX-encoded vinsertps (opcode 0x21) on the 128-bit VR128X class. Both
// forms select from X86insertps with an 8-bit immediate as the third operand.
777 let ExeDomain = SSEPackedSingle in {
// Register form: both vector operands are in registers.
778 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
779       (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
780       "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
781       [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))],
782       IIC_SSE_INSERTPS_RR>, EVEX_4V, Sched<[WriteFShuffle]>;
// Memory form: the second operand is a single f32 load, wrapped in
// scalar_to_vector to form the v4f32 the node expects.
783 def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
784       (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
785       "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
786       [(set VR128X:$dst, (X86insertps VR128X:$src1,
787                           (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
788                           imm:$src3))], IIC_SSE_INSERTPS_RM>, EVEX_4V,
789       EVEX_CD8<32, CD8VT1>, Sched<[WriteFShuffleLd, ReadAfterLd]>;
790 }
791
792 //===----------------------------------------------------------------------===//
793 // AVX-512 VECTOR EXTRACT
794 //---
795
796 // Supports two different pattern operators for mask and unmasked ops. Allows
797 // null_frag to be passed for one.
//   Opcode            - instruction opcode shared by all forms.
//   From, To          - source vector type and extracted subvector type.
//   vextract_extract  - operator used for the unmasked register pattern and
//                       for the unmasked store pattern.
//   vextract_for_mask - operator used for the masked register patterns.
//   itins             - itineraries (rr / rm) and scheduling class.
// The mnemonic is built as "vextract" # To.EltTypeName # "x" # To.NumElts.
798 multiclass vextract_for_size_split<int Opcode,
799                                    X86VectorVTInfo From, X86VectorVTInfo To,
800                                    SDPatternOperator vextract_extract,
801                                    SDPatternOperator vextract_for_mask,
802                                    OpndItins itins> {
803
804   let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register destination; the masked variants come from
    // AVX512_maskable_split.
805     defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
806                 (ins From.RC:$src1, u8imm:$idx),
807                 "vextract" # To.EltTypeName # "x" # To.NumElts,
808                 "$idx, $src1", "$src1, $idx",
809                 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
810                 (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm)),
811                 itins.rr>, AVX512AIi8Base, EVEX, Sched<[itins.Sched]>;
812
    // Memory destination: the extracted subvector is stored to $dst.
    // NOTE(review): this is a store, yet it is scheduled with the load-folded
    // class (itins.Sched.Folded, ReadAfterLd) -- confirm this is intended.
813     def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
814                     (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
815                     "vextract" # To.EltTypeName # "x" # To.NumElts #
816                         "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
817                     [(store (To.VT (vextract_extract:$idx
818                                     (From.VT From.RC:$src1), (iPTR imm))),
819                              addr:$dst)], itins.rm>, EVEX,
820                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
821
    // Masked memory destination. It has no selection pattern, so mayStore is
    // stated explicitly.
822     let mayStore = 1, hasSideEffects = 0 in
823     def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
824                     (ins To.MemOp:$dst, To.KRCWM:$mask,
825                                         From.RC:$src1, u8imm:$idx),
826                      "vextract" # To.EltTypeName # "x" # To.NumElts #
827                           "\t{$idx, $src1, $dst {${mask}}|"
828                           "$dst {${mask}}, $src1, $idx}",
829                     [], itins.rm>, EVEX_K, EVEX,
830                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
831   }
832 }
833
834 // Passes the same pattern operator for masked and unmasked ops.
// Thin wrapper over vextract_for_size_split: vextract_extract is supplied for
// both operator slots; all other arguments are forwarded unchanged.
835 multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
836                              X86VectorVTInfo To,
837                              SDPatternOperator vextract_extract,
838                              OpndItins itins> :
839   vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, itins>;
840
841 // Codegen pattern for the alternative types
// Maps an unmasked extract of To.VT from From.VT onto an existing
// instruction whose own element type may differ.
//   InstrStr - base instruction name; "rr" / "mr" is appended.
//   EXTRACT_get_vextract_imm - transform producing the immediate from $ext.
//   p        - predicates guarding both patterns.
842 multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
843                 X86VectorVTInfo To, PatFrag vextract_extract,
844                 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
845   let Predicates = p in {
     // Extract into a register.
846      def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
847                (To.VT (!cast<Instruction>(InstrStr#"rr")
848                           From.RC:$src1,
849                           (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     // Extract followed by a store: use the memory-destination form.
850      def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
851                               (iPTR imm))), addr:$dst),
852                (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
853                 (EXTRACT_get_vextract_imm To.RC:$ext))>;
854   }
855 }
856
// Defines the EVEX subvector-extract instruction family for one element
// domain. EltVT32/EltVT64 are the 32-bit and 64-bit element types. Opcode128
// is used for every form that extracts a 128-bit subvector (32x4, 64x2) and
// Opcode256 for every form that extracts a 256-bit subvector (64x4, 32x8).
// itins is forwarded unchanged to each form.
857 multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
858                              ValueType EltVT64, int Opcode256,
859                              OpndItins itins> {
860   let Predicates = [HasAVX512] in {
    // 512 -> 128, 32-bit elements.
861     defm NAME # "32x4Z" : vextract_for_size<Opcode128,
862                                    X86VectorVTInfo<16, EltVT32, VR512>,
863                                    X86VectorVTInfo< 4, EltVT32, VR128X>,
864                                    vextract128_extract, itins>,
865                                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
    // 512 -> 256, 64-bit elements.
866     defm NAME # "64x4Z" : vextract_for_size<Opcode256,
867                                    X86VectorVTInfo< 8, EltVT64, VR512>,
868                                    X86VectorVTInfo< 4, EltVT64, VR256X>,
869                                    vextract256_extract, itins>,
870                                        VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
871   }
  // 256 -> 128, 32-bit elements; needs VLX.
872   let Predicates = [HasVLX] in
873     defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
874                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
875                                  X86VectorVTInfo< 4, EltVT32, VR128X>,
876                                  vextract128_extract, itins>,
877                                      EVEX_V256, EVEX_CD8<32, CD8VT4>;
878
879   // Even with DQI we'd like to only use these instructions for masking.
  // null_frag in the unmasked slot means only the masked register patterns
  // are generated from the real fragment.
880   let Predicates = [HasVLX, HasDQI] in
881     defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
882                                  X86VectorVTInfo< 4, EltVT64, VR256X>,
883                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
884                                  null_frag, vextract128_extract, itins>,
885                                      VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
886
887   // Even with DQI we'd like to only use these instructions for masking.
888   let Predicates = [HasDQI] in {
    // 512 -> 128, 64-bit elements.
889     defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
890                                  X86VectorVTInfo< 8, EltVT64, VR512>,
891                                  X86VectorVTInfo< 2, EltVT64, VR128X>,
892                                  null_frag, vextract128_extract, itins>,
893                                      VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    // 512 -> 256, 32-bit elements.
894     defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
895                                  X86VectorVTInfo<16, EltVT32, VR512>,
896                                  X86VectorVTInfo< 8, EltVT32, VR256X>,
897                                  null_frag, vextract256_extract, itins>,
898                                      EVEX_V512, EVEX_CD8<32, CD8VT8>;
899   }
900 }
901
902 // FIXME: Is there a better scheduler itinerary for VEXTRACTF/VEXTRACTI?
// Itinerary bundle handed to vextract_for_type for the floating-point
// extracts. Both itinerary slots (read as itins.rr / itins.rm by the extract
// multiclasses) are IIC_SSE_SHUFP.
903 let Sched = WriteFShuffle256 in
904 def AVX512_VEXTRACTF : OpndItins<
905   IIC_SSE_SHUFP, IIC_SSE_SHUFP
906 >;
// Itinerary bundle handed to vextract_for_type for the integer extracts.
907 let Sched = WriteShuffle256 in
908 def AVX512_VEXTRACTI : OpndItins<
909   IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
910 >;
911
// Instantiate the extract families: floating point uses opcodes 0x19 (128-bit
// subvector) / 0x1b (256-bit subvector), integer uses 0x39 / 0x3b.
912 defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, AVX512_VEXTRACTF>;
913 defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, AVX512_VEXTRACTI>;
914
915 // extract_subvector codegen patterns with the alternative types.
916 // Even with AVX512DQ we'll still use these for unmasked operations.
// 64-bit elements, 512 -> 128, handled by the 32x4 instruction.
917 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
918           vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
919 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
920           vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
921
// 32-bit elements, 512 -> 256, handled by the 64x4 instruction.
922 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
923           vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
924 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
925           vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
926
// 64-bit elements, 256 -> 128, handled by the 32x4 instruction (VLX).
927 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
928           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
929 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
930           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
931
932 // Codegen pattern with the alternative types extract VEC128 from VEC256
933 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
934           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
935 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
936           vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
937
938 // Codegen pattern with the alternative types extract VEC128 from VEC512
939 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
940                  vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
941 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
942                  vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
943 // Codegen pattern with the alternative types extract VEC256 from VEC512
944 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
945                  vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
946 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
947                  vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
948
949
950 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
951 // smaller extract to enable EVEX->VEX.
// This block is the NoVLX variant: it emits VEXTRACTI128rr / VEXTRACTF128rr
// directly on the low 256 bits (sub_ymm) of the 512-bit register.
// In each pattern the extract index is the element at bit 128 of the source,
// and the result takes 128-bit lane 1 of the low ymm half.
952 let Predicates = [NoVLX] in {
953 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
954           (v2i64 (VEXTRACTI128rr
955                   (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
956                   (iPTR 1)))>;
957 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
958           (v2f64 (VEXTRACTF128rr
959                   (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
960                   (iPTR 1)))>;
961 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
962           (v4i32 (VEXTRACTI128rr
963                   (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
964                   (iPTR 1)))>;
965 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
966           (v4f32 (VEXTRACTF128rr
967                   (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
968                   (iPTR 1)))>;
969 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
970           (v8i16 (VEXTRACTI128rr
971                   (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
972                   (iPTR 1)))>;
973 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
974           (v16i8 (VEXTRACTI128rr
975                   (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
976                   (iPTR 1)))>;
977 }
978
979 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
980 // smaller extract to enable EVEX->VEX.
// This block is the HasVLX variant: it emits the 256-bit EVEX forms
// VEXTRACTI32x4Z256rr / VEXTRACTF32x4Z256rr on the low 256 bits (sub_ymm) of
// the 512-bit register, taking 128-bit lane 1. All element types share the
// 32x4 instruction.
981 let Predicates = [HasVLX] in {
982 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
983           (v2i64 (VEXTRACTI32x4Z256rr
984                   (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
985                   (iPTR 1)))>;
986 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
987           (v2f64 (VEXTRACTF32x4Z256rr
988                   (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
989                   (iPTR 1)))>;
990 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
991           (v4i32 (VEXTRACTI32x4Z256rr
992                   (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
993                   (iPTR 1)))>;
994 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
995           (v4f32 (VEXTRACTF32x4Z256rr
996                   (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
997                   (iPTR 1)))>;
998 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
999           (v8i16 (VEXTRACTI32x4Z256rr
1000                   (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
1001                   (iPTR 1)))>;
1002 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
1003           (v16i8 (VEXTRACTI32x4Z256rr
1004                   (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
1005                   (iPTR 1)))>;
1006 }
1007
1008
1009 // Additional patterns for handling a bitcast between the vselect and the
1010 // extract_subvector.
//   InstrStr - base instruction name; "rrk" / "rrkz" is appended.
//   From, To - source vector type and extracted subvector type as seen by
//              the extract node.
//   Cast     - type of the vselect (and of the mask / pass-through operand).
//   EXTRACT_get_vextract_imm - transform producing the immediate from $ext.
//   p        - predicates guarding both patterns.
1011 multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
1012                                   X86VectorVTInfo To, X86VectorVTInfo Cast,
1013                                   PatFrag vextract_extract,
1014                                   SDNodeXForm EXTRACT_get_vextract_imm,
1015                                   list<Predicate> p> {
1016 let Predicates = p in {
  // Merge-masking: unselected lanes come from $src0.
  // NOTE(review): the pass-through is spelled To.RC:$src0 in the source
  // pattern but Cast.RC:$src0 in the result; presumably To and Cast share a
  // register class because they have the same total width -- confirm.
1017   def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
1018                               (bitconvert
1019                                (To.VT (vextract_extract:$ext
1020                                        (From.VT From.RC:$src), (iPTR imm)))),
1021                               To.RC:$src0)),
1022             (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
1023                       Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
1024                       (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1025
  // Zero-masking: unselected lanes are all-zeros.
1026   def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
1027                               (bitconvert
1028                                (To.VT (vextract_extract:$ext
1029                                        (From.VT From.RC:$src), (iPTR imm)))),
1030                               Cast.ImmAllZerosV)),
1031             (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
1032                       Cast.KRCWM:$mask, From.RC:$src,
1033                       (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1034 }
1035 }
1036
// Masked-extract-through-bitcast patterns.
// Template arguments are <instruction, From, To, Cast, fragment, imm xform,
// predicates>: the extract is matched on From/To, the mask applies to Cast.
// 256 -> 128, floating point.
1037 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
1038                               v4f32x_info, vextract128_extract,
1039                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1040 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
1041                               v2f64x_info, vextract128_extract,
1042                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1043
// 256 -> 128, integer: 32-bit masking first, then 64-bit masking (DQI).
1044 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
1045                               v4i32x_info, vextract128_extract,
1046                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1047 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
1048                               v4i32x_info, vextract128_extract,
1049                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1050 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
1051                               v4i32x_info, vextract128_extract,
1052                               EXTRACT_get_vextract128_imm, [HasVLX]>;
1053 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
1054                               v2i64x_info, vextract128_extract,
1055                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1056 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1057                               v2i64x_info, vextract128_extract,
1058                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1059 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1060                               v2i64x_info, vextract128_extract,
1061                               EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1062
// 512 -> 128, floating point.
1063 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1064                               v4f32x_info, vextract128_extract,
1065                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1066 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1067                               v2f64x_info, vextract128_extract,
1068                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1069
// 512 -> 128, integer.
1070 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1071                               v4i32x_info, vextract128_extract,
1072                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1073 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1074                               v4i32x_info, vextract128_extract,
1075                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1076 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1077                               v4i32x_info, vextract128_extract,
1078                               EXTRACT_get_vextract128_imm, [HasAVX512]>;
1079 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1080                               v2i64x_info, vextract128_extract,
1081                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1082 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1083                               v2i64x_info, vextract128_extract,
1084                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1085 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1086                               v2i64x_info, vextract128_extract,
1087                               EXTRACT_get_vextract128_imm, [HasDQI]>;
1088
// 512 -> 256, floating point.
1089 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1090                               v8f32x_info, vextract256_extract,
1091                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1092 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1093                               v4f64x_info, vextract256_extract,
1094                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1095
// 512 -> 256, integer.
1096 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1097                               v8i32x_info, vextract256_extract,
1098                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1099 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1100                               v8i32x_info, vextract256_extract,
1101                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1102 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1103                               v8i32x_info, vextract256_extract,
1104                               EXTRACT_get_vextract256_imm, [HasDQI]>;
1105 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1106                               v4i64x_info, vextract256_extract,
1107                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1108 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1109                               v4i64x_info, vextract256_extract,
1110                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1111 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1112                               v4i64x_info, vextract256_extract,
1113                               EXTRACT_get_vextract256_imm, [HasAVX512]>;
1114
1115 // vextractps - extract 32 bits from XMM
// EVEX-encoded vextractps (opcode 0x17). Both forms view the v4f32 source as
// v4i32 (bc_v4i32) and extract the element selected by the immediate.
// Register form: the element is written to a 32-bit GPR.
1116 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
1117       (ins VR128X:$src1, u8imm:$src2),
1118       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1119       [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))],
1120       IIC_SSE_EXTRACTPS_RR>, EVEX, VEX_WIG, Sched<[WriteFShuffle]>;
1121
// Memory form: the element is stored to $dst.
// NOTE(review): this is a store, yet it is scheduled as WriteFShuffleLd (a
// load-folded class) -- confirm this is intended.
1122 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1123       (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1124       "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1125       [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1126                           addr:$dst)], IIC_SSE_EXTRACTPS_RM>,
1127       EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteFShuffleLd]>;
1128
1129 //===---------------------------------------------------------------------===//
1130 // AVX-512 BROADCAST
1131 //---
1132 // broadcast with a scalar argument.
// Pattern-only multiclass (it defines no instructions): an X86VBroadcast of a
// scalar held in SrcInfo.FRC is selected to the r, rk and rkz records whose
// names start with NAME#DestInfo.ZSuffix, after the scalar has been copied
// into SrcInfo.RC with COPY_TO_REGCLASS.
// Neither opc nor OpcodeStr is referenced in the body.
1133 multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
1134                             X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  // Unmasked broadcast.
1135   def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1136             (!cast<Instruction>(NAME#DestInfo.ZSuffix#r)
1137              (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
  // vselect between the broadcast and a pass-through value $src0 -> rk.
1138   def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1139                                   (X86VBroadcast SrcInfo.FRC:$src),
1140                                   DestInfo.RC:$src0)),
1141             (!cast<Instruction>(NAME#DestInfo.ZSuffix#rk)
1142              DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1143              (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
  // vselect between the broadcast and an all-zeros vector -> rkz.
1144   def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1145                                   (X86VBroadcast SrcInfo.FRC:$src),
1146                                   DestInfo.ImmAllZerosV)),
1147             (!cast<Instruction>(NAME#DestInfo.ZSuffix#rkz)
1148              DestInfo.KRCWM:$mask, (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
1149 }
1150
1151 // Split version to allow mask and broadcast node to be different types. This
1152 // helps support the 32x2 broadcasts.
//   MaskInfo   - type of the instruction result; also supplies the write-mask
//                register class used in the explicit patterns below.
//   DestInfo   - type produced by the broadcast node; it is bitconverted to
//                MaskInfo.VT.
//   SrcInfo    - type of the broadcast source (register or scalar memory).
//   UnmaskedOp - node used only in the unmasked patterns; the masked patterns
//                always use X86VBroadcast.
1153 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1154                                      SchedWrite SchedRR, SchedWrite SchedRM,
1155                                      X86VectorVTInfo MaskInfo,
1156                                      X86VectorVTInfo DestInfo,
1157                                      X86VectorVTInfo SrcInfo,
1158                                      SDPatternOperator UnmaskedOp = X86VBroadcast> {
1159   let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
  // Register-source form.
1160   defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
1161                    (outs MaskInfo.RC:$dst),
1162                    (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
1163                    (MaskInfo.VT
1164                     (bitconvert
1165                      (DestInfo.VT
1166                       (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
1167                    (MaskInfo.VT
1168                     (bitconvert
1169                      (DestInfo.VT
1170                       (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1171                    NoItinerary>, T8PD, EVEX, Sched<[SchedRR]>;
  // Memory-source form: broadcasts a scalar loaded with SrcInfo.ScalarLdFrag.
1172   let mayLoad = 1 in
1173   defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
1174                    (outs MaskInfo.RC:$dst),
1175                    (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
1176                    (MaskInfo.VT
1177                     (bitconvert
1178                      (DestInfo.VT (UnmaskedOp
1179                                    (SrcInfo.ScalarLdFrag addr:$src))))),
1180                    (MaskInfo.VT
1181                     (bitconvert
1182                      (DestInfo.VT (X86VBroadcast
1183                                    (SrcInfo.ScalarLdFrag addr:$src))))),
1184                    NoItinerary>, T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
1185                    Sched<[SchedRM]>;
1186   }
1187
  // Extra patterns folding a scalar load that was wrapped in scalar_to_vector
  // into the memory forms (unmasked, pass-through masked, zero masked).
1188   def : Pat<(MaskInfo.VT
1189              (bitconvert
1190               (DestInfo.VT (UnmaskedOp
1191                             (SrcInfo.VT (scalar_to_vector
1192                                          (SrcInfo.ScalarLdFrag addr:$src))))))),
1193             (!cast<Instruction>(NAME#MaskInfo.ZSuffix#m) addr:$src)>;
  // The instruction name is built from MaskInfo.ZSuffix, consistent with the
  // m and mkz patterns of this multiclass.
1194   def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
1195                           (bitconvert
1196                            (DestInfo.VT
1197                             (X86VBroadcast
1198                              (SrcInfo.VT (scalar_to_vector
1199                                           (SrcInfo.ScalarLdFrag addr:$src)))))),
1200                           MaskInfo.RC:$src0)),
1201             (!cast<Instruction>(NAME#MaskInfo.ZSuffix#mk)
1202              MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
1203   def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
1204                           (bitconvert
1205                            (DestInfo.VT
1206                             (X86VBroadcast
1207                              (SrcInfo.VT (scalar_to_vector
1208                                           (SrcInfo.ScalarLdFrag addr:$src)))))),
1209                           MaskInfo.ImmAllZerosV)),
1210             (!cast<Instruction>(NAME#MaskInfo.ZSuffix#mkz)
1211              MaskInfo.KRCWM:$mask, addr:$src)>;
1212 }
1213
1214 // Helper class to force mask and broadcast result to same type.
// Forwards to avx512_broadcast_rm_split with DestInfo passed as both the
// MaskInfo and the DestInfo argument; UnmaskedOp keeps its default.
1215 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
1216                                SchedWrite SchedRR, SchedWrite SchedRM,
1217                                X86VectorVTInfo DestInfo,
1218                                X86VectorVTInfo SrcInfo> :
1219   avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
1220                             DestInfo, DestInfo, SrcInfo>;
1221
// Floating-point scalar broadcast used for vbroadcastsd: defines a 512-bit
// form (Z, HasAVX512) and a 256-bit form (Z256, HasVLX), each combining the
// register/memory instructions with the scalar-register patterns. The source
// is always the 128-bit info. No 128-bit destination form is defined here.
1222 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1223                                                        AVX512VLVectorVTInfo _> {
1224   let Predicates = [HasAVX512] in
1225     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256, 
1226                                   WriteFShuffle256Ld, _.info512, _.info128>,
1227               avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
1228                                       EVEX_V512;
1229
1230   let Predicates = [HasVLX] in {
1231     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1232                                      WriteFShuffle256Ld, _.info256, _.info128>,
1233                  avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
1234                                          EVEX_V256;
1235   }
1236 }
1237
// Floating-point scalar broadcast used for vbroadcastss: like
// avx512_fp_broadcast_sd but additionally defines a 128-bit destination form
// (Z128). Z requires HasAVX512; Z256 and Z128 require HasVLX.
// NOTE(review): Z128 uses the WriteFShuffle256/WriteFShuffle256Ld scheduling
// classes, whereas the 128-bit integer broadcasts in this file use
// WriteShuffle/WriteShuffleLd - confirm this is intended.
1238 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1239                                                        AVX512VLVectorVTInfo _> {
1240   let Predicates = [HasAVX512] in
1241     defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1242                                   WriteFShuffle256Ld, _.info512, _.info128>,
1243               avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
1244                                EVEX_V512;
1245
1246   let Predicates = [HasVLX] in {
1247     defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1248                                      WriteFShuffle256Ld, _.info256, _.info128>,
1249                  avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
1250                              EVEX_V256;
1251     defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1252                                      WriteFShuffle256Ld, _.info128, _.info128>,
1253                  avx512_broadcast_scalar<opc, OpcodeStr, _.info128, _.info128>,
1254                              EVEX_V128;
1255   }
1256 }
// Scalar FP broadcasts: vbroadcastss (opcode 0x18, f32 types) and
// vbroadcastsd (opcode 0x19, f64 types, with VEX_W).
1257 defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1258                                        avx512vl_f32_info>;
1259 defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1260                                        avx512vl_f64_info>, VEX_W;
1261
// Select the 512-bit vbroadcast_ss/vbroadcast_sd intrinsics, which take an
// address operand, directly to the memory forms of the instructions above.
1262 def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
1263           (VBROADCASTSSZm addr:$src)>;
1264 def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
1265           (VBROADCASTSDZm addr:$src)>;
1266
// Integer broadcast from a register of class SrcRC. Defines the maskable
// register form 'r' whose mnemonic is "vpbroadcast" followed by the type's
// Suffix and whose pattern is OpNode applied directly to the SrcRC operand.
1267 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1268                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1269                                     RegisterClass SrcRC> {
1270   let ExeDomain = _.ExeDomain in
1271   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1272                          (ins SrcRC:$src),
1273                          "vpbroadcast"##_.Suffix, "$src", "$src",
1274                          (_.VT (OpNode SrcRC:$src)), NoItinerary>, T8PD, EVEX,
1275                          Sched<[SchedRR]>;
1276 }
1277
// Integer broadcast from a register of class SrcRC where the instruction
// itself takes a GR32 operand. The instruction records are created with empty
// pattern lists; selection is done by the explicit patterns below, which place
// the SrcRC value into an undefined i32 (IMPLICIT_DEF) at sub-register index
// Subreg. Name is the full record-name prefix, to which r/rk/rkz is appended.
1278 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1279                                     X86VectorVTInfo _, SDPatternOperator OpNode,
1280                                     RegisterClass SrcRC, SubRegIndex Subreg> {
1281   let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1282   defm r : AVX512_maskable_custom<opc, MRMSrcReg,
1283                         (outs _.RC:$dst), (ins GR32:$src),
1284                         !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1285                         !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1286                         "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
1287                         NoItinerary, "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1288
  // Unmasked.
1289   def : Pat <(_.VT (OpNode SrcRC:$src)),
1290              (!cast<Instruction>(Name#r)
1291               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1292
  // Masked with pass-through value $src0.
1293   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1294              (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
1295               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1296
  // Masked against an all-zeros vector.
1297   def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1298              (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
1299               (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1300 }
1301
// Instantiates avx512_int_broadcastbw_reg for the three vector lengths. The
// 512-bit form (Z) requires prd; the 256- and 128-bit forms additionally
// require HasVLX. The length suffix is appended to Name explicitly because the
// inner multiclass builds record names from its Name argument.
1302 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1303                       AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1304                       RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1305   let Predicates = [prd] in
1306     defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1307               OpNode, SrcRC, Subreg>, EVEX_V512;
1308   let Predicates = [prd, HasVLX] in {
1309     defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1310               _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1311     defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1312               _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1313   }
1314 }
1315
// Instantiates avx512_int_broadcast_reg for the three vector lengths. The
// 512-bit form requires prd; the 256- and 128-bit forms additionally require
// HasVLX. The 128-bit form uses WriteShuffle, the wider ones WriteShuffle256.
1316 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1317                                        SDPatternOperator OpNode,
1318                                        RegisterClass SrcRC, Predicate prd> {
1319   let Predicates = [prd] in
1320     defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1321                                       SrcRC>, EVEX_V512;
1322   let Predicates = [prd, HasVLX] in {
1323     defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1324                                          SrcRC>, EVEX_V256;
1325     defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1326                                          SrcRC>, EVEX_V128;
1327   }
1328 }
1329
// Integer broadcasts from general-purpose register classes. The byte and word
// forms (GR8/GR16, HasBWI) go through the GR32-operand multiclass with the
// sub_8bit/sub_16bit indices; the dword and qword forms (GR32/GR64,
// HasAVX512) share opcode 0x7C, with the qword form adding VEX_W.
1330 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1331                        avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1332 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1333                        avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1334                        HasBWI>;
1335 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1336                                                  X86VBroadcast, GR32, HasAVX512>;
1337 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1338                                                  X86VBroadcast, GR64, HasAVX512>, VEX_W;
1339
// X86vzext of a mask register to v16i32/v8i64 is selected as a zero-masked
// broadcast of the constant 1, which is materialized with MOV32ri/MOV64ri.
1340 def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
1341            (VPBROADCASTDrZrkz VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
1342 def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
1343            (VPBROADCASTQrZrkz VK8WM:$mask, (i64 (MOV64ri 0x1)))>;
1344
1345 // Provide a pattern for broadcasting from a full vector register of type
1346 // SrcInfo: the sub_xmm sub-register is extracted automatically and passed to
// the register form named NAME#DestInfo.ZSuffix#"r".
1347 multiclass avx512_int_broadcast_rm_lowering<X86VectorVTInfo DestInfo,
1348                                             X86VectorVTInfo SrcInfo> {
1349   def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
1350             (!cast<Instruction>(NAME#DestInfo.ZSuffix#"r")
1351                 (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>;
1352 }
1353
// Integer element broadcast from an XMM register or scalar memory, for all
// three vector lengths. The 512-bit form requires prd; the 256- and 128-bit
// forms additionally require HasVLX. The 512- and 256-bit forms also get
// lowering patterns for sources held in wider vector registers: 256- and
// 512-bit sources for Z, 256-bit sources for Z256.
1354 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1355                                         AVX512VLVectorVTInfo _, Predicate prd> {
1356   let Predicates = [prd] in {
1357     defm Z :   avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1358                                    WriteShuffle256Ld, _.info512, _.info128>,
1359                avx512_int_broadcast_rm_lowering<_.info512, _.info256>,
1360                                   EVEX_V512;
1361     // Defined separately to avoid redefinition.
1362     defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>;
1363   }
1364   let Predicates = [prd, HasVLX] in {
1365     defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1366                                     WriteShuffle256Ld, _.info256, _.info128>,
1367                 avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
1368                                  EVEX_V256;
1369     defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
1370                                     WriteShuffleLd, _.info128, _.info128>,
1371                                  EVEX_V128;
1372   }
1373 }
1374
// vpbroadcastb/w/d/q from XMM register or memory. The byte and word forms
// require HasBWI, the dword and qword forms HasAVX512; the qword form adds
// VEX_W.
1375 defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1376                                            avx512vl_i8_info, HasBWI>;
1377 defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1378                                            avx512vl_i16_info, HasBWI>;
1379 defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1380                                            avx512vl_i32_info, HasAVX512>;
1381 defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1382                                            avx512vl_i64_info, HasAVX512>, VEX_W;
1383
// Subvector broadcast from memory: defines the maskable 'rm' form that loads
// a _Src-typed vector (via _Src.LdFrag and a bitconvert) and applies
// X86SubVBroadcast to produce a _Dst-typed result.
1384 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1385                           X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1386   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1387                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1388                            (_Dst.VT (X86SubVBroadcast
1389                              (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))),
1390                            NoItinerary>, AVX5128IBase, EVEX,
1391                            Sched<[WriteShuffleLd]>;
1392 }
1393
1394 // This should be used for the AVX512DQ broadcast instructions. It disables
1395 // the unmasked patterns so that we only use the DQ instructions when masking
1396 // is requested.
// The unmasked pattern slot is filled with null_frag; since that leaves the
// unmasked record without a pattern, mayLoad and hasSideEffects are set
// explicitly.
1397 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1398                           X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1399   let hasSideEffects = 0, mayLoad = 1 in
1400   defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1401                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1402                            (null_frag),
1403                            (_Dst.VT (X86SubVBroadcast
1404                              (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))),
1405                             NoItinerary>, AVX5128IBase, EVEX,
1406                             Sched<[WriteShuffleLd]>;
1407 }
1408
// Broadcasts whose source is an X86vzload are selected to the memory forms of
// vpbroadcastq: the 512-bit pattern under HasAVX512, the 128/256-bit patterns
// under HasVLX.
1409 let Predicates = [HasAVX512] in {
1410   // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
1411   def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
1412             (VPBROADCASTQZm addr:$src)>;
1413 }
1414
1415 let Predicates = [HasVLX] in {
1416   // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
1417   def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
1418             (VPBROADCASTQZ128m addr:$src)>;
1419   def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
1420             (VPBROADCASTQZ256m addr:$src)>;
1421 }
// Word broadcasts from a truncated 32-bit load, selected to the memory forms
// of vpbroadcastw. Only the 128- and 256-bit destination types are covered in
// this block.
1422 let Predicates = [HasVLX, HasBWI] in {
1423   // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
1424   // This means we'll encounter truncated i32 loads; match that here.
1425   def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1426             (VPBROADCASTWZ128m addr:$src)>;
1427   def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1428             (VPBROADCASTWZ256m addr:$src)>;
  // Same shape, but the i32 value comes from a zero-extending 16-bit load.
1429   def : Pat<(v8i16 (X86VBroadcast
1430               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1431             (VPBROADCASTWZ128m addr:$src)>;
1432   def : Pat<(v16i16 (X86VBroadcast
1433               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1434             (VPBROADCASTWZ256m addr:$src)>;
1435 }
1436
1437 //===----------------------------------------------------------------------===//
1438 // AVX-512 BROADCAST SUBVECTORS
1439 //
1440
// 512-bit destination forms. The 32x4 variants broadcast a 128-bit source
// (v4i32x/v4f32x); the 64x4 variants broadcast a 256-bit source
// (v4i64x/v4f64x) and add VEX_W.
1441 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1442                        v16i32_info, v4i32x_info>,
1443                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1444 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1445                        v16f32_info, v4f32x_info>,
1446                        EVEX_V512, EVEX_CD8<32, CD8VT4>;
1447 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1448                        v8i64_info, v4i64x_info>, VEX_W,
1449                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
1450 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1451                        v8f64_info, v4f64x_info>, VEX_W,
1452                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
1453
// Additional 512-bit subvector broadcast patterns (HasAVX512).
1454 let Predicates = [HasAVX512] in {
// 256-bit loads of other element types reuse the 64x4 memory instructions.
1455 def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
1456           (VBROADCASTF64X4rm addr:$src)>;
1457 def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
1458           (VBROADCASTI64X4rm addr:$src)>;
1459 def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
1460           (VBROADCASTI64X4rm addr:$src)>;
1461 def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
1462           (VBROADCASTI64X4rm addr:$src)>;
1463
1464 // Provide fallback in case the load node that is used in the patterns above
1465 // is used by additional users, which prevents the pattern selection.
// The register source is placed in sub_ymm of an undefined 512-bit value and
// then inserted again at index 1 with VINSERT{F,I}64x4Zrr.
1466 def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
1467           (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1468                            (v4f64 VR256X:$src), 1)>;
1469 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
1470           (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1471                            (v8f32 VR256X:$src), 1)>;
1472 def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
1473           (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1474                            (v4i64 VR256X:$src), 1)>;
1475 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
1476           (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1477                            (v8i32 VR256X:$src), 1)>;
1478 def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
1479           (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1480                            (v16i16 VR256X:$src), 1)>;
1481 def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
1482           (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1483                            (v32i8 VR256X:$src), 1)>;
1484
// 128-bit loads of other element types reuse the 32x4 memory instructions.
1485 def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1486           (VBROADCASTF32X4rm addr:$src)>;
1487 def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1488           (VBROADCASTI32X4rm addr:$src)>;
1489 def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
1490           (VBROADCASTI32X4rm addr:$src)>;
1491 def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
1492           (VBROADCASTI32X4rm addr:$src)>;
1493 }
1494
// 256-bit destination subvector broadcasts of a 128-bit source (HasVLX):
// the 32x4 instructions plus patterns that reuse them for other element types.
1495 let Predicates = [HasVLX] in {
1496 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1497                            v8i32x_info, v4i32x_info>,
1498                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
1499 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1500                            v8f32x_info, v4f32x_info>,
1501                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
1502
// 128-bit loads of other element types select to the same memory forms.
1503 def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1504           (VBROADCASTF32X4Z256rm addr:$src)>;
1505 def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1506           (VBROADCASTI32X4Z256rm addr:$src)>;
1507 def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
1508           (VBROADCASTI32X4Z256rm addr:$src)>;
1509 def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
1510           (VBROADCASTI32X4Z256rm addr:$src)>;
1511
1512 // Provide fallback in case the load node that is used in the patterns above
1513 // is used by additional users, which prevents the pattern selection.
// The register source is placed in sub_xmm of an undefined 256-bit value and
// then inserted again at index 1 with VINSERT{F,I}32x4Z256rr.
1514 def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
1515           (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1516                               (v2f64 VR128X:$src), 1)>;
1517 def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
1518           (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1519                               (v4f32 VR128X:$src), 1)>;
1520 def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
1521           (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1522                               (v2i64 VR128X:$src), 1)>;
1523 def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
1524           (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1525                               (v4i32 VR128X:$src), 1)>;
1526 def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
1527           (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1528                               (v8i16 VR128X:$src), 1)>;
1529 def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
1530           (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1531                               (v16i8 VR128X:$src), 1)>;
1532 }
1533
// 64x2 subvector broadcasts with masked-only patterns (HasVLX and HasDQI).
// NOTE(review): despite the Z128 suffix in the record names, both are defined
// with 256-bit destination types (v4i64x/v4f64x) and EVEX_V256.
1534 let Predicates = [HasVLX, HasDQI] in {
1535 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1536                            v4i64x_info, v2i64x_info>, VEX_W,
1537                            EVEX_V256, EVEX_CD8<64, CD8VT2>;
1538 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1539                            v4f64x_info, v2f64x_info>, VEX_W,
1540                            EVEX_V256, EVEX_CD8<64, CD8VT2>;
1541 }
1542
// 512-bit destination subvector broadcasts that require HasDQI, all with
// masked-only patterns: 64x2 (128-bit source, VEX_W) and 32x8 (256-bit
// source).
1543 let Predicates = [HasDQI] in {
1544 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1545                        v8i64_info, v2i64x_info>, VEX_W,
1546                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1547 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1548                        v16i32_info, v8i32x_info>,
1549                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1550 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1551                        v8f64_info, v2f64x_info>, VEX_W,
1552                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
1553 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1554                        v16f32_info, v8f32x_info>,
1555                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
1556 }
1557
// 32x2 broadcasts, 512- and 256-bit forms. _Dst supplies the result/mask type
// and _Src the type of the broadcast node and of the 128-bit source. null_frag
// is passed as UnmaskedOp, so the unmasked patterns are disabled. The 512-bit
// form requires HasDQI; the 256-bit form additionally requires HasVLX.
1558 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1559                          AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
1560   let Predicates = [HasDQI] in
1561     defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1562                                           WriteShuffle256Ld, _Dst.info512,
1563                                           _Src.info512, _Src.info128, null_frag>,
1564                                           EVEX_V512;
1565   let Predicates = [HasDQI, HasVLX] in
1566     defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1567                                           WriteShuffle256Ld, _Dst.info256,
1568                                           _Src.info256, _Src.info128, null_frag>,
1569                                           EVEX_V256;
1570 }
1571
// Integer variant of the 32x2 broadcast: inherits the 512- and 256-bit forms
// from avx512_common_broadcast_32x2 and adds a 128-bit form (Z128) under
// HasDQI and HasVLX.
1572 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1573                          AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1574   avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1575
1576   let Predicates = [HasDQI, HasVLX] in
1577     defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
1578                                           WriteShuffleLd, _Dst.info128,
1579                                           _Src.info128, _Src.info128, null_frag>,
1580                                           EVEX_V128;
1581 }
1582
// vbroadcasti32x2 gets 512/256/128-bit forms; vbroadcastf32x2 uses the common
// multiclass and therefore gets only the 512- and 256-bit forms.
1583 defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1584                                           avx512vl_i32_info, avx512vl_i64_info>;
1585 defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1586                                           avx512vl_f32_info, avx512vl_f64_info>;
1587
// FP broadcasts whose source is a 256- or 512-bit vector register: the
// sub_xmm sub-register is extracted and fed to the register form of
// vbroadcastss/vbroadcastsd.
1588 let Predicates = [HasVLX] in {
1589 def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
1590           (VBROADCASTSSZ256r (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
1591 def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
1592           (VBROADCASTSDZ256r (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
1593 }
1594
// 512-bit destinations. Unlike the 256-bit patterns above, these are not
// wrapped in a Predicates block here.
1595 def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
1596           (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
1597 def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
1598           (VBROADCASTSSZr (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
1599
1600 def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
1601           (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
1602 def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
1603           (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
1604
1605 //===----------------------------------------------------------------------===//
1606 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
1607 //---
// Defines the single register form 'rr': the source is a mask register of
// class KRC and the result is X86VBroadcastm of it, typed as _.VT. No masked
// variants are defined.
1608 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1609                                   X86VectorVTInfo _, RegisterClass KRC> {
1610   def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1611                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1612                   [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))],
1613                   IIC_SSE_PSHUF_RI>, EVEX, Sched<[WriteShuffle]>;
1614 }
1615
// Instantiates avx512_mask_broadcastm for the three vector lengths. The
// 512-bit form requires HasCDI; the 256- and 128-bit forms additionally
// require HasVLX. The same KRC is used for every length.
1616 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1617                                  AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1618   let Predicates = [HasCDI] in
1619     defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1620   let Predicates = [HasCDI, HasVLX] in {
1621     defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1622     defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1623   }
1624 }
1625
// vpbroadcastmw2d takes a VK16 mask and produces i32 vectors; vpbroadcastmb2q
// takes a VK8 mask, produces i64 vectors and adds VEX_W.
1626 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1627                                                avx512vl_i32_info, VK16>;
1628 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1629                                                avx512vl_i64_info, VK8>, VEX_W;
1630
1631 //===----------------------------------------------------------------------===//
1632 // -- VPERMI2 - 3 source operands form --
1633
// Itinerary bundles passed to the permute multiclasses below. Each OpndItins
// record has its Sched field set through the enclosing 'let': WriteFShuffle256
// for the FP bundle, WriteShuffle256 for the integer bundle.
1634 let Sched = WriteFShuffle256 in
1635 def AVX512_PERM2_F : OpndItins<
1636   IIC_SSE_SHUFP, IIC_SSE_SHUFP
1637 >;
1638
1639 let Sched = WriteShuffle256 in
1640 def AVX512_PERM2_I : OpndItins<
1641   IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
1642 >;
1643
// VPERMI2-style permute: register (rr) and full-vector memory (rm) forms.
// $src1 is tied to $dst and is the first operand of X86VPermi2X; $src2 and
// $src3 are the remaining two operands, with $src3 loaded from memory in rm.
1644 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, OpndItins itins,
1645                          X86VectorVTInfo _> {
1646 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1647   // The index operand in the pattern should really be an integer type. However,
1648   // if we do that and it happens to come from a bitcast, then it becomes
1649   // difficult to find the bitcast needed to convert the index to the
1650   // destination type for the passthru since it will be folded with the bitcast
1651   // of the index operand.
1652   defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1653           (ins _.RC:$src2, _.RC:$src3),
1654           OpcodeStr, "$src3, $src2", "$src2, $src3",
1655           (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3)),
1656           itins.rr, 1>, EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>;
1657
1658   defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1659             (ins _.RC:$src2, _.MemOp:$src3),
1660             OpcodeStr, "$src3, $src2", "$src2, $src3",
1661             (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,
1662                    (_.VT (bitconvert (_.LdFrag addr:$src3))))), itins.rm, 1>,
1663             EVEX_4V, AVX5128IBase, Sched<[itins.Sched.Folded, ReadAfterLd]>;
1664   }
1665 }
1666
// Broadcast-memory form (rmb) of the VPERMI2-style permute: $src3 is a scalar
// memory operand that is broadcast with X86VBroadcast before being used as
// the third operand. The assembly operand carries _.BroadcastStr and the
// record is marked EVEX_B.
1667 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, OpndItins itins,
1668                             X86VectorVTInfo _> {
1669   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1670   defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1671               (ins _.RC:$src2, _.ScalarMemOp:$src3),
1672               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1673               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1674               (_.VT (X86VPermi2X _.RC:$src1,
1675                _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
1676               itins.rm, 1>, AVX5128IBase, EVEX_4V, EVEX_B,
1677               Sched<[itins.Sched.Folded, ReadAfterLd]>;
1678 }
1679
// Instantiates the rr/rm and rmb forms for all three vector lengths. The
// 128- and 256-bit forms are placed under HasVLX; the 512-bit form has no
// Predicates set inside this multiclass.
1680 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr, OpndItins itins,
1681                                AVX512VLVectorVTInfo VTInfo> {
1682   defm NAME: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>,
1683             avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
1684   let Predicates = [HasVLX] in {
1685   defm NAME#128: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>,
1686                  avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
1687   defm NAME#256: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>,
1688                  avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
1689   }
1690 }
1691
// Variant of the per-width instantiation that omits the broadcast
// (avx512_perm_i_mb) forms and gates every width on a caller-supplied
// predicate.
//   Prd - feature predicate required for all widths; the 128- and 256-bit
//         records additionally require HasVLX.
1692 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1693                                   OpndItins itins,
1694                                   AVX512VLVectorVTInfo VTInfo,
1695                                   Predicate Prd> {
1696   let Predicates = [Prd] in
1697   defm NAME: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
1698   let Predicates = [Prd, HasVLX] in {
1699   defm NAME#128: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
1700   defm NAME#256: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>,  EVEX_V256;
1701   }
1702 }
1703
// VPERMI2 instantiations. Opcode bytes are shared in pairs and the pair
// members are told apart by VEX_W:
//   0x76 - D / Q (Q adds VEX_W)
//   0x75 - B / W (W adds VEX_W)
//   0x77 - PS / PD (PD adds VEX_W)
// The byte and word forms go through the _bw multiclass, so they get no
// broadcast variants and are gated on HasVBMI / HasBWI respectively.
1704 defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", AVX512_PERM2_I,
1705                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1706 defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", AVX512_PERM2_I,
1707                   avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1708 defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", AVX512_PERM2_I,
1709                   avx512vl_i16_info, HasBWI>,
1710                   VEX_W, EVEX_CD8<16, CD8VF>;
1711 defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", AVX512_PERM2_I,
1712                   avx512vl_i8_info, HasVBMI>,
1713                   EVEX_CD8<8, CD8VF>;
1714 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", AVX512_PERM2_F,
1715                   avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
1716 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", AVX512_PERM2_F,
1717                   avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1718
1719 // VPERMT2
// Register ("rr") and full-vector memory ("rm") forms of the X86VPermt2
// permute.
//   opc       - opcode byte.
//   OpcodeStr - instruction mnemonic.
//   itins     - itinerary bundle (rr for the register form, rm for the load).
//   _         - vector type info for the data operands and the result.
//   IdxVT     - vector type info supplying the register class of $src2, which
//               may differ from the data type.
1720 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr, OpndItins itins,
1721                          X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
// $src1 is tied to $dst, so only $src2 and $src3 appear in (ins ...).
1722 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1723   defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1724           (ins IdxVT.RC:$src2, _.RC:$src3),
1725           OpcodeStr, "$src3, $src2", "$src2, $src3",
1726           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)),
1727           itins.rr, 1>, EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>;
1728
1729   defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1730             (ins IdxVT.RC:$src2, _.MemOp:$src3),
1731             OpcodeStr, "$src3, $src2", "$src2, $src3",
1732             (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1733                    (bitconvert (_.LdFrag addr:$src3)))), itins.rm, 1>,
1734             EVEX_4V, AVX5128IBase, Sched<[itins.Sched.Folded, ReadAfterLd]>;
1735   }
1736 }
// Broadcast-memory ("rmb") form of the X86VPermt2 permute: $src3 is a scalar
// memory operand that is loaded and splat through X86VBroadcast.
//   _     - vector type info for the data operands and the result.
//   IdxVT - vector type info supplying the register class of $src2.
1737 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, OpndItins itins,
1738                             X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
// $src1 is tied to $dst.
1739   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1740   defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1741               (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
// Operand strings carry the broadcast suffix on the memory operand.
1742               OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1743               !strconcat("$src2, ${src3}", _.BroadcastStr ),
1744               (_.VT (X86VPermt2 _.RC:$src1,
1745                IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
1746               itins.rm, 1>, AVX5128IBase, EVEX_4V, EVEX_B,
1747               Sched<[itins.Sched.Folded, ReadAfterLd]>;
1748 }
1749
// Instantiates avx512_perm_t and its broadcast companion avx512_perm_t_mb
// for all three vector widths.
//   VTInfo      - per-width type info for the data operands.
//   ShuffleMask - per-width type info passed as the IdxVT argument, i.e. the
//                 type of the $src2 operand.
// The 512-bit records carry no predicate from this multiclass; the 128- and
// 256-bit records require HasVLX.
1750 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr, OpndItins itins,
1751                                AVX512VLVectorVTInfo VTInfo,
1752                                AVX512VLVectorVTInfo ShuffleMask> {
1753   defm NAME: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info512,
1754                               ShuffleMask.info512>,
1755             avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info512,
1756                               ShuffleMask.info512>, EVEX_V512;
1757   let Predicates = [HasVLX] in {
1758   defm NAME#128: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info128,
1759                               ShuffleMask.info128>,
1760                  avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info128,
1761                               ShuffleMask.info128>, EVEX_V128;
1762   defm NAME#256: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info256,
1763                               ShuffleMask.info256>,
1764                  avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info256,
1765                               ShuffleMask.info256>, EVEX_V256;
1766   }
1767 }
1768
// Per-width instantiation of avx512_perm_t without the broadcast forms,
// gated on a caller-supplied predicate.
//   VTInfo - per-width type info for the data operands.
//   Idx    - per-width type info for the $src2 operand.
//   Prd    - feature predicate required for all widths; the 128- and 256-bit
//            records additionally require HasVLX.
1769 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr, OpndItins itins,
1770                                  AVX512VLVectorVTInfo VTInfo,
1771                                  AVX512VLVectorVTInfo Idx,
1772                                  Predicate Prd> {
1773   let Predicates = [Prd] in
1774   defm NAME: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info512,
1775                            Idx.info512>, EVEX_V512;
1776   let Predicates = [Prd, HasVLX] in {
1777   defm NAME#128: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info128,
1778                                Idx.info128>, EVEX_V128;
1779   defm NAME#256: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info256,
1780                                Idx.info256>, EVEX_V256;
1781   }
1782 }
1783
// VPERMT2 instantiations. Opcode bytes are shared in pairs and the pair
// members are told apart by VEX_W:
//   0x7E - D / Q, 0x7D - B / W, 0x7F - PS / PD.
// The second type-info argument is the type of the $src2 operand; the
// floating-point forms pass the integer info of matching element width for it.
1784 defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", AVX512_PERM2_I,
1785                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1786 defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", AVX512_PERM2_I,
1787                   avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1788 defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", AVX512_PERM2_I,
1789                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1790                   VEX_W, EVEX_CD8<16, CD8VF>;
1791 defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", AVX512_PERM2_I,
1792                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1793                   EVEX_CD8<8, CD8VF>;
1794 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", AVX512_PERM2_F,
1795                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1796 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", AVX512_PERM2_F,
1797                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1798
1799 //===----------------------------------------------------------------------===//
1800 // AVX-512 - BLEND using mask
1801 //
1802
// Itinerary bundle for the floating-point mask blends, scheduled as
// WriteFVarBlend. The two template arguments are presumably the
// register-register and register-memory itineraries, in that order (as the
// _RR / _RM names suggest) - confirm against the OpndItins definition.
1803 let Sched = WriteFVarBlend in
1804 def AVX512_BLENDM : OpndItins<
1805   IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
1806 >;
1807
// Itinerary bundle for the integer mask blends, scheduled as WriteVarBlend.
1808 let Sched = WriteVarBlend in
1809 def AVX512_PBLENDM : OpndItins<
1810   IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
1811 >;
1812
// Register and full-vector memory forms of a mask blend instruction.
// Six records are produced: rr / rrk / rrkz and rm / rmk / rmkz, i.e.
// unmasked, masked ({k}) and zero-masked ({k}{z}) for each operand kind.
// All records have an empty selection pattern, so the flags that would
// normally be inferred from a pattern (hasSideEffects, mayLoad) are set
// explicitly here.
//   opc       - opcode byte.
//   OpcodeStr - instruction mnemonic.
//   itins     - itinerary bundle (rr for register forms, rm for loads).
//   _         - vector type info for the width being instantiated.
1813 multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, OpndItins itins,
1814                             X86VectorVTInfo _> {
1815   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1816   def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1817              (ins _.RC:$src1, _.RC:$src2),
1818              !strconcat(OpcodeStr,
1819              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1820              [], itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
1821   def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1822              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1823              !strconcat(OpcodeStr,
1824              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1825              [], itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
1826   def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1827              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1828              !strconcat(OpcodeStr,
1829              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1830              [], itins.rr>, EVEX_4V, EVEX_KZ, Sched<[itins.Sched]>;
// Memory forms: $src2 is a full-vector memory operand.
1831   let mayLoad = 1 in {
1832   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1833              (ins _.RC:$src1, _.MemOp:$src2),
1834              !strconcat(OpcodeStr,
1835              "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1836              [], itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1837              Sched<[itins.Sched.Folded, ReadAfterLd]>;
1838   def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1839              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1840              !strconcat(OpcodeStr,
1841              "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1842              [], itins.rm>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1843              Sched<[itins.Sched.Folded, ReadAfterLd]>;
1844   def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1845              (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1846              !strconcat(OpcodeStr,
1847              "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1848              [], itins.rm>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1849              Sched<[itins.Sched.Folded, ReadAfterLd]>;
1850   }
1851   }
1852 }
// Broadcast-memory forms of a mask blend instruction: rmbk (masked) and rmb
// (unmasked). $src2 is a scalar memory operand and the asm string appends the
// broadcast suffix to it. Both records have an empty selection pattern, so
// mayLoad and hasSideEffects are set explicitly.
//   opc       - opcode byte.
//   OpcodeStr - instruction mnemonic.
//   itins     - itinerary bundle; only the memory itinerary is used.
//   _         - vector type info for the width being instantiated.
1853 multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, OpndItins itins,
1854                                 X86VectorVTInfo _> {
// ExeDomain is taken from the vector type info so that the broadcast forms
// are tagged with the same domain as the rr/rm forms of the instruction.
1855   let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
1856   def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1857       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1858        !strconcat(OpcodeStr,
1859             "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1860             "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
1861       [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1862       Sched<[itins.Sched.Folded, ReadAfterLd]>;
1863
1864   def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1865       (ins _.RC:$src1, _.ScalarMemOp:$src2),
1866        !strconcat(OpcodeStr,
1867             "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1868             "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
1869       [], itins.rm>,  EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1870       Sched<[itins.Sched.Folded, ReadAfterLd]>;
1871   }
1872 }
1873
// Instantiates the mask blend (avx512_blendmask plus its broadcast forms from
// avx512_blendmask_rmb) for all three vector widths.
// The Z (512-bit) records carry no predicate from this multiclass; Z256 and
// Z128 require HasVLX.
1874 multiclass blendmask_dq <bits<8> opc, string OpcodeStr, OpndItins itins,
1875                                  AVX512VLVectorVTInfo VTInfo> {
1876   defm Z : avx512_blendmask      <opc, OpcodeStr, itins, VTInfo.info512>,
1877            avx512_blendmask_rmb  <opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
1878
1879   let Predicates = [HasVLX] in {
1880     defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>,
1881                 avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
1882     defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>,
1883                 avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
1884   }
1885 }
1886
// Instantiates the mask blend for all three vector widths without broadcast
// forms. Every width requires HasBWI; Z256 and Z128 additionally require
// HasVLX.
1887 multiclass blendmask_bw <bits<8> opc, string OpcodeStr, OpndItins itins,
1888                          AVX512VLVectorVTInfo VTInfo> {
1889   let Predicates = [HasBWI] in
1890     defm Z : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
1891
1892   let Predicates = [HasBWI, HasVLX] in {
1893     defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
1894     defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
1895   }
1896 }
1897
1898
// Mask blend instantiations. Opcode bytes are shared in pairs and the pair
// members are told apart by VEX_W:
//   0x65 - PS / PD, 0x64 - D / Q, 0x66 - B / W.
// The floating-point forms use the AVX512_BLENDM itineraries, the integer
// forms AVX512_PBLENDM. Byte and word forms go through blendmask_bw and
// therefore have no broadcast variants.
1899 defm VBLENDMPS : blendmask_dq <0x65, "vblendmps", AVX512_BLENDM, avx512vl_f32_info>;
1900 defm VBLENDMPD : blendmask_dq <0x65, "vblendmpd", AVX512_BLENDM, avx512vl_f64_info>, VEX_W;
1901 defm VPBLENDMD : blendmask_dq <0x64, "vpblendmd", AVX512_PBLENDM, avx512vl_i32_info>;
1902 defm VPBLENDMQ : blendmask_dq <0x64, "vpblendmq", AVX512_PBLENDM, avx512vl_i64_info>, VEX_W;
1903 defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", AVX512_PBLENDM, avx512vl_i8_info>;
1904 defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", AVX512_PBLENDM, avx512vl_i16_info>, VEX_W;
1905
1906
1907 //===----------------------------------------------------------------------===//
1908 // Compare Instructions
1909 //===----------------------------------------------------------------------===//
1910
1911 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
1912
// Scalar floating-point compare into a mask register (opcode 0xC2).
//   _         - scalar type info (element type, register classes, suffix).
//   OpNode    - selection node for the plain compare.
//   OpNodeRnd - selection node for the {sae} compare; it takes an extra
//               rounding operand, matched here as FROUND_NO_EXC.
//   itins     - itinerary bundle (rr for register forms, rm for loads).
// Three groups of records are produced:
//   *_Int     - forms on the vector register class _.RC with the condition
//               code spelled in the mnemonic (vcmp${cc}...).
//   *_alt     - assembler-only forms taking the condition as an explicit
//               8-bit immediate.
//   rr / rm   - codegen-only forms on the scalar register class _.FRC.
1913 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
1914                              OpndItins itins> {
1915   defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1916                       (outs _.KRC:$dst),
1917                       (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
1918                       "vcmp${cc}"#_.Suffix,
1919                       "$src2, $src1", "$src1, $src2",
1920                       (OpNode (_.VT _.RC:$src1),
1921                               (_.VT _.RC:$src2),
1922                               imm:$cc), itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
1923   let mayLoad = 1 in
1924   defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
1925                     (outs _.KRC:$dst),
1926                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
1927                     "vcmp${cc}"#_.Suffix,
1928                     "$src2, $src1", "$src1, $src2",
1929                     (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
1930                         imm:$cc), itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
1931                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
1932
// {sae} form: register operands only, encoded with EVEX_B.
1933   defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1934                      (outs _.KRC:$dst),
1935                      (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
1936                      "vcmp${cc}"#_.Suffix,
1937                      "{sae}, $src2, $src1", "$src1, $src2, {sae}",
1938                      (OpNodeRnd (_.VT _.RC:$src1),
1939                                 (_.VT _.RC:$src2),
1940                                 imm:$cc,
1941                                 (i32 FROUND_NO_EXC)), itins.rr>,
1942                      EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
1943   // Accept explicit immediate argument form instead of comparison code.
1944   let isAsmParserOnly = 1, hasSideEffects = 0 in {
// The destination uses _.KRC like every other form in this multiclass rather
// than a hard-coded mask register class.
1945     defm  rri_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
1946                         (outs _.KRC:$dst),
1947                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1948                         "vcmp"#_.Suffix,
1949                         "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>, EVEX_4V,
1950                         Sched<[itins.Sched]>;
1951   let mayLoad = 1 in
1952     defm  rmi_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
1953                         (outs _.KRC:$dst),
1954                         (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
1955                         "vcmp"#_.Suffix,
1956                         "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>,
1957                         EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
1958                         Sched<[itins.Sched.Folded, ReadAfterLd]>;
1959
1960     defm  rrb_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
1961                        (outs _.KRC:$dst),
1962                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1963                        "vcmp"#_.Suffix,
1964                        "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", itins.rr>,
1965                        EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
1966   }// let isAsmParserOnly = 1, hasSideEffects = 0
1967
// Codegen-only forms selecting OpNode directly on scalar FP registers.
1968   let isCodeGenOnly = 1 in {
1969     let isCommutable = 1 in
1970     def rr : AVX512Ii8<0xC2, MRMSrcReg,
1971                 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
1972                 !strconcat("vcmp${cc}", _.Suffix,
1973                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1974                 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1975                                           _.FRC:$src2,
1976                                           imm:$cc))],
1977                 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
1978     def rm : AVX512Ii8<0xC2, MRMSrcMem,
1979               (outs _.KRC:$dst),
1980               (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
1981               !strconcat("vcmp${cc}", _.Suffix,
1982                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1983               [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1984                                         (_.ScalarLdFrag addr:$src2),
1985                                         imm:$cc))],
1986               itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
1987               Sched<[itins.Sched.Folded, ReadAfterLd]>;
1988   }
1989 }
1990
// Scalar compare instantiations, both gated on HasAVX512. The single- and
// double-precision versions differ in execution domain, type info,
// itineraries and prefix class; the double-precision one also sets VEX_W.
1991 let Predicates = [HasAVX512] in {
1992   let ExeDomain = SSEPackedSingle in
1993   defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
1994                                    SSE_ALU_F32S>, AVX512XSIi8Base;
1995   let ExeDomain = SSEPackedDouble in
1996   defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
1997                                    SSE_ALU_F64S>, AVX512XDIi8Base, VEX_W;
1998 }
1999
// Packed integer compare into a mask register, with the predicate fixed by
// the opcode (no condition-code immediate).
// Four records are produced: rr / rm (unmasked) and rrk / rmk (write-masked).
//   opc          - opcode byte.
//   OpcodeStr    - instruction mnemonic.
//   OpNode       - selection node for the compare.
//   itins        - itinerary bundle (rr for register forms, rm for loads).
//   _            - vector type info for the width being instantiated.
//   IsCommutable - applied to the register-register forms only.
2000 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
2001               OpndItins itins, X86VectorVTInfo _, bit IsCommutable> {
2002   let isCommutable = IsCommutable in
2003   def rr : AVX512BI<opc, MRMSrcReg,
2004              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2005              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2006              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
2007              itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
2008   def rm : AVX512BI<opc, MRMSrcMem,
2009              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2010              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2011              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2012                                      (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
2013              itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Masked forms: the pattern ANDs the write mask with the compare result.
2014   let isCommutable = IsCommutable in
2015   def rrk : AVX512BI<opc, MRMSrcReg,
2016               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2017               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2018                           "$dst {${mask}}, $src1, $src2}"),
2019               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2020                                    (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
2021               itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
2022   def rmk : AVX512BI<opc, MRMSrcMem,
2023               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2024               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2025                           "$dst {${mask}}, $src1, $src2}"),
2026               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2027                                    (OpNode (_.VT _.RC:$src1),
2028                                        (_.VT (bitconvert
2029                                               (_.LdFrag addr:$src2))))))],
2030               itins.rm>, EVEX_4V, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2031 }
2032
// Extends avx512_icmp_packed (inherited, so rr / rm / rrk / rmk are also
// produced) with broadcast-memory forms rmb (unmasked) and rmbk (masked).
// In both, $src2 is a scalar memory operand splat through X86VBroadcast.
2033 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
2034               OpndItins itins,  X86VectorVTInfo _, bit IsCommutable> :
2035            avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, _, IsCommutable> {
2036   def rmb : AVX512BI<opc, MRMSrcMem,
2037               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2038               !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2039                                     "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2040               [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2041                               (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
2042               itins.rm>, EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Masked broadcast form: the write mask is ANDed with the compare result.
2043   def rmbk : AVX512BI<opc, MRMSrcMem,
2044                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2045                                        _.ScalarMemOp:$src2),
2046                !strconcat(OpcodeStr,
2047                           "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2048                           "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2049                [(set _.KRC:$dst, (and _.KRCWM:$mask,
2050                                       (OpNode (_.VT _.RC:$src1),
2051                                         (X86VBroadcast
2052                                           (_.ScalarLdFrag addr:$src2)))))],
2053                itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2054                Sched<[itins.Sched.Folded, ReadAfterLd]>;
2055 }
2056
// Instantiates avx512_icmp_packed (no broadcast forms) for all three vector
// widths.
//   prd          - feature predicate required for every width; Z256 and Z128
//                  additionally require HasVLX.
//   IsCommutable - forwarded to the underlying multiclass; defaults to 0.
2057 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
2058                                  OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2059                                  Predicate prd, bit IsCommutable = 0> {
2060   let Predicates = [prd] in
2061   defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info512,
2062                               IsCommutable>, EVEX_V512;
2063
2064   let Predicates = [prd, HasVLX] in {
2065     defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info256,
2066                                    IsCommutable>, EVEX_V256;
2067     defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info128,
2068                                    IsCommutable>, EVEX_V128;
2069   }
2070 }
2071
// Instantiates avx512_icmp_packed_rmb (register, memory and broadcast forms)
// for all three vector widths.
//   prd          - feature predicate required for every width; Z256 and Z128
//                  additionally require HasVLX.
//   IsCommutable - forwarded to the underlying multiclass; defaults to 0.
2072 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2073                                      SDNode OpNode, OpndItins itins,
2074                                      AVX512VLVectorVTInfo VTInfo,
2075                                      Predicate prd, bit IsCommutable = 0> {
2076   let Predicates = [prd] in
2077   defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512,
2078                                   IsCommutable>, EVEX_V512;
2079
2080   let Predicates = [prd, HasVLX] in {
2081     defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info256,
2082                                        IsCommutable>, EVEX_V256;
2083     defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info128,
2084                                        IsCommutable>, EVEX_V128;
2085   }
2086 }
2087
// Packed integer equality / greater-than compares into a mask register.
// The byte and word forms are gated on HasBWI and have no broadcast variants;
// the dword and qword forms are gated on HasAVX512 and include them.
// Only the equality compares pass IsCommutable = 1; the greater-than compares
// leave it at its default of 0.
2088 // FIXME: Is there a better scheduler itinerary for VPCMP?
2089 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
2090                       SSE_ALU_F32P, avx512vl_i8_info, HasBWI, 1>,
2091                 EVEX_CD8<8, CD8VF>, VEX_WIG;
2092
2093 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
2094                       SSE_ALU_F32P, avx512vl_i16_info, HasBWI, 1>,
2095                 EVEX_CD8<16, CD8VF>, VEX_WIG;
2096
2097 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
2098                       SSE_ALU_F32P, avx512vl_i32_info, HasAVX512, 1>,
2099                 EVEX_CD8<32, CD8VF>;
2100
2101 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
2102                       SSE_ALU_F32P, avx512vl_i64_info, HasAVX512, 1>,
2103                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2104
2105 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
2106                       SSE_ALU_F32P, avx512vl_i8_info, HasBWI>,
2107                 EVEX_CD8<8, CD8VF>, VEX_WIG;
2108
2109 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
2110                       SSE_ALU_F32P, avx512vl_i16_info, HasBWI>,
2111                 EVEX_CD8<16, CD8VF>, VEX_WIG;
2112
2113 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
2114                       SSE_ALU_F32P, avx512vl_i32_info, HasAVX512>,
2115                 EVEX_CD8<32, CD8VF>;
2116
2117 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
2118                       SSE_ALU_F32P, avx512vl_i64_info, HasAVX512>,
2119                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2120
2121 // Transforms to swizzle an immediate to help matching memory operand in first
2122 // operand.
// Rewrites an integer-compare condition-code immediate into the code that
// gives the same result once the two compared operands are swapped.
// Only the low three bits of the immediate are considered. The ordered
// predicates are exchanged pairwise (LT <-> NLE, LE <-> NLT); EQ, NE, FALSE
// and TRUE are returned unchanged. The result is an i8 immediate.
2123 def CommutePCMPCC : SDNodeXForm<imm, [{
2124   uint8_t Imm = N->getZExtValue() & 0x7;
2125   switch (Imm) {
// All eight values of the masked immediate are listed below, so the default
// label cannot be reached.
2126   default: llvm_unreachable("Unreachable!");
2127   case 0x01: Imm = 0x06; break; // LT  -> NLE
2128   case 0x02: Imm = 0x05; break; // LE  -> NLT
2129   case 0x05: Imm = 0x02; break; // NLT -> LE
2130   case 0x06: Imm = 0x01; break; // NLE -> LT
2131   case 0x00: // EQ
2132   case 0x03: // FALSE
2133   case 0x04: // NE
2134   case 0x07: // TRUE
2135     break;
2136   }
2137   return getI8Imm(Imm, SDLoc(N));
2138 }]>;
2139
// Packed integer compare into a mask register with the predicate supplied as
// a condition-code immediate.
//   opc    - opcode byte.
//   Suffix - mnemonic suffix appended after "vpcmp${cc}" / "vpcmp".
//   OpNode - selection node taking (lhs, rhs, cc).
//   itins  - itinerary bundle (rr for register forms, rm for loads).
//   _      - vector type info for the width being instantiated.
// Records produced:
//   rri / rmi / rrik / rmik - selectable forms, unmasked and write-masked.
//   *_alt                   - assembler-only forms that spell the condition
//                             as an explicit 8-bit immediate.
// plus two anonymous patterns that fold a load appearing as the FIRST compare
// operand.
2140 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
2141                           OpndItins itins, X86VectorVTInfo _> {
2142   let isCommutable = 1 in
2143   def rri : AVX512AIi8<opc, MRMSrcReg,
2144              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
2145              !strconcat("vpcmp${cc}", Suffix,
2146                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2147              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2148                                        imm:$cc))],
2149              itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
2150   def rmi : AVX512AIi8<opc, MRMSrcMem,
2151              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
2152              !strconcat("vpcmp${cc}", Suffix,
2153                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2154              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2155                               (_.VT (bitconvert (_.LdFrag addr:$src2))),
2156                               imm:$cc))],
2157              itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Masked forms: the pattern ANDs the write mask with the compare result.
2158   let isCommutable = 1 in
2159   def rrik : AVX512AIi8<opc, MRMSrcReg,
2160               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2161                                       AVX512ICC:$cc),
2162               !strconcat("vpcmp${cc}", Suffix,
2163                          "\t{$src2, $src1, $dst {${mask}}|",
2164                          "$dst {${mask}}, $src1, $src2}"),
2165               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2166                                   (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2167                                           imm:$cc)))],
2168               itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
2169   def rmik : AVX512AIi8<opc, MRMSrcMem,
2170               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2171                                     AVX512ICC:$cc),
2172               !strconcat("vpcmp${cc}", Suffix,
2173                          "\t{$src2, $src1, $dst {${mask}}|",
2174                          "$dst {${mask}}, $src1, $src2}"),
2175               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2176                                    (OpNode (_.VT _.RC:$src1),
2177                                       (_.VT (bitconvert (_.LdFrag addr:$src2))),
2178                                       imm:$cc)))],
2179               itins.rm>, EVEX_4V, EVEX_K,
2180               Sched<[itins.Sched.Folded, ReadAfterLd]>;
2181
2182   // Accept explicit immediate argument form instead of comparison code.
// These have empty patterns, so hasSideEffects / mayLoad are set explicitly.
2183   let isAsmParserOnly = 1, hasSideEffects = 0 in {
2184     def rri_alt : AVX512AIi8<opc, MRMSrcReg,
2185                (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2186                !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2187                           "$dst, $src1, $src2, $cc}"),
2188                [], itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
2189     let mayLoad = 1 in
2190     def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
2191                (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2192                !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2193                           "$dst, $src1, $src2, $cc}"),
2194                [], itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2195     def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
2196                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2197                                        u8imm:$cc),
2198                !strconcat("vpcmp", Suffix,
2199                           "\t{$cc, $src2, $src1, $dst {${mask}}|",
2200                           "$dst {${mask}}, $src1, $src2, $cc}"),
2201                [], itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
2202     let mayLoad = 1 in
2203     def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
2204                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2205                                        u8imm:$cc),
2206                !strconcat("vpcmp", Suffix,
2207                           "\t{$cc, $src2, $src1, $dst {${mask}}|",
2208                           "$dst {${mask}}, $src1, $src2, $cc}"),
2209                [], itins.rm>, EVEX_4V, EVEX_K,
2210                Sched<[itins.Sched.Folded, ReadAfterLd]>;
2211   }
2212
// Load in the first operand position: select the memory form with the
// operands swapped and the condition code rewritten by CommutePCMPCC.
2213   def : Pat<(OpNode (bitconvert (_.LdFrag addr:$src2)),
2214                     (_.VT _.RC:$src1), imm:$cc),
2215             (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2216                                                       (CommutePCMPCC imm:$cc))>;
2217
// Same as above for the write-masked compare.
2218   def : Pat<(and _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src2)),
2219                                         (_.VT _.RC:$src1), imm:$cc)),
2220             (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
2221                                                        _.RC:$src1, addr:$src2,
2222                                                        (CommutePCMPCC imm:$cc))>;
2223 }
2224
// Extends avx512_icmp_cc (inherited, so all of its records and patterns are
// also produced) with broadcast-memory forms.
// Records added:
//   rmib / rmibk         - selectable forms whose $src2 is a scalar memory
//                          operand splat through X86VBroadcast.
//   rmib_alt / rmibk_alt - assembler-only forms taking the condition as an
//                          explicit 8-bit immediate.
// plus two anonymous patterns that fold a broadcast load appearing as the
// FIRST compare operand.
2225 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
2226                               OpndItins itins, X86VectorVTInfo _> :
2227            avx512_icmp_cc<opc, Suffix, OpNode, itins, _> {
2228   def rmib : AVX512AIi8<opc, MRMSrcMem,
2229              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2230                                      AVX512ICC:$cc),
2231              !strconcat("vpcmp${cc}", Suffix,
2232                         "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2233                         "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2234              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2235                                (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2236                                imm:$cc))],
2237              itins.rm>, EVEX_4V, EVEX_B,
2238              Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Masked broadcast form: the write mask is ANDed with the compare result.
2239   def rmibk : AVX512AIi8<opc, MRMSrcMem,
2240               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2241                                        _.ScalarMemOp:$src2, AVX512ICC:$cc),
2242               !strconcat("vpcmp${cc}", Suffix,
2243                        "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2244                        "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2245               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2246                                   (OpNode (_.VT _.RC:$src1),
2247                                     (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2248                                     imm:$cc)))],
2249               itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2250               Sched<[itins.Sched.Folded, ReadAfterLd]>;
2251
2252   // Accept explicit immediate argument form instead of comparison code.
// These have empty patterns, so hasSideEffects / mayLoad are set explicitly.
2253   let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
2254     def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
2255                (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2256                                        u8imm:$cc),
2257                !strconcat("vpcmp", Suffix,
2258                    "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2259                    "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2260                [], itins.rm>, EVEX_4V, EVEX_B,
2261                Sched<[itins.Sched.Folded, ReadAfterLd]>;
2262     def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
2263                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2264                                        _.ScalarMemOp:$src2, u8imm:$cc),
2265                !strconcat("vpcmp", Suffix,
2266                   "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2267                   "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2268                [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2269                Sched<[itins.Sched.Folded, ReadAfterLd]>;
2270   }
2271
// Broadcast load in the first operand position: select the broadcast form
// with the operands swapped and the condition code rewritten by
// CommutePCMPCC.
2272   def : Pat<(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2273                     (_.VT _.RC:$src1), imm:$cc),
2274             (!cast<Instruction>(NAME#_.ZSuffix#"rmib") _.RC:$src1, addr:$src2,
2275                                                        (CommutePCMPCC imm:$cc))>;
2276
// Same as above for the write-masked compare.
2277   def : Pat<(and _.KRCWM:$mask, (OpNode (X86VBroadcast
2278                                          (_.ScalarLdFrag addr:$src2)),
2279                                         (_.VT _.RC:$src1), imm:$cc)),
2280             (!cast<Instruction>(NAME#_.ZSuffix#"rmibk") _.KRCWM:$mask,
2281                                                        _.RC:$src1, addr:$src2,
2282                                                        (CommutePCMPCC imm:$cc))>;
2283 }
2284
// Instantiates avx512_icmp_cc for all three vector lengths described by
// VTInfo: the 512-bit form (Z) is guarded by `prd` alone, while the 256-bit
// (Z256) and 128-bit (Z128) forms additionally require HasVLX.
//   opc    - opcode byte forwarded to avx512_icmp_cc
//   Suffix - mnemonic suffix forwarded to avx512_icmp_cc
//   OpNode - compare SDNode used in the selection patterns
//   itins  - itinerary/scheduling bundle
2285 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
2286                              OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2287                              Predicate prd> {
2288   let Predicates = [prd] in
2289   defm Z : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info512>,
2290                           EVEX_V512;
2291
2292   let Predicates = [prd, HasVLX] in {
2293     defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info256>,
2294                                EVEX_V256;
2295     defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info128>,
2296                                EVEX_V128;
2297   }
2298 }
2299
// Same vector-length fan-out as avx512_icmp_cc_vl, but built on
// avx512_icmp_cc_rmb, which adds the broadcast-memory (rmib*) forms on top of
// avx512_icmp_cc.  Z is guarded by `prd`; Z256/Z128 also require HasVLX.
2300 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
2301                                  OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2302                                  Predicate prd> {
2303   let Predicates = [prd] in
2304   defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info512>,
2305            EVEX_V512;
2306
2307   let Predicates = [prd, HasVLX] in {
2308     defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info256>,
2309                 EVEX_V256;
2310     defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info128>,
2311                 EVEX_V128;
2312   }
2313 }
2314
// Integer compare-into-mask instantiations.
//  - Signed compares (VPCMP*) select on X86cmpm, unsigned ones (VPCMPU*) on
//    X86cmpmu.
//  - Byte/word variants use opcodes 0x3F/0x3E, require HasBWI and are built
//    from avx512_icmp_cc_vl (no broadcast-memory forms).
//  - Dword/qword variants use opcodes 0x1F/0x1E, require HasAVX512 and are
//    built from avx512_icmp_cc_rmb_vl (adds broadcast-memory forms).
//  - The word and qword variants additionally carry VEX_W.
2315 // FIXME: Is there a better scheduler itinerary for VPCMP/VPCMPU?
2316 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, SSE_ALU_F32P,
2317                                 avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
2318 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, SSE_ALU_F32P,
2319                                  avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
2320
2321 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, SSE_ALU_F32P,
2322                                 avx512vl_i16_info, HasBWI>,
2323                                 VEX_W, EVEX_CD8<16, CD8VF>;
2324 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, SSE_ALU_F32P,
2325                                  avx512vl_i16_info, HasBWI>,
2326                                  VEX_W, EVEX_CD8<16, CD8VF>;
2327
2328 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, SSE_ALU_F32P,
2329                                     avx512vl_i32_info, HasAVX512>,
2330                                     EVEX_CD8<32, CD8VF>;
2331 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, SSE_ALU_F32P,
2332                                      avx512vl_i32_info, HasAVX512>,
2333                                      EVEX_CD8<32, CD8VF>;
2334
2335 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, SSE_ALU_F32P,
2336                                     avx512vl_i64_info, HasAVX512>,
2337                                     VEX_W, EVEX_CD8<64, CD8VF>;
2338 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, SSE_ALU_F32P,
2339                                      avx512vl_i64_info, HasAVX512>,
2340                                      VEX_W, EVEX_CD8<64, CD8VF>;
2341
2342
// Packed floating-point compare into a mask register (opcode 0xC2) for a
// single vector type `_`.  Provides:
//   rri / rmi / rmbi       - register, full-vector memory and broadcast-memory
//                            forms whose mnemonic embeds the condition code
//                            ("vcmp${cc}"), selected from X86cmpm.
//   rri_alt / rmi_alt / rmbi_alt
//                          - assembler-only forms taking the predicate as an
//                            explicit u8 immediate; they have no patterns.
//   trailing Pat<> records - fold a load or broadcast that appears as the
//                            FIRST compare operand into the memory forms.
// The masked patterns below refer to k-suffixed instructions (rmik, rmbik);
// those are expected to be produced by AVX512_maskable_cmp, which is defined
// outside this multiclass.
2343 multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> {
       // NOTE(review): the trailing `1` is presumably the commutable flag of
       // AVX512_maskable_cmp - confirm against its definition.
2344   defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2345                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
2346                    "vcmp${cc}"#_.Suffix,
2347                    "$src2, $src1", "$src1, $src2",
2348                    (X86cmpm (_.VT _.RC:$src1),
2349                          (_.VT _.RC:$src2),
2350                            imm:$cc), itins.rr, 1>,
2351                    Sched<[itins.Sched]>;
2352
2353   defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2354                 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
2355                 "vcmp${cc}"#_.Suffix,
2356                 "$src2, $src1", "$src1, $src2",
2357                 (X86cmpm (_.VT _.RC:$src1),
2358                         (_.VT (bitconvert (_.LdFrag addr:$src2))),
2359                         imm:$cc), itins.rm>,
2360                 Sched<[itins.Sched.Folded, ReadAfterLd]>;
2361
       // Broadcast form: $src2 is a scalar memory operand printed with the
       // type's BroadcastStr and encoded with EVEX_B.
2362   defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2363                 (outs _.KRC:$dst),
2364                 (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
2365                 "vcmp${cc}"#_.Suffix,
2366                 "${src2}"##_.BroadcastStr##", $src1",
2367                 "$src1, ${src2}"##_.BroadcastStr,
2368                 (X86cmpm (_.VT _.RC:$src1),
2369                         (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
2370                         imm:$cc), itins.rm>,
2371                 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2372   // Accept explicit immediate argument form instead of comparison code.
2373   let isAsmParserOnly = 1, hasSideEffects = 0 in {
2374     defm  rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2375                          (outs _.KRC:$dst),
2376                          (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2377                          "vcmp"#_.Suffix,
2378                          "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>,
2379                          Sched<[itins.Sched]>;
2380
         // These have no pattern to infer it from, so mayLoad is set by hand.
2381     let mayLoad = 1 in {
2382       defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2383                              (outs _.KRC:$dst),
2384                              (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2385                              "vcmp"#_.Suffix,
2386                              "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>,
2387                              Sched<[itins.Sched.Folded, ReadAfterLd]>;
2388
2389       defm  rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2390                          (outs _.KRC:$dst),
2391                          (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2392                          "vcmp"#_.Suffix,
2393                          "$cc, ${src2}"##_.BroadcastStr##", $src1",
2394                          "$src1, ${src2}"##_.BroadcastStr##", $cc", itins.rm>,
2395                          EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2396     }
2397   }
2398
2399   // Patterns for selecting with loads in other operand.
       // Only condition codes accepted by CommutableCMPCC match, and the
       // immediate is forwarded unchanged while the operands are swapped.
2400   def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2401                      CommutableCMPCC:$cc),
2402             (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2403                                                       imm:$cc)>;
2404
2405   def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
2406                                          (_.VT _.RC:$src1),
2407                                          CommutableCMPCC:$cc)),
2408             (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
2409                                                        _.RC:$src1, addr:$src2,
2410                                                        imm:$cc)>;
2411
2412   def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2413                      (_.VT _.RC:$src1), CommutableCMPCC:$cc),
2414             (!cast<Instruction>(NAME#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2415                                                        imm:$cc)>;
2416
2417   def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
2418                                           (_.ScalarLdFrag addr:$src2)),
2419                                          (_.VT _.RC:$src1),
2420                                          CommutableCMPCC:$cc)),
2421             (!cast<Instruction>(NAME#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2422                                                         _.RC:$src1, addr:$src2,
2423                                                         imm:$cc)>;
2424 }
2425
// Register-register packed FP compare with the "{sae}" modifier for vector
// type `_`.  rrib selects X86cmpmRnd with the FROUND_NO_EXC rounding operand;
// rrib_alt is the assembler-only spelling with an explicit u8 immediate and
// has no pattern.  Both are encoded with EVEX_B.
2426 multiclass avx512_vcmp_sae<OpndItins itins, X86VectorVTInfo _> {
2427   // comparison code form (VCMP[EQ/LT/LE/...]
2428   defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2429                      (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2430                      "vcmp${cc}"#_.Suffix,
2431                      "{sae}, $src2, $src1", "$src1, $src2, {sae}",
2432                      (X86cmpmRnd (_.VT _.RC:$src1),
2433                                     (_.VT _.RC:$src2),
2434                                     imm:$cc,
2435                                 (i32 FROUND_NO_EXC)), itins.rr>,
2436                      EVEX_B, Sched<[itins.Sched]>;
2437
2438   let isAsmParserOnly = 1, hasSideEffects = 0 in {
2439     defm  rrib_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2440                          (outs _.KRC:$dst),
2441                          (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2442                          "vcmp"#_.Suffix,
2443                          "$cc, {sae}, $src2, $src1",
2444                          "$src1, $src2, {sae}, $cc", itins.rr>,
2445                          EVEX_B, Sched<[itins.Sched]>;
2446    }
2447 }
2448
// Packed FP compare for every vector length in `_`.  The 512-bit form (Z,
// HasAVX512) gets both the common forms and the {sae} forms; the 128-/256-bit
// forms (HasAVX512 + HasVLX) get the common forms only.
2449 multiclass avx512_vcmp<OpndItins itins, AVX512VLVectorVTInfo _> {
2450   let Predicates = [HasAVX512] in {
2451     defm Z    : avx512_vcmp_common<itins, _.info512>,
2452                 avx512_vcmp_sae<itins, _.info512>, EVEX_V512;
2453
2454   }
2455   let Predicates = [HasAVX512,HasVLX] in {
2456    defm Z128 : avx512_vcmp_common<itins, _.info128>, EVEX_V128;
2457    defm Z256 : avx512_vcmp_common<itins, _.info256>, EVEX_V256;
2458   }
2459 }
2460
// Packed double (VCMPPD, with VEX_W and 64-bit element CD8 scaling) and packed
// single (VCMPPS, 32-bit element CD8 scaling) compare-into-mask instructions.
2461 defm VCMPPD : avx512_vcmp<SSE_ALU_F64P, avx512vl_f64_info>,
2462                           AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2463 defm VCMPPS : avx512_vcmp<SSE_ALU_F32P, avx512vl_f32_info>,
2464                           AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2465
2466
2467 // Patterns to select fp compares with load as first operand.
// Scalar case: when the load is the first operand of X86cmpms and the
// condition code is accepted by CommutableCMPCC, swap the operands so the
// load can be folded into VCMPSDZrm / VCMPSSZrm; the immediate is forwarded
// unchanged.
2468 let Predicates = [HasAVX512] in {
2469   def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2470                             CommutableCMPCC:$cc)),
2471             (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;
2472
2473   def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2474                             CommutableCMPCC:$cc)),
2475             (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
2476 }
2477
2478 // ----------------------------------------------------------------
2479 // FPClass
2480 //handle fpclass instruction  mask =  op(reg_scalar,imm)
2481 //                                    op(mem_scalar,imm)
// Defines four forms, all guarded by `prd` and tagged with the type's
// ExeDomain:
//   rr / rrk - classify a register operand (plain / write-masked)
//   rm / rmk - classify a scalar memory operand (plain / write-masked)
// $src2 is the i32u8imm classification immediate.
// NOTE(review): the write-masked forms (rrk, rmk) match an `or` of $mask with
// the classification result.  Confirm that this agrees with how the masked
// node is constructed during lowering and with the instruction's write-mask
// behaviour before relying on or changing it.
2482 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
2483                                  OpndItins itins,  X86VectorVTInfo _,
2484                                  Predicate prd> {
2485   let Predicates = [prd], ExeDomain = _.ExeDomain in {
2486       def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2487                       (ins _.RC:$src1, i32u8imm:$src2),
2488                       OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2489                       [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
2490                               (i32 imm:$src2)))], itins.rr>,
2491                       Sched<[itins.Sched]>;
2492       def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2493                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2494                       OpcodeStr##_.Suffix#
2495                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2496                       [(set _.KRC:$dst,(or _.KRCWM:$mask,
2497                                       (OpNode (_.VT _.RC:$src1),
2498                                       (i32 imm:$src2))))], itins.rr>,
2499                       EVEX_K, Sched<[itins.Sched]>;
       // Memory forms match through the type's ScalarIntMemCPat complex pattern.
2500     def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2501                     (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2502                     OpcodeStr##_.Suffix##
2503                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2504                     [(set _.KRC:$dst,
2505                           (OpNode _.ScalarIntMemCPat:$src1,
2506                                   (i32 imm:$src2)))], itins.rm>,
2507                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
2508     def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2509                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2510                     OpcodeStr##_.Suffix##
2511                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2512                     [(set _.KRC:$dst,(or _.KRCWM:$mask,
2513                         (OpNode _.ScalarIntMemCPat:$src1,
2514                             (i32 imm:$src2))))], itins.rm>,
2515                     EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2516   }
2517 }
2518
2519 //handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
2520 //                                  fpclass(reg_vec, mem_vec, imm)
2521 //                                  fpclass(reg_vec, broadcast(eltVt), imm)
// Defines six forms for vector type `_`:
//   rr  / rrk  - register source (plain / write-masked)
//   rm  / rmk  - full-vector memory source; the `mem` string is appended to
//                the mnemonic for these forms
//   rmb / rmbk - broadcast scalar memory source (EVEX_B); the `broadcast`
//                string is appended to the mnemonic for these forms
// $src2 is the i32u8imm classification immediate.
// NOTE(review): as in the scalar multiclass, the write-masked forms match an
// `or` of $mask with the result; confirm this against the lowering code and
// the instruction's write-mask behaviour.
2522 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
2523                                  OpndItins itins, X86VectorVTInfo _,
2524                                  string mem, string broadcast>{
2525   let ExeDomain = _.ExeDomain in {
2526   def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2527                       (ins _.RC:$src1, i32u8imm:$src2),
2528                       OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2529                       [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
2530                                        (i32 imm:$src2)))], itins.rr>,
2531                       Sched<[itins.Sched]>;
2532   def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2533                       (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2534                       OpcodeStr##_.Suffix#
2535                       "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2536                       [(set _.KRC:$dst,(or _.KRCWM:$mask,
2537                                        (OpNode (_.VT _.RC:$src1),
2538                                        (i32 imm:$src2))))], itins.rr>,
2539                       EVEX_K, Sched<[itins.Sched]>;
2540   def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2541                     (ins _.MemOp:$src1, i32u8imm:$src2),
2542                     OpcodeStr##_.Suffix##mem#
2543                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2544                     [(set _.KRC:$dst,(OpNode
2545                                      (_.VT (bitconvert (_.LdFrag addr:$src1))),
2546                                      (i32 imm:$src2)))], itins.rm>,
2547                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
2548   def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2549                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2550                     OpcodeStr##_.Suffix##mem#
2551                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2552                     [(set _.KRC:$dst, (or _.KRCWM:$mask, (OpNode
2553                                   (_.VT (bitconvert (_.LdFrag addr:$src1))),
2554                                   (i32 imm:$src2))))], itins.rm>,
2555                     EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2556   def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2557                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2558                     OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2559                                       _.BroadcastStr##", $dst|$dst, ${src1}"
2560                                                   ##_.BroadcastStr##", $src2}",
2561                     [(set _.KRC:$dst,(OpNode
2562                                      (_.VT (X86VBroadcast
2563                                            (_.ScalarLdFrag addr:$src1))),
2564                                      (i32 imm:$src2)))], itins.rm>,
2565                     EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2566   def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2567                     (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2568                     OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2569                           _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
2570                                                    _.BroadcastStr##", $src2}",
2571                     [(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode
2572                                      (_.VT (X86VBroadcast
2573                                            (_.ScalarLdFrag addr:$src1))),
2574                                      (i32 imm:$src2))))], itins.rm>,
2575                     EVEX_B, EVEX_K,  Sched<[itins.Sched.Folded, ReadAfterLd]>;
2576   }
2577 }
2578
// Instantiates avx512_vector_fpclass for each vector length in `_`.  The
// memory-form mnemonic suffix is chosen per length: "{z}" for 512-bit,
// "{x}" for 128-bit and "{y}" for 256-bit.  Z requires `prd`; Z128/Z256 also
// require HasVLX.  `broadcast` is forwarded unchanged.
2579 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2580                                      bits<8> opc, SDNode OpNode,
2581                                      OpndItins itins, Predicate prd,
2582                                      string broadcast>{
2583   let Predicates = [prd] in {
2584     defm Z    : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
2585                                       _.info512, "{z}", broadcast>, EVEX_V512;
2586   }
2587   let Predicates = [prd, HasVLX] in {
2588     defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
2589                                       _.info128, "{x}", broadcast>, EVEX_V128;
2590     defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
2591                                       _.info256, "{y}", broadcast>, EVEX_V256;
2592   }
2593 }
2594
2595 // FIXME: Is there a better scheduler itinerary for VFPCLASS?
// Builds the whole fpclass family for one mnemonic:
//   PS / PD - packed single/double via avx512_vector_fpclass_all, using
//             opcVec and VecOpNode; the broadcast mnemonic suffix is "{l}"
//             for PS and "{q}" for PD.
//   SS / SD - scalar single/double via avx512_scalar_fpclass, using opcScalar
//             and ScalarOpNode.
// The double-precision variants (PD, SD) additionally carry VEX_W.
2596 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2597              bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{
2598   defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2599                                       VecOpNode, SSE_ALU_F32P, prd, "{l}">,
2600                                       EVEX_CD8<32, CD8VF>;
2601   defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2602                                       VecOpNode, SSE_ALU_F64P, prd, "{q}">,
2603                                       EVEX_CD8<64, CD8VF> , VEX_W;
2604   defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
2605                                   SSE_ALU_F32S, f32x_info, prd>,
2606                                   EVEX_CD8<32, CD8VT1>;
2607   defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
2608                                   SSE_ALU_F64S, f64x_info, prd>,
2609                                   EVEX_CD8<64, CD8VT1>, VEX_W;
2610 }
2611
// VFPCLASS{PS,PD,SS,SD}: vector opcode 0x66, scalar opcode 0x67, all
// requiring HasDQI.
2612 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
2613                                       X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX;
2614
2615 //-----------------------------------------------------------------
2616 // Mask register copy, including
2617 // - copy between mask registers
2618 // - load/store mask registers
2619 // - copy from GPR to mask register and vice versa
2620 //
// Mask-register move instructions for one mask width.
//   opc_kk / opc_km / opc_mk - opcodes of the three forms below
//   OpcodeStr                - mnemonic
//   KRC                      - mask register class
//   vvt                      - mask value type used by the load pattern
//   x86memop                 - memory operand kind for the load/store forms
// Forms:
//   kk - mask register to mask register copy (no selection pattern)
//   km - load a mask register from memory
//   mk - store a mask register to memory
// Every form carries a scheduling class: the braceless `let` below only
// covers `kk`, so the load and store forms name theirs explicitly instead of
// being left without scheduling information.
2621 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2622                          string OpcodeStr, RegisterClass KRC,
2623                          ValueType vvt, X86MemOperand x86memop> {
2624   let hasSideEffects = 0, SchedRW = [WriteMove] in
2625   def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2626              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
2627              IIC_SSE_MOVDQ>;
2628   def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2629              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2630              [(set KRC:$dst, (vvt (load addr:$src)))], IIC_SSE_MOVDQ>,
                  Sched<[WriteLoad]>;
2631   def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2632              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2633              [(store KRC:$src, addr:$dst)], IIC_SSE_MOVDQ>,
                  Sched<[WriteStore]>;
2634 }
2635
// Moves between a general-purpose register class (GRC) and a mask register
// class (KRC):
//   kr - GPR  -> mask register
//   rk - mask -> GPR
// Neither form has a selection pattern; both are marked side-effect free and
// scheduled as WriteMove.
2636 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2637                              string OpcodeStr,
2638                              RegisterClass KRC, RegisterClass GRC> {
2639   let hasSideEffects = 0 in {
2640     def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2641                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
2642                IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
2643     def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2644                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
2645                IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
2646   }
2647 }
2648
// KMOV instantiations per mask width:
//   KMOVB - VK8,  requires HasDQI;    GPR forms use GR32
//   KMOVW - VK16, requires HasAVX512; GPR forms use GR32
//   KMOVD - VK32, requires HasBWI;    GPR forms use GR32
//   KMOVQ - VK64, requires HasBWI;    GPR forms use GR64
// For KMOVD/KMOVQ the kk/km/mk forms and the GPR forms take different
// encoding mixins, so each is emitted by its own `defm` under the same name.
2649 let Predicates = [HasDQI] in
2650   defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2651                avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2652                VEX, PD;
2653
2654 let Predicates = [HasAVX512] in
2655   defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2656                avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2657                VEX, PS;
2658
2659 let Predicates = [HasBWI] in {
2660   defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2661                VEX, PD, VEX_W;
2662   defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2663                VEX, XD;
2664   defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2665                VEX, PS, VEX_W;
2666   defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2667                VEX, XD, VEX_W;
2668 }
2669
2670 // GR from/to mask register
// 16-bit and 8-bit masks travel through a 32-bit GPR: GPR -> mask inserts the
// narrow register into an undefined i32 and copies that into the mask class;
// mask -> GPR copies into GR32 and extracts the low sub-register.
2671 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2672           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2673 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2674           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2675
2676 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2677           (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2678 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2679           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2680
// Extending a 16-bit mask to i32: zext selects KMOVWrk, anyext is a plain
// register-class copy.
2681 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2682           (KMOVWrk VK16:$src)>;
2683 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2684           (COPY_TO_REGCLASS VK16:$src, GR32)>;
2685
// Extending an 8-bit mask to i32: zext uses KMOVBrk when DQI is available and
// otherwise copies to a GPR and zero-extends with MOVZX32rr8; anyext is a
// plain register-class copy.
2686 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2687           (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit))>, Requires<[NoDQI]>;
2688 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2689           (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2690 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2691           (COPY_TO_REGCLASS VK8:$src, GR32)>;
2692
// 32-bit and 64-bit masks map directly onto GR32 / GR64 via register-class
// copies in both directions.
2693 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2694           (COPY_TO_REGCLASS GR32:$src, VK32)>;
2695 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2696           (COPY_TO_REGCLASS VK32:$src, GR32)>;
2697 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2698           (COPY_TO_REGCLASS GR64:$src, VK64)>;
2699 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2700           (COPY_TO_REGCLASS VK64:$src, GR64)>;
2701
2702 // Load/store kreg
// With DQI: 8-bit masks are stored/loaded with KMOVB.  Narrower masks
// (VK4/VK2/VK1) are copied into VK8 before being stored, and v2i1/v4i1 loads
// use KMOVBkm followed by a copy into the narrower class.
2703 let Predicates = [HasDQI] in {
2704   def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
2705             (KMOVBmk addr:$dst, VK8:$src)>;
2706   def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2707             (KMOVBkm addr:$src)>;
2708
2709   def : Pat<(store VK4:$src, addr:$dst),
2710             (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
2711   def : Pat<(store VK2:$src, addr:$dst),
2712             (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
2713   def : Pat<(store VK1:$src, addr:$dst),
2714             (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
2715
2716   def : Pat<(v2i1 (load addr:$src)),
2717             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2718   def : Pat<(v4i1 (load addr:$src)),
2719             (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2720 }
// Without DQI: stores of masks up to 8 bits copy the mask to GR32 and store
// its low byte with MOV8mr; loads use MOVZX32rm8 and copy the result into the
// mask class.
2721 let Predicates = [HasAVX512, NoDQI] in {
2722   def : Pat<(store VK1:$src, addr:$dst),
2723             (MOV8mr addr:$dst,
2724              (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)),
2725               sub_8bit)))>;
2726   def : Pat<(store VK2:$src, addr:$dst),
2727             (MOV8mr addr:$dst,
2728              (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK2:$src, GR32)),
2729               sub_8bit)))>;
2730   def : Pat<(store VK4:$src, addr:$dst),
2731             (MOV8mr addr:$dst,
2732              (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK4:$src, GR32)),
2733               sub_8bit)))>;
2734   def : Pat<(store VK8:$src, addr:$dst),
2735             (MOV8mr addr:$dst,
2736              (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)),
2737               sub_8bit)))>;
2738
2739   def : Pat<(v8i1 (load addr:$src)),
2740             (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2741   def : Pat<(v2i1 (load addr:$src)),
2742             (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK2)>;
2743   def : Pat<(v4i1 (load addr:$src)),
2744             (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK4)>;
2745 }
2746
// Any AVX512 target: 16-bit masks use KMOVW; a v1i1 load reads a byte with
// MOVZX32rm8 and keeps only bit 0 (AND with 1) before copying into VK1.
2747 let Predicates = [HasAVX512] in {
2748   def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
2749             (KMOVWmk addr:$dst, VK16:$src)>;
2750   def : Pat<(v1i1 (load addr:$src)),
2751             (COPY_TO_REGCLASS (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1)), VK1)>;
2752   def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))),
2753             (KMOVWkm addr:$src)>;
2754 }
// With BWI: 32-bit and 64-bit masks use KMOVD / KMOVQ.
2755 let Predicates = [HasBWI] in {
2756   def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
2757             (KMOVDmk addr:$dst, VK32:$src)>;
2758   def : Pat<(v32i1 (bitconvert (i32 (load addr:$src)))),
2759             (KMOVDkm addr:$src)>;
2760   def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
2761             (KMOVQmk addr:$dst, VK64:$src)>;
2762   def : Pat<(v64i1 (bitconvert (i64 (load addr:$src)))),
2763             (KMOVQkm addr:$src)>;
2764 }
2765
// Lowering of scalar <-> mask-vector conversions, all under HasAVX512.
2766 let Predicates = [HasAVX512] in {
       // For one mask class/type pair:
       //  - scalar_to_vector from GR32 is a register-class copy,
       //  - X86kextract of element 0 to i32 is a register-class copy,
       //  - scalar_to_vector from GR8 first widens into an undefined i32.
2767   multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2768     def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2769               (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2770
2771     def : Pat<(i32 (X86kextract maskRC:$src, (iPTR 0))),
2772               (COPY_TO_REGCLASS maskRC:$src, GR32)>;
2773
2774     def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2775               (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2776   }
2777
2778   defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2779   defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2780   defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2781   defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2782   defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2783   defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2784   defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
2785
       // A kshiftl-by-15 / kshiftr-by-15 pair applied to a mask built from
       // GR8 is selected as "AND the widened GPR with 1, then KMOVWkr".
       // NOTE(review): presumably the shift pair isolates bit 0; the same
       // shift amount (15) is used for the v1i1, v16i1 and v8i1 variants -
       // confirm that is intended for the narrower types.
2786   def : Pat<(X86kshiftr  (X86kshiftl (v1i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
2787           (COPY_TO_REGCLASS
2788                 (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
2789                                             GR8:$src, sub_8bit), (i32 1))), VK1)>;
2790   def : Pat<(X86kshiftr  (X86kshiftl (v16i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
2791             (COPY_TO_REGCLASS
2792                 (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
2793                                             GR8:$src, sub_8bit), (i32 1))), VK16)>;
2794   def : Pat<(X86kshiftr  (X86kshiftl (v8i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
2795          (COPY_TO_REGCLASS
2796           (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
2797                                             GR8:$src, sub_8bit), (i32 1))), VK8)>;
2798
2799 }
2800
2801 // Mask unary operation
2802 // - KNOT
// Defines a single register-to-register form (rr) that applies OpNode to a
// mask register of class KRC, guarded by predicate `prd`.
2803 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2804                             RegisterClass KRC, SDPatternOperator OpNode,
2805                             OpndItins itins, Predicate prd> {
2806   let Predicates = [prd] in
2807     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2808                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2809                [(set KRC:$dst, (OpNode KRC:$src))], itins.rr>,
2810                Sched<[itins.Sched]>;
2811 }
2812
// Instantiates avx512_mask_unop for every mask width, appending the width
// letter to the mnemonic:
//   B - VK8,  HasDQI        W - VK16, HasAVX512
//   D - VK32, HasBWI        Q - VK64, HasBWI
// The D and Q variants additionally carry VEX_W.
2813 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2814                                 SDPatternOperator OpNode, OpndItins itins> {
2815   defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2816                             itins, HasDQI>, VEX, PD;
2817   defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2818                             itins, HasAVX512>, VEX, PS;
2819   defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2820                             itins, HasBWI>, VEX, PD, VEX_W;
2821   defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2822                             itins, HasBWI>, VEX, PS, VEX_W;
2823 }
2824
// KNOT{B,W,D,Q}: opcode 0x44, selected from `vnot`.
2825 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SSE_BIT_ITINS_P>;
2826
2827 // The byte form (KNOTB) requires DQI, so without it an 8-bit mask is
     // promoted to 16-bit and inverted with KNOTW.
     // The braceless `let` below covers only the VK8 pattern.
2828 let Predicates = [HasAVX512, NoDQI] in
2829 def : Pat<(vnot VK8:$src),
2830           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2831
     // 4-bit and 2-bit masks are always promoted to 16-bit for KNOTW; these two
     // patterns carry no predicate list of their own.
2832 def : Pat<(vnot VK4:$src),
2833           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2834 def : Pat<(vnot VK2:$src),
2835           (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2836
2837 // Mask binary operation
2838 // - KAND, KANDN, KOR, KXNOR, KXOR
// Defines a single three-operand register form (rr) computing
// OpNode($src1, $src2) on mask registers of class KRC.  The definition is
// guarded by `prd`, and its isCommutable flag is taken from IsCommutable.
2839 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2840                            RegisterClass KRC, SDPatternOperator OpNode,
2841                            OpndItins itins, Predicate prd, bit IsCommutable> {
2842   let Predicates = [prd], isCommutable = IsCommutable in
2843     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2844                !strconcat(OpcodeStr,
2845                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2846                [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
2847                Sched<[itins.Sched]>;
2848 }
2849
// Instantiates avx512_mask_binop for all four mask widths, appending the
// width letter to OpcodeStr:
//   B -> VK8  (HasDQI)      W -> VK16 (prdW, HasAVX512 unless overridden)
//   D -> VK32 (HasBWI)      Q -> VK64 (HasBWI)
// Every form is VEX_4V + VEX_L; B/D use PD, W/Q use PS, and D/Q add VEX_W.
2850 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2851                                SDPatternOperator OpNode, OpndItins itins,
2852                                bit IsCommutable, Predicate prdW = HasAVX512> {
2853   defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2854                              itins, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
2855   defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2856                              itins, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
2857   defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2858                              itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
2859   defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2860                              itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
2861 }
2862
// Pattern fragments for the combined logical forms. In both 'andn' variants
// it is the FIRST operand ($i0) that is inverted: andn(i0, i1) = (~i0) & i1.
// xnor(i0, i1) = ~(i0 ^ i1).
2863 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
2864 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
2865 // These nodes use 'vnot' instead of 'not' to support vectors.
2866 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
2867 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
2868
// Two-source mask instructions. The trailing 0/1 is IsCommutable; KANDN is the
// only non-commutable one. KADD overrides prdW so that its word form also
// requires DQI instead of plain AVX512.
2869 defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,   SSE_BIT_ITINS_P, 1>;
2870 defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,    SSE_BIT_ITINS_P, 1>;
2871 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SSE_BIT_ITINS_P, 1>;
2872 defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,   SSE_BIT_ITINS_P, 1>;
2873 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SSE_BIT_ITINS_P, 0>;
2874 defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  add,   SSE_BIT_ITINS_P, 1, HasDQI>;
2875
// Lowers a logical operation on mask types narrower than 16 bits by widening
// both operands to VK16, applying the 16-bit instruction 'Inst', and copying
// the result back to the ORIGINAL register class of the operands.
//   VOpNode - node matched for the vector mask types (VK8, VK4, VK2)
//   OpNode  - node matched for the single-bit VK1 type
//   Inst    - the 16-bit (W) mask instruction used for the actual operation
2876 multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
2877                             Instruction Inst> {
2878   // With AVX512F, 8-bit mask is promoted to 16-bit mask,
2879   // for the DQI set, this type is legal and KxxxB instruction is used
2880   let Predicates = [NoDQI] in
2881   def : Pat<(VOpNode VK8:$src1, VK8:$src2),
2882             (COPY_TO_REGCLASS
2883               (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
2884                     (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
2885
2886   // All types smaller than 8 bits require conversion anyway
2887   def : Pat<(OpNode VK1:$src1, VK1:$src2),
2888         (COPY_TO_REGCLASS (Inst
2889                            (COPY_TO_REGCLASS VK1:$src1, VK16),
2890                            (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
       // The result class must match the pattern's own type (VK2 / VK4), the
       // same way the VK8 and VK1 patterns above copy back to VK8 / VK1.
2891   def : Pat<(VOpNode VK2:$src1, VK2:$src2),
2892         (COPY_TO_REGCLASS (Inst
2893                            (COPY_TO_REGCLASS VK2:$src1, VK16),
2894                            (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
2895   def : Pat<(VOpNode VK4:$src1, VK4:$src2),
2896         (COPY_TO_REGCLASS (Inst
2897                            (COPY_TO_REGCLASS VK4:$src1, VK16),
2898                            (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
2899 }
2900
// Narrow-mask lowering for each logical operation. The first argument is the
// node matched for VK8/VK4/VK2, the second the node matched for VK1, and the
// third the 16-bit instruction that performs the operation.
2901 defm : avx512_binop_pat<and,   and,  KANDWrr>;
2902 defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
2903 defm : avx512_binop_pat<or,    or,   KORWrr>;
2904 defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
2905 defm : avx512_binop_pat<xor,   xor,  KXORWrr>;
2906
2907 // Mask unpacking
     //
     // Defines KUNPCK<Suffix> (opcode 0x4b) operating on KRC, plus a pattern that
     // selects it for a concat_vectors of two narrower masks:
     //   Suffix - mnemonic suffix ("bw", "wd", "dq")
     //   KRC    - register class of the result and of the instruction operands
     //   VT     - value type of the concatenated result
     //   KRCSrc - register class of each half being concatenated
     //   itins  - itinerary/scheduling info
     //   prd    - predicate guarding both the instruction and the pattern
     // The instruction itself carries no pattern; the Pat below passes the
     // concat operands in swapped order ($src2 first, then $src1).
     // NOTE(review): presumably because the instruction takes the low half
     // from its second source operand - confirm against the ISA reference.
2908 multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
2909                              RegisterClass KRCSrc, OpndItins itins, Predicate prd> {
2910   let Predicates = [prd] in {
2911     let hasSideEffects = 0 in
2912     def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
2913                (ins KRC:$src1, KRC:$src2),
2914                "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
2915                itins.rr>, VEX_4V, VEX_L, Sched<[itins.Sched]>;
2916
2917     def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
2918               (!cast<Instruction>(NAME##rr)
2919                         (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
2920                         (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
2921   }
2922 }
2923
// 8+8 -> 16 (AVX512), 16+16 -> 32 (BWI) and 32+32 -> 64 (BWI) mask unpacks.
2924 defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, SSE_UNPCK, HasAVX512>, PD;
2925 defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, SSE_UNPCK, HasBWI>, PS;
2926 defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, SSE_UNPCK, HasBWI>, PS, VEX_W;
2927
2928 // Mask bit testing
     //
     // Defines the 'rr' form of a mask test instruction. It has no register
     // output: the only result is EFLAGS, set from (OpNode $src1, $src2).
     //   opc/OpcodeStr - opcode byte and full mnemonic
     //   KRC           - mask register class of both sources
     //   itins         - itinerary/scheduling info
     //   prd           - predicate guarding the instruction
2929 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2930                               SDNode OpNode, OpndItins itins, Predicate prd> {
2931   let Predicates = [prd], Defs = [EFLAGS] in
2932     def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
2933                !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
2934                [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
2935                Sched<[itins.Sched]>;
2936 }
2937
// Instantiates avx512_mask_testop for all four mask widths:
//   B -> VK8  (HasDQI)      W -> VK16 (prdW, HasAVX512 unless overridden)
//   Q -> VK64 (HasBWI)      D -> VK32 (HasBWI)
// B/D use the PD prefix, W/Q use PS; Q and D additionally set VEX_W.
2938 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
2939                                 OpndItins itins, Predicate prdW = HasAVX512> {
2940   defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, itins, HasDQI>,
2941                                                                 VEX, PD;
2942   defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, itins, prdW>,
2943                                                                 VEX, PS;
2944   defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, itins, HasBWI>,
2945                                                                 VEX, PS, VEX_W;
2946   defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, itins, HasBWI>,
2947                                                                 VEX, PD, VEX_W;
2948 }
2949
// KORTEST keeps the default word-form predicate (HasAVX512); KTEST overrides
// prdW so that its word form requires DQI.
2950 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SSE_PTEST>;
2951 defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SSE_PTEST, HasDQI>;
2952
2953 // Mask shift
     //
     // Defines the 'ri' form of a mask shift: one mask source plus an 8-bit
     // unsigned immediate shift amount, matched as (OpNode $src, (i8 imm)).
     // The def itself is predicated on HasAVX512.
2954 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2955                              SDNode OpNode, OpndItins itins> {
2956   let Predicates = [HasAVX512] in
2957     def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
2958                  !strconcat(OpcodeStr,
2959                             "\t{$imm, $src, $dst|$dst, $src, $imm}"),
2960                             [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))],
2961                  itins.rr>, Sched<[itins.Sched]>;
2962 }
2963
// Instantiates avx512_mask_shiftop for all four mask widths. The word and
// byte forms share opcode opc1 and the qword and dword forms share opc2; within
// each pair the wider form (W, Q) is distinguished by VEX_W.
// B is wrapped in a HasDQI scope and Q/D in a HasBWI scope, while
// avx512_mask_shiftop itself sets Predicates = [HasAVX512] on its def.
// NOTE(review): confirm which of the two Predicates lists takes effect on the
// resulting B/D/Q records.
2964 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
2965                                SDNode OpNode, OpndItins itins> {
2966   defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2967                                itins>, VEX, TAPD, VEX_W;
2968   let Predicates = [HasDQI] in
2969   defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2970                                itins>, VEX, TAPD;
2971   let Predicates = [HasBWI] in {
2972   defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2973                                itins>, VEX, TAPD, VEX_W;
2974   defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2975                                itins>, VEX, TAPD;
2976   }
2977 }
2978
// Mask shifts by immediate. The first opcode is used for the b/w forms, the
// second for the d/q forms.
2979 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, SSE_PSHUF>;
2980 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, SSE_PSHUF>;
2981
// Lowers a v8i32 compare that produces a v8i1 mask by using the 512-bit
// instruction: each 256-bit operand is inserted at sub_ymm into an
// IMPLICIT_DEF 512-bit register, the compare InstStr#Zrr is applied, and the
// resulting mask is copied to VK8.
// The second pattern folds an AND with a v8i1 mask into the masked form
// InstStr#Zrrk, after copying the mask to VK16.
//   OpNode  - compare node to match
//   InstStr - instruction name prefix (without the Zrr/Zrrk suffix)
2982 multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr> {
2983 def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
2984             (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrr)
2985             (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
2986             (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
2987
2988 def : Pat<(v8i1 (and VK8:$mask,
2989                      (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))),
2990           (COPY_TO_REGCLASS
2991            (!cast<Instruction>(InstStr##Zrrk)
2992             (COPY_TO_REGCLASS VK8:$mask, VK16),
2993             (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
2994             (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
2995            VK8)>;
2996 }
2997
// Same widening scheme as the non-cc lowering, but for compares that carry an
// immediate condition code ($cc) and for a configurable element type: the
// 256-bit type is _.info256.VT and the widened type is _.info512.VT.
// Unmasked compares select InstStr#Zrri; compares ANDed with a v8i1 mask
// select InstStr#Zrrik with the mask copied to VK16.
2998 multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
2999                                                 AVX512VLVectorVTInfo _> {
3000 def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
3001             (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrri)
3002             (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
3003             (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
3004             imm:$cc), VK8)>;
3005
3006 def : Pat<(v8i1 (and VK8:$mask, (OpNode (_.info256.VT VR256X:$src1),
3007                                         (_.info256.VT VR256X:$src2), imm:$cc))),
3008             (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
3009             (COPY_TO_REGCLASS VK8:$mask, VK16),
3010             (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
3011             (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
3012             imm:$cc), VK8)>;
3013 }
3014
// 256-bit compare lowering used only when AVX512 is available without VLX:
// signed greater-than / equality on i32, and the condition-code compares for
// f32, signed i32 and unsigned i32.
3015 let Predicates = [HasAVX512, NoVLX] in {
3016   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD">;
3017   defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD">;
3018
3019   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", avx512vl_f32_info>;
3020   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", avx512vl_i32_info>;
3021   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD", avx512vl_i32_info>;
3022 }
3023
3024 // Mask setting all 0s or 1s
     //
     // Defines a pseudo instruction (opcode 0, no operands, empty asm string)
     // that materializes the constant 'Val' of type VT in register class KRC.
     // It is marked rematerializable and as cheap as a move.
     // NOTE(review): being a pseudo, it has to be expanded elsewhere before
     // emission - the expansion is not visible in this part of the file.
3025 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3026   let Predicates = [HasAVX512] in
3027     let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3028         SchedRW = [WriteZero] in
3029       def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3030                      [(set KRC:$dst, (VT Val))]>;
3031 }
3032
// Instantiates avx512_mask_setop for the 16-, 32- and 64-bit mask types
// (suffixes W, D, Q). There is no byte form; narrower masks are handled by
// separate patterns that reuse the W pseudo.
3033 multiclass avx512_mask_setop_w<PatFrag Val> {
3034   defm W : avx512_mask_setop<VK16, v16i1, Val>;
3035   defm D : avx512_mask_setop<VK32,  v32i1, Val>;
3036   defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3037 }
3038
// KSET0{W,D,Q} produce an all-zeros mask, KSET1{W,D,Q} an all-ones mask.
3039 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3040 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3041
3042 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
     // The same is done for the 4-, 2- and 1-bit mask types: the all-zeros /
     // all-ones constant is produced by the 16-bit KSET0W / KSET1W pseudo and
     // then copied into the narrower register class.
3043 let Predicates = [HasAVX512] in {
3044   def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3045   def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3046   def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3047   def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3048   def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3049   def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3050   def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3051   def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3052 }
3053
3054 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
     //
     // For a (narrow, wide) pair of mask types, both operations at index 0 are
     // selected as a plain register-class copy:
     //   extract_subvector wide, 0          -> copy RC to subRC
     //   insert_subvector undef, narrow, 0  -> copy subRC to RC
     //   subRC/subVT - register class and value type of the narrow mask
     //   RC/VT       - register class and value type of the wide mask
3055 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3056                                              RegisterClass RC, ValueType VT> {
3057   def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3058             (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3059
3060   def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3061             (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3062 }
// Index-0 insert/extract lowering for every (narrow, wide) pair of mask types
// among v1i1, v2i1, v4i1, v8i1, v16i1, v32i1 and v64i1, grouped by the narrow
// type.
3063 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3064 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3065 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3066 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3067 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3068 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3069
3070 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3071 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3072 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3073 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3074 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3075
3076 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3077 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3078 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3079 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3080
3081 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3082 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3083 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3084
3085 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3086 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3087
3088 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3089
3090
// Selects extract_subvector of a mask at an immediate index as a right shift
// of the source mask by that immediate, followed by a copy of the result into
// the narrower register class To.KRC. Used when the source type has its own
// shift instruction.
//   InstrStr - shift instruction name without the "ri" suffix
//   From/To  - descriptors of the source and result mask types
//   prd      - predicate guarding the pattern
3091 multiclass vextract_for_mask_to_mask<string InstrStr, X86KVectorVTInfo From,
3092                                      X86KVectorVTInfo To, Predicate prd> {
3093 let Predicates = [prd] in
3094   def :
3095     Pat<(To.KVT(extract_subvector(From.KVT From.KRC:$src), (iPTR imm:$imm8))),
3096         (To.KVT(COPY_TO_REGCLASS
3097                   (!cast<Instruction>(InstrStr#"ri") From.KVT:$src,
3098                       (i8 imm:$imm8)), To.KRC))>;
3099 }
3100
// Variant of vextract_for_mask_to_mask for source types that are first
// widened to 16 bits: the source is copied to VK16, shifted right with
// KSHIFTRWri by the immediate index, and the result copied to To.KRC.
// No predicate is attached to this pattern.
3101 multiclass vextract_for_mask_to_mask_legal_w<X86KVectorVTInfo From,
3102                                              X86KVectorVTInfo To> {
3103 def :
3104   Pat<(To.KVT(extract_subvector(From.KVT From.KRC:$src), (iPTR imm:$imm8))),
3105       (To.KVT(COPY_TO_REGCLASS
3106                (KSHIFTRWri(COPY_TO_REGCLASS From.KRC:$src, VK16),
3107                    (i8 imm:$imm8)), To.KRC))>;
3108 }
3109
// Sources of 8 bits or fewer (v2i1, v4i1, v8i1) go through the 16-bit shift.
3110 defm : vextract_for_mask_to_mask_legal_w<v2i1_info, v1i1_info>;
3111 defm : vextract_for_mask_to_mask_legal_w<v4i1_info, v1i1_info>;
3112 defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v1i1_info>;
3113 defm : vextract_for_mask_to_mask_legal_w<v4i1_info, v2i1_info>;
3114 defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v2i1_info>;
3115 defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v4i1_info>;
3116
     // 16-, 32- and 64-bit sources use the shift of their own width:
     // KSHIFTRW under HasAVX512, KSHIFTRD / KSHIFTRQ under HasBWI.
3117 defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v1i1_info, HasAVX512>;
3118 defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v1i1_info, HasBWI>;
3119 defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v1i1_info, HasBWI>;
3120 defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v2i1_info, HasAVX512>;
3121 defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v2i1_info, HasBWI>;
3122 defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v2i1_info, HasBWI>;
3123 defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v4i1_info, HasAVX512>;
3124 defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v4i1_info, HasBWI>;
3125 defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v4i1_info, HasBWI>;
3126 defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v8i1_info, HasAVX512>;
3127 defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v8i1_info, HasBWI>;
3128 defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v8i1_info, HasBWI>;
3129 defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v16i1_info, HasBWI>;
3130 defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v16i1_info, HasBWI>;
3131 defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v32i1_info, HasBWI>;
3132
3133 // Patterns for kmask shift
     //
     // Lowers an immediate left/right shift of a narrow mask (register class RC,
     // type VT) through the 16-bit shift instructions: the source is copied to
     // VK16, shifted with KSHIFTLWri / KSHIFTRWri, and the result copied back
     // to RC.
3134 multiclass mask_shift_lowering<RegisterClass RC, ValueType VT> {
3135   def : Pat<(VT (X86kshiftl RC:$src, (i8 imm:$imm))),
3136             (VT (COPY_TO_REGCLASS
3137                    (KSHIFTLWri (COPY_TO_REGCLASS RC:$src, VK16),
3138                                (I8Imm $imm)),
3139                    RC))>;
3140   def : Pat<(VT (X86kshiftr RC:$src, (i8 imm:$imm))),
3141             (VT (COPY_TO_REGCLASS
3142                    (KSHIFTRWri (COPY_TO_REGCLASS RC:$src, VK16),
3143                                (I8Imm $imm)),
3144                    RC))>;
3145 }
3146
// v8i1 is promoted only when DQI is absent; v4i1 and v2i1 are promoted
// whenever AVX512 is available.
3147 defm : mask_shift_lowering<VK8, v8i1>, Requires<[HasAVX512, NoDQI]>;
3148 defm : mask_shift_lowering<VK4, v4i1>, Requires<[HasAVX512]>;
3149 defm : mask_shift_lowering<VK2, v2i1>, Requires<[HasAVX512]>;
3150 //===----------------------------------------------------------------------===//
3151 // AVX-512 - Aligned and unaligned load and store
3152 //
3153
3154
// Defines the register and memory forms of an AVX-512 vector move/load for
// one vector type '_':
//   rr   - plain register move (no pattern)
//   rrkz - zero-masked register move, matched through SelectOprr
//   rm   - load; its pattern is dropped when NoRMPattern is set
//   rrk  - merge-masked register move ($src0 tied to $dst), matched through
//          SelectOprr
//   rmk  - merge-masked load ($src0 tied to $dst)
//   rmkz - zero-masked load
// followed by three patterns that map the masked-load fragment 'mload' onto
// rmkz (pass-through undef or all-zeros) and rmk (pass-through in a register).
// Only the register forms use SelectOprr; rmk and rmkz always match 'vselect'.
//   opc/OpcodeStr - opcode byte and mnemonic
//   itins         - itinerary info (itins.rr for register, itins.rm for memory)
//   ld_frag       - load fragment matched by rm/rmk/rmkz
//   mload         - masked-load fragment matched by the trailing patterns
3155 multiclass avx512_load<bits<8> opc, string OpcodeStr, MoveLoadStoreItins itins,
3156                        X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3157                        bit NoRMPattern = 0,
3158                        SDPatternOperator SelectOprr = vselect> {
3159   let hasSideEffects = 0 in {
3160   def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3161                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3162                     _.ExeDomain, itins.rr>, EVEX, Sched<[WriteMove]>;
3163   def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3164                       (ins _.KRCWM:$mask,  _.RC:$src),
3165                       !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3166                        "${dst} {${mask}} {z}, $src}"),
3167                        [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3168                                            (_.VT _.RC:$src),
3169                                            _.ImmAllZerosV)))], _.ExeDomain,
3170                        itins.rr>, EVEX, EVEX_KZ, Sched<[WriteMove]>;
3171
3172   let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3173   def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3174                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3175                     !if(NoRMPattern, [],
3176                         [(set _.RC:$dst,
3177                           (_.VT (bitconvert (ld_frag addr:$src))))]),
3178                     _.ExeDomain, itins.rm>, EVEX, Sched<[WriteLoad]>;
3179
3180   let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3181     def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3182                       (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3183                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3184                       "${dst} {${mask}}, $src1}"),
3185                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3186                                           (_.VT _.RC:$src1),
3187                                           (_.VT _.RC:$src0))))], _.ExeDomain,
3188                        itins.rr>, EVEX, EVEX_K, Sched<[WriteMove]>;
3189     def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3190                      (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3191                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3192                       "${dst} {${mask}}, $src1}"),
3193                      [(set _.RC:$dst, (_.VT
3194                          (vselect _.KRCWM:$mask,
3195                           (_.VT (bitconvert (ld_frag addr:$src1))),
3196                            (_.VT _.RC:$src0))))], _.ExeDomain, itins.rm>,
3197                      EVEX, EVEX_K, Sched<[WriteLoad]>;
3198   }
3199   def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3200                   (ins _.KRCWM:$mask, _.MemOp:$src),
3201                   OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3202                                 "${dst} {${mask}} {z}, $src}",
3203                   [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
3204                     (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
3205                   _.ExeDomain, itins.rm>, EVEX, EVEX_KZ, Sched<[WriteLoad]>;
3206   }
       // Masked loads: an undef or all-zeros pass-through selects the
       // zero-masking form, a register pass-through the merge-masking form.
3207   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3208             (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3209
3210   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3211             (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3212
3213   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3214             (!cast<Instruction>(NAME#_.ZSuffix##rmk) _.RC:$src0,
3215              _.KRCWM:$mask, addr:$ptr)>;
3216 }
3217
// Instantiates avx512_load for the aligned move of all three vector lengths,
// using each type's AlignedLdFrag and the matching masked_load_aligned<N>
// fragment with the SSE_MOVA itineraries:
//   Z    - 512-bit, guarded by prd
//   Z256 - 256-bit, guarded by prd and HasVLX
//   Z128 - 128-bit, guarded by prd and HasVLX
3218 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3219                                   AVX512VLVectorVTInfo _,
3220                                   Predicate prd> {
3221   let Predicates = [prd] in
3222   defm Z : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info512,
3223                        _.info512.AlignedLdFrag, masked_load_aligned512>,
3224                        EVEX_V512;
3225
3226   let Predicates = [prd, HasVLX] in {
3227   defm Z256 : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info256,
3228                           _.info256.AlignedLdFrag, masked_load_aligned256>,
3229                           EVEX_V256;
3230   defm Z128 : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info128,
3231                           _.info128.AlignedLdFrag, masked_load_aligned128>,
3232                           EVEX_V128;
3233   }
3234 }
3235
// Instantiates avx512_load for the unaligned move of all three vector lengths,
// using each type's LdFrag, masked_load_unaligned and the SSE_MOVU itineraries.
// NoRMPattern and SelectOprr are forwarded unchanged to avx512_load.
//   Z    - 512-bit, guarded by prd
//   Z256 - 256-bit, guarded by prd and HasVLX
//   Z128 - 128-bit, guarded by prd and HasVLX
3236 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3237                                   AVX512VLVectorVTInfo _,
3238                                   Predicate prd,
3239                                   bit NoRMPattern = 0,
3240                                   SDPatternOperator SelectOprr = vselect> {
3241   let Predicates = [prd] in
3242   defm Z : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info512, _.info512.LdFrag,
3243                        masked_load_unaligned, NoRMPattern,
3244                        SelectOprr>, EVEX_V512;
3245
3246   let Predicates = [prd, HasVLX] in {
3247   defm Z256 : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info256, _.info256.LdFrag,
3248                          masked_load_unaligned, NoRMPattern,
3249                          SelectOprr>, EVEX_V256;
3250   defm Z128 : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info128, _.info128.LdFrag,
3251                          masked_load_unaligned, NoRMPattern,
3252                          SelectOprr>, EVEX_V128;
3253   }
3254 }
3255
// Defines the store-opcode forms of an AVX-512 vector move for one vector
// type '_':
//   rr_REV, rrk_REV, rrkz_REV - register-to-register forms encoded with
//       MRMDestReg and printed with a ".s" mnemonic suffix; they carry no
//       patterns and are tagged FoldGenData<Name#rr / rrk / rrkz>
//   mr  - plain store; its pattern is dropped when NoMRPattern is set
//   mrk - masked store, no pattern of its own
// and a pattern mapping the masked-store fragment 'mstore' onto mrk.
//   st_frag - store fragment matched by mr
//   Name    - instruction name prefix used to build the FoldGenData targets
// NOTE(review): the 'let hasSideEffects = 0, mayStore = 1 in' below has no
// braces, so it covers only 'mr' and not 'mrk' - confirm this is intended.
3256 multiclass avx512_store<bits<8> opc, string OpcodeStr, MoveLoadStoreItins itins,
3257                         X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3258                         string Name, bit NoMRPattern = 0> {
3259   let hasSideEffects = 0 in {
3260   def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3261                          OpcodeStr # ".s\t{$src, $dst|$dst, $src}",
3262                          [], _.ExeDomain, itins.rr>, EVEX, FoldGenData<Name#rr>,
3263                          Sched<[WriteMove]>;
3264   def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3265                          (ins _.KRCWM:$mask, _.RC:$src),
3266                          OpcodeStr # ".s\t{$src, ${dst} {${mask}}|"#
3267                          "${dst} {${mask}}, $src}",
3268                          [], _.ExeDomain, itins.rr>,  EVEX, EVEX_K,
3269                          FoldGenData<Name#rrk>, Sched<[WriteMove]>;
3270   def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3271                           (ins _.KRCWM:$mask, _.RC:$src),
3272                           OpcodeStr # ".s\t{$src, ${dst} {${mask}} {z}|" #
3273                           "${dst} {${mask}} {z}, $src}",
3274                           [], _.ExeDomain, itins.rr>, EVEX, EVEX_KZ,
3275                           FoldGenData<Name#rrkz>, Sched<[WriteMove]>;
3276   }
3277
3278   let hasSideEffects = 0, mayStore = 1 in
3279   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3280                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3281                     !if(NoMRPattern, [],
3282                         [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3283                     _.ExeDomain, itins.mr>, EVEX, Sched<[WriteStore]>;
3284   def mrk : AVX512PI<opc, MRMDestMem, (outs),
3285                      (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3286               OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3287                [], _.ExeDomain, itins.mr>, EVEX, EVEX_K, Sched<[WriteStore]>;
3288
3289   def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
3290            (!cast<Instruction>(NAME#_.ZSuffix##mrk) addr:$ptr,
3291                                                     _.KRCWM:$mask, _.RC:$src)>;
3292 }
3293
3294
// Instantiates avx512_store for the unaligned store of all three vector
// lengths, using 'store', masked_store_unaligned and the SSE_MOVU itineraries.
// Name gets the length suffix (Z / Z256 / Z128) appended before being passed
// on; NoMRPattern is forwarded unchanged.
//   Z    - 512-bit, guarded by prd
//   Z256 - 256-bit, guarded by prd and HasVLX
//   Z128 - 128-bit, guarded by prd and HasVLX
3295 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3296                             AVX512VLVectorVTInfo _, Predicate prd,
3297                             string Name, bit NoMRPattern = 0> {
3298   let Predicates = [prd] in
3299   defm Z : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info512, store,
3300                         masked_store_unaligned, Name#Z, NoMRPattern>, EVEX_V512;
3301
3302   let Predicates = [prd, HasVLX] in {
3303     defm Z256 : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info256, store,
3304                              masked_store_unaligned, Name#Z256,
3305                              NoMRPattern>, EVEX_V256;
3306     defm Z128 : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info128, store,
3307                              masked_store_unaligned, Name#Z128,
3308                              NoMRPattern>, EVEX_V128;
3309   }
3310 }
3311
// Instantiates avx512_store for the aligned store of all three vector lengths,
// using 'alignedstore', the matching masked_store_aligned<N> fragment and the
// SSE_MOVA itineraries. Unlike avx512_store_vl there is no NoMRPattern
// argument, so the plain store pattern is always present.
//   Z    - 512-bit, guarded by prd
//   Z256 - 256-bit, guarded by prd and HasVLX
//   Z128 - 128-bit, guarded by prd and HasVLX
3312 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3313                                   AVX512VLVectorVTInfo _,  Predicate prd,
3314                                   string Name> {
3315   let Predicates = [prd] in
3316   defm Z : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info512, alignedstore,
3317                         masked_store_aligned512, Name#Z>, EVEX_V512;
3318
3319   let Predicates = [prd, HasVLX] in {
3320     defm Z256 : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info256, alignedstore,
3321                              masked_store_aligned256, Name#Z256>, EVEX_V256;
3322     defm Z128 : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info128, alignedstore,
3323                              masked_store_aligned128, Name#Z128>, EVEX_V128;
3324   }
3325 }
3326
// Vector move instructions. Each defm combines a load multiclass (load
// opcode) with a store multiclass (store opcode) under one name.
//
// Floating-point moves: aligned (VMOVAPS/VMOVAPD) and unaligned
// (VMOVUPS/VMOVUPD). The unaligned loads pass SelectOprr = null_frag.
// NOTE(review): presumably so that masked register-register selects are
// matched by the aligned forms only - confirm.
3327 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3328                                      HasAVX512>,
3329                avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3330                                       HasAVX512, "VMOVAPS">,
3331                PS, EVEX_CD8<32, CD8VF>;
3332
3333 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3334                                      HasAVX512>,
3335                avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3336                                      HasAVX512, "VMOVAPD">,
3337                PD, VEX_W, EVEX_CD8<64, CD8VF>;
3338
3339 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3340                               0, null_frag>,
3341                avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3342                                "VMOVUPS">,
3343                               PS, EVEX_CD8<32, CD8VF>;
3344
3345 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3346                               0, null_frag>,
3347                avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3348                                "VMOVUPD">,
3349                PD, VEX_W, EVEX_CD8<64, CD8VF>;
3350
     // Integer moves: aligned 32/64-bit element forms.
3351 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3352                                        HasAVX512>,
3353                  avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3354                                        HasAVX512, "VMOVDQA32">,
3355                  PD, EVEX_CD8<32, CD8VF>;
3356
3357 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3358                                        HasAVX512>,
3359                  avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3360                                     HasAVX512, "VMOVDQA64">,
3361                  PD, VEX_W, EVEX_CD8<64, CD8VF>;
3362
     // Unaligned 8/16-bit element forms require BWI. The trailing '1' sets
     // NoRMPattern / NoMRPattern, so their plain load and store carry no
     // selection pattern.
3363 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 1>,
3364                 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
3365                                  HasBWI, "VMOVDQU8", 1>,
3366                 XD, EVEX_CD8<8, CD8VF>;
3367
3368 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 1>,
3369                  avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
3370                                  HasBWI, "VMOVDQU16", 1>,
3371                  XD, VEX_W, EVEX_CD8<16, CD8VF>;
3372
     // Unaligned 32/64-bit element forms; like VMOVUPS/VMOVUPD the loads pass
     // SelectOprr = null_frag.
3373 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3374                                 0, null_frag>,
3375                  avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
3376                                  HasAVX512, "VMOVDQU32">,
3377                  XS, EVEX_CD8<32, CD8VF>;
3378
3379 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3380                                 0, null_frag>,
3381                  avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
3382                                  HasAVX512, "VMOVDQU64">,
3383                  XS, VEX_W, EVEX_CD8<64, CD8VF>;
3384
3385 // Special instructions to help with spilling when we don't have VLX. We need
3386 // to load or store from a ZMM register instead. These are converted in
3387 // expandPostRAPseudos.
     //
     // Load pseudos: 128- and 256-bit, aligned (VMOVAPS*) and unaligned
     // (VMOVUPS*). They have no patterns and an empty asm string.
     // NOTE(review): the VMOVUPS* pseudos here and below also use the MOVA
     // itinerary classes (IIC_SSE_MOVA_P_RM / _MR) - confirm this is intended.
3388 let isReMaterializable = 1, canFoldAsLoad = 1,
3389     isPseudo = 1, SchedRW = [WriteLoad], mayLoad = 1, hasSideEffects = 0 in {
3390 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3391                             "", [], IIC_SSE_MOVA_P_RM>;
3392 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3393                             "", [], IIC_SSE_MOVA_P_RM>;
3394 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3395                             "", [], IIC_SSE_MOVA_P_RM>;
3396 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3397                             "", [], IIC_SSE_MOVA_P_RM>;
3398 }
3399
     // Store pseudos, mirroring the four load pseudos above.
3400 let isPseudo = 1, SchedRW = [WriteStore], mayStore = 1, hasSideEffects = 0 in {
3401 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3402                             "", [], IIC_SSE_MOVA_P_MR>;
3403 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3404                             "", [], IIC_SSE_MOVA_P_MR>;
3405 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3406                             "", [], IIC_SSE_MOVA_P_MR>;
3407 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3408                             "", [], IIC_SSE_MOVA_P_MR>;
3409 }
3410 // vselect(mask, zeros, $src): invert the mask with KNOTWrr and emit the rrkz move of $src.
3411 def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
3412                           (v8i64 VR512:$src))),
3413    (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)), // VK8 -> VK16 for KNOTWrr
3414                                               VK8), VR512:$src)>; // inverted mask copied back to VK8
3415
3416 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3417                            (v16i32 VR512:$src))),
3418                   (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3419
3420 // These patterns exist to prevent the above patterns from introducing a second
3421 // mask inversion when one already exists: (xor $mask, all-ones) uses $mask directly.
3422 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3423                           (bc_v8i64 (v16i32 immAllZerosV)),
3424                           (v8i64 VR512:$src))),
3425                  (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3426 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3427                            (v16i32 immAllZerosV),
3428                            (v16i32 VR512:$src))),
3429                   (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3430
3431 // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
3432 // available. Use a 512-bit operation and extract the low 256 bits (sub_ymm).
3433 let Predicates = [HasAVX512, NoVLX] in {
3434 def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
3435                           (v8f32 VR256X:$src0))),
3436           (EXTRACT_SUBREG
3437            (v16f32
3438             (VMOVAPSZrrk
3439              (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)), // false value, upper half undefined
3440              (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), // mask reinterpreted as a 16-bit write-mask
3441              (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), // true value, upper half undefined
3442            sub_ymm)>;
3443 // Integer (v8i32) version of the same widening, using VMOVDQA32Zrrk.
3444 def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
3445                           (v8i32 VR256X:$src0))),
3446           (EXTRACT_SUBREG
3447            (v16i32
3448             (VMOVDQA32Zrrk
3449              (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
3450              (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
3451              (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
3452            sub_ymm)>;
3453 } // Predicates = [HasAVX512, NoVLX]
3454 // v32i16/v64i8 stores reuse the 32-bit-element moves: VMOVDQA32 for alignedstore, VMOVDQU32 for store.
3455 let Predicates = [HasAVX512] in {
3456   // 512-bit store.
3457   def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3458             (VMOVDQA32Zmr addr:$dst, VR512:$src)>;
3459   def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3460             (VMOVDQA32Zmr addr:$dst, VR512:$src)>;
3461   def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3462             (VMOVDQU32Zmr addr:$dst, VR512:$src)>;
3463   def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3464             (VMOVDQU32Zmr addr:$dst, VR512:$src)>;
3465 } // Predicates = [HasAVX512]
3466 // With VLX, 128/256-bit i16/i8 vector stores likewise map onto the VMOVDQA32/VMOVDQU32 Z128/Z256 forms.
3467 let Predicates = [HasVLX] in {
3468   // 128-bit store.
3469   def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3470             (VMOVDQA32Z128mr addr:$dst, VR128X:$src)>;
3471   def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3472             (VMOVDQA32Z128mr addr:$dst, VR128X:$src)>;
3473   def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3474             (VMOVDQU32Z128mr addr:$dst, VR128X:$src)>;
3475   def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3476             (VMOVDQU32Z128mr addr:$dst, VR128X:$src)>;
3477
3478   // 256-bit store.
3479   def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3480             (VMOVDQA32Z256mr addr:$dst, VR256X:$src)>;
3481   def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3482             (VMOVDQA32Z256mr addr:$dst, VR256X:$src)>;
3483   def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3484             (VMOVDQU32Z256mr addr:$dst, VR256X:$src)>;
3485   def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3486             (VMOVDQU32Z256mr addr:$dst, VR256X:$src)>;
3487 } // Predicates = [HasVLX]
3488 // Masked select (in type Cast) of the index-0 subvector To extracted from From -> masked move InstrStr#rrk/rrkz.
3489 multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
3490                                    X86VectorVTInfo To, X86VectorVTInfo Cast> {
3491   def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask, // merge form: unselected elements come from $src0
3492                               (bitconvert
3493                                (To.VT (extract_subvector
3494                                        (From.VT From.RC:$src), (iPTR 0)))),
3495                               To.RC:$src0)),
3496             (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
3497                       Cast.RC:$src0, Cast.KRCWM:$mask,
3498                       (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>; // the extract becomes a subregister read
3499
3500   def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask, // zeroing form: unselected elements are Cast.ImmAllZerosV
3501                               (bitconvert
3502                                (To.VT (extract_subvector
3503                                        (From.VT From.RC:$src), (iPTR 0)))),
3504                               Cast.ImmAllZerosV)),
3505             (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
3506                       Cast.KRCWM:$mask,
3507                       (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;
3508 }
3509
3510 // Instantiations of masked_move_for_extract; arguments are <move instruction prefix, From, To, Cast>.
3511 let Predicates = [HasVLX] in {
3512 // A masked extract from the first 128-bits of a 256-bit vector can be
3513 // implemented with masked move.
3514 defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info,  v2i64x_info, v2i64x_info>;
3515 defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info,  v4i32x_info, v2i64x_info>;
3516 defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>;
3517 defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info,  v16i8x_info, v2i64x_info>;
3518 defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info,  v2i64x_info, v4i32x_info>;
3519 defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info,  v4i32x_info, v4i32x_info>;
3520 defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>;
3521 defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info,  v16i8x_info, v4i32x_info>;
3522 defm : masked_move_for_extract<"VMOVAPDZ128",   v4f64x_info,  v2f64x_info, v2f64x_info>;
3523 defm : masked_move_for_extract<"VMOVAPDZ128",   v8f32x_info,  v4f32x_info, v2f64x_info>;
3524 defm : masked_move_for_extract<"VMOVAPSZ128",   v4f64x_info,  v2f64x_info, v4f32x_info>;
3525 defm : masked_move_for_extract<"VMOVAPSZ128",   v8f32x_info,  v4f32x_info, v4f32x_info>;
3526
3527 // A masked extract from the first 128-bits of a 512-bit vector can be
3528 // implemented with masked move.
3529 defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info,  v2i64x_info, v2i64x_info>;
3530 defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>;
3531 defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>;
3532 defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info,  v16i8x_info, v2i64x_info>;
3533 defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info,  v2i64x_info, v4i32x_info>;
3534 defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>;
3535 defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>;
3536 defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info,  v16i8x_info, v4i32x_info>;
3537 defm : masked_move_for_extract<"VMOVAPDZ128",   v8f64_info,  v2f64x_info, v2f64x_info>;
3538 defm : masked_move_for_extract<"VMOVAPDZ128",   v16f32_info, v4f32x_info, v2f64x_info>;
3539 defm : masked_move_for_extract<"VMOVAPSZ128",   v8f64_info,  v2f64x_info, v4f32x_info>;
3540 defm : masked_move_for_extract<"VMOVAPSZ128",   v16f32_info, v4f32x_info, v4f32x_info>;
3541
3542 // A masked extract from the first 256-bits of a 512-bit vector can be
3543 // implemented with masked move.
3544 defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info,  v4i64x_info,  v4i64x_info>;
3545 defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info,  v4i64x_info>;
3546 defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>;
3547 defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info,  v32i8x_info,  v4i64x_info>;
3548 defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info,  v4i64x_info,  v8i32x_info>;
3549 defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info,  v8i32x_info>;
3550 defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>;
3551 defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info,  v32i8x_info,  v8i32x_info>;
3552 defm : masked_move_for_extract<"VMOVAPDZ256",   v8f64_info,  v4f64x_info,  v4f64x_info>;
3553 defm : masked_move_for_extract<"VMOVAPDZ256",   v16f32_info, v8f32x_info,  v4f64x_info>;
3554 defm : masked_move_for_extract<"VMOVAPSZ256",   v8f64_info,  v4f64x_info,  v8f32x_info>;
3555 defm : masked_move_for_extract<"VMOVAPSZ256",   v16f32_info, v8f32x_info,  v8f32x_info>;
3556 } // Predicates = [HasVLX]
3557
3558 // Move Int Doubleword to Packed Double Int
3559 // GPR or memory into XMM (vmovd/vmovq), plus codegen-only GR64/i64mem <-> FR64X bitconvert forms.
3560 let ExeDomain = SSEPackedInt in {
3561 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3562                       "vmovd\t{$src, $dst|$dst, $src}",
3563                       [(set VR128X:$dst,
3564                         (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
3565                         EVEX, Sched<[WriteMove]>;
3566 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3567                       "vmovd\t{$src, $dst|$dst, $src}",
3568                       [(set VR128X:$dst,
3569                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
3570                       IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>;
3571 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3572                       "vmovq\t{$src, $dst|$dst, $src}",
3573                         [(set VR128X:$dst,
3574                           (v2i64 (scalar_to_vector GR64:$src)))],
3575                           IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
3576 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in // patternless load form
3577 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3578                       (ins i64mem:$src),
3579                       "vmovq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>,
3580                       EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteLoad]>;
3581 let isCodeGenOnly = 1 in { // scalar FR64X <-> GR64/i64mem bitconvert forms
3582 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3583                        "vmovq\t{$src, $dst|$dst, $src}",
3584                        [(set FR64X:$dst, (bitconvert GR64:$src))],
3585                        IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
3586 def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
3587                       "vmovq\t{$src, $dst|$dst, $src}",
3588                       [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
3589                       EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>; // NOTE(review): <8, CD8VT8> vs <64, CD8VT1> on sibling 64-bit forms - confirm intended
3590 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3591                          "vmovq\t{$src, $dst|$dst, $src}",
3592                          [(set GR64:$dst, (bitconvert FR64X:$src))],
3593                          IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
3594 def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
3595                          "vmovq\t{$src, $dst|$dst, $src}",
3596                          [(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
3597                          IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
3598                          EVEX_CD8<64, CD8VT1>;
3599 } // isCodeGenOnly = 1
3600 } // ExeDomain = SSEPackedInt
3601
3602 // Move Int Doubleword to Single Scalar
3603 // GR32 or i32mem into FR32X via bitconvert (vmovd); both forms are codegen-only.
3604 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3605 def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3606                       "vmovd\t{$src, $dst|$dst, $src}",
3607                       [(set FR32X:$dst, (bitconvert GR32:$src))],
3608                       IIC_SSE_MOVDQ>, EVEX, Sched<[WriteMove]>;
3609
3610 def VMOVDI2SSZrm  : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
3611                       "vmovd\t{$src, $dst|$dst, $src}",
3612                       [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
3613                       IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>;
3614 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3615
3616 // Move doubleword from xmm register to r/m32
3617 // Element 0 of a v4i32 is extracted to GR32 (rr) or stored to i32mem (mr).
3618 let ExeDomain = SSEPackedInt in {
3619 def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3620                        "vmovd\t{$src, $dst|$dst, $src}",
3621                        [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3622                                         (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
3623                        EVEX, Sched<[WriteMove]>;
3624 def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3625                        (ins i32mem:$dst, VR128X:$src),
3626                        "vmovd\t{$src, $dst|$dst, $src}",
3627                        [(store (i32 (extractelt (v4i32 VR128X:$src),
3628                                      (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
3629                        EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteStore]>;
3630 } // ExeDomain = SSEPackedInt
3631
3632 // Move quadword from xmm1 register to r/m64
3633 // Element 0 of a v2i64 to GR64 or i64mem; VMOVPQI2QIZrr is the patternless 0xD6 register form ("vmovq.s").
3634 let ExeDomain = SSEPackedInt in {
3635 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3636                       "vmovq\t{$src, $dst|$dst, $src}",
3637                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3638                                                    (iPTR 0)))],
3639                       IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, Sched<[WriteMove]>,
3640                       Requires<[HasAVX512, In64BitMode]>;
3641
3642 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in // patternless 0x7E store form
3643 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3644                       "vmovq\t{$src, $dst|$dst, $src}",
3645                       [], IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, Sched<[WriteStore]>,
3646                       Requires<[HasAVX512, In64BitMode]>;
3647
3648 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs), // 0xD6 store form; this one carries the store pattern
3649                       (ins i64mem:$dst, VR128X:$src),
3650                       "vmovq\t{$src, $dst|$dst, $src}",
3651                       [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3652                               addr:$dst)], IIC_SSE_MOVDQ>,
3653                       EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3654                       Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
3655
3656 let hasSideEffects = 0 in
3657 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3658                              (ins VR128X:$src),
3659                              "vmovq.s\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>,
3660                              EVEX, VEX_W, Sched<[WriteMove]>;
3661 } // ExeDomain = SSEPackedInt
3662
3663 // Move Scalar Single to Double Int
3664 // FR32X to GR32 (rr) or to i32mem (mr) via bitconvert (vmovd); both forms are codegen-only.
3665 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3666 def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3667                       (ins FR32X:$src),
3668                       "vmovd\t{$src, $dst|$dst, $src}",
3669                       [(set GR32:$dst, (bitconvert FR32X:$src))],
3670                       IIC_SSE_MOVD_ToGP>, EVEX, Sched<[WriteMove]>;
3671 def VMOVSS2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3672                       (ins i32mem:$dst, FR32X:$src),
3673                       "vmovd\t{$src, $dst|$dst, $src}",
3674                       [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
3675                       IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteStore]>;
3676 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3677
3678 // Move Quadword Int to Packed Quadword Int
3679 // Loads an i64 from memory and places it in a v2i64 via scalar_to_vector (vmovq, opcode 0x7E).
3680 let ExeDomain = SSEPackedInt in {
3681 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3682                       (ins i64mem:$src),
3683                       "vmovq\t{$src, $dst|$dst, $src}",
3684                       [(set VR128X:$dst,
3685                         (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3686                       EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>; // NOTE(review): <8, CD8VT8> rather than <64, CD8VT1> - confirm intended
3687 } // ExeDomain = SSEPackedInt
3688
3689 //===----------------------------------------------------------------------===//
3690 // AVX-512  MOVSS, MOVSD
3691 //===----------------------------------------------------------------------===//
3692 // Scalar move forms for one element type: rr/rrkz/rrk (0x10 reg), rm/rmk/rmkz (0x10 load), mr/mrk (0x11 store).
3693 multiclass avx512_move_scalar<string asm, SDNode OpNode,
3694                               X86VectorVTInfo _> {
3695   def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), // unmasked register form
3696              (ins _.RC:$src1, _.RC:$src2),
3697              !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3698              [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3699              _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, Sched<[WriteMove]>;
3700   def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), // masked; the select's false value is all zeros
3701               (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3702               !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3703               "$dst {${mask}} {z}, $src1, $src2}"),
3704               [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3705                                       (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3706                                       _.ImmAllZerosV)))],
3707               _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_KZ, Sched<[WriteMove]>;
3708   let Constraints = "$src0 = $dst"  in // masked; the select's false value $src0 is tied to $dst
3709   def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3710              (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3711              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3712              "$dst {${mask}}, $src1, $src2}"),
3713              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3714                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3715                                      (_.VT _.RC:$src0))))],
3716              _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_K, Sched<[WriteMove]>;
3717   let canFoldAsLoad = 1, isReMaterializable = 1 in // plain scalar load into the scalar class _.FRC
3718   def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3719              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3720              [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3721              _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, Sched<[WriteLoad]>;
3722   let mayLoad = 1, hasSideEffects = 0 in { // masked loads carry no pattern here, so the flags are explicit
3723     let Constraints = "$src0 = $dst" in
3724     def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3725                (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3726                !strconcat(asm, "\t{$src, $dst {${mask}}|",
3727                "$dst {${mask}}, $src}"),
3728                [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_K, Sched<[WriteLoad]>;
3729     def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3730                (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3731                !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3732                "$dst {${mask}} {z}, $src}"),
3733                [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_KZ, Sched<[WriteLoad]>;
3734   }
3735   def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src), // plain scalar store
3736              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3737              [(store _.FRC:$src, addr:$dst)],  _.ExeDomain, IIC_SSE_MOV_S_MR>,
3738              EVEX, Sched<[WriteStore]>;
3739   let mayStore = 1, hasSideEffects = 0 in // masked store carries no pattern here, so the flags are explicit
3740   def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3741               (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
3742               !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3743               [], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K, Sched<[WriteStore]>;
3744 }
3745 // Instantiate the scalar moves: vmovss (f32x_info, XS, CD8<32>) and vmovsd (f64x_info, XD, VEX_W, CD8<64>).
3746 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
3747                                   VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
3748
3749 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
3750                                   VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
3751
3752 // Lowers OpNode($src0, scalar_to_vector(X86selects(bit 0 of GR32 $mask, $src1, $src2-or-zero))) to rrk/rrkz.
3753 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3754                                        PatLeaf ZeroFP, X86VectorVTInfo _> {
3755 // Merge form: the select's false value $src2 is passed as the first operand of rrk.
3756 def : Pat<(_.VT (OpNode _.RC:$src0,
3757                         (_.VT (scalar_to_vector
3758                                   (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
3759                                                        (_.EltVT _.FRC:$src1),
3760                                                        (_.EltVT _.FRC:$src2))))))),
3761           (!cast<Instruction>(InstrStr#rrk)
3762                         (COPY_TO_REGCLASS _.FRC:$src2, _.RC),
3763                         (COPY_TO_REGCLASS GR32:$mask, VK1WM),
3764                         (_.VT _.RC:$src0),
3765                         (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
3766 // Zeroing form: the select's false value is ZeroFP, so rrkz is used and no passthru operand is needed.
3767 def : Pat<(_.VT (OpNode _.RC:$src0,
3768                         (_.VT (scalar_to_vector
3769                                   (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
3770                                                        (_.EltVT _.FRC:$src1),
3771                                                        (_.EltVT ZeroFP))))))),
3772           (!cast<Instruction>(InstrStr#rrkz)
3773                         (COPY_TO_REGCLASS GR32:$mask, VK1WM),
3774                         (_.VT _.RC:$src0),
3775                         (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
3776 }
3777 // 512-bit masked_store of a 128-bit value inserted at index 0 of undef -> InstrStr#mrk, mask copied to VK1WM.
3778 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3779                                         dag Mask, RegisterClass MaskRC> {
3780
3781 def : Pat<(masked_store addr:$dst, Mask,
3782              (_.info512.VT (insert_subvector undef,
3783                                (_.info256.VT (insert_subvector undef,
3784                                                  (_.info128.VT _.info128.RC:$src),
3785                                                  (iPTR 0))),
3786                                (iPTR 0)))),
3787           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3788                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3789                       (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>; // stored value is the scalar-class copy of $src
3790
3791 }
3792 // As avx512_store_scalar_lowering, but $mask is first inserted at `subreg` of an undefined i32 register.
3793 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
3794                                                AVX512VLVectorVTInfo _,
3795                                                dag Mask, RegisterClass MaskRC,
3796                                                SubRegIndex subreg> {
3797
3798 def : Pat<(masked_store addr:$dst, Mask,
3799              (_.info512.VT (insert_subvector undef,
3800                                (_.info256.VT (insert_subvector undef,
3801                                                  (_.info128.VT _.info128.RC:$src),
3802                                                  (iPTR 0))),
3803                                (iPTR 0)))),
3804           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3805                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3806                       (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
3807
3808 }
3809 // Low 128 bits of a 512-bit masked_load -> InstrStr#rmkz (zero passthru) or InstrStr#rmk (X86vzmovl passthru).
3810 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3811                                        dag Mask, RegisterClass MaskRC> {
3812
3813 def : Pat<(_.info128.VT (extract_subvector
3814                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
3815                                         (_.info512.VT (bitconvert
3816                                                        (v16i32 immAllZerosV))))), // passthru is all zeros
3817                            (iPTR 0))),
3818           (!cast<Instruction>(InstrStr#rmkz)
3819                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3820                       addr:$srcAddr)>;
3821
3822 def : Pat<(_.info128.VT (extract_subvector
3823                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3824                       (_.info512.VT (insert_subvector undef,
3825                             (_.info256.VT (insert_subvector undef,
3826                                   (_.info128.VT (X86vzmovl _.info128.RC:$src)), // passthru is X86vzmovl of $src
3827                                   (iPTR 0))),
3828                             (iPTR 0))))),
3829                 (iPTR 0))),
3830           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
3831                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3832                       addr:$srcAddr)>;
3833
3834 }
3835 // As avx512_load_scalar_lowering, but $mask is first inserted at `subreg` of an undefined i32 register.
3836 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
3837                                               AVX512VLVectorVTInfo _,
3838                                               dag Mask, RegisterClass MaskRC,
3839                                               SubRegIndex subreg> {
3840
3841 def : Pat<(_.info128.VT (extract_subvector
3842                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
3843                                         (_.info512.VT (bitconvert
3844                                                        (v16i32 immAllZerosV))))), // passthru is all zeros
3845                            (iPTR 0))),
3846           (!cast<Instruction>(InstrStr#rmkz)
3847                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3848                       addr:$srcAddr)>;
3849
3850 def : Pat<(_.info128.VT (extract_subvector
3851                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3852                       (_.info512.VT (insert_subvector undef,
3853                             (_.info256.VT (insert_subvector undef,
3854                                   (_.info128.VT (X86vzmovl _.info128.RC:$src)), // passthru is X86vzmovl of $src
3855                                   (iPTR 0))),
3856                             (iPTR 0))))),
3857                 (iPTR 0))),
3858           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
3859                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3860                       addr:$srcAddr)>;
3861
3862 }
3863 // Instantiate the scalar lowering helpers; every Mask dag keeps only bit 0 of the GPR mask (and with 1).
3864 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
3865 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
3866
3867 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
3868                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
3869 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
3870                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
3871 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
3872                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
3873
3874 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
3875                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
3876 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
3877                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
3878 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
3879                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
3880 // Scalar f32/f64 X86selects (GR8 bit-0 mask or VK1WM mask) -> VMOVSSZrrk/VMOVSDZrrk on VR128X copies.
3881 def : Pat<(f32 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
3882                            (f32 FR32X:$src1), (f32 FR32X:$src2))),
3883           (COPY_TO_REGCLASS
3884             (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X), // false value is the first (passthru) operand
3885                         (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF),
3886                           GR8:$mask, sub_8bit)), VK1WM),
3887             (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src1, VR128X)), // third operand is left undefined
3888             FR32X)>;
3889
3890 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
3891           (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
3892            VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
3893            (COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
3894
3895 def : Pat<(f64 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
3896                            (f64 FR64X:$src1), (f64 FR64X:$src2))),
3897           (COPY_TO_REGCLASS
3898             (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
3899                         (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF),
3900                           GR8:$mask, sub_8bit)), VK1WM),
3901             (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
3902             FR64X)>;
3903
3904 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
3905           (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
3906            VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
3907            (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
3908 // mask_store_ss intrinsic -> VMOVSSZmrk: GR8 mask widened to i32 via sub_8bit, then copied to VK1WM.
3909 def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
3910           (VMOVSSZmrk addr:$dst, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM),
3911            (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
3912 // Patternless 0x11 MRMDestReg register forms of vmovss/vmovsd (asm "...s"); FoldGenData names the 0x10 twin.
3913 let hasSideEffects = 0 in {
3914   def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3915                            (ins VR128X:$src1, VR128X:$src2),
3916                            "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3917                            [], IIC_SSE_MOV_S_RR>, XS, EVEX_4V, VEX_LIG,
3918                            FoldGenData<"VMOVSSZrr">, Sched<[WriteMove]>;
3919
3920 let Constraints = "$src0 = $dst" in // merge-masked form: $src0 is tied to $dst
3921   def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3922                              (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
3923                                                    VR128X:$src1, VR128X:$src2),
3924                              "vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
3925                                         "$dst {${mask}}, $src1, $src2}",
3926                              [], IIC_SSE_MOV_S_RR>, EVEX_K, XS, EVEX_4V, VEX_LIG,
3927                              FoldGenData<"VMOVSSZrrk">, Sched<[WriteMove]>;
3928
3929   def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3930                          (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
3931                          "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
3932                                     "$dst {${mask}} {z}, $src1, $src2}",
3933                          [], IIC_SSE_MOV_S_RR>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
3934                          FoldGenData<"VMOVSSZrrkz">, Sched<[WriteMove]>;
3935
3936   def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3937                            (ins VR128X:$src1, VR128X:$src2),
3938                            "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3939                            [], IIC_SSE_MOV_S_RR>, XD, EVEX_4V, VEX_LIG, VEX_W,
3940                            FoldGenData<"VMOVSDZrr">, Sched<[WriteMove]>;
3941
3942 let Constraints = "$src0 = $dst" in // merge-masked form: $src0 is tied to $dst
3943   def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3944                              (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
3945                                                    VR128X:$src1, VR128X:$src2),
3946                              "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
3947                                         "$dst {${mask}}, $src1, $src2}",
3948                              [], IIC_SSE_MOV_S_RR>, EVEX_K, XD, EVEX_4V, VEX_LIG,
3949                              VEX_W, FoldGenData<"VMOVSDZrrk">, Sched<[WriteMove]>;
3950
3951   def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3952                               (ins f64x_info.KRCWM:$mask, VR128X:$src1,
3953                                                           VR128X:$src2),
3954                               "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
3955                                          "$dst {${mask}} {z}, $src1, $src2}",
3956                               [], IIC_SSE_MOV_S_RR>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
3957                               VEX_W, FoldGenData<"VMOVSDZrrkz">, Sched<[WriteMove]>;
3958 } // hasSideEffects = 0
3959
3960 let Predicates = [HasAVX512] in {
3961   let AddedComplexity = 15 in {
       // Register forms of X86vzmovl on 128-bit vectors: keep the low element of
       // $src and take every other element from a zeroed vector
       // (AVX512_128_SET0) by using it as the first operand of VMOVSS/VMOVSD.
3962   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
3963             (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
3964   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
3965             (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
       // The scalar is re-classed to VR128X (not VR128): VMOVSDZrr takes VR128X
       // operands, and the other FR64X -> vector copies in this file use VR128X,
       // so there is no reason to restrict the value to the narrower class.
3966   def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
3967             (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
3968                        (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
3969   }
3970
3971   // Move low f32 and clear high bits.
       // For 256-bit and 512-bit X86vzmovl the low xmm subregister is extracted,
       // merged with a zeroed vector (AVX512_128_SET0) by VMOVSSZrr, and the
       // 128-bit result is widened back with SUBREG_TO_REG.
3972   def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
3973             (SUBREG_TO_REG (i32 0),
3974              (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
3975               (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
3976   def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
3977             (SUBREG_TO_REG (i32 0),
3978              (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
3979               (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
3980   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
3981             (SUBREG_TO_REG (i32 0),
3982              (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
3983               (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), sub_xmm)>;
3984   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
3985             (SUBREG_TO_REG (i32 0),
3986              (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
3987               (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;
3988
// Folded-load forms of zero-extending scalar moves. All patterns in this group
// are given AddedComplexity = 20 and select a scalar load instruction
// (VMOVSSZrm, VMOVSDZrm or VMOVDI2PDIZrm) in place of a load followed by a
// register zeroing move.
3989   let AddedComplexity = 20 in {
3990   // MOVSSrm zeros the high parts of the register; the 128-bit patterns below
3991   // only re-class the load result with COPY_TO_REGCLASS. The AVX versions
       // also write: DST[255:128] <- 0
3992   def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
3993             (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
3994   def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
3995             (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
3996   def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
3997             (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
3998   def : Pat<(v4f32 (X86vzload addr:$src)),
3999             (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
4000
4001   // MOVSDrm zeros the high parts of the register; the 128-bit patterns below
4002   // only re-class the load result with COPY_TO_REGCLASS. The AVX versions
       // also write: DST[255:128] <- 0
4003   def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
4004             (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4005   def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4006             (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4007   def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
4008             (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4009   def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
4010             (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4011   def : Pat<(v2f64 (X86vzload addr:$src)),
4012             (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4013
4014   // Represent the same patterns above but in the form they appear for
4015   // 256-bit types; here the 128-bit load result is widened with SUBREG_TO_REG.
4016   def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
4017                    (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
4018             (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4019   def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
4020                    (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
4021             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4022   def : Pat<(v8f32 (X86vzload addr:$src)),
4023             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4024   def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
4025                    (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
4026             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4027   def : Pat<(v4f64 (X86vzload addr:$src)),
4028             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4029
4030   // Represent the same patterns above but in the form they appear for
4031   // 512-bit types; here the 128-bit load result is widened with SUBREG_TO_REG.
4032   def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
4033                    (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
4034             (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4035   def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
4036                    (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
4037             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4038   def : Pat<(v16f32 (X86vzload addr:$src)),
4039             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4040   def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
4041                    (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
4042             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4043   def : Pat<(v8f64 (X86vzload addr:$src)),
4044             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4045   }
// 256-bit zero-extending i64 load: a loadi64 placed in the low 128 bits of an
// otherwise-undef v4i64 under X86vzmovl is selected as VMOVQI2PQIZrm, widened
// with SUBREG_TO_REG. This pattern sits outside the AddedComplexity groups.
4046   def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
4047                    (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
4048             (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
4049
4050   // Move low f64 and clear high bits.
       // For 256-bit and 512-bit X86vzmovl on 64-bit elements (both f64 and i64
       // vectors) the low xmm subregister is extracted, merged with a zeroed
       // vector (AVX512_128_SET0) by VMOVSDZrr, and widened with SUBREG_TO_REG.
4051   def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4052             (SUBREG_TO_REG (i32 0),
4053              (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
4054                        (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
4055   def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4056             (SUBREG_TO_REG (i32 0),
4057              (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
4058                        (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;
4059
4060   def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4061             (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
4062                        (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
4063   def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4064             (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
4065                        (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;
4066
4067   // Extract and store: storing element 0 of a v4f32 is selected as a scalar
       // VMOVSSZmr store of the same register re-classed to FR32X.
4068   def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
4069                    addr:$dst),
4070             (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
4071
4072   // Shuffle with VMOVSS: X86Movss nodes are selected as VMOVSSZrr; a scalar
       // FR32X second operand is first re-classed to VR128X.
4073   def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
4074             (VMOVSSZrr (v4i32 VR128X:$src1), VR128X:$src2)>;
4075
4076   def : Pat<(v4f32 (X86Movss VR128X:$src1, (scalar_to_vector FR32X:$src2))),
4077             (VMOVSSZrr VR128X:$src1,
4078                        (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
4079
4080   // Shuffle with VMOVSD: X86Movsd nodes are selected as VMOVSDZrr; a scalar
       // FR64X second operand is first re-classed to VR128X.
4081   def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
4082             (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
4083
4084   def : Pat<(v2f64 (X86Movsd VR128X:$src1, (scalar_to_vector FR64X:$src2))),
4085             (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
4086
       // X86Movlpd and X86Movlps with register operands are both selected as
       // VMOVSDZrr.
4087   def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
4088             (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
4089   def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
4090             (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
4091 }
4092
// Register-to-register "vmovq" (opcode 0x7E, MRMSrcReg, EVEX with VEX_W) whose
// pattern matches X86vzmovl on a v2i64 register source. The definition is
// given AddedComplexity = 15.
4093 let AddedComplexity = 15 in
4094 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4095                                 (ins VR128X:$src),
4096                                 "vmovq\t{$src, $dst|$dst, $src}",
4097                                 [(set VR128X:$dst, (v2i64 (X86vzmovl
4098                                                    (v2i64 VR128X:$src))))],
4099                                 IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
4100
4101 let Predicates = [HasAVX512] in {
4102   let AddedComplexity = 15 in {
         // X86vzmovl of a general-purpose register moved into element 0 is
         // selected as the GPR-to-vector move itself (VMOVDI2PDIZrr for GR32,
         // VMOV64toPQIZrr for GR64); the 256-bit and 512-bit i64 forms widen
         // that result with SUBREG_TO_REG.
4103     def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4104               (VMOVDI2PDIZrr GR32:$src)>;
4105
4106     def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4107               (VMOV64toPQIZrr GR64:$src)>;
4108
4109     def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
4110                                  (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
4111               (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
4112
4113     def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
4114                                  (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
4115               (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
4116   }
4117   // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
       // The patterns in this group are given AddedComplexity = 20 and select the
       // movd/movq load forms (VMOVDI2PDIZrm / VMOVQI2PQIZrm) for zero-extending
       // loads; the 256-bit forms widen the result with SUBREG_TO_REG.
4118   let AddedComplexity = 20 in {
4119     def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
4120               (VMOVDI2PDIZrm addr:$src)>;
4121     def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
4122               (VMOVDI2PDIZrm addr:$src)>;
4123     def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
4124               (VMOVDI2PDIZrm addr:$src)>;
4125     def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
4126               (VMOVDI2PDIZrm addr:$src)>;
4127     def : Pat<(v4i32 (X86vzload addr:$src)),
4128               (VMOVDI2PDIZrm addr:$src)>;
4129     def : Pat<(v8i32 (X86vzload addr:$src)),
4130               (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4131     def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
4132               (VMOVQI2PQIZrm addr:$src)>;
         // Register (not load) form: v2f64 X86vzmovl is selected as the
         // register-to-register vmovq defined as VMOVZPQILo2PQIZrr.
4133     def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4134               (VMOVZPQILo2PQIZrr VR128X:$src)>;
4135     def : Pat<(v2i64 (X86vzload addr:$src)),
4136               (VMOVQI2PQIZrm addr:$src)>;
4137     def : Pat<(v4i64 (X86vzload addr:$src)),
4138               (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
4139   }
4140
4141   // Use regular 128-bit instructions to match 256-bit and 512-bit
       // scalar_to_vec+zext of a GR32: VMOVDI2PDIZrr widened with SUBREG_TO_REG.
4142   def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
4143                                (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
4144             (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
4145   def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
4146                                 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
4147             (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
4148
4149   // Use regular 128-bit load instructions to match 512-bit zero-extending
       // loads (X86vzload); the result is widened with SUBREG_TO_REG.
4150   def : Pat<(v16i32 (X86vzload addr:$src)),
4151             (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4152   def : Pat<(v8i64 (X86vzload addr:$src)),
4153             (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
4154 }
4155 //===----------------------------------------------------------------------===//
4156 // AVX-512 - Non-temporals
4157 //===----------------------------------------------------------------------===//
// vmovntdqa load instructions (opcode 0x2A, MRMSrcMem) for 512-, 256- and
// 128-bit destinations, all scheduled as WriteLoad. None carries a selection
// pattern ([]). The 256-bit and 128-bit forms are predicated on HasVLX.
4158 let SchedRW = [WriteLoad] in {
4159   def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4160                         (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4161                         [], SSEPackedInt>, EVEX, T8PD, EVEX_V512,
4162                         EVEX_CD8<64, CD8VF>;
4163
4164   let Predicates = [HasVLX] in {
4165     def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4166                          (ins i256mem:$src),
4167                          "vmovntdqa\t{$src, $dst|$dst, $src}",
4168                          [], SSEPackedInt>, EVEX, T8PD, EVEX_V256,
4169                          EVEX_CD8<64, CD8VF>;
4170
4171     def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4172                         (ins i128mem:$src),
4173                         "vmovntdqa\t{$src, $dst|$dst, $src}",
4174                         [], SSEPackedInt>, EVEX, T8PD, EVEX_V128,
4175                         EVEX_CD8<64, CD8VF>;
4176   }
4177 }
4178
// Defines the memory-destination form (mr) of a store instruction for one
// vector type.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   _         - vector type info supplying MemOp, RC, VT, ExeDomain, EltSize.
//   st_frag   - store fragment matched by the pattern (defaults to
//               alignednontemporalstore).
//   itin      - instruction itinerary (defaults to IIC_SSE_MOVNT).
// The def is scheduled as WriteStore and given AddedComplexity = 400.
4179 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4180                         PatFrag st_frag = alignednontemporalstore,
4181                         InstrItinClass itin = IIC_SSE_MOVNT> {
4182   let SchedRW = [WriteStore], AddedComplexity = 400 in
4183   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4184                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4185                     [(st_frag (_.VT _.RC:$src), addr:$dst)],
4186                     _.ExeDomain, itin>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4187 }
4188
// Instantiates avx512_movnt for all three vector lengths of VTInfo: the
// 512-bit form (Z) under HasAVX512, and the 256-bit (Z256) and 128-bit (Z128)
// forms under HasAVX512 + HasVLX.
4189 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4190                                                   AVX512VLVectorVTInfo VTInfo> {
4191   let Predicates = [HasAVX512] in
4192     defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
4193
4194   let Predicates = [HasAVX512, HasVLX] in {
4195     defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
4196     defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
4197   }
4198 }
4199
// Store instantiations of avx512_movnt_vl: vmovntdq on i64 vectors, vmovntpd
// on f64 vectors (VEX_W) and vmovntps on f32 vectors.
4200 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>, PD;
4201 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>, PD, VEX_W;
4202 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>, PS;
4203
// Extra 512-bit patterns (HasAVX512, AddedComplexity = 400): aligned
// non-temporal stores of v16i32/v32i16/v64i8 are selected as VMOVNTDQZmr, and
// aligned non-temporal loads of v8f64/v16f32/v8i64 as VMOVNTDQAZrm.
4204 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4205   def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4206             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4207   def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4208             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4209   def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4210             (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4211
4212   def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4213             (VMOVNTDQAZrm addr:$src)>;
4214   def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4215             (VMOVNTDQAZrm addr:$src)>;
4216   def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4217             (VMOVNTDQAZrm addr:$src)>;
4218 }
4219
// Extra 256-bit and 128-bit patterns (HasVLX, AddedComplexity = 400): aligned
// non-temporal stores of the 32/16/8-bit integer element types are selected as
// VMOVNTDQZ256mr / VMOVNTDQZ128mr, and aligned non-temporal loads of the
// f64/f32/i64 element types as VMOVNTDQAZ256rm / VMOVNTDQAZ128rm.
4220 let Predicates = [HasVLX], AddedComplexity = 400 in {
4221   def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4222             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4223   def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4224             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4225   def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4226             (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4227
4228   def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4229             (VMOVNTDQAZ256rm addr:$src)>;
4230   def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4231             (VMOVNTDQAZ256rm addr:$src)>;
4232   def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4233             (VMOVNTDQAZ256rm addr:$src)>;
4234
4235   def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4236             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4237   def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4238             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4239   def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4240             (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4241
4242   def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4243             (VMOVNTDQAZ128rm addr:$src)>;
4244   def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4245             (VMOVNTDQAZ128rm addr:$src)>;
4246   def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4247             (VMOVNTDQAZ128rm addr:$src)>;
4248 }
4249
4250 //===----------------------------------------------------------------------===//
4251 // AVX-512 - Integer arithmetic
4252 //
// Defines the two basic forms of a binary vector operation through
// AVX512_maskable:
//   rr - both operands in registers; receives IsCommutable.
//   rm - second operand is a full-width load (_.LdFrag under bitconvert);
//        scheduled with the folded-load variant of itins.Sched.
// Parameters: opc (opcode), OpcodeStr (mnemonic), OpNode (DAG node matched),
// _ (vector type info), itins (rr/rm itineraries and Sched class),
// IsCommutable (defaults to 0).
4253 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4254                            X86VectorVTInfo _, OpndItins itins,
4255                            bit IsCommutable = 0> {
4256   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4257                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4258                     "$src2, $src1", "$src1, $src2",
4259                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4260                     itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V,
4261                     Sched<[itins.Sched]>;
4262
4263   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4264                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4265                   "$src2, $src1", "$src1, $src2",
4266                   (_.VT (OpNode _.RC:$src1,
4267                                 (bitconvert (_.LdFrag addr:$src2)))),
4268                   itins.rm>, AVX512BIBase, EVEX_4V,
4269                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
4270 }
4271
// Extends avx512_binop_rm (rr and rm forms) with a broadcast-memory form:
//   rmb - second operand is a scalar load (_.ScalarLdFrag) broadcast with
//         X86VBroadcast; encoded with EVEX_B and printed with
//         _.BroadcastStr appended to the memory operand.
4272 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4273                             X86VectorVTInfo _, OpndItins itins,
4274                             bit IsCommutable = 0> :
4275            avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
4276   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4277                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4278                   "${src2}"##_.BroadcastStr##", $src1",
4279                   "$src1, ${src2}"##_.BroadcastStr,
4280                   (_.VT (OpNode _.RC:$src1,
4281                                 (X86VBroadcast
4282                                     (_.ScalarLdFrag addr:$src2)))),
4283                   itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B,
4284                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
4285 }
4286
// Instantiates avx512_binop_rm (rr/rm only, no broadcast form) for all three
// vector lengths of VTInfo: Z (512-bit) under [prd], Z256 and Z128 under
// [prd, HasVLX].
4287 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4288                               AVX512VLVectorVTInfo VTInfo, OpndItins itins,
4289                               Predicate prd, bit IsCommutable = 0> {
4290   let Predicates = [prd] in
4291     defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
4292                              IsCommutable>, EVEX_V512;
4293
4294   let Predicates = [prd, HasVLX] in {
4295     defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
4296                              IsCommutable>, EVEX_V256;
4297     defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
4298                              IsCommutable>, EVEX_V128;
4299   }
4300 }
4301
// Instantiates avx512_binop_rmb (rr/rm plus the broadcast rmb form) for all
// three vector lengths of VTInfo: Z (512-bit) under [prd], Z256 and Z128
// under [prd, HasVLX].
4302 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4303                                AVX512VLVectorVTInfo VTInfo, OpndItins itins,
4304                                Predicate prd, bit IsCommutable = 0> {
4305   let Predicates = [prd] in
4306     defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
4307                              IsCommutable>, EVEX_V512;
4308
4309   let Predicates = [prd, HasVLX] in {
4310     defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
4311                              IsCommutable>, EVEX_V256;
4312     defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
4313                              IsCommutable>, EVEX_V128;
4314   }
4315 }
4316
// 64-bit-element binary op: avx512_binop_rmb_vl over avx512vl_i64_info
// (includes the broadcast form), encoded with VEX_W and EVEX_CD8<64, CD8VF>.
4317 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4318                                 OpndItins itins, Predicate prd,
4319                                 bit IsCommutable = 0> {
4320   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4321                                itins, prd, IsCommutable>,
4322                                VEX_W, EVEX_CD8<64, CD8VF>;
4323 }
4324
// 32-bit-element binary op: avx512_binop_rmb_vl over avx512vl_i32_info
// (includes the broadcast form), encoded with EVEX_CD8<32, CD8VF>.
4325 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4326                                 OpndItins itins, Predicate prd,
4327                                 bit IsCommutable = 0> {
4328   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4329                                itins, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4330 }
4331
// 16-bit-element binary op: avx512_binop_rm_vl over avx512vl_i16_info (rr/rm
// only, no broadcast form), encoded with EVEX_CD8<16, CD8VF> and VEX_WIG.
4332 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4333                                 OpndItins itins, Predicate prd,
4334                                 bit IsCommutable = 0> {
4335   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4336                               itins, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4337                               VEX_WIG;
4338 }
4339
// 8-bit-element binary op: avx512_binop_rm_vl over avx512vl_i8_info (rr/rm
// only, no broadcast form), encoded with EVEX_CD8<8, CD8VF> and VEX_WIG.
4340 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4341                                 OpndItins itins, Predicate prd,
4342                                 bit IsCommutable = 0> {
4343   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4344                               itins, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4345                               VEX_WIG;
4346 }
4347
// Defines the Q (64-bit element, opcode opc_q) and D (32-bit element, opcode
// opc_d) variants of one operation; "q" / "d" is appended to OpcodeStr to
// form each mnemonic.
4348 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4349                                  SDNode OpNode, OpndItins itins, Predicate prd,
4350                                  bit IsCommutable = 0> {
4351   defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, itins, prd,
4352                                    IsCommutable>;
4353
4354   defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, itins, prd,
4355                                    IsCommutable>;
4356 }
4357
// Defines the W (16-bit element, opcode opc_w) and B (8-bit element, opcode
// opc_b) variants of one operation; "w" / "b" is appended to OpcodeStr to
// form each mnemonic.
4358 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4359                                  SDNode OpNode, OpndItins itins, Predicate prd,
4360                                  bit IsCommutable = 0> {
4361   defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, itins, prd,
4362                                    IsCommutable>;
4363
4364   defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, itins, prd,
4365                                    IsCommutable>;
4366 }
4367
// Defines all four element-size variants of one operation: D and Q under
// HasAVX512 (via avx512_binop_rm_vl_dq) and B and W under HasBWI (via
// avx512_binop_rm_vl_bw). Note the opcode parameter order: b, w, d, q.
4368 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4369                                   bits<8> opc_d, bits<8> opc_q,
4370                                   string OpcodeStr, SDNode OpNode,
4371                                   OpndItins itins, bit IsCommutable = 0> {
4372   defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4373                                     itins, HasAVX512, IsCommutable>,
4374               avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4375                                     itins, HasBWI, IsCommutable>;
4376 }
4377
// Binary operation whose source and destination vector types differ.
//   _Src   - type info of both source operands.
//   _Dst   - type info of the result (also used for masking by
//            AVX512_maskable).
//   _Brdct - type info used for the broadcast form: its scalar memory
//            operand, scalar load fragment and broadcast string.
// Defines rr, rm (full-width load of _Src type) and rmb (scalar load
// broadcast as _Brdct.VT, then bitconverted for use as the second source).
4378 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
4379                             SDNode OpNode,X86VectorVTInfo _Src,
4380                             X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4381                             bit IsCommutable = 0> {
4382   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4383                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4384                             "$src2, $src1","$src1, $src2",
4385                             (_Dst.VT (OpNode
4386                                          (_Src.VT _Src.RC:$src1),
4387                                          (_Src.VT _Src.RC:$src2))),
4388                             itins.rr, IsCommutable>,
4389                             AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;
4390   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4391                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4392                         "$src2, $src1", "$src1, $src2",
4393                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4394                                       (bitconvert (_Src.LdFrag addr:$src2)))),
4395                         itins.rm>, AVX512BIBase, EVEX_4V,
4396                         Sched<[itins.Sched.Folded, ReadAfterLd]>;
4397
4398   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4399                     (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4400                     OpcodeStr,
4401                     "${src2}"##_Brdct.BroadcastStr##", $src1",
4402                      "$src1, ${src2}"##_Brdct.BroadcastStr,
4403                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4404                                  (_Brdct.VT (X86VBroadcast
4405                                           (_Brdct.ScalarLdFrag addr:$src2)))))),
4406                     itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B,
4407                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
4408 }
4409
// Integer add/sub/saturating/multiply/average instantiations. The trailing
// bit argument marks the operation as commutable (1) or not (0).
// Add, subtract and average use the ALU itinerary SSE_INTALU_ITINS_P; every
// multiply uses the multiply itinerary SSE_INTMUL_ITINS_P so that all
// vpmul* records here are scheduled the same way.
4410 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4411                                     SSE_INTALU_ITINS_P, 1>;
4412 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4413                                     SSE_INTALU_ITINS_P, 0>;
4414 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
4415                                     SSE_INTALU_ITINS_P, HasBWI, 1>;
4416 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
4417                                     SSE_INTALU_ITINS_P, HasBWI, 0>;
4418 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
4419                                      SSE_INTALU_ITINS_P, HasBWI, 1>;
4420 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
4421                                      SSE_INTALU_ITINS_P, HasBWI, 0>;
4422 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4423                                     SSE_INTMUL_ITINS_P, HasAVX512, 1>, T8PD;
4424 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4425                                     SSE_INTMUL_ITINS_P, HasBWI, 1>;
4426 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4427                                     SSE_INTMUL_ITINS_P, HasDQI, 1>, T8PD;
4428 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTMUL_ITINS_P,
4429                                     HasBWI, 1>;
4430 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P,
4431                                      HasBWI, 1>;
4432 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_ITINS_P,
4433                                       HasBWI, 1>, T8PD;
4434 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4435                                    SSE_INTALU_ITINS_P, HasBWI, 1>;
4436
// Instantiates avx512_binop_rm2 for all three vector lengths with distinct
// source and destination type families. The broadcast type is always the
// 64-bit-element vector of the matching length (v8i64_info, v4i64x_info,
// v2i64x_info), and every form is encoded with VEX_W and EVEX_CD8<64, CD8VF>.
// Z is predicated on [prd]; Z256 and Z128 on [HasVLX, prd].
4437 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
4438                             AVX512VLVectorVTInfo _SrcVTInfo, AVX512VLVectorVTInfo _DstVTInfo,
4439                             SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
4440   let Predicates = [prd] in
4441     defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
4442                                  _SrcVTInfo.info512, _DstVTInfo.info512,
4443                                  v8i64_info, IsCommutable>,
4444                                   EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4445   let Predicates = [HasVLX, prd] in {
4446     defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
4447                                       _SrcVTInfo.info256, _DstVTInfo.info256,
4448                                       v4i64x_info, IsCommutable>,
4449                                       EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4450     defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
4451                                       _SrcVTInfo.info128, _DstVTInfo.info128,
4452                                       v2i64x_info, IsCommutable>,
4453                                      EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
4454   }
4455 }
4456
// Instantiations of avx512_binop_all. vpmuldq and vpmuludq take i32 vectors
// and produce i64 vectors; both are multiplies and both use the multiply
// itinerary SSE_INTMUL_ITINS_P. vpmultishiftqb operates on i8 vectors, is
// predicated on HasVBMI and is not commutable.
4457 defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTMUL_ITINS_P,
4458                                 avx512vl_i32_info, avx512vl_i64_info,
4459                                 X86pmuldq, HasAVX512, 1>,T8PD;
4460 defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
4461                                 avx512vl_i32_info, avx512vl_i64_info,
4462                                 X86pmuludq, HasAVX512, 1>;
4463 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SSE_INTALU_ITINS_P,
4464                                 avx512vl_i8_info, avx512vl_i8_info,
4465                                 X86multishift, HasVBMI, 0>, T8PD;
4466
// Broadcast-memory form (rmb) of an operation whose result type (_Dst) differs
// from its source type (_Src): the second operand is a scalar load of the
// source element type broadcast as _Src.VT. Encoded with EVEX_B and
// EVEX_CD8<_Src.EltSize, CD8VF>.
4467 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4468                             X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4469                             OpndItins itins> {
4470   defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4471                     (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4472                     OpcodeStr,
4473                     "${src2}"##_Src.BroadcastStr##", $src1",
4474                      "$src1, ${src2}"##_Src.BroadcastStr,
4475                     (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4476                                  (_Src.VT (X86VBroadcast
4477                                           (_Src.ScalarLdFrag addr:$src2)))))),
4478                     itins.rm>, EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4479                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
4480 }
4481
// Register (rr) and full-width-load (rm) forms of an operation whose result
// type (_Dst) differs from its source type (_Src). Only rr receives
// IsCommutable. Both forms use EVEX_CD8<_Src.EltSize, CD8VF>.
4482 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4483                             SDNode OpNode,X86VectorVTInfo _Src,
4484                             X86VectorVTInfo _Dst, OpndItins itins,
4485                             bit IsCommutable = 0> {
4486   defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4487                             (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4488                             "$src2, $src1","$src1, $src2",
4489                             (_Dst.VT (OpNode
4490                                          (_Src.VT _Src.RC:$src1),
4491                                          (_Src.VT _Src.RC:$src2))),
4492                             itins.rr, IsCommutable>,
4493                             EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[itins.Sched]>;
4494   defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4495                         (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4496                         "$src2, $src1", "$src1, $src2",
4497                         (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4498                                       (bitconvert (_Src.LdFrag addr:$src2)))), itins.rm>,
4499                          EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4500                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
4501 }
4502
// i32 -> i16 pack for all three vector lengths, using the SSE_PACK
// itineraries. Each length gets rr/rm (avx512_packs_rm) plus the broadcast rmb
// form (avx512_packs_rmb). Z requires HasBWI; Z256 and Z128 additionally
// require HasVLX.
4503 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4504                                     SDNode OpNode> {
4505   let Predicates = [HasBWI] in
4506   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4507                                  v32i16_info, SSE_PACK>,
4508                 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4509                                  v32i16_info, SSE_PACK>, EVEX_V512;
4510   let Predicates = [HasBWI, HasVLX] in {
4511     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4512                                      v16i16x_info, SSE_PACK>,
4513                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4514                                      v16i16x_info, SSE_PACK>, EVEX_V256;
4515     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4516                                      v8i16x_info, SSE_PACK>,
4517                      avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4518                                      v8i16x_info, SSE_PACK>, EVEX_V128;
4519   }
4520 }
4521 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4522                             SDNode OpNode> {
       // 16-bit-source / 8-bit-destination pack forms at all three widths.
       // Unlike avx512_packs_all_i32_i16, only avx512_packs_rm is instantiated
       // (no avx512_packs_rmb), and every width is tagged VEX_WIG.
4523   let Predicates = [HasBWI] in
4524   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info,
4525                                 v64i8_info, SSE_PACK>, EVEX_V512, VEX_WIG;
4526   let Predicates = [HasBWI, HasVLX] in {
4527     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4528                                     v32i8x_info, SSE_PACK>, EVEX_V256, VEX_WIG;
4529     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4530                                     v16i8x_info, SSE_PACK>, EVEX_V128, VEX_WIG;
4531   }
4532 }
4533
     // Multiply-add forms built on avx512_packs_rm with the SSE_PMADD
     // itineraries. _Src and _Dst are the per-width type-info bundles for the
     // source and destination vectors; IsCommutable is forwarded unchanged.
     // The 512-bit form requires HasBWI; the 256/128-bit forms require HasBWI
     // and HasVLX.
4534 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4535                             SDNode OpNode, AVX512VLVectorVTInfo _Src,
4536                             AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4537   let Predicates = [HasBWI] in
4538   defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4539                                 _Dst.info512, SSE_PMADD, IsCommutable>, EVEX_V512;
4540   let Predicates = [HasBWI, HasVLX] in {
4541     defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4542                                      _Dst.info256, SSE_PMADD, IsCommutable>, EVEX_V256;
4543     defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4544                                      _Dst.info128, SSE_PMADD, IsCommutable>, EVEX_V128;
4545   }
4546 }
4547
     // Pack instructions: X86Packss / X86Packus for dword->word and
     // word->byte. VPACKUSDW is the only one instantiated with AVX5128IBase;
     // the other three use AVX512BIBase.
4548 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4549 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4550 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4551 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4552
     // VPMADDUBSW takes i8 sources and produces i16 results and leaves
     // IsCommutable at its default of 0; VPMADDWD takes i16 sources, produces
     // i32 results and is instantiated with IsCommutable = 1.
4553 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4554                      avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
4555 defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4556                      avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
4557
     // Integer signed/unsigned max and min (smax, umax, smin, umin) for byte,
     // word and dword/qword elements. The byte and word forms are predicated
     // on HasBWI, the dq forms on HasAVX512. Every instantiation passes 1 as
     // its final argument (presumably IsCommutable; the avx512_binop_rm_vl_*
     // multiclasses are defined outside this section).
4558 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4559                                      SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4560 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4561                                      SSE_INTALU_ITINS_P, HasBWI, 1>;
4562 defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax,
4563                                      SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4564
4565 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4566                                      SSE_INTALU_ITINS_P, HasBWI, 1>;
4567 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4568                                      SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4569 defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax,
4570                                      SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4571
4572 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4573                                      SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4574 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4575                                      SSE_INTALU_ITINS_P, HasBWI, 1>;
4576 defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin,
4577                                      SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4578
4579 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4580                                      SSE_INTALU_ITINS_P, HasBWI, 1>;
4581 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4582                                      SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4583 defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
4584                                      SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4585
4586 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
     // Each pattern inserts both narrow operands into an undefined v8i64,
     // multiplies them with VPMULLQZrr, and extracts the low ymm/xmm
     // subregister of the product.
4587 let Predicates = [HasDQI, NoVLX] in {
4588   def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4589             (EXTRACT_SUBREG
4590                 (VPMULLQZrr
4591                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4592                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4593              sub_ymm)>;
4594
4595   def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4596             (EXTRACT_SUBREG
4597                 (VPMULLQZrr
4598                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4599                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4600              sub_xmm)>;
4601 }
4619
     // Lowers 256-bit and 128-bit i64 OpNode operations through the 512-bit
     // instruction Instr: both operands are inserted into an undefined v8i64,
     // Instr is applied, and the low ymm/xmm subregister of the result is
     // extracted.
4620 multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
4621   def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
4622             (EXTRACT_SUBREG
4623                 (Instr
4624                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4625                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4626              sub_ymm)>;
4627
4628   def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
4629             (EXTRACT_SUBREG
4630                 (Instr
4631                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4632                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4633              sub_xmm)>;
4634 }
4635
     // Widen 128/256-bit i64 min/max to the 512-bit instruction only when VLX
     // is unavailable, matching the NoVLX predicate used by the PMULLQ
     // widening patterns in this file. With VLX the native 128/256-bit forms
     // are expected to be selected instead.
4636 let Predicates = [HasAVX512, NoVLX] in {
4637   defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
4638   defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
4639   defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
4640   defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
4641 }
4642
4643 //===----------------------------------------------------------------------===//
4644 // AVX-512  Logical Instructions
4645 //===----------------------------------------------------------------------===//
4646
4647 // OpNodeMsk is the OpNode to use when element size is important. OpNode will
4648 // be set to null_frag for 32-bit elements.
     // Defines the register-register (rr) and register-memory (rm) forms via
     // AVX512_maskable_logic. Two patterns are supplied for each form: one
     // typed as _.i64VT that applies OpNode to the bitconverted operands, and
     // one typed as _.VT that bitconverts the _.i64VT result of OpNodeMsk.
     // Only rr receives IsCommutable.
4649 multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
4650                            SDPatternOperator OpNode,
4651                            SDNode OpNodeMsk, OpndItins itins, X86VectorVTInfo _,
4652                            bit IsCommutable = 0> {
4653   let hasSideEffects = 0 in
4654   defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
4655                     (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4656                     "$src2, $src1", "$src1, $src2",
4657                     (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
4658                                      (bitconvert (_.VT _.RC:$src2)))),
4659                     (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
4660                                                           _.RC:$src2)))),
4661                     itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V,
4662                     Sched<[itins.Sched]>;
4663
4664   let hasSideEffects = 0, mayLoad = 1 in
4665   defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
4666                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4667                   "$src2, $src1", "$src1, $src2",
4668                   (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
4669                                    (bitconvert (_.LdFrag addr:$src2)))),
4670                   (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
4671                                      (bitconvert (_.LdFrag addr:$src2)))))),
4672                   itins.rm>, AVX512BIBase, EVEX_4V,
4673                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
4674 }
4675
4676 // OpNodeMsk is the OpNode to use where element size is important. So use
4677 // for all of the broadcast patterns.
     // Extends avx512_logic_rm with the broadcast-memory form (rmb), whose
     // second operand is an X86VBroadcast of a scalar load and which is tagged
     // EVEX_B. Both patterns passed to AVX512_maskable_logic use OpNodeMsk;
     // OpNode is only forwarded to the inherited avx512_logic_rm.
4678 multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
4679                             SDPatternOperator OpNode,
4680                             SDNode OpNodeMsk, OpndItins itins, X86VectorVTInfo _,
4681                             bit IsCommutable = 0> :
4682            avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, itins, _,
4683                            IsCommutable> {
4684   defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
4685                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4686                   "${src2}"##_.BroadcastStr##", $src1",
4687                   "$src1, ${src2}"##_.BroadcastStr,
4688                   (_.i64VT (OpNodeMsk _.RC:$src1,
4689                                    (bitconvert
4690                                     (_.VT (X86VBroadcast
4691                                             (_.ScalarLdFrag addr:$src2)))))),
4692                   (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
4693                                      (bitconvert
4694                                       (_.VT (X86VBroadcast
4695                                              (_.ScalarLdFrag addr:$src2)))))))),
4696                   itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B,
4697                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
4698 }
4699
     // Instantiates avx512_logic_rmb for the 512-bit (Z), 256-bit (Z256) and
     // 128-bit (Z128) entries of VTInfo. Z requires HasAVX512; Z256 and Z128
     // additionally require HasVLX.
4700 multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
4701                                SDPatternOperator OpNode,
4702                                SDNode OpNodeMsk, OpndItins itins,
4703                                AVX512VLVectorVTInfo VTInfo,
4704                                bit IsCommutable = 0> {
4705   let Predicates = [HasAVX512] in
4706     defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
4707                               VTInfo.info512, IsCommutable>, EVEX_V512;
4708
4709   let Predicates = [HasAVX512, HasVLX] in {
4710     defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
4711                                  VTInfo.info256, IsCommutable>, EVEX_V256;
4712     defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
4713                                  VTInfo.info128, IsCommutable>, EVEX_V128;
4714   }
4715 }
4716
     // Builds the qword (Q) and dword (D) variants of a logic instruction.
     // Q passes OpNode as both OpNode and OpNodeMsk and is tagged VEX_W; D
     // passes null_frag as OpNode and the real node only as OpNodeMsk.
4717 multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4718                                  SDNode OpNode, OpndItins itins,
4719                                  bit IsCommutable = 0> {
4720   defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, itins,
4721                                avx512vl_i64_info, IsCommutable>,
4722                                VEX_W, EVEX_CD8<64, CD8VF>;
4723   defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, itins,
4724                                avx512vl_i32_info, IsCommutable>,
4725                                EVEX_CD8<32, CD8VF>;
4726 }
4727
     // Integer logic instructions. VPAND, VPOR and VPXOR are instantiated as
     // commutable (IsCommutable = 1); VPANDN keeps the default of 0.
4728 defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, SSE_BIT_ITINS_P, 1>;
4729 defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, SSE_BIT_ITINS_P, 1>;
4730 defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, SSE_BIT_ITINS_P, 1>;
4731 defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, SSE_BIT_ITINS_P>;
4732
4733 //===----------------------------------------------------------------------===//
4734 // AVX-512  FP arithmetic
4735 //===----------------------------------------------------------------------===//
4736 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
4737                          SDNode OpNode, SDNode VecNode, OpndItins itins,
4738                          bit IsCommutable> {
       // Scalar FP binary operation. rr_Int/rm_Int are the maskable forms on
       // the vector register class _.RC and match VecNode with
       // FROUND_CURRENT. rr/rm are isCodeGenOnly forms on the scalar register
       // class _.FRC that match OpNode; only rr takes IsCommutable.
4739   let ExeDomain = _.ExeDomain in {
4740   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4741                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4742                            "$src2, $src1", "$src1, $src2",
4743                            (_.VT (VecNode _.RC:$src1, _.RC:$src2,
4744                                           (i32 FROUND_CURRENT))),
4745                            itins.rr>, Sched<[itins.Sched]>;
4746
4747   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
4748                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
4749                          "$src2, $src1", "$src1, $src2",
4750                          (_.VT (VecNode _.RC:$src1,
4751                                         _.ScalarIntMemCPat:$src2,
4752                                         (i32 FROUND_CURRENT))),
4753                          itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
4754   let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
4755   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
4756                          (ins _.FRC:$src1, _.FRC:$src2),
4757                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4758                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
4759                           itins.rr>, Sched<[itins.Sched]> {
4760     let isCommutable = IsCommutable;
4761   }
4762   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
4763                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
4764                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4765                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
4766                          (_.ScalarLdFrag addr:$src2)))], itins.rm>,
4767                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
4768   }
4769   }
4770 }
4771
     // Adds the explicit rounding-control form (rrb_Int): it takes an extra
     // AVX512RC:$rc operand that is passed to VecNode as (i32 imm:$rc), and is
     // tagged EVEX_B and EVEX_RC.
4772 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
4773                          SDNode VecNode, OpndItins itins, bit IsCommutable = 0> {
4774   let ExeDomain = _.ExeDomain in
4775   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4776                           (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
4777                           "$rc, $src2, $src1", "$src1, $src2, $rc",
4778                           (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
4779                           (i32 imm:$rc)), itins.rr, IsCommutable>,
4780                           EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
4781 }
4782 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
4783                                 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
4784                                 OpndItins itins, bit IsCommutable> {
       // Scalar FP binary operation with a {sae} variant. rr_Int/rm_Int match
       // VecNode (without a rounding operand), the isCodeGenOnly rr/rm forms
       // on _.FRC match OpNode, and rrb_Int matches SaeNode with
       // FROUND_NO_EXC and is tagged EVEX_B.
4785   let ExeDomain = _.ExeDomain in {
4786   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4787                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4788                            "$src2, $src1", "$src1, $src2",
4789                            (_.VT (VecNode _.RC:$src1, _.RC:$src2)),
4790                            itins.rr>, Sched<[itins.Sched]>;
4791
4792   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
4793                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
4794                          "$src2, $src1", "$src1, $src2",
4795                          (_.VT (VecNode _.RC:$src1,
4796                                         _.ScalarIntMemCPat:$src2)),
4797                          itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
4798
4799   let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
4800   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
4801                          (ins _.FRC:$src1, _.FRC:$src2),
4802                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4803                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
4804                           itins.rr>, Sched<[itins.Sched]> {
4805     let isCommutable = IsCommutable;
4806   }
4807   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
4808                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
4809                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4810                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
4811                          (_.ScalarLdFrag addr:$src2)))], itins.rm>,
4812                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
4813   }
4814
4815   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4816                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4817                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
4818                             (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
4819                             (i32 FROUND_NO_EXC)), itins.rr>, EVEX_B,
4820                             Sched<[itins.Sched]>;
4821   }
4822 }
4823
     // Instantiates the single-precision (SSZ, "ss") and double-precision
     // (SDZ, "sd") scalar forms, each combining avx512_fp_scalar with
     // avx512_fp_scalar_round. SDZ additionally carries VEX_W.
4824 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
4825                                   SDNode VecNode,
4826                                   SizeItins itins, bit IsCommutable> {
4827   defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
4828                               itins.s, IsCommutable>,
4829              avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
4830                               itins.s, IsCommutable>,
4831                               XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
4832   defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
4833                               itins.d,                  IsCommutable>,
4834              avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
4835                               itins.d, IsCommutable>,
4836                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
4837 }
4838
     // Same ss/sd split as avx512_binop_s_round, but based on
     // avx512_fp_scalar_sae, which supplies a {sae} form instead of an
     // explicit rounding-control form.
4839 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
4840                                   SDNode VecNode, SDNode SaeNode,
4841                                   SizeItins itins, bit IsCommutable> {
4842   defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
4843                               VecNode, SaeNode, itins.s, IsCommutable>,
4844                               XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
4845   defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
4846                               VecNode, SaeNode, itins.d, IsCommutable>,
4847                               XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
4848 }
4849 defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, SSE_ALU_ITINS_S, 1>;
4850 defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, SSE_MUL_ITINS_S, 1>;
4851 defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, SSE_ALU_ITINS_S, 0>;
4852 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, SSE_DIV_ITINS_S, 0>;
     // VMIN/VMAX use the {sae} multiclass rather than the rounding-control
     // one and are instantiated with IsCommutable = 0.
4853 defm VMIN : avx512_binop_s_sae  <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
4854                                  SSE_ALU_ITINS_S, 0>;
4855 defm VMAX : avx512_binop_s_sae  <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
4856                                  SSE_ALU_ITINS_S, 0>;
4857
4858 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
4859 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
     // Only the isCodeGenOnly rr/rm forms on the scalar register class _.FRC
     // are defined here, and rr is unconditionally marked isCommutable.
4860 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
4861                           X86VectorVTInfo _, SDNode OpNode, OpndItins itins> {
4862   let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
4863   def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
4864                          (ins _.FRC:$src1, _.FRC:$src2),
4865                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4866                           [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
4867                           itins.rr>, Sched<[itins.Sched]> {
4868     let isCommutable = 1;
4869   }
4870   def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
4871                          (ins _.FRC:$src1, _.ScalarMemOp:$src2),
4872                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4873                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
4874                          (_.ScalarLdFrag addr:$src2)))], itins.rm>,
4875                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
4876   }
4877 }
     // Commutable scalar min/max instantiations. They reuse opcodes 0x5D/0x5F
     // and the vminss/vminsd/vmaxss/vmaxsd mnemonics with the X86fminc and
     // X86fmaxc nodes; the f64 variants add VEX_W and use a 64-bit EVEX_CD8
     // element size.
4878 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
4879                                 SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG,
4880                                 EVEX_CD8<32, CD8VT1>;
4881
4882 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
4883                                 SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
4884                                 EVEX_CD8<64, CD8VT1>;
4885
4886 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
4887                                 SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG,
4888                                 EVEX_CD8<32, CD8VT1>;
4889
4890 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
4891                                 SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
4892                                 EVEX_CD8<64, CD8VT1>;
4893
     // Packed FP binary operation in register (rr), full-vector memory (rm)
     // and broadcast-memory (rmb) forms, all built with AVX512_maskable. Only
     // rr receives IsCommutable; rmb broadcasts a scalar load via
     // X86VBroadcast and is tagged EVEX_B. The mnemonic is OpcodeStr with
     // _.Suffix appended.
4894 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
4895                             X86VectorVTInfo _, OpndItins itins,
4896                             bit IsCommutable> {
4897   let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
4898   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4899                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
4900                   "$src2, $src1", "$src1, $src2",
4901                   (_.VT (OpNode _.RC:$src1, _.RC:$src2)), itins.rr,
4902                   IsCommutable>, EVEX_4V, Sched<[itins.Sched]>;
4903   let mayLoad = 1 in {
4904     defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4905                     (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
4906                     "$src2, $src1", "$src1, $src2",
4907                     (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), itins.rm>,
4908                     EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
4909     defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4910                      (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
4911                      "${src2}"##_.BroadcastStr##", $src1",
4912                      "$src1, ${src2}"##_.BroadcastStr,
4913                      (OpNode  _.RC:$src1, (_.VT (X86VBroadcast
4914                                                 (_.ScalarLdFrag addr:$src2)))),
4915                      itins.rm>, EVEX_4V, EVEX_B,
4916                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
4917     }
4918   }
4919 }
4920
     // Packed form with explicit rounding control (rrb): the extra
     // AVX512RC:$rc operand is passed to OpNodeRnd as (i32 imm:$rc), and the
     // instruction is tagged EVEX_B and EVEX_RC.
4921 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
4922                                   OpndItins itins, X86VectorVTInfo _> {
4923   let ExeDomain = _.ExeDomain in
4924   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4925                   (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
4926                   "$rc, $src2, $src1", "$src1, $src2, $rc",
4927                   (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc))), itins.rr>,
4928                   EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
4929 }
4930
     // Packed {sae} form (rrb): OpNodeRnd is matched with FROUND_NO_EXC and no
     // rounding-control operand is taken. Tagged EVEX_B but not EVEX_RC.
4931 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
4932                                 OpndItins itins, X86VectorVTInfo _> {
4933   let ExeDomain = _.ExeDomain in
4934   defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4935                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
4936                   "{sae}, $src2, $src1", "$src1, $src2, {sae}",
4937                   (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC))), itins.rr>,
4938                   EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
4939 }
4940
     // Instantiates avx512_fp_packed for single (PS*) and double (PD*)
     // precision at 512, 256 and 128 bits. The 512-bit forms require prd; the
     // narrower forms require prd and HasVLX. The PD forms additionally carry
     // VEX_W.
4941 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
4942                              Predicate prd, SizeItins itins,
4943                              bit IsCommutable = 0> {
4944   let Predicates = [prd] in {
4945   defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
4946                               itins.s, IsCommutable>, EVEX_V512, PS,
4947                               EVEX_CD8<32, CD8VF>;
4948   defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
4949                               itins.d, IsCommutable>, EVEX_V512, PD, VEX_W,
4950                               EVEX_CD8<64, CD8VF>;
4951   }
4952
4953     // Define only if AVX512VL feature is present.
4954   let Predicates = [prd, HasVLX] in {
4955     defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
4956                                    itins.s, IsCommutable>, EVEX_V128, PS,
4957                                    EVEX_CD8<32, CD8VF>;
4958     defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
4959                                    itins.s, IsCommutable>, EVEX_V256, PS,
4960                                    EVEX_CD8<32, CD8VF>;
4961     defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
4962                                    itins.d, IsCommutable>, EVEX_V128, PD, VEX_W,
4963                                    EVEX_CD8<64, CD8VF>;
4964     defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
4965                                    itins.d, IsCommutable>, EVEX_V256, PD, VEX_W,
4966                                    EVEX_CD8<64, CD8VF>;
4967   }
4968 }
4969
     // Rounding-control forms for single (PSZ) and double (PDZ) precision.
     // Only the 512-bit widths are instantiated, and no Predicates are set in
     // this multiclass.
4970 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
4971                                    SizeItins itins> {
4972   defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, itins.s, v16f32_info>,
4973                               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
4974   defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, itins.d, v8f64_info>,
4975                               EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
4976 }
4977
     // {sae} forms for single (PSZ) and double (PDZ) precision. Only the
     // 512-bit widths are instantiated, and no Predicates are set in this
     // multiclass.
4978 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
4979                                  SizeItins itins> {
4980   defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, itins.s, v16f32_info>,
4981                               EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
4982   defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, itins.d, v8f64_info>,
4983                               EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
4984 }
4985
     // Packed FP arithmetic. VADD/VMUL/VSUB/VDIV add rounding-control forms;
     // VMIN/VMAX add {sae} forms. VMINC/VMAXC are isCodeGenOnly commutable
     // variants using X86fminc/X86fmaxc. The FP logic ops (VAND, VANDN, VOR,
     // VXOR) are instantiated with null_frag and are predicated on HasDQI.
4986 defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
4987                               SSE_ALU_ITINS_P, 1>,
4988             avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SSE_ALU_ITINS_P>;
4989 defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
4990                               SSE_MUL_ITINS_P, 1>,
4991             avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SSE_MUL_ITINS_P>;
4992 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, SSE_ALU_ITINS_P>,
4993             avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SSE_ALU_ITINS_P>;
4994 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, SSE_DIV_ITINS_P>,
4995             avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SSE_DIV_ITINS_P>;
4996 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
4997                               SSE_ALU_ITINS_P, 0>,
4998             avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SSE_ALU_ITINS_P>;
4999 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
5000                               SSE_ALU_ITINS_P, 0>,
5001             avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SSE_ALU_ITINS_P>;
5002 let isCodeGenOnly = 1 in {
5003   defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
5004                                  SSE_ALU_ITINS_P, 1>;
5005   defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
5006                                  SSE_ALU_ITINS_P, 1>;
5007 }
5008 defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
5009                                SSE_ALU_ITINS_P, 1>;
5010 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
5011                                SSE_ALU_ITINS_P, 0>;
5012 defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
5013                                SSE_ALU_ITINS_P, 1>;
5014 defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
5015                                SSE_ALU_ITINS_P, 1>;
5016
5017 // Patterns catch floating point selects with bitcasted integer logic ops.
     // InstrStr is the name prefix of the target instruction; the rrk, rrkz,
     // rmk, rmkz, rmb, rmbk and rmbkz suffixes are appended to it and resolved
     // with !cast<Instruction>. OpNode is the integer logic node, matched at
     // type _.i64VT and bitconverted to _.VT. All patterns are guarded by prd.
5018 multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
5019                                       X86VectorVTInfo _, Predicate prd> {
5020 let Predicates = [prd] in {
5021   // Masked register-register logical operations.
5022   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5023                    (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
5024                    _.RC:$src0)),
5025             (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5026              _.RC:$src1, _.RC:$src2)>;
5027   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5028                    (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
5029                    _.ImmAllZerosV)),
5030             (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5031              _.RC:$src2)>;
5032   // Masked register-memory logical operations.
5033   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5034                    (bitconvert (_.i64VT (OpNode _.RC:$src1,
5035                                          (load addr:$src2)))),
5036                    _.RC:$src0)),
5037             (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5038              _.RC:$src1, addr:$src2)>;
5039   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5040                    (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
5041                    _.ImmAllZerosV)),
5042             (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5043              addr:$src2)>;
5044   // Register-broadcast logical operations.
5045   def : Pat<(_.i64VT (OpNode _.RC:$src1,
5046                       (bitconvert (_.VT (X86VBroadcast
5047                                          (_.ScalarLdFrag addr:$src2)))))),
5048             (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
5049   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5050                    (bitconvert
5051                     (_.i64VT (OpNode _.RC:$src1,
5052                               (bitconvert (_.VT
5053                                            (X86VBroadcast
5054                                             (_.ScalarLdFrag addr:$src2))))))),
5055                    _.RC:$src0)),
5056             (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5057              _.RC:$src1, addr:$src2)>;
5058   def : Pat<(_.VT (vselect _.KRCWM:$mask,
5059                    (bitconvert
5060                     (_.i64VT (OpNode _.RC:$src1,
5061                               (bitconvert (_.VT
5062                                            (X86VBroadcast
5063                                             (_.ScalarLdFrag addr:$src2))))))),
5064                    _.ImmAllZerosV)),
5065             (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5066              _.RC:$src1, addr:$src2)>;
5067 }
5068 }
5069
// Instantiates avx512_fp_logical_lowering once per FP vector type.
//   InstrStr - instruction name prefix; "D"/"Q" plus the Z128/Z256/Z size
//              suffix is appended here (f32 vectors use the D forms, f64
//              vectors use the Q forms).
//   OpNode   - the logical SDNode being lowered.
// The 128- and 256-bit types are guarded by HasVLX, the 512-bit types by
// HasAVX512.
5070 multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
5071   defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
5072   defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
5073   defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
5074   defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
5075   defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
5076   defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
5077 }
5078
// Map and / or / xor / X86andnp to the VPAND / VPOR / VPXOR / VPANDN
// instruction families through avx512_fp_logical_lowering_sizes.
5079 defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
5080 defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
5081 defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
5082 defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
5083
// Scalar f64/f32 X86fand / X86for / X86fxor / X86fandn selection.
// Each pattern copies both scalar operands (FR64X or FR32X) into VR128X,
// applies the 128-bit packed EVEX instruction (V*PDZ128rr for f64,
// V*PSZ128rr for f32), and copies the result back to the scalar register
// class. All eight patterns require both HasVLX and HasDQI.
5084 let Predicates = [HasVLX,HasDQI] in {
5085   // Use packed logical operations for scalar ops.
5086   def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
5087             (COPY_TO_REGCLASS (VANDPDZ128rr
5088                                (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5089                                (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5090   def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
5091             (COPY_TO_REGCLASS (VORPDZ128rr
5092                                (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5093                                (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5094   def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
5095             (COPY_TO_REGCLASS (VXORPDZ128rr
5096                                (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5097                                (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5098   def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
5099             (COPY_TO_REGCLASS (VANDNPDZ128rr
5100                                (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5101                                (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5102
       // Same four operations for f32, using the PS forms and FR32X.
5103   def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
5104             (COPY_TO_REGCLASS (VANDPSZ128rr
5105                                (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5106                                (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5107   def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
5108             (COPY_TO_REGCLASS (VORPSZ128rr
5109                                (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5110                                (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5111   def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
5112             (COPY_TO_REGCLASS (VXORPSZ128rr
5113                                (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5114                                (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5115   def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
5116             (COPY_TO_REGCLASS (VANDNPSZ128rr
5117                                (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5118                                (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5119 }
5120
// Packed forms of a two-source FP operation whose SDNode takes a third
// rounding operand; here that operand is always (i32 FROUND_CURRENT).
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; _.Suffix is appended.
//   OpNode    - SDNode matched by the patterns.
//   itins     - itineraries/scheduling info (rr, rm, Sched).
//   _         - vector type info.
// Defines, through AVX512_maskable:
//   rr  - register, register
//   rm  - register, full-vector memory operand
//   rmb - register, broadcast scalar memory operand (EVEX_B)
5121 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5122                               OpndItins itins, X86VectorVTInfo _> {
5123   let ExeDomain = _.ExeDomain in {
5124   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5125                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5126                   "$src2, $src1", "$src1, $src2",
5127                   (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT))),
5128                   itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
5129   defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5130                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5131                   "$src2, $src1", "$src1, $src2",
5132                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT)),
5133                   itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
5134   defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5135                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5136                    "${src2}"##_.BroadcastStr##", $src1",
5137                    "$src1, ${src2}"##_.BroadcastStr,
5138                    (OpNode  _.RC:$src1, (_.VT (X86VBroadcast
5139                                               (_.ScalarLdFrag addr:$src2))),
5140                                               (i32 FROUND_CURRENT)), itins.rm>,
5141                    EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
5142   }
5143 }
5144
// Scalar counterpart of avx512_fp_scalef_p, built on AVX512_maskable_scalar.
// The rounding operand of OpNode is again fixed to (i32 FROUND_CURRENT).
//   rr - register, register
//   rm - register, scalar memory operand (_.IntScalarMemOp, matched with
//        _.ScalarIntMemCPat)
// Unlike the packed multiclass, no EVEX_4V is attached here; the
// instantiation site is expected to add encoding modifiers.
5145 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5146                                    OpndItins itins, X86VectorVTInfo _> {
5147   let ExeDomain = _.ExeDomain in {
5148   defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5149                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5150                   "$src2, $src1", "$src1, $src2",
5151                   (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT))), itins.rr>,
5152                   Sched<[itins.Sched]>;
5153   defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5154                   (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
5155                   "$src2, $src1", "$src1, $src2",
5156                   (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2,
5157                           (i32 FROUND_CURRENT)), itins.rm>,
5158                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
5159   }
5160 }
5161
// Instantiates every form of a scalef-style instruction.
//   opc        - opcode of the packed forms.
//   opcScaler  - opcode of the scalar forms.
//   OpcodeStr  - mnemonic prefix.
//   OpNode     - SDNode for the packed forms.
//   OpNodeScal - SDNode for the scalar forms.
// Produces:
//   PSZ / PDZ       - 512-bit packed, plus avx512_fp_round_packed variants.
//   SSZ128 / SDZ128 - scalar, plus avx512_fp_scalar_round variants.
//   PSZ128/256, PDZ128/256 - packed 128/256-bit forms, only under HasVLX and
//                            without the avx512_fp_round_packed variants.
5162 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode, SDNode OpNodeScal> {
5163   defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v16f32_info>,
5164              avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v16f32_info>,
5165                               EVEX_V512, EVEX_CD8<32, CD8VF>;
5166   defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v8f64_info>,
5167              avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v8f64_info>,
5168                               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5169   defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, SSE_ALU_F32S, f32x_info>,
5170                 avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, SSE_ALU_ITINS_S.s>,
5171                               EVEX_4V,EVEX_CD8<32, CD8VT1>;
5172   defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, SSE_ALU_F64S, f64x_info>,
5173                 avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, SSE_ALU_ITINS_S.d>,
5174                               EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
5175
5176   // Define only if AVX512VL feature is present.
5177   let Predicates = [HasVLX] in {
5178     defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v4f32x_info>,
5179                                    EVEX_V128, EVEX_CD8<32, CD8VF>;
5180     defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v8f32x_info>,
5181                                    EVEX_V256, EVEX_CD8<32, CD8VF>;
5182     defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v2f64x_info>,
5183                                    EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5184     defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v4f64x_info>,
5185                                    EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
5186   }
5187 }
// VSCALEF: packed forms use opcode 0x2C with X86scalef, scalar forms use
// opcode 0x2D with X86scalefs; all are in the T8PD opcode map/prefix class.
5188 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs>, T8PD;
5189
5190 //===----------------------------------------------------------------------===//
5191 // AVX-512  VPTESTM instructions
5192 //===----------------------------------------------------------------------===//
5193
// Register and memory forms of a vector test instruction whose result is a
// mask register (_.KRC), built on AVX512_maskable_cmp.
//   rr - both sources in vector registers; marked commutable.
//   rm - second source loaded from memory (bitconverted to _.VT); carries
//        EVEX_CD8<_.EltSize, CD8VF>.
5194 multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode,
5195                          OpndItins itins, X86VectorVTInfo _> {
5196   let ExeDomain = _.ExeDomain in {
5197   let isCommutable = 1 in
5198   defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5199                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5200                       "$src2, $src1", "$src1, $src2",
5201                    (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>,
5202                    EVEX_4V, Sched<[itins.Sched]>;
5203   defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5204                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5205                        "$src2, $src1", "$src1, $src2",
5206                    (OpNode (_.VT _.RC:$src1),
5207                     (_.VT (bitconvert (_.LdFrag addr:$src2)))), itins.rm>,
5208                    EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5209                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
5210   }
5211 }
5212
// Broadcast-memory form (rmb) of the vector test instruction: the second
// source is a scalar load broadcast with X86VBroadcast, encoded with EVEX_B.
// The result is written to a mask register (_.KRC).
5213 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5214                             OpndItins itins, X86VectorVTInfo _> {
5215   let ExeDomain = _.ExeDomain in
5216   defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5217                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5218                     "${src2}"##_.BroadcastStr##", $src1",
5219                     "$src1, ${src2}"##_.BroadcastStr,
5220                     (OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast
5221                                                 (_.ScalarLdFrag addr:$src2)))),
5222                     itins.rm>, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5223                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
5224 }
5225
5226 // Use 512bit version to implement 128/256 bit in case NoVLX.
//   OpNode     - test SDNode to match on the narrow type.
//   ExtendInfo - type info of the wide vector the sources are placed into.
//   _          - type info of the narrow (source) vector.
//   Suffix     - element-size letter pasted between NAME and "Zrr" to pick
//                the wide instruction.
// Each source is inserted into an IMPLICIT_DEF of ExtendInfo.VT at
// _.SubRegIdx, so the remaining wide lanes are undefined; the instruction
// result is then copied to the narrow mask class _.KRC.
5227 multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo,
5228                                   X86VectorVTInfo _, string Suffix> {
5229     def : Pat<(_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
5230               (_.KVT (COPY_TO_REGCLASS
5231                        (!cast<Instruction>(NAME # Suffix # "Zrr")
5232                          (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5233                                         _.RC:$src1, _.SubRegIdx),
5234                          (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5235                                         _.RC:$src2, _.SubRegIdx)),
5236                      _.KRC))>;
5237 }
5238
// Instantiates the dword/qword vector test instruction at all three widths.
//   Z          - 512-bit rr/rm/rmb forms, under HasAVX512.
//   Z256/Z128  - 256/128-bit rr/rm/rmb forms, under HasAVX512 + HasVLX.
//   Z256_Alt / Z128_Alt - pattern-only lowering used when VLX is absent
//                (HasAVX512 + NoVLX): the narrow operation is selected to
//                the 512-bit rr instruction via avx512_vptest_lowering.
//   Suffix     - element-size letter forwarded to avx512_vptest_lowering.
5239 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5240                                   OpndItins itins, AVX512VLVectorVTInfo _,
5241                                   string Suffix> {
5242   let Predicates  = [HasAVX512] in
5243   defm Z : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info512>,
5244            avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
5245
5246   let Predicates = [HasAVX512, HasVLX] in {
5247   defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info256>,
5248               avx512_vptest_mb<opc, OpcodeStr, OpNode,itins,  _.info256>, EVEX_V256;
5249   defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info128>,
5250               avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
5251   }
5252   let Predicates = [HasAVX512, NoVLX] in {
5253   defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, Suffix>;
5254   defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128, Suffix>;
5255   }
5256 }
5257
// Dword ("d") and qword ("q") variants of a vector test instruction; the
// qword variant additionally sets VEX_W. Both share the same opcode.
5258 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
5259                             OpndItins itins> {
5260   defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, itins,
5261                                  avx512vl_i32_info, "D">;
5262   defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, itins,
5263                                  avx512vl_i64_info, "Q">, VEX_W;
5264 }
5265
// Word ("w") and byte ("b") variants of a vector test instruction.
//   WZ / BZ         - 512-bit rr/rm forms, under HasBWI.
//   WZ256/128, BZ256/128 - 256/128-bit rr/rm forms, under HasVLX + HasBWI.
//   *_Alt           - pattern-only lowering for targets without VLX: the
//                     narrow operation is selected to the 512-bit rr
//                     instruction via avx512_vptest_lowering.
// The word variants set VEX_W. No broadcast (rmb) forms are defined here.
5266 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5267                                  SDNode OpNode, OpndItins itins> {
5268   let Predicates = [HasBWI] in {
5269   defm WZ:    avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v32i16_info>,
5270               EVEX_V512, VEX_W;
5271   defm BZ:    avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v64i8_info>,
5272               EVEX_V512;
5273   }
5274   let Predicates = [HasVLX, HasBWI] in {
5275
5276   defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v16i16x_info>,
5277               EVEX_V256, VEX_W;
5278   defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v8i16x_info>,
5279               EVEX_V128, VEX_W;
5280   defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v32i8x_info>,
5281               EVEX_V256;
5282   defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v16i8x_info>,
5283               EVEX_V128;
5284   }
5285
       // These patterns select the 512-bit BZrr/WZrr instructions, which are
       // only defined under HasBWI above, so the fallback must require BWI
       // as well rather than plain AVX512.
5286   let Predicates = [HasBWI, NoVLX] in {
5287   defm BZ256_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v32i8x_info, "B">;
5288   defm BZ128_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v16i8x_info, "B">;
5289   defm WZ256_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v16i16x_info, "W">;
5290   defm WZ128_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v8i16x_info, "W">;
5291   }
5292 }
5293
// Combines the byte/word forms (opcode opc_wb) and the dword/qword forms
// (opcode opc_dq) of a vector test instruction under a single defm.
5294 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5295                                    SDNode OpNode, OpndItins itins> :
5296   avx512_vptest_wb <opc_wb, OpcodeStr, OpNode, itins>,
5297   avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, itins>;
5298
// VPTESTM and VPTESTNM share opcodes 0x26 (byte/word) and 0x27 (dword/qword);
// they are distinguished by the prefix class: T8PD for VPTESTM, T8XS for
// VPTESTNM.
5299 defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm,
5300                                          SSE_BIT_ITINS_P>, T8PD;
5301 defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm,
5302                                          SSE_BIT_ITINS_P>, T8XS;
5303
5304
5305 //===----------------------------------------------------------------------===//
5306 // AVX-512  Shift instructions
5307 //===----------------------------------------------------------------------===//
// Shift/rotate by an 8-bit immediate.
//   ImmFormR / ImmFormM - instruction formats for the register and memory
//                         source forms respectively.
//   ri - source in a vector register, count in u8imm.
//   mi - source loaded from memory (bitconverted to _.VT), count in u8imm.
// Encoding modifiers (EVEX_4V, vector length, CD8) are not set here and are
// left to the instantiation site.
5308 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5309                             string OpcodeStr, SDNode OpNode, OpndItins itins,
5310                             X86VectorVTInfo _> {
5311   let ExeDomain = _.ExeDomain in {
5312   defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5313                    (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5314                       "$src2, $src1", "$src1, $src2",
5315                    (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
5316                    itins.rr>, Sched<[itins.Sched]>;
5317   defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5318                    (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5319                        "$src2, $src1", "$src1, $src2",
5320                    (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
5321                           (i8 imm:$src2))),
5322                    itins.rm>, Sched<[itins.Sched.Folded]>;
5323   }
5324 }
5325
// Shift/rotate by an 8-bit immediate where the shifted value is a scalar
// memory operand broadcast with X86VBroadcast (mbi form, EVEX_B).
5326 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5327                              string OpcodeStr, SDNode OpNode, OpndItins itins,
5328                              X86VectorVTInfo _> {
5329   let ExeDomain = _.ExeDomain in
5330   defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5331                    (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5332       "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
5333      (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))),
5334      itins.rm>, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
5335 }
5336
// Shift where the count comes from a 128-bit vector operand.
//   SrcVT   - value type of the 128-bit count operand in the rr pattern.
//   bc_frag - bitconvert fragment applied to the loadv2i64 in the rm pattern.
//   rr - count in VR128X.
//   rm - count loaded from an i128mem operand.
5337 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5338                             OpndItins itins, ValueType SrcVT, PatFrag bc_frag,
5339                             X86VectorVTInfo _> {
5340    // src2 is always 128-bit
5341   let ExeDomain = _.ExeDomain in {
5342   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5343                    (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5344                       "$src2, $src1", "$src1, $src2",
5345                    (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2))),
5346                    itins.rr>, AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;
5347   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5348                    (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5349                        "$src2, $src1", "$src1, $src2",
5350                    (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2)))),
5351                    itins.rm>, AVX512BIBase,
5352                    EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
5353   }
5354 }
5355
// Instantiates avx512_shift_rrm at 512 bits (under prd) and at 256/128 bits
// (under prd + HasVLX).
// The memory operand of avx512_shift_rrm is i128mem at every width, and the
// CD8 tuple changes with the vector length: CD8VQ for 512, CD8VH for 256,
// CD8VF for 128.
// NOTE(review): presumably quarter/half/full of the vector, so the scaled
// displacement always corresponds to 16 bytes -- confirm against the CD8
// definitions.
5356 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5357                               OpndItins itins, ValueType SrcVT, PatFrag bc_frag,
5358                               AVX512VLVectorVTInfo VTInfo, Predicate prd> {
5359   let Predicates = [prd] in
5360   defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
5361                             VTInfo.info512>, EVEX_V512,
5362                             EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5363   let Predicates = [prd, HasVLX] in {
5364   defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
5365                             VTInfo.info256>, EVEX_V256,
5366                             EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5367   defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
5368                             VTInfo.info128>, EVEX_V128,
5369                             EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5370   }
5371 }
5372
// Dword, qword and word variants of a shift-by-128-bit-count instruction,
// each with its own opcode (opcd / opcq / opcw). D and Q require HasAVX512,
// W requires HasBWI; Q additionally sets VEX_W.
// NOTE(review): the W variant passes bc_v2i64 as bc_frag although SrcVT is
// v8i16 -- confirm this is intended.
5373 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5374                               string OpcodeStr, SDNode OpNode,
5375                               OpndItins itins> {
5376   defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, itins, v4i32,
5377                               bc_v4i32, avx512vl_i32_info, HasAVX512>;
5378   defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, itins, v2i64,
5379                               bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W;
5380   defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, itins, v8i16,
5381                               bc_v2i64, avx512vl_i16_info, HasBWI>;
5382 }
5383
// Instantiates the immediate-count shift forms (ri, mi, mbi) at 512 bits
// under HasAVX512 and at 256/128 bits under HasAVX512 + HasVLX.
5384 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5385                                   string OpcodeStr, SDNode OpNode,
5386                                   OpndItins itins, AVX512VLVectorVTInfo VTInfo> {
5387   let Predicates = [HasAVX512] in
5388   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, itins,
5389                               VTInfo.info512>,
5390              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
5391                               VTInfo.info512>, EVEX_V512;
5392   let Predicates = [HasAVX512, HasVLX] in {
5393   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, itins,
5394                               VTInfo.info256>,
5395              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
5396                               VTInfo.info256>, EVEX_V256;
5397   defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5398                               itins, VTInfo.info128>,
5399              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
5400                               VTInfo.info128>, EVEX_V128;
5401   }
5402 }
5403
// Word-element immediate-count shift forms (ri, mi only; no broadcast form).
// WZ requires HasBWI; WZ256/WZ128 require HasVLX + HasBWI. All are VEX_WIG.
5404 multiclass avx512_shift_rmi_w<bits<8> opcw,
5405                                  Format ImmFormR, Format ImmFormM,
5406                                  string OpcodeStr, SDNode OpNode,
5407                                  OpndItins itins> {
5408   let Predicates = [HasBWI] in
5409   defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5410                                itins, v32i16_info>, EVEX_V512, VEX_WIG;
5411   let Predicates = [HasVLX, HasBWI] in {
5412   defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5413                                itins, v16i16x_info>, EVEX_V256, VEX_WIG;
5414   defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5415                                itins, v8i16x_info>, EVEX_V128, VEX_WIG;
5416   }
5417 }
5418
// Dword and qword immediate-count shift/rotate forms. Each element size has
// its own opcode (opcd / opcq) and a full-vector CD8 tuple of matching
// element width; the qword forms also set VEX_W.
5419 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5420                                  Format ImmFormR, Format ImmFormM,
5421                                  string OpcodeStr, SDNode OpNode, OpndItins itins> {
5422   defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5423                                  itins, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5424   defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5425                                  itins, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
5426 }
5427
// Immediate-count shifts and rotates. The operation is chosen by the ModRM
// reg-field format (MRM2 = srl, MRM6 = sll, MRM4 = sra, MRM0 = ror,
// MRM1 = rol). VPSRA/VPROR/VPROL pass 0x72 for both dword and qword.
// NOTE(review): the two element sizes then presumably differ only by the
// VEX_W that avx512_shift_rmi_dq adds to the qword forms -- confirm.
5428 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5429                                  SSE_INTSHIFT_P>,
5430              avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5431                                 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
5432
5433 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5434                                  SSE_INTSHIFT_P>,
5435              avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5436                                 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
5437
5438 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5439                                  SSE_INTSHIFT_P>,
5440              avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5441                                 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
5442
5443 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5444                                  SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
5445 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5446                                  SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
5447
// Shifts whose count comes from a 128-bit register/memory operand. These
// reuse the VPSLL/VPSRA/VPSRL defm names; the generated records end in rr/rm
// rather than ri/mi/mbi.
5448 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, SSE_INTSHIFT_P>;
5449 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, SSE_INTSHIFT_P>;
5450 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, SSE_INTSHIFT_P>;
5451
5452 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// The narrow source is inserted into an IMPLICIT_DEF v8i64 (sub_ymm or
// sub_xmm), shifted with the 512-bit VPSRAQZrr / VPSRAQZri, and the same
// subregister is extracted as the result. The count operand (128-bit vector
// or immediate) is passed through unchanged.
5453 let Predicates = [HasAVX512, NoVLX] in {
5454   def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5455             (EXTRACT_SUBREG (v8i64
5456               (VPSRAQZrr
5457                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5458                  VR128X:$src2)), sub_ymm)>;
5459
5460   def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5461             (EXTRACT_SUBREG (v8i64
5462               (VPSRAQZrr
5463                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5464                  VR128X:$src2)), sub_xmm)>;
5465
5466   def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
5467             (EXTRACT_SUBREG (v8i64
5468               (VPSRAQZri
5469                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5470                  imm:$src2)), sub_ymm)>;
5471
5472   def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
5473             (EXTRACT_SUBREG (v8i64
5474               (VPSRAQZri
5475                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5476                  imm:$src2)), sub_xmm)>;
5477 }
5478
5479 //===-------------------------------------------------------------------===//
5480 // Variable Bit Shifts
5481 //===-------------------------------------------------------------------===//
// Variable shift/rotate: the second source is a full vector of the same
// type as the first.
//   rr - count vector in a register.
//   rm - count vector loaded from memory (bitconverted to _.VT); carries
//        EVEX_CD8<_.EltSize, CD8VF>.
5482 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5483                             OpndItins itins, X86VectorVTInfo _> {
5484   let ExeDomain = _.ExeDomain in {
5485   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5486                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5487                       "$src2, $src1", "$src1, $src2",
5488                    (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))),
5489                    itins.rr>, AVX5128IBase, EVEX_4V,
5490                    Sched<[itins.Sched]>;
5491   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5492                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5493                        "$src2, $src1", "$src1, $src2",
5494                    (_.VT (OpNode _.RC:$src1,
5495                    (_.VT (bitconvert (_.LdFrag addr:$src2))))),
5496                    itins.rm>, AVX5128IBase, EVEX_4V,
5497                    EVEX_CD8<_.EltSize, CD8VF>,
5498                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
5499   }
5500 }
5501
// Broadcast-memory form (rmb) of a variable shift/rotate: the count vector
// is a scalar load broadcast with X86VBroadcast, encoded with EVEX_B.
5502 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5503                                OpndItins itins, X86VectorVTInfo _> {
5504   let ExeDomain = _.ExeDomain in
5505   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5506                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5507                     "${src2}"##_.BroadcastStr##", $src1",
5508                     "$src1, ${src2}"##_.BroadcastStr,
5509                     (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5510                                                 (_.ScalarLdFrag addr:$src2))))),
5511                     itins.rm>, AVX5128IBase, EVEX_B,
5512                     EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5513                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
5514 }
5515
// Instantiates the rr/rm/rmb variable-shift forms at 512 bits under
// HasAVX512 and at 256/128 bits under HasAVX512 + HasVLX.
5516 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5517                                   OpndItins itins, AVX512VLVectorVTInfo _> {
5518   let Predicates  = [HasAVX512] in
5519   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
5520            avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
5521
5522   let Predicates = [HasAVX512, HasVLX] in {
5523   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
5524               avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
5525   defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>,
5526               avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
5527   }
5528 }
5529
// Dword ("d") and qword ("q") variants of a variable shift/rotate. Both use
// the same opcode; the qword variant additionally sets VEX_W.
5530 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
5531                                  SDNode OpNode, OpndItins itins> {
5532   defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, itins,
5533                                  avx512vl_i32_info>;
5534   defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, itins,
5535                                  avx512vl_i64_info>, VEX_W;
5536 }
5537
5538 // Use 512bit version to implement 128/256 bit in case NoVLX.
//   _         - the 128/256/512-bit type-info triple.
//   OpcodeStr - record-name prefix of the instruction; "Zrr" is appended.
//   OpNode    - SDNode to match on the 256- and 128-bit types.
//   p         - predicate list guarding both patterns.
// Both sources are inserted into IMPLICIT_DEF 512-bit values (sub_ymm or
// sub_xmm), the 512-bit rr instruction is applied, and the same subregister
// is extracted as the result.
5539 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
5540                                      SDNode OpNode, list<Predicate> p> {
5541   let Predicates = p in {
5542   def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
5543                                   (_.info256.VT _.info256.RC:$src2))),
5544             (EXTRACT_SUBREG
5545                 (!cast<Instruction>(OpcodeStr#"Zrr")
5546                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5547                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5548              sub_ymm)>;
5549
5550   def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
5551                                   (_.info128.VT _.info128.RC:$src2))),
5552             (EXTRACT_SUBREG
5553                 (!cast<Instruction>(OpcodeStr#"Zrr")
5554                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5555                     (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5556              sub_xmm)>;
5557   }
5558 }
// Word-element variable shift (rr/rm forms only; no broadcast form).
// WZ requires HasBWI; WZ256/WZ128 require HasVLX + HasBWI. All set VEX_W.
// OpcodeStr is used as given (the caller passes the full "...w" mnemonic).
5559 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
5560                               SDNode OpNode, OpndItins itins> {
5561   let Predicates = [HasBWI] in
5562   defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i16_info>,
5563               EVEX_V512, VEX_W;
5564   let Predicates = [HasVLX, HasBWI] in {
5565
5566   defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v16i16x_info>,
5567               EVEX_V256, VEX_W;
5568   defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v8i16x_info>,
5569               EVEX_V128, VEX_W;
5570   }
5571 }
5572
// Variable shifts: dword/qword forms plus word forms (separate opcode).
5573 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SSE_INTSHIFT_P>,
5574               avx512_var_shift_w<0x12, "vpsllvw", shl, SSE_INTSHIFT_P>;
5575
5576 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SSE_INTSHIFT_P>,
5577               avx512_var_shift_w<0x11, "vpsravw", sra, SSE_INTSHIFT_P>;
5578
5579 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SSE_INTSHIFT_P>,
5580               avx512_var_shift_w<0x10, "vpsrlvw", srl, SSE_INTSHIFT_P>;
5581
// Variable rotates: dword/qword forms only.
5582 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SSE_INTSHIFT_P>;
5583 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SSE_INTSHIFT_P>;
5584
// Without VLX, select the 128/256-bit operations to the 512-bit instruction.
// The word-element cases require HasBWI; the qword sra case only HasAVX512.
5585 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
5586 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
5587 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
5588 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
5589
5590 // Special handling for VPSRAV intrinsics.
// Selection patterns mapping the X86vsrav node onto an existing instruction
// family named InstrStr # _.ZSuffix.
//   InstrStr - record-name prefix of the instruction family.
//   _        - vector type info (supplies VT, RC, ZSuffix, mask classes).
//   p        - predicate list guarding all patterns.
// Six patterns are produced: unmasked, merge-masked (vselect with $src0) and
// zero-masked (vselect with _.ImmAllZerosV), each for a register and a
// memory second operand -> rr/rm, rrk/rmk, rrkz/rmkz.
5591 multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
5592                                          list<Predicate> p> {
5593   let Predicates = p in {
5594     def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
5595               (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
5596                _.RC:$src2)>;
5597     def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
5598               (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
5599                _.RC:$src1, addr:$src2)>;
5600     def : Pat<(_.VT (vselect _.KRCWM:$mask,
5601                      (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
5602               (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
5603                _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
5604     def : Pat<(_.VT (vselect _.KRCWM:$mask,
5605                      (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
5606                      _.RC:$src0)),
5607               (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
5608                _.KRC:$mask, _.RC:$src1, addr:$src2)>;
5609     def : Pat<(_.VT (vselect _.KRCWM:$mask,
5610                      (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
5611               (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
5612                _.RC:$src1, _.RC:$src2)>;
5613     def : Pat<(_.VT (vselect _.KRCWM:$mask,
5614                      (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
5615                      _.ImmAllZerosV)),
5616               (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
5617                _.RC:$src1, addr:$src2)>;
5618   }
5619 }
5620
// Broadcast-aware variant: inherits the six patterns of
// avx512_var_shift_int_lowering and adds three more (rmb, rmbk, rmbkz) in which
// the shift-count operand is an X86VBroadcast of a scalar load from $src2.
5621 multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
5622                                          list<Predicate> p> :
5623            avx512_var_shift_int_lowering<InstrStr, _, p> {
5624   let Predicates = p in {
  // Unmasked broadcast form.
5625     def : Pat<(_.VT (X86vsrav _.RC:$src1,
5626                      (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
5627               (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
5628                _.RC:$src1, addr:$src2)>;
  // Merge-masked broadcast form ($src0 is the vselect fallback operand).
5629     def : Pat<(_.VT (vselect _.KRCWM:$mask,
5630                      (X86vsrav _.RC:$src1,
5631                       (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
5632                      _.RC:$src0)),
5633               (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
5634                _.KRC:$mask, _.RC:$src1, addr:$src2)>;
  // Zero-masked broadcast form (_.ImmAllZerosV is the vselect fallback operand).
5635     def : Pat<(_.VT (vselect _.KRCWM:$mask,
5636                      (X86vsrav _.RC:$src1,
5637                       (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
5638                      _.ImmAllZerosV)),
5639               (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
5640                _.RC:$src1, addr:$src2)>;
5641   }
5642 }
5643
// Instantiate the X86vsrav lowering patterns. The word (VPSRAVW) forms use the
// plain multiclass; the dword/qword forms use the _mb variant, which adds the
// broadcast patterns. 128- and 256-bit types additionally require HasVLX.
5644 defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
5645 defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
5646 defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
5647 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
5648 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
5649 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
5650 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
5651 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
5652 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
5653
5654
5655 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Every pattern follows the same shape: each narrow source is placed into a
// 512-bit value with INSERT_SUBREG on an IMPLICIT_DEF, the Z (512-bit)
// instruction is applied, and the result is narrowed again with EXTRACT_SUBREG
// using the same sub_xmm/sub_ymm index.
5656 let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate-left (rotl), 64-bit elements -> VPROLVQZrr.
5657   def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5658             (EXTRACT_SUBREG (v8i64
5659               (VPROLVQZrr
5660                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5661                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5662                        sub_xmm)>;
5663   def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5664             (EXTRACT_SUBREG (v8i64
5665               (VPROLVQZrr
5666                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5667                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
5668                        sub_ymm)>;
5669
  // Variable rotate-left (rotl), 32-bit elements -> VPROLVDZrr.
5670   def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
5671             (EXTRACT_SUBREG (v16i32
5672               (VPROLVDZrr
5673                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5674                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5675                         sub_xmm)>;
5676   def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
5677             (EXTRACT_SUBREG (v16i32
5678               (VPROLVDZrr
5679                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5680                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
5681                         sub_ymm)>;
5682
  // Immediate rotate-left (X86vrotli, i8 count), 64-bit elements -> VPROLQZri.
5683   def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
5684             (EXTRACT_SUBREG (v8i64
5685               (VPROLQZri
5686                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5687                         imm:$src2)), sub_xmm)>;
5688   def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
5689             (EXTRACT_SUBREG (v8i64
5690               (VPROLQZri
5691                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5692                        imm:$src2)), sub_ymm)>;
5693
  // Immediate rotate-left (X86vrotli, i8 count), 32-bit elements -> VPROLDZri.
5694   def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
5695             (EXTRACT_SUBREG (v16i32
5696               (VPROLDZri
5697                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5698                         imm:$src2)), sub_xmm)>;
5699   def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
5700             (EXTRACT_SUBREG (v16i32
5701               (VPROLDZri
5702                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5703                         imm:$src2)), sub_ymm)>;
5704 }
5705
5706 // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Every pattern follows the same shape: each narrow source is placed into a
// 512-bit value with INSERT_SUBREG on an IMPLICIT_DEF, the Z (512-bit)
// instruction is applied, and the result is narrowed again with EXTRACT_SUBREG
// using the same sub_xmm/sub_ymm index.
5707 let Predicates = [HasAVX512, NoVLX] in {
  // Variable rotate-right (rotr), 64-bit elements -> VPRORVQZrr.
5708   def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5709             (EXTRACT_SUBREG (v8i64
5710               (VPRORVQZrr
5711                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5712                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5713                        sub_xmm)>;
5714   def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5715             (EXTRACT_SUBREG (v8i64
5716               (VPRORVQZrr
5717                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5718                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
5719                        sub_ymm)>;
5720
  // Variable rotate-right (rotr), 32-bit elements -> VPRORVDZrr.
5721   def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
5722             (EXTRACT_SUBREG (v16i32
5723               (VPRORVDZrr
5724                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5725                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5726                         sub_xmm)>;
5727   def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
5728             (EXTRACT_SUBREG (v16i32
5729               (VPRORVDZrr
5730                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5731                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
5732                         sub_ymm)>;
5733
  // Immediate rotate-right (X86vrotri, i8 count), 64-bit elements -> VPRORQZri.
5734   def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
5735             (EXTRACT_SUBREG (v8i64
5736               (VPRORQZri
5737                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5738                         imm:$src2)), sub_xmm)>;
5739   def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
5740             (EXTRACT_SUBREG (v8i64
5741               (VPRORQZri
5742                 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5743                        imm:$src2)), sub_ymm)>;
5744
  // Immediate rotate-right (X86vrotri, i8 count), 32-bit elements -> VPRORDZri.
5745   def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
5746             (EXTRACT_SUBREG (v16i32
5747               (VPRORDZri
5748                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5749                         imm:$src2)), sub_xmm)>;
5750   def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
5751             (EXTRACT_SUBREG (v16i32
5752               (VPRORDZri
5753                 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5754                         imm:$src2)), sub_ymm)>;
5755 }
5756
5757 //===-------------------------------------------------------------------===//
5758 // 1-src variable permutation VPERMW/D/Q
5759 //===-------------------------------------------------------------------===//
// Defines the 512-bit (Z, HasAVX512) and 256-bit (Z256, HasAVX512 + HasVLX)
// forms by combining avx512_var_shift with avx512_var_shift_mb. No 128-bit
// form is instantiated by this multiclass.
5760 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5761                                  OpndItins itins, AVX512VLVectorVTInfo _> {
5762   let Predicates  = [HasAVX512] in
5763   defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
5764            avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
5765
5766   let Predicates = [HasAVX512, HasVLX] in
5767   defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
5768               avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
5769 }
5770
// Immediate-operand counterpart of avx512_vperm_dq_sizes: Z (HasAVX512) and
// Z256 (HasAVX512 + HasVLX) are each built from avx512_shift_rmi plus
// avx512_shift_rmbi. Both ImmFormR and ImmFormM go to avx512_shift_rmi; only
// ImmFormM is forwarded to avx512_shift_rmbi. No 128-bit form is instantiated.
5771 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5772                                  string OpcodeStr, SDNode OpNode,
5773                                  OpndItins itins, AVX512VLVectorVTInfo VTInfo> {
5774   let Predicates = [HasAVX512] in
5775   defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5776                               itins, VTInfo.info512>,
5777              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
5778                                itins, VTInfo.info512>, EVEX_V512;
5779   let Predicates = [HasAVX512, HasVLX] in
5780   defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5781                               itins, VTInfo.info256>,
5782              avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
5783                                itins, VTInfo.info256>, EVEX_V256;
5784 }
5785
// Byte/word permute at all three widths. Z (512-bit) requires only `prd`;
// Z256 and Z128 additionally require HasVLX. Only avx512_var_shift is used,
// so no broadcast (_mb) forms are created here.
5786 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
5787                               Predicate prd, SDNode OpNode,
5788                               OpndItins itins, AVX512VLVectorVTInfo _> {
5789   let Predicates = [prd] in
5790   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
5791               EVEX_V512 ;
5792   let Predicates = [HasVLX, prd] in {
5793   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
5794               EVEX_V256 ;
5795   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>,
5796               EVEX_V128 ;
5797   }
5798 }
5799
// Byte/word variable permutes (X86VPermv). Both use opcode 0x8D; VPERMW adds
// VEX_W and is gated on HasBWI, VPERMB is gated on HasVBMI.
5800 defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
5801                                AVX2_PERMV_I, avx512vl_i16_info>, VEX_W;
5802 defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
5803                                AVX2_PERMV_I, avx512vl_i8_info>;
5804
// Dword/qword and float/double variable permutes (X86VPermv). The 64-bit
// element variants add VEX_W.
5805 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
5806                                     AVX2_PERMV_I, avx512vl_i32_info>;
5807 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
5808                                     AVX2_PERMV_I, avx512vl_i64_info>, VEX_W;
5809 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
5810                                      AVX2_PERMV_F, avx512vl_f32_info>;
5811 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
5812                                      AVX2_PERMV_F, avx512vl_f64_info>, VEX_W;
5813
// Immediate-controlled permutes (X86VPermi) for 64-bit elements. These reuse
// the VPERMQ/VPERMPD defm prefixes with opcodes 0x00/0x01.
5814 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
5815                              X86VPermi, AVX2_PERMV_I, avx512vl_i64_info>,
5816                              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
5817 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
5818                              X86VPermi, AVX2_PERMV_F, avx512vl_f64_info>,
5819                              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
5820 //===----------------------------------------------------------------------===//
5821 // AVX-512 - VPERMIL
5822 //===----------------------------------------------------------------------===//
5823
// Variable-control permil forms for data type `_` with control vector type
// `Ctrl`. Three maskable forms are defined:
//   rr  - control in a register (Ctrl.RC),
//   rm  - control loaded as a full vector (Ctrl.LdFrag, bitconverted to Ctrl.VT),
//   rmb - control broadcast from a scalar load (Ctrl.ScalarLdFrag), EVEX_B set.
// NOTE(review): rmb declares its memory operand as _.ScalarMemOp whereas rm
// uses Ctrl.MemOp; presumably equivalent because data and control elements
// have the same width - confirm.
5824 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
5825                              OpndItins itins, X86VectorVTInfo _,
5826                              X86VectorVTInfo Ctrl> {
5827   defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
5828                   (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
5829                   "$src2, $src1", "$src1, $src2",
5830                   (_.VT (OpNode _.RC:$src1,
5831                                (Ctrl.VT Ctrl.RC:$src2))), itins.rr>,
5832                   T8PD, EVEX_4V, Sched<[itins.Sched]>;
5833   defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
5834                   (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
5835                   "$src2, $src1", "$src1, $src2",
5836                   (_.VT (OpNode
5837                            _.RC:$src1,
5838                            (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2))))),
5839                   itins.rm>, T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5840                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
5841   defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
5842                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5843                    "${src2}"##_.BroadcastStr##", $src1",
5844                    "$src1, ${src2}"##_.BroadcastStr,
5845                    (_.VT (OpNode
5846                             _.RC:$src1,
5847                             (Ctrl.VT (X86VBroadcast
5848                                        (Ctrl.ScalarLdFrag addr:$src2))))),
5849                    itins.rm>, T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
5850                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
5851 }
5852
// Instantiates avx512_permil_vec with the X86VPermilpv node at each width:
// Z (512-bit) under HasAVX512, Z128 and Z256 under HasAVX512 + HasVLX. The
// data (`_`) and control (`Ctrl`) type families are indexed at the same width.
5853 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
5854                                     OpndItins itins, AVX512VLVectorVTInfo _,
5855                                     AVX512VLVectorVTInfo Ctrl> {
5856   let Predicates = [HasAVX512] in {
5857     defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
5858                                   _.info512, Ctrl.info512>, EVEX_V512;
5859   }
5860   let Predicates = [HasAVX512, HasVLX] in {
5861     defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
5862                                   _.info128, Ctrl.info128>, EVEX_V128;
5863     defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
5864                                   _.info256, Ctrl.info256>, EVEX_V256;
5865   }
5866 }
5867
// Top-level permil multiclass. Under the same NAME it emits both the
// variable-control forms (opcode OpcVar, via avx512_permil_vec_common) and the
// immediate forms (opcode OpcImm, X86VPermilpi, via avx512_shift_rmi_sizes).
// Both use the AVX_VPERMILV itinerary.
5868 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
5869                          AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
5870   defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, AVX_VPERMILV, _, Ctrl>;
5871   defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
5872                                     X86VPermilpi, AVX_VPERMILV, _>,
5873                     EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
5874 }
5875
// VPERMILPS: f32 data with i32 control vectors; immediate opcode 0x04,
// variable opcode 0x0C.
5876 let ExeDomain = SSEPackedSingle in
5877 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
5878                                avx512vl_i32_info>;
// VPERMILPD: f64 data with i64 control vectors; immediate opcode 0x05,
// variable opcode 0x0D; adds VEX_W.
5879 let ExeDomain = SSEPackedDouble in
5880 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
5881                                avx512vl_i64_info>, VEX_W;
5882
5883 //===----------------------------------------------------------------------===//
5884 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
5885 //===----------------------------------------------------------------------===//
5886
// All three shuffles share opcode 0x70 and the SSE_PSHUF itinerary; they differ
// in the SDNode and in the AVX512*Ii8Base class mixed in (BI / XS / XD).
// VPSHUFD is sized via avx512_shift_rmi_sizes on i32 vectors; the word
// shuffles go through avx512_shift_rmi_w.
5887 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
5888                              X86PShufd, SSE_PSHUF, avx512vl_i32_info>,
5889                              EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
5890 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
5891                                   X86PShufhw, SSE_PSHUF>, EVEX, AVX512XSIi8Base;
5892 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
5893                                   X86PShuflw, SSE_PSHUF>, EVEX, AVX512XDIi8Base;
5894
// Byte shuffle at fixed i8 vector widths: Z (v64i8) under HasBWI, Z256 (v32i8)
// and Z128 (v16i8) under HasVLX + HasBWI. Each width is an avx512_var_shift
// instantiation, so no broadcast forms are created.
5895 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5896                                OpndItins itins> {
5897   let Predicates = [HasBWI] in
5898   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, itins, v64i8_info>, EVEX_V512;
5899
5900   let Predicates = [HasVLX, HasBWI] in {
5901   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i8x_info>, EVEX_V256;
5902   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v16i8x_info>, EVEX_V128;
5903   }
5904 }
5905
// VPSHUFB: opcode 0x00, X86pshufb node, SSE_PSHUFB itinerary; VEX_WIG is
// applied to every width.
5906 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, SSE_PSHUFB>, VEX_WIG;
5907
5908 //===----------------------------------------------------------------------===//
5909 // Move Low to High and High to Low packed FP Instructions
5910 //===----------------------------------------------------------------------===//
// vmovlhps, register-register form (opcode 0x16): selects X86Movlhps on two
// VR128X operands, producing v4f32.
5911 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
5912           (ins VR128X:$src1, VR128X:$src2),
5913           "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5914           [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
5915            IIC_SSE_MOV_LH>, EVEX_4V;
// vmovhlps, register-register form (opcode 0x12): selects X86Movhlps on two
// VR128X operands, producing v4f32.
5916 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
5917           (ins VR128X:$src1, VR128X:$src2),
5918           "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5919           [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
5920           IIC_SSE_MOV_LH>, EVEX_4V;
5921
5922 //===----------------------------------------------------------------------===//
5923 // VMOVHPS/PD VMOVLPS Instructions
5924 // All patterns were taken from the SSE implementation.
5925 //===----------------------------------------------------------------------===//
// Defines a single `rm` instruction taking a register ($src1) and an f64mem
// operand ($src2). The selection pattern applies OpNode to $src1 and to a
// loadf64 that is wrapped by scalar_to_vector into v2f64 and then bitconverted
// to _.VT. The execution domain comes from `_`.
5926 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
5927                                   X86VectorVTInfo _> {
5928   let ExeDomain = _.ExeDomain in
5929   def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
5930                   (ins _.RC:$src1, f64mem:$src2),
5931                   !strconcat(OpcodeStr,
5932                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5933                   [(set _.RC:$dst,
5934                      (OpNode _.RC:$src1,
5935                        (_.VT (bitconvert
5936                          (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))],
5937                   IIC_SSE_MOV_LH>, EVEX_4V;
5938 }
5939
// 128-bit load forms. Opcode 0x16 is used for vmovhps/vmovhpd and 0x12 for
// vmovlps/vmovlpd. The PS variants use v4f32 with EVEX_CD8<32, CD8VT2>; the PD
// variants use v2f64 with EVEX_CD8<64, CD8VT1> and add VEX_W.
5940 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
5941                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
5942 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
5943                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
5944 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
5945                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
5946 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
5947                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
5948
// Extra load-folding patterns: map additional DAG shapes (i64 loads through
// scalar_to_vector, X86vzload, plain loads, X86Movsd) onto the Z128 `rm`
// instructions.
5949 let Predicates = [HasAVX512] in {
5950   // VMOVHPS patterns
5951   def : Pat<(X86Movlhps VR128X:$src1,
5952                (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
5953           (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
5954   def : Pat<(X86Movlhps VR128X:$src1,
5955                (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
5956           (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
5957   // VMOVHPD patterns
5958   def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
5959                     (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
5960            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
5961   // VMOVLPS patterns
5962   def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
5963           (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
5964   // VMOVLPD patterns
5965   def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
5966           (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
  // X86Movsd with a scalar f64 load as second operand also selects VMOVLPD.
5967   def : Pat<(v2f64 (X86Movsd VR128X:$src1,
5968                            (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
5969           (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
5970 }
5971
// Store (MRMDestMem) forms. Each pattern stores an f64 taken as element 0 of a
// v2f64 view of $src.
// vmovhps store (0x17): element 0 of X86Unpckh($src, $src), with $src viewed
// as v2f64 through bc_v2f64.
5972 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
5973                        (ins f64mem:$dst, VR128X:$src),
5974                        "vmovhps\t{$src, $dst|$dst, $src}",
5975                        [(store (f64 (extractelt
5976                                      (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
5977                                                 (bc_v2f64 (v4f32 VR128X:$src))),
5978                                      (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
5979                        EVEX, EVEX_CD8<32, CD8VT2>;
// vmovhpd store (0x17, VEX_W): element 0 of X86Unpckh($src, $src) on v2f64.
5980 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
5981                        (ins f64mem:$dst, VR128X:$src),
5982                        "vmovhpd\t{$src, $dst|$dst, $src}",
5983                        [(store (f64 (extractelt
5984                                      (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
5985                                      (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
5986                        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
// vmovlps store (0x13): element 0 of $src viewed as v2f64 through bc_v2f64.
5987 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
5988                        (ins f64mem:$dst, VR128X:$src),
5989                        "vmovlps\t{$src, $dst|$dst, $src}",
5990                        [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
5991                                      (iPTR 0))), addr:$dst)],
5992                                      IIC_SSE_MOV_LH>,
5993                        EVEX, EVEX_CD8<32, CD8VT2>;
// vmovlpd store (0x13, VEX_W): element 0 of the v2f64 $src.
5994 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
5995                        (ins f64mem:$dst, VR128X:$src),
5996                        "vmovlpd\t{$src, $dst|$dst, $src}",
5997                        [(store (f64 (extractelt (v2f64 VR128X:$src),
5998                                      (iPTR 0))), addr:$dst)],
5999                                      IIC_SSE_MOV_LH>,
6000                        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6001
// Extra store-folding patterns that select the Z128 `mr` instructions.
6002 let Predicates = [HasAVX512] in {
6003   // VMOVHPD patterns
  // Storing element 0 of X86VPermilpi($src, 1) selects the vmovhpd store.
6004   def : Pat<(store (f64 (extractelt
6005                            (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6006                            (iPTR 0))), addr:$dst),
6007            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6008   // VMOVLPS patterns
  // Load / X86Movlps / store to the same address ($src1) folds to one store.
6009   def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
6010                    addr:$src1),
6011             (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
6012   // VMOVLPD patterns
  // Load / X86Movlpd / store to the same address ($src1) folds to one store.
6013   def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
6014                    addr:$src1),
6015             (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
6016 }
6017 //===----------------------------------------------------------------------===//
6018 // FMA - Fused Multiply Operations
6019 //
6020
// Packed FMA, "213" operand order: every pattern is
// OpNode($src2, $src1, $src3) with $src1 tied to $dst. Three forms are defined:
//   r  - all operands in registers,
//   m  - $src3 loaded as a full vector (_.LdFrag),
//   mb - $src3 broadcast from a scalar load (X86VBroadcast), EVEX_B set.
// NOTE(review): the Suff parameter is not referenced in this body.
6021 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6022                                X86VectorVTInfo _, string Suff> {
6023   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6024   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6025           (ins _.RC:$src2, _.RC:$src3),
6026           OpcodeStr, "$src3, $src2", "$src2, $src3",
6027           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), NoItinerary, 1, 1>,
6028           AVX512FMA3Base, Sched<[WriteFMA]>;
6029
6030   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6031           (ins _.RC:$src2, _.MemOp:$src3),
6032           OpcodeStr, "$src3, $src2", "$src2, $src3",
6033           (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6034           NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
6035
6036   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6037             (ins _.RC:$src2, _.ScalarMemOp:$src3),
6038             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6039             !strconcat("$src2, ${src3}", _.BroadcastStr ),
6040             (OpNode _.RC:$src2,
6041              _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))),
6042              NoItinerary, 1, 0>, AVX512FMA3Base, EVEX_B,
6043              Sched<[WriteFMALd, ReadAfterLd]>;
6044   }
6045 }
6046
// Packed FMA, "213" order, register form with explicit rounding control (rb).
// The AVX512RC operand $rc is passed to OpNode as an i32 immediate after the
// three sources; EVEX_B and EVEX_RC are set on the instruction.
// NOTE(review): the Suff parameter is not referenced in this body.
6047 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6048                                  X86VectorVTInfo _, string Suff> {
6049   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6050   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6051           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6052           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6053           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))),
6054           NoItinerary, 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
6055 }
6056
// Width expansion for the 213 forms. Z (512-bit, HasAVX512) gets the r/m/mb
// forms plus the rounding-control form; Z256 and Z128 (HasVLX + HasAVX512) get
// only the r/m/mb forms - no rounding variant is instantiated for them.
6057 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6058                                    SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6059                                    string Suff> {
6060   let Predicates = [HasAVX512] in {
6061     defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6062                   avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6063                       Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6064   }
6065   let Predicates = [HasVLX, HasAVX512] in {
6066     defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
6067                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6068     defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
6069                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6070   }
6071 }
6072
// Element-type expansion for the 213 forms: PS appends "ps" to the mnemonic and
// uses the f32 type family; PD appends "pd", uses the f64 family and adds VEX_W.
6073 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6074                               SDNode OpNodeRnd > {
6075     defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6076                                       avx512vl_f32_info, "PS">;
6077     defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6078                                       avx512vl_f64_info, "PD">, VEX_W;
6079 }
6080
// 213-form packed FMA families. Each line supplies the opcode, the mnemonic
// stem, the plain SDNode and its rounding-control (Rnd) counterpart.
6081 defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
6082 defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
6083 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
6084 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
6085 defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
6086 defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
6087
6088
// Packed FMA, "231" operand order: every pattern is
// OpNode($src2, $src3, $src1) with $src1 tied to $dst. Three forms are defined:
//   r  - all operands in registers (also passes `vselect, 1` to
//        AVX512_maskable_3src, unlike m/mb),
//   m  - $src3 loaded as a full vector (_.LdFrag),
//   mb - $src3 broadcast from a scalar load (X86VBroadcast), EVEX_B set.
// NOTE(review): the Suff parameter is not referenced in this body.
6089 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6090                                X86VectorVTInfo _, string Suff> {
6091   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6092   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6093           (ins _.RC:$src2, _.RC:$src3),
6094           OpcodeStr, "$src3, $src2", "$src2, $src3",
6095           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), NoItinerary, 1, 1,
6096           vselect, 1>, AVX512FMA3Base, Sched<[WriteFMA]>;
6097
6098   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6099           (ins _.RC:$src2, _.MemOp:$src3),
6100           OpcodeStr, "$src3, $src2", "$src2, $src3",
6101           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6102           NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
6103
6104   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6105          (ins _.RC:$src2, _.ScalarMemOp:$src3),
6106          OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6107          "$src2, ${src3}"##_.BroadcastStr,
6108          (_.VT (OpNode _.RC:$src2,
6109                       (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
6110                       _.RC:$src1)), NoItinerary, 1, 0>, AVX512FMA3Base, EVEX_B,
6111          Sched<[WriteFMALd, ReadAfterLd]>;
6112   }
6113 }
6114
// Packed FMA, "231" order, register form with explicit rounding control (rb).
// The pattern is OpNode($src2, $src3, $src1, $rc) with $rc an i32 immediate;
// EVEX_B and EVEX_RC are set. Like the 231 `r` form, it passes `vselect, 1` to
// AVX512_maskable_3src.
// NOTE(review): the Suff parameter is not referenced in this body.
6115 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6116                                  X86VectorVTInfo _, string Suff> {
6117   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6118   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6119           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6120           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6121           (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
6122           NoItinerary, 1, 1, vselect, 1>,
6123           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
6124 }
6125
// Width expansion for the 231 forms. Z (512-bit, HasAVX512) gets the r/m/mb
// forms plus the rounding-control form; Z256 and Z128 (HasVLX + HasAVX512) get
// only the r/m/mb forms - no rounding variant is instantiated for them.
6126 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6127                                    SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6128                                    string Suff> {
6129   let Predicates = [HasAVX512] in {
6130     defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6131                   avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6132                       Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6133   }
6134   let Predicates = [HasVLX, HasAVX512] in {
6135     defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
6136                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6137     defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
6138                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6139   }
6140 }
6141
// Element-type expansion for the 231 forms: PS appends "ps" to the mnemonic and
// uses the f32 type family; PD appends "pd", uses the f64 family and adds VEX_W.
6142 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6143                               SDNode OpNodeRnd > {
6144     defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6145                                       avx512vl_f32_info, "PS">;
6146     defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6147                                       avx512vl_f64_info, "PD">, VEX_W;
6148 }
6149
// 231-form packed FMA families. Each line supplies the opcode, the mnemonic
// stem, the plain SDNode and its rounding-control (Rnd) counterpart.
6150 defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
6151 defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
6152 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
6153 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
6154 defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
6155 defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
6156
// Packed FMA, "132" operand order, with $src1 tied to $dst. The register form
// matches OpNode($src1, $src3, $src2) and passes `vselect, 1` to
// AVX512_maskable_3src; the memory (m) and broadcast (mb) forms instead match
// OpNode(<loaded $src3>, $src1, $src2), as explained in the comments below.
// NOTE(review): the Suff parameter is not referenced in this body.
6157 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6158                                X86VectorVTInfo _, string Suff> {
6159   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6160   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6161           (ins _.RC:$src2, _.RC:$src3),
6162           OpcodeStr, "$src3, $src2", "$src2, $src3",
6163           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), NoItinerary,
6164           1, 1, vselect, 1>, AVX512FMA3Base, Sched<[WriteFMA]>;
6165
6166   // Pattern is 312 order so that the load is in a different place from the
6167   // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6168   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6169           (ins _.RC:$src2, _.MemOp:$src3),
6170           OpcodeStr, "$src3, $src2", "$src2, $src3",
6171           (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6172           NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
6173
6174   // Pattern is 312 order so that the load is in a different place from the
6175   // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6176   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6177          (ins _.RC:$src2, _.ScalarMemOp:$src3),
6178          OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6179          "$src2, ${src3}"##_.BroadcastStr,
6180          (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
6181                        _.RC:$src1, _.RC:$src2)), NoItinerary, 1, 0>,
6182          AVX512FMA3Base, EVEX_B, Sched<[WriteFMALd, ReadAfterLd]>;
6183   }
6184 }
6185
     // Register form of the packed 132 FMA with an explicit rounding-control
     // operand. The extra AVX512RC:$rc input is handed to OpNode as an i32
     // immediate, and the record is tagged EVEX_B and EVEX_RC.
     // Suff is not referenced inside this multiclass.
6186 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6187                                  X86VectorVTInfo _, string Suff> {
6188   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6189   defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6190           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6191           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6192           (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
6193           NoItinerary, 1, 1, vselect, 1>,
6194           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
6195 }
6196
     // Expands one packed 132 FMA operation across the three vector widths
     // described by `_` (info512 / info256 / info128).
     //   Z    - 512-bit; requires HasAVX512 and also gets the rounding (rb) form
     //          built from OpNodeRnd.
     //   Z256 - 256-bit; requires HasVLX and HasAVX512; no rounding form.
     //   Z128 - 128-bit; requires HasVLX and HasAVX512; no rounding form.
6197 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6198                                    SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6199                                    string Suff> {
6200   let Predicates = [HasAVX512] in {
6201     defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6202                   avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6203                       Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6204   }
6205   let Predicates = [HasVLX, HasAVX512] in {
6206     defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
6207                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6208     defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
6209                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6210   }
6211 }
6212
     // Float entry point for the packed 132 FMA forms: emits a PS family
     // (avx512vl_f32_info, mnemonic suffix "ps") and a PD family
     // (avx512vl_f64_info, mnemonic suffix "pd"). Only the PD family adds VEX_W.
6213 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6214                               SDNode OpNodeRnd > {
6215     defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6216                                       avx512vl_f32_info, "PS">;
6217     defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6218                                       avx512vl_f64_info, "PD">, VEX_W;
6219 }
6220
     // Instantiate the packed 132-form FMA family: opcode byte, mnemonic stem,
     // plain SDNode, and the matching rounding (*Rnd) SDNode.
6221 defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
6222 defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
6223 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
6224 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
6225 defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
6226 defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
6227
6228 // Scalar FMA
     //
     // Shared skeleton for one scalar FMA operand form. The caller supplies the
     // selection DAGs; this multiclass only wires them to instruction records.
     //   RHS_VEC_r  - pattern for r_Int  (vector register operands).
     //   RHS_VEC_m  - pattern for m_Int  (memory third operand).
     //   RHS_VEC_rb - pattern for rb_Int (register form with AVX512RC:$rc).
     //   RHS_r      - pattern for the FRC register def `r`.
     //   RHS_m      - pattern for the FRC memory def `m`.
     //   MaskOnlyReg - when set, `r` is emitted with an empty pattern list.
     // $src1 is tied to $dst for every record defined here.
6229 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6230                                dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
6231                                dag RHS_r, dag RHS_m, bit MaskOnlyReg> {
6232 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6233   defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6234           (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6235           "$src3, $src2", "$src2, $src3", RHS_VEC_r, NoItinerary, 1, 1>,
6236           AVX512FMA3Base, Sched<[WriteFMA]>;
6237
6238   defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6239           (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6240           "$src3, $src2", "$src2, $src3", RHS_VEC_m, NoItinerary, 1, 1>,
6241           AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
6242
6243   defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6244          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6245          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb,
6246          NoItinerary, 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC,
6247          Sched<[WriteFMA]>;
6248
       // Codegen-only records operating on the scalar register class (_.FRC).
       // Unlike the records above, $src1 is listed explicitly in `ins`.
6249   let isCodeGenOnly = 1, isCommutable = 1 in {
6250     def r     : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6251                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6252                      !strconcat(OpcodeStr,
6253                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6254                      !if(MaskOnlyReg, [], [RHS_r])>, Sched<[WriteFMA]>;
6255     def m     : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
6256                     (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6257                     !strconcat(OpcodeStr,
6258                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6259                     [RHS_m]>, Sched<[WriteFMALd, ReadAfterLd]>;
6260   }// isCodeGenOnly = 1
6261 }// Constraints = "$src1 = $dst"
6262 }
6263
     // Emits the 213, 231 and 132 operand forms of one scalar FMA operation for
     // the scalar type `_`. Record names are NAME # form # SUFF # "Z".
     //   OpNode               - node used in the FRC (scalar register) patterns.
     //   OpNodes1/OpNodeRnds1 - nodes for the vector patterns of 213 and the
     //                          memory pattern of 132.
     //   OpNodes3/OpNodeRnds3 - nodes for the vector patterns of 231.
     // The trailing 0/1 argument is MaskOnlyReg of avx512_fma3s_common: only the
     // 213 form keeps a selection pattern on its FRC register def.
6264 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6265                             string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
6266                             SDNode OpNodeRnds1, SDNode OpNodes3,
6267                             SDNode OpNodeRnds3, X86VectorVTInfo _,
6268                             string SUFF> {
6269   let ExeDomain = _.ExeDomain in {
6270   defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6271                 // Operands for intrinsic are in 123 order to preserve passthru
6272                 // semantics.
6273                 (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2, _.RC:$src3)),
6274                 (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2,
6275                          _.ScalarIntMemCPat:$src3)),
6276                 (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
6277                          (i32 imm:$rc))),
6278                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6279                          _.FRC:$src3))),
6280                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6281                          (_.ScalarLdFrag addr:$src3)))), 0>;
6282
6283   defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6284                 (_.VT (OpNodes3 _.RC:$src2, _.RC:$src3, _.RC:$src1)),
6285                 (_.VT (OpNodes3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
6286                               _.RC:$src1)),
6287                 (_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
6288                                   (i32 imm:$rc))),
6289                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6290                                           _.FRC:$src1))),
6291                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6292                             (_.ScalarLdFrag addr:$src3), _.FRC:$src1))), 1>;
6293
6294   // One pattern is in 312 order so that the load is in a different place from
6295   // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
       // The register and rounding vector forms get null_frag, i.e. no pattern.
6296   defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6297                 (null_frag),
6298                 (_.VT (OpNodes1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
6299                               _.RC:$src2)),
6300                 (null_frag),
6301                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6302                          _.FRC:$src2))),
6303                 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6304                                  _.FRC:$src1, _.FRC:$src2))), 1>;
6305   }
6306 }
6307
     // Scalar FMA entry point: expands avx512_fma3s_all once for f32x_info
     // (suffix "SS", EVEX_CD8<32, CD8VT1>) and once for f64x_info (suffix "SD",
     // EVEX_CD8<64, CD8VT1>, plus VEX_W). Both are VEX_LIG and need HasAVX512.
6308 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6309                         string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
6310                         SDNode OpNodeRnds1, SDNode OpNodes3,
6311                         SDNode OpNodeRnds3> {
6312   let Predicates = [HasAVX512] in {
6313     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6314                                  OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
6315                                  f32x_info, "SS">,
6316                                  EVEX_CD8<32, CD8VT1>, VEX_LIG;
6317     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6318                                  OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
6319                                  f64x_info, "SD">,
6320                                  EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
6321   }
6322 }
6323
     // Instantiate the scalar FMA operations. The three opcode bytes are given
     // in 213, 231, 132 order, followed by the mnemonic stem and the five
     // SDNodes expected by avx512_fma3s.
6324 defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86Fmadds1,
6325                             X86FmaddRnds1, X86Fmadds3, X86FmaddRnds3>;
6326 defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86Fmsubs1,
6327                             X86FmsubRnds1, X86Fmsubs3, X86FmsubRnds3>;
6328 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86Fnmadds1,
6329                             X86FnmaddRnds1, X86Fnmadds3, X86FnmaddRnds3>;
6330 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86Fnmsubs1,
6331                             X86FnmsubRnds1, X86Fnmsubs3, X86FnmsubRnds3>;
6332
6333 //===----------------------------------------------------------------------===//
6334 // AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
6335 //===----------------------------------------------------------------------===//
     // Register (r), full-vector load (m) and broadcast load (mb) forms of one
     // IFMA operation for vector type `_`. $src1 is tied to $dst and is the last
     // operand of OpNode in every pattern. `itins` provides the itineraries
     // (rr / rm) and the scheduling class (Sched / Sched.Folded).
6336 let Constraints = "$src1 = $dst" in {
6337 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6338                              OpndItins itins, X86VectorVTInfo _> {
6339   // NOTE: The SDNode has the multiply operands first with the add last.
6340   // This enables commuted load patterns to be autogenerated by tablegen.
6341   let ExeDomain = _.ExeDomain in {
6342   defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6343           (ins _.RC:$src2, _.RC:$src3),
6344           OpcodeStr, "$src3, $src2", "$src2, $src3",
6345           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), itins.rr, 1, 1>,
6346          AVX512FMA3Base, Sched<[itins.Sched]>;
6347
6348   defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6349           (ins _.RC:$src2, _.MemOp:$src3),
6350           OpcodeStr, "$src3, $src2", "$src2, $src3",
6351           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6352           itins.rm>, AVX512FMA3Base, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6353
6354   defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6355             (ins _.RC:$src2, _.ScalarMemOp:$src3),
6356             OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6357             !strconcat("$src2, ${src3}", _.BroadcastStr ),
6358             (OpNode _.RC:$src2,
6359                     (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
6360                     _.RC:$src1), itins.rm>,
6361             AVX512FMA3Base, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6362   }
6363 }
6364 } // Constraints = "$src1 = $dst"
6365
     // Expands one IFMA operation across the three vector widths of `_`.
     // The 512-bit form (Z) requires HasIFMA; the 256-bit and 128-bit forms
     // (Z256 / Z128) additionally require HasVLX.
6366 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6367                                  OpndItins itins, AVX512VLVectorVTInfo _> {
6368   let Predicates = [HasIFMA] in {
6369     defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info512>,
6370                       EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6371   }
6372   let Predicates = [HasVLX, HasIFMA] in {
6373     defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info256>,
6374                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6375     defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info128>,
6376                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6377   }
6378 }
6379
     // The two IFMA instructions, both instantiated on avx512vl_i64_info with
     // the SSE_PMADD itineraries and VEX_W.
6380 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
6381                                   SSE_PMADD, avx512vl_i64_info>, VEX_W;
6382 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
6383                                   SSE_PMADD, avx512vl_i64_info>, VEX_W;
6384
6385 //===----------------------------------------------------------------------===//
6386 // AVX-512  Scalar convert from signed integer to float/double
6387 //===----------------------------------------------------------------------===//
6388
     // Integer-to-floating-point scalar conversion without a rounding operand.
     //   SrcRC    - integer register class of the value being converted.
     //   DstVT    - destination type info; FRC is used by rr/rm, RC by *_Int.
     //   x86memop - memory operand type for the load forms.
     //   ld_frag  - load fragment matched by rm_Int.
     //   asm      - mnemonic text placed in front of the operand string.
     // rr/rm carry no selection pattern (empty list). rr_Int/rm_Int are
     // codegen-only and match OpNode with FROUND_CURRENT as the rounding operand.
6389 multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, OpndItins itins,
6390                     RegisterClass SrcRC, X86VectorVTInfo DstVT,
6391                     X86MemOperand x86memop, PatFrag ld_frag, string asm> {
6392   let hasSideEffects = 0 in {
6393     def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
6394               (ins DstVT.FRC:$src1, SrcRC:$src),
6395               !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [],
6396               itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
6397     let mayLoad = 1 in
6398       def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
6399               (ins DstVT.FRC:$src1, x86memop:$src),
6400               !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [],
6401               itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6402   } // hasSideEffects = 0
6403   let isCodeGenOnly = 1 in {
6404     def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6405                   (ins DstVT.RC:$src1, SrcRC:$src2),
6406                   !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6407                   [(set DstVT.RC:$dst,
6408                         (OpNode (DstVT.VT DstVT.RC:$src1),
6409                                  SrcRC:$src2,
6410                                  (i32 FROUND_CURRENT)))], itins.rr>,
6411                  EVEX_4V, Sched<[itins.Sched]>;
6412
6413     def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
6414                   (ins DstVT.RC:$src1, x86memop:$src2),
6415                   !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6416                   [(set DstVT.RC:$dst,
6417                         (OpNode (DstVT.VT DstVT.RC:$src1),
6418                                  (ld_frag addr:$src2),
6419                                  (i32 FROUND_CURRENT)))], itins.rm>,
6420                   EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6421   }//isCodeGenOnly = 1
6422 }
6423
     // Register form of the integer-to-floating-point scalar conversion with an
     // explicit rounding-control operand: AVX512RC:$rc is passed to OpNode as an
     // i32 immediate and the record is tagged EVEX_B and EVEX_RC.
6424 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, OpndItins itins,
6425                     RegisterClass SrcRC, X86VectorVTInfo DstVT, string asm> {
6426   def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6427               (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
6428               !strconcat(asm,
6429                   "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
6430               [(set DstVT.RC:$dst,
6431                     (OpNode (DstVT.VT DstVT.RC:$src1),
6432                              SrcRC:$src2,
6433                              (i32 imm:$rc)))], itins.rr>,
6434               EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
6435 }
6436
     // Combines avx512_vcvtsi_round (rrb_Int) with avx512_vcvtsi (rr, rm, rr_Int,
     // rm_Int) under one name and marks all of the resulting records VEX_LIG.
6437 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, OpndItins itins,
6438                     RegisterClass SrcRC, X86VectorVTInfo DstVT,
6439                     X86MemOperand x86memop, PatFrag ld_frag, string asm> {
6440   defm NAME : avx512_vcvtsi_round<opc, OpNode, itins, SrcRC, DstVT, asm>,
6441               avx512_vcvtsi<opc, OpNode, itins, SrcRC, DstVT, x86memop,
6442                             ld_frag, asm>, VEX_LIG;
6443 }
6444
     // Signed and unsigned integer-to-floating-point scalar conversions, their
     // suffix-less assembler aliases, and the selection patterns for the generic
     // sint_to_fp / uint_to_fp nodes. Everything here requires HasAVX512.
6445 let Predicates = [HasAVX512] in {
     // Signed conversions (opcode 0x2A). The "642" names take a 64-bit integer
     // source (GR64 / i64mem) and add VEX_W.
6446 defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR32,
6447                                  v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
6448                                  XS, EVEX_CD8<32, CD8VT1>;
6449 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR64,
6450                                  v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
6451                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
6452 defm VCVTSI2SDZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR32,
6453                                  v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
6454                                  XD, EVEX_CD8<32, CD8VT1>;
6455 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR64,
6456                                  v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
6457                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;
6458
     // Aliases that let the 32-bit memory forms be written without a size suffix.
6459 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
6460               (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6461 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
6462               (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6463
     // sint_to_fp of a loaded integer selects the rm form; the tied first source
     // is an IMPLICIT_DEF because the scalar pattern has no pass-through value.
6464 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
6465           (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6466 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
6467           (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6468 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
6469           (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6470 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
6471           (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6472
     // sint_to_fp of an integer register selects the rr form.
6473 def : Pat<(f32 (sint_to_fp GR32:$src)),
6474           (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
6475 def : Pat<(f32 (sint_to_fp GR64:$src)),
6476           (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
6477 def : Pat<(f64 (sint_to_fp GR32:$src)),
6478           (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
6479 def : Pat<(f64 (sint_to_fp GR64:$src)),
6480           (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
6481
     // Unsigned conversions (opcode 0x7B).
     // NOTE(review): VCVTUSI2SDZ is the only one built from avx512_vcvtsi rather
     // than avx512_vcvtsi_common, so it has no rrb_Int (rounding) record and
     // spells VEX_LIG itself. Presumably this is because a 32-bit integer is
     // exactly representable as f64 - confirm before changing.
6482 defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR32,
6483                                   v4f32x_info, i32mem, loadi32,
6484                                   "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
6485 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR64,
6486                                   v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
6487                                   XS, VEX_W, EVEX_CD8<64, CD8VT1>;
6488 defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR32, v2f64x_info,
6489                                   i32mem, loadi32, "cvtusi2sd{l}">,
6490                                   XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
6491 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR64,
6492                                   v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
6493                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
6494
     // Suffix-less aliases for the unsigned 32-bit memory forms.
6495 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
6496               (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6497 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
6498               (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6499
     // uint_to_fp of a loaded integer selects the rm form.
6500 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
6501           (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6502 def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
6503           (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6504 def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
6505           (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6506 def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
6507           (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6508
     // uint_to_fp of an integer register selects the rr form.
6509 def : Pat<(f32 (uint_to_fp GR32:$src)),
6510           (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
6511 def : Pat<(f32 (uint_to_fp GR64:$src)),
6512           (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
6513 def : Pat<(f64 (uint_to_fp GR32:$src)),
6514           (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
6515 def : Pat<(f64 (uint_to_fp GR64:$src)),
6516           (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
6517 }
6518
6519 //===----------------------------------------------------------------------===//
6520 // AVX-512  Scalar convert from float/double to integer
6521 //===----------------------------------------------------------------------===//
6522
     // Floating-point-to-integer scalar conversion (non-truncating variants).
     //   SrcVT  - source vector type info (its RC / VT / memory operand are used).
     //   DstVT  - destination type info (its RC is the result register class).
     //   OpNode - SDNode taking the source vector and an i32 rounding operand.
     //   asm    - mnemonic text placed in front of the operand string.
     // rr_Int and rm_Int pass FROUND_CURRENT; rrb_Int takes AVX512RC:$rc and is
     // tagged EVEX_B and EVEX_RC. All three records are VEX_LIG.
6523 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
6524                                   X86VectorVTInfo DstVT, SDNode OpNode,
6525                                   OpndItins itins, string asm> {
6526   let Predicates = [HasAVX512] in {
6527     def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
6528                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6529                 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))],
6530                 itins.rr>, EVEX, VEX_LIG, Sched<[itins.Sched]>;
6531     def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
6532                  !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
6533                  [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))],
6534                  itins.rr>, EVEX, VEX_LIG, EVEX_B, EVEX_RC,
6535                  Sched<[itins.Sched]>;
6536     def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
6537                 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6538                 [(set DstVT.RC:$dst, (OpNode
6539                       (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
6540                       (i32 FROUND_CURRENT)))], itins.rm>,
6541                 EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6542   } // Predicates = [HasAVX512]
6543 }
6544
6545 // Convert float/double to signed/unsigned int 32/64
     // Signed forms use opcode 0x2D with X86cvts2si; unsigned forms use opcode
     // 0x79 with X86cvts2usi. The "64" names produce an i64 result (i64x_info)
     // and add VEX_W. f32 sources use XS, f64 sources use XD.
6546 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
6547                                    X86cvts2si, SSE_CVT_SS2SI_32, "cvtss2si">,
6548                                    XS, EVEX_CD8<32, CD8VT1>;
6549 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
6550                                    X86cvts2si, SSE_CVT_SS2SI_64, "cvtss2si">,
6551                                    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
6552 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info,
6553                                    X86cvts2usi, SSE_CVT_SS2SI_32, "cvtss2usi">,
6554                                    XS, EVEX_CD8<32, CD8VT1>;
6555 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info,
6556                                    X86cvts2usi, SSE_CVT_SS2SI_64, "cvtss2usi">,
6557                                    XS, VEX_W, EVEX_CD8<32, CD8VT1>;
6558 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
6559                                    X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si">,
6560                                    XD, EVEX_CD8<64, CD8VT1>;
6561 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
6562                                    X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si">,
6563                                    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
6564 defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info,
6565                                    X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi">,
6566                                    XD, EVEX_CD8<64, CD8VT1>;
6567 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info,
6568                                    X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi">,
6569                                    XD, VEX_W, EVEX_CD8<64, CD8VT1>;
6570
6571 // The SSE versions of these instructions are disabled for AVX512.
6572 // Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
     // Each intrinsic gets a register pattern (rr_Int) and a folded-load pattern
     // (rm_Int, matched through sse_load_f32 / sse_load_f64).
6573 let Predicates = [HasAVX512] in {
6574   def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
6575             (VCVTSS2SIZrr_Int VR128X:$src)>;
6576   def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
6577             (VCVTSS2SIZrm_Int sse_load_f32:$src)>;
6578   def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
6579             (VCVTSS2SI64Zrr_Int VR128X:$src)>;
6580   def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
6581             (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>;
6582   def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
6583             (VCVTSD2SIZrr_Int VR128X:$src)>;
6584   def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
6585             (VCVTSD2SIZrm_Int sse_load_f64:$src)>;
6586   def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
6587             (VCVTSD2SI64Zrr_Int VR128X:$src)>;
6588   def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
6589             (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>;
6590 } // HasAVX512
6591
     // Map the integer-to-floating-point conversion intrinsics onto the *_Int
     // records: a register-source pattern (rr_Int) and a folded-load pattern
     // (rm_Int) for each intrinsic. $src1 is the vector passed through to the
     // instruction's first source operand.
6592 let Predicates = [HasAVX512] in {
6593   def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, GR32:$src2),
6594             (VCVTSI2SSZrr_Int VR128X:$src1, GR32:$src2)>;
6595   def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, (loadi32 addr:$src2)),
6596             (VCVTSI2SSZrm_Int VR128X:$src1, addr:$src2)>;
6597   def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, GR64:$src2),
6598             (VCVTSI642SSZrr_Int VR128X:$src1, GR64:$src2)>;
6599   def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, (loadi64 addr:$src2)),
6600             (VCVTSI642SSZrm_Int VR128X:$src1, addr:$src2)>;
6601   def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, GR32:$src2),
6602             (VCVTSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
6603   def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, (loadi32 addr:$src2)),
6604             (VCVTSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
6605   def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, GR64:$src2),
6606             (VCVTSI642SDZrr_Int VR128X:$src1, GR64:$src2)>;
6607   def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, (loadi64 addr:$src2)),
6608             (VCVTSI642SDZrm_Int VR128X:$src1, addr:$src2)>;
       // The only unsigned intrinsic handled by a pattern in this group.
6609   def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, GR32:$src2),
6610             (VCVTUSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
6611   def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, (loadi32 addr:$src2)),
6612             (VCVTUSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
6613 } // Predicates = [HasAVX512]
6614
6615 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
6616 // which produce unnecessary vmovs{s,d} instructions
     // Each pattern folds X86Movss / X86Movsd of a scalar_to_vector(sint_to_fp)
     // into the rr_Int conversion, using the X86Movs* first operand ($dst) as
     // the instruction's first source.
6617 let Predicates = [HasAVX512] in {
6618 def : Pat<(v4f32 (X86Movss
6619                    (v4f32 VR128X:$dst),
6620                    (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
6621           (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
6622
6623 def : Pat<(v4f32 (X86Movss
6624                    (v4f32 VR128X:$dst),
6625                    (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
6626           (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
6627
6628 def : Pat<(v2f64 (X86Movsd
6629                    (v2f64 VR128X:$dst),
6630                    (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
6631           (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
6632
6633 def : Pat<(v2f64 (X86Movsd
6634                    (v2f64 VR128X:$dst),
6635                    (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
6636           (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
6637 } // Predicates = [HasAVX512]
6638
6639 // Convert float/double to signed/unsigned int 32/64 with truncation
     //   _SrcRC    - source type info (FRC for rr/rrb/rm, RC for the *_Int forms).
     //   _DstRC    - destination type info; its RC is the result register class.
     //   OpNode    - node matched by the plain rr / rm records.
     //   OpNodeRnd - node matched by rr_Int (FROUND_CURRENT) and rrb_Int
     //               (FROUND_NO_EXC).
     //   aliasStr  - size suffix appended to `asm` to form the InstAlias names.
     // rrb and rm_Int carry no selection pattern.
     // Every record is VEX_LIG: these are scalar conversions, so the EVEX
     // vector-length bits are ignored. This matches the *_Int records below and
     // the non-truncating conversions in avx512_cvt_s_int_round.
6640 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
6641                             X86VectorVTInfo _DstRC, SDNode OpNode,
6642                             SDNode OpNodeRnd, OpndItins itins, string aliasStr>{
6643 let Predicates = [HasAVX512] in {
6644   def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
6645               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6646               [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))], itins.rr>,
6647               EVEX, VEX_LIG, Sched<[itins.Sched]>;
6648   let hasSideEffects = 0 in
6649   def rrb : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
6650                 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
6651                 [], itins.rr>, EVEX, VEX_LIG, EVEX_B, Sched<[itins.Sched]>;
6652   def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
6653               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6654               [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))],
6655               itins.rm>, EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6656
       // Assembler aliases that accept the mnemonic with an explicit size suffix.
6657   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6658           (!cast<Instruction>(NAME # "rr") _DstRC.RC:$dst, _SrcRC.FRC:$src), 0>;
6659   def : InstAlias<asm # aliasStr # "\t\t{{sae}, $src, $dst|$dst, $src, {sae}}",
6660           (!cast<Instruction>(NAME # "rrb") _DstRC.RC:$dst, _SrcRC.FRC:$src), 0>;
6661   def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6662           (!cast<Instruction>(NAME # "rm") _DstRC.RC:$dst,
6663                                           _SrcRC.ScalarMemOp:$src), 0>;
6664
6665   let isCodeGenOnly = 1 in {
6666     def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
6667               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6668              [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
6669                                    (i32 FROUND_CURRENT)))], itins.rr>,
6670              EVEX, VEX_LIG, Sched<[itins.Sched]>;
6671     def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
6672               !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
6673               [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
6674                                     (i32 FROUND_NO_EXC)))], itins.rr>,
6675                                     EVEX,VEX_LIG , EVEX_B, Sched<[itins.Sched]>;
6676     let mayLoad = 1, hasSideEffects = 0 in
6677       def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
6678                   (ins _SrcRC.IntScalarMemOp:$src),
6679                   !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6680                   [], itins.rm>, EVEX, VEX_LIG,
6681                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
6682   } // isCodeGenOnly = 1
6683 } //HasAVX512
6684 }
6685
6686
     // Truncating conversions. Signed forms use opcode 0x2C with fp_to_sint and
     // X86cvtts2IntRnd; unsigned forms use opcode 0x78 with fp_to_uint and
     // X86cvtts2UIntRnd. The final string is the alias size suffix ("{l}" for an
     // i32 result, "{q}" for an i64 result); the i64 forms also add VEX_W.
6687 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
6688                         fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_32, "{l}">,
6689                         XS, EVEX_CD8<32, CD8VT1>;
6690 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
6691                         fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_64, "{q}">,
6692                         VEX_W, XS, EVEX_CD8<32, CD8VT1>;
6693 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
6694                         fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{l}">,
6695                         XD, EVEX_CD8<64, CD8VT1>;
6696 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
6697                         fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{q}">,
6698                         VEX_W, XD, EVEX_CD8<64, CD8VT1>;
6699
6700 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
6701                         fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_32, "{l}">,
6702                         XS, EVEX_CD8<32, CD8VT1>;
6703 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
6704                         fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_64, "{q}">,
6705                         XS,VEX_W, EVEX_CD8<32, CD8VT1>;
6706 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
6707                         fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{l}">,
6708                         XD, EVEX_CD8<64, CD8VT1>;
6709 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
6710                         fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{q}">,
6711                         XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Select the SSE/SSE2 truncating-convert intrinsics onto the signed
// VCVTTS{S,D}2SI*Z "_Int" instructions when HasAVX512 holds. Each intrinsic has
// a register pattern (VR128X source) and a memory pattern (sse_load_f32 /
// sse_load_f64 source, passed on as an ssmem / sdmem operand).
6712 let Predicates = [HasAVX512] in {
6713   def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
6714             (VCVTTSS2SIZrr_Int VR128X:$src)>;
6715   def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
6716             (VCVTTSS2SIZrm_Int ssmem:$src)>;
6717   def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
6718             (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
6719   def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
6720             (VCVTTSS2SI64Zrm_Int ssmem:$src)>;
6721   def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
6722             (VCVTTSD2SIZrr_Int VR128X:$src)>;
6723   def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
6724             (VCVTTSD2SIZrm_Int sdmem:$src)>;
6725   def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
6726             (VCVTTSD2SI64Zrr_Int VR128X:$src)>;
6727   def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
6728             (VCVTTSD2SI64Zrm_Int sdmem:$src)>;
6729 } // HasAVX512
6730
6731 //===----------------------------------------------------------------------===//
6732 // AVX-512  Convert from float to double and back
6733 //===----------------------------------------------------------------------===//
6734
// Scalar FP-to-FP conversion using the current rounding mode.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   _              - type info of the destination and of $src1.
//   _Src           - type info of the converted operand $src2.
//   OpNode         - matched as (OpNode $src1, $src2, (i32 FROUND_CURRENT)).
//   itins          - supplies itins.rr / itins.rm and itins.Sched.
// Defines maskable rr_Int / rm_Int forms with selection patterns on the vector
// register classes, plus pattern-less isCodeGenOnly rr / rm forms on the FRC
// register classes.
6735 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6736                          X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins> {
6737   defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6738                          (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
6739                          "$src2, $src1", "$src1, $src2",
6740                          (_.VT (OpNode (_.VT _.RC:$src1),
6741                                        (_Src.VT _Src.RC:$src2),
6742                                        (i32 FROUND_CURRENT))), itins.rr>,
6743                          EVEX_4V, VEX_LIG, Sched<[itins.Sched]>;
  // Memory form: $src2 is matched through _Src.ScalarIntMemCPat and scheduled
  // with the folded-load variant of the itinerary's scheduling class.
6744   defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6745                          (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
6746                          "$src2, $src1", "$src1, $src2",
6747                          (_.VT (OpNode (_.VT _.RC:$src1),
6748                                   (_Src.VT _Src.ScalarIntMemCPat:$src2),
6749                                   (i32 FROUND_CURRENT))), itins.rm>,
6750                          EVEX_4V, VEX_LIG,
6751                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
6752
  // The FRC forms carry an empty pattern list ([]); the fpextend / fpround
  // Pat definitions in this file select VCVTSS2SDZrr/rm and VCVTSD2SSZrr
  // explicitly.
6753   let isCodeGenOnly = 1, hasSideEffects = 0 in {
6754     def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
6755                (ins _.FRC:$src1, _Src.FRC:$src2),
6756                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
6757                itins.rr>, EVEX_4V, VEX_LIG, Sched<[itins.Sched]>;
    // mayLoad must be stated explicitly because there is no pattern to infer
    // it from.
6758     let mayLoad = 1 in
6759     def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
6760                (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
6761                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
6762                itins.rm>, EVEX_4V, VEX_LIG,
6763                Sched<[itins.Sched.Folded, ReadAfterLd]>;
6764   }
6765 }
6766
6767 // Scalar Conversion with SAE - suppress all exceptions
// Defines a single maskable register-to-register form, rrb_Int, whose pattern
// passes (i32 FROUND_NO_EXC) to OpNodeRnd and whose assembly operand string
// contains "{sae}". The form is tagged EVEX_B; this multiclass defines no
// memory form.
6768 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6769                          X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> {
6770   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6771                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
6772                         "{sae}, $src2, $src1", "$src1, $src2, {sae}",
6773                         (_.VT (OpNodeRnd (_.VT _.RC:$src1),
6774                                          (_Src.VT _Src.RC:$src2),
6775                                          (i32 FROUND_NO_EXC))), itins.rr>,
6776                         EVEX_4V, VEX_LIG, EVEX_B, Sched<[itins.Sched]>;
6777 }
6778
6779 // Scalar Conversion with rounding control (RC)
// Defines a single maskable register-to-register form, rrb_Int, that takes an
// extra AVX512RC:$rc operand and forwards it to OpNodeRnd as (i32 imm:$rc).
// The form is tagged EVEX_B and EVEX_RC; this multiclass defines no memory
// form.
6780 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6781                          X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> {
6782   defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6783                         (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
6784                         "$rc, $src2, $src1", "$src1, $src2, $rc",
6785                         (_.VT (OpNodeRnd (_.VT _.RC:$src1),
6786                                          (_Src.VT _Src.RC:$src2), (i32 imm:$rc))),
6787                                          itins.rr>,
6788                         EVEX_4V, VEX_LIG, Sched<[itins.Sched]>,
6789                         EVEX_B, EVEX_RC;
6790 }
// Scalar double -> single conversion. Under HasAVX512, the "Z" records combine
// avx512_cvt_fp_scalar (current rounding mode) with avx512_cvt_fp_rc_scalar
// (explicit rounding control); the same OpNodeRnd is handed to both. Note the
// argument order: _src/_dst are declared source-first here but passed
// destination-first to the underlying multiclasses.
6791 multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
6792                                   SDNode OpNodeRnd, OpndItins itins,
6793                                   X86VectorVTInfo _src, X86VectorVTInfo _dst> {
6794   let Predicates = [HasAVX512] in {
6795     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
6796              avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
6797                                OpNodeRnd, itins>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
6798   }
6799 }
6800
// Scalar single -> double conversion. Under HasAVX512, the "Z" records combine
// avx512_cvt_fp_scalar (current rounding mode) with avx512_cvt_fp_sae_scalar
// ({sae} form); the same OpNodeRnd is handed to both. _src/_dst are declared
// source-first here but passed destination-first to the underlying
// multiclasses.
6801 multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
6802                                     SDNode OpNodeRnd, OpndItins itins,
6803                                     X86VectorVTInfo _src, X86VectorVTInfo _dst> {
6804   let Predicates = [HasAVX512] in {
6805     defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
6806              avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
6807              EVEX_CD8<32, CD8VT1>, XS;
6808   }
6809 }
// Instantiate the scalar double<->single conversions at opcode 0x5A. Both are
// marked NotMemoryFoldable. The resulting record names have the form
// VCVTSD2SSZ<suffix> / VCVTSS2SDZ<suffix> (e.g. VCVTSS2SDZrr, VCVTSD2SSZrr_Int),
// which is what the Pat definitions in this file refer to.
6810 defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
6811                                          X86froundRnd, SSE_CVT_SD2SS, f64x_info,
6812                                          f32x_info>, NotMemoryFoldable;
6813 defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
6814                                           X86fpextRnd, SSE_CVT_SS2SD, f32x_info,
6815                                           f64x_info>, NotMemoryFoldable;
6816
// Selection patterns for scalar fpextend / fpround under HasAVX512. The FRC
// instruction forms take two inputs; the first is filled with IMPLICIT_DEF
// because only the converted value is of interest here.
6817 def : Pat<(f64 (fpextend FR32X:$src)),
6818           (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
6819           Requires<[HasAVX512]>;
6820 def : Pat<(f64 (fpextend (loadf32 addr:$src))),
6821           (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
6822           Requires<[HasAVX512]>;
6823
// Extending load, OptForSize: fold the load into the conversion.
6824 def : Pat<(f64 (extloadf32 addr:$src)),
6825           (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
6826       Requires<[HasAVX512, OptForSize]>;
6827
// Extending load, OptForSpeed: load with VMOVSSZrm first, then convert
// register-to-register.
// NOTE(review): presumably chosen to avoid a dependency on the undefined
// destination operand of the folded form - confirm.
6828 def : Pat<(f64 (extloadf32 addr:$src)),
6829           (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
6830           Requires<[HasAVX512, OptForSpeed]>;
6831
6832 def : Pat<(f32 (fpround FR64X:$src)),
6833           (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
6834            Requires<[HasAVX512]>;
6835
// Convert element 0 of $src and merge it into $dst via X86Movss / X86Movsd:
// these map directly onto the two-operand "_Int" register forms.
6836 def : Pat<(v4f32 (X86Movss
6837                    (v4f32 VR128X:$dst),
6838                    (v4f32 (scalar_to_vector
6839                      (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
6840           (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
6841           Requires<[HasAVX512]>;
6842
6843 def : Pat<(v2f64 (X86Movsd
6844                    (v2f64 VR128X:$dst),
6845                    (v2f64 (scalar_to_vector
6846                      (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
6847           (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
6848           Requires<[HasAVX512]>;
6849
6850 //===----------------------------------------------------------------------===//
6851 // AVX-512  Vector convert from signed/unsigned integer to float/double
6852 //          and from float/double to signed/unsigned integer
6853 //===----------------------------------------------------------------------===//
6854
// Generic packed unary conversion: defines maskable rr, rm and rmb forms.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   _              - type info of the destination.
//   _Src           - type info of the source.
//   OpNode         - single-operand node used in all three patterns.
//   itins          - supplies itins.rr / itins.rm and itins.Sched.
//   Broadcast      - text appended to "${src}" in the rmb assembly string;
//                    defaults to _.BroadcastStr (the destination's string).
//   Alias          - text appended to the mnemonic of the rm form only.
//   MemOp          - memory operand of the rm form; defaults to _Src.MemOp.
6855 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6856                          X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins,
6857                          string Broadcast = _.BroadcastStr,
6858                          string Alias = "", X86MemOperand MemOp = _Src.MemOp> {
6859
6860   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6861                          (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
6862                          (_.VT (OpNode (_Src.VT _Src.RC:$src))), itins.rr>,
6863                          EVEX, Sched<[itins.Sched]>;
6864
  // Full-width load form. The load is matched with _Src.LdFrag regardless of
  // which MemOp is used for the operand.
6865   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6866                          (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
6867                          (_.VT (OpNode (_Src.VT
6868                              (bitconvert (_Src.LdFrag addr:$src))))), itins.rm>,
6869                          EVEX, Sched<[itins.Sched.Folded]>;
6870
  // Broadcast form: a scalar load wrapped in X86VBroadcast, tagged EVEX_B.
6871   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6872                          (ins _Src.ScalarMemOp:$src), OpcodeStr,
6873                          "${src}"##Broadcast, "${src}"##Broadcast,
6874                          (_.VT (OpNode (_Src.VT
6875                                   (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
6876                             )), itins.rm>, EVEX, EVEX_B,
6877                          Sched<[itins.Sched.Folded]>;
6878 }
6879 // Conversion with SAE - suppress all exceptions
// Adds a single maskable register-to-register form, rrb, whose pattern passes
// (i32 FROUND_NO_EXC) to OpNodeRnd and whose assembly operand string contains
// "{sae}". The form is tagged EVEX_B; this multiclass defines no memory form.
6880 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6881                               X86VectorVTInfo _Src, SDNode OpNodeRnd,
6882                               OpndItins itins> {
6883   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6884                         (ins _Src.RC:$src), OpcodeStr,
6885                         "{sae}, $src", "$src, {sae}",
6886                         (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
6887                                (i32 FROUND_NO_EXC))), itins.rr>,
6888                         EVEX, EVEX_B, Sched<[itins.Sched]>;
6889 }
6890
6891 // Conversion with rounding control (RC)
// Adds a single maskable register-to-register form, rrb, that takes an extra
// AVX512RC:$rc operand and forwards it to OpNodeRnd as (i32 imm:$rc). The form
// is tagged EVEX_B and EVEX_RC; this multiclass defines no memory form.
6892 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6893                          X86VectorVTInfo _Src, SDNode OpNodeRnd,
6894                          OpndItins itins> {
6895   defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6896                         (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
6897                         "$rc, $src", "$src, $rc",
6898                         (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc))),
6899                         itins.rr>, EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
6900 }
6901
6902 // Extend Float to Double
// Z (HasAVX512) converts v8f32x_info to v8f64_info via fpextend and also gets
// an {sae} form using X86vfpextRnd. Z128/Z256 (HasVLX) both take v4f32x_info
// as the source type; Z128 therefore uses the X86vfpext node, an explicit
// "{1to2}" broadcast string and an f64mem operand for its rm form.
6903 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
6904                            OpndItins itins> {
6905   let Predicates = [HasAVX512] in {
6906     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
6907                             fpextend, itins>,
6908              avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
6909                                 X86vfpextRnd, itins>, EVEX_V512;
6910   }
6911   let Predicates = [HasVLX] in {
6912     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
6913                                X86vfpext, itins, "{1to2}", "", f64mem>, EVEX_V128;
6914     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
6915                                itins>, EVEX_V256;
6916   }
6917 }
6918
6919 // Truncate Double to Float
// Z (HasAVX512) converts v8f64_info to v8f32x_info via fpround and also gets a
// rounding-control form using X86vfproundRnd. Z128 and Z256 (HasVLX) share the
// destination type v4f32x_info, so their rm forms get "{x}" / "{y}" appended
// to the mnemonic and their broadcast strings ("{1to2}" / "{1to4}") are given
// explicitly. Z128 uses the X86vfpround node instead of fpround.
6920 multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, OpndItins itins> {
6921   let Predicates = [HasAVX512] in {
6922     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, itins>,
6923              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
6924                                X86vfproundRnd, itins>, EVEX_V512;
6925   }
6926   let Predicates = [HasVLX] in {
6927     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
6928                                X86vfpround, itins, "{1to2}", "{x}">, EVEX_V128;
6929     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
6930                                itins, "{1to4}", "{y}">, EVEX_V256;
6931
    // Explicit "x"/"y"-suffixed mnemonics for the register and memory forms.
    // NOTE(review): the trailing 0 is the last InstAlias argument; presumably
    // it keeps these aliases from being used when printing - confirm against
    // the InstAlias definition.
6932     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
6933                     (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
6934     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
6935                     (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
6936     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
6937                     (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
6938     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
6939                     (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
6940   }
6941 }
6942
// Packed double<->single conversions, both at opcode 0x5A. VCVTPD2PS adds
// VEX_W and uses EVEX_CD8<64, CD8VF>; VCVTPS2PD uses EVEX_CD8<32, CD8VH>.
6943 defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SSE_CVT_PD2PS>,
6944                                   VEX_W, PD, EVEX_CD8<64, CD8VF>;
6945 defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SSE_CVT_PS2PD>,
6946                                   PS, EVEX_CD8<32, CD8VH>;
6947
// Select a v8f32 -> v8f64 extending load onto the memory form of VCVTPS2PDZ.
// The pattern is guarded by HasAVX512, like the scalar fpextend patterns in
// this file, because the instruction it selects is itself defined under
// HasAVX512.
6948 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
6949             (VCVTPS2PDZrm addr:$src)>, Requires<[HasAVX512]>;
6950
// HasVLX selection patterns for the 128/256-bit packed double<->single
// conversions.
6951 let Predicates = [HasVLX] in {
  // X86vzmovl applied to the (bitconverted) X86vfpround result is matched
  // directly onto VCVTPD2PSZ128rr/rm; AddedComplexity = 15 raises the
  // priority of these two patterns.
6952   let AddedComplexity = 15 in {
6953     def : Pat<(X86vzmovl (v2f64 (bitconvert
6954                                  (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
6955               (VCVTPD2PSZ128rr VR128X:$src)>;
6956     def : Pat<(X86vzmovl (v2f64 (bitconvert
6957                                  (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
6958               (VCVTPD2PSZ128rm addr:$src)>;
6959   }
  // Extending loads map onto the memory forms of VCVTPS2PD.
6960   def : Pat<(v2f64 (extloadv2f32 addr:$src)),
6961               (VCVTPS2PDZ128rm addr:$src)>;
6962   def : Pat<(v4f64 (extloadv4f32 addr:$src)),
6963               (VCVTPS2PDZ256rm addr:$src)>;
6964 }
6965
6966 // Convert Signed/Unsigned Doubleword to Double
// OpNode is used for the Z and Z256 forms; OpNode128 is used only for Z128,
// whose source type info is v4i32x_info while its destination is v2f64x_info
// (hence the explicit "{1to2}" broadcast string and i64mem operand). Z is
// predicated on HasAVX512, Z128/Z256 on HasVLX.
6967 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
6968                            SDNode OpNode128, OpndItins itins> {
6969   // No rounding in this op
6970   let Predicates = [HasAVX512] in
6971     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
6972                             itins>, EVEX_V512;
6973
6974   let Predicates = [HasVLX] in {
6975     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
6976                                OpNode128, itins, "{1to2}", "", i64mem>, EVEX_V128;
6977     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
6978                                itins>, EVEX_V256;
6979   }
6980 }
6981
6982 // Convert Signed/Unsigned Doubleword to Float
// Z (HasAVX512) combines the plain rr/rm/rmb forms using OpNode with a
// rounding-control form using OpNodeRnd. Z128/Z256 (HasVLX) get only the plain
// forms. Source and destination have the same element count at every width.
6983 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
6984                            SDNode OpNodeRnd, OpndItins itins> {
6985   let Predicates = [HasAVX512] in
6986     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
6987                             itins>,
6988              avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
6989                                OpNodeRnd, itins>, EVEX_V512;
6990
6991   let Predicates = [HasVLX] in {
6992     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
6993                                itins>, EVEX_V128;
6994     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
6995                                itins>, EVEX_V256;
6996   }
6997 }
6998
6999 // Convert Float to Signed/Unsigned Doubleword with truncation
// Z (HasAVX512) combines the plain rr/rm/rmb forms using OpNode with an {sae}
// form using OpNodeRnd. Z128/Z256 (HasVLX) get only the plain forms.
7000 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7001                             SDNode OpNodeRnd, OpndItins itins> {
7002   let Predicates = [HasAVX512] in {
7003     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7004                             itins>,
7005              avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
7006                                 OpNodeRnd, itins>, EVEX_V512;
7007   }
7008   let Predicates = [HasVLX] in {
7009     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7010                                itins>, EVEX_V128;
7011     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7012                                itins>, EVEX_V256;
7013   }
7014 }
7015
7016 // Convert Float to Signed/Unsigned Doubleword
// Z (HasAVX512) combines the plain rr/rm/rmb forms using OpNode with a
// rounding-control form using OpNodeRnd. Z128/Z256 (HasVLX) get only the plain
// forms.
7017 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7018                            SDNode OpNodeRnd, OpndItins itins> {
7019   let Predicates = [HasAVX512] in {
7020     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7021                             itins>,
7022              avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
7023                                 OpNodeRnd, itins>, EVEX_V512;
7024   }
7025   let Predicates = [HasVLX] in {
7026     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7027                                itins>, EVEX_V128;
7028     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7029                                itins>, EVEX_V256;
7030   }
7031 }
7032
7033 // Convert Double to Signed/Unsigned Doubleword with truncation
// OpNode is used for Z and Z256, OpNode128 only for Z128, and OpNodeRnd for
// the {sae} form that is added to Z.
7034 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7035                             SDNode OpNode128, SDNode OpNodeRnd,
7036                             OpndItins itins> {
7037   let Predicates = [HasAVX512] in {
7038     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7039                             itins>,
7040              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
7041                                 OpNodeRnd, itins>, EVEX_V512;
7042   }
7043   let Predicates = [HasVLX] in {
7044     // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7045     // memory forms of these instructions in Asm Parser. They have the same
7046     // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7047     // due to the same reason.
7048     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7049                                OpNode128, itins, "{1to2}", "{x}">, EVEX_V128;
7050     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7051                                itins, "{1to4}", "{y}">, EVEX_V256;
7052
    // Explicit "x"/"y"-suffixed mnemonics for the register and memory forms.
    // NOTE(review): these aliases name i128mem/i256mem although the source
    // type is floating point, while avx512_cvtpd2dq uses f128mem/f256mem for
    // the same source types - confirm whether the difference is intentional.
7053     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7054                     (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7055     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7056                     (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
7057     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7058                     (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7059     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7060                     (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
7061   }
7062 }
7063
7064 // Convert Double to Signed/Unsigned Doubleword
// OpNode is used for all three widths; OpNodeRnd is used for the
// rounding-control form that is added to Z.
7065 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7066                            SDNode OpNodeRnd, OpndItins itins> {
7067   let Predicates = [HasAVX512] in {
7068     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7069                             itins>,
7070              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
7071                                OpNodeRnd, itins>, EVEX_V512;
7072   }
7073   let Predicates = [HasVLX] in {
7074     // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7075     // memory forms of these instructions in Asm Parser. They have the same
7076     // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7077     // due to the same reason.
7078     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
7079                                itins, "{1to2}", "{x}">, EVEX_V128;
7080     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7081                                itins, "{1to4}", "{y}">, EVEX_V256;
7082
    // Explicit "x"/"y"-suffixed mnemonics for the register and memory forms.
7083     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7084                     (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7085     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7086                     (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
7087     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7088                     (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7089     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7090                     (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
7091   }
7092 }
7093
7094 // Convert Double to Signed/Unsigned Quadword
// Z is predicated on HasDQI and combines the plain forms (OpNode) with a
// rounding-control form (OpNodeRnd). Z128/Z256 require both HasDQI and HasVLX
// and get only the plain forms.
7095 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7096                            SDNode OpNodeRnd, OpndItins itins> {
7097   let Predicates = [HasDQI] in {
7098     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7099                             itins>,
7100              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
7101                                OpNodeRnd,itins>, EVEX_V512;
7102   }
7103   let Predicates = [HasDQI, HasVLX] in {
7104     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7105                                itins>, EVEX_V128;
7106     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7107                                itins>, EVEX_V256;
7108   }
7109 }
7110
7111 // Convert Double to Signed/Unsigned Quadword with truncation
// Z is predicated on HasDQI and combines the plain forms (OpNode) with an
// {sae} form (OpNodeRnd). Z128/Z256 require both HasDQI and HasVLX and get
// only the plain forms.
7112 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7113                             SDNode OpNodeRnd, OpndItins itins> {
7114   let Predicates = [HasDQI] in {
7115     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7116                             itins>,
7117              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
7118                                 OpNodeRnd, itins>, EVEX_V512;
7119   }
7120   let Predicates = [HasDQI, HasVLX] in {
7121     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7122                                itins>, EVEX_V128;
7123     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7124                                itins>, EVEX_V256;
7125   }
7126 }
7127
7128 // Convert Signed/Unsigned Quadword to Double
// Z is predicated on HasDQI and combines the plain forms (OpNode) with a
// rounding-control form (OpNodeRnd). Z128/Z256 require both HasDQI and HasVLX
// and get only the plain forms.
7129 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7130                            SDNode OpNodeRnd, OpndItins itins> {
7131   let Predicates = [HasDQI] in {
7132     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
7133                             itins>,
7134              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
7135                                OpNodeRnd, itins>, EVEX_V512;
7136   }
7137   let Predicates = [HasDQI, HasVLX] in {
7138     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
7139                                itins>, EVEX_V128;
7140     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
7141                                itins>, EVEX_V256;
7142   }
7143 }
7144
7145 // Convert Float to Signed/Unsigned Quadword
// Z is predicated on HasDQI and combines the plain forms (OpNode) with a
// rounding-control form (OpNodeRnd). Z128/Z256 require both HasDQI and HasVLX;
// both use v4f32x_info as the source type info, and Z128 overrides the rm
// memory operand with f64mem.
7146 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7147                            SDNode OpNodeRnd, OpndItins itins> {
7148   let Predicates = [HasDQI] in {
7149     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7150                             itins>,
7151              avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
7152                                OpNodeRnd, itins>, EVEX_V512;
7153   }
7154   let Predicates = [HasDQI, HasVLX] in {
7155     // Explicitly specified broadcast string, since we take only 2 elements
7156     // from v4f32x_info source
7157     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
7158                                itins, "{1to2}", "", f64mem>, EVEX_V128;
7159     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7160                                itins>, EVEX_V256;
7161   }
7162 }
7163
7164 // Convert Float to Signed/Unsigned Quadword with truncation
// OpNode is used for Z and Z256, OpNode128 only for Z128, and OpNodeRnd for
// the {sae} form that is added to Z. Z is predicated on HasDQI; Z128/Z256
// require both HasDQI and HasVLX.
7165 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7166                             SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> {
7167   let Predicates = [HasDQI] in {
7168     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7169                             itins>,
7170              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
7171                                 OpNodeRnd, itins>, EVEX_V512;
7172   }
7173   let Predicates = [HasDQI, HasVLX] in {
7174     // Explicitly specified broadcast string, since we take only 2 elements
7175     // from v4f32x_info source
7176     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode128,
7177                                itins, "{1to2}", "", f64mem>, EVEX_V128;
7178     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7179                                itins>, EVEX_V256;
7180   }
7181 }
7182
7183 // Convert Signed/Unsigned Quadword to Float
// OpNode is used for Z and Z256, OpNode128 only for Z128, and OpNodeRnd for
// the rounding-control form that is added to Z. Z is predicated on HasDQI;
// Z128/Z256 and the aliases require both HasDQI and HasVLX.
7184 multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7185                            SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> {
7186   let Predicates = [HasDQI] in {
7187     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
7188                             itins>,
7189              avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
7190                                OpNodeRnd, itins>, EVEX_V512;
7191   }
7192   let Predicates = [HasDQI, HasVLX] in {
7193     // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7194     // memory forms of these instructions in Asm Parser. They have the same
7195     // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
7196     // due to the same reason.
7197     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
7198                                itins, "{1to2}", "{x}">, EVEX_V128;
7199     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
7200                                itins, "{1to4}", "{y}">, EVEX_V256;
7201
    // Explicit "x"/"y"-suffixed mnemonics for the register and memory forms.
7202     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7203                     (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7204     def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7205                     (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
7206     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7207                     (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7208     def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7209                     (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
7210   }
7211 }
7212
// Instantiations of the packed conversion multiclasses. Each defm supplies the
// opcode byte, the mnemonic, the SDNode(s) used in the selection patterns, an
// itinerary, and the encoding mixins (one of PS/PD/XS/XD, an optional VEX_W,
// and EVEX_CD8<element-size, tuple>). Truncating ("vcvtt*") variants pass
// fp_to_sint / fp_to_uint as the plain node; non-truncating FP -> integer
// variants pass X86cvtp2Int / X86cvtp2UInt.
7213 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
7214                                  SSE_CVT_I2PD>, XS, EVEX_CD8<32, CD8VH>;
7215
7216 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
7217                                 X86VSintToFpRnd, SSE_CVT_I2PS>,
7218                                 PS, EVEX_CD8<32, CD8VF>;
7219
7220 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint,
7221                                 X86cvttp2siRnd, SSE_CVT_PS2I>,
7222                                 XS, EVEX_CD8<32, CD8VF>;
7223
7224 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttp2si,
7225                                  X86cvttp2siRnd, SSE_CVT_PD2I>,
7226                                  PD, VEX_W, EVEX_CD8<64, CD8VF>;
7227
7228 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,
7229                                  X86cvttp2uiRnd, SSE_CVT_PS2I>, PS,
7230                                  EVEX_CD8<32, CD8VF>;
7231
7232 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
7233                                  X86cvttp2ui, X86cvttp2uiRnd, SSE_CVT_PD2I>,
7234                                  PS, VEX_W, EVEX_CD8<64, CD8VF>;
7235
7236 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
7237                                   X86VUintToFP, SSE_CVT_I2PD>, XS,
7238                                   EVEX_CD8<32, CD8VH>;
7239
7240 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
7241                                  X86VUintToFpRnd, SSE_CVT_I2PS>, XD,
7242                                  EVEX_CD8<32, CD8VF>;
7243
7244 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
7245                                  X86cvtp2IntRnd, SSE_CVT_PS2I>, PD,
7246                                  EVEX_CD8<32, CD8VF>;
7247
7248 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
7249                                  X86cvtp2IntRnd, SSE_CVT_PD2I>, XD,
7250                                  VEX_W, EVEX_CD8<64, CD8VF>;
7251
7252 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
7253                                  X86cvtp2UIntRnd, SSE_CVT_PS2I>,
7254                                  PS, EVEX_CD8<32, CD8VF>;
7255
7256 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
7257                                  X86cvtp2UIntRnd, SSE_CVT_PD2I>, VEX_W,
7258                                  PS, EVEX_CD8<64, CD8VF>;
7259
7260 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
7261                                  X86cvtp2IntRnd, SSE_CVT_PD2I>, VEX_W,
7262                                  PD, EVEX_CD8<64, CD8VF>;
7263
7264 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
7265                                  X86cvtp2IntRnd, SSE_CVT_PS2I>, PD,
7266                                  EVEX_CD8<32, CD8VH>;
7267
7268 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
7269                                  X86cvtp2UIntRnd, SSE_CVT_PD2I>, VEX_W,
7270                                  PD, EVEX_CD8<64, CD8VF>;
7271
7272 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
7273                                  X86cvtp2UIntRnd, SSE_CVT_PS2I>, PD,
7274                                  EVEX_CD8<32, CD8VH>;
7275
7276 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint,
7277                                  X86cvttp2siRnd, SSE_CVT_PD2I>, VEX_W,
7278                                  PD, EVEX_CD8<64, CD8VF>;
7279
7280 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86cvttp2si,
7281                                  X86cvttp2siRnd, SSE_CVT_PS2I>, PD,
7282                                  EVEX_CD8<32, CD8VH>;
7283
7284 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint,
7285                                  X86cvttp2uiRnd, SSE_CVT_PD2I>, VEX_W,
7286                                  PD, EVEX_CD8<64, CD8VF>;
7287
7288 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86cvttp2ui,
7289                                  X86cvttp2uiRnd, SSE_CVT_PS2I>, PD,
7290                                  EVEX_CD8<32, CD8VH>;
7291
7292 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
7293                             X86VSintToFpRnd, SSE_CVT_I2PD>, VEX_W, XS,
7294                             EVEX_CD8<64, CD8VF>;
7295
7296 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
7297                             X86VUintToFpRnd, SSE_CVT_I2PD>, VEX_W, XS,
7298                             EVEX_CD8<64, CD8VF>;
7299
7300 defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
7301                             X86VSintToFpRnd, SSE_CVT_I2PS>, VEX_W, PS,
7302                             EVEX_CD8<64, CD8VF>;
7303
7304 defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
7305                             X86VUintToFpRnd, SSE_CVT_I2PS>, VEX_W, XD,
7306                             EVEX_CD8<64, CD8VF>;
7307 // AVX512 without VLX: the narrow source is placed into a wider register (INSERT_SUBREG into IMPLICIT_DEF), converted with the Z-suffixed rr instruction, and the sub_xmm/sub_ymm part is extracted.
7308 let Predicates = [HasAVX512, NoVLX] in {
7309 def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
7310           (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
7311            (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
7312                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
7313
7314 def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
7315           (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
7316            (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
7317                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
7318 // v4f64 source: inserted as sub_ymm of a v8f64, but the v4i32 result is taken from sub_xmm.
7319 def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
7320           (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
7321            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7322                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;
7323 // uint_to_fp uses the same scheme with VCVTUDQ2PSZrr / VCVTUDQ2PDZrr.
7324 def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
7325           (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
7326            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
7327                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
7328
7329 def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
7330           (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
7331            (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
7332                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
7333
7334 def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
7335           (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
7336            (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
7337                                  VR128X:$src1, sub_xmm)))), sub_ymm)>;
7338 // X86VUintToFP with a v2f64 result: same instruction as above, result read from sub_xmm.
7339 def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
7340           (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
7341            (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
7342                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;
7343 }
7344 // AVX512 with VLX: X86vzmovl around a v2f64 -> v4i32 conversion selects the bare 128-bit instruction, and 64-bit loads are folded into VCVT(U)DQ2PDZ128rm.
7345 let Predicates = [HasAVX512, HasVLX] in {
7346   let AddedComplexity = 15 in {  // NOTE(review): presumably so these beat the patterns without X86vzmovl - confirm
7347     def : Pat<(X86vzmovl (v2i64 (bitconvert
7348                                 (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
7349               (VCVTPD2DQZ128rr VR128X:$src)>;
7350     def : Pat<(X86vzmovl (v2i64 (bitconvert
7351                                 (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
7352               (VCVTPD2DQZ128rm addr:$src)>;
7353     def : Pat<(X86vzmovl (v2i64 (bitconvert
7354                                  (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
7355               (VCVTPD2UDQZ128rr VR128X:$src)>;
7356     def : Pat<(X86vzmovl (v2i64 (bitconvert
7357                                 (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
7358               (VCVTTPD2DQZ128rr VR128X:$src)>;
7359     def : Pat<(X86vzmovl (v2i64 (bitconvert
7360                                 (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
7361               (VCVTTPD2DQZ128rm addr:$src)>;
7362     def : Pat<(X86vzmovl (v2i64 (bitconvert
7363                                  (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
7364               (VCVTTPD2UDQZ128rr VR128X:$src)>;
7365   }
7366   // A 64-bit scalar load (scalar_to_vector of loadi64, or X86vzload) feeding the int -> v2f64 conversion becomes the rm form.
7367   def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
7368             (VCVTDQ2PDZ128rm addr:$src)>;
7369   def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
7370             (VCVTDQ2PDZ128rm addr:$src)>;
7371
7372   def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
7373             (VCVTUDQ2PDZ128rm addr:$src)>;
7374   def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
7375             (VCVTUDQ2PDZ128rm addr:$src)>;
7376 }
7377 // Plain AVX512: fpround of a v8f64 load and a v8f32 extending load select the rm forms of VCVTPD2PSZ / VCVTPS2PDZ.
7378 let Predicates = [HasAVX512] in {
7379   def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
7380             (VCVTPD2PSZrm addr:$src)>;
7381   def : Pat<(v8f64 (extloadv8f32 addr:$src)),
7382             (VCVTPS2PDZrm addr:$src)>;
7383 }
7384 // DQI with VLX: X86vzmovl around a v2i64 -> v4f32 conversion selects the 128-bit VCVT(U)QQ2PS register form directly.
7385 let Predicates = [HasDQI, HasVLX] in {
7386   let AddedComplexity = 15 in {
7387     def : Pat<(X86vzmovl (v2f64 (bitconvert
7388                                 (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
7389               (VCVTQQ2PSZ128rr VR128X:$src)>;
7390     def : Pat<(X86vzmovl (v2f64 (bitconvert
7391                                 (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
7392               (VCVTUQQ2PSZ128rr VR128X:$src)>;
7393   }
7394 }
7395 // DQI without VLX: 128/256-bit quadword conversions go through the Z-suffixed rr instruction via INSERT_SUBREG and come back through EXTRACT_SUBREG.
7396 let Predicates = [HasDQI, NoVLX] in {
7397 def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
7398           (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
7399            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7400                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
7401 // v4f32 source: inserted as sub_xmm of a v8f32, v4i64 result read from sub_ymm.
7402 def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
7403           (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
7404            (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
7405                                   VR128X:$src1, sub_xmm)))), sub_ymm)>;
7406
7407 def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
7408           (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
7409            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7410                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
7411 // Unsigned counterparts of the three patterns above.
7412 def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
7413           (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
7414            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7415                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
7416
7417 def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
7418           (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
7419            (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
7420                                   VR128X:$src1, sub_xmm)))), sub_ymm)>;
7421
7422 def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
7423           (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
7424            (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7425                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
7426 // Signed quadword -> FP; the v4i64 -> v4f32 case inserts at sub_ymm and extracts sub_xmm.
7427 def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
7428           (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
7429            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7430                                   VR256X:$src1, sub_ymm)))), sub_xmm)>;
7431
7432 def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
7433           (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
7434            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7435                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
7436
7437 def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
7438           (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
7439            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7440                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
7441 // Unsigned quadword -> FP.
7442 def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
7443           (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
7444            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7445                                   VR256X:$src1, sub_ymm)))), sub_xmm)>;
7446
7447 def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
7448           (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
7449            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7450                                   VR128X:$src1, sub_xmm)))), sub_xmm)>;
7451
7452 def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
7453           (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
7454            (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7455                                   VR256X:$src1, sub_ymm)))), sub_ymm)>;
7456 }
7457
7458 //===----------------------------------------------------------------------===//
7459 // Half precision conversion instructions
7460 //===----------------------------------------------------------------------===//
7461 // vcvtph2ps (opcode 0x13): maskable register (rr) and load (rm) forms matching X86cvtph2ps; _dest/_src carry the result and source vector type info.
7462 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7463                            X86MemOperand x86memop, PatFrag ld_frag,
7464                            OpndItins itins> {
7465   defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
7466                             (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
7467                             (X86cvtph2ps (_src.VT _src.RC:$src)),itins.rr>,
7468                             T8PD, Sched<[itins.Sched]>;
7469   defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
7470                             (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
7471                             (X86cvtph2ps (_src.VT
7472                                           (bitconvert  // the ld_frag result is bitconverted to _src.VT
7473                                            (ld_frag addr:$src)))), itins.rm>,
7474                             T8PD, Sched<[itins.Sched.Folded]>;
7475 }
7476 // {sae} register form of vcvtph2ps (rrb): matches X86cvtph2psRnd with FROUND_NO_EXC and sets EVEX_B.
7477 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7478                                OpndItins itins> {
7479   defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
7480                              (ins _src.RC:$src), "vcvtph2ps",
7481                              "{sae}, $src", "$src, {sae}",
7482                              (X86cvtph2psRnd (_src.VT _src.RC:$src),
7483                                              (i32 FROUND_NO_EXC)), itins.rr>,
7484                              T8PD, EVEX_B, Sched<[itins.Sched]>;
7485 }
7486 // 512-bit vcvtph2ps: v16i16x_info source (f256mem / loadv4i64 for the memory form) to v16f32_info, including the {sae} variant.
7487 let Predicates = [HasAVX512] in
7488   defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
7489                                     SSE_CVT_PH2PS>,
7490                     avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, SSE_CVT_PH2PS>,
7491                     EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
7492 // VLX-only 256/128-bit vcvtph2ps (no {sae} form); the 128-bit variant uses an f64mem operand.
7493 let Predicates = [HasVLX] in {
7494   defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
7495                        loadv2i64, SSE_CVT_PH2PS>, EVEX, EVEX_V256,
7496                        EVEX_CD8<32, CD8VH>;
7497   defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
7498                        loadv2i64, SSE_CVT_PH2PS>, EVEX, EVEX_V128,
7499                        EVEX_CD8<32, CD8VH>;
7500
7501   // Pattern match vcvtph2ps of a scalar i64 load.
7502   def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
7503             (VCVTPH2PSZ128rm addr:$src)>;
7504   def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
7505             (VCVTPH2PSZ128rm addr:$src)>;
7506   def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
7507               (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
7508             (VCVTPH2PSZ128rm addr:$src)>;
7509 }
7510 // vcvtps2ph (opcode 0x1D): maskable register form matching X86cvtps2ph, plus pattern-less store forms (mr and write-masked mrk).
7511 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7512                            X86MemOperand x86memop, OpndItins itins> {
7513   defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
7514                    (ins _src.RC:$src1, i32u8imm:$src2),
7515                    "vcvtps2ph", "$src2, $src1", "$src1, $src2",
7516                    (X86cvtps2ph (_src.VT _src.RC:$src1),
7517                                 (i32 imm:$src2)),
7518                    itins.rr, 0, 0>, AVX512AIi8Base, Sched<[itins.Sched]>;
7519   let hasSideEffects = 0, mayStore = 1 in {  // these defs carry no pattern, so the flags are given explicitly
7520     def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
7521                (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
7522                "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7523                [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
7524     def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
7525                (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
7526                "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
7527                 [], itins.rm>, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
7528   }
7529 }
7530 // {sae} register form of vcvtps2ph (rrb): defined through AVX512_maskable_in_asm with an empty pattern list, and sets EVEX_B.
7531 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7532                                OpndItins itins> {
7533   let hasSideEffects = 0 in
7534   defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
7535                    (outs _dest.RC:$dst),
7536                    (ins _src.RC:$src1, i32u8imm:$src2),
7537                    "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2",
7538                    [], itins.rr>, EVEX_B, AVX512AIi8Base, Sched<[itins.Sched]>;
7539 }
7540 // vcvtps2ph definitions and store-folding patterns. The 128/256-bit store patterns sit under HasVLX because the instructions they select are only defined under HasVLX.
7541 let Predicates = [HasAVX512] in {
7542   defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
7543                                     SSE_CVT_PS2PH>,
7544                     avx512_cvtps2ph_sae<v16i16x_info, v16f32_info,
7545                                         SSE_CVT_PS2PH>, EVEX, EVEX_V512,
7546                                         EVEX_CD8<32, CD8VH>;
7547   def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),  // 512-bit store fold: AVX512 alone suffices
7548             (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
7549   let Predicates = [HasVLX] in {
7550     defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
7551                                          SSE_CVT_PS2PH>, EVEX, EVEX_V256,
7552                                          EVEX_CD8<32, CD8VH>;
7553     defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
7554                                          SSE_CVT_PS2PH>, EVEX, EVEX_V128,
7555                                          EVEX_CD8<32, CD8VH>;
7556     // Fold stores of the converted value into the 128/256-bit memory-destination forms.
7557     def : Pat<(store (f64 (extractelt
7558                            (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
7559                            (iPTR 0))), addr:$dst),
7560               (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
7561     def : Pat<(store (i64 (extractelt
7562                            (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
7563                            (iPTR 0))), addr:$dst),
7564               (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
7565     def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
7566               (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
7567   }
7568 }
7569
7570 // Patterns for matching conversions from float to half-float and vice versa.
7571 let Predicates = [HasVLX] in {
7572   // Use MXCSR.RC for rounding instead of explicitly specifying the default
7573   // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
7574   // configurations we support (the default). However, falling back to MXCSR is
7575   // more consistent with other instructions, which are always controlled by it.
7576   // It's encoded as 0b100.
7577   def : Pat<(fp_to_f16 FR32X:$src),
7578             (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (VCVTPS2PHZ128rr
7579               (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), sub_16bit))>;  // immediate 4 == 0b100 (see above)
7580
7581   def : Pat<(f16_to_fp GR16:$src),
7582             (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
7583               (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)), FR32X)) >;  // GR16 goes through MOVSX32rr16 before the copy to VR128X
7584   // Round trip f32 -> f16 -> f32: both conversions stay in vector registers, with no GPR transfer in between.
7585   def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
7586             (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
7587               (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), FR32X)) >;
7588 }
7589
7590 //  Unordered/Ordered scalar fp compare with SAE ({sae} operand) and set EFLAGS
7591 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
7592                             string OpcodeStr, OpndItins itins> {
7593   let hasSideEffects = 0 in
7594   def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),  // no outs; the users wrap this in Defs = [EFLAGS]
7595                   !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
7596                   [], itins.rr>, EVEX, EVEX_B, VEX_LIG, EVEX_V128,  // empty pattern list: nothing is selected to this def
7597                   Sched<[itins.Sched]>;
7598 }
7599 // {sae} (rrb) forms of the scalar compares: opcode 0x2E for vucomis*, 0x2F for vcomis*; all are declared to define EFLAGS.
7600 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
7601   defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSE_COMIS>,
7602                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
7603   defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSE_COMIS>,
7604                                    AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
7605   defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSE_COMIS>,
7606                                    AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
7607   defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSE_COMIS>,
7608                                    AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
7609 }
7610 // Non-{sae} scalar compares, instantiated from sse12_ord_cmp / sse12_ord_cmp_int (defined outside this section) with EVEX encoding.
7611 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
7612   defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
7613                                  "ucomiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
7614                                  EVEX_CD8<32, CD8VT1>;
7615   defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
7616                                   "ucomisd", SSE_COMIS>, PD, EVEX,
7617                                   VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
7618   let Pattern = []<dag> in {  // VCOMIS*Z: pattern list forced empty, and the node argument is undef
7619     defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
7620                                    "comiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
7621                                    EVEX_CD8<32, CD8VT1>;
7622     defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
7623                                    "comisd", SSE_COMIS>, PD, EVEX,
7624                                     VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
7625   }
7626   let isCodeGenOnly = 1 in {  // VR128X variants matching the X86ucomi / X86comi nodes
7627     defm Int_VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
7628                               sse_load_f32, "ucomiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
7629                               EVEX_CD8<32, CD8VT1>;
7630     defm Int_VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
7631                               sse_load_f64, "ucomisd", SSE_COMIS>, PD, EVEX,
7632                               VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
7633
7634     defm Int_VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
7635                               sse_load_f32, "comiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
7636                               EVEX_CD8<32, CD8VT1>;
7637     defm Int_VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
7638                               sse_load_f64, "comisd", SSE_COMIS>, PD, EVEX,
7639                               VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
7640   }
7641 }
7642
7643 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd: two-source maskable scalar forms (rr, rm) under HasAVX512.
7644 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
7645                          OpndItins itins, X86VectorVTInfo _> {
7646   let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
7647   defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7648                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7649                            "$src2, $src1", "$src1, $src2",
7650                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>,
7651                            EVEX_4V, Sched<[itins.Sched]>;
7652   defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7653                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
7654                          "$src2, $src1", "$src1, $src2",
7655                          (OpNode (_.VT _.RC:$src1),
7656                           _.ScalarIntMemCPat:$src2), itins.rm>, EVEX_4V,  // second source comes from memory via ScalarIntMemCPat
7657                           Sched<[itins.Sched.Folded, ReadAfterLd]>;
7658 }
7659 }
7660 // Scalar rcp14/rsqrt14 instantiations: opcode 0x4D for vrcp14s*, 0x4F for vrsqrt14s*; all marked NotMemoryFoldable.
7661 defm VRCP14SS   : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SSE_RCPS, f32x_info>,
7662                   EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
7663 defm VRCP14SD   : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SSE_RCPS, f64x_info>,
7664                   VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
7665 defm VRSQRT14SS   : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, SSE_RSQRTSS, f32x_info>,
7666                   EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
7667 defm VRSQRT14SD   : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, SSE_RSQRTSS, f64x_info>,
7668                   VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
7669
7670 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd: unary maskable forms for register (r), full-vector load (m) and broadcast load (mb).
7671 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
7672                          OpndItins itins, X86VectorVTInfo _> {
7673   let ExeDomain = _.ExeDomain in {
7674   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7675                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
7676                          (_.FloatVT (OpNode _.RC:$src)), itins.rr>, EVEX, T8PD,
7677                          Sched<[itins.Sched]>;
7678   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7679                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
7680                          (OpNode (_.FloatVT
7681                            (bitconvert (_.LdFrag addr:$src)))), itins.rm>, EVEX, T8PD,
7682                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
7683   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7684                           (ins _.ScalarMemOp:$src), OpcodeStr,
7685                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,  // asm operand gets the broadcast suffix appended
7686                           (OpNode (_.FloatVT
7687                             (X86VBroadcast (_.ScalarLdFrag addr:$src)))), itins.rm>,
7688                           EVEX, T8PD, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
7689   }
7690 }
7691 // Instantiates avx512_fp14_p for the v16f32/v8f64 types (EVEX_V512), plus 128/256-bit variants guarded by HasVLX.
7692 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
7693                                 SizeItins itins> {
7694   defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, itins.s,
7695                            v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
7696   defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, itins.d,
7697                            v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
7698
7699   // Define only if AVX512VL feature is present.
7700   let Predicates = [HasVLX] in {
7701     defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
7702                                 OpNode, itins.s, v4f32x_info>,
7703                                EVEX_V128, EVEX_CD8<32, CD8VF>;
7704     defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
7705                                 OpNode, itins.s, v8f32x_info>,
7706                                EVEX_V256, EVEX_CD8<32, CD8VF>;
7707     defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
7708                                 OpNode, itins.d, v2f64x_info>,
7709                                EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
7710     defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
7711                                 OpNode, itins.d, v4f64x_info>,
7712                                EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
7713   }
7714 }
7715 // Packed vrsqrt14 (opcode 0x4E) and vrcp14 (opcode 0x4C), instantiated for every vector length via avx512_fp14_p_vl_all.
7716 defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SSE_RSQRT_P>;
7717 defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SSE_RCP_P>;
7718
7719 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd: maskable scalar forms r (FROUND_CURRENT), rb ({sae}) and m (memory source).
7720 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
7721                          SDNode OpNode, OpndItins itins> {
7722   let ExeDomain = _.ExeDomain in {
7723   defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7724                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7725                            "$src2, $src1", "$src1, $src2",
7726                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7727                            (i32 FROUND_CURRENT)), itins.rr>,
7728                            Sched<[itins.Sched]>;
7729   // {sae} form: still register-register, so it takes the rr itinerary to agree with its non-folded Sched class.
7730   defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7731                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7732                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7733                             (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7734                             (i32 FROUND_NO_EXC)), itins.rr>, EVEX_B,
7735                             Sched<[itins.Sched]>;
7736   // Memory form: second source is matched through _.ScalarIntMemCPat.
7737   defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7738                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
7739                          "$src2, $src1", "$src1, $src2",
7740                          (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
7741                          (i32 FROUND_CURRENT)), itins.rm>,
7742                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
7743   }
7744 }
7745 // Pairs the SS (f32x_info) and SD (f64x_info, VEX_W) instantiations of avx512_fp28_s.
7746 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
7747                         SizeItins itins> {
7748   defm SS : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, itins.s>,
7749               EVEX_CD8<32, CD8VT1>;
7750   defm SD : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, itins.d>,
7751               EVEX_CD8<64, CD8VT1>, VEX_W;
7752 }
7753 // Scalar vrcp28 (0xCB) / vrsqrt28 (0xCD) are guarded by HasERI; scalar vgetexp (0x43) reuses avx512_eri_s outside that guard.
7754 let Predicates = [HasERI] in {
7755   defm VRCP28   : avx512_eri_s<0xCB, "vrcp28",   X86rcp28s, SSE_RCP_S>,
7756                               T8PD, EVEX_4V;
7757   defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, SSE_RSQRT_S>,
7758                               T8PD, EVEX_4V;
7759 }
7760
7761 defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, SSE_ALU_ITINS_S>,
7762                              T8PD, EVEX_4V;
7763 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
7764 // Unary maskable packed forms, all passing FROUND_CURRENT to OpNode: register (r), full-vector load (m), broadcast load (mb).
7765 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7766                          SDNode OpNode, OpndItins itins> {
7767   let ExeDomain = _.ExeDomain in {
7768   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7769                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
7770                          (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT)),
7771                          itins.rr>, Sched<[itins.Sched]>;
7772
7773   defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7774                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
7775                          (OpNode (_.FloatVT
7776                              (bitconvert (_.LdFrag addr:$src))),
7777                           (i32 FROUND_CURRENT)), itins.rm>,
7778                           Sched<[itins.Sched.Folded, ReadAfterLd]>;
7779
7780   defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7781                          (ins _.ScalarMemOp:$src), OpcodeStr,
7782                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
7783                          (OpNode (_.FloatVT
7784                                   (X86VBroadcast (_.ScalarLdFrag addr:$src))),
7785                                  (i32 FROUND_CURRENT)), itins.rm>, EVEX_B,
7786                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
7787   }
7788 }
7789 multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,  // {sae} register form (rb) only
7790                          SDNode OpNode, OpndItins itins> {
7791   let ExeDomain = _.ExeDomain in
7792   defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7793                         (ins _.RC:$src), OpcodeStr,
7794                         "{sae}, $src", "$src, {sae}",
7795                         (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC)),  // FROUND_NO_EXC is what distinguishes this from the r form
7796                         itins.rr>, EVEX_B, Sched<[itins.Sched]>;
7797 }
7798 // 512-bit PS/PD instantiations combining the r/m/mb forms (avx512_fp28_p) with the {sae} rb form (avx512_fp28_p_round).
7799 multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
7800                        SizeItins itins> {
7801    defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, itins.s>,
7802              avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, itins.s>,
7803              T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
7804    defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, itins.d>,
7805              avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, itins.d>,
7806              T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
7807 }
7808 // 128/256-bit instantiations of avx512_fp28_p (no {sae} rb form), all under HasVLX.
7809 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
7810                                   SDNode OpNode, SizeItins itins> {
7811   // Define only if AVX512VL feature is present.
7812   let Predicates = [HasVLX] in {
7813     defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, itins.s>,
7814                                      EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
7815     defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, itins.s>,
7816                                      EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
7817     defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, itins.d>,
7818                                      EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
7819     defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, itins.d>,
7820                                      EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
7821   }
7822 }
7823 let Predicates = [HasERI] in {
7824  // ERI-guarded packed instructions: vrsqrt28 (0xCC), vrcp28 (0xCA) and vexp2 (0xC8), 512-bit forms only.
7825  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SSE_RSQRT_P>, EVEX;
7826  defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, SSE_RCP_P>, EVEX;
7827  defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, SSE_ALU_ITINS_P>, EVEX;
7828 }
7829 defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SSE_ALU_ITINS_P>,  // outside the HasERI guard
7830                  avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,  // also adds the HasVLX 128/256-bit forms
7831                                           SSE_ALU_ITINS_P>, EVEX;
7832 // Register form with an explicit rounding-control operand ($rc) matching X86fsqrtRnd; sets EVEX_B and EVEX_RC.
7833 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, OpndItins itins,
7834                                     X86VectorVTInfo _>{
7835   let ExeDomain = _.ExeDomain in
7836   defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7837                          (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
7838                          (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc))), itins.rr>,
7839                          EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
7840 }
7841 // Packed square root matching the fsqrt node: register (r), full-vector load (m) and broadcast load (mb) forms.
7842 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, OpndItins itins,
7843                               X86VectorVTInfo _>{
7844   let ExeDomain = _.ExeDomain in {
7845   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7846                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
7847                          (_.FloatVT (fsqrt _.RC:$src)), itins.rr>, EVEX,
7848                          Sched<[itins.Sched]>;
7849   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7850                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
7851                          (fsqrt (_.FloatVT
7852                            (bitconvert (_.LdFrag addr:$src)))), itins.rm>, EVEX,
7853                            Sched<[itins.Sched.Folded, ReadAfterLd]>;
7854   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7855                           (ins _.ScalarMemOp:$src), OpcodeStr,
7856                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
7857                           (fsqrt (_.FloatVT
7858                             (X86VBroadcast (_.ScalarLdFrag addr:$src)))), itins.rm>,
7859                           EVEX, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
7860   }
7861 }
7862 // PS/PD instantiations of avx512_sqrt_packed for v16f32/v8f64 (EVEX_V512), plus 128/256-bit ones under HasVLX.
7863 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr> {
7864   defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), SSE_SQRTPS, v16f32_info>,
7865                                 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
7866   defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), SSE_SQRTPD, v8f64_info>,
7867                                 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
7868   // Define only if AVX512VL feature is present.
7869   let Predicates = [HasVLX] in {
7870     defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
7871                                      SSE_SQRTPS, v4f32x_info>,
7872                                      EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
7873     defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
7874                                      SSE_SQRTPS, v8f32x_info>,
7875                                      EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
7876     defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
7877                                      SSE_SQRTPD, v2f64x_info>,
7878                                      EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
7879     defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
7880                                      SSE_SQRTPD, v4f64x_info>,
7881                                      EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
7882   }
7883 }
7884
// Instantiates the rounding-control packed square root. Only the 512-bit
// PSZ and PDZ records are defined here.
7885 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr> {
7886   defm PSZ : avx512_sqrt_packed_round<opc, OpcodeStr#"ps", SSE_SQRTPS, v16f32_info>,
7887              EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
7888   defm PDZ : avx512_sqrt_packed_round<opc, OpcodeStr#"pd", SSE_SQRTPD, v8f64_info>,
7889              EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
7890 }
7891
// Scalar square root.
//   SUFF - string pasted between NAME and "Zr"/"Zm"/"Zr_Int"/"Zm_Int" when the
//          selection patterns below look up the instruction records.
//   Intr - intrinsic that the patterns below map onto the _Int forms.
// Records defined:
//   r_Int, m_Int - maskable forms matching X86fsqrtRnds with FROUND_CURRENT.
//   rb_Int       - maskable register form with an AVX512RC:$rc operand whose
//                  value is passed to X86fsqrtRnds; tagged EVEX_B, EVEX_RC.
//   r, m         - isCodeGenOnly forms on _.FRC with empty pattern lists;
//                  they are selected only through the Pat records below.
7892 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, OpndItins itins,
7893                               X86VectorVTInfo _, string SUFF, Intrinsic Intr> {
7894   let ExeDomain = _.ExeDomain in {
7895   defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7896                          (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7897                          "$src2, $src1", "$src1, $src2",
7898                          (X86fsqrtRnds (_.VT _.RC:$src1),
7899                                     (_.VT _.RC:$src2),
7900                                     (i32 FROUND_CURRENT)), itins.rr>,
7901                          Sched<[itins.Sched]>;
7902   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7903                        (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
7904                        "$src2, $src1", "$src1, $src2",
7905                        (X86fsqrtRnds (_.VT _.RC:$src1),
7906                                   _.ScalarIntMemCPat:$src2,
7907                                   (i32 FROUND_CURRENT)), itins.rm>,
7908                        Sched<[itins.Sched.Folded, ReadAfterLd]>;
7909   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7910                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
7911                          "$rc, $src2, $src1", "$src1, $src2, $rc",
7912                          (X86fsqrtRnds (_.VT _.RC:$src1),
7913                                      (_.VT _.RC:$src2),
7914                                      (i32 imm:$rc)), itins.rr>,
7915                          EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
7916
  // Pattern-less FRC forms; the memory form is explicitly marked mayLoad
  // because there is no pattern to infer it from.
7917   let isCodeGenOnly = 1, hasSideEffects = 0 in {
7918     def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7919                (ins _.FRC:$src1, _.FRC:$src2),
7920                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], itins.rr>,
7921                Sched<[itins.Sched]>;
7922     let mayLoad = 1 in
7923       def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7924                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
7925                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], itins.rm>,
7926                  Sched<[itins.Sched.Folded, ReadAfterLd]>;
7927   }
7928   }
7929
// Register selection patterns. Plain fsqrt uses the FRC `r` form with an
// IMPLICIT_DEF first source; the intrinsic uses r_Int with the same register
// supplied as both sources.
7930 let Predicates = [HasAVX512] in {
7931   def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
7932             (!cast<Instruction>(NAME#SUFF#Zr)
7933                 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
7934
7935    def : Pat<(Intr VR128X:$src),
7936              (!cast<Instruction>(NAME#SUFF#Zr_Int) VR128X:$src,
7937                                  VR128X:$src)>;
7938 }
7939
// Load-folding selection patterns, enabled only together with OptForSize.
// NOTE(review): presumably restricted because the folded form leaves the
// first source as IMPLICIT_DEF - confirm the rationale.
7940 let Predicates = [HasAVX512, OptForSize] in {
7941   def : Pat<(_.EltVT (fsqrt (load addr:$src))),
7942             (!cast<Instruction>(NAME#SUFF#Zm)
7943                 (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
7944
7945   def : Pat<(Intr _.ScalarIntMemCPat:$src2),
7946             (!cast<Instruction>(NAME#SUFF#Zm_Int)
7947                   (_.VT (IMPLICIT_DEF)), addr:$src2)>;
7948 }
7949
7950 }
7951
// Instantiates the scalar square root for single precision (SSZ, "ss" suffix,
// int_x86_sse_sqrt_ss) and double precision (SDZ, "sd" suffix,
// int_x86_sse2_sqrt_sd). Both are tagged EVEX_4V and NotMemoryFoldable.
// The scalar itineraries SSE_SQRTSS / SSE_SQRTSD are used so that the scalar
// forms are not tagged with the packed (PS/PD) itinerary classes.
7952 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
7953   defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", SSE_SQRTSS, f32x_info, "SS",
7954                         int_x86_sse_sqrt_ss>,
7955                         EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable;
7956   defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", SSE_SQRTSD, f64x_info, "SD",
7957                         int_x86_sse2_sqrt_sd>,
7958                         EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W,
7959                         NotMemoryFoldable;
7960 }
7961
// VSQRT instantiations, all with opcode 0x51 and mnemonic prefix "vsqrt":
// first the packed forms (plain and rounding-control), then the scalar forms,
// which are additionally tagged VEX_LIG.
7962 defm VSQRT   : avx512_sqrt_packed_all<0x51, "vsqrt">,
7963                avx512_sqrt_packed_all_round<0x51, "vsqrt">;
7964
7965 defm VSQRT   : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
7966
// Scalar round-to-scale with an 8-bit immediate control operand ($src3).
// Records defined:
//   r_Int  - maskable register form matching X86RndScales.
//   rb_Int - maskable register form matching X86RndScalesRnd with
//            FROUND_NO_EXC; printed with "{sae}" and tagged EVEX_B.
//   m_Int  - maskable memory form matching X86RndScales on
//            _.ScalarIntMemCPat.
//   r, m   - isCodeGenOnly forms on _.FRC with empty pattern lists, selected
//            through the Pat records at the end of the multiclass.
7967 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
7968                                   OpndItins itins, X86VectorVTInfo _> {
7969   let ExeDomain = _.ExeDomain in {
7970   defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7971                            (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
7972                            "$src3, $src2, $src1", "$src1, $src2, $src3",
7973                            (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7974                            (i32 imm:$src3))), itins.rr>,
7975                            Sched<[itins.Sched]>;
7976
7977   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7978                          (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
7979                          "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
7980                          (_.VT (X86RndScalesRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7981                          (i32 imm:$src3), (i32 FROUND_NO_EXC))), itins.rr>, EVEX_B,
7982                          Sched<[itins.Sched]>;
7983
7984   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7985                          (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
7986                          OpcodeStr,
7987                          "$src3, $src2, $src1", "$src1, $src2, $src3",
7988                          (_.VT (X86RndScales _.RC:$src1,
7989                                 _.ScalarIntMemCPat:$src2, (i32 imm:$src3))), itins.rm>,
7990                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
7991
  // Pattern-less FRC forms; the memory form is explicitly marked mayLoad.
7992   let isCodeGenOnly = 1, hasSideEffects = 0 in {
7993     def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7994                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
7995                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7996                [], itins.rr>, Sched<[itins.Sched]>;
7997
7998     let mayLoad = 1 in
7999       def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8000                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
8001                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
8002                  [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
8003   }
8004   }
8005
  // Register selection patterns. Each generic rounding node is mapped to the
  // `r` form with an IMPLICIT_DEF first source and a fixed immediate:
  //   ffloor -> 0x9, fceil -> 0xa, ftrunc -> 0xb, frint -> 0x4,
  //   fnearbyint -> 0xc.
8006   let Predicates = [HasAVX512] in {
8007     def : Pat<(ffloor _.FRC:$src),
8008               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8009                _.FRC:$src, (i32 0x9)))>;
8010     def : Pat<(fceil _.FRC:$src),
8011               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8012                _.FRC:$src, (i32 0xa)))>;
8013     def : Pat<(ftrunc _.FRC:$src),
8014               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8015                _.FRC:$src, (i32 0xb)))>;
8016     def : Pat<(frint _.FRC:$src),
8017               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8018                _.FRC:$src, (i32 0x4)))>;
8019     def : Pat<(fnearbyint _.FRC:$src),
8020               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8021                _.FRC:$src, (i32 0xc)))>;
8022   }
8023
  // Load-folding selection patterns with the same node-to-immediate mapping,
  // enabled only together with OptForSize; they select the `m` form.
8024   let Predicates = [HasAVX512, OptForSize] in {
8025     def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
8026               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8027                addr:$src, (i32 0x9)))>;
8028     def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
8029               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8030                addr:$src, (i32 0xa)))>;
8031     def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
8032               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8033                addr:$src, (i32 0xb)))>;
8034     def : Pat<(frint (_.ScalarLdFrag addr:$src)),
8035               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8036                addr:$src, (i32 0x4)))>;
8037     def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
8038               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8039                addr:$src, (i32 0xc)))>;
8040   }
8041 }
8042
// Scalar round-to-scale instantiations: single precision (opcode 0x0A,
// f32x_info) and double precision (opcode 0x0B, f64x_info, VEX_W). Both use
// AVX512AIi8Base and EVEX_4V.
8043 defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", SSE_ALU_F32S,
8044                       f32x_info>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>;
8045
8046 defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", SSE_ALU_F64S,
8047                       f64x_info>, VEX_W, AVX512AIi8Base, EVEX_4V,
8048                       EVEX_CD8<64, CD8VT1>;
8049
8050 //-------------------------------------------------
8051 // Integer truncate and extend operations
8052 //-------------------------------------------------
8053
// Itinerary bundles used by the extend and truncate multiclasses below. Both
// pair IIC_SSE_PSHUF_RI / IIC_SSE_PSHUF_MI with the WriteShuffle256
// scheduling class.
8054 let Sched = WriteShuffle256 in
8055 def AVX512_EXTEND : OpndItins<
8056   IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
8057 >;
8058
8059 let Sched = WriteShuffle256 in
8060 def AVX512_TRUNCATE : OpndItins<
8061   IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
8062 >;
8063
// Instruction records shared by all truncating moves for one source/dest
// type pair.
//   rr  - maskable register form (MRMDestReg) applying OpNode to the source.
//   mr  - store form to x86memop with an empty pattern list.
//   mrk - masked store form taking an extra SrcInfo.KRCWM:$mask operand;
//         tagged EVEX_K, empty pattern list.
// The store forms carry no patterns, so mayStore/mayLoad/hasSideEffects are
// set explicitly; they are selected by avx512_trunc_mr_lowering.
8064 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
8065                               OpndItins itins, X86VectorVTInfo SrcInfo,
8066                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
8067   let ExeDomain = DestInfo.ExeDomain in
8068   defm rr  : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
8069                       (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
8070                       (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
8071                       itins.rr>, EVEX, T8XS, Sched<[itins.Sched]>;
8072
8073   let mayStore = 1, mayLoad = 1, hasSideEffects = 0,
8074       ExeDomain = DestInfo.ExeDomain in {
8075     def mr : AVX512XS8I<opc, MRMDestMem, (outs),
8076                (ins x86memop:$dst, SrcInfo.RC:$src),
8077                OpcodeStr # "\t{$src, $dst|$dst, $src}",
8078                [], itins.rm>, EVEX, Sched<[itins.Sched.Folded]>;
8079
8080     def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
8081                (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
8082                OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8083                [], itins.rm>, EVEX, EVEX_K, Sched<[itins.Sched.Folded]>;
8084   }//mayStore = 1, mayLoad = 1, hasSideEffects = 0
8085 }
8086
// Selection patterns for the truncating store forms.
//   truncFrag  - unmasked truncating-store fragment, selected to the `mr`
//                record.
//   mtruncFrag - masked truncating-store fragment, selected to the `mrk`
//                record.
// The instruction records are looked up by name as
// NAME # SrcInfo.ZSuffix # "mr" / "mrk".
8087 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
8088                                     X86VectorVTInfo DestInfo,
8089                                     PatFrag truncFrag, PatFrag mtruncFrag > {
8090
8091   def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
8092             (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr)
8093                                     addr:$dst, SrcInfo.RC:$src)>;
8094
8095   def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
8096                                                (SrcInfo.VT SrcInfo.RC:$src)),
8097             (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk)
8098                             addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
8099 }
8100
// Instantiates a truncating move at all three vector lengths.
//   VTSrcInfo              - source type info; its info128/info256/info512
//                            members are used for Z128/Z256/Z.
//   DestInfoZ128/Z256/Z    - destination type info per vector length.
//   x86memopZ128/Z256/Z    - memory operand of the store forms per length.
//   truncFrag, mtruncFrag  - store fragments forwarded to
//                            avx512_trunc_mr_lowering.
//   prd                    - base predicate (defaults to HasAVX512).
// Z128 and Z256 additionally require HasVLX; Z requires only prd.
8101 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode,
8102          OpndItins itins, AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
8103          X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
8104          X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
8105          X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag,
8106                                                      Predicate prd = HasAVX512>{
8107
8108   let Predicates = [HasVLX, prd] in {
8109     defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode, itins,
8110                              VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
8111                 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
8112                              truncFrag, mtruncFrag>, EVEX_V128;
8113
8114     defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode, itins,
8115                              VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
8116                 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
8117                              truncFrag, mtruncFrag>, EVEX_V256;
8118   }
8119   let Predicates = [prd] in
8120     defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode, itins,
8121                              VTSrcInfo.info512, DestInfoZ, x86memopZ>,
8122                 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
8123                              truncFrag, mtruncFrag>, EVEX_V512;
8124 }
8125
// Truncation from avx512vl_i64_info sources to v16i8x_info destinations at
// every vector length. Store forms use i16mem / i32mem / i64mem for the
// 128 / 256 / 512-bit variants.
8126 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
8127                            OpndItins itins, PatFrag StoreNode,
8128                            PatFrag MaskedStoreNode> {
8129   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i64_info,
8130                v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem,
8131                StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
8132 }
8133
// Truncation from avx512vl_i64_info sources to v8i16x_info destinations at
// every vector length. Store forms use i32mem / i64mem / i128mem for the
// 128 / 256 / 512-bit variants.
8134 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
8135                            OpndItins itins, PatFrag StoreNode,
8136                            PatFrag MaskedStoreNode> {
8137   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i64_info,
8138                v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem,
8139                StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
8140 }
8141
// Truncation from avx512vl_i64_info sources. Destinations are v4i32x_info for
// the 128-bit and 256-bit variants and v8i32x_info for the 512-bit variant.
// Store forms use i64mem / i128mem / i256mem respectively.
8142 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
8143                            OpndItins itins, PatFrag StoreNode,
8144                            PatFrag MaskedStoreNode> {
8145   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i64_info,
8146                v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem,
8147                StoreNode, MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
8148 }
8149
// Truncation from avx512vl_i32_info sources to v16i8x_info destinations at
// every vector length. Store forms use i32mem / i64mem / i128mem for the
// 128 / 256 / 512-bit variants.
8150 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
8151                            OpndItins itins, PatFrag StoreNode,
8152                            PatFrag MaskedStoreNode> {
8153   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i32_info,
8154                v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem,
8155                StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
8156 }
8157
// Truncation from avx512vl_i32_info sources. Destinations are v8i16x_info for
// the 128-bit and 256-bit variants and v16i16x_info for the 512-bit variant.
// Store forms use i64mem / i128mem / i256mem respectively.
8158 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
8159                            OpndItins itins, PatFrag StoreNode,
8160                            PatFrag MaskedStoreNode> {
8161   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i32_info,
8162               v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem,
8163               StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
8164 }
8165
// Truncation from avx512vl_i16_info sources. Destinations are v16i8x_info for
// the 128-bit and 256-bit variants and v32i8x_info for the 512-bit variant.
// Store forms use i64mem / i128mem / i256mem respectively. Unlike the other
// truncations, the base predicate passed to avx512_trunc is HasBWI.
8166 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
8167                            OpndItins itins, PatFrag StoreNode,
8168                            PatFrag MaskedStoreNode> {
8169   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i16_info,
8170               v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem,
8171               StoreNode, MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
8172 }
8173
// Truncating-move instantiations. Every source/destination pairing comes in
// three flavours that differ in the DAG node and store fragments used:
//   VPMOVxx   - X86vtrunc   with truncstorevi*       / masked_truncstorevi*
//   VPMOVSxx  - X86vtruncs  with truncstore_s_vi*    / masked_truncstore_s_vi*
//   VPMOVUSxx - X86vtruncus with truncstore_us_vi*   / masked_truncstore_us_vi*
// All of them share the AVX512_TRUNCATE itineraries.
8174 defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   X86vtrunc, AVX512_TRUNCATE,
8175                                   truncstorevi8, masked_truncstorevi8>;
8176 defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, AVX512_TRUNCATE,
8177                                   truncstore_s_vi8, masked_truncstore_s_vi8>;
8178 defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, AVX512_TRUNCATE,
8179                                   truncstore_us_vi8, masked_truncstore_us_vi8>;
8180
8181 defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw",   X86vtrunc, AVX512_TRUNCATE,
8182                                   truncstorevi16, masked_truncstorevi16>;
8183 defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, AVX512_TRUNCATE,
8184                                   truncstore_s_vi16, masked_truncstore_s_vi16>;
8185 defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, AVX512_TRUNCATE,
8186                                   truncstore_us_vi16, masked_truncstore_us_vi16>;
8187
8188 defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd",   X86vtrunc, AVX512_TRUNCATE,
8189                                   truncstorevi32, masked_truncstorevi32>;
8190 defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, AVX512_TRUNCATE,
8191                                   truncstore_s_vi32, masked_truncstore_s_vi32>;
8192 defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, AVX512_TRUNCATE,
8193                                   truncstore_us_vi32, masked_truncstore_us_vi32>;
8194
8195 defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc, AVX512_TRUNCATE,
8196                                   truncstorevi8, masked_truncstorevi8>;
8197 defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb",   X86vtruncs, AVX512_TRUNCATE,
8198                                   truncstore_s_vi8, masked_truncstore_s_vi8>;
8199 defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus, AVX512_TRUNCATE,
8200                                   truncstore_us_vi8, masked_truncstore_us_vi8>;
8201
8202 defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc, AVX512_TRUNCATE,
8203                                   truncstorevi16, masked_truncstorevi16>;
8204 defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw",   X86vtruncs, AVX512_TRUNCATE,
8205                                   truncstore_s_vi16, masked_truncstore_s_vi16>;
8206 defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw",  X86vtruncus, AVX512_TRUNCATE,
8207                                   truncstore_us_vi16, masked_truncstore_us_vi16>;
8208
8209 defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc, AVX512_TRUNCATE,
8210                                   truncstorevi8, masked_truncstorevi8>;
8211 defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb",   X86vtruncs, AVX512_TRUNCATE,
8212                                   truncstore_s_vi8, masked_truncstore_s_vi8>;
8213 defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb",  X86vtruncus, AVX512_TRUNCATE,
8214                                   truncstore_us_vi8, masked_truncstore_us_vi8>;
8215
// Without VLX, 256-bit X86vtrunc is selected to the 512-bit instruction:
// the source is inserted into an IMPLICIT_DEF 512-bit register via sub_ymm,
// truncated with the Zrr form, and the low 128 bits are extracted via sub_xmm.
8216 let Predicates = [HasAVX512, NoVLX] in {
8217 def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
8218          (v8i16 (EXTRACT_SUBREG
8219                  (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8220                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
8221 def: Pat<(v4i32 (X86vtrunc (v4i64 VR256X:$src))),
8222          (v4i32 (EXTRACT_SUBREG
8223                  (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8224                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
8225 }
8226
// Same widening trick for v16i16 -> v16i8 when BWI is available but VLX is
// not: widen to v32i16, use VPMOVWBZrr, then extract the low 128 bits.
8227 let Predicates = [HasBWI, NoVLX] in {
8228 def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))),
8229          (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
8230                                             VR256X:$src, sub_ymm))), sub_xmm))>;
8231 }
8232
// Instruction records shared by all extending moves for one source/dest
// type pair.
//   rr - maskable register form applying OpNode to the source register.
//   rm - maskable memory form whose pattern is LdFrag applied to the address
//        (an extending-load fragment supplied by the caller).
8233 multiclass avx512_extend_common<bits<8> opc, string OpcodeStr, OpndItins itins,
8234               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
8235               X86MemOperand x86memop, PatFrag LdFrag, SDPatternOperator OpNode>{
8236   let ExeDomain = DestInfo.ExeDomain in {
8237   defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
8238                     (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
8239                     (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))), itins.rr>,
8240                   EVEX, Sched<[itins.Sched]>;
8241
8242   defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
8243                   (ins x86memop:$src), OpcodeStr ,"$src", "$src",
8244                   (DestInfo.VT (LdFrag addr:$src)), itins.rm>,
8245                 EVEX, Sched<[itins.Sched.Folded]>;
8246   }
8247 }
8248
// Extension from i8 to i16 vector elements at all vector lengths.
//   OpNode    - node used for the 256-bit and 512-bit register forms.
//   InVecNode - node used for the 128-bit register form.
//   ExtTy     - prefix pasted onto "extloadvi8" to pick the default LdFrag.
// Z128/Z256 require HasVLX and HasBWI; Z requires HasBWI. Memory operands are
// i64mem / i128mem / i256mem.
8249 multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
8250           SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8251           OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
8252   let Predicates = [HasVLX, HasBWI] in {
8253     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v8i16x_info,
8254                     v16i8x_info, i64mem, LdFrag, InVecNode>,
8255                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
8256
8257     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v16i16x_info,
8258                     v16i8x_info, i128mem, LdFrag, OpNode>,
8259                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
8260   }
8261   let Predicates = [HasBWI] in {
8262     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v32i16_info,
8263                     v32i8x_info, i256mem, LdFrag, OpNode>,
8264                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
8265   }
8266 }
8267
// Extension from i8 to i32 vector elements at all vector lengths.
// The 128-bit form uses InVecNode, the wider forms use OpNode; the default
// LdFrag is ExtTy # "extloadvi8". Z128/Z256 require HasVLX and HasAVX512; Z
// requires HasAVX512. Memory operands are i32mem / i64mem / i128mem.
8268 multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
8269           SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8270           OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
8271   let Predicates = [HasVLX, HasAVX512] in {
8272     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v4i32x_info,
8273                    v16i8x_info, i32mem, LdFrag, InVecNode>,
8274                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
8275
8276     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v8i32x_info,
8277                    v16i8x_info, i64mem, LdFrag, OpNode>,
8278                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
8279   }
8280   let Predicates = [HasAVX512] in {
8281     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v16i32_info,
8282                    v16i8x_info, i128mem, LdFrag, OpNode>,
8283                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
8284   }
8285 }
8286
// Extension from i8 to i64 vector elements at all vector lengths.
// The 128-bit form uses InVecNode, the wider forms use OpNode; the default
// LdFrag is ExtTy # "extloadvi8". Z128/Z256 require HasVLX and HasAVX512; Z
// requires HasAVX512. Memory operands are i16mem / i32mem / i64mem.
8287 multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
8288           SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8289           OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
8290   let Predicates = [HasVLX, HasAVX512] in {
8291     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
8292                    v16i8x_info, i16mem, LdFrag, InVecNode>,
8293                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
8294
8295     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
8296                    v16i8x_info, i32mem, LdFrag, OpNode>,
8297                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
8298   }
8299   let Predicates = [HasAVX512] in {
8300     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
8301                    v16i8x_info, i64mem, LdFrag, OpNode>,
8302                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
8303   }
8304 }
8305
// Extension from i16 to i32 vector elements at all vector lengths.
// The 128-bit form uses InVecNode, the wider forms use OpNode; the default
// LdFrag is ExtTy # "extloadvi16". Z128/Z256 require HasVLX and HasAVX512; Z
// requires HasAVX512. Memory operands are i64mem / i128mem / i256mem.
8306 multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
8307          SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8308          OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
8309   let Predicates = [HasVLX, HasAVX512] in {
8310     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v4i32x_info,
8311                    v8i16x_info, i64mem, LdFrag, InVecNode>,
8312                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
8313
8314     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v8i32x_info,
8315                    v8i16x_info, i128mem, LdFrag, OpNode>,
8316                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
8317   }
8318   let Predicates = [HasAVX512] in {
8319     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v16i32_info,
8320                    v16i16x_info, i256mem, LdFrag, OpNode>,
8321                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
8322   }
8323 }
8324
// Extension from i16 to i64 vector elements at all vector lengths.
// The 128-bit form uses InVecNode, the wider forms use OpNode; the default
// LdFrag is ExtTy # "extloadvi16". Z128/Z256 require HasVLX and HasAVX512; Z
// requires HasAVX512. Memory operands are i32mem / i64mem / i128mem.
8325 multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
8326          SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8327          OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
8328   let Predicates = [HasVLX, HasAVX512] in {
8329     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
8330                    v8i16x_info, i32mem, LdFrag, InVecNode>,
8331                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
8332
8333     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
8334                    v8i16x_info, i64mem, LdFrag, OpNode>,
8335                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
8336   }
8337   let Predicates = [HasAVX512] in {
8338     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
8339                    v8i16x_info, i128mem, LdFrag, OpNode>,
8340                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
8341   }
8342 }
8343
// Extension from i32 to i64 vector elements at all vector lengths.
// The 128-bit form uses InVecNode, the wider forms use OpNode; the default
// LdFrag is ExtTy # "extloadvi32". Z128/Z256 require HasVLX and HasAVX512; Z
// requires HasAVX512. Memory operands are i64mem / i128mem / i256mem.
// Unlike the byte/word source variants, these records are not tagged VEX_WIG.
8344 multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
8345          SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8346          OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
8347
8348   let Predicates = [HasVLX, HasAVX512] in {
8349     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
8350                    v4i32x_info, i64mem, LdFrag, InVecNode>,
8351                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
8352
8353     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
8354                    v4i32x_info, i128mem, LdFrag, OpNode>,
8355                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
8356   }
8357   let Predicates = [HasAVX512] in {
8358     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
8359                    v8i32x_info, i256mem, LdFrag, OpNode>,
8360                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
8361   }
8362 }
8363
// Extending-move instantiations. The VPMOVZX* family (opcodes 0x30-0x35) uses
// X86vzext / zext_invec with ExtTy "z"; the VPMOVSX* family (opcodes
// 0x20-0x25) uses X86vsext / sext_invec with ExtTy "s". All share the
// AVX512_EXTEND itineraries.
8364 defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8365 defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8366 defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8367 defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8368 defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8369 defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8370
8371 defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8372 defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8373 defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8374 defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8375 defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8376 defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8377
8378
8379 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
8380                                  SDNode InVecOp, PatFrag ExtLoad16> {
8381   // 128-bit patterns
8382   let Predicates = [HasVLX, HasBWI] in {
8383   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8384             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8385   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
8386             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8387   def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
8388             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8389   def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
8390             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8391   def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
8392             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8393   }
8394   let Predicates = [HasVLX] in {
8395   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
8396             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
8397   def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
8398             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
8399   def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
8400             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
8401   def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
8402             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
8403
8404   def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
8405             (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8406   def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
8407             (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8408   def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
8409             (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8410   def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
8411             (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8412
8413   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8414             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8415   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
8416             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8417   def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
8418             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8419   def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
8420             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8421   def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
8422             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8423
8424   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
8425             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8426   def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
8427             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8428   def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
8429             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8430   def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
8431             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8432
8433   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8434             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8435   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
8436             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8437   def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
8438             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8439   def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
8440             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8441   def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
8442             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8443   }
8444   // 256-bit patterns
8445   let Predicates = [HasVLX, HasBWI] in {
8446   def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8447             (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
8448   def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
8449             (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
8450   def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
8451             (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
8452   }
8453   let Predicates = [HasVLX] in {
8454   def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8455             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8456   def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
8457             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8458   def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
8459             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8460   def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8461             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8462
8463   def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
8464             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8465   def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
8466             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8467   def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
8468             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8469   def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8470             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8471
8472   def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
8473             (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
8474   def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
8475             (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
8476   def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
8477             (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
8478
8479   def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8480             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8481   def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
8482             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8483   def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
8484             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8485   def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
8486             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8487
8488   def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
8489             (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
8490   def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
8491             (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
8492   def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
8493             (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
8494   }
8495   // 512-bit patterns
8496   let Predicates = [HasBWI] in {
8497   def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
8498             (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
8499   }
8500   let Predicates = [HasAVX512] in {
8501   def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8502             (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
8503
8504   def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8505             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
8506   def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8507             (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
8508
8509   def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
8510             (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
8511
8512   def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
8513             (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
8514
8515   def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
8516             (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
8517   }
8518 }
8519
// Instantiate the pmovx load patterns twice: once for the "VPMOVSX" opcode
// prefix (X86vsext / sext_invec / extloadi32i16) and once for the "VPMOVZX"
// opcode prefix (X86vzext / zext_invec / loadi16_anyext).
8520 defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec, extloadi32i16>;
8521 defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec, loadi16_anyext>;
8522
8523 //===----------------------------------------------------------------------===//
8524 // GATHER - SCATTER Operations
8525
8526 // FIXME: Improve scheduling of gather/scatter instructions.
// Defines one masked gather instruction record, `rm`.
//   opc        - opcode byte.
//   OpcodeStr  - mnemonic; _.Suffix is appended to it.
//   _          - vector type info supplying RC, VT, EltSize and ExeDomain.
//   memop      - memory operand class used for $src2.
//   GatherNode - pattern fragment matched by the selection pattern.
//   MaskRC     - mask register class; defaults to _.KRCWM.
// The record has two results, the vector $dst and the mask $mask_wb.  The
// constraints tie $src1 to $dst and $mask to $mask_wb, and mark $dst as
// earlyclobber.
8527 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8528                          X86MemOperand memop, PatFrag GatherNode,
8529                          RegisterClass MaskRC = _.KRCWM> {
8530   let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
8531       ExeDomain = _.ExeDomain in
8532   def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
8533             (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
8534             !strconcat(OpcodeStr#_.Suffix,
8535             "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
8536             [(set _.RC:$dst, MaskRC:$mask_wb,
8537               (GatherNode  (_.VT _.RC:$src1), MaskRC:$mask,
8538                      vectoraddr:$src2))]>, EVEX, EVEX_K,
8539              EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
8540 }
8541
// Instantiates avx512_gather for the "d" index form (opcode dopc) and the "q"
// index form (opcode qopc), naming the records NAME#{D,Q}#SUFF#Z{,256,128}.
// The 512-bit records are defined unconditionally here; the 256-bit and
// 128-bit records are wrapped in a HasVLX predicate.  Every record in this
// multiclass carries VEX_W.
8542 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
8543                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8544   defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
8545                                       vy512mem, mgatherv8i32>, EVEX_V512, VEX_W;
8546   defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
8547                                       vz512mem,  mgatherv8i64>, EVEX_V512, VEX_W;
8548 let Predicates = [HasVLX] in {
8549   defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
8550                               vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
8551   defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
8552                               vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
8553   defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
8554                               vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
8555   defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
8556                               vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
8557 }
8558 }
8559
// Counterpart of avx512_gather_q_pd without VEX_W.  Record names follow the
// same NAME#{D,Q}#SUFF#Z{,256,128} scheme, and the 256/128-bit records are
// wrapped in a HasVLX predicate.
// The "q" index forms are instantiated with a narrower vector info than their
// encoding length: info256 for the EVEX_V512 record and info128 for both the
// EVEX_V256 and EVEX_V128 records.  The 128-bit "q" form also overrides the
// mask register class with VK2WM.
8560 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
8561                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8562   defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
8563                                        mgatherv16i32>, EVEX_V512;
8564   defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256xmem,
8565                                        mgatherv8i64>, EVEX_V512;
8566 let Predicates = [HasVLX] in {
8567   defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
8568                                           vy256xmem, mgatherv8i32>, EVEX_V256;
8569   defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
8570                                           vy128xmem, mgatherv4i64>, EVEX_V256;
8571   defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
8572                                           vx128xmem, mgatherv4i32>, EVEX_V128;
8573   defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
8574                                           vx64xmem, mgatherv2i64, VK2WM>,
8575                                           EVEX_V128;
8576 }
8577 }
8578
8579
// Floating-point gathers: opcodes 0x92 ("d" index) and 0x93 ("q" index), with
// PD records built from avx512vl_f64_info and PS records from
// avx512vl_f32_info.
8580 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
8581                avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
8582
// Integer gathers: opcodes 0x90 ("d" index) and 0x91 ("q" index), with Q
// records built from avx512vl_i64_info and D records from avx512vl_i32_info.
8583 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
8584                 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
8585
// Defines one masked scatter instruction record, `mr`.
//   opc         - opcode byte.
//   OpcodeStr   - mnemonic; _.Suffix is appended to it.
//   _           - vector type info supplying RC, VT, KRCWM, EltSize and
//                 ExeDomain.
//   memop       - memory operand class used for $dst.
//   ScatterNode - pattern fragment matched by the selection pattern.
// The only result is the mask $mask_wb, which is tied to the $mask input.
// The mask register class is always _.KRCWM; unlike avx512_gather there is no
// parameter to override it.
8586 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8587                           X86MemOperand memop, PatFrag ScatterNode> {
8588
8589 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
8590
8591   def mr  : AVX5128I<opc, MRMDestMem, (outs _.KRCWM:$mask_wb),
8592             (ins memop:$dst, _.KRCWM:$mask, _.RC:$src),
8593             !strconcat(OpcodeStr#_.Suffix,
8594             "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
8595             [(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src),
8596                                      _.KRCWM:$mask,  vectoraddr:$dst))]>,
8597             EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
8598             Sched<[WriteStore]>;
8599 }
8600
// Instantiates avx512_scatter for the "d" index form (opcode dopc) and the
// "q" index form (opcode qopc), naming the records
// NAME#{D,Q}#SUFF#Z{,256,128}.  The 256-bit and 128-bit records are wrapped in
// a HasVLX predicate.  Every record in this multiclass carries VEX_W.
8601 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
8602                         AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8603   defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
8604                                       vy512mem, mscatterv8i32>, EVEX_V512, VEX_W;
8605   defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
8606                                       vz512mem,  mscatterv8i64>, EVEX_V512, VEX_W;
8607 let Predicates = [HasVLX] in {
8608   defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
8609                               vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
8610   defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
8611                               vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
8612   defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
8613                               vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
8614   defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
8615                               vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
8616 }
8617 }
8618
// Counterpart of avx512_scatter_q_pd without VEX_W.  The "q" index forms are
// instantiated with a narrower vector info than their encoding length:
// info256 for the EVEX_V512 record and info128 for both the EVEX_V256 and
// EVEX_V128 records.
// NOTE(review): the matching gather multiclass passes VK2WM as the mask class
// for its 128-bit "q" form, but avx512_scatter has no mask-class parameter, so
// the Q...Z128 record below uses info128's KRCWM.  Confirm this is intended
// for mscatterv2i64.
8619 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
8620                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8621   defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
8622                                        mscatterv16i32>, EVEX_V512;
8623   defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256xmem,
8624                                        mscatterv8i64>, EVEX_V512;
8625 let Predicates = [HasVLX] in {
8626   defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
8627                                           vy256xmem, mscatterv8i32>, EVEX_V256;
8628   defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
8629                                           vy128xmem, mscatterv4i64>, EVEX_V256;
8630   defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
8631                                           vx128xmem, mscatterv4i32>, EVEX_V128;
8632   defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
8633                                           vx64xmem, mscatterv2i64>, EVEX_V128;
8634 }
8635 }
8636
// Floating-point scatters: opcodes 0xA2 ("d" index) and 0xA3 ("q" index),
// with PD records built from avx512vl_f64_info and PS records from
// avx512vl_f32_info.
8637 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
8638                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
8639
// Integer scatters: opcodes 0xA0 ("d" index) and 0xA1 ("q" index), with Q
// records built from avx512vl_i64_info and D records from avx512vl_i32_info.
8640 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
8641                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
8642
8643 // Gather/scatter prefetch.
// Defines one record, `m`, predicated on HasPFI and flagged
// hasSideEffects = 1.  It has no results and no selection pattern; its inputs
// are a mask register (KRC) and a memory operand (memop).
//   opc       - opcode byte.
//   F         - instruction format.
//   OpcodeStr - full mnemonic (no suffix is appended).
8644 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
8645                        RegisterClass KRC, X86MemOperand memop> {
8646   let Predicates = [HasPFI], hasSideEffects = 1 in
8647   def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
8648             !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"),
8649             [], IIC_SSE_PREFETCH>, EVEX, EVEX_K, Sched<[WriteLoad]>;
8650 }
8651
// Prefetch instantiations.  As written below:
//   * "D" index forms use opcode 0xC6, "Q" index forms use opcode 0xC7.
//   * Format selects the operation: MRM1m = gather PF0, MRM2m = gather PF1,
//     MRM5m = scatter PF0, MRM6m = scatter PF1.
//   * PS forms use EVEX_CD8<32|64, CD8VT1> without VEX_W; PD forms add VEX_W.
//   * Only the DPS forms use VK16WM; all others use VK8WM.
// All records are EVEX_V512.
8652 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
8653                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8654
8655 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
8656                      VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8657
8658 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
8659                      VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8660
8661 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
8662                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
8663
8664 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
8665                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8666
8667 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
8668                      VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8669
8670 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
8671                      VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8672
8673 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
8674                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
8675
8676 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
8677                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8678
8679 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
8680                      VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8681
8682 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
8683                      VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8684
8685 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
8686                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
8687
8688 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
8689                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8690
8691 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
8692                      VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8693
8694 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
8695                      VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8696
8697 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
8698                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
8699
// Defines the register-register record `rr` that takes a mask register
// (Vec.KRC) and produces a vector register (Vec.RC).  The selection pattern is
// X86vsext of the mask, typed as Vec.VT.  Vec.Suffix is appended to OpcodeStr
// to form the mnemonic.
8700 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
8701 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
8702                   !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
8703                   [(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))],
8704                   IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
8705 }
8706
8707 // Use 512bit version to implement 128/256 bit in case NoVLX.
//   X86Info - type info of the narrow result being selected.
//   _       - type info of the wide instruction actually emitted.
// The pattern copies the narrow mask into the wide mask register class
// (_.KRC), runs the NAME#"Zrr" instruction on it, and extracts the
// X86Info.SubRegIdx subregister of the wide result.
8708 multiclass avx512_convert_mask_to_vector_lowering<X86VectorVTInfo X86Info,
8709                                                             X86VectorVTInfo _> {
8710
8711   def : Pat<(X86Info.VT (X86vsext (X86Info.KVT X86Info.KRC:$src))),
8712             (X86Info.VT (EXTRACT_SUBREG
8713                            (_.VT (!cast<Instruction>(NAME#"Zrr")
8714                              (_.KVT (COPY_TO_REGCLASS X86Info.KRC:$src,_.KRC)))),
8715                            X86Info.SubRegIdx))>;
8716 }
8717
// Builds the mask-to-vector conversion family for one element width.
//   prd alone         -> Z (512-bit) instruction.
//   prd and HasVLX    -> Z256 and Z128 instructions.
//   prd and NoVLX     -> Z256_Alt / Z128_Alt patterns that select the narrow
//                        operation through the 512-bit info
//                        (see avx512_convert_mask_to_vector_lowering).
8718 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
8719                                  string OpcodeStr, Predicate prd> {
8720 let Predicates = [prd] in
8721   defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
8722
8723   let Predicates = [prd, HasVLX] in {
8724     defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
8725     defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
8726   }
8727 let Predicates = [prd, NoVLX] in {
8728    defm Z256_Alt :   avx512_convert_mask_to_vector_lowering<VTInfo.info256,VTInfo.info512>;
8729    defm Z128_Alt :   avx512_convert_mask_to_vector_lowering<VTInfo.info128,VTInfo.info512>;
8730   }
8731
8732 }
8733
// Mask-to-vector conversions.  The byte and word forms use opcode 0x28 and
// the HasBWI predicate; the dword and qword forms use opcode 0x38 and the
// HasDQI predicate.  The W and Q forms additionally carry VEX_W.  All four
// share the "vpmovm2" mnemonic stem; cvt_by_vec_width appends the type suffix.
8734 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
8735 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
8736 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
8737 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
8738
// Defines the register-register record `rr` that takes a vector register
// (_.RC) and produces a mask register (_.KRC).  The selection pattern is
// X86cvt2mask applied to the source typed as _.VT.  OpcodeStr is used as the
// full mnemonic; no suffix is appended.
8739 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
8740     def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
8741                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
8742                         [(set _.KRC:$dst, (X86cvt2mask (_.VT _.RC:$src)))],
8743                         IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
8744 }
8745
8746 // Use 512bit version to implement 128/256 bit in case NoVLX.
//   ExtendInfo - type info of the wide instruction actually emitted.
//   _          - type info of the narrow source being selected.
// The pattern inserts the narrow source at _.SubRegIdx of an IMPLICIT_DEF
// value of type ExtendInfo.VT, runs the NAME#"Zrr" instruction on it, and
// copies the result into the narrow mask register class (_.KRC).
8747 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
8748                                                             X86VectorVTInfo _> {
8749
8750   def : Pat<(_.KVT (X86cvt2mask (_.VT _.RC:$src))),
8751             (_.KVT (COPY_TO_REGCLASS
8752                      (!cast<Instruction>(NAME#"Zrr")
8753                        (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
8754                                       _.RC:$src, _.SubRegIdx)),
8755                    _.KRC))>;
8756 }
8757
// Builds the vector-to-mask conversion family for one element width.
//   prd alone         -> Z (512-bit) instruction.
//   prd and HasVLX    -> Z256 and Z128 instructions.
//   prd and NoVLX     -> Z256_Alt / Z128_Alt patterns that select the narrow
//                        operation through the 512-bit info
//                        (see convert_vector_to_mask_lowering).
8758 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
8759                                    AVX512VLVectorVTInfo VTInfo, Predicate prd> {
8760   let Predicates = [prd] in
8761     defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
8762                                             EVEX_V512;
8763
8764   let Predicates = [prd, HasVLX] in {
8765     defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
8766                                               EVEX_V256;
8767     defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
8768                                                EVEX_V128;
8769   }
8770   let Predicates = [prd, NoVLX] in {
8771     defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256>;
8772     defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128>;
8773   }
8774 }
8775
// Vector-to-mask conversions.  The byte and word forms use opcode 0x29 and
// the HasBWI predicate; the dword and qword forms use opcode 0x39 and the
// HasDQI predicate.  The W and Q forms additionally carry VEX_W.
8776 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
8777                                               avx512vl_i8_info, HasBWI>;
8778 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
8779                                               avx512vl_i16_info, HasBWI>, VEX_W;
8780 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
8781                                               avx512vl_i32_info, HasDQI>;
8782 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
8783                                               avx512vl_i64_info, HasDQI>, VEX_W;
8784
8785 //===----------------------------------------------------------------------===//
8786 // AVX-512 - COMPRESS and EXPAND
8787 //
8788
8789 // FIXME: Is there a better scheduler itinerary for VPCOMPRESS/VPEXPAND?
// Both itinerary bundles are currently identical: they use the
// IIC_SSE_INTALU_P_RR / IIC_SSE_INTALU_P_RM itineraries and set their Sched
// field to WriteShuffle256.
8790 let Sched = WriteShuffle256 in {
8791 def AVX512_COMPRESS : OpndItins<
8792   IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
8793 >;
8794 def AVX512_EXPAND : OpndItins<
8795   IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
8796 >;
8797 }
8798
// Defines the compress instruction records for one vector width:
//   rr  - register form built with AVX512_maskable, matching X86compress.
//   mr  - store-to-memory form with no selection pattern.
//   mrk - masked store-to-memory form with no selection pattern of its own;
//         the pattern in compress_by_vec_width_lowering selects it.
8799 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
8800                                  string OpcodeStr, OpndItins itins> {
8801   defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
8802               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
8803               (_.VT (X86compress _.RC:$src1)), itins.rr>, AVX5128IBase,
8804               Sched<[itins.Sched]>;
8805
       // This brace-less `let` covers only the `mr` definition that follows it;
       // `mrk` below is not inside its scope.
8806   let mayStore = 1, hasSideEffects = 0 in
8807   def mr : AVX5128I<opc, MRMDestMem, (outs),
8808               (ins _.MemOp:$dst, _.RC:$src),
8809               OpcodeStr # "\t{$src, $dst|$dst, $src}",
8810               []>, EVEX_CD8<_.EltSize, CD8VT1>,
8811               Sched<[itins.Sched.Folded]>;
8812
8813   def mrk : AVX5128I<opc, MRMDestMem, (outs),
8814               (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
8815               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8816               []>,
8817               EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
8818               Sched<[itins.Sched.Folded]>;
8819 }
8820
// Selects X86mCompressingStore of a _.VT value under mask _.KRCWM to the
// masked store record whose name is NAME # _.ZSuffix # "mrk".
8821 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _ > {
8822   def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
8823                                                (_.VT _.RC:$src)),
8824             (!cast<Instruction>(NAME#_.ZSuffix##mrk)
8825                             addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
8826 }
8827
// Builds the compress family for one element type: the instruction records
// and the compressing-store pattern at 512 bits (under Pred), and at 256 and
// 128 bits (under Pred plus HasVLX).  Pred defaults to HasAVX512.
8828 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
8829                                  OpndItins itins,
8830                                  AVX512VLVectorVTInfo VTInfo,
8831                                  Predicate Pred = HasAVX512> {
8832   let Predicates = [Pred] in
8833   defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, itins>,
8834            compress_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
8835
8836   let Predicates = [Pred, HasVLX] in {
8837     defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, itins>,
8838                 compress_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
8839     defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, itins>,
8840                 compress_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
8841   }
8842 }
8843
// Compress instantiations.  The integer forms use opcode 0x8B and the
// floating-point forms use opcode 0x8A; the 64-bit element forms add VEX_W.
// All use the default HasAVX512 predicate and the AVX512_COMPRESS itineraries.
8844 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", AVX512_COMPRESS,
8845                                           avx512vl_i32_info>, EVEX;
8846 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", AVX512_COMPRESS,
8847                                           avx512vl_i64_info>, EVEX, VEX_W;
8848 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", AVX512_COMPRESS,
8849                                           avx512vl_f32_info>, EVEX;
8850 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", AVX512_COMPRESS,
8851                                           avx512vl_f64_info>, EVEX, VEX_W;
8852
8853 // expand
// Defines the expand instruction records for one vector width, both built
// with AVX512_maskable and both matching X86expand:
//   rr - source is a register.
//   rm - source is a full-vector load (_.LdFrag, bitconverted to _.VT).
8854 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
8855                                  string OpcodeStr, OpndItins itins> {
8856   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8857               (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
8858               (_.VT (X86expand _.RC:$src1)), itins.rr>, AVX5128IBase,
8859               Sched<[itins.Sched]>;
8860
8861   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8862               (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
8863               (_.VT (X86expand (_.VT (bitconvert
8864                                       (_.LdFrag addr:$src1))))), itins.rm>,
8865             AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
8866             Sched<[itins.Sched.Folded, ReadAfterLd]>;
8867 }
8868
// Selection patterns for X86mExpandingLoad:
//   * with an undef pass-through operand, select the NAME#_.ZSuffix#"rmkz"
//     record (operands: mask, address);
//   * with a register pass-through operand $src0, select the
//     NAME#_.ZSuffix#"rmk" record (operands: $src0, mask, address).
8869 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _ > {
8870
8871   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
8872             (!cast<Instruction>(NAME#_.ZSuffix##rmkz)
8873                                         _.KRCWM:$mask, addr:$src)>;
8874
8875   def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
8876                                                (_.VT _.RC:$src0))),
8877             (!cast<Instruction>(NAME#_.ZSuffix##rmk)
8878                             _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
8879 }
8880
// Builds the expand family for one element type: the instruction records and
// the expanding-load patterns at 512 bits (under Pred), and at 256 and 128
// bits (under Pred plus HasVLX).  Pred defaults to HasAVX512.
8881 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
8882                                OpndItins itins,
8883                                AVX512VLVectorVTInfo VTInfo,
8884                                Predicate Pred = HasAVX512> {
8885   let Predicates = [Pred] in
8886   defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, itins>,
8887            expand_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
8888
8889   let Predicates = [Pred, HasVLX] in {
8890     defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, itins>,
8891                 expand_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
8892     defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, itins>,
8893                 expand_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
8894   }
8895 }
8896
// Expand instantiations.  The integer forms use opcode 0x89 and the
// floating-point forms use opcode 0x88; the 64-bit element forms add VEX_W.
// All use the default HasAVX512 predicate and the AVX512_EXPAND itineraries.
8897 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", AVX512_EXPAND,
8898                                       avx512vl_i32_info>, EVEX;
8899 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", AVX512_EXPAND,
8900                                       avx512vl_i64_info>, EVEX, VEX_W;
8901 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", AVX512_EXPAND,
8902                                       avx512vl_f32_info>, EVEX;
8903 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", AVX512_EXPAND,
8904                                       avx512vl_f64_info>, EVEX, VEX_W;
8905
8906 //handle instruction  reg_vec1 = op(reg_vec,imm)
8907 //                               op(mem_vec,imm)
8908 //                               op(broadcast(eltVt),imm)
8909 //all instruction created with FROUND_CURRENT
// Three AVX512_maskable records are defined, all taking an i32u8imm immediate
// as $src2 and appending _.Suffix to OpcodeStr:
//   rri  - vector register source.
//   rmi  - full-vector load source (_.LdFrag, bitconverted to _.VT).
//   rmbi - broadcast of a scalar load (X86VBroadcast of _.ScalarLdFrag);
//          marked EVEX_B, with _.BroadcastStr in the assembly string.
8910 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
8911                                       OpndItins itins, X86VectorVTInfo _> {
8912   let ExeDomain = _.ExeDomain in {
8913   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8914                       (ins _.RC:$src1, i32u8imm:$src2),
8915                       OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
8916                       (OpNode (_.VT _.RC:$src1),
8917                               (i32 imm:$src2)), itins.rr>, Sched<[itins.Sched]>;
8918   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8919                     (ins _.MemOp:$src1, i32u8imm:$src2),
8920                     OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
8921                     (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
8922                             (i32 imm:$src2)), itins.rm>,
8923                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
8924   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8925                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
8926                     OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
8927                     "${src1}"##_.BroadcastStr##", $src2",
8928                     (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
8929                             (i32 imm:$src2)), itins.rm>, EVEX_B,
8930                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
8931   }
8932 }
8933
8934 //handle instruction  reg_vec1 = op(reg_vec2,imm),{sae}
// Defines the single register record `rrib`: one vector source, an i32u8imm
// immediate, and "{sae}" in the assembly string.  The pattern passes
// FROUND_NO_EXC as a third operand to OpNode, and the record is marked EVEX_B.
8935 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
8936                                           SDNode OpNode, OpndItins itins,
8937                                           X86VectorVTInfo _> {
8938   let ExeDomain = _.ExeDomain in
8939   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8940                       (ins _.RC:$src1, i32u8imm:$src2),
8941                       OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
8942                       "$src1, {sae}, $src2",
8943                       (OpNode (_.VT _.RC:$src1),
8944                               (i32 imm:$src2),
8945                               (i32 FROUND_NO_EXC)), itins.rr>,
8946                       EVEX_B, Sched<[itins.Sched]>;
8947 }
8948
// Builds the unary packed-FP-with-immediate family across vector widths.
//   prd alone      -> Z: the rri/rmi/rmbi records (OpNode) plus the {sae}
//                     record rrib (OpNodeRnd), on info512.
//   prd and HasVLX -> Z128 and Z256: rri/rmi/rmbi only; no {sae} record is
//                     defined for these widths.
8949 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
8950             AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
8951             SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
8952   let Predicates = [prd] in {
8953     defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
8954                                            _.info512>,
8955                 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
8956                                                itins, _.info512>, EVEX_V512;
8957   }
8958   let Predicates = [prd, HasVLX] in {
8959     defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
8960                                            _.info128>, EVEX_V128;
8961     defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
8962                                            _.info256>, EVEX_V256;
8963   }
8964 }
8965
8966 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
8967 //                               op(reg_vec2,mem_vec,imm)
8968 //                               op(reg_vec2,broadcast(eltVt),imm)
8969 //all instruction created with FROUND_CURRENT
// Three AVX512_maskable records are defined.  Each takes a vector register
// $src1, a second source $src2 and an i32u8imm immediate $src3:
//   rri  - $src2 is a vector register.
//   rmi  - $src2 is a full-vector load (_.LdFrag, bitconverted to _.VT).
//   rmbi - $src2 is a broadcast scalar load; marked EVEX_B.
// Unlike avx512_unary_fp_packed_imm, OpcodeStr is used as given here (no
// _.Suffix is appended).
8970 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
8971                                 OpndItins itins, X86VectorVTInfo _>{
8972   let ExeDomain = _.ExeDomain in {
8973   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8974                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
8975                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8976                       (OpNode (_.VT _.RC:$src1),
8977                               (_.VT _.RC:$src2),
8978                               (i32 imm:$src3)), itins.rr>,
8979                       Sched<[itins.Sched]>;
8980   defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8981                     (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
8982                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8983                     (OpNode (_.VT _.RC:$src1),
8984                             (_.VT (bitconvert (_.LdFrag addr:$src2))),
8985                             (i32 imm:$src3)), itins.rm>,
8986                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
8987   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8988                     (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
8989                     OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
8990                     "$src1, ${src2}"##_.BroadcastStr##", $src3",
8991                     (OpNode (_.VT _.RC:$src1),
8992                             (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
8993                             (i32 imm:$src3)), itins.rm>, EVEX_B,
8994                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
8995   }
8996 }
8997
8998 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
8999 //                               op(reg_vec2,mem_vec,imm)
// The destination and source type infos are separate parameters: the result
// is typed DestInfo.VT, while both vector sources are typed SrcInfo.VT.  The
// immediate is a u8imm matched as (i8 imm).  Two AVX512_maskable records are
// defined:
//   rri - $src2 is a vector register.
//   rmi - $src2 is a full-vector load (SrcInfo.LdFrag, bitconverted).
9000 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
9001                               OpndItins itins, X86VectorVTInfo DestInfo,
9002                               X86VectorVTInfo SrcInfo>{
9003   let ExeDomain = DestInfo.ExeDomain in {
9004   defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9005                   (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
9006                   OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9007                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
9008                                (SrcInfo.VT SrcInfo.RC:$src2),
9009                                (i8 imm:$src3))), itins.rr>,
9010                   Sched<[itins.Sched]>;
9011   defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9012                 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
9013                 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9014                 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
9015                              (SrcInfo.VT (bitconvert
9016                                                 (SrcInfo.LdFrag addr:$src2))),
9017                              (i8 imm:$src3))), itins.rm>,
9018                 Sched<[itins.Sched.Folded, ReadAfterLd]>;
9019   }
9020 }
9021
9022 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
9023 //                               op(reg_vec2,mem_vec,imm)
9024 //                               op(reg_vec2,broadcast(eltVt),imm)
// Inherits rri/rmi from avx512_3Op_rm_imm8 with the same VT info used for both
// source and destination, and adds the rmbi form whose second source is a
// scalar load wrapped in X86VBroadcast (EVEX_B).
9025 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
9026                            OpndItins itins, X86VectorVTInfo _>:
9027   avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, itins, _, _>{
9028
9029   let ExeDomain = _.ExeDomain in
9030   defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9031                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
9032                     OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
9033                     "$src1, ${src2}"##_.BroadcastStr##", $src3",
9034                     (OpNode (_.VT _.RC:$src1),
9035                             (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
9036                             (i8 imm:$src3)), itins.rm>, EVEX_B,
9037                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
9038 }
9039
9040 //handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
9041 //                                      op(reg_vec2,mem_scalar,imm)
// Both forms go through AVX512_maskable_scalar. In the rmi form the second
// source is a _.ScalarLdFrag load wrapped in scalar_to_vector. The immediate
// is declared as i32u8imm and matched as (i32 imm).
9042 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
9043                                 OpndItins itins, X86VectorVTInfo _> {
9044   let ExeDomain = _.ExeDomain in {
9045   defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9046                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
9047                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9048                       (OpNode (_.VT _.RC:$src1),
9049                               (_.VT _.RC:$src2),
9050                               (i32 imm:$src3)), itins.rr>,
9051                       Sched<[itins.Sched]>;
9052   defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9053                     (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9054                     OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9055                     (OpNode (_.VT _.RC:$src1),
9056                             (_.VT (scalar_to_vector
9057                                       (_.ScalarLdFrag addr:$src2))),
9058                             (i32 imm:$src3)), itins.rm>,
9059                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
9060   }
9061 }
9062
9063 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Register-only form (rrib). The pattern passes (i32 FROUND_NO_EXC) as an
// extra trailing operand to OpNode, the asm strings contain "{sae}", and the
// definition is marked EVEX_B.
9064 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
9065                                     SDNode OpNode, OpndItins itins,
9066                                     X86VectorVTInfo _> {
9067   let ExeDomain = _.ExeDomain in
9068   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9069                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
9070                       OpcodeStr, "$src3, {sae}, $src2, $src1",
9071                       "$src1, $src2, {sae}, $src3",
9072                       (OpNode (_.VT _.RC:$src1),
9073                               (_.VT _.RC:$src2),
9074                               (i32 imm:$src3),
9075                               (i32 FROUND_NO_EXC)), itins.rr>,
9076                       EVEX_B, Sched<[itins.Sched]>;
9077 }
9078
9079 //handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Scalar counterpart of avx512_fp_sae_packed_imm: same operands and
// FROUND_NO_EXC pattern operand, but built on AVX512_maskable_scalar. Note the
// record name is spelled explicitly as NAME#rrib here.
9080 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
9081                                     OpndItins itins, X86VectorVTInfo _> {
9082   let ExeDomain = _.ExeDomain in
9083   defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9084                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
9085                       OpcodeStr, "$src3, {sae}, $src2, $src1",
9086                       "$src1, $src2, {sae}, $src3",
9087                       (OpNode (_.VT _.RC:$src1),
9088                               (_.VT _.RC:$src2),
9089                               (i32 imm:$src3),
9090                               (i32 FROUND_NO_EXC)), itins.rr>,
9091                       EVEX_B, Sched<[itins.Sched]>;
9092 }
9093
// Instantiates the packed two-source + immediate forms for all vector widths:
//   Z    (512-bit, [prd])         - rri/rmi/rmbi plus the {sae} rrib form,
//                                   which uses OpNodeRnd
//   Z128 / Z256 ([prd, HasVLX])   - rri/rmi/rmbi only, no {sae} form
9094 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
9095             AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
9096             SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
9097   let Predicates = [prd] in {
9098     defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info512>,
9099                 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, itins, _.info512>,
9100                                   EVEX_V512;
9101
9102   }
9103   let Predicates = [prd, HasVLX] in {
9104     defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info128>,
9105                                   EVEX_V128;
9106     defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info256>,
9107                                   EVEX_V256;
9108   }
9109 }
9110
// Instantiates avx512_3Op_rm_imm8 (rri/rmi, distinct source and destination
// types) for 512-bit under [Pred] and for 128/256-bit under [Pred, HasVLX].
// Pred defaults to HasBWI. Every instantiation also mixes in AVX512AIi8Base
// and EVEX_4V.
9111 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
9112                    OpndItins itins, AVX512VLVectorVTInfo DestInfo,
9113                    AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
9114   let Predicates = [Pred] in {
9115     defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info512,
9116                            SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
9117   }
9118   let Predicates = [Pred, HasVLX] in {
9119     defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info128,
9120                            SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
9121     defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins,  DestInfo.info256,
9122                            SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
9123   }
9124 }
9125
// Instantiates avx512_3Op_imm8 (rri/rmi/rmbi) for 512-bit under [Pred] and for
// 128/256-bit under [Pred, HasVLX]. Pred defaults to HasAVX512. Unlike
// avx512_common_3Op_rm_imm8, no AVX512AIi8Base/EVEX_4V is added here; users
// of this multiclass supply those.
9126 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
9127                                   bits<8> opc, SDNode OpNode, OpndItins itins,
9128                                   Predicate Pred = HasAVX512> {
9129   let Predicates = [Pred] in {
9130     defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
9131   }
9132   let Predicates = [Pred, HasVLX] in {
9133     defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
9134     defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
9135   }
9136 }
9137
// Scalar two-source + immediate instruction: a single Z128 instantiation under
// [prd] that combines the rri/rmi forms (OpNode) with the {sae} rrib form
// (OpNodeRnd). No vector-length mix-in is applied here.
9138 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
9139                   X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
9140                   SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
9141   let Predicates = [prd] in {
9142      defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, itins, _>,
9143                  avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, itins, _>;
9144   }
9145 }
9146
// Instantiates avx512_common_unary_fp_sae_packed_imm (defined earlier in this
// file) twice: PS with avx512vl_f32_info / opcPs / itins.s / EVEX_CD8<32>, and
// PD with avx512vl_f64_info / opcPd / itins.d / EVEX_CD8<64> plus VEX_W.
9147 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
9148                     bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
9149                     SDNode OpNodeRnd, SizeItins itins, Predicate prd>{
9150   defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
9151                             opcPs, OpNode, OpNodeRnd, itins.s, prd>,
9152                             EVEX_CD8<32, CD8VF>;
9153   defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
9154                             opcPd, OpNode, OpNodeRnd, itins.d, prd>,
9155                             EVEX_CD8<64, CD8VF>, VEX_W;
9156 }
9157
// Unary packed FP instructions with an immediate (PS and PD variants each).
// VREDUCE is gated on HasDQI; VRNDSCALE and VGETMANT on HasAVX512. VRNDSCALE
// is the only one whose PS and PD opcodes differ (0x08 / 0x09).
9158 defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
9159                               X86VReduce, X86VReduceRnd, SSE_ALU_ITINS_P, HasDQI>,
9160                               AVX512AIi8Base, EVEX;
9161 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
9162                               X86VRndScale, X86VRndScaleRnd, SSE_ALU_ITINS_P, HasAVX512>,
9163                               AVX512AIi8Base, EVEX;
9164 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
9165                               X86VGetMant, X86VGetMantRnd, SSE_ALU_ITINS_P, HasAVX512>,
9166                               AVX512AIi8Base, EVEX;
9167
// Packed VRANGE (two sources + immediate, with a {sae} form at 512 bits).
// Both element widths use opcode 0x50 and require HasDQI; the PD variant adds
// VEX_W.
9168 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
9169                                                 0x50, X86VRange, X86VRangeRnd,
9170                                                 SSE_ALU_F64P, HasDQI>,
9171       AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
9172 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
9173                                                 0x50, X86VRange, X86VRangeRnd,
9174                                                 SSE_ALU_F32P, HasDQI>,
9175       AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
9176
// Scalar two-source + immediate instructions (SD and SS variants each), all
// VEX_LIG / EVEX_4V with CD8VT1 displacement scaling; SD variants add VEX_W.
//   VRANGES*   - opcode 0x51, HasDQI
//   VREDUCES*  - opcode 0x57, HasDQI
//   VGETMANTS* - opcode 0x27, HasAVX512
9177 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
9178       f64x_info, 0x51, X86Ranges, X86RangesRnd, SSE_ALU_F64S, HasDQI>,
9179       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
9180 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
9181       0x51, X86Ranges, X86RangesRnd, SSE_ALU_F32S, HasDQI>,
9182       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
9183
9184 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
9185       0x57, X86Reduces, X86ReducesRnd, SSE_ALU_F64S, HasDQI>,
9186       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
9187 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
9188       0x57, X86Reduces, X86ReducesRnd, SSE_ALU_F32S, HasDQI>,
9189       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
9190
9191 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
9192       0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F64S, HasAVX512>,
9193       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
9194 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
9195       0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F32S, HasAVX512>,
9196       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
9197
// Select the generic FP rounding nodes on 512-bit vectors (v16f32, v8f64) to
// the register form of VRNDSCALEPS/PD with a fixed immediate:
//   ffloor -> 0x9, fnearbyint -> 0xC, fceil -> 0xA, frint -> 0x4, ftrunc -> 0xB
9198 let Predicates = [HasAVX512] in {
9199 def : Pat<(v16f32 (ffloor VR512:$src)),
9200           (VRNDSCALEPSZrri VR512:$src, (i32 0x9))>;
9201 def : Pat<(v16f32 (fnearbyint VR512:$src)),
9202           (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
9203 def : Pat<(v16f32 (fceil VR512:$src)),
9204           (VRNDSCALEPSZrri VR512:$src, (i32 0xA))>;
9205 def : Pat<(v16f32 (frint VR512:$src)),
9206           (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
9207 def : Pat<(v16f32 (ftrunc VR512:$src)),
9208           (VRNDSCALEPSZrri VR512:$src, (i32 0xB))>;
9209
9210 def : Pat<(v8f64 (ffloor VR512:$src)),
9211           (VRNDSCALEPDZrri VR512:$src, (i32 0x9))>;
9212 def : Pat<(v8f64 (fnearbyint VR512:$src)),
9213           (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
9214 def : Pat<(v8f64 (fceil VR512:$src)),
9215           (VRNDSCALEPDZrri VR512:$src, (i32 0xA))>;
9216 def : Pat<(v8f64 (frint VR512:$src)),
9217           (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
9218 def : Pat<(v8f64 (ftrunc VR512:$src)),
9219           (VRNDSCALEPDZrri VR512:$src, (i32 0xB))>;
9220 }
9221
// Same rounding-node selection as the 512-bit patterns, for 128-bit (v4f32,
// v2f64) and 256-bit (v8f32, v4f64) vectors, using the Z128/Z256 register
// forms of VRNDSCALEPS/PD and the same immediates:
//   ffloor -> 0x9, fnearbyint -> 0xC, fceil -> 0xA, frint -> 0x4, ftrunc -> 0xB
9222 let Predicates = [HasVLX] in {
9223 def : Pat<(v4f32 (ffloor VR128X:$src)),
9224           (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x9))>;
9225 def : Pat<(v4f32 (fnearbyint VR128X:$src)),
9226           (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xC))>;
9227 def : Pat<(v4f32 (fceil VR128X:$src)),
9228           (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xA))>;
9229 def : Pat<(v4f32 (frint VR128X:$src)),
9230           (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x4))>;
9231 def : Pat<(v4f32 (ftrunc VR128X:$src)),
9232           (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xB))>;
9233
9234 def : Pat<(v2f64 (ffloor VR128X:$src)),
9235           (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x9))>;
9236 def : Pat<(v2f64 (fnearbyint VR128X:$src)),
9237           (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xC))>;
9238 def : Pat<(v2f64 (fceil VR128X:$src)),
9239           (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xA))>;
9240 def : Pat<(v2f64 (frint VR128X:$src)),
9241           (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x4))>;
9242 def : Pat<(v2f64 (ftrunc VR128X:$src)),
9243           (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xB))>;
9244
9245 def : Pat<(v8f32 (ffloor VR256X:$src)),
9246           (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x9))>;
9247 def : Pat<(v8f32 (fnearbyint VR256X:$src)),
9248           (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xC))>;
9249 def : Pat<(v8f32 (fceil VR256X:$src)),
9250           (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xA))>;
9251 def : Pat<(v8f32 (frint VR256X:$src)),
9252           (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x4))>;
9253 def : Pat<(v8f32 (ftrunc VR256X:$src)),
9254           (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xB))>;
9255
9256 def : Pat<(v4f64 (ffloor VR256X:$src)),
9257           (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x9))>;
9258 def : Pat<(v4f64 (fnearbyint VR256X:$src)),
9259           (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xC))>;
9260 def : Pat<(v4f64 (fceil VR256X:$src)),
9261           (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xA))>;
9262 def : Pat<(v4f64 (frint VR256X:$src)),
9263           (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x4))>;
9264 def : Pat<(v4f64 (ftrunc VR256X:$src)),
9265           (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xB))>;
9266 }
9267
// Shuffle instructions built on X86Shuf128 with an 8-bit immediate. Only the
// 512-bit (Z) and 256-bit (Z256) forms are instantiated; no Z128 form is
// defined by this multiclass.
9268 multiclass avx512_shuff_packed_128<string OpcodeStr, OpndItins itins,
9269                                    AVX512VLVectorVTInfo _, bits<8> opc>{
9270   let Predicates = [HasAVX512] in {
9271     defm Z    : avx512_3Op_imm8<opc, OpcodeStr, X86Shuf128, itins, _.info512>, EVEX_V512;
9272
9273   }
9274   let Predicates = [HasAVX512, HasVLX] in {
9275      defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, X86Shuf128, itins, _.info256>, EVEX_V256;
9276   }
9277 }
9278
// X86Shuf128-based shuffles. The FP variants (VSHUFF*) use opcode 0x23 and the
// integer variants (VSHUFI*) use 0x43; the 64-bit element variants add VEX_W.
9279 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", SSE_SHUFP,
9280       avx512vl_f32_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
9281 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", SSE_SHUFP,
9282       avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
9283 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", SSE_SHUFP,
9284       avx512vl_i32_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
9285 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", SSE_SHUFP,
9286       avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
9287
// Register-source X86SubVBroadcast of a 128-bit vector to 512 bits. The source
// is inserted at sub_xmm into an IMPLICIT_DEF 512-bit register, and that value
// is passed as both operands of a VSHUF{F,I}{32X4,64X2} with immediate 0.
// v32i16 and v64i8 results reuse VSHUFI32X4.
9288 let Predicates = [HasAVX512] in {
9289 // Provide fallback in case the load node that is used in the broadcast
9290 // patterns above is used by additional users, which prevents the pattern
9291 // selection.
9292 def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
9293           (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9294                           (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9295                           0)>;
9296 def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
9297           (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9298                           (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9299                           0)>;
9300
9301 def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
9302           (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9303                           (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9304                           0)>;
9305 def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
9306           (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9307                           (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9308                           0)>;
9309
9310 def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
9311           (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9312                           (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9313                           0)>;
9314
9315 def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
9316           (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9317                           (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9318                           0)>;
9319 }
9320
// VALIGN wrapper: instantiates avx512_common_3Op_imm8 with the fixed opcode
// 0x03 and the X86VAlign node (default HasAVX512 predicate), adding
// AVX512AIi8Base and EVEX_4V.
9321 multiclass avx512_valign<string OpcodeStr, OpndItins itins,
9322                          AVX512VLVectorVTInfo VTInfo_I> {
9323   defm NAME:       avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign, itins>,
9324                            AVX512AIi8Base, EVEX_4V;
9325 }
9326
// Element-wise align instructions: VALIGND (i32 elements) and VALIGNQ (i64
// elements, VEX_W).
9327 defm VALIGND: avx512_valign<"valignd", SSE_PALIGN, avx512vl_i32_info>,
9328                                                   EVEX_CD8<32, CD8VF>;
9329 defm VALIGNQ: avx512_valign<"valignq", SSE_PALIGN, avx512vl_i64_info>,
9330                                                   EVEX_CD8<64, CD8VF>, VEX_W;
9331
// VPALIGNR on i8 vectors: opcode 0x0F, X86PAlignr node, rri/rmi forms only,
// gated on the default HasBWI predicate of avx512_common_3Op_rm_imm8.
9332 defm VPALIGNR:   avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", SSE_PALIGN,
9333                                           avx512vl_i8_info, avx512vl_i8_info>,
9334                 EVEX_CD8<8, CD8VF>;
9335
9336 // Fragments to help convert valignq into masked valignd. Or valignq/valignd
9337 // into vpalignr.
// Each transform multiplies the immediate's zero-extended value by a constant
// and returns the product through getI8Imm:
//   ValignqImm32XForm - x2
//   ValignqImm8XForm  - x8
//   ValigndImm8XForm  - x4
// NOTE(review): the factors presumably are the element-size ratios
// (64/32, 64/8, 32/8) between the original and the rewritten instruction;
// confirm against the lowering that produces these nodes.
9338 def ValignqImm32XForm : SDNodeXForm<imm, [{
9339   return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
9340 }]>;
9341 def ValignqImm8XForm : SDNodeXForm<imm, [{
9342   return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
9343 }]>;
9344 def ValigndImm8XForm : SDNodeXForm<imm, [{
9345   return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
9346 }]>;
9347
// Patterns that fold a vselect over a bitconverted align node into a masked
// instruction of a different element type.
//   From     - VT info of the OpNode being matched
//   To       - VT info of the vselect / of the instruction that is emitted
//   ImmXForm - rewrites the immediate for the To-typed instruction
// The instruction is looked up by name as OpcodeStr plus a suffix:
//   rrik  / rmik  - merge-masking: false operand of the vselect is $src0
//   rrikz / rmikz - zero-masking: false operand is To.ImmAllZerosV
// The memory forms match a To.LdFrag load bitconverted into the second source.
9348 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
9349                                         X86VectorVTInfo From, X86VectorVTInfo To,
9350                                         SDNodeXForm ImmXForm> {
9351   def : Pat<(To.VT (vselect To.KRCWM:$mask,
9352                             (bitconvert
9353                              (From.VT (OpNode From.RC:$src1, From.RC:$src2,
9354                                               imm:$src3))),
9355                             To.RC:$src0)),
9356             (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
9357                                                   To.RC:$src1, To.RC:$src2,
9358                                                   (ImmXForm imm:$src3))>;
9359
9360   def : Pat<(To.VT (vselect To.KRCWM:$mask,
9361                             (bitconvert
9362                              (From.VT (OpNode From.RC:$src1, From.RC:$src2,
9363                                               imm:$src3))),
9364                             To.ImmAllZerosV)),
9365             (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
9366                                                    To.RC:$src1, To.RC:$src2,
9367                                                    (ImmXForm imm:$src3))>;
9368
9369   def : Pat<(To.VT (vselect To.KRCWM:$mask,
9370                             (bitconvert
9371                              (From.VT (OpNode From.RC:$src1,
9372                                       (bitconvert (To.LdFrag addr:$src2)),
9373                                       imm:$src3))),
9374                             To.RC:$src0)),
9375             (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
9376                                                   To.RC:$src1, addr:$src2,
9377                                                   (ImmXForm imm:$src3))>;
9378
9379   def : Pat<(To.VT (vselect To.KRCWM:$mask,
9380                             (bitconvert
9381                              (From.VT (OpNode From.RC:$src1,
9382                                       (bitconvert (To.LdFrag addr:$src2)),
9383                                       imm:$src3))),
9384                             To.ImmAllZerosV)),
9385             (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
9386                                                    To.RC:$src1, addr:$src2,
9387                                                    (ImmXForm imm:$src3))>;
9388 }
9389
// Extends avx512_vpalign_mask_lowering with patterns whose second source is a
// To-typed broadcast of a scalar load (X86VBroadcast of To.ScalarLdFrag),
// bitconverted to the From type:
//   rmbi   - unmasked; the matched node is From.VT with no vselect around it
//   rmbik  - merge-masking with passthrough $src0
//   rmbikz - zero-masking (To.ImmAllZerosV)
9390 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
9391                                            X86VectorVTInfo From,
9392                                            X86VectorVTInfo To,
9393                                            SDNodeXForm ImmXForm> :
9394       avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
9395   def : Pat<(From.VT (OpNode From.RC:$src1,
9396                              (bitconvert (To.VT (X86VBroadcast
9397                                                 (To.ScalarLdFrag addr:$src2)))),
9398                              imm:$src3)),
9399             (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
9400                                                   (ImmXForm imm:$src3))>;
9401
9402   def : Pat<(To.VT (vselect To.KRCWM:$mask,
9403                             (bitconvert
9404                              (From.VT (OpNode From.RC:$src1,
9405                                       (bitconvert
9406                                        (To.VT (X86VBroadcast
9407                                                (To.ScalarLdFrag addr:$src2)))),
9408                                       imm:$src3))),
9409                             To.RC:$src0)),
9410             (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
9411                                                    To.RC:$src1, addr:$src2,
9412                                                    (ImmXForm imm:$src3))>;
9413
9414   def : Pat<(To.VT (vselect To.KRCWM:$mask,
9415                             (bitconvert
9416                              (From.VT (OpNode From.RC:$src1,
9417                                       (bitconvert
9418                                        (To.VT (X86VBroadcast
9419                                                (To.ScalarLdFrag addr:$src2)))),
9420                                       imm:$src3))),
9421                             To.ImmAllZerosV)),
9422             (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
9423                                                     To.RC:$src1, addr:$src2,
9424                                                     (ImmXForm imm:$src3))>;
9425 }
9426
// 512-bit: match X86VAlign on v8i64 and emit VALIGNDZ (v16i32), scaling the
// immediate with ValignqImm32XForm. Includes the broadcast-memory patterns.
9427 let Predicates = [HasAVX512] in {
9428   // For 512-bit we lower to the widest element type we can. So we only need
9429   // to handle converting valignq to valignd.
9430   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
9431                                          v16i32_info, ValignqImm32XForm>;
9432 }
9433
// 128-bit and 256-bit: match X86VAlign on v2i64 / v4i64 and emit VALIGNDZ128 /
// VALIGNDZ256 (v4i32 / v8i32), scaling the immediate with ValignqImm32XForm.
9434 let Predicates = [HasVLX] in {
9435   // For 128-bit we lower to the widest element type we can. So we only need
9436   // to handle converting valignq to valignd.
9437   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
9438                                          v4i32x_info, ValignqImm32XForm>;
9439   // For 256-bit we lower to the widest element type we can. So we only need
9440   // to handle converting valignq to valignd.
9441   defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
9442                                          v8i32x_info, ValignqImm32XForm>;
9443 }
9444
// Match byte-masked (v16i8) selects of 128-bit X86VAlign on v2i64 / v4i32 and
// emit masked VPALIGNRZ128, scaling the immediate to bytes with
// ValignqImm8XForm / ValigndImm8XForm. No broadcast patterns are added (the
// non-_mb multiclass is used).
// NOTE(review): the comment below mentions 256 bit, but only the VPALIGNRZ128
// forms are instantiated in this block - confirm whether that is intentional.
9445 let Predicates = [HasVLX, HasBWI] in {
9446   // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
9447   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
9448                                       v16i8x_info, ValignqImm8XForm>;
9449   defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
9450                                       v16i8x_info, ValigndImm8XForm>;
9451 }
9452
// VDBPSADBW: opcode 0x42, X86dbpsadbw node. The sources are i8 vectors and the
// result is an i16 vector; only rri/rmi forms exist, gated on the default
// HasBWI predicate.
9453 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
9454                 SSE_INTMUL_ITINS_P, avx512vl_i16_info, avx512vl_i8_info>,
9455                 EVEX_CD8<8, CD8VF>;
9456
// One-source vector operation with register (rr) and memory (rm) forms, both
// EVEX / AVX5128IBase. The rm form loads through _.LdFrag, adds
// EVEX_CD8<_.EltSize, CD8VF>, and is scheduled with itins.Sched.Folded (no
// ReadAfterLd, as there is no other register source).
9457 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
9458                            OpndItins itins, X86VectorVTInfo _> {
9459   let ExeDomain = _.ExeDomain in {
9460   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9461                     (ins _.RC:$src1), OpcodeStr,
9462                     "$src1", "$src1",
9463                     (_.VT (OpNode _.RC:$src1)), itins.rr>, EVEX, AVX5128IBase,
9464                     Sched<[itins.Sched]>;
9465
9466   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9467                   (ins _.MemOp:$src1), OpcodeStr,
9468                   "$src1", "$src1",
9469                   (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1)))), itins.rm>,
9470             EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
9471             Sched<[itins.Sched.Folded]>;
9472   }
9473 }
9474
// Extends avx512_unary_rm with the rmb form, whose source is a scalar load
// wrapped in X86VBroadcast (EVEX_B, printed with _.BroadcastStr).
// NOTE(review): unlike rr/rm in avx512_unary_rm, rmb is not wrapped in
// `let ExeDomain = _.ExeDomain` - confirm whether that is intentional.
9475 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9476                             OpndItins itins, X86VectorVTInfo _> :
9477            avx512_unary_rm<opc, OpcodeStr, OpNode, itins, _> {
9478   defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9479                   (ins _.ScalarMemOp:$src1), OpcodeStr,
9480                   "${src1}"##_.BroadcastStr,
9481                   "${src1}"##_.BroadcastStr,
9482                   (_.VT (OpNode (X86VBroadcast
9483                                     (_.ScalarLdFrag addr:$src1)))), itins.rm>,
9484              EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
9485              Sched<[itins.Sched.Folded]>;
9486 }
9487
// Instantiates avx512_unary_rm (rr/rm) for 512-bit under [prd] and for
// 256/128-bit under [prd, HasVLX].
9488 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
9489                               OpndItins itins, AVX512VLVectorVTInfo VTInfo,
9490                               Predicate prd> {
9491   let Predicates = [prd] in
9492     defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
9493                              EVEX_V512;
9494
9495   let Predicates = [prd, HasVLX] in {
9496     defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
9497                               EVEX_V256;
9498     defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info128>,
9499                               EVEX_V128;
9500   }
9501 }
9502
// Instantiates avx512_unary_rmb (rr/rm/rmb) for 512-bit under [prd] and for
// 256/128-bit under [prd, HasVLX].
9503 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
9504                                OpndItins itins, AVX512VLVectorVTInfo VTInfo,
9505                                Predicate prd> {
9506   let Predicates = [prd] in
9507     defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
9508                               EVEX_V512;
9509
9510   let Predicates = [prd, HasVLX] in {
9511     defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
9512                                  EVEX_V256;
9513     defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info128>,
9514                                  EVEX_V128;
9515   }
9516 }
9517
// Dword/qword variants of a unary op, both with broadcast forms. The mnemonic
// is OpcodeStr with a "q" (i64 elements, VEX_W) or "d" (i32 elements) suffix.
9518 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
9519                                  SDNode OpNode, OpndItins itins, Predicate prd> {
9520   defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, itins,
9521                                avx512vl_i64_info, prd>, VEX_W;
9522   defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, itins,
9523                                avx512vl_i32_info, prd>;
9524 }
9525
// Byte/word variants of a unary op. These use avx512_unary_rm_vl, so no
// broadcast form is defined. The mnemonic is OpcodeStr with a "w" (i16) or
// "b" (i8) suffix; both are VEX_WIG.
9526 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
9527                                  SDNode OpNode, OpndItins itins, Predicate prd> {
9528   defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, itins,
9529                               avx512vl_i16_info, prd>, VEX_WIG;
9530   defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, itins,
9531                               avx512vl_i8_info, prd>, VEX_WIG;
9532 }
9533
// All four element widths of a unary op: dword/qword forms gated on HasAVX512
// and byte/word forms gated on HasBWI. Note the opcode parameter order is
// b, w, d, q.
9534 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
9535                                   bits<8> opc_d, bits<8> opc_q,
9536                                   string OpcodeStr, SDNode OpNode,
9537                                   OpndItins itins> {
9538   defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, itins,
9539                                     HasAVX512>,
9540               avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, itins,
9541                                     HasBWI>;
9542 }
9543
// VPABS{B,W,D,Q} selecting the generic abs node; opcodes are 0x1C (b),
// 0x1D (w), 0x1E (d) and 0x1F (q).
9544 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs, SSE_PABS>;
9545
9546 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Only the i64 element types (v4i64, v2i64) are handled here. The source is
// inserted into an IMPLICIT_DEF v8i64 register, VPABSQZrr is applied, and the
// matching sub_ymm / sub_xmm subregister is extracted.
9547 let Predicates = [HasAVX512, NoVLX] in {
9548   def : Pat<(v4i64 (abs VR256X:$src)),
9549             (EXTRACT_SUBREG
9550                 (VPABSQZrr
9551                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
9552              sub_ymm)>;
9553   def : Pat<(v2i64 (abs VR128X:$src)),
9554             (EXTRACT_SUBREG
9555                 (VPABSQZrr
9556                     (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
9557              sub_xmm)>;
9558 }
9559
9560 // Use 512bit version to implement 128/256 bit.
// Active under [prd, NoVLX]. For the 256-bit and 128-bit types of the given
// VT info, the source is inserted into an IMPLICIT_DEF 512-bit register at the
// type's SubRegIdx, the instruction named InstrStr # "Zrr" is applied, and the
// same subregister is extracted from the result.
9561 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
9562                                  AVX512VLVectorVTInfo _, Predicate prd> {
9563   let Predicates = [prd, NoVLX] in {
9564     def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
9565               (EXTRACT_SUBREG
9566                 (!cast<Instruction>(InstrStr # "Zrr")
9567                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
9568                                  _.info256.RC:$src1,
9569                                  _.info256.SubRegIdx)),
9570               _.info256.SubRegIdx)>;
9571
9572     def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
9573               (EXTRACT_SUBREG
9574                 (!cast<Instruction>(InstrStr # "Zrr")
9575                   (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
9576                                  _.info128.RC:$src1,
9577                                  _.info128.SubRegIdx)),
9578               _.info128.SubRegIdx)>;
9579   }
9580 }
9581
// VPLZCNT{D,Q} select the generic ctlz node (opcode 0x44 for both widths);
// VPCONFLICT{D,Q} select X86Conflict (opcode 0xC4). Both require HasCDI.
9582 // FIXME: Is there a better scheduler itinerary for VPLZCNT?
9583 defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
9584                                         SSE_INTALU_ITINS_P, HasCDI>;
9585
9586 // FIXME: Is there a better scheduler itinerary for VPCONFLICT?
9587 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
9588                                         SSE_INTALU_ITINS_P, HasCDI>;
9589
9590 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
9591 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
9592 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
9593
9594 //===---------------------------------------------------------------------===//
9595 // Counts number of ones - VPOPCNTD and VPOPCNTQ
9596 //===---------------------------------------------------------------------===//
9597
9598 // FIXME: Is there a better scheduler itinerary for VPOPCNTD/VPOPCNTQ?
     // Matches ctpop; gated on HasVPOPCNTDQ.
9599 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
9600                                      SSE_INTALU_ITINS_P, HasVPOPCNTDQ>;
9601
     // VPOPCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
9602 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
9603 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
9604
9605 //===---------------------------------------------------------------------===//
9606 // Replicate Single FP - MOVSHDUP and MOVSLDUP
9607 //===---------------------------------------------------------------------===//
9608 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
9609                             OpndItins itins> {
       // Thin wrapper: instantiates avx512_unary_rm_vl for the f32 vector types
       // under HasAVX512 and applies the XS prefix class.
9610   defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, itins,
9611                                       avx512vl_f32_info, HasAVX512>, XS;
9612 }
9613
     // Both replicate instructions reuse the SSE_MOVDDUP itineraries.
9614 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup, SSE_MOVDDUP>;
9615 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, SSE_MOVDDUP>;
9616
9617 //===----------------------------------------------------------------------===//
9618 // AVX-512 - MOVDDUP
9619 //===----------------------------------------------------------------------===//
9620
9621 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
9622                               OpndItins itins, X86VectorVTInfo _> {
       // Maskable register and memory forms of a unary operation.
       // The memory form takes a scalar memory operand and matches OpNode applied
       // to a scalar load wrapped in scalar_to_vector.
9623   let ExeDomain = _.ExeDomain in {
9624   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9625                    (ins _.RC:$src), OpcodeStr, "$src", "$src",
9626                    (_.VT (OpNode (_.VT _.RC:$src))), itins.rr>, EVEX,
9627                    Sched<[itins.Sched]>;
9628   defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9629                  (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
9630                  (_.VT (OpNode (_.VT (scalar_to_vector
9631                                        (_.ScalarLdFrag addr:$src))))),
9632                  itins.rm>, EVEX, EVEX_CD8<_.EltSize, CD8VH>,
9633                  Sched<[itins.Sched.Folded]>;
9634   }
9635 }
9636
9637 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9638                                  OpndItins itins, AVX512VLVectorVTInfo VTInfo> {
       // Instantiates the 512-bit form unconditionally and the 256/128-bit forms
       // under [HasAVX512, HasVLX]. The 128-bit form goes through
       // avx512_movddup_128 and matches X86VBroadcast instead of X86Movddup.
       // NOTE(review): the OpNode parameter is never used here; the nodes are
       // hard-coded below.
9639
9640   defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, itins, VTInfo.info512>, EVEX_V512;
9641
9642   let Predicates = [HasAVX512, HasVLX] in {
9643     defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, itins, VTInfo.info256>,
9644                                EVEX_V256;
9645     defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, itins, VTInfo.info128>,
9646                                   EVEX_V128;
9647   }
9648 }
9649
9650 multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
9651                           OpndItins itins> {
       // Binds avx512_movddup_common to the f64 vector types and applies the
       // XD and VEX_W classes.
9652   defm NAME:      avx512_movddup_common<opc, OpcodeStr, OpNode, itins,
9653                                         avx512vl_f64_info>, XD, VEX_W;
9654 }
9655
     // The single instantiation: opcode 0x12, matching X86Movddup.
9656 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SSE_MOVDDUP>;
9657
     // Extra selection patterns mapping v2f64 X86VBroadcast onto VMOVDDUPZ128.
     // Scalar register sources are copied into VR128X first; both f64 loads and
     // v2f64 loads fold into the memory forms.
9658 let Predicates = [HasVLX] in {
     // Unmasked forms.
9659 def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
9660           (VMOVDDUPZ128rm addr:$src)>;
9661 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
9662           (VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
9663 def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9664           (VMOVDDUPZ128rm addr:$src)>;
9665
     // Register source: merge-masked (passthru $src0), then zero-masked.
9666 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
9667                    (v2f64 VR128X:$src0)),
9668           (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
9669                            (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
9670 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
9671                    (bitconvert (v4i32 immAllZerosV))),
9672           (VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
9673
     // f64 load source: merge-masked, then zero-masked.
9674 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
9675                    (v2f64 VR128X:$src0)),
9676           (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9677 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
9678                    (bitconvert (v4i32 immAllZerosV))),
9679           (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
9680
     // v2f64 load source: merge-masked, then zero-masked.
9681 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9682                    (v2f64 VR128X:$src0)),
9683           (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9684 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9685                    (bitconvert (v4i32 immAllZerosV))),
9686           (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
9687 }
9688
9689 //===----------------------------------------------------------------------===//
9690 // AVX-512 - Unpack Instructions
9691 //===----------------------------------------------------------------------===//
     // Floating-point unpack high/low, gated on HasAVX512.
9692 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
9693                                  SSE_ALU_ITINS_S>;
9694 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
9695                                  SSE_ALU_ITINS_S>;
9696
     // Integer byte and word unpacks, gated on HasBWI.
9697 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
9698                                        SSE_INTALU_ITINS_P, HasBWI>;
9699 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
9700                                        SSE_INTALU_ITINS_P, HasBWI>;
9701 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
9702                                        SSE_INTALU_ITINS_P, HasBWI>;
9703 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
9704                                        SSE_INTALU_ITINS_P, HasBWI>;
9705
     // Integer dword and qword unpacks, gated on HasAVX512.
9706 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
9707                                        SSE_INTALU_ITINS_P, HasAVX512>;
9708 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
9709                                        SSE_INTALU_ITINS_P, HasAVX512>;
9710 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
9711                                        SSE_INTALU_ITINS_P, HasAVX512>;
9712 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
9713                                        SSE_INTALU_ITINS_P, HasAVX512>;
9714
9715 //===----------------------------------------------------------------------===//
9716 // AVX-512 - Extract & Insert Integer Instructions
9717 //===----------------------------------------------------------------------===//
9718
9719 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
9720                                                             X86VectorVTInfo _> {
       // Store form shared by the byte and word extracts: OpNode's result is
       // truncated to the element type and stored to $dst.
9721   def mr : AVX512Ii8<opc, MRMDestMem, (outs),
9722               (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
9723               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9724               [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
9725                        addr:$dst)]>,
9726               EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd]>;
9727 }
9728
9729 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
       // Byte extract, gated on HasBWI: a register form writing GR32orGR64 plus
       // the shared store form, both with opcode 0x14 and TAPD.
9730   let Predicates = [HasBWI] in {
9731     def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
9732                   (ins _.RC:$src1, u8imm:$src2),
9733                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9734                   [(set GR32orGR64:$dst,
9735                         (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
9736                   EVEX, TAPD, Sched<[WriteShuffle]>;
9737
9738     defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
9739   }
9740 }
9741
9742 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
       // Word extract, gated on HasBWI. Three definitions:
       //   rr     - opcode 0xC5 (MRMSrcReg, PD), carries the selection pattern.
       //   rr_REV - opcode 0x15 (MRMDestReg, TAPD), no pattern, printed with a
       //            ".s" suffix and tied to rr through FoldGenData.
       //   mr     - shared store form with opcode 0x15.
9743   let Predicates = [HasBWI] in {
9744     def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
9745                   (ins _.RC:$src1, u8imm:$src2),
9746                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9747                   [(set GR32orGR64:$dst,
9748                         (X86pextrw (_.VT _.RC:$src1), imm:$src2))],
9749                   IIC_SSE_PEXTRW>, EVEX, PD, Sched<[WriteShuffle]>;
9750
9751     let hasSideEffects = 0 in
9752     def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
9753                    (ins _.RC:$src1, u8imm:$src2),
9754                    OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
9755                    IIC_SSE_PEXTRW>, EVEX, TAPD, FoldGenData<NAME#rr>,
9756                    Sched<[WriteShuffle]>;
9757
9758     defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
9759   }
9760 }
9761
9762 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
9763                                                             RegisterClass GRC> {
       // Dword/qword extract, gated on HasDQI. GRC is the general-purpose
       // register class that receives the element. Both forms use opcode 0x16
       // and match the generic extractelt node directly.
9764   let Predicates = [HasDQI] in {
9765     def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
9766                   (ins _.RC:$src1, u8imm:$src2),
9767                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9768                   [(set GRC:$dst,
9769                       (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
9770                   EVEX, TAPD, Sched<[WriteShuffle]>;
9771
9772     def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
9773                 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
9774                 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9775                 [(store (extractelt (_.VT _.RC:$src1),
9776                                     imm:$src2),addr:$dst)]>,
9777                 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
9778                 Sched<[WriteShuffleLd]>;
9779   }
9780 }
9781
     // Element extracts from 128-bit vectors, one per element width. The dword
     // and qword forms differ in destination register class, and the qword
     // form adds VEX_W.
9782 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
9783 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
9784 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
9785 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
9786
9787 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
9788                                             X86VectorVTInfo _, PatFrag LdFrag> {
       // Memory form shared by all element inserts: the inserted value is read
       // through LdFrag from $src2 and placed into $src1 at index $src3.
9789   def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
9790       (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
9791       OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9792       [(set _.RC:$dst,
9793           (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
9794       EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd, ReadAfterLd]>;
9795 }
9796
9797 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9798                                             X86VectorVTInfo _, PatFrag LdFrag> {
       // Byte/word insert, gated on HasBWI: a register form taking the value from
       // GR32orGR64 plus the shared memory form. No prefix class is applied
       // inside this multiclass.
9799   let Predicates = [HasBWI] in {
9800     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
9801         (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
9802         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9803         [(set _.RC:$dst,
9804             (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
9805         Sched<[WriteShuffle]>;
9806
9807     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
9808   }
9809 }
9810
9811 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
9812                                          X86VectorVTInfo _, RegisterClass GRC> {
       // Dword/qword insert, gated on HasDQI. Both forms match the generic
       // insertelt node and carry TAPD; GRC is the register class of the
       // inserted scalar, and the memory form loads through _.ScalarLdFrag.
9813   let Predicates = [HasDQI] in {
9814     def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
9815         (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
9816         OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9817         [(set _.RC:$dst,
9818             (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
9819         EVEX_4V, TAPD, Sched<[WriteShuffle]>;
9820
9821     defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
9822                                     _.ScalarLdFrag>, TAPD;
9823   }
9824 }
9825
     // Element inserts into 128-bit vectors. The byte and word forms supply
     // their prefix class here (TAPD and PD) and load through extending loads.
     // The dword and qword forms share opcode 0x22 and differ by VEX_W.
9826 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
9827                                      extloadi8>, TAPD, VEX_WIG;
9828 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
9829                                      extloadi16>, PD, VEX_WIG;
9830 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
9831 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
9832
9833 //===----------------------------------------------------------------------===//
9834 // VSHUFPS - VSHUFPD Operations
9835 //===----------------------------------------------------------------------===//
9836
9837 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
9838                                                 AVX512VLVectorVTInfo VTInfo_FP>{
       // Instantiates avx512_common_3Op_imm8 with opcode 0xC6 and X86Shufp on the
       // floating-point type info.
       // NOTE(review): VTInfo_I is accepted but not referenced in this body.
9839   defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
9840                         SSE_SHUFP>, EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
9841                         AVX512AIi8Base, EVEX_4V;
9842 }
9843
     // Single- and double-precision shuffles; the PD form adds VEX_W.
9844 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
9845 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
9846
9847 //===----------------------------------------------------------------------===//
9848 // AVX-512 - Byte shift Left/Right
9849 //===----------------------------------------------------------------------===//
9850
     // Itinerary bundle for the byte shifts: the same itinerary class is given
     // for both OpndItins arguments, and the scheduling class is WriteVecShift.
9851 let Sched = WriteVecShift in
9852 def AVX512_BYTESHIFT : OpndItins<
9853   IIC_SSE_INTSHDQ_P_RI, IIC_SSE_INTSHDQ_P_RI
9854 >;
9855
9856 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
9857                                Format MRMm, string OpcodeStr,
9858                                OpndItins itins, X86VectorVTInfo _>{
       // Shift-by-immediate on a packed vector, unmasked.
       //   MRMr/MRMm - ModRM formats for the register and memory forms.
       //   rr - source vector in a register.
       //   rm - source vector loaded from memory and bitconverted to _.VT.
9859   def rr : AVX512<opc, MRMr,
9860              (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
9861              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9862              [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))],
9863              itins.rr>, Sched<[itins.Sched]>;
9864   def rm : AVX512<opc, MRMm,
9865            (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
9866            OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9867            [(set _.RC:$dst, (_.VT (OpNode
9868                                   (_.VT (bitconvert (_.LdFrag addr:$src1))),
9869                                   (i8 imm:$src2))))], itins.rm>,
9870            Sched<[itins.Sched.Folded, ReadAfterLd]>;
9871 }
9872
9873 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
9874                                    Format MRMm, string OpcodeStr,
9875                                    OpndItins itins, Predicate prd>{
       // Instantiates avx512_shift_packed on byte vectors at all three widths:
       // 512-bit under [prd], 256/128-bit under [prd, HasVLX].
9876   let Predicates = [prd] in
9877     defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
9878                                  OpcodeStr, itins, v64i8_info>, EVEX_V512;
9879   let Predicates = [prd, HasVLX] in {
9880     defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
9881                                     OpcodeStr, itins, v32i8x_info>, EVEX_V256;
9882     defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
9883                                     OpcodeStr, itins, v16i8x_info>, EVEX_V128;
9884   }
9885 }
     // Both byte shifts share opcode 0x73 and are told apart by the ModRM
     // format pair (MRM7* for the left shift, MRM3* for the right shift).
9886 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
9887                                        AVX512_BYTESHIFT, HasBWI>, AVX512PDIi8Base,
9888                                        EVEX_4V, VEX_WIG;
9889 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
9890                                        AVX512_BYTESHIFT, HasBWI>, AVX512PDIi8Base,
9891                                        EVEX_4V, VEX_WIG;
9892
9893
9894 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
9895                                 string OpcodeStr, OpndItins itins,
9896                                 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
       // Binary operation whose result type (_dst) differs from its operand
       // type (_src), unmasked.
       //   rr - both operands in registers.
       //   rm - second operand loaded from memory and bitconverted to _src.VT.
9897   def rr : AVX512BI<opc, MRMSrcReg,
9898              (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
9899              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9900              [(set _dst.RC:$dst, (_dst.VT
9901                                  (OpNode (_src.VT _src.RC:$src1),
9902                                          (_src.VT _src.RC:$src2))))], itins.rr>,
9903              Sched<[itins.Sched]>;
9904   def rm : AVX512BI<opc, MRMSrcMem,
9905            (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
9906            OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9907            [(set _dst.RC:$dst, (_dst.VT
9908                                (OpNode (_src.VT _src.RC:$src1),
9909                                        (_src.VT (bitconvert
9910                                                  (_src.LdFrag addr:$src2))))))], itins.rm>,
9911            Sched<[itins.Sched.Folded, ReadAfterLd]>;
9912 }
9913
9914 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
9915                                     string OpcodeStr, OpndItins itins,
9916                                     Predicate prd> {
       // Instantiates avx512_psadbw_packed at all three widths, pairing an i64
       // result vector with the byte vector of the same total width:
       // 512-bit under [prd], 256/128-bit under [prd, HasVLX].
9917   let Predicates = [prd] in
9918     defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v8i64_info,
9919                                   v64i8_info>, EVEX_V512;
9920   let Predicates = [prd, HasVLX] in {
9921     defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v4i64x_info,
9922                                     v32i8x_info>, EVEX_V256;
9923     defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v2i64x_info,
9924                                     v16i8x_info>, EVEX_V128;
9925   }
9926 }
9927
     // VPSADBW: opcode 0xf6, matches X86psadbw, gated on HasBWI.
9928 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
9929                                         SSE_MPSADBW_ITINS, HasBWI>, EVEX_4V, VEX_WIG;
9930
9931 // Transforms to swizzle an immediate to enable better matching when
9932 // memory operand isn't in the right place.
9933 def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
9934   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
       // Bit i of the immediate is the result for the operand bit triple whose
       // index is i, with operand 0 as the most significant index bit.
9935   uint8_t Imm = N->getZExtValue();
9936   // Swap bits 1/4 and 3/6.
       // 0xa5 keeps bits 0, 2, 5 and 7, which this swap leaves in place.
9937   uint8_t NewImm = Imm & 0xa5;
9938   if (Imm & 0x02) NewImm |= 0x10;
9939   if (Imm & 0x10) NewImm |= 0x02;
9940   if (Imm & 0x08) NewImm |= 0x40;
9941   if (Imm & 0x40) NewImm |= 0x08;
9942   return getI8Imm(NewImm, SDLoc(N));
9943 }]>;
9944 def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
9945   // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
       // (Indices 2/4 and 3/5 differ exactly in the two high index bits, which
       // belong to operands 0 and 1.)
9946   uint8_t Imm = N->getZExtValue();
9947   // Swap bits 2/4 and 3/5.
       // 0xc3 keeps bits 0, 1, 6 and 7, which this swap leaves in place.
9948   uint8_t NewImm = Imm & 0xc3;
9949   if (Imm & 0x04) NewImm |= 0x10;
9950   if (Imm & 0x10) NewImm |= 0x04;
9951   if (Imm & 0x08) NewImm |= 0x20;
9952   if (Imm & 0x20) NewImm |= 0x08;
9953   return getI8Imm(NewImm, SDLoc(N));
9954 }]>;
9955 def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
9956   // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
       // (Indices 1/2 and 5/6 differ exactly in the two low index bits, which
       // belong to operands 1 and 2.)
9957   uint8_t Imm = N->getZExtValue();
9958   // Swap bits 1/2 and 5/6.
       // 0x99 keeps bits 0, 3, 4 and 7, which this swap leaves in place.
9959   uint8_t NewImm = Imm & 0x99;
9960   if (Imm & 0x02) NewImm |= 0x04;
9961   if (Imm & 0x04) NewImm |= 0x02;
9962   if (Imm & 0x20) NewImm |= 0x40;
9963   if (Imm & 0x40) NewImm |= 0x20;
9964   return getI8Imm(NewImm, SDLoc(N));
9965 }]>;
9966 def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
9967   // Convert a VPTERNLOG immediate by moving operand 0 (the first operand) to
       // the end: the result, applied to (op1, op2, op0), computes what the input
       // immediate computes on (op0, op1, op2).
9968   uint8_t Imm = N->getZExtValue();
9969   // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
       // 0x81 keeps bits 0 and 7 (all-zero and all-one inputs), which no
       // operand permutation moves.
9970   uint8_t NewImm = Imm & 0x81;
9971   if (Imm & 0x02) NewImm |= 0x04;
9972   if (Imm & 0x04) NewImm |= 0x10;
9973   if (Imm & 0x08) NewImm |= 0x40;
9974   if (Imm & 0x10) NewImm |= 0x02;
9975   if (Imm & 0x20) NewImm |= 0x08;
9976   if (Imm & 0x40) NewImm |= 0x20;
9977   return getI8Imm(NewImm, SDLoc(N));
9978 }]>;
9979 def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
9980   // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
       // The result, applied to (op2, op0, op1), computes what the input
       // immediate computes on (op0, op1, op2).
9981   uint8_t Imm = N->getZExtValue();
9982   // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
       // 0x81 keeps bits 0 and 7 (all-zero and all-one inputs), which no
       // operand permutation moves.
9983   uint8_t NewImm = Imm & 0x81;
9984   if (Imm & 0x02) NewImm |= 0x10;
9985   if (Imm & 0x04) NewImm |= 0x02;
9986   if (Imm & 0x08) NewImm |= 0x20;
9987   if (Imm & 0x10) NewImm |= 0x04;
9988   if (Imm & 0x20) NewImm |= 0x40;
9989   if (Imm & 0x40) NewImm |= 0x08;
9990   return getI8Imm(NewImm, SDLoc(N));
9991 }]>;
9992
9993 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
9994                           OpndItins itins, X86VectorVTInfo _>{
       // Ternary-logic instruction forms plus extra selection patterns.
       //   opc/OpcodeStr - opcode byte and mnemonic.
       //   OpNode        - node taking three vector operands and an i8 immediate.
       //   itins         - supplies the rr/rm itineraries and scheduling class.
       //   _             - vector type info (VT, RC, KRCWM, load fragments, ...).
       // rri/rmi/rmbi are the register, full-vector-load and broadcast-load forms;
       // all tie $src1 to $dst. The `def : Pat`s that follow match nodes whose
       // operands are not in ($src1, $src2, $src3) order and compensate by
       // rewriting the immediate with the matching VPTERNLOG*_imm8 transform.
9995   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
9996   defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
9997                       (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
9998                       OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
9999                       (OpNode (_.VT _.RC:$src1),
10000                               (_.VT _.RC:$src2),
10001                               (_.VT _.RC:$src3),
10002                               (i8 imm:$src4)), itins.rr, 1, 1>,
10003                       AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;
10004   defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10005                     (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
10006                     OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
10007                     (OpNode (_.VT _.RC:$src1),
10008                             (_.VT _.RC:$src2),
10009                             (_.VT (bitconvert (_.LdFrag addr:$src3))),
10010                             (i8 imm:$src4)), itins.rm, 1, 0>,
10011                     AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
10012                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
10013   defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10014                     (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
10015                     OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
10016                     "$src2, ${src3}"##_.BroadcastStr##", $src4",
10017                     (OpNode (_.VT _.RC:$src1),
10018                             (_.VT _.RC:$src2),
10019                             (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
10020                             (i8 imm:$src4)), itins.rm, 1, 0>, EVEX_B,
10021                     AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
10022                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
10023   }// Constraints = "$src1 = $dst"
10024
10025   // Additional patterns for matching passthru operand in other positions.
10026   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10027                    (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10028                    _.RC:$src1)),
10029             (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
10030              _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10031   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10032                    (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
10033                    _.RC:$src1)),
10034             (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
10035              _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
10036
10037   // Additional patterns for matching loads in other positions.
10038   def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
10039                           _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
10040             (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
10041                                    addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10042   def : Pat<(_.VT (OpNode _.RC:$src1,
10043                           (bitconvert (_.LdFrag addr:$src3)),
10044                           _.RC:$src2, (i8 imm:$src4))),
10045             (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
10046                                    addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10047
10048   // Additional patterns for matching zero masking with loads in other
10049   // positions.
10050   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10051                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
10052                     _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10053                    _.ImmAllZerosV)),
10054             (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
10055              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10056   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10057                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
10058                     _.RC:$src2, (i8 imm:$src4)),
10059                    _.ImmAllZerosV)),
10060             (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
10061              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10062
10063   // Additional patterns for matching masked loads with different
10064   // operand orders.
10065   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10066                    (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
10067                     _.RC:$src2, (i8 imm:$src4)),
10068                    _.RC:$src1)),
10069             (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10070              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10071   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10072                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
10073                     _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10074                    _.RC:$src1)),
10075             (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10076              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10077   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10078                    (OpNode _.RC:$src2, _.RC:$src1,
10079                     (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
10080                    _.RC:$src1)),
10081             (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10082              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
        // Node order ($src2, mem, $src1) vs. instruction order ($src1, $src2, mem):
        // the node's last operand has to move to the front, which is the 312
        // transform (bit 1 -> bit 4, ...). Using 231 here would turn e.g. 0xF0
        // ("result = $src2") into 0xAA ("result = mem") instead of 0xCC.
10083   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10084                    (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
10085                     _.RC:$src1, (i8 imm:$src4)),
10086                    _.RC:$src1)),
10087             (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10088              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
        // Node order (mem, $src1, $src2): the node's first operand has to move to
        // the end, which is the 231 transform (bit 1 -> bit 2, ...).
10089   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10090                    (OpNode (bitconvert (_.LdFrag addr:$src3)),
10091                     _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
10092                    _.RC:$src1)),
10093             (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10094              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
10095
10096   // Additional patterns for matching broadcasts in other positions.
10097   def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10098                           _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
10099             (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
10100                                    addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10101   def : Pat<(_.VT (OpNode _.RC:$src1,
10102                           (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10103                           _.RC:$src2, (i8 imm:$src4))),
10104             (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
10105                                    addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10106
10107   // Additional patterns for matching zero masking with broadcasts in other
10108   // positions.
10109   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10110                    (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10111                     _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10112                    _.ImmAllZerosV)),
10113             (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
10114              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
10115              (VPTERNLOG321_imm8 imm:$src4))>;
10116   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10117                    (OpNode _.RC:$src1,
10118                     (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10119                     _.RC:$src2, (i8 imm:$src4)),
10120                    _.ImmAllZerosV)),
10121             (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
10122              _.KRCWM:$mask, _.RC:$src2, addr:$src3,
10123              (VPTERNLOG132_imm8 imm:$src4))>;
10124
10125   // Additional patterns for matching masked broadcasts with different
10126   // operand orders.
10127   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10128                    (OpNode _.RC:$src1,
10129                     (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10130                     _.RC:$src2, (i8 imm:$src4)),
10131                    _.RC:$src1)),
10132             (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10133              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10134   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10135                    (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10136                     _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10137                    _.RC:$src1)),
10138             (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10139              _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10140   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10141                    (OpNode _.RC:$src2, _.RC:$src1,
10142                     (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10143                     (i8 imm:$src4)), _.RC:$src1)),
10144             (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10145              _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
        // Node order ($src2, bcst, $src1): last node operand moves to the front,
        // so the 312 transform is required (same reasoning as the load form).
10146   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10147                    (OpNode _.RC:$src2,
10148                     (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10149                     _.RC:$src1, (i8 imm:$src4)),
10150                    _.RC:$src1)),
10151             (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10152              _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
        // Node order (bcst, $src1, $src2): first node operand moves to the end,
        // so the 231 transform is required.
10153   def : Pat<(_.VT (vselect _.KRCWM:$mask,
10154                    (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10155                     _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
10156                    _.RC:$src1)),
10157             (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10158              _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
10159 }
10160
// Instantiates avx512_ternlog (opcode 0x25, node X86vpternlog) once per vector
// width taken from the AVX512VLVectorVTInfo argument: the 512-bit form is
// guarded by HasAVX512, the 128- and 256-bit forms additionally by HasVLX.
10161 multiclass avx512_common_ternlog<string OpcodeStr, OpndItins itins,
10162                                  AVX512VLVectorVTInfo _> {
10163   let Predicates = [HasAVX512] in
10164     defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info512>, EVEX_V512;
10165   let Predicates = [HasAVX512, HasVLX] in {
10166     defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info128>, EVEX_V128;
10167     defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info256>, EVEX_V256;
10168   }
10169 }
10170
// 32-bit and 64-bit element flavours of the ternary-logic instruction. Both
// use the SSE_INTALU_ITINS_P itinerary; the 64-bit flavour also sets VEX_W.
10171 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SSE_INTALU_ITINS_P,
10172                                         avx512vl_i32_info>;
10173 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SSE_INTALU_ITINS_P,
10174                                         avx512vl_i64_info>, VEX_W;
10175
10176 //===----------------------------------------------------------------------===//
10177 // AVX-512 - FixupImm
10178 //===----------------------------------------------------------------------===//
10179
// Packed fixupimm forms that use the current rounding mode (FROUND_CURRENT).
// $src1 is tied to $dst in all three forms. They differ only in where $src3
// comes from:
//   rri  - register
//   rmi  - full-vector memory operand loaded with _.LdFrag
//   rmbi - scalar memory operand broadcast with X86VBroadcast (EVEX_B)
// $src3 is passed to OpNode as the integer vector type (_.IntVT), $src4 is
// the immediate.
10180 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
10181                                   OpndItins itins, X86VectorVTInfo _>{
10182   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
10183     defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
10184                         (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10185                          OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10186                         (OpNode (_.VT _.RC:$src1),
10187                                 (_.VT _.RC:$src2),
10188                                 (_.IntVT _.RC:$src3),
10189                                 (i32 imm:$src4),
10190                                 (i32 FROUND_CURRENT)), itins.rr>, Sched<[itins.Sched]>;
10191     defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10192                       (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
10193                       OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10194                       (OpNode (_.VT _.RC:$src1),
10195                               (_.VT _.RC:$src2),
10196                               (_.IntVT (bitconvert (_.LdFrag addr:$src3))),
10197                               (i32 imm:$src4),
10198                               (i32 FROUND_CURRENT)), itins.rm>,
10199                       Sched<[itins.Sched.Folded, ReadAfterLd]>;
10200     defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10201                       (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
10202                     OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
10203                     "$src2, ${src3}"##_.BroadcastStr##", $src4",
10204                       (OpNode (_.VT _.RC:$src1),
10205                               (_.VT _.RC:$src2),
10206                               (_.IntVT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
10207                               (i32 imm:$src4),
10208                               (i32 FROUND_CURRENT)), itins.rm>,
10209                     EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
10210   } // Constraints = "$src1 = $dst"
10211 }
10212
// Register-only packed fixupimm form printed with the {sae} modifier. It
// passes FROUND_NO_EXC as the rounding operand and is marked EVEX_B; $src1 is
// tied to $dst.
10213 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
10214                                       SDNode OpNode, OpndItins itins,
10215                                       X86VectorVTInfo _>{
10216 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
10217   defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
10218                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10219                       OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
10220                       "$src2, $src3, {sae}, $src4",
10221                       (OpNode (_.VT _.RC:$src1),
10222                                 (_.VT _.RC:$src2),
10223                                 (_.IntVT _.RC:$src3),
10224                                 (i32 imm:$src4),
10225                                 (i32 FROUND_NO_EXC)), itins.rr>,
10226                       EVEX_B, Sched<[itins.Sched]>;
10227   }
10228 }
10229
// Scalar fixupimm forms, all guarded by HasAVX512 with $src1 tied to $dst.
//   _       - vector-type info for the destination and the first two sources
//   _src3VT - vector-type info used for the third source in the
//             selection pattern
// Forms:
//   rri  - register $src3, FROUND_CURRENT
//   rrib - register $src3, printed with {sae}, FROUND_NO_EXC, EVEX_B
//   rmi  - scalar memory $src3 (scalar_to_vector of _src3VT.ScalarLdFrag),
//          FROUND_CURRENT
10230 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
10231                                   OpndItins itins, X86VectorVTInfo _,
10232                                   X86VectorVTInfo _src3VT> {
10233   let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
10234       ExeDomain = _.ExeDomain in {
10235     defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10236                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10237                       OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10238                       (OpNode (_.VT _.RC:$src1),
10239                               (_.VT _.RC:$src2),
10240                               (_src3VT.VT _src3VT.RC:$src3),
10241                               (i32 imm:$src4),
10242                               (i32 FROUND_CURRENT)), itins.rr>, Sched<[itins.Sched]>;
// rrib is a register-register form (MRMSrcReg), so it takes the register
// itinerary and the unfolded scheduling class, the same as rri above.
10243     defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10244                       (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10245                       OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
10246                       "$src2, $src3, {sae}, $src4",
10247                       (OpNode (_.VT _.RC:$src1),
10248                               (_.VT _.RC:$src2),
10249                               (_src3VT.VT _src3VT.RC:$src3),
10250                               (i32 imm:$src4),
10251                               (i32 FROUND_NO_EXC)), itins.rr>,
10252                       EVEX_B, Sched<[itins.Sched]>;
10253     defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10254                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
10255                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10256                      (OpNode (_.VT _.RC:$src1),
10257                              (_.VT _.RC:$src2),
10258                              (_src3VT.VT (scalar_to_vector
10259                                        (_src3VT.ScalarLdFrag addr:$src3))),
10260                              (i32 imm:$src4),
10261                              (i32 FROUND_CURRENT)), itins.rm>,
10262                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
10263   }
10264 }
10265
// Instantiates the packed vfixupimm instruction (opcode 0x54, node
// X86VFixupimm) per vector width. The 512-bit form (HasAVX512) combines
// avx512_fixupimm_packed with avx512_fixupimm_packed_sae; the 128- and 256-bit
// forms (HasAVX512 + HasVLX) use avx512_fixupimm_packed only.
10266 multiclass avx512_fixupimm_packed_all<OpndItins itins, AVX512VLVectorVTInfo _Vec> {
10267   let Predicates = [HasAVX512] in
10268     defm Z    : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
10269                                        _Vec.info512>,
10270                 avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, itins,
10271                                 _Vec.info512>, AVX512AIi8Base, EVEX_4V, EVEX_V512;
10272   let Predicates = [HasAVX512, HasVLX] in {
10273     defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
10274                             _Vec.info128>, AVX512AIi8Base, EVEX_4V, EVEX_V128;
10275     defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
10276                             _Vec.info256>, AVX512AIi8Base, EVEX_4V, EVEX_V256;
10277   }
10278 }
10279
// Scalar (SS/SD, opcode 0x55, node X86VFixupimmScalar) and packed (PS/PD)
// vfixupimm definitions. The scalar forms pass an integer vector info
// (v4i32x_info / v2i64x_info) as the third-source type; the double-precision
// forms additionally set VEX_W.
10280 defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
10281                                           SSE_ALU_F32S, f32x_info, v4i32x_info>,
10282                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10283 defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
10284                                           SSE_ALU_F64S, f64x_info, v2i64x_info>,
10285                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10286 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SSE_ALU_F32P, avx512vl_f32_info>,
10287                          EVEX_CD8<32, CD8VF>;
10288 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SSE_ALU_F64P, avx512vl_f64_info>,
10289                          EVEX_CD8<64, CD8VF>, VEX_W;
10290
10291
10292
10293 // Patterns used to select SSE scalar fp arithmetic instructions from
10294 // either:
10295 //
10296 // (1) a scalar fp operation followed by a blend
10297 //
10298 // The effect is that the backend no longer emits unnecessary vector
10299 // insert instructions immediately after SSE scalar fp instructions
10300 // like addss or mulss.
10301 //
10302 // For example, given the following code:
10303 //   __m128 foo(__m128 A, __m128 B) {
10304 //     A[0] += B[0];
10305 //     return A;
10306 //   }
10307 //
10308 // Previously we generated:
10309 //   addss %xmm0, %xmm1
10310 //   movss %xmm1, %xmm0
10311 //
10312 // We now generate:
10313 //   addss %xmm1, %xmm0
10314 //
10315 // (2) a vector packed single/double fp operation followed by a vector insert
10316 //
10317 // The effect is that the backend converts the packed fp instruction
10318 // followed by a vector insert into a single SSE scalar fp instruction.
10319 //
10320 // For example, given the following code:
10321 //   __m128 foo(__m128 A, __m128 B) {
10322 //     __m128 C = A + B;
10323 //     return (__m128) {C[0], A[1], A[2], A[3]};
10324 //   }
10325 //
10326 // Previously we generated:
10327 //   addps %xmm0, %xmm1
10328 //   movss %xmm1, %xmm0
10329 //
10330 // We now generate:
10331 //   addss %xmm1, %xmm0
10332
10333 // TODO: Some canonicalization in lowering would simplify the number of
10334 // patterns we have to try to match.
// Selection patterns (HasAVX512) that map a single-precision `Op` whose result
// is merged into a v4f32 through X86Movss onto the instruction named
// "V" # OpcPrefix # "SSZrr_Int", or "...SSZrr_Intk" for the masked pattern.
// Scalar FR32X operands are copied into VR128X for the instruction.
10335 multiclass AVX512_scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
10336   let Predicates = [HasAVX512] in {
10337     // extracted scalar math op with insert via movss
10338     def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector
10339           (Op (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
10340           FR32X:$src))))),
10341       (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
10342           (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
10343
10344     // vector math op with insert via movss
10345     def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst),
10346           (Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)))),
10347       (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
10348
10349     // extracted masked scalar math op with insert via movss
// X86selects picks between the Op result and $src0 under $mask; $src0 becomes
// the first operand of the masked instruction.
10350     def : Pat<(X86Movss (v4f32 VR128X:$src1),
10351                (scalar_to_vector
10352                 (X86selects VK1WM:$mask,
10353                             (Op (f32 (extractelt (v4f32 VR128X:$src1), (iPTR 0))),
10354                                 FR32X:$src2),
10355                             FR32X:$src0))),
10356       (!cast<I>("V"#OpcPrefix#SSZrr_Intk) (COPY_TO_REGCLASS FR32X:$src0, VR128X),
10357           VK1WM:$mask, v4f32:$src1,
10358           (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
10359   }
10360 }
10361
// Single-precision pattern sets for add, sub, mul and div.
10362 defm : AVX512_scalar_math_f32_patterns<fadd, "ADD">;
10363 defm : AVX512_scalar_math_f32_patterns<fsub, "SUB">;
10364 defm : AVX512_scalar_math_f32_patterns<fmul, "MUL">;
10365 defm : AVX512_scalar_math_f32_patterns<fdiv, "DIV">;
10366
// Double-precision counterpart of the f32 pattern set: selection patterns
// (HasAVX512) that map an `Op` whose result is merged into a v2f64 through
// X86Movsd onto "V" # OpcPrefix # "SDZrr_Int", or "...SDZrr_Intk" for the
// masked pattern. Scalar FR64X operands are copied into VR128X.
10367 multiclass AVX512_scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
10368   let Predicates = [HasAVX512] in {
10369     // extracted scalar math op with insert via movsd
10370     def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector
10371           (Op (f64 (extractelt (v2f64 VR128X:$dst), (iPTR 0))),
10372           FR64X:$src))))),
10373       (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
10374           (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
10375
10376     // vector math op with insert via movsd
10377     def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst),
10378           (Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)))),
10379       (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
10380
10381     // extracted masked scalar math op with insert via movsd
10382     def : Pat<(X86Movsd (v2f64 VR128X:$src1),
10383                (scalar_to_vector
10384                 (X86selects VK1WM:$mask,
10385                             (Op (f64 (extractelt (v2f64 VR128X:$src1), (iPTR 0))),
10386                                 FR64X:$src2),
10387                             FR64X:$src0))),
10388       (!cast<I>("V"#OpcPrefix#SDZrr_Intk) (COPY_TO_REGCLASS FR64X:$src0, VR128X),
10389           VK1WM:$mask, v2f64:$src1,
10390           (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
10391   }
10392 }
10393
// Double-precision pattern sets for add, sub, mul and div.
10394 defm : AVX512_scalar_math_f64_patterns<fadd, "ADD">;
10395 defm : AVX512_scalar_math_f64_patterns<fsub, "SUB">;
10396 defm : AVX512_scalar_math_f64_patterns<fmul, "MUL">;
10397 defm : AVX512_scalar_math_f64_patterns<fdiv, "DIV">;
10398
10399 //===----------------------------------------------------------------------===//
10400 // AES instructions
10401 //===----------------------------------------------------------------------===//
10402
// EVEX-encoded AES instruction at three vector widths, each built from
// AESI_binop_rm_int. IntPrefix is the name of the intrinsic used for the
// 128-bit form; "_256" and "_512" are appended to it for the wider forms.
// The 128/256-bit forms require HasVLX + HasVAES, the 512-bit form requires
// HasAVX512 + HasVAES.
10403 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
10404   let Predicates = [HasVLX, HasVAES] in {
10405     defm Z128 : AESI_binop_rm_int<Op, OpStr,
10406                                   !cast<Intrinsic>(IntPrefix),
10407                                   loadv2i64, 0, VR128X, i128mem>,
10408                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
10409     defm Z256 : AESI_binop_rm_int<Op, OpStr,
10410                                   !cast<Intrinsic>(IntPrefix##"_256"),
10411                                   loadv4i64, 0, VR256X, i256mem>,
10412                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
10413     }
10414     let Predicates = [HasAVX512, HasVAES] in
10415     defm Z    : AESI_binop_rm_int<Op, OpStr,
10416                                   !cast<Intrinsic>(IntPrefix##"_512"),
10417                                   loadv8i64, 0, VR512, i512mem>,
10418                   EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
10419 }
10420
// The four AES round instructions, opcodes 0xDC-0xDF, each bound to the
// int_x86_aesni_* intrinsic family named by the last argument.
10421 defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
10422 defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
10423 defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
10424 defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
10425
10426 //===----------------------------------------------------------------------===//
10427 // PCLMUL instructions - Carry less multiplication
10428 //===----------------------------------------------------------------------===//
10429
// EVEX-encoded carry-less multiply at 512 bits (HasAVX512 + HasVPCLMULQDQ)
// and at 128/256 bits (HasVLX + HasVPCLMULQDQ), each bound to the
// int_x86_pclmulqdq* intrinsic of matching width.
10430 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
10431 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
10432                               EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
10433
10434 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
10435 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
10436                               EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
10437
10438 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
10439                                 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
10440                                 EVEX_CD8<64, CD8VF>, VEX_WIG;
10441 }
10442
10443 // Aliases
// One alias set per instruction defined above, keyed by the instruction name
// string together with its register class and memory operand.
10444 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
10445 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
10446 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
10447
10448 //===----------------------------------------------------------------------===//
10449 // VBMI2
10450 //===----------------------------------------------------------------------===//
10451
// Three-source VBMI2 variable shift with $src1 tied to $dst. Defines the
// register form (r) and the full-vector memory form (m, $src3 loaded with
// VTI.LdFrag); there is no broadcast form here.
10452 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
10453                               OpndItins itins, X86VectorVTInfo VTI> {
10454   let Constraints = "$src1 = $dst",
10455       ExeDomain   = VTI.ExeDomain in {
10456     defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
10457                 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
10458                 "$src3, $src2", "$src2, $src3",
10459                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3)),
10460                 itins.rr>, AVX512FMA3Base, Sched<[itins.Sched]>;
10461     defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10462                 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
10463                 "$src3, $src2", "$src2, $src3",
10464                 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
10465                         (VTI.VT (bitconvert (VTI.LdFrag addr:$src3))))),
10466                 itins.rm>, AVX512FMA3Base,
10467                 Sched<[itins.Sched.Folded, ReadAfterLd]>;
10468   }
10469 }
10470
// Extends VBMI2_shift_var_rm (inherited r and m forms) with a broadcast
// memory form (mb): $src3 is a scalar load broadcast with X86VBroadcast, and
// the instruction is marked EVEX_B.
10471 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
10472                                OpndItins itins, X86VectorVTInfo VTI>
10473          : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI> {
10474   let Constraints = "$src1 = $dst",
10475       ExeDomain   = VTI.ExeDomain in
10476   defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10477               (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
10478               "${src3}"##VTI.BroadcastStr##", $src2",
10479               "$src2, ${src3}"##VTI.BroadcastStr,
10480               (OpNode VTI.RC:$src1, VTI.RC:$src2,
10481                (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3)))),
10482               itins.rm>, AVX512FMA3Base, EVEX_B,
10483               Sched<[itins.Sched.Folded, ReadAfterLd]>;
10484 }
10485
// Instantiates VBMI2_shift_var_rm per vector width: 512-bit under HasVBMI2,
// 256- and 128-bit under HasVBMI2 + HasVLX.
10486 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
10487                                      OpndItins itins, AVX512VLVectorVTInfo VTI> {
10488   let Predicates = [HasVBMI2] in
10489   defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info512>, EVEX_V512;
10490   let Predicates = [HasVBMI2, HasVLX] in {
10491     defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info256>, EVEX_V256;
10492     defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info128>, EVEX_V128;
10493   }
10494 }
10495
// Same width expansion as VBMI2_shift_var_rm_common, but built on
// VBMI2_shift_var_rmb so each width also gets the broadcast memory form.
10496 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
10497                                       OpndItins itins, AVX512VLVectorVTInfo VTI> {
10498   let Predicates = [HasVBMI2] in
10499   defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info512>, EVEX_V512;
10500   let Predicates = [HasVBMI2, HasVLX] in {
10501     defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info256>, EVEX_V256;
10502     defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info128>, EVEX_V128;
10503   }
10504 }
// Word, dword and qword flavours of a variable concat-shift. The word flavour
// uses opcode wOp and the non-broadcast multiclass; the dword and qword
// flavours share opcode dqOp and use the broadcast-capable multiclass. The
// mnemonic is Prefix plus a "w"/"d"/"q" suffix.
10505 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
10506                            SDNode OpNode, OpndItins itins> {
10507   defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, itins,
10508              avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
10509   defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, itins,
10510              avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10511   defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, itins,
10512              avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
10513 }
10514
// Word, dword and qword flavours of an immediate concat-shift, all guarded by
// HasVBMI2. The word flavour is built from avx512_common_3Op_rm_imm8 with
// opcode wOp; the dword and qword flavours are built from
// avx512_common_3Op_imm8 with the shared opcode dqOp. Note that the two helper
// multiclasses take their arguments in different orders.
10515 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
10516                            SDNode OpNode, OpndItins itins> {
10517   defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", itins,
10518              avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
10519              VEX_W, EVEX_CD8<16, CD8VF>;
10520   defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
10521              OpNode, itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10522   defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
10523              itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10524 }
10525
10526 // Concat & Shift
// The variable (…V) and immediate forms reuse the same opcode pairs
// (0x70/0x71 for left, 0x72/0x73 for right) and differ in the multiclass
// they are built from.
10527 defm VPSHLDV     : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SSE_INTMUL_ITINS_P>;
10528 defm VPSHRDV     : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SSE_INTMUL_ITINS_P>;
10529 defm VPSHLD      : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SSE_INTMUL_ITINS_P>;
10530 defm VPSHRD      : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SSE_INTMUL_ITINS_P>;
10531
10532 // Compress
// Byte and word element widths share opcode 0x63; the word form adds VEX_W.
10533 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", AVX512_COMPRESS,
10534                                          avx512vl_i8_info, HasVBMI2>, EVEX;
10535 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", AVX512_COMPRESS,
10536                                           avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
10537 // Expand
// Byte and word element widths share opcode 0x62; the word form adds VEX_W.
10538 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", AVX512_EXPAND,
10539                                       avx512vl_i8_info, HasVBMI2>, EVEX;
10540 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", AVX512_EXPAND,
10541                                       avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
10542
10543 //===----------------------------------------------------------------------===//
10544 // VNNI
10545 //===----------------------------------------------------------------------===//
10546
// Three-source VNNI instruction with $src1 tied to $dst (the constraint is
// applied by the enclosing `let`). Defines a register form (r), a full-vector
// memory form (m) and a broadcast memory form (mb, EVEX_B). Only the two
// memory forms carry EVEX_CD8<32, CD8VF>.
10547 let Constraints = "$src1 = $dst" in
10548 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
10549                     OpndItins itins, X86VectorVTInfo VTI> {
10550   defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
10551                                    (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
10552                                    "$src3, $src2", "$src2, $src3",
10553                                    (VTI.VT (OpNode VTI.RC:$src1,
10554                                             VTI.RC:$src2, VTI.RC:$src3)),
10555                                    itins.rr>, EVEX_4V, T8PD, Sched<[itins.Sched]>;
10556   defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10557                                    (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
10558                                    "$src3, $src2", "$src2, $src3",
10559                                    (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
10560                                             (VTI.VT (bitconvert
10561                                                      (VTI.LdFrag addr:$src3))))),
10562                                    itins.rm>, EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
10563                                    Sched<[itins.Sched.Folded, ReadAfterLd]>;
10564   defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10565                                    (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
10566                                    OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
10567                                    "$src2, ${src3}"##VTI.BroadcastStr,
10568                                    (OpNode VTI.RC:$src1, VTI.RC:$src2,
10569                                     (VTI.VT (X86VBroadcast
10570                                              (VTI.ScalarLdFrag addr:$src3)))),
10571                                    itins.rm>, EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
10572                                    T8PD, Sched<[itins.Sched.Folded, ReadAfterLd]>;
10573 }
10574
// Instantiates VNNI_rmb for 32-bit-element vectors at each width: v16i32
// under HasVNNI, v8i32 and v4i32 under HasVNNI + HasVLX.
10575 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode, OpndItins itins> {
10576   let Predicates = [HasVNNI] in
10577   defm Z      :   VNNI_rmb<Op, OpStr, OpNode, itins, v16i32_info>, EVEX_V512;
10578   let Predicates = [HasVNNI, HasVLX] in {
10579     defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, itins, v8i32x_info>, EVEX_V256;
10580     defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, itins, v4i32x_info>, EVEX_V128;
10581   }
10582 }
10583
10584 // FIXME: Is there a better scheduler itinerary for VPDP?
// The four VNNI instructions occupy consecutive opcodes 0x50-0x53 and all use
// the SSE_PMADD itinerary as a placeholder (see the FIXME above).
10585 defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SSE_PMADD>;
10586 defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SSE_PMADD>;
10587 defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SSE_PMADD>;
10588 defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SSE_PMADD>;
10589
10590 //===----------------------------------------------------------------------===//
10591 // Bit Algorithms
10592 //===----------------------------------------------------------------------===//
10593
10594 // FIXME: Is there a better scheduler itinerary for VPOPCNTB/VPOPCNTW?
// Byte and word population count, both selected from ctpop and guarded by
// HasBITALG. They share opcode 0x54; the word form adds VEX_W.
10595 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SSE_INTALU_ITINS_P,
10596                                    avx512vl_i8_info, HasBITALG>;
10597 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SSE_INTALU_ITINS_P,
10598                                    avx512vl_i16_info, HasBITALG>, VEX_W;
10599
// Additional lowering patterns for the same instructions, referenced by
// instruction-name string.
10600 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
10601 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
10602
// vpshufbitqmb (opcode 0x8F) as a maskable compare: the result is written to
// a mask register (VTI.KRC). Defines a register form (rr) and a full-vector
// memory form (rm); only the memory form carries EVEX_CD8<8, CD8VF>.
10603 multiclass VPSHUFBITQMB_rm<OpndItins itins, X86VectorVTInfo VTI> {
10604   defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
10605                                 (ins VTI.RC:$src1, VTI.RC:$src2),
10606                                 "vpshufbitqmb",
10607                                 "$src2, $src1", "$src1, $src2",
10608                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
10609                                 (VTI.VT VTI.RC:$src2)), itins.rr>, EVEX_4V, T8PD,
10610                                 Sched<[itins.Sched]>;
10611   defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
10612                                 (ins VTI.RC:$src1, VTI.MemOp:$src2),
10613                                 "vpshufbitqmb",
10614                                 "$src2, $src1", "$src1, $src2",
10615                                 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
10616                                 (VTI.VT (bitconvert (VTI.LdFrag addr:$src2)))),
10617                                 itins.rm>, EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
10618                                 Sched<[itins.Sched.Folded, ReadAfterLd]>;
10619 }
10620
// Instantiates VPSHUFBITQMB_rm per vector width: 512-bit under HasBITALG,
// 256- and 128-bit under HasBITALG + HasVLX.
10621 multiclass VPSHUFBITQMB_common<OpndItins itins, AVX512VLVectorVTInfo VTI> {
10622   let Predicates = [HasBITALG] in
10623   defm Z      : VPSHUFBITQMB_rm<itins, VTI.info512>, EVEX_V512;
10624   let Predicates = [HasBITALG, HasVLX] in {
10625     defm Z256 : VPSHUFBITQMB_rm<itins, VTI.info256>, EVEX_V256;
10626     defm Z128 : VPSHUFBITQMB_rm<itins, VTI.info128>, EVEX_V128;
10627   }
10628 }
10629
10630 // FIXME: Is there a better scheduler itinerary for VPSHUFBITQMB?
// Defined over byte-element vectors, with SSE_INTMUL_ITINS_P as a placeholder
// itinerary (see the FIXME above).
10631 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SSE_INTMUL_ITINS_P, avx512vl_i8_info>;
10632
10633 //===----------------------------------------------------------------------===//
10634 // GFNI
10635 //===----------------------------------------------------------------------===//
10636
// Instantiates avx512_binop_rm for byte vectors at each width. Every width
// requires HasGFNI and HasBWI; the 512-bit form additionally requires
// HasAVX512, the 256- and 128-bit forms HasVLX. The trailing `1` is passed
// positionally to avx512_binop_rm.
// NOTE(review): presumably that argument is the commutable flag; confirm
// against the avx512_binop_rm definition.
10637 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode> {
10638   let Predicates = [HasGFNI, HasAVX512, HasBWI] in
10639   defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info,
10640                                 SSE_INTALU_ITINS_P, 1>, EVEX_V512;
10641   let Predicates = [HasGFNI, HasVLX, HasBWI] in {
10642     defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info,
10643                                 SSE_INTALU_ITINS_P, 1>, EVEX_V256;
10644     defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info,
10645                                 SSE_INTALU_ITINS_P, 1>, EVEX_V128;
10646   }
10647 }
10648
// vgf2p8mulb, opcode 0xCF, selected from X86GF2P8mulb.
10649 defm GF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb>,
10650                  EVEX_CD8<8, CD8VF>, T8PD;
10651
// Extends avx512_3Op_rm_imm8 (inherited forms) with a broadcast memory form
// (rmbi, EVEX_B). The memory operand is loaded with loadi64, broadcast as
// BcstVTI.VT, and bitconverted before being passed to OpNode next to the
// VTI.VT register source; BcstVTI also supplies the broadcast string used in
// the assembly syntax.
10652 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
10653                                       OpndItins itins, X86VectorVTInfo VTI,
10654                                       X86VectorVTInfo BcstVTI>
10655            : avx512_3Op_rm_imm8<Op, OpStr, OpNode, itins, VTI, VTI> {
10656   let ExeDomain = VTI.ExeDomain in
10657   defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10658                 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
10659                 OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
10660                 "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
10661                 (OpNode (VTI.VT VTI.RC:$src1),
10662                  (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
10663                  (i8 imm:$src3)), itins.rm>, EVEX_B,
10664                  Sched<[itins.Sched.Folded, ReadAfterLd]>;
10665 }
10666
// Instantiates GF2P8AFFINE_avx512_rmb_imm per vector width, pairing each
// byte-vector info with the 64-bit-element info of the same total width for
// the broadcast form. Every width requires HasGFNI and HasBWI; the 512-bit
// form additionally requires HasAVX512, the 256- and 128-bit forms HasVLX.
10667 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
10668                                      OpndItins itins> {
10669   let Predicates = [HasGFNI, HasAVX512, HasBWI] in
10670   defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins, v64i8_info,
10671                                            v8i64_info>, EVEX_V512;
10672   let Predicates = [HasGFNI, HasVLX, HasBWI] in {
10673     defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins, v32i8x_info,
10674                                            v4i64x_info>, EVEX_V256;
10675     defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins, v16i8x_info,
10676                                            v2i64x_info>, EVEX_V128;
10677   }
10678 }
10679
// The two affine GFNI instructions: the inverse form at opcode 0xCF and the
// plain form at 0xCE. Both set VEX_W and use the SSE_INTMUL_ITINS_P itinerary.
10680 defm GF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
10681                         X86GF2P8affineinvqb, SSE_INTMUL_ITINS_P>,
10682                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
10683 defm GF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
10684                         X86GF2P8affineqb, SSE_INTMUL_ITINS_P>,
10685                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
10686