1 //===-- X86InstrFragmentsSIMD.td - x86 SIMD ISA ------------*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file provides pattern fragments useful for SIMD instructions.
12 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
15 // MMX specific DAG Nodes.
16 //===----------------------------------------------------------------------===//
18 // Low word of MMX to GPR.
19 def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
20 [SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>;
21 // GPR to low word of MMX.
22 def MMX_X86movw2d : SDNode<"X86ISD::MMX_MOVW2D", SDTypeProfile<1, 1,
23 [SDTCisVT<0, x86mmx>, SDTCisVT<1, i32>]>>;
25 //===----------------------------------------------------------------------===//
26 // MMX Pattern Fragments
27 //===----------------------------------------------------------------------===//
// Whole-register load producing an x86mmx value.
29 def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>;
31 //===----------------------------------------------------------------------===//
32 // SSE specific DAG Nodes.
33 //===----------------------------------------------------------------------===//
35 def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
36 SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
// NOTE(review): embedded numbering jumps 36 -> 39; the tail of SDTX86VFCMP
// (its final constraint and the closing "]>;") appears to be missing here.
39 def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
40 def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
// The *s variants are presumably the scalar forms — confirm in X86ISelLowering.
41 def X86fmins : SDNode<"X86ISD::FMINS", SDTFPBinOp>;
42 def X86fmaxs : SDNode<"X86ISD::FMAXS", SDTFPBinOp>;
44 // Commutative and Associative FMIN and FMAX.
45 def X86fminc : SDNode<"X86ISD::FMINC", SDTFPBinOp,
46 [SDNPCommutative, SDNPAssociative]>;
47 def X86fmaxc : SDNode<"X86ISD::FMAXC", SDTFPBinOp,
48 [SDNPCommutative, SDNPAssociative]>;
// Bitwise logic on FP vectors. AND/OR/XOR carry commutative/associative
// flags; FANDN deliberately carries none.
50 def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
51 [SDNPCommutative, SDNPAssociative]>;
52 def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
53 [SDNPCommutative, SDNPAssociative]>;
54 def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
55 [SDNPCommutative, SDNPAssociative]>;
56 def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp>;
57 def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
58 def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
// Horizontal add/sub, FP (FH*) and integer (H*) flavors.
59 def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
60 def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
61 def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
62 def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
63 def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
64 def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
65 def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
66 def X86pshufb : SDNode<"X86ISD::PSHUFB",
67 SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i8>, SDTCisSameAs<0,1>,
// NOTE(review): numbering jumps 67 -> 69; the closing line of the PSHUFB
// type profile appears to be missing.
69 def X86psadbw : SDNode<"X86ISD::PSADBW",
70 SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
71 SDTCVecEltisVT<1, i8>,
72 SDTCisSameSizeAs<0,1>,
73 SDTCisSameAs<1,2>]>, [SDNPCommutative]>;
74 def X86dbpsadbw : SDNode<"X86ISD::DBPSADBW",
75 SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>,
76 SDTCVecEltisVT<1, i8>,
77 SDTCisSameSizeAs<0,1>,
78 SDTCisSameAs<1,2>, SDTCisInt<3>]>>;
79 def X86andnp : SDNode<"X86ISD::ANDNP",
80 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
// NOTE(review): several profiles in this region are missing their final
// constraint/closing lines (embedded numbering gaps at 81, 84, 87 and 90).
82 def X86multishift : SDNode<"X86ISD::MULTISHIFT",
83 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
85 def X86pextrb : SDNode<"X86ISD::PEXTRB",
86 SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v16i8>,
88 def X86pextrw : SDNode<"X86ISD::PEXTRW",
89 SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v8i16>,
// Insert a GPR (i32) into a byte/word element; operand 3 is the index.
91 def X86pinsrb : SDNode<"X86ISD::PINSRB",
92 SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
93 SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
94 def X86pinsrw : SDNode<"X86ISD::PINSRW",
95 SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
96 SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
97 def X86insertps : SDNode<"X86ISD::INSERTPS",
98 SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
99 SDTCisVT<2, v4f32>, SDTCisVT<3, i8>]>>;
// Move low element of the source into a zeroed destination vector.
100 def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
101 SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
// Load that zero-extends into a vector; chained, with a memory operand.
103 def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
104 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
// Vector zero-/sign-extension: integer vectors, result elements wider
// than source elements (SDTCisOpSmallerThanOp<1, 0>).
106 def X86vzext : SDNode<"X86ISD::VZEXT",
107 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
108 SDTCisInt<0>, SDTCisInt<1>,
109 SDTCisOpSmallerThanOp<1, 0>]>>;
111 def X86vsext : SDNode<"X86ISD::VSEXT",
112 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
113 SDTCisInt<0>, SDTCisInt<1>,
114 SDTCisOpSmallerThanOp<0, 1>]>>;
// Vector truncation; result elements narrower than source elements.
// The S/US variants are presumably saturating forms — confirm upstream.
116 def SDTVtrunc : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
117 SDTCisInt<0>, SDTCisInt<1>,
118 SDTCisOpSmallerThanOp<0, 1>]>;
120 def X86vtrunc : SDNode<"X86ISD::VTRUNC", SDTVtrunc>;
121 def X86vtruncs : SDNode<"X86ISD::VTRUNCS", SDTVtrunc>;
122 def X86vtruncus : SDNode<"X86ISD::VTRUNCUS", SDTVtrunc>;
// Whole-vector f32 -> f64 extension and f64 -> f32 rounding.
124 def X86vfpext : SDNode<"X86ISD::VFPEXT",
125 SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
126 SDTCVecEltisVT<1, f32>,
127 SDTCisSameSizeAs<0, 1>]>>;
128 def X86vfpround: SDNode<"X86ISD::VFPROUND",
129 SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
130 SDTCVecEltisVT<1, f64>,
131 SDTCisSameSizeAs<0, 1>]>>;
133 def X86froundRnd: SDNode<"X86ISD::VFPROUNDS_RND",
134 SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
136 SDTCVecEltisVT<2, f64>,
137 SDTCisSameSizeAs<0, 2>,
// NOTE(review): both scalar rounding-conversion profiles here have numbering
// gaps (135, 138-139, 142, 145-146) — constraints and terminators missing.
140 def X86fpextRnd : SDNode<"X86ISD::VFPEXTS_RND",
141 SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f64>,
143 SDTCVecEltisVT<2, f32>,
144 SDTCisSameSizeAs<0, 2>,
// Integer vector shift where the amount is an i8 immediate (operand 2).
147 def X86vshiftimm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
148 SDTCisVT<2, i8>, SDTCisInt<0>]>;
150 def X86vshldq : SDNode<"X86ISD::VSHLDQ", X86vshiftimm>;
151 def X86vshrdq : SDNode<"X86ISD::VSRLDQ", X86vshiftimm>;
152 def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
153 def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>;
154 def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>;
// NOTE(review): the "def X86CmpMaskCC :" header line appears to be missing
// (numbering jumps 154 -> 157); this orphan profile is referenced by X86cmpm
// and X86cmpm_c below.
157 SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
158 SDTCisVec<1>, SDTCisSameAs<2, 1>,
159 SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>]>;
160 def X86CmpMaskCCRound :
161 SDTypeProfile<1, 4, [SDTCisVec<0>,SDTCVecEltisVT<0, i1>,
162 SDTCisVec<1>, SDTCisFP<1>, SDTCisSameAs<2, 1>,
163 SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>,
165 def X86CmpMaskCCScalar :
166 SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisFP<1>, SDTCisSameAs<1, 2>,
169 def X86CmpMaskCCScalarRound :
170 SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisFP<1>, SDTCisSameAs<1, 2>,
171 SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
173 def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
174 // Hack to make CMPM commutable in tablegen patterns for load folding.
175 def X86cmpm_c : SDNode<"X86ISD::CMPM", X86CmpMaskCC, [SDNPCommutative]>;
176 def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;
177 def X86cmpms : SDNode<"X86ISD::FSETCCM", X86CmpMaskCCScalar>;
178 def X86cmpmsRnd : SDNode<"X86ISD::FSETCCM_RND", X86CmpMaskCCScalarRound>;
180 def X86phminpos: SDNode<"X86ISD::PHMINPOS",
181 SDTypeProfile<1, 1, [SDTCisVT<0, v8i16>, SDTCisVT<1, v8i16>]>>;
// Shift with a vector amount operand (uniform) vs. per-element (variable).
183 def X86vshiftuniform : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
184 SDTCisVec<2>, SDTCisInt<0>,
187 def X86vshl : SDNode<"X86ISD::VSHL", X86vshiftuniform>;
188 def X86vsrl : SDNode<"X86ISD::VSRL", X86vshiftuniform>;
189 def X86vsra : SDNode<"X86ISD::VSRA", X86vshiftuniform>;
191 def X86vshiftvariable : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
192 SDTCisSameAs<0,2>, SDTCisInt<0>]>;
194 def X86vsrav : SDNode<"X86ISD::VSRAV", X86vshiftvariable>;
196 def X86vshli : SDNode<"X86ISD::VSHLI", X86vshiftimm>;
197 def X86vsrli : SDNode<"X86ISD::VSRLI", X86vshiftimm>;
198 def X86vsrai : SDNode<"X86ISD::VSRAI", X86vshiftimm>;
// Mask-register (i1-vector) shifts. NOTE(review): both profiles are missing
// lines (gaps at 202-203 and 206-208).
200 def X86kshiftl : SDNode<"X86ISD::KSHIFTL",
201 SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i1>,
204 def X86kshiftr : SDNode<"X86ISD::KSHIFTR",
205 SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i1>,
209 def X86kadd : SDNode<"X86ISD::KADD", SDTIntBinOp, [SDNPCommutative]>;
211 def X86vrotli : SDNode<"X86ISD::VROTLI", X86vshiftimm>;
212 def X86vrotri : SDNode<"X86ISD::VROTRI", X86vshiftimm>;
214 def X86vpshl : SDNode<"X86ISD::VPSHL", X86vshiftvariable>;
215 def X86vpsha : SDNode<"X86ISD::VPSHA", X86vshiftvariable>;
// Integer compares carrying an explicit i8 condition-code operand (operand 3).
217 def X86vpcom : SDNode<"X86ISD::VPCOM",
218 SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
220 SDTCisVT<3, i8>, SDTCisInt<0>]>>;
221 def X86vpcomu : SDNode<"X86ISD::VPCOMU",
222 SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
224 SDTCisVT<3, i8>, SDTCisInt<0>]>>;
225 def X86vpermil2 : SDNode<"X86ISD::VPERMIL2",
226 SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
228 SDTCisFP<0>, SDTCisInt<3>,
229 SDTCisSameNumEltsAs<0, 3>,
230 SDTCisSameSizeAs<0,3>,
// NOTE(review): lines 227 and 231 are missing from the VPERMIL2 profile.
232 def X86vpperm : SDNode<"X86ISD::VPPERM",
233 SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
234 SDTCisSameAs<0,2>, SDTCisSameAs<0, 3>]>>;
// i32 result of a packed test; shared by PTEST/TESTP and the KORTEST/KTEST
// nodes below. NOTE(review): line 237 of the profile is missing.
236 def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
238 SDTCisSameAs<2, 1>]>;
// Saturating add/sub (US = unsigned, S = signed per the opcode names) plus
// high-multiply-round-scale and average.
240 def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp, [SDNPCommutative]>;
241 def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
242 def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>;
243 def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>;
244 def X86mulhrs : SDNode<"X86ISD::MULHRS", SDTIntBinOp, [SDNPCommutative]>;
245 def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp, [SDNPCommutative]>;
246 def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
247 def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
248 def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
249 def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>;
251 def X86movmsk : SDNode<"X86ISD::MOVMSK",
252 SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVec<1>]>>;
// Scalar select on a v1i1 mask (operand 1).
254 def X86selects : SDNode<"X86ISD::SELECTS",
255 SDTypeProfile<1, 3, [SDTCisVT<1, v1i1>,
257 SDTCisSameAs<2, 3>]>>;
// i64-element multiply nodes. NOTE(review): the remaining constraint lines
// (261-263 and 266-269) are missing from both profiles.
259 def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
260 SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
264 def X86pmuldq : SDNode<"X86ISD::PMULDQ",
265 SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
// SSE4a extract/insert with two i8 immediates (length/index).
270 def X86extrqi : SDNode<"X86ISD::EXTRQI",
271 SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
272 SDTCisVT<2, i8>, SDTCisVT<3, i8>]>>;
273 def X86insertqi : SDNode<"X86ISD::INSERTQI",
274 SDTypeProfile<1, 4, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
275 SDTCisSameAs<1,2>, SDTCisVT<3, i8>,
278 // Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get
279 // translated into one of the target nodes below during lowering.
280 // Note: this is a work in progress...
281 def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
282 def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
284 def SDTShuff2OpFP : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisFP<0>,
285 SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>;
287 def SDTShuff2OpM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
288 SDTCisFP<0>, SDTCisInt<2>,
289 SDTCisSameNumEltsAs<0,2>,
290 SDTCisSameSizeAs<0,2>]>;
291 def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
292 SDTCisSameAs<0,1>, SDTCisVT<2, i8>]>;
293 def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
294 SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>;
// FP op + immediate profiles. NOTE(review): each of the next five profiles is
// missing its trailing constraint lines (gaps after 295, 299, 308, 311, 314).
295 def SDTFPBinOpImm: SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
299 def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisFP<0>, SDTCisVec<0>,
304 def SDTFPTernaryOpImmRound: SDTypeProfile<1, 5, [SDTCisFP<0>, SDTCisSameAs<0,1>,
307 SDTCisSameSizeAs<0, 3>,
308 SDTCisSameNumEltsAs<0, 3>,
311 def SDTFPUnaryOpImm: SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
314 def SDTFPUnaryOpImmRound: SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
319 def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
320 def SDTVBroadcastm : SDTypeProfile<1, 1, [SDTCisVec<0>,
321 SDTCisInt<0>, SDTCisInt<1>]>;
323 def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
324 SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
326 def SDTTernlog : SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisVec<0>,
327 SDTCisSameAs<0,1>, SDTCisSameAs<0,2>,
328 SDTCisSameAs<0,3>, SDTCisVT<4, i8>]>;
330 def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc.
331 SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisVT<3, i32>]>;
333 def SDTFPUnaryOpRound : SDTypeProfile<1, 2, [ // fsqrt_round, fgetexp_round, etc.
334 SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisVT<2, i32>]>;
336 def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
337 SDTCisSameAs<1,2>, SDTCisSameAs<1,3>,
338 SDTCisFP<0>, SDTCisVT<4, i32>]>;
340 def X86PAlignr : SDNode<"X86ISD::PALIGNR",
341 SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i8>,
// NOTE(review): PALIGNR's remaining constraint lines (342-344) are missing.
345 def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
347 def X86VShld : SDNode<"X86ISD::VSHLD", SDTShuff3OpI>;
348 def X86VShrd : SDNode<"X86ISD::VSHRD", SDTShuff3OpI>;
// Funnel-shift nodes with a vector shift-amount operand.
// NOTE(review): lines 351-352 and 356-357 of the profiles are missing.
349 def X86VShldv : SDNode<"X86ISD::VSHLDV",
350 SDTypeProfile<1, 3, [SDTCisVec<0>,
353 SDTCisSameAs<0,3>]>>;
354 def X86VShrdv : SDNode<"X86ISD::VSHRDV",
355 SDTypeProfile<1, 3, [SDTCisVec<0>,
358 SDTCisSameAs<0,3>]>>;
360 def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>;
362 def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
363 def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
364 def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
366 def X86Shufp : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>;
367 def X86Shuf128 : SDNode<"X86ISD::SHUF128", SDTShuff3OpI>;
369 def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;
370 def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>;
371 def X86Movsldup : SDNode<"X86ISD::MOVSLDUP", SDTShuff1Op>;
373 def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2OpFP>;
374 def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2OpFP>;
376 def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2OpFP>;
377 def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2OpFP>;
// Pack: two integer vectors in, one same-total-size vector of narrower
// elements out. NOTE(review): line 382 of the profile is missing.
379 def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>,
380 SDTCisVec<1>, SDTCisInt<1>,
381 SDTCisSameSizeAs<0,1>,
383 SDTCisOpSmallerThanOp<0, 1>]>;
384 def X86Packss : SDNode<"X86ISD::PACKSS", SDTPack>;
385 def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>;
387 def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
388 def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
// Multiply-add pairs: i8 inputs -> i16 results, i16 inputs -> i32 results.
390 def X86vpmaddubsw : SDNode<"X86ISD::VPMADDUBSW",
391 SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
392 SDTCVecEltisVT<1, i8>,
393 SDTCisSameSizeAs<0,1>,
394 SDTCisSameAs<1,2>]>>;
395 def X86vpmaddwd : SDNode<"X86ISD::VPMADDWD",
396 SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i32>,
397 SDTCVecEltisVT<1, i16>,
398 SDTCisSameSizeAs<0,1>,
// NOTE(review): VPMADDWD's final constraint/terminator (lines 399-401) is
// missing.
402 def X86VPermilpv : SDNode<"X86ISD::VPERMILPV", SDTShuff2OpM>;
403 def X86VPermilpi : SDNode<"X86ISD::VPERMILPI", SDTShuff2OpI>;
404 def X86VPermv : SDNode<"X86ISD::VPERMV",
405 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<1>,
406 SDTCisSameNumEltsAs<0,1>,
407 SDTCisSameSizeAs<0,1>,
408 SDTCisSameAs<0,2>]>>;
409 def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>;
410 def X86VPermt2 : SDNode<"X86ISD::VPERMV3",
411 SDTypeProfile<1, 3, [SDTCisVec<0>,
412 SDTCisSameAs<0,1>, SDTCisInt<2>,
413 SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2>,
414 SDTCisSameSizeAs<0,2>,
415 SDTCisSameAs<0,3>]>, []>;
417 def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>;
419 def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
421 def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPTernaryOpImmRound>;
422 def X86VFixupimmScalar : SDNode<"X86ISD::VFIXUPIMMS", SDTFPTernaryOpImmRound>;
423 def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImm>;
424 def X86VRangeRnd : SDNode<"X86ISD::VRANGE_RND", SDTFPBinOpImmRound>;
425 def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImm>;
426 def X86VReduceRnd : SDNode<"X86ISD::VREDUCE_RND", SDTFPUnaryOpImmRound>;
427 def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImm>;
428 def X86VRndScaleRnd: SDNode<"X86ISD::VRNDSCALE_RND", SDTFPUnaryOpImmRound>;
429 def X86VGetMant : SDNode<"X86ISD::VGETMANT", SDTFPUnaryOpImm>;
430 def X86VGetMantRnd : SDNode<"X86ISD::VGETMANT_RND", SDTFPUnaryOpImmRound>;
// FP classification: i1-vector result, i32 immediate class mask.
// NOTE(review): line 433 of the VFPCLASS profile is missing.
431 def X86Vfpclass : SDNode<"X86ISD::VFPCLASS",
432 SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i1>,
434 SDTCisSameNumEltsAs<0,1>,
435 SDTCisVT<2, i32>]>, []>;
436 def X86Vfpclasss : SDNode<"X86ISD::VFPCLASSS",
437 SDTypeProfile<1, 2, [SDTCisVT<0, v1i1>,
438 SDTCisFP<1>, SDTCisVT<2, i32>]>,[]>;
// Broadcast a subvector (operand must be a subvector of the result type).
440 def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
441 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
442 SDTCisSubVecOfVec<1, 0>]>, []>;
444 def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
445 def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
447 def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
449 def X86Addsub : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>;
// FP arithmetic carrying an explicit rounding-mode operand (the trailing
// i32 of SDTFPBinOpRound / SDTFPUnaryOpRound).
451 def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>;
452 def X86faddRnds : SDNode<"X86ISD::FADDS_RND", SDTFPBinOpRound>;
453 def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>;
454 def X86fsubRnds : SDNode<"X86ISD::FSUBS_RND", SDTFPBinOpRound>;
455 def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
456 def X86fmulRnds : SDNode<"X86ISD::FMULS_RND", SDTFPBinOpRound>;
457 def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
458 def X86fdivRnds : SDNode<"X86ISD::FDIVS_RND", SDTFPBinOpRound>;
459 def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>;
460 def X86fmaxRnds : SDNode<"X86ISD::FMAXS_RND", SDTFPBinOpRound>;
461 def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
462 def X86fminRnds : SDNode<"X86ISD::FMINS_RND", SDTFPBinOpRound>;
463 def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>;
464 def X86scalefs : SDNode<"X86ISD::SCALEFS", SDTFPBinOpRound>;
465 def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>;
466 def X86fsqrtRnds : SDNode<"X86ISD::FSQRTS_RND", SDTFPBinOpRound>;
467 def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>;
468 def X86fgetexpRnds : SDNode<"X86ISD::FGETEXPS_RND", SDTFPBinOpRound>;
// FMA family. Note X86Fmadd maps to the generic ISD::FMA opcode, not an
// X86ISD one.
470 def X86Fmadd : SDNode<"ISD::FMA", SDTFPTernaryOp, [SDNPCommutative]>;
471 def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp, [SDNPCommutative]>;
472 def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
473 def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
474 def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFPTernaryOp, [SDNPCommutative]>;
475 def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFPTernaryOp, [SDNPCommutative]>;
// FMA with an explicit rounding-mode operand.
477 def X86FmaddRnd : SDNode<"X86ISD::FMADD_RND", SDTFmaRound, [SDNPCommutative]>;
478 def X86FnmaddRnd : SDNode<"X86ISD::FNMADD_RND", SDTFmaRound, [SDNPCommutative]>;
479 def X86FmsubRnd : SDNode<"X86ISD::FMSUB_RND", SDTFmaRound, [SDNPCommutative]>;
480 def X86FnmsubRnd : SDNode<"X86ISD::FNMSUB_RND", SDTFmaRound, [SDNPCommutative]>;
481 def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound, [SDNPCommutative]>;
482 def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound, [SDNPCommutative]>;
// Integer multiply-accumulate (VPMADD52) profile and nodes.
484 def SDTIFma : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0,1>,
485 SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
486 def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTIFma, [SDNPCommutative]>;
487 def x86vpmadd52h : SDNode<"X86ISD::VPMADD52H", SDTIFma, [SDNPCommutative]>;
489 def X86rsqrt14 : SDNode<"X86ISD::RSQRT14", SDTFPUnaryOp>;
490 def X86rcp14 : SDNode<"X86ISD::RCP14", SDTFPUnaryOp>;
// VNNI dot-product-accumulate profile and nodes.
493 def SDTVnni : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
494 SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
495 def X86Vpdpbusd : SDNode<"X86ISD::VPDPBUSD", SDTVnni>;
496 def X86Vpdpbusds : SDNode<"X86ISD::VPDPBUSDS", SDTVnni>;
497 def X86Vpdpwssd : SDNode<"X86ISD::VPDPWSSD", SDTVnni>;
498 def X86Vpdpwssds : SDNode<"X86ISD::VPDPWSSDS", SDTVnni>;
500 def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", SDTFPUnaryOpRound>;
501 def X86rcp28 : SDNode<"X86ISD::RCP28", SDTFPUnaryOpRound>;
502 def X86exp2 : SDNode<"X86ISD::EXP2", SDTFPUnaryOpRound>;
504 def X86rsqrt14s : SDNode<"X86ISD::RSQRT14S", SDTFPBinOp>;
505 def X86rcp14s : SDNode<"X86ISD::RCP14S", SDTFPBinOp>;
506 def X86rsqrt28s : SDNode<"X86ISD::RSQRT28S", SDTFPBinOpRound>;
507 def X86rcp28s : SDNode<"X86ISD::RCP28S", SDTFPBinOpRound>;
508 def X86Ranges : SDNode<"X86ISD::VRANGES", SDTFPBinOpImm>;
509 def X86RndScales : SDNode<"X86ISD::VRNDSCALES", SDTFPBinOpImm>;
510 def X86Reduces : SDNode<"X86ISD::VREDUCES", SDTFPBinOpImm>;
511 def X86GetMants : SDNode<"X86ISD::VGETMANTS", SDTFPBinOpImm>;
512 def X86RangesRnd : SDNode<"X86ISD::VRANGES_RND", SDTFPBinOpImmRound>;
513 def X86RndScalesRnd : SDNode<"X86ISD::VRNDSCALES_RND", SDTFPBinOpImmRound>;
514 def X86ReducesRnd : SDNode<"X86ISD::VREDUCES_RND", SDTFPBinOpImmRound>;
515 def X86GetMantsRnd : SDNode<"X86ISD::VGETMANTS_RND", SDTFPBinOpImmRound>;
517 def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 1,
518 [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
519 def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1,
520 [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
// Bit-shuffle producing an i1-vector result. NOTE(review): line 525 of the
// profile is missing.
523 def X86Vpshufbitqmb : SDNode<"X86ISD::VPSHUFBITQMB",
524 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
526 SDTCVecEltisVT<0,i1>,
527 SDTCisSameNumEltsAs<0,1>]>>;
// Conversion type profiles. NOTE(review): several below are missing trailing
// lines (numbering gaps at 531-532, 537 and 545-547).
529 def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,
530 SDTCisSameAs<0,1>, SDTCisInt<2>,
533 def SDTFloatToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
534 SDTCisInt<0>, SDTCisFP<1>]>;
535 def SDTFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
536 SDTCisInt<0>, SDTCisFP<1>,
538 def SDTSFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisFP<1>,
539 SDTCisVec<1>, SDTCisVT<2, i32>]>;
541 def SDTVintToFP: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
542 SDTCisFP<0>, SDTCisInt<1>]>;
543 def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
544 SDTCisFP<0>, SDTCisInt<1>,
// Scalar conversions with an explicit rounding-mode operand.
548 def X86SintToFpRnd : SDNode<"X86ISD::SCALAR_SINT_TO_FP_RND", SDTintToFPRound>;
549 def X86UintToFpRnd : SDNode<"X86ISD::SCALAR_UINT_TO_FP_RND", SDTintToFPRound>;
551 def X86cvtts2IntRnd : SDNode<"X86ISD::CVTTS2SI_RND", SDTSFloatToIntRnd>;
552 def X86cvtts2UIntRnd : SDNode<"X86ISD::CVTTS2UI_RND", SDTSFloatToIntRnd>;
554 def X86cvts2si : SDNode<"X86ISD::CVTS2SI_RND", SDTSFloatToIntRnd>;
555 def X86cvts2usi : SDNode<"X86ISD::CVTS2UI_RND", SDTSFloatToIntRnd>;
557 // Vector with rounding mode
559 // cvtt fp-to-int stuff
560 def X86cvttp2siRnd : SDNode<"X86ISD::CVTTP2SI_RND", SDTFloatToIntRnd>;
561 def X86cvttp2uiRnd : SDNode<"X86ISD::CVTTP2UI_RND", SDTFloatToIntRnd>;
// cvt int-to-fp stuff
563 def X86VSintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTVintToFPRound>;
564 def X86VUintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTVintToFPRound>;
566 // cvt fp-to-int stuff
567 def X86cvtp2IntRnd : SDNode<"X86ISD::CVTP2SI_RND", SDTFloatToIntRnd>;
568 def X86cvtp2UIntRnd : SDNode<"X86ISD::CVTP2UI_RND", SDTFloatToIntRnd>;
570 // Vector without rounding mode
572 // cvtt fp-to-int stuff
573 def X86cvttp2si : SDNode<"X86ISD::CVTTP2SI", SDTFloatToInt>;
574 def X86cvttp2ui : SDNode<"X86ISD::CVTTP2UI", SDTFloatToInt>;
// cvt int-to-fp stuff (SDTVintToFP: FP result, integer input)
576 def X86VSintToFP : SDNode<"X86ISD::CVTSI2P", SDTVintToFP>;
577 def X86VUintToFP : SDNode<"X86ISD::CVTUI2P", SDTVintToFP>;
579 // cvt fp-to-int stuff (SDTFloatToInt: integer result, FP input)
580 def X86cvtp2Int : SDNode<"X86ISD::CVTP2SI", SDTFloatToInt>;
581 def X86cvtp2UInt : SDNode<"X86ISD::CVTP2UI", SDTFloatToInt>;
// Half-precision (i16 elements) <-> single-precision conversions.
584 def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS",
585 SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
586 SDTCVecEltisVT<1, i16>]> >;
588 def X86cvtph2psRnd : SDNode<"X86ISD::CVTPH2PS_RND",
589 SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
590 SDTCVecEltisVT<1, i16>,
591 SDTCisVT<2, i32>]> >;
593 def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH",
594 SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
595 SDTCVecEltisVT<1, f32>,
596 SDTCisVT<2, i32>]> >;
597 def X86vfpextRnd : SDNode<"X86ISD::VFPEXT_RND",
598 SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
599 SDTCVecEltisVT<1, f32>,
600 SDTCisOpSmallerThanOp<1, 0>,
// NOTE(review): the trailing lines of VFPEXT_RND (601) and VFPROUND_RND
// (606-607) are missing.
602 def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND",
603 SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
604 SDTCVecEltisVT<1, f64>,
605 SDTCisOpSmallerThanOp<0, 1>,
608 // Galois field arithmetic (GFNI)
609 def X86GF2P8affineinvqb : SDNode<"X86ISD::GF2P8AFFINEINVQB", SDTBlend>;
610 def X86GF2P8affineqb : SDNode<"X86ISD::GF2P8AFFINEQB", SDTBlend>;
611 def X86GF2P8mulb : SDNode<"X86ISD::GF2P8MULB", SDTIntBinOp>;
613 //===----------------------------------------------------------------------===//
614 // SSE Complex Patterns
615 //===----------------------------------------------------------------------===//
617 // These are 'extloads' from a scalar to the low element of a vector, zeroing
618 // the top elements. These are used for the SSE 'ss' and 'sd' instruction
// NOTE(review): line 619 — the end of the sentence above — is missing.
620 def sse_load_f32 : ComplexPattern<v4f32, 5, "selectScalarSSELoad", [],
621 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
622 SDNPWantRoot, SDNPWantParent]>;
623 def sse_load_f64 : ComplexPattern<v2f64, 5, "selectScalarSSELoad", [],
624 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
625 SDNPWantRoot, SDNPWantParent]>;
// Memory operands for the 32-bit ('ss') and 64-bit ('sd') scalar forms.
627 def ssmem : Operand<v4f32> {
628 let PrintMethod = "printf32mem";
629 let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, SEGMENT_REG);
630 let ParserMatchClass = X86Mem32AsmOperand;
631 let OperandType = "OPERAND_MEMORY";
// NOTE(review): the closing "}" of ssmem (line 632) is missing.
633 def sdmem : Operand<v2f64> {
634 let PrintMethod = "printf64mem";
635 let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, SEGMENT_REG);
636 let ParserMatchClass = X86Mem64AsmOperand;
637 let OperandType = "OPERAND_MEMORY";
// NOTE(review): the closing "}" of sdmem (line 638) is missing.
640 //===----------------------------------------------------------------------===//
641 // SSE pattern fragments
642 //===----------------------------------------------------------------------===//
644 // Vector load wrappers to prevent folding of non-temporal aligned loads on
645 // supporting targets.
646 def vecload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
647 return !useNonTemporalLoad(cast<LoadSDNode>(N));
// NOTE(review): the closing "}]>;" of vecload (line 648) is missing; the same
// applies to alignedstore (677), alignedvecload (684-685) and memop (722-723).
650 // 128-bit load pattern fragments
651 // NOTE: all 128-bit integer vector loads are promoted to v2i64
652 def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (vecload node:$ptr))>;
653 def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (vecload node:$ptr))>;
654 def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (vecload node:$ptr))>;
656 // 256-bit load pattern fragments
657 // NOTE: all 256-bit integer vector loads are promoted to v4i64
658 def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (vecload node:$ptr))>;
659 def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (vecload node:$ptr))>;
660 def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (vecload node:$ptr))>;
662 // 512-bit load pattern fragments
663 def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (vecload node:$ptr))>;
664 def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (vecload node:$ptr))>;
665 def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (vecload node:$ptr))>;
667 // 128-/256-/512-bit extload pattern fragments
668 def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
669 def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
670 def extloadv8f32 : PatFrag<(ops node:$ptr), (v8f64 (extloadvf32 node:$ptr))>;
672 // Like 'store', but always requires vector size alignment.
673 def alignedstore : PatFrag<(ops node:$val, node:$ptr),
674 (store node:$val, node:$ptr), [{
675 auto *St = cast<StoreSDNode>(N);
676 return St->getAlignment() >= St->getMemoryVT().getStoreSize();
679 // Like 'load', but always requires 128-bit vector alignment.
680 def alignedvecload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
681 auto *Ld = cast<LoadSDNode>(N);
682 return Ld->getAlignment() >= Ld->getMemoryVT().getStoreSize() &&
683 !useNonTemporalLoad(cast<LoadSDNode>(N));
686 // 128-bit aligned load pattern fragments
687 // NOTE: all 128-bit integer vector loads are promoted to v2i64
688 def alignedloadv4f32 : PatFrag<(ops node:$ptr),
689 (v4f32 (alignedvecload node:$ptr))>;
690 def alignedloadv2f64 : PatFrag<(ops node:$ptr),
691 (v2f64 (alignedvecload node:$ptr))>;
692 def alignedloadv2i64 : PatFrag<(ops node:$ptr),
693 (v2i64 (alignedvecload node:$ptr))>;
695 // 256-bit aligned load pattern fragments
696 // NOTE: all 256-bit integer vector loads are promoted to v4i64
697 def alignedloadv8f32 : PatFrag<(ops node:$ptr),
698 (v8f32 (alignedvecload node:$ptr))>;
699 def alignedloadv4f64 : PatFrag<(ops node:$ptr),
700 (v4f64 (alignedvecload node:$ptr))>;
701 def alignedloadv4i64 : PatFrag<(ops node:$ptr),
702 (v4i64 (alignedvecload node:$ptr))>;
704 // 512-bit aligned load pattern fragments
705 def alignedloadv16f32 : PatFrag<(ops node:$ptr),
706 (v16f32 (alignedvecload node:$ptr))>;
707 def alignedloadv8f64 : PatFrag<(ops node:$ptr),
708 (v8f64 (alignedvecload node:$ptr))>;
709 def alignedloadv8i64 : PatFrag<(ops node:$ptr),
710 (v8i64 (alignedvecload node:$ptr))>;
712 // Like 'vecload', but uses special alignment checks suitable for use in
713 // memory operands in most SSE instructions, which are required to
714 // be naturally aligned on some targets but not on others. If the subtarget
715 // allows unaligned accesses, match any load, though this may require
716 // setting a feature bit in the processor (on startup, for example).
717 // Opteron 10h and later implement such a feature.
718 def memop : PatFrag<(ops node:$ptr), (vecload node:$ptr), [{
719 auto *Ld = cast<LoadSDNode>(N);
720 return Subtarget->hasSSEUnalignedMem() ||
721 Ld->getAlignment() >= Ld->getMemoryVT().getStoreSize();
724 // 128-bit memop pattern fragments
725 // NOTE: all 128-bit integer vector loads are promoted to v2i64
726 def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
727 def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
728 def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
730 def X86masked_gather : SDNode<"X86ISD::MGATHER",
731 SDTypeProfile<2, 3, [SDTCisVec<0>,
732 SDTCisVec<1>, SDTCisInt<1>,
736 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
738 def X86masked_scatter : SDNode<"X86ISD::MSCATTER",
739 SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
741 SDTCVecEltisVT<0, i1>,
743 [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
745 def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
746 (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
747 X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
748 return Mgt->getIndex().getValueType() == MVT::v4i32;
751 def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
752 (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
753 X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
754 return Mgt->getIndex().getValueType() == MVT::v8i32;
757 def mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
758 (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
759 X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
760 return Mgt->getIndex().getValueType() == MVT::v2i64;
762 def mgatherv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
763 (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
764 X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
765 return Mgt->getIndex().getValueType() == MVT::v4i64;
767 def mgatherv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
768 (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
769 X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
770 return Mgt->getIndex().getValueType() == MVT::v8i64;
772 def mgatherv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
773 (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
774 X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
775 return Mgt->getIndex().getValueType() == MVT::v16i32;
// Masked scatter fragments: select the scatter flavor by the type of the
// index vector carried by the X86MaskedScatterSDNode.
def mscatterv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                            (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
  X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
  return Sc->getIndex().getValueType() == MVT::v2i64;
}]>;

def mscatterv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                            (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
  X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
  return Sc->getIndex().getValueType() == MVT::v4i32;
}]>;

def mscatterv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                            (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
  X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
  return Sc->getIndex().getValueType() == MVT::v4i64;
}]>;

def mscatterv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                            (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
  X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
  return Sc->getIndex().getValueType() == MVT::v8i32;
}]>;

def mscatterv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                            (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
  X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
  return Sc->getIndex().getValueType() == MVT::v8i64;
}]>;

def mscatterv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                             (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
  X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
  return Sc->getIndex().getValueType() == MVT::v16i32;
}]>;
// Bitconvert (bitcast) pattern fragments, grouped by vector width. Each one
// matches a bitconvert whose result has the named type.

// 128-bit bitconvert pattern fragments
def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;

// 256-bit bitconvert pattern fragments
def bc_v32i8 : PatFrag<(ops node:$in), (v32i8 (bitconvert node:$in))>;
def bc_v16i16 : PatFrag<(ops node:$in), (v16i16 (bitconvert node:$in))>;
def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
def bc_v8f32 : PatFrag<(ops node:$in), (v8f32 (bitconvert node:$in))>;
def bc_v4f64 : PatFrag<(ops node:$in), (v4f64 (bitconvert node:$in))>;

// 512-bit bitconvert pattern fragments
def bc_v64i8 : PatFrag<(ops node:$in), (v64i8 (bitconvert node:$in))>;
def bc_v16i32 : PatFrag<(ops node:$in), (v16i32 (bitconvert node:$in))>;
def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>;
def bc_v16f32 : PatFrag<(ops node:$in), (v16f32 (bitconvert node:$in))>;
// A scalar load zero-extended into the low element of a vector register
// (X86vzmovl of a scalar_to_vector), then bitconverted so patterns of any
// 128-bit vector type can match it.
def vzmovl_v2i64 : PatFrag<(ops node:$src),
                           (bitconvert (v2i64 (X86vzmovl
                             (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
def vzmovl_v4i32 : PatFrag<(ops node:$src),
                           (bitconvert (v4i32 (X86vzmovl
                             (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;

// A zero-extending vector load (X86vzload), bitconverted for use by any type.
def vzload_v2i64 : PatFrag<(ops node:$src),
                           (bitconvert (v2i64 (X86vzload node:$src)))>;
// FP immediate leaves that match exactly +0.0.
def fp32imm0 : PatLeaf<(f32 fpimm), [{
  return N->isExactlyValue(+0.0);
}]>;

def fp64imm0 : PatLeaf<(f64 fpimm), [{
  return N->isExactlyValue(+0.0);
}]>;
def I8Imm : SDNodeXForm<imm, [{
  // Transformation function: get the low 8 bits.
  return getI8Imm((uint8_t)N->getZExtValue(), SDLoc(N));
}]>;
// Immediate encodings for the AVX-512 embedded rounding-mode operand:
// 8 = suppress all exceptions (SAE), 4 = use the current (MXCSR) rounding
// mode. NOTE(review): values mirror the X86ISD rounding constants — keep in
// sync with their C++ definitions.
def FROUND_NO_EXC : PatLeaf<(i32 8)>;
def FROUND_CURRENT : PatLeaf<(i32 4)>;
// BYTE_imm - Transform bit immediates into byte immediates.
def BYTE_imm  : SDNodeXForm<imm, [{
  // Transformation function: imm >> 3
  return getI32Imm(N->getZExtValue() >> 3, SDLoc(N));
}]>;
// EXTRACT_get_vextract128_imm xform function: convert extract_subvector index
// to VEXTRACTF128/VEXTRACTI128 imm.
def EXTRACT_get_vextract128_imm : SDNodeXForm<extract_subvector, [{
  return getExtractVEXTRACTImmediate(N, 128, SDLoc(N));
}]>;

// INSERT_get_vinsert128_imm xform function: convert insert_subvector index to
// VINSERTF128/VINSERTI128 imm.
def INSERT_get_vinsert128_imm : SDNodeXForm<insert_subvector, [{
  return getInsertVINSERTImmediate(N, 128, SDLoc(N));
}]>;

// EXTRACT_get_vextract256_imm xform function: convert extract_subvector index
// to VEXTRACTF64x4 imm.
def EXTRACT_get_vextract256_imm : SDNodeXForm<extract_subvector, [{
  return getExtractVEXTRACTImmediate(N, 256, SDLoc(N));
}]>;

// INSERT_get_vinsert256_imm xform function: convert insert_subvector index to
// VINSERTF64x4 imm.
def INSERT_get_vinsert256_imm : SDNodeXForm<insert_subvector, [{
  return getInsertVINSERTImmediate(N, 256, SDLoc(N));
}]>;
// Match extract_subvector / insert_subvector with a constant pointer-sized
// index, converting the index to the 128-/256-bit VEXTRACT*/VINSERT*
// immediate via the xform functions above.
def vextract128_extract : PatFrag<(ops node:$bigvec, node:$index),
                                  (extract_subvector node:$bigvec,
                                                     (iPTR imm)),
                                  EXTRACT_get_vextract128_imm>;
def vinsert128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
                                     node:$index),
                                (insert_subvector node:$bigvec, node:$smallvec,
                                                  (iPTR imm)),
                                INSERT_get_vinsert128_imm>;
def vextract256_extract : PatFrag<(ops node:$bigvec, node:$index),
                                  (extract_subvector node:$bigvec,
                                                     (iPTR imm)),
                                  EXTRACT_get_vextract256_imm>;
def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
                                     node:$index),
                                (insert_subvector node:$bigvec, node:$smallvec,
                                                  (iPTR imm)),
                                INSERT_get_vinsert256_imm>;
// Masked load fragments.
// X86mload matches only non-extending, non-expanding masked loads.
def X86mload : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                       (masked_load node:$src1, node:$src2, node:$src3), [{
  return !cast<MaskedLoadSDNode>(N)->isExpandingLoad() &&
         cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
}]>;

def masked_load_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                     (X86mload node:$src1, node:$src2, node:$src3), [{
  return cast<MaskedLoadSDNode>(N)->getAlignment() >= 16;
}]>;

def masked_load_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                     (X86mload node:$src1, node:$src2, node:$src3), [{
  return cast<MaskedLoadSDNode>(N)->getAlignment() >= 32;
}]>;

def masked_load_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                     (X86mload node:$src1, node:$src2, node:$src3), [{
  return cast<MaskedLoadSDNode>(N)->getAlignment() >= 64;
}]>;

// Same predicate as X86mload; no alignment constraint.
def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                    (masked_load node:$src1, node:$src2, node:$src3), [{
  return !cast<MaskedLoadSDNode>(N)->isExpandingLoad() &&
         cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
}]>;

def X86mExpandingLoad : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                (masked_load node:$src1, node:$src2, node:$src3), [{
  return cast<MaskedLoadSDNode>(N)->isExpandingLoad();
}]>;
// Masked store fragments.
// X86mstore can't be implemented in core DAG files because some targets
// do not support vector types (llvm-tblgen will fail). It matches only
// non-truncating, non-compressing masked stores.
def X86mstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                        (masked_store node:$src1, node:$src2, node:$src3), [{
  return (!cast<MaskedStoreSDNode>(N)->isTruncatingStore()) &&
         (!cast<MaskedStoreSDNode>(N)->isCompressingStore());
}]>;

def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                      (X86mstore node:$src1, node:$src2, node:$src3), [{
  return cast<MaskedStoreSDNode>(N)->getAlignment() >= 16;
}]>;

def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                      (X86mstore node:$src1, node:$src2, node:$src3), [{
  return cast<MaskedStoreSDNode>(N)->getAlignment() >= 32;
}]>;

def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                      (X86mstore node:$src1, node:$src2, node:$src3), [{
  return cast<MaskedStoreSDNode>(N)->getAlignment() >= 64;
}]>;

// Same predicate as X86mstore; no alignment constraint.
def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                     (masked_store node:$src1, node:$src2, node:$src3), [{
  return (!cast<MaskedStoreSDNode>(N)->isTruncatingStore()) &&
         (!cast<MaskedStoreSDNode>(N)->isCompressingStore());
}]>;

def X86mCompressingStore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                   (masked_store node:$src1, node:$src2, node:$src3), [{
  return cast<MaskedStoreSDNode>(N)->isCompressingStore();
}]>;
// Masked truncstore fragments.
// X86mtruncstore can't be implemented in core DAG files because some targets
// do not support vector types (llvm-tblgen will fail).
def X86mtruncstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                             (masked_store node:$src1, node:$src2, node:$src3), [{
  return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
}]>;

// Masked truncating stores, selected by the in-memory scalar element type.
def masked_truncstorevi8 :
  PatFrag<(ops node:$src1, node:$src2, node:$src3),
          (X86mtruncstore node:$src1, node:$src2, node:$src3), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;

def masked_truncstorevi16 :
  PatFrag<(ops node:$src1, node:$src2, node:$src3),
          (X86mtruncstore node:$src1, node:$src2, node:$src3), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;

def masked_truncstorevi32 :
  PatFrag<(ops node:$src1, node:$src2, node:$src3),
          (X86mtruncstore node:$src1, node:$src2, node:$src3), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
// Truncating stores with saturation: the S variants saturate signed, the US
// variants saturate unsigned (per the X86ISD node names); the VM* forms are
// the masked versions.
def X86TruncSStore : SDNode<"X86ISD::VTRUNCSTORES",  SDTStore,
                            [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def X86TruncUSStore : SDNode<"X86ISD::VTRUNCSTOREUS",  SDTStore,
                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def X86MTruncSStore : SDNode<"X86ISD::VMTRUNCSTORES",  SDTMaskedStore,
                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS",  SDTMaskedStore,
                              [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
// Saturating truncating stores, selected by the in-memory scalar element
// type (s = signed saturation, us = unsigned saturation).
def truncstore_s_vi8 : PatFrag<(ops node:$val, node:$ptr),
                               (X86TruncSStore node:$val, node:$ptr), [{
  return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;

def truncstore_us_vi8 : PatFrag<(ops node:$val, node:$ptr),
                                (X86TruncUSStore node:$val, node:$ptr), [{
  return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;

def truncstore_s_vi16 : PatFrag<(ops node:$val, node:$ptr),
                                (X86TruncSStore node:$val, node:$ptr), [{
  return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;

def truncstore_us_vi16 : PatFrag<(ops node:$val, node:$ptr),
                                 (X86TruncUSStore node:$val, node:$ptr), [{
  return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;

def truncstore_s_vi32 : PatFrag<(ops node:$val, node:$ptr),
                                (X86TruncSStore node:$val, node:$ptr), [{
  return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def truncstore_us_vi32 : PatFrag<(ops node:$val, node:$ptr),
                                 (X86TruncUSStore node:$val, node:$ptr), [{
  return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
// Masked saturating truncating stores, selected by the in-memory scalar
// element type (s = signed saturation, us = unsigned saturation).
def masked_truncstore_s_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                      (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
  return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;

def masked_truncstore_us_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                       (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
  return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;

def masked_truncstore_s_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                       (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
  return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;

def masked_truncstore_us_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                        (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
  return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;

def masked_truncstore_s_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                       (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
  return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
1072 def masked_truncstore_us_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
1073 (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
1074 return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;