1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
4 declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
5 define i32 @test_kortestz(i16 %a0, i16 %a1) {
6 ; CHECK-LABEL: test_kortestz:
8 ; CHECK-NEXT: kmovw %esi, %k0
9 ; CHECK-NEXT: kmovw %edi, %k1
10 ; CHECK-NEXT: xorl %eax, %eax
11 ; CHECK-NEXT: kortestw %k0, %k1
12 ; CHECK-NEXT: sete %al
14 %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
18 declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
19 define i32 @test_kortestc(i16 %a0, i16 %a1) {
20 ; CHECK-LABEL: test_kortestc:
22 ; CHECK-NEXT: kmovw %esi, %k0
23 ; CHECK-NEXT: kmovw %edi, %k1
24 ; CHECK-NEXT: kortestw %k0, %k1
25 ; CHECK-NEXT: sbbl %eax, %eax
26 ; CHECK-NEXT: andl $1, %eax
28 %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
32 declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
33 define i16 @test_kand(i16 %a0, i16 %a1) {
34 ; CHECK-LABEL: test_kand:
36 ; CHECK-NEXT: movw $8, %ax
37 ; CHECK-NEXT: kmovw %eax, %k0
38 ; CHECK-NEXT: kmovw %edi, %k1
39 ; CHECK-NEXT: kandw %k0, %k1, %k0
40 ; CHECK-NEXT: kmovw %esi, %k1
41 ; CHECK-NEXT: kandw %k1, %k0, %k0
42 ; CHECK-NEXT: kmovw %k0, %eax
44 %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
45 %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
49 declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
50 define i16 @test_knot(i16 %a0) {
51 ; CHECK-LABEL: test_knot:
53 ; CHECK-NEXT: kmovw %edi, %k0
54 ; CHECK-NEXT: knotw %k0, %k0
55 ; CHECK-NEXT: kmovw %k0, %eax
57 %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
61 declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
63 define i16 @unpckbw_test(i16 %a0, i16 %a1) {
64 ; CHECK-LABEL: unpckbw_test:
66 ; CHECK-NEXT: kmovw %edi, %k0
67 ; CHECK-NEXT: kmovw %esi, %k1
68 ; CHECK-NEXT: kunpckbw %k1, %k0, %k0
69 ; CHECK-NEXT: kmovw %k0, %eax
71 %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
75 define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
76 ; CHECK-LABEL: test_rcp_ps_512:
78 ; CHECK-NEXT: vrcp14ps %zmm0, %zmm0
80 %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
83 declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
85 define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
86 ; CHECK-LABEL: test_rcp_pd_512:
88 ; CHECK-NEXT: vrcp14pd %zmm0, %zmm0
90 %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
93 declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
95 declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
97 define <8 x double> @test7(<8 x double> %a) {
100 ; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0
102 %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
106 declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
108 define <16 x float> @test8(<16 x float> %a) {
109 ; CHECK-LABEL: test8:
111 ; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0
113 %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
117 define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
118 ; CHECK-LABEL: test_rsqrt_ps_512:
120 ; CHECK-NEXT: vrsqrt14ps %zmm0, %zmm0
122 %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
123 ret <16 x float> %res
125 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
127 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
128 ; CHECK-LABEL: test_sqrt_pd_512:
130 ; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
132 %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
133 ret <8 x double> %res
135 declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
137 define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
138 ; CHECK-LABEL: test_sqrt_ps_512:
140 ; CHECK-NEXT: vsqrtps %zmm0, %zmm0
142 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
143 ret <16 x float> %res
145 define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
146 ; CHECK-LABEL: test_sqrt_round_ps_512:
148 ; CHECK-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0
150 %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3)
151 ret <16 x float> %res
153 declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
155 define <8 x double> @test_getexp_pd_512(<8 x double> %a0) {
156 ; CHECK-LABEL: test_getexp_pd_512:
158 ; CHECK-NEXT: vgetexppd %zmm0, %zmm0
160 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
161 ret <8 x double> %res
163 define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) {
164 ; CHECK-LABEL: test_getexp_round_pd_512:
166 ; CHECK-NEXT: vgetexppd {sae}, %zmm0, %zmm0
168 %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
169 ret <8 x double> %res
171 declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
173 define <16 x float> @test_getexp_ps_512(<16 x float> %a0) {
174 ; CHECK-LABEL: test_getexp_ps_512:
176 ; CHECK-NEXT: vgetexpps %zmm0, %zmm0
178 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
179 ret <16 x float> %res
182 define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) {
183 ; CHECK-LABEL: test_getexp_round_ps_512:
185 ; CHECK-NEXT: vgetexpps {sae}, %zmm0, %zmm0
187 %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
188 ret <16 x float> %res
190 declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
192 declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
194 define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
195 ; CHECK-LABEL: test_sqrt_ss:
197 ; CHECK-NEXT: andl $1, %edi
198 ; CHECK-NEXT: kmovw %edi, %k1
199 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
200 ; CHECK-NEXT: vsqrtss %xmm1, %xmm0, %xmm3 {%k1}
201 ; CHECK-NEXT: vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
202 ; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
203 ; CHECK-NEXT: vsqrtss {rz-sae}, %xmm1, %xmm0, %xmm0
204 ; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
205 ; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
206 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
208 %res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
209 %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
210 %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 2)
211 %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 3)
213 %res.1 = fadd <4 x float> %res0, %res1
214 %res.2 = fadd <4 x float> %res2, %res3
215 %res = fadd <4 x float> %res.1, %res.2
219 declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
221 define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
222 ; CHECK-LABEL: test_sqrt_sd:
224 ; CHECK-NEXT: andl $1, %edi
225 ; CHECK-NEXT: kmovw %edi, %k1
226 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
227 ; CHECK-NEXT: vsqrtsd %xmm1, %xmm0, %xmm3 {%k1}
228 ; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
229 ; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
230 ; CHECK-NEXT: vsqrtsd {rz-sae}, %xmm1, %xmm0, %xmm0
231 ; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
232 ; CHECK-NEXT: vaddpd %xmm0, %xmm4, %xmm0
233 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
235 %res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
236 %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
237 %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 2)
238 %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 3)
240 %res.1 = fadd <2 x double> %res0, %res1
241 %res.2 = fadd <2 x double> %res2, %res3
242 %res = fadd <2 x double> %res.1, %res.2
243 ret <2 x double> %res
246 define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
247 ; CHECK-LABEL: test_x86_sse2_cvtsd2si64:
249 ; CHECK-NEXT: vcvtsd2si %xmm0, %rax
251 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
254 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
256 define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
257 ; CHECK-LABEL: test_x86_sse2_cvtsi642sd:
259 ; CHECK-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0
261 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
262 ret <2 x double> %res
264 declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
266 define i64 @test_x86_avx512_cvttsd2si64(<2 x double> %a0) {
267 ; CHECK-LABEL: test_x86_avx512_cvttsd2si64:
269 ; CHECK-NEXT: vcvttsd2si %xmm0, %rcx
270 ; CHECK-NEXT: vcvttsd2si {sae}, %xmm0, %rax
271 ; CHECK-NEXT: addq %rcx, %rax
273 %res0 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 4) ;
274 %res1 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 8) ;
275 %res2 = add i64 %res0, %res1
278 declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) nounwind readnone
280 define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) {
281 ; CHECK-LABEL: test_x86_avx512_cvttsd2usi:
283 ; CHECK-NEXT: vcvttsd2usi %xmm0, %ecx
284 ; CHECK-NEXT: vcvttsd2usi {sae}, %xmm0, %eax
285 ; CHECK-NEXT: addl %ecx, %eax
287 %res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4) ;
288 %res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8) ;
289 %res2 = add i32 %res0, %res1
292 declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone
294 define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) {
295 ; CHECK-LABEL: test_x86_avx512_cvttsd2si:
297 ; CHECK-NEXT: vcvttsd2si %xmm0, %ecx
298 ; CHECK-NEXT: vcvttsd2si {sae}, %xmm0, %eax
299 ; CHECK-NEXT: addl %ecx, %eax
301 %res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4) ;
302 %res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 8) ;
303 %res2 = add i32 %res0, %res1
306 declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone
310 define i64 @test_x86_avx512_cvttsd2usi64(<2 x double> %a0) {
311 ; CHECK-LABEL: test_x86_avx512_cvttsd2usi64:
313 ; CHECK-NEXT: vcvttsd2usi %xmm0, %rcx
314 ; CHECK-NEXT: vcvttsd2usi {sae}, %xmm0, %rax
315 ; CHECK-NEXT: addq %rcx, %rax
317 %res0 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 4) ;
318 %res1 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 8) ;
319 %res2 = add i64 %res0, %res1
322 declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) nounwind readnone
324 define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
325 ; CHECK-LABEL: test_x86_sse_cvtss2si64:
327 ; CHECK-NEXT: vcvtss2si %xmm0, %rax
329 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
332 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
335 define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
336 ; CHECK-LABEL: test_x86_sse_cvtsi642ss:
338 ; CHECK-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0
340 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
343 declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
346 define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) {
347 ; CHECK-LABEL: test_x86_avx512_cvttss2si:
349 ; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %ecx
350 ; CHECK-NEXT: vcvttss2si %xmm0, %eax
351 ; CHECK-NEXT: addl %ecx, %eax
353 %res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8) ;
354 %res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4) ;
355 %res2 = add i32 %res0, %res1
358 declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone
360 define i64 @test_x86_avx512_cvttss2si64(<4 x float> %a0) {
361 ; CHECK-LABEL: test_x86_avx512_cvttss2si64:
363 ; CHECK-NEXT: vcvttss2si %xmm0, %rcx
364 ; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %rax
365 ; CHECK-NEXT: addq %rcx, %rax
367 %res0 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 4) ;
368 %res1 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 8) ;
369 %res2 = add i64 %res0, %res1
372 declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) nounwind readnone
374 define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) {
375 ; CHECK-LABEL: test_x86_avx512_cvttss2usi:
377 ; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %ecx
378 ; CHECK-NEXT: vcvttss2usi %xmm0, %eax
379 ; CHECK-NEXT: addl %ecx, %eax
381 %res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ;
382 %res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ;
383 %res2 = add i32 %res0, %res1
386 declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone
388 define i64 @test_x86_avx512_cvttss2usi64(<4 x float> %a0) {
389 ; CHECK-LABEL: test_x86_avx512_cvttss2usi64:
391 ; CHECK-NEXT: vcvttss2usi %xmm0, %rcx
392 ; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %rax
393 ; CHECK-NEXT: addq %rcx, %rax
395 %res0 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 4) ;
396 %res1 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 8) ;
397 %res2 = add i64 %res0, %res1
400 declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone
402 define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
403 ; CHECK-LABEL: test_x86_avx512_cvtsd2usi64:
405 ; CHECK-NEXT: vcvtsd2usi %xmm0, %rcx
406 ; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %rax
407 ; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %rdx
408 ; CHECK-NEXT: addq %rcx, %rax
409 ; CHECK-NEXT: addq %rdx, %rax
412 %res = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 4)
413 %res1 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 3)
414 %res2 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 1)
415 %res3 = add i64 %res, %res1
416 %res4 = add i64 %res3, %res2
419 declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32) nounwind readnone
421 define i64 @test_x86_avx512_cvtsd2si64(<2 x double> %a0) {
422 ; CHECK-LABEL: test_x86_avx512_cvtsd2si64:
424 ; CHECK-NEXT: vcvtsd2si %xmm0, %rcx
425 ; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %rax
426 ; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %rdx
427 ; CHECK-NEXT: addq %rcx, %rax
428 ; CHECK-NEXT: addq %rdx, %rax
431 %res = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 4)
432 %res1 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 3)
433 %res2 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 1)
434 %res3 = add i64 %res, %res1
435 %res4 = add i64 %res3, %res2
438 declare i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double>, i32) nounwind readnone
440 define i64 @test_x86_avx512_cvtss2usi64(<4 x float> %a0) {
441 ; CHECK-LABEL: test_x86_avx512_cvtss2usi64:
443 ; CHECK-NEXT: vcvtss2usi %xmm0, %rcx
444 ; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %rax
445 ; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %rdx
446 ; CHECK-NEXT: addq %rcx, %rax
447 ; CHECK-NEXT: addq %rdx, %rax
450 %res = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 4)
451 %res1 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 3)
452 %res2 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 1)
453 %res3 = add i64 %res, %res1
454 %res4 = add i64 %res3, %res2
457 declare i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float>, i32) nounwind readnone
459 define i64 @test_x86_avx512_cvtss2si64(<4 x float> %a0) {
460 ; CHECK-LABEL: test_x86_avx512_cvtss2si64:
462 ; CHECK-NEXT: vcvtss2si %xmm0, %rcx
463 ; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %rax
464 ; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %rdx
465 ; CHECK-NEXT: addq %rcx, %rax
466 ; CHECK-NEXT: addq %rdx, %rax
469 %res = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 4)
470 %res1 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 3)
471 %res2 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 1)
472 %res3 = add i64 %res, %res1
473 %res4 = add i64 %res3, %res2
476 declare i64 @llvm.x86.avx512.vcvtss2si64(<4 x float>, i32) nounwind readnone
478 define i32 @test_x86_avx512_cvtsd2usi32(<2 x double> %a0) {
479 ; CHECK-LABEL: test_x86_avx512_cvtsd2usi32:
481 ; CHECK-NEXT: vcvtsd2usi %xmm0, %ecx
482 ; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %eax
483 ; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %edx
484 ; CHECK-NEXT: addl %ecx, %eax
485 ; CHECK-NEXT: addl %edx, %eax
488 %res = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 4)
489 %res1 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 3)
490 %res2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 1)
491 %res3 = add i32 %res, %res1
492 %res4 = add i32 %res3, %res2
495 declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32) nounwind readnone
497 define i32 @test_x86_avx512_cvtsd2si32(<2 x double> %a0) {
498 ; CHECK-LABEL: test_x86_avx512_cvtsd2si32:
500 ; CHECK-NEXT: vcvtsd2si %xmm0, %ecx
501 ; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %eax
502 ; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %edx
503 ; CHECK-NEXT: addl %ecx, %eax
504 ; CHECK-NEXT: addl %edx, %eax
507 %res = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 4)
508 %res1 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 3)
509 %res2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 1)
510 %res3 = add i32 %res, %res1
511 %res4 = add i32 %res3, %res2
514 declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32) nounwind readnone
516 define i32 @test_x86_avx512_cvtss2usi32(<4 x float> %a0) {
517 ; CHECK-LABEL: test_x86_avx512_cvtss2usi32:
519 ; CHECK-NEXT: vcvtss2usi %xmm0, %ecx
520 ; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %eax
521 ; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %edx
522 ; CHECK-NEXT: addl %ecx, %eax
523 ; CHECK-NEXT: addl %edx, %eax
526 %res = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 4)
527 %res1 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 3)
528 %res2 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 1)
529 %res3 = add i32 %res, %res1
530 %res4 = add i32 %res3, %res2
533 declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32) nounwind readnone
535 define i32 @test_x86_avx512_cvtss2si32(<4 x float> %a0) {
536 ; CHECK-LABEL: test_x86_avx512_cvtss2si32:
538 ; CHECK-NEXT: vcvtss2si %xmm0, %ecx
539 ; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %eax
540 ; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %edx
541 ; CHECK-NEXT: addl %ecx, %eax
542 ; CHECK-NEXT: addl %edx, %eax
545 %res = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 4)
546 %res1 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 3)
547 %res2 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 1)
548 %res3 = add i32 %res, %res1
549 %res4 = add i32 %res3, %res2
552 declare i32 @llvm.x86.avx512.vcvtss2si32(<4 x float>, i32) nounwind readnone
554 define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
555 ; CHECK-LABEL: test_x86_vcvtph2ps_512:
557 ; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0
559 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
560 ret <16 x float> %res
563 define <16 x float> @test_x86_vcvtph2ps_512_sae(<16 x i16> %a0) {
564 ; CHECK-LABEL: test_x86_vcvtph2ps_512_sae:
566 ; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0
568 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
569 ret <16 x float> %res
572 define <16 x float> @test_x86_vcvtph2ps_512_rrk(<16 x i16> %a0,<16 x float> %a1, i16 %mask) {
573 ; CHECK-LABEL: test_x86_vcvtph2ps_512_rrk:
575 ; CHECK-NEXT: kmovw %edi, %k1
576 ; CHECK-NEXT: vcvtph2ps %ymm0, %zmm1 {%k1}
577 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
579 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> %a1, i16 %mask, i32 4)
580 ret <16 x float> %res
583 define <16 x float> @test_x86_vcvtph2ps_512_sae_rrkz(<16 x i16> %a0, i16 %mask) {
584 ; CHECK-LABEL: test_x86_vcvtph2ps_512_sae_rrkz:
586 ; CHECK-NEXT: kmovw %edi, %k1
587 ; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z}
589 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 8)
590 ret <16 x float> %res
593 define <16 x float> @test_x86_vcvtph2ps_512_rrkz(<16 x i16> %a0, i16 %mask) {
594 ; CHECK-LABEL: test_x86_vcvtph2ps_512_rrkz:
596 ; CHECK-NEXT: kmovw %edi, %k1
597 ; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0 {%k1} {z}
599 %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
600 ret <16 x float> %res
603 declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
605 define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16 %mask, <16 x i16> * %dst) {
606 ; CHECK-LABEL: test_x86_vcvtps2ph_256:
608 ; CHECK-NEXT: kmovw %edi, %k1
609 ; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm1 {%k1}
610 ; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm2 {%k1} {z}
611 ; CHECK-NEXT: vcvtps2ph $2, %zmm0, (%rsi)
612 ; CHECK-NEXT: vpaddw %ymm1, %ymm2, %ymm0
614 %res1 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
615 %res2 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 %mask)
616 %res3 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> %src, i16 %mask)
617 store <16 x i16> %res1, <16 x i16> * %dst
618 %res = add <16 x i16> %res2, %res3
622 declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
624 define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
625 ; CHECK-LABEL: test_x86_vbroadcast_ss_512:
627 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0
629 %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
630 ret <16 x float> %res
632 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
634 define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
635 ; CHECK-LABEL: test_x86_vbroadcast_sd_512:
637 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0
639 %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
640 ret <8 x double> %res
642 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
644 define <16 x i32> @test_conflict_d(<16 x i32> %a) {
645 ; CHECK-LABEL: test_conflict_d:
647 ; CHECK-NEXT: vpconflictd %zmm0, %zmm0
649 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
653 declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
655 define <8 x i64> @test_conflict_q(<8 x i64> %a) {
656 ; CHECK-LABEL: test_conflict_q:
658 ; CHECK-NEXT: vpconflictq %zmm0, %zmm0
660 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
664 declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
666 define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
667 ; CHECK-LABEL: test_maskz_conflict_d:
669 ; CHECK-NEXT: kmovw %edi, %k1
670 ; CHECK-NEXT: vpconflictd %zmm0, %zmm0 {%k1} {z}
672 %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
676 define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
677 ; CHECK-LABEL: test_mask_conflict_q:
679 ; CHECK-NEXT: kmovw %edi, %k1
680 ; CHECK-NEXT: vpconflictq %zmm0, %zmm1 {%k1}
681 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
683 %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
687 define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
688 ; CHECK-LABEL: test_lzcnt_d:
690 ; CHECK-NEXT: vplzcntd %zmm0, %zmm0
692 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
696 declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
698 define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
699 ; CHECK-LABEL: test_lzcnt_q:
701 ; CHECK-NEXT: vplzcntq %zmm0, %zmm0
703 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
707 declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
710 define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
711 ; CHECK-LABEL: test_mask_lzcnt_d:
713 ; CHECK-NEXT: kmovw %edi, %k1
714 ; CHECK-NEXT: vplzcntd %zmm0, %zmm1 {%k1}
715 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
717 %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
721 define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
722 ; CHECK-LABEL: test_mask_lzcnt_q:
724 ; CHECK-NEXT: kmovw %edi, %k1
725 ; CHECK-NEXT: vplzcntq %zmm0, %zmm1 {%k1}
726 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
728 %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
732 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
733 ; CHECK-LABEL: test_cmpps:
735 ; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
736 ; CHECK-NEXT: kmovw %k0, %eax
738 %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
741 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
743 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
744 ; CHECK-LABEL: test_cmppd:
746 ; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k0
747 ; CHECK-NEXT: kmovw %k0, %eax
749 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
752 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
755 define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
756 ; CHECK-LABEL: test_vmaxpd:
758 ; CHECK-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
760 %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
761 <8 x double>zeroinitializer, i8 -1, i32 4)
762 ret <8 x double> %res
764 declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
765 <8 x double>, i8, i32)
767 define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
768 ; CHECK-LABEL: test_vminpd:
770 ; CHECK-NEXT: vminpd %zmm1, %zmm0, %zmm0
772 %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
773 <8 x double>zeroinitializer, i8 -1, i32 4)
774 ret <8 x double> %res
776 declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
777 <8 x double>, i8, i32)
779 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)
781 define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
782 ; CHECK-LABEL: test_int_x86_avx512_mask_pabs_d_512:
784 ; CHECK-NEXT: kmovw %edi, %k1
785 ; CHECK-NEXT: vpabsd %zmm0, %zmm1 {%k1}
786 ; CHECK-NEXT: vpabsd %zmm0, %zmm0
787 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
789 %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
790 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
791 %res2 = add <16 x i32> %res, %res1
795 declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
797 define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
798 ; CHECK-LABEL: test_int_x86_avx512_mask_pabs_q_512:
800 ; CHECK-NEXT: kmovw %edi, %k1
801 ; CHECK-NEXT: vpabsq %zmm0, %zmm1 {%k1}
802 ; CHECK-NEXT: vpabsq %zmm0, %zmm0
803 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
805 %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
806 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
807 %res2 = add <8 x i64> %res, %res1
811 define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1, i8 %m) {
812 ; CHECK-LABEL: test_vptestmq:
814 ; CHECK-NEXT: kmovw %edi, %k1
815 ; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0 {%k1}
816 ; CHECK-NEXT: kmovw %k0, %ecx
817 ; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0
818 ; CHECK-NEXT: kmovw %k0, %eax
819 ; CHECK-NEXT: addb %cl, %al
821 %res = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
822 %res1 = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 %m)
823 %res2 = add i8 %res1, %res
826 declare i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
828 define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1, i16 %m) {
829 ; CHECK-LABEL: test_vptestmd:
831 ; CHECK-NEXT: kmovw %edi, %k1
832 ; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0 {%k1}
833 ; CHECK-NEXT: kmovw %k0, %ecx
834 ; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0
835 ; CHECK-NEXT: kmovw %k0, %eax
836 ; CHECK-NEXT: addl %ecx, %eax
837 ; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
839 %res = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
840 %res1 = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 %m)
841 %res2 = add i16 %res1, %res
844 declare i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
846 define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
847 ; CHECK-LABEL: test_mask_store_ss:
849 ; CHECK-NEXT: kmovw %esi, %k1
850 ; CHECK-NEXT: vmovss %xmm0, (%rdi) {%k1}
852 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
856 declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
; Unmasked signed dword compares: exercises all eight predicate immediates
; of llvm.x86.avx512.mask.cmp.d.512 (0=eq, 1=lt, 2=le, 3=unord, 4=neq,
; 5=nlt, 6=nle, 7=ord). Each i16 mask result is inserted into a <8 x i16>.
858 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
859 ; CHECK-LABEL: test_cmp_d_512:
861 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k3
862 ; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k4
863 ; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k5
864 ; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k6
865 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k7
866 ; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k2
867 ; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k1
868 ; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k0
869 ; CHECK-NEXT: kmovw %k4, %eax
870 ; CHECK-NEXT: kmovw %k3, %ecx
871 ; CHECK-NEXT: vmovd %ecx, %xmm0
872 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
873 ; CHECK-NEXT: kmovw %k5, %eax
874 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
875 ; CHECK-NEXT: kmovw %k6, %eax
876 ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
877 ; CHECK-NEXT: kmovw %k7, %eax
878 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
879 ; CHECK-NEXT: kmovw %k2, %eax
880 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
881 ; CHECK-NEXT: kmovw %k1, %eax
882 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
883 ; CHECK-NEXT: kmovw %k0, %eax
884 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
886 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
887 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
888 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
889 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
890 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
891 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
892 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
893 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
894 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
895 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
896 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
897 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
898 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
899 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
900 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
901 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
; Masked variant of the signed dword compares: same eight predicate
; immediates, but each vpcmp*d executes under the {%k3} write-mask loaded
; from %mask (note the final ordd compare reuses %k3 as its destination).
905 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
906 ; CHECK-LABEL: test_mask_cmp_d_512:
908 ; CHECK-NEXT: kmovw %edi, %k3
909 ; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k4 {%k3}
910 ; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k5 {%k3}
911 ; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k6 {%k3}
912 ; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k7 {%k3}
913 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 {%k3}
914 ; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k2 {%k3}
915 ; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k1 {%k3}
916 ; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k3 {%k3}
917 ; CHECK-NEXT: kmovw %k5, %eax
918 ; CHECK-NEXT: kmovw %k4, %ecx
919 ; CHECK-NEXT: vmovd %ecx, %xmm0
920 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
921 ; CHECK-NEXT: kmovw %k6, %eax
922 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
923 ; CHECK-NEXT: kmovw %k7, %eax
924 ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
925 ; CHECK-NEXT: kmovw %k0, %eax
926 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
927 ; CHECK-NEXT: kmovw %k2, %eax
928 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
929 ; CHECK-NEXT: kmovw %k1, %eax
930 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
931 ; CHECK-NEXT: kmovw %k3, %eax
932 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
934 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
935 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
936 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
937 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
938 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
939 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
940 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
941 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
942 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
943 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
944 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
945 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
946 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
947 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
948 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
949 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
953 declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
; Unmasked UNSIGNED dword compares (vpcmp*ud): same eight predicate
; immediates as test_cmp_d_512 but via llvm.x86.avx512.mask.ucmp.d.512.
955 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
956 ; CHECK-LABEL: test_ucmp_d_512:
958 ; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k3
959 ; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k4
960 ; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k5
961 ; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k6
962 ; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k7
963 ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k2
964 ; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k1
965 ; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k0
966 ; CHECK-NEXT: kmovw %k4, %eax
967 ; CHECK-NEXT: kmovw %k3, %ecx
968 ; CHECK-NEXT: vmovd %ecx, %xmm0
969 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
970 ; CHECK-NEXT: kmovw %k5, %eax
971 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
972 ; CHECK-NEXT: kmovw %k6, %eax
973 ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
974 ; CHECK-NEXT: kmovw %k7, %eax
975 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
976 ; CHECK-NEXT: kmovw %k2, %eax
977 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
978 ; CHECK-NEXT: kmovw %k1, %eax
979 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
980 ; CHECK-NEXT: kmovw %k0, %eax
981 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
983 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
984 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
985 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
986 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
987 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
988 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
989 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
990 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
991 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
992 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
993 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
994 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
995 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
996 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
997 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
998 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
; Masked unsigned dword compares: all eight ucmp predicates under the
; {%k3} write-mask loaded from %mask (ordud result overwrites %k3).
1002 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1003 ; CHECK-LABEL: test_mask_ucmp_d_512:
1005 ; CHECK-NEXT: kmovw %edi, %k3
1006 ; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k4 {%k3}
1007 ; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k5 {%k3}
1008 ; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k6 {%k3}
1009 ; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k7 {%k3}
1010 ; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k0 {%k3}
1011 ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k2 {%k3}
1012 ; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k1 {%k3}
1013 ; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k3 {%k3}
1014 ; CHECK-NEXT: kmovw %k5, %eax
1015 ; CHECK-NEXT: kmovw %k4, %ecx
1016 ; CHECK-NEXT: vmovd %ecx, %xmm0
1017 ; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
1018 ; CHECK-NEXT: kmovw %k6, %eax
1019 ; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
1020 ; CHECK-NEXT: kmovw %k7, %eax
1021 ; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
1022 ; CHECK-NEXT: kmovw %k0, %eax
1023 ; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
1024 ; CHECK-NEXT: kmovw %k2, %eax
1025 ; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
1026 ; CHECK-NEXT: kmovw %k1, %eax
1027 ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
1028 ; CHECK-NEXT: kmovw %k3, %eax
1029 ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
1031 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
1032 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
1033 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
1034 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
1035 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
1036 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
1037 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
1038 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
1039 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
1040 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
1041 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
1042 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
1043 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
1044 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
1045 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
1046 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
1050 declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
; Unmasked signed qword compares: eight predicate immediates of
; llvm.x86.avx512.mask.cmp.q.512; each i8 result goes into a <8 x i8>
; lane (vpinsrb at even byte offsets, since i8 lanes are widened to i16).
1052 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
1053 ; CHECK-LABEL: test_cmp_q_512:
1055 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k3
1056 ; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k4
1057 ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k5
1058 ; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k6
1059 ; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k7
1060 ; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k2
1061 ; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k1
1062 ; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0
1063 ; CHECK-NEXT: kmovw %k4, %eax
1064 ; CHECK-NEXT: kmovw %k3, %ecx
1065 ; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
1066 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
1067 ; CHECK-NEXT: kmovw %k5, %eax
1068 ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
1069 ; CHECK-NEXT: kmovw %k6, %eax
1070 ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
1071 ; CHECK-NEXT: kmovw %k7, %eax
1072 ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
1073 ; CHECK-NEXT: kmovw %k2, %eax
1074 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1075 ; CHECK-NEXT: kmovw %k1, %eax
1076 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1077 ; CHECK-NEXT: kmovw %k0, %eax
1078 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1080 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
1081 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
1082 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
1083 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
1084 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
1085 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
1086 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
1087 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1088 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
1089 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1090 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
1091 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1092 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
1093 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1094 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
1095 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
; Masked signed qword compares: eight predicates under {%k3} loaded from
; the i8 %mask (ordq result reuses %k3 as destination).
1099 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1100 ; CHECK-LABEL: test_mask_cmp_q_512:
1102 ; CHECK-NEXT: kmovw %edi, %k3
1103 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k4 {%k3}
1104 ; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k5 {%k3}
1105 ; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k6 {%k3}
1106 ; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k7 {%k3}
1107 ; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 {%k3}
1108 ; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k2 {%k3}
1109 ; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k1 {%k3}
1110 ; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k3 {%k3}
1111 ; CHECK-NEXT: kmovw %k5, %eax
1112 ; CHECK-NEXT: kmovw %k4, %ecx
1113 ; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
1114 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
1115 ; CHECK-NEXT: kmovw %k6, %eax
1116 ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
1117 ; CHECK-NEXT: kmovw %k7, %eax
1118 ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
1119 ; CHECK-NEXT: kmovw %k0, %eax
1120 ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
1121 ; CHECK-NEXT: kmovw %k2, %eax
1122 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1123 ; CHECK-NEXT: kmovw %k1, %eax
1124 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1125 ; CHECK-NEXT: kmovw %k3, %eax
1126 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1128 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
1129 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
1130 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
1131 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
1132 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
1133 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
1134 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
1135 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1136 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
1137 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1138 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
1139 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1140 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
1141 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1142 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
1143 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
1147 declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
; Unmasked UNSIGNED qword compares (vpcmp*uq), eight predicate immediates
; of llvm.x86.avx512.mask.ucmp.q.512.
1149 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
1150 ; CHECK-LABEL: test_ucmp_q_512:
1152 ; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k3
1153 ; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k4
1154 ; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k5
1155 ; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k6
1156 ; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k7
1157 ; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k2
1158 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
1159 ; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0
1160 ; CHECK-NEXT: kmovw %k4, %eax
1161 ; CHECK-NEXT: kmovw %k3, %ecx
1162 ; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
1163 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
1164 ; CHECK-NEXT: kmovw %k5, %eax
1165 ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
1166 ; CHECK-NEXT: kmovw %k6, %eax
1167 ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
1168 ; CHECK-NEXT: kmovw %k7, %eax
1169 ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
1170 ; CHECK-NEXT: kmovw %k2, %eax
1171 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1172 ; CHECK-NEXT: kmovw %k1, %eax
1173 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1174 ; CHECK-NEXT: kmovw %k0, %eax
1175 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1177 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
1178 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
1179 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
1180 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
1181 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
1182 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
1183 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
1184 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1185 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
1186 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1187 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
1188 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1189 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
1190 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1191 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
1192 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
; Masked unsigned qword compares: eight ucmp predicates under {%k3}
; loaded from the i8 %mask (orduq result overwrites %k3).
1196 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1197 ; CHECK-LABEL: test_mask_ucmp_q_512:
1199 ; CHECK-NEXT: kmovw %edi, %k3
1200 ; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k4 {%k3}
1201 ; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k5 {%k3}
1202 ; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k6 {%k3}
1203 ; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k7 {%k3}
1204 ; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k0 {%k3}
1205 ; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k2 {%k3}
1206 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 {%k3}
1207 ; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k3 {%k3}
1208 ; CHECK-NEXT: kmovw %k5, %eax
1209 ; CHECK-NEXT: kmovw %k4, %ecx
1210 ; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
1211 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
1212 ; CHECK-NEXT: kmovw %k6, %eax
1213 ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
1214 ; CHECK-NEXT: kmovw %k7, %eax
1215 ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
1216 ; CHECK-NEXT: kmovw %k0, %eax
1217 ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
1218 ; CHECK-NEXT: kmovw %k2, %eax
1219 ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1220 ; CHECK-NEXT: kmovw %k1, %eax
1221 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1222 ; CHECK-NEXT: kmovw %k3, %eax
1223 ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1225 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
1226 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
1227 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
1228 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
1229 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
1230 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
1231 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
1232 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
1233 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
1234 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
1235 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
1236 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
1237 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
1238 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
1239 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
1240 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
1244 declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
1246 declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1247 declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
1248 declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
; Unmasked vsubps with static rounding: rc operand 0 -> {rn-sae}
; (round-to-nearest, suppress all exceptions).
1250 define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
1251 ; CHECK-LABEL: test_vsubps_rn:
1253 ; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
1255 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1256 <16 x float> zeroinitializer, i16 -1, i32 0)
1257 ret <16 x float> %res
; Unmasked vsubps with static rounding: rc operand 1 -> {rd-sae}
; (round toward negative infinity).
1260 define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
1261 ; CHECK-LABEL: test_vsubps_rd:
1263 ; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
1265 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1266 <16 x float> zeroinitializer, i16 -1, i32 1)
1267 ret <16 x float> %res
; Unmasked vsubps with static rounding: rc operand 2 -> {ru-sae}
; (round toward positive infinity).
1270 define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
1271 ; CHECK-LABEL: test_vsubps_ru:
1273 ; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
1275 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1276 <16 x float> zeroinitializer, i16 -1, i32 2)
1277 ret <16 x float> %res
; Unmasked vsubps with static rounding: rc operand 3 -> {rz-sae}
; (round toward zero / truncate).
1280 define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
1281 ; CHECK-LABEL: test_vsubps_rz:
1283 ; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
1285 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
1286 <16 x float> zeroinitializer, i16 -1, i32 3)
1287 ret <16 x float> %res
; Unmasked vmulps with static rounding: rc operand 0 -> {rn-sae}.
1290 define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
1291 ; CHECK-LABEL: test_vmulps_rn:
1293 ; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0
1295 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1296 <16 x float> zeroinitializer, i16 -1, i32 0)
1297 ret <16 x float> %res
; Unmasked vmulps with static rounding: rc operand 1 -> {rd-sae}.
1300 define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
1301 ; CHECK-LABEL: test_vmulps_rd:
1303 ; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0
1305 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1306 <16 x float> zeroinitializer, i16 -1, i32 1)
1307 ret <16 x float> %res
; Unmasked vmulps with static rounding: rc operand 2 -> {ru-sae}.
1310 define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
1311 ; CHECK-LABEL: test_vmulps_ru:
1313 ; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0
1315 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1316 <16 x float> zeroinitializer, i16 -1, i32 2)
1317 ret <16 x float> %res
; Unmasked vmulps with static rounding: rc operand 3 -> {rz-sae}.
1320 define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
1321 ; CHECK-LABEL: test_vmulps_rz:
1323 ; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0
1325 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1326 <16 x float> zeroinitializer, i16 -1, i32 3)
1327 ret <16 x float> %res
; Masked vmulps, zeroinitializer passthru: expects zero-masking form
; {%k1} {z} with {rn-sae} rounding.
1331 define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1332 ; CHECK-LABEL: test_vmulps_mask_rn:
1334 ; CHECK-NEXT: kmovw %edi, %k1
1335 ; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
1337 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1338 <16 x float> zeroinitializer, i16 %mask, i32 0)
1339 ret <16 x float> %res
; Masked vmulps, zeroinitializer passthru: zero-masking with {rd-sae}.
1342 define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1343 ; CHECK-LABEL: test_vmulps_mask_rd:
1345 ; CHECK-NEXT: kmovw %edi, %k1
1346 ; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
1348 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1349 <16 x float> zeroinitializer, i16 %mask, i32 1)
1350 ret <16 x float> %res
; Masked vmulps, zeroinitializer passthru: zero-masking with {ru-sae}.
1353 define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1354 ; CHECK-LABEL: test_vmulps_mask_ru:
1356 ; CHECK-NEXT: kmovw %edi, %k1
1357 ; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
1359 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1360 <16 x float> zeroinitializer, i16 %mask, i32 2)
1361 ret <16 x float> %res
; Masked vmulps, zeroinitializer passthru: zero-masking with {rz-sae}.
1364 define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1365 ; CHECK-LABEL: test_vmulps_mask_rz:
1367 ; CHECK-NEXT: kmovw %edi, %k1
1368 ; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
1370 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1371 <16 x float> zeroinitializer, i16 %mask, i32 3)
1372 ret <16 x float> %res
1375 ;; With Passthru value
; Masked vmulps with a real passthru operand: merge-masking form (no {z});
; result merged into the %passthru register, then moved to %zmm0.
1376 define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1377 ; CHECK-LABEL: test_vmulps_mask_passthru_rn:
1379 ; CHECK-NEXT: kmovw %edi, %k1
1380 ; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
1381 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1383 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1384 <16 x float> %passthru, i16 %mask, i32 0)
1385 ret <16 x float> %res
; Merge-masked vmulps with passthru, {rd-sae} rounding.
1388 define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1389 ; CHECK-LABEL: test_vmulps_mask_passthru_rd:
1391 ; CHECK-NEXT: kmovw %edi, %k1
1392 ; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
1393 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1395 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1396 <16 x float> %passthru, i16 %mask, i32 1)
1397 ret <16 x float> %res
; Merge-masked vmulps with passthru, {ru-sae} rounding.
1400 define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1401 ; CHECK-LABEL: test_vmulps_mask_passthru_ru:
1403 ; CHECK-NEXT: kmovw %edi, %k1
1404 ; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
1405 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1407 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1408 <16 x float> %passthru, i16 %mask, i32 2)
1409 ret <16 x float> %res
; Merge-masked vmulps with passthru, {rz-sae} rounding.
1412 define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
1413 ; CHECK-LABEL: test_vmulps_mask_passthru_rz:
1415 ; CHECK-NEXT: kmovw %edi, %k1
1416 ; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
1417 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1419 %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
1420 <16 x float> %passthru, i16 %mask, i32 3)
1421 ret <16 x float> %res
; Double-precision variant: zero-masked vmulpd with {rn-sae}.
1425 define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1426 ; CHECK-LABEL: test_vmulpd_mask_rn:
1428 ; CHECK-NEXT: kmovw %edi, %k1
1429 ; CHECK-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
1431 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1432 <8 x double> zeroinitializer, i8 %mask, i32 0)
1433 ret <8 x double> %res
; Double-precision variant: zero-masked vmulpd with {rd-sae}.
1436 define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1437 ; CHECK-LABEL: test_vmulpd_mask_rd:
1439 ; CHECK-NEXT: kmovw %edi, %k1
1440 ; CHECK-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
1442 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1443 <8 x double> zeroinitializer, i8 %mask, i32 1)
1444 ret <8 x double> %res
; Double-precision variant: zero-masked vmulpd with {ru-sae}.
1447 define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1448 ; CHECK-LABEL: test_vmulpd_mask_ru:
1450 ; CHECK-NEXT: kmovw %edi, %k1
1451 ; CHECK-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
1453 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1454 <8 x double> zeroinitializer, i8 %mask, i32 2)
1455 ret <8 x double> %res
; Double-precision variant: zero-masked vmulpd with {rz-sae}.
1458 define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
1459 ; CHECK-LABEL: test_vmulpd_mask_rz:
1461 ; CHECK-NEXT: kmovw %edi, %k1
1462 ; CHECK-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
1464 %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
1465 <8 x double> zeroinitializer, i8 %mask, i32 3)
1466 ret <8 x double> %res
; vpmuldq register-register form, no mask.
1469 define <8 x i64> @test_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
1470 ; CHECK-LABEL: test_mul_epi32_rr:
1472 ; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm0
1474 %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
; vpmuldq reg-reg with merge-masking: the select over %mask.cast should
; fold into a {%k1} write-mask with %passThru as the destination.
1478 define <8 x i64> @test_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
1479 ; CHECK-LABEL: test_mul_epi32_rrk:
1481 ; CHECK-NEXT: kmovw %edi, %k1
1482 ; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm2 {%k1}
1483 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
1485 %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
1486 %mask.cast = bitcast i8 %mask to <8 x i1>
1487 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
; vpmuldq reg-reg with zero-masking: select against zeroinitializer
; should fold into {%k1} {z}.
1491 define <8 x i64> @test_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
1492 ; CHECK-LABEL: test_mul_epi32_rrkz:
1494 ; CHECK-NEXT: kmovw %edi, %k1
1495 ; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z}
1497 %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
1498 %mask.cast = bitcast i8 %mask to <8 x i1>
1499 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
; vpmuldq with a memory operand: the load should fold into the multiply.
1503 define <8 x i64> @test_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
1504 ; CHECK-LABEL: test_mul_epi32_rm:
1506 ; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm0
1508 %b = load <16 x i32>, <16 x i32>* %ptr_b
1509 %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
; vpmuldq memory form with merge-masking (folded load + {%k1}).
1513 define <8 x i64> @test_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1514 ; CHECK-LABEL: test_mul_epi32_rmk:
1516 ; CHECK-NEXT: kmovw %esi, %k1
1517 ; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm1 {%k1}
1518 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1520 %b = load <16 x i32>, <16 x i32>* %ptr_b
1521 %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
1522 %mask.cast = bitcast i8 %mask to <8 x i1>
1523 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
; vpmuldq memory form with zero-masking (folded load + {%k1} {z}).
1527 define <8 x i64> @test_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
1528 ; CHECK-LABEL: test_mul_epi32_rmkz:
1530 ; CHECK-NEXT: kmovw %esi, %k1
1531 ; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z}
1533 %b = load <16 x i32>, <16 x i32>* %ptr_b
1534 %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
1535 %mask.cast = bitcast i8 %mask to <8 x i1>
1536 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
; vpmuldq broadcast form: the scalar-load splat (insertelement +
; shufflevector) should fold to the (%rdi){1to8} embedded broadcast.
1540 define <8 x i64> @test_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
1541 ; CHECK-LABEL: test_mul_epi32_rmb:
1543 ; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0
1545 %q = load i64, i64* %ptr_b
1546 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1547 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1548 %b = bitcast <8 x i64> %b64 to <16 x i32>
1549 %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
; vpmuldq broadcast form with merge-masking ({1to8} + {%k1}).
1553 define <8 x i64> @test_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1554 ; CHECK-LABEL: test_mul_epi32_rmbk:
1556 ; CHECK-NEXT: kmovw %esi, %k1
1557 ; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
1558 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1560 %q = load i64, i64* %ptr_b
1561 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1562 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1563 %b = bitcast <8 x i64> %b64 to <16 x i32>
1564 %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
1565 %mask.cast = bitcast i8 %mask to <8 x i1>
1566 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
; vpmuldq broadcast form with zero-masking ({1to8} + {%k1} {z}).
1570 define <8 x i64> @test_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
1571 ; CHECK-LABEL: test_mul_epi32_rmbkz:
1573 ; CHECK-NEXT: kmovw %esi, %k1
1574 ; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
1576 %q = load i64, i64* %ptr_b
1577 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1578 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1579 %b = bitcast <8 x i64> %b64 to <16 x i32>
1580 %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
1581 %mask.cast = bitcast i8 %mask to <8 x i1>
1582 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
1586 declare <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32>, <16 x i32>)
; ---- llvm.x86.avx512.pmulu.dq.512 (unsigned VPMULUDQ), register-register forms ----
1588 define <8 x i64> @test_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
1589 ; CHECK-LABEL: test_mul_epu32_rr:
1591 ; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
1593 %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
; Merge-masked: unselected lanes keep %passThru.
1597 define <8 x i64> @test_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
1598 ; CHECK-LABEL: test_mul_epu32_rrk:
1600 ; CHECK-NEXT: kmovw %edi, %k1
1601 ; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm2 {%k1}
1602 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
1604 %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
1605 %mask.cast = bitcast i8 %mask to <8 x i1>
1606 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
; Zero-masked: unselected lanes are zeroed.
1610 define <8 x i64> @test_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
1611 ; CHECK-LABEL: test_mul_epu32_rrkz:
1613 ; CHECK-NEXT: kmovw %edi, %k1
1614 ; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z}
1616 %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
1617 %mask.cast = bitcast i8 %mask to <8 x i1>
1618 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
; ---- VPMULUDQ with a full 512-bit memory operand (load expected to fold) ----
1622 define <8 x i64> @test_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
1623 ; CHECK-LABEL: test_mul_epu32_rm:
1625 ; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm0
1627 %b = load <16 x i32>, <16 x i32>* %ptr_b
1628 %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
; Merge-masked memory form.
1632 define <8 x i64> @test_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1633 ; CHECK-LABEL: test_mul_epu32_rmk:
1635 ; CHECK-NEXT: kmovw %esi, %k1
1636 ; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm1 {%k1}
1637 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1639 %b = load <16 x i32>, <16 x i32>* %ptr_b
1640 %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
1641 %mask.cast = bitcast i8 %mask to <8 x i1>
1642 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
; Zero-masked memory form.
1646 define <8 x i64> @test_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
1647 ; CHECK-LABEL: test_mul_epu32_rmkz:
1649 ; CHECK-NEXT: kmovw %esi, %k1
1650 ; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z}
1652 %b = load <16 x i32>, <16 x i32>* %ptr_b
1653 %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
1654 %mask.cast = bitcast i8 %mask to <8 x i1>
1655 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
; ---- VPMULUDQ broadcast-load ({1to8}) forms: scalar i64 splat folded into
; an embedded-broadcast memory operand ----
1659 define <8 x i64> @test_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
1660 ; CHECK-LABEL: test_mul_epu32_rmb:
1662 ; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0
1664 %q = load i64, i64* %ptr_b
1665 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1666 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1667 %b = bitcast <8 x i64> %b64 to <16 x i32>
1668 %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
; Merge-masked broadcast form.
1672 define <8 x i64> @test_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
1673 ; CHECK-LABEL: test_mul_epu32_rmbk:
1675 ; CHECK-NEXT: kmovw %esi, %k1
1676 ; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
1677 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1679 %q = load i64, i64* %ptr_b
1680 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1681 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1682 %b = bitcast <8 x i64> %b64 to <16 x i32>
1683 %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
1684 %mask.cast = bitcast i8 %mask to <8 x i1>
1685 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
; Zero-masked broadcast form.
1689 define <8 x i64> @test_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
1690 ; CHECK-LABEL: test_mul_epu32_rmbkz:
1692 ; CHECK-NEXT: kmovw %esi, %k1
1693 ; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
1695 %q = load i64, i64* %ptr_b
1696 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
1697 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
1698 %b = bitcast <8 x i64> %b64 to <16 x i32>
1699 %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
1700 %mask.cast = bitcast i8 %mask to <8 x i1>
1701 %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
1705 declare <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32>, <16 x i32>)
; ---- llvm.x86.avx512.mask.add.ps.512, zero-masked, static rounding ----
; Trailing i32 argument selects the rounding control, as paired with the
; expected asm below: 0 -> {rn-sae}, 1 -> {rd-sae}, 2 -> {ru-sae},
; 3 -> {rz-sae}, 4 -> current rounding mode (no static-rounding suffix).
1707 define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1708 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
1710 ; CHECK-NEXT: kmovw %edi, %k1
1711 ; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
1713 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
1714 ret <16 x float> %res
1716 define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1717 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
1719 ; CHECK-NEXT: kmovw %edi, %k1
1720 ; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
1722 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
1723 ret <16 x float> %res
1725 define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1726 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
1728 ; CHECK-NEXT: kmovw %edi, %k1
1729 ; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
1731 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
1732 ret <16 x float> %res
1735 define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1736 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
1738 ; CHECK-NEXT: kmovw %edi, %k1
1739 ; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
1741 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
1742 ret <16 x float> %res
1746 define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1747 ; CHECK-LABEL: test_mm512_maskz_add_round_ps_current:
1749 ; CHECK-NEXT: kmovw %edi, %k1
1750 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
1752 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
1753 ret <16 x float> %res
; ---- mask.add.ps.512, merge-masked (%src pass-through), static rounding ----
; Unselected lanes keep %src; result is produced in %zmm2 then copied to %zmm0.
1756 define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
1757 ; CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae:
1759 ; CHECK-NEXT: kmovw %edi, %k1
1760 ; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
1761 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1763 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
1764 ret <16 x float> %res
1766 define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
1767 ; CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae:
1769 ; CHECK-NEXT: kmovw %edi, %k1
1770 ; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
1771 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1773 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
1774 ret <16 x float> %res
1776 define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
1777 ; CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae:
1779 ; CHECK-NEXT: kmovw %edi, %k1
1780 ; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
1781 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1783 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
1784 ret <16 x float> %res
1787 define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
1788 ; CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae:
1790 ; CHECK-NEXT: kmovw %edi, %k1
1791 ; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
1792 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1794 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
1795 ret <16 x float> %res
1799 define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
1800 ; CHECK-LABEL: test_mm512_mask_add_round_ps_current:
1802 ; CHECK-NEXT: kmovw %edi, %k1
1803 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1}
1804 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1806 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
1807 ret <16 x float> %res
; ---- mask.add.ps.512, unmasked (mask operand is constant -1): expect plain
; vaddps with no {%k1} annotation.
; NOTE(review): the i16 %mask parameter in these unmasked tests is unused.
1811 define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1812 ; CHECK-LABEL: test_mm512_add_round_ps_rn_sae:
1814 ; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
1816 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
1817 ret <16 x float> %res
1819 define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1820 ; CHECK-LABEL: test_mm512_add_round_ps_rd_sae:
1822 ; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
1824 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
1825 ret <16 x float> %res
1827 define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1828 ; CHECK-LABEL: test_mm512_add_round_ps_ru_sae:
1830 ; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
1832 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
1833 ret <16 x float> %res
1836 define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1837 ; CHECK-LABEL: test_mm512_add_round_ps_rz_sae:
1839 ; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
1841 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
1842 ret <16 x float> %res
1845 define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1846 ; CHECK-LABEL: test_mm512_add_round_ps_current:
1848 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
1850 %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
1851 ret <16 x float> %res
1853 declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; ---- mask.sub.ps.512, merge-masked, static rounding (same rounding-arg
; encoding as the add.ps tests above: 0/1/2/3 = rn/rd/ru/rz-sae, 4 = current).
1855 define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
1856 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
1858 ; CHECK-NEXT: kmovw %edi, %k1
1859 ; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
1860 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1862 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
1863 ret <16 x float> %res
1865 define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
1866 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
1868 ; CHECK-NEXT: kmovw %edi, %k1
1869 ; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
1870 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1872 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
1873 ret <16 x float> %res
1875 define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
1876 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
1878 ; CHECK-NEXT: kmovw %edi, %k1
1879 ; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
1880 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1882 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
1883 ret <16 x float> %res
1886 define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
1887 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
1889 ; CHECK-NEXT: kmovw %edi, %k1
1890 ; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
1891 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1893 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
1894 ret <16 x float> %res
1898 define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
1899 ; CHECK-LABEL: test_mm512_mask_sub_round_ps_current:
1901 ; CHECK-NEXT: kmovw %edi, %k1
1902 ; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1}
1903 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1905 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
1906 ret <16 x float> %res
; ---- mask.sub.ps.512, unmasked (constant -1 mask; i16 %mask param unused).
1909 define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1910 ; CHECK-LABEL: test_mm512_sub_round_ps_rn_sae:
1912 ; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
1914 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
1915 ret <16 x float> %res
1917 define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1918 ; CHECK-LABEL: test_mm512_sub_round_ps_rd_sae:
1920 ; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
1922 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
1923 ret <16 x float> %res
1925 define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1926 ; CHECK-LABEL: test_mm512_sub_round_ps_ru_sae:
1928 ; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
1930 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
1931 ret <16 x float> %res
1934 define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1935 ; CHECK-LABEL: test_mm512_sub_round_ps_rz_sae:
1937 ; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
1939 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
1940 ret <16 x float> %res
1943 define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1944 ; CHECK-LABEL: test_mm512_sub_round_ps_current:
1946 ; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm0
1948 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
1949 ret <16 x float> %res
; ---- mask.div.ps.512, zero-masked, static rounding ----
1952 define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1953 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae:
1955 ; CHECK-NEXT: kmovw %edi, %k1
1956 ; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
1958 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
1959 ret <16 x float> %res
1961 define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1962 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
1964 ; CHECK-NEXT: kmovw %edi, %k1
1965 ; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
1967 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
1968 ret <16 x float> %res
1970 define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1971 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
1973 ; CHECK-NEXT: kmovw %edi, %k1
1974 ; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
1976 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
1977 ret <16 x float> %res
1980 define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1981 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae:
1983 ; CHECK-NEXT: kmovw %edi, %k1
1984 ; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
1986 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
1987 ret <16 x float> %res
1991 define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
1992 ; CHECK-LABEL: test_mm512_maskz_div_round_ps_current:
1994 ; CHECK-NEXT: kmovw %edi, %k1
1995 ; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
1997 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
1998 ret <16 x float> %res
; ---- mask.div.ps.512, merge-masked, static rounding ----
2001 define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2002 ; CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae:
2004 ; CHECK-NEXT: kmovw %edi, %k1
2005 ; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2006 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2008 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
2009 ret <16 x float> %res
2011 define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2012 ; CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae:
2014 ; CHECK-NEXT: kmovw %edi, %k1
2015 ; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2016 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2018 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
2019 ret <16 x float> %res
2021 define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2022 ; CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae:
2024 ; CHECK-NEXT: kmovw %edi, %k1
2025 ; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2026 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2028 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
2029 ret <16 x float> %res
2032 define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2033 ; CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae:
2035 ; CHECK-NEXT: kmovw %edi, %k1
2036 ; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2037 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2039 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
2040 ret <16 x float> %res
2044 define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2045 ; CHECK-LABEL: test_mm512_mask_div_round_ps_current:
2047 ; CHECK-NEXT: kmovw %edi, %k1
2048 ; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm2 {%k1}
2049 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2051 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2052 ret <16 x float> %res
; ---- mask.div.ps.512, unmasked (constant -1 mask; i16 %mask param unused).
2056 define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2057 ; CHECK-LABEL: test_mm512_div_round_ps_rn_sae:
2059 ; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
2061 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
2062 ret <16 x float> %res
2064 define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2065 ; CHECK-LABEL: test_mm512_div_round_ps_rd_sae:
2067 ; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
2069 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
2070 ret <16 x float> %res
2072 define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2073 ; CHECK-LABEL: test_mm512_div_round_ps_ru_sae:
2075 ; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
2077 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
2078 ret <16 x float> %res
2081 define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2082 ; CHECK-LABEL: test_mm512_div_round_ps_rz_sae:
2084 ; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
2086 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
2087 ret <16 x float> %res
2090 define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2091 ; CHECK-LABEL: test_mm512_div_round_ps_current:
2093 ; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0
2095 %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2096 ret <16 x float> %res
2098 declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; ---- mask.min.ps.512: min/max take no static rounding mode; the trailing
; i32 here is 8 -> {sae} or 4 -> current mode, as the expected asm shows.
2100 define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2101 ; CHECK-LABEL: test_mm512_maskz_min_round_ps_sae:
2103 ; CHECK-NEXT: kmovw %edi, %k1
2104 ; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2106 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
2107 ret <16 x float> %res
2110 define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2111 ; CHECK-LABEL: test_mm512_maskz_min_round_ps_current:
2113 ; CHECK-NEXT: kmovw %edi, %k1
2114 ; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
2116 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2117 ret <16 x float> %res
2120 define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2121 ; CHECK-LABEL: test_mm512_mask_min_round_ps_sae:
2123 ; CHECK-NEXT: kmovw %edi, %k1
2124 ; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
2125 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2127 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
2128 ret <16 x float> %res
2131 define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2132 ; CHECK-LABEL: test_mm512_mask_min_round_ps_current:
2134 ; CHECK-NEXT: kmovw %edi, %k1
2135 ; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm2 {%k1}
2136 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2138 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2139 ret <16 x float> %res
2142 define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2143 ; CHECK-LABEL: test_mm512_min_round_ps_sae:
2145 ; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0
2147 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
2148 ret <16 x float> %res
2151 define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2152 ; CHECK-LABEL: test_mm512_min_round_ps_current:
2154 ; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0
2156 %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2157 ret <16 x float> %res
2159 declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; ---- mask.max.ps.512: mirrors the min.ps tests (i32 8 -> {sae}, 4 -> current).
2161 define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2162 ; CHECK-LABEL: test_mm512_maskz_max_round_ps_sae:
2164 ; CHECK-NEXT: kmovw %edi, %k1
2165 ; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
2167 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
2168 ret <16 x float> %res
2171 define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2172 ; CHECK-LABEL: test_mm512_maskz_max_round_ps_current:
2174 ; CHECK-NEXT: kmovw %edi, %k1
2175 ; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
2177 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
2178 ret <16 x float> %res
2181 define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2182 ; CHECK-LABEL: test_mm512_mask_max_round_ps_sae:
2184 ; CHECK-NEXT: kmovw %edi, %k1
2185 ; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
2186 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2188 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
2189 ret <16 x float> %res
2192 define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
2193 ; CHECK-LABEL: test_mm512_mask_max_round_ps_current:
2195 ; CHECK-NEXT: kmovw %edi, %k1
2196 ; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm2 {%k1}
2197 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2199 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
2200 ret <16 x float> %res
2203 define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2204 ; CHECK-LABEL: test_mm512_max_round_ps_sae:
2206 ; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0
2208 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
2209 ret <16 x float> %res
2212 define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
2213 ; CHECK-LABEL: test_mm512_max_round_ps_current:
2215 ; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0
2217 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
2218 ret <16 x float> %res
2220 declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
; ---- llvm.x86.avx512.mask.add.ss.round: scalar single-precision add with
; masking and static rounding. Only bit 0 of the i8 mask is meaningful for a
; scalar op, hence the 'andl $1, %edi' before kmovw in the expected codegen.
2222 declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
2224 define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2225 ; CHECK-LABEL: test_mask_add_ss_rn:
2227 ; CHECK-NEXT: andl $1, %edi
2228 ; CHECK-NEXT: kmovw %edi, %k1
2229 ; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2230 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
2232 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
2233 ret <4 x float> %res
2236 define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2237 ; CHECK-LABEL: test_mask_add_ss_rd:
2239 ; CHECK-NEXT: andl $1, %edi
2240 ; CHECK-NEXT: kmovw %edi, %k1
2241 ; CHECK-NEXT: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2242 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
2244 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
2245 ret <4 x float> %res
2248 define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2249 ; CHECK-LABEL: test_mask_add_ss_ru:
2251 ; CHECK-NEXT: andl $1, %edi
2252 ; CHECK-NEXT: kmovw %edi, %k1
2253 ; CHECK-NEXT: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2254 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
2256 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
2257 ret <4 x float> %res
2260 define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2261 ; CHECK-LABEL: test_mask_add_ss_rz:
2263 ; CHECK-NEXT: andl $1, %edi
2264 ; CHECK-NEXT: kmovw %edi, %k1
2265 ; CHECK-NEXT: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2266 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
2268 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
2269 ret <4 x float> %res
2272 define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2273 ; CHECK-LABEL: test_mask_add_ss_current:
2275 ; CHECK-NEXT: andl $1, %edi
2276 ; CHECK-NEXT: kmovw %edi, %k1
2277 ; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1}
2278 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
2280 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
2281 ret <4 x float> %res
; Zero-masked and unmasked scalar variants.
2284 define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2285 ; CHECK-LABEL: test_maskz_add_ss_rn:
2287 ; CHECK-NEXT: andl $1, %edi
2288 ; CHECK-NEXT: kmovw %edi, %k1
2289 ; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2291 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0)
2292 ret <4 x float> %res
2295 define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) {
2296 ; CHECK-LABEL: test_add_ss_rn:
2298 ; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0
2300 %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0)
2301 ret <4 x float> %res
; ---- llvm.x86.avx512.mask.add.sd.round: scalar double-precision counterpart
; of the add.ss tests above (same mask bit-0 handling via 'andl $1').
2304 declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
2306 define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2307 ; CHECK-LABEL: test_mask_add_sd_rn:
2309 ; CHECK-NEXT: andl $1, %edi
2310 ; CHECK-NEXT: kmovw %edi, %k1
2311 ; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2312 ; CHECK-NEXT: vmovapd %xmm2, %xmm0
2314 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
2315 ret <2 x double> %res
2318 define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2319 ; CHECK-LABEL: test_mask_add_sd_rd:
2321 ; CHECK-NEXT: andl $1, %edi
2322 ; CHECK-NEXT: kmovw %edi, %k1
2323 ; CHECK-NEXT: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2324 ; CHECK-NEXT: vmovapd %xmm2, %xmm0
2326 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
2327 ret <2 x double> %res
2330 define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2331 ; CHECK-LABEL: test_mask_add_sd_ru:
2333 ; CHECK-NEXT: andl $1, %edi
2334 ; CHECK-NEXT: kmovw %edi, %k1
2335 ; CHECK-NEXT: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2336 ; CHECK-NEXT: vmovapd %xmm2, %xmm0
2338 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
2339 ret <2 x double> %res
2342 define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2343 ; CHECK-LABEL: test_mask_add_sd_rz:
2345 ; CHECK-NEXT: andl $1, %edi
2346 ; CHECK-NEXT: kmovw %edi, %k1
2347 ; CHECK-NEXT: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
2348 ; CHECK-NEXT: vmovapd %xmm2, %xmm0
2350 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
2351 ret <2 x double> %res
2354 define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2355 ; CHECK-LABEL: test_mask_add_sd_current:
2357 ; CHECK-NEXT: andl $1, %edi
2358 ; CHECK-NEXT: kmovw %edi, %k1
2359 ; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
2360 ; CHECK-NEXT: vmovapd %xmm2, %xmm0
2362 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
2363 ret <2 x double> %res
; Zero-masked scalar variant.
2366 define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2367 ; CHECK-LABEL: test_maskz_add_sd_rn:
2369 ; CHECK-NEXT: andl $1, %edi
2370 ; CHECK-NEXT: kmovw %edi, %k1
2371 ; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2373 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0)
2374 ret <2 x double> %res
2377 define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) {
2378 ; CHECK-LABEL: test_add_sd_rn:
2380 ; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0
2382 %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0)
2383 ret <2 x double> %res
; --- llvm.x86.avx512.mask.max.ss.round ---
; i32 8 selects the {sae} (suppress-all-exceptions, no rounding override) form;
; i32 4 is current rounding. Masked, maskz and unmasked variants of each.
2386 declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
2388 define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2389 ; CHECK-LABEL: test_mask_max_ss_sae:
2391 ; CHECK-NEXT: andl $1, %edi
2392 ; CHECK-NEXT: kmovw %edi, %k1
2393 ; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
2394 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
2396 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
2397 ret <4 x float> %res
2400 define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2401 ; CHECK-LABEL: test_maskz_max_ss_sae:
2403 ; CHECK-NEXT: andl $1, %edi
2404 ; CHECK-NEXT: kmovw %edi, %k1
2405 ; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2407 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
2408 ret <4 x float> %res
2411 define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) {
2412 ; CHECK-LABEL: test_max_ss_sae:
2414 ; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm0
2416 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
2417 ret <4 x float> %res
2420 define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
2421 ; CHECK-LABEL: test_mask_max_ss:
2423 ; CHECK-NEXT: andl $1, %edi
2424 ; CHECK-NEXT: kmovw %edi, %k1
2425 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm2 {%k1}
2426 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
2428 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
2429 ret <4 x float> %res
2432 define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
2433 ; CHECK-LABEL: test_maskz_max_ss:
2435 ; CHECK-NEXT: andl $1, %edi
2436 ; CHECK-NEXT: kmovw %edi, %k1
2437 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z}
2439 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4)
2440 ret <4 x float> %res
2443 define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) {
2444 ; CHECK-LABEL: test_max_ss:
2446 ; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0
2448 %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4)
2449 ret <4 x float> %res
; --- llvm.x86.avx512.mask.max.sd.round ---
; Scalar-double counterpart of the max.ss.round tests above: {sae} (i32 8)
; and current-rounding (i32 4), each in masked / maskz / unmasked form.
2451 declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
2453 define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2454 ; CHECK-LABEL: test_mask_max_sd_sae:
2456 ; CHECK-NEXT: andl $1, %edi
2457 ; CHECK-NEXT: kmovw %edi, %k1
2458 ; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
2459 ; CHECK-NEXT: vmovapd %xmm2, %xmm0
2461 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
2462 ret <2 x double> %res
2465 define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2466 ; CHECK-LABEL: test_maskz_max_sd_sae:
2468 ; CHECK-NEXT: andl $1, %edi
2469 ; CHECK-NEXT: kmovw %edi, %k1
2470 ; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
2472 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
2473 ret <2 x double> %res
2476 define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) {
2477 ; CHECK-LABEL: test_max_sd_sae:
2479 ; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm0
2481 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
2482 ret <2 x double> %res
2485 define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
2486 ; CHECK-LABEL: test_mask_max_sd:
2488 ; CHECK-NEXT: andl $1, %edi
2489 ; CHECK-NEXT: kmovw %edi, %k1
2490 ; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
2491 ; CHECK-NEXT: vmovapd %xmm2, %xmm0
2493 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
2494 ret <2 x double> %res
2497 define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
2498 ; CHECK-LABEL: test_maskz_max_sd:
2500 ; CHECK-NEXT: andl $1, %edi
2501 ; CHECK-NEXT: kmovw %edi, %k1
2502 ; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z}
2504 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
2505 ret <2 x double> %res
2508 define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) {
2509 ; CHECK-LABEL: test_max_sd:
2511 ; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
2513 %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
2514 ret <2 x double> %res
; --- Signed integer -> scalar FP conversions with explicit rounding ---
; All three use rounding operand i32 3, which selects {rz-sae}
; (round-toward-zero) on the emitted vcvtsi2sd/ss instruction.
2517 define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) {
2518 ; CHECK-LABEL: test_x86_avx512_cvtsi2sd64:
2520 ; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0
2522 %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<<2 x double>> [#uses=1]
2523 ret <2 x double> %res
2525 declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone
2527 define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) {
2528 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss32:
2530 ; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0
2532 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<<4 x float>> [#uses=1]
2533 ret <4 x float> %res
2535 declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone
2537 define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) {
2538 ; CHECK-LABEL: test_x86_avx512_cvtsi2ss64:
2540 ; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0
2542 %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<<4 x float>> [#uses=1]
2543 ret <4 x float> %res
2545 declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone
; --- Unsigned integer -> float conversions ---
; Rounding operand i32 1 -> {rd-sae} (round-down); i32 4 -> current rounding.
; The _mem variants load the scalar first; with current rounding the load can
; fold into vcvtusi2ssl, but the {rd-sae} form keeps a separate movl.
2547 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b)
2548 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss:
2550 ; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0
2553 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
2554 ret <4 x float> %res
2557 define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32* %ptr)
2558 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem:
2560 ; CHECK-NEXT: movl (%rdi), %eax
2561 ; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0
2564 %b = load i32, i32* %ptr
2565 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
2566 ret <4 x float> %res
2569 define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b)
2570 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss:
2572 ; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0
2575 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
2576 ret <4 x float> %res
2579 define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, i32* %ptr)
2580 ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem:
2582 ; CHECK-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0
2585 %b = load i32, i32* %ptr
2586 %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
2587 ret <4 x float> %res
2589 declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone
; 64-bit unsigned source: vcvtusi2ssq with and without {rd-sae}.
2591 define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b)
2592 ; CHECK-LABEL: _mm_cvt_roundu64_ss:
2594 ; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0
2597 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<<4 x float>> [#uses=1]
2598 ret <4 x float> %res
2601 define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b)
2602 ; CHECK-LABEL: _mm_cvtu64_ss:
2604 ; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0
2607 %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<<4 x float>> [#uses=1]
2608 ret <4 x float> %res
2610 declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone
; --- Unsigned integer -> double conversions ---
; cvtusi2sd (32-bit source) takes no rounding operand; cvtusi642sd (64-bit)
; does: i32 1 -> {rd-sae}, i32 4 -> current rounding.
; NOTE(review): @test_x86_avx512_mm_cvtu64_sd uses rounding mode 1 while
; @test_x86_avx512__mm_cvt_roundu64_sd uses 4 - the names look swapped
; relative to the u64_ss pair above; confirm against the intended mapping.
2612 define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b)
2613 ; CHECK-LABEL: test_x86_avx512_mm_cvtu32_sd:
2615 ; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0
2618 %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<<2 x double>> [#uses=1]
2619 ret <2 x double> %res
2621 declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone
2623 define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b)
2624 ; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd:
2626 ; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0
2629 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<<2 x double>> [#uses=1]
2630 ret <2 x double> %res
2633 define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b)
2634 ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd:
2636 ; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0
2639 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<<2 x double>> [#uses=1]
2640 ret <2 x double> %res
2642 declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone
; --- vpermi2var (two-source permute, index operand overwritten) ---
; Each test computes a masked result and an unmasked (i8/i16 -1) result and
; adds them, so both codegen paths are checked in one function. The d-variant
; also exercises a memory operand for the second table.
; NOTE(review): the "ret <8 x i64> %res2" of the q_512 test appears to have
; been lost in extraction (the block ends at the add).
2644 declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2646 define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
2647 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
2649 ; CHECK-NEXT: kmovw %esi, %k1
2650 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
2651 ; CHECK-NEXT: vpermi2d (%rdi), %zmm0, %zmm3 {%k1}
2652 ; CHECK-NEXT: vpermi2d %zmm2, %zmm0, %zmm1
2653 ; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0
2655 %x2 = load <16 x i32>, <16 x i32>* %x2p
2656 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2657 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
2658 %res2 = add <16 x i32> %res, %res1
2659 ret <16 x i32> %res2
2662 declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
2664 define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
2665 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
2667 ; CHECK-NEXT: kmovw %edi, %k1
2668 ; CHECK-NEXT: vmovapd %zmm1, %zmm3
2669 ; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm3 {%k1}
2670 ; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1
2671 ; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0
2673 %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
2674 %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
2675 %res2 = fadd <8 x double> %res, %res1
2676 ret <8 x double> %res2
2679 declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
2681 define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
2682 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
2684 ; CHECK-NEXT: kmovw %edi, %k1
2685 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
2686 ; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm3 {%k1}
2687 ; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1
2688 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0
2690 %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
2691 %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
2692 %res2 = fadd <16 x float> %res, %res1
2693 ret <16 x float> %res2
2696 declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2698 define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2699 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
2701 ; CHECK-NEXT: kmovw %edi, %k1
2702 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
2703 ; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm3 {%k1}
2704 ; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1
2705 ; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0
2707 %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2708 %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2709 %res2 = add <8 x i64> %res, %res1
; --- maskz vpermt2var (two-source permute, first table overwritten, {z}) ---
; The d-variant folds a full-vector memory operand; the pd-variant folds a
; broadcast-from-memory ((%rdi){1to8}) built via insertelement+shufflevector.
; NOTE(review): the q_512 test's "ret" line appears lost in extraction.
2713 declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2715 define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) {
2716 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
2718 ; CHECK-NEXT: kmovw %esi, %k1
2719 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm2
2720 ; CHECK-NEXT: vpermt2d (%rdi), %zmm0, %zmm2 {%k1} {z}
2721 ; CHECK-NEXT: vpermt2d %zmm1, %zmm0, %zmm1
2722 ; CHECK-NEXT: vpaddd %zmm1, %zmm2, %zmm0
2724 %x2 = load <16 x i32>, <16 x i32>* %x2p
2725 %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2726 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x1, i16 -1)
2727 %res2 = add <16 x i32> %res, %res1
2728 ret <16 x i32> %res2
2731 declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
2733 define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) {
2734 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
2736 ; CHECK-NEXT: kmovw %esi, %k1
2737 ; CHECK-NEXT: vmovapd %zmm1, %zmm2
2738 ; CHECK-NEXT: vpermt2pd (%rdi){1to8}, %zmm0, %zmm2 {%k1} {z}
2739 ; CHECK-NEXT: vpermt2pd %zmm1, %zmm0, %zmm1
2740 ; CHECK-NEXT: vaddpd %zmm1, %zmm2, %zmm0
2742 %x2s = load double, double* %x2ptr
2743 %x2ins = insertelement <8 x double> undef, double %x2s, i32 0
2744 %x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer
2745 %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
2746 %res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x1, i8 -1)
2747 %res2 = fadd <8 x double> %res, %res1
2748 ret <8 x double> %res2
2751 declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
2753 define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
2754 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
2756 ; CHECK-NEXT: kmovw %edi, %k1
2757 ; CHECK-NEXT: vmovaps %zmm1, %zmm3
2758 ; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3 {%k1} {z}
2759 ; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1
2760 ; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0
2762 %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
2763 %res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
2764 %res2 = fadd <16 x float> %res, %res1
2765 ret <16 x float> %res2
2769 declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2771 define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
2772 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
2774 ; CHECK-NEXT: kmovw %edi, %k1
2775 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
2776 ; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm3 {%k1} {z}
2777 ; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm1
2778 ; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0
2780 %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
2781 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
2782 %res2 = add <8 x i64> %res, %res1
; --- merge-masking vpermt2var.d (non-zeroing {%k1} form) ---
; Masked and unmasked (i16 -1) results are added so both encodings appear.
2786 declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2788 define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
2789 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
2791 ; CHECK-NEXT: kmovw %edi, %k1
2792 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
2793 ; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm3 {%k1}
2794 ; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm1
2795 ; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0
2797 %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
2798 %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
2799 %res2 = add <16 x i32> %res, %res1
2800 ret <16 x i32> %res2
; --- vscalefpd/vscalefps with explicit rounding ---
; Masked call uses rz (pd, i32 3) / ru (ps, i32 2); the unmasked call uses
; rn (i32 0); results are combined with an add.
2803 declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
2804 define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
2805 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_pd_512:
2807 ; CHECK-NEXT: kmovw %edi, %k1
2808 ; CHECK-NEXT: vscalefpd {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2809 ; CHECK-NEXT: vscalefpd {rn-sae}, %zmm1, %zmm0, %zmm0
2810 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
2812 %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 3)
2813 %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
2814 %res2 = fadd <8 x double> %res, %res1
2815 ret <8 x double> %res2
2818 declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
2819 define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
2820 ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ps_512:
2822 ; CHECK-NEXT: kmovw %edi, %k1
2823 ; CHECK-NEXT: vscalefps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
2824 ; CHECK-NEXT: vscalefps {rn-sae}, %zmm1, %zmm0, %zmm0
2825 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
2827 %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 2)
2828 %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
2829 %res2 = fadd <16 x float> %res, %res1
2830 ret <16 x float> %res2
; --- qword -> byte truncations: vpmovqb (trunc) / vpmovsqb (signed
; saturate) / vpmovusqb (unsigned saturate) ---
; Register tests exercise unmasked, merge-masked and zero-masked forms and
; sum all three results; .mem tests store unmasked then masked to (%rdi).
; NOTE(review): the "ret <16 x i8> %res4" / "ret void" lines of these tests
; appear to have been lost in extraction.
2833 declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8)
2835 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
2836 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512:
2838 ; CHECK-NEXT: kmovw %edi, %k1
2839 ; CHECK-NEXT: vpmovqb %zmm0, %xmm1 {%k1}
2840 ; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z}
2841 ; CHECK-NEXT: vpmovqb %zmm0, %xmm0
2842 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
2843 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2845 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
2846 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
2847 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
2848 %res3 = add <16 x i8> %res0, %res1
2849 %res4 = add <16 x i8> %res3, %res2
2853 declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64>, i8)
2855 define void @test_int_x86_avx512_mask_pmov_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
2856 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_512:
2858 ; CHECK-NEXT: kmovw %esi, %k1
2859 ; CHECK-NEXT: vpmovqb %zmm0, (%rdi)
2860 ; CHECK-NEXT: vpmovqb %zmm0, (%rdi) {%k1}
2862 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
2863 call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
2867 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8)
2869 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
2870 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512:
2872 ; CHECK-NEXT: kmovw %edi, %k1
2873 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm1 {%k1}
2874 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z}
2875 ; CHECK-NEXT: vpmovsqb %zmm0, %xmm0
2876 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
2877 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2879 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
2880 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
2881 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
2882 %res3 = add <16 x i8> %res0, %res1
2883 %res4 = add <16 x i8> %res3, %res2
2887 declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8)
2889 define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
2890 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512:
2892 ; CHECK-NEXT: kmovw %esi, %k1
2893 ; CHECK-NEXT: vpmovsqb %zmm0, (%rdi)
2894 ; CHECK-NEXT: vpmovsqb %zmm0, (%rdi) {%k1}
2896 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
2897 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
2901 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8)
2903 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
2904 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512:
2906 ; CHECK-NEXT: kmovw %edi, %k1
2907 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm1 {%k1}
2908 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z}
2909 ; CHECK-NEXT: vpmovusqb %zmm0, %xmm0
2910 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
2911 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
2913 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
2914 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
2915 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
2916 %res3 = add <16 x i8> %res0, %res1
2917 %res4 = add <16 x i8> %res3, %res2
2921 declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8)
2923 define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
2924 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512:
2926 ; CHECK-NEXT: kmovw %esi, %k1
2927 ; CHECK-NEXT: vpmovusqb %zmm0, (%rdi)
2928 ; CHECK-NEXT: vpmovusqb %zmm0, (%rdi) {%k1}
2930 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
2931 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; --- qword -> word truncations: vpmovqw / vpmovsqw / vpmovusqw ---
; Same pattern as the qb family above: unmasked + merge-masked + zero-masked
; register forms summed, and unmasked + masked stores for the .mem variants.
2935 declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
2937 define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
2938 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512:
2940 ; CHECK-NEXT: kmovw %edi, %k1
2941 ; CHECK-NEXT: vpmovqw %zmm0, %xmm1 {%k1}
2942 ; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z}
2943 ; CHECK-NEXT: vpmovqw %zmm0, %xmm0
2944 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
2945 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0
2947 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
2948 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
2949 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
2950 %res3 = add <8 x i16> %res0, %res1
2951 %res4 = add <8 x i16> %res3, %res2
2955 declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64>, i8)
2957 define void @test_int_x86_avx512_mask_pmov_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
2958 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_512:
2960 ; CHECK-NEXT: kmovw %esi, %k1
2961 ; CHECK-NEXT: vpmovqw %zmm0, (%rdi)
2962 ; CHECK-NEXT: vpmovqw %zmm0, (%rdi) {%k1}
2964 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
2965 call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
2969 declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8)
2971 define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
2972 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512:
2974 ; CHECK-NEXT: kmovw %edi, %k1
2975 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm1 {%k1}
2976 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z}
2977 ; CHECK-NEXT: vpmovsqw %zmm0, %xmm0
2978 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
2979 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0
2981 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
2982 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
2983 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
2984 %res3 = add <8 x i16> %res0, %res1
2985 %res4 = add <8 x i16> %res3, %res2
2989 declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8)
2991 define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
2992 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512:
2994 ; CHECK-NEXT: kmovw %esi, %k1
2995 ; CHECK-NEXT: vpmovsqw %zmm0, (%rdi)
2996 ; CHECK-NEXT: vpmovsqw %zmm0, (%rdi) {%k1}
2998 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
2999 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3003 declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8)
3005 define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
3006 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512:
3008 ; CHECK-NEXT: kmovw %edi, %k1
3009 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm1 {%k1}
3010 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z}
3011 ; CHECK-NEXT: vpmovusqw %zmm0, %xmm0
3012 ; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
3013 ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0
3015 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
3016 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
3017 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
3018 %res3 = add <8 x i16> %res0, %res1
3019 %res4 = add <8 x i16> %res3, %res2
3023 declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8)
3025 define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3026 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512:
3028 ; CHECK-NEXT: kmovw %esi, %k1
3029 ; CHECK-NEXT: vpmovusqw %zmm0, (%rdi)
3030 ; CHECK-NEXT: vpmovusqw %zmm0, (%rdi) {%k1}
3032 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3033 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
; --- qword -> dword truncations: vpmovqd / vpmovsqd / vpmovusqd ---
; Results are <8 x i32> (ymm) rather than xmm; otherwise the same
; unmasked / merge-masked / zero-masked pattern as the qb and qw families.
3037 declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)
3039 define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
3040 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
3042 ; CHECK-NEXT: kmovw %edi, %k1
3043 ; CHECK-NEXT: vpmovqd %zmm0, %ymm1 {%k1}
3044 ; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z}
3045 ; CHECK-NEXT: vpmovqd %zmm0, %ymm0
3046 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
3047 ; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0
3049 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
3050 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
3051 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
3052 %res3 = add <8 x i32> %res0, %res1
3053 %res4 = add <8 x i32> %res3, %res2
3057 declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64>, i8)
3059 define void @test_int_x86_avx512_mask_pmov_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3060 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_512:
3062 ; CHECK-NEXT: kmovw %esi, %k1
3063 ; CHECK-NEXT: vpmovqd %zmm0, (%rdi)
3064 ; CHECK-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
3066 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3067 call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3071 declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8)
3073 define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
3074 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512:
3076 ; CHECK-NEXT: kmovw %edi, %k1
3077 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm1 {%k1}
3078 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z}
3079 ; CHECK-NEXT: vpmovsqd %zmm0, %ymm0
3080 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
3081 ; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0
3083 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
3084 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
3085 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
3086 %res3 = add <8 x i32> %res0, %res1
3087 %res4 = add <8 x i32> %res3, %res2
3091 declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8)
3093 define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3094 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512:
3096 ; CHECK-NEXT: kmovw %esi, %k1
3097 ; CHECK-NEXT: vpmovsqd %zmm0, (%rdi)
3098 ; CHECK-NEXT: vpmovsqd %zmm0, (%rdi) {%k1}
3100 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3101 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3105 declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8)
3107 define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
3108 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512:
3110 ; CHECK-NEXT: kmovw %edi, %k1
3111 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm1 {%k1}
3112 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z}
3113 ; CHECK-NEXT: vpmovusqd %zmm0, %ymm0
3114 ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
3115 ; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0
3117 %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
3118 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
3119 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
3120 %res3 = add <8 x i32> %res0, %res1
3121 %res4 = add <8 x i32> %res3, %res2
3125 declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8)
3127 define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
3128 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512:
3130 ; CHECK-NEXT: kmovw %esi, %k1
3131 ; CHECK-NEXT: vpmovusqd %zmm0, (%rdi)
3132 ; CHECK-NEXT: vpmovusqd %zmm0, (%rdi) {%k1}
3134 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
3135 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
3139 declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
3141 define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
3142 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512:
3144 ; CHECK-NEXT: kmovw %edi, %k1
3145 ; CHECK-NEXT: vpmovdb %zmm0, %xmm1 {%k1}
3146 ; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z}
3147 ; CHECK-NEXT: vpmovdb %zmm0, %xmm0
3148 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
3149 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3151 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
3152 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
3153 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
3154 %res3 = add <16 x i8> %res0, %res1
3155 %res4 = add <16 x i8> %res3, %res2
3159 declare void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32>, i16)
3161 define void @test_int_x86_avx512_mask_pmov_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3162 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_512:
3164 ; CHECK-NEXT: kmovw %esi, %k1
3165 ; CHECK-NEXT: vpmovdb %zmm0, (%rdi)
3166 ; CHECK-NEXT: vpmovdb %zmm0, (%rdi) {%k1}
3168 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3169 call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3173 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16)
3175 define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
3176 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512:
3178 ; CHECK-NEXT: kmovw %edi, %k1
3179 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm1 {%k1}
3180 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z}
3181 ; CHECK-NEXT: vpmovsdb %zmm0, %xmm0
3182 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
3183 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3185 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
3186 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
3187 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
3188 %res3 = add <16 x i8> %res0, %res1
3189 %res4 = add <16 x i8> %res3, %res2
3193 declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16)
3195 define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3196 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512:
3198 ; CHECK-NEXT: kmovw %esi, %k1
3199 ; CHECK-NEXT: vpmovsdb %zmm0, (%rdi)
3200 ; CHECK-NEXT: vpmovsdb %zmm0, (%rdi) {%k1}
3202 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3203 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3207 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16)
3209 define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
3210 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512:
3212 ; CHECK-NEXT: kmovw %edi, %k1
3213 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm1 {%k1}
3214 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z}
3215 ; CHECK-NEXT: vpmovusdb %zmm0, %xmm0
3216 ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
3217 ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
3219 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
3220 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
3221 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
3222 %res3 = add <16 x i8> %res0, %res1
3223 %res4 = add <16 x i8> %res3, %res2
3227 declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16)
3229 define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3230 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512:
3232 ; CHECK-NEXT: kmovw %esi, %k1
3233 ; CHECK-NEXT: vpmovusdb %zmm0, (%rdi)
3234 ; CHECK-NEXT: vpmovusdb %zmm0, (%rdi) {%k1}
3236 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3237 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3241 declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16)
3243 define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
3244 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512:
3246 ; CHECK-NEXT: kmovw %edi, %k1
3247 ; CHECK-NEXT: vpmovdw %zmm0, %ymm1 {%k1}
3248 ; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z}
3249 ; CHECK-NEXT: vpmovdw %zmm0, %ymm0
3250 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
3251 ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
3253 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
3254 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
3255 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
3256 %res3 = add <16 x i16> %res0, %res1
3257 %res4 = add <16 x i16> %res3, %res2
3258 ret <16 x i16> %res4
3261 declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32>, i16)
3263 define void @test_int_x86_avx512_mask_pmov_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3264 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_512:
3266 ; CHECK-NEXT: kmovw %esi, %k1
3267 ; CHECK-NEXT: vpmovdw %zmm0, (%rdi)
3268 ; CHECK-NEXT: vpmovdw %zmm0, (%rdi) {%k1}
3270 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3271 call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3275 declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16)
3277 define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
3278 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512:
3280 ; CHECK-NEXT: kmovw %edi, %k1
3281 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm1 {%k1}
3282 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z}
3283 ; CHECK-NEXT: vpmovsdw %zmm0, %ymm0
3284 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
3285 ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
3287 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
3288 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
3289 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
3290 %res3 = add <16 x i16> %res0, %res1
3291 %res4 = add <16 x i16> %res3, %res2
3292 ret <16 x i16> %res4
3295 declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16)
3297 define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3298 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512:
3300 ; CHECK-NEXT: kmovw %esi, %k1
3301 ; CHECK-NEXT: vpmovsdw %zmm0, (%rdi)
3302 ; CHECK-NEXT: vpmovsdw %zmm0, (%rdi) {%k1}
3304 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3305 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3309 declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16)
3311 define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
3312 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512:
3314 ; CHECK-NEXT: kmovw %edi, %k1
3315 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm1 {%k1}
3316 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z}
3317 ; CHECK-NEXT: vpmovusdw %zmm0, %ymm0
3318 ; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
3319 ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
3321 %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
3322 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
3323 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
3324 %res3 = add <16 x i16> %res0, %res1
3325 %res4 = add <16 x i16> %res3, %res2
3326 ret <16 x i16> %res4
3329 declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16)
3331 define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
3332 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512:
3334 ; CHECK-NEXT: kmovw %esi, %k1
3335 ; CHECK-NEXT: vpmovusdw %zmm0, (%rdi)
3336 ; CHECK-NEXT: vpmovusdw %zmm0, (%rdi) {%k1}
3338 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
3339 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
3343 declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
3345 define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
3346 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
3348 ; CHECK-NEXT: kmovw %edi, %k1
3349 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1}
3350 ; CHECK-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0
3351 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
3353 %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
3354 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
3355 %res2 = fadd <16 x float> %res, %res1
3356 ret <16 x float> %res2
3359 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
3361 define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3362 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_512:
3364 ; CHECK-NEXT: kmovw %edi, %k1
3365 ; CHECK-NEXT: vcvtpd2dq %zmm0, %ymm1 {%k1}
3366 ; CHECK-NEXT: vcvtpd2dq {rn-sae}, %zmm0, %ymm0
3367 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3369 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
3370 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
3371 %res2 = add <8 x i32> %res, %res1
3375 declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
3377 define <8 x float>@test_int_x86_avx512_mask_cvt_pd2ps_512(<8 x double> %x0, <8 x float> %x1, i8 %x2) {
3378 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_512:
3380 ; CHECK-NEXT: kmovw %edi, %k1
3381 ; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm1 {%k1}
3382 ; CHECK-NEXT: vcvtpd2ps {ru-sae}, %zmm0, %ymm0
3383 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
3385 %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4)
3386 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 2)
3387 %res2 = fadd <8 x float> %res, %res1
3388 ret <8 x float> %res2
3391 declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
3393 define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3394 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_512:
3396 ; CHECK-NEXT: kmovw %edi, %k1
3397 ; CHECK-NEXT: vcvtpd2udq {ru-sae}, %zmm0, %ymm1 {%k1}
3398 ; CHECK-NEXT: vcvtpd2udq {rn-sae}, %zmm0, %ymm0
3399 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3401 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 2)
3402 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
3403 %res2 = add <8 x i32> %res, %res1
3407 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float>, <16 x i32>, i16, i32)
3409 define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3410 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_512:
3412 ; CHECK-NEXT: kmovw %edi, %k1
3413 ; CHECK-NEXT: vcvtps2dq {ru-sae}, %zmm0, %zmm1 {%k1}
3414 ; CHECK-NEXT: vcvtps2dq {rn-sae}, %zmm0, %zmm0
3415 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3417 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
3418 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
3419 %res2 = add <16 x i32> %res, %res1
3420 ret <16 x i32> %res2
3423 declare <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float>, <8 x double>, i8, i32)
3425 define <8 x double>@test_int_x86_avx512_mask_cvt_ps2pd_512(<8 x float> %x0, <8 x double> %x1, i8 %x2) {
3426 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_512:
3428 ; CHECK-NEXT: kmovw %edi, %k1
3429 ; CHECK-NEXT: vcvtps2pd %ymm0, %zmm1 {%k1}
3430 ; CHECK-NEXT: vcvtps2pd {sae}, %ymm0, %zmm0
3431 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3433 %res = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 %x2, i32 4)
3434 %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 -1, i32 8)
3435 %res2 = fadd <8 x double> %res, %res1
3436 ret <8 x double> %res2
3439 declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)
3441 define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3442 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_512:
3444 ; CHECK-NEXT: kmovw %edi, %k1
3445 ; CHECK-NEXT: vcvtps2udq {ru-sae}, %zmm0, %zmm1 {%k1}
3446 ; CHECK-NEXT: vcvtps2udq {rn-sae}, %zmm0, %zmm0
3447 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3449 %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
3450 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
3451 %res2 = add <16 x i32> %res, %res1
3452 ret <16 x i32> %res2
3455 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
3457 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3458 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512:
3460 ; CHECK-NEXT: kmovw %edi, %k1
3461 ; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1}
3462 ; CHECK-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0
3463 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3465 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
3466 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
3467 %res2 = add <8 x i32> %res, %res1
3471 declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
3473 define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
3474 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
3476 ; CHECK-NEXT: kmovw %edi, %k1
3477 ; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1}
3478 ; CHECK-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0
3479 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
3481 %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
3482 %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
3483 %res2 = fadd <16 x float> %res, %res1
3484 ret <16 x float> %res2
3487 declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
3489 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
3490 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512:
3492 ; CHECK-NEXT: kmovw %edi, %k1
3493 ; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1}
3494 ; CHECK-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0
3495 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
3497 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
3498 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
3499 %res2 = add <8 x i32> %res, %res1
3503 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32)
3505 define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3506 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512:
3508 ; CHECK-NEXT: kmovw %edi, %k1
3509 ; CHECK-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1}
3510 ; CHECK-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0
3511 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3513 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
3514 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
3515 %res2 = add <16 x i32> %res, %res1
3516 ret <16 x i32> %res2
3519 declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32)
3521 define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
3522 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512:
3524 ; CHECK-NEXT: kmovw %edi, %k1
3525 ; CHECK-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1}
3526 ; CHECK-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0
3527 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
3529 %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
3530 %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
3531 %res2 = add <16 x i32> %res, %res1
3532 ret <16 x i32> %res2
3535 declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
3537 define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
3538 ; CHECK-LABEL: test_getexp_ss:
3540 ; CHECK-NEXT: andl $1, %edi
3541 ; CHECK-NEXT: kmovw %edi, %k1
3542 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
3543 ; CHECK-NEXT: vgetexpss %xmm1, %xmm0, %xmm3 {%k1}
3544 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
3545 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
3546 ; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm0
3547 ; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
3548 ; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
3549 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
3551 %res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
3552 %res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
3553 %res2 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
3554 %res3 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
3556 %res.1 = fadd <4 x float> %res0, %res1
3557 %res.2 = fadd <4 x float> %res2, %res3
3558 %res = fadd <4 x float> %res.1, %res.2
3559 ret <4 x float> %res
3562 declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
3564 define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
3565 ; CHECK-LABEL: test_getexp_sd:
3567 ; CHECK-NEXT: andl $1, %edi
3568 ; CHECK-NEXT: kmovw %edi, %k1
3569 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
3570 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1}
3571 ; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4
3572 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
3573 ; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
3574 ; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
3575 ; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0
3576 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
3578 %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
3579 %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
3580 %res2 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
3581 %res3 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
3583 %res.1 = fadd <2 x double> %res0, %res1
3584 %res.2 = fadd <2 x double> %res2, %res3
3585 %res = fadd <2 x double> %res.1, %res.2
3586 ret <2 x double> %res
3589 declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)
3591 define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
3592 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd:
3594 ; CHECK-NEXT: andl $1, %edi
3595 ; CHECK-NEXT: kmovw %edi, %k1
3596 ; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
3597 ; CHECK-NEXT: kmovw %k0, %eax
3598 ; CHECK-NEXT: andl $1, %eax
3599 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
3602 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
3606 define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
3607 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd_all:
3609 ; CHECK-NEXT: vcmpunordsd {sae}, %xmm1, %xmm0, %k0
3610 ; CHECK-NEXT: vcmplesd %xmm1, %xmm0, %k1
3611 ; CHECK-NEXT: korw %k0, %k1, %k0
3612 ; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k1
3613 ; CHECK-NEXT: vcmpneqsd %xmm1, %xmm0, %k2
3614 ; CHECK-NEXT: korw %k1, %k2, %k1
3615 ; CHECK-NEXT: andl $1, %edi
3616 ; CHECK-NEXT: kmovw %edi, %k2
3617 ; CHECK-NEXT: kandw %k2, %k1, %k1
3618 ; CHECK-NEXT: korw %k1, %k0, %k0
3619 ; CHECK-NEXT: kmovw %k0, %eax
3620 ; CHECK-NEXT: andl $1, %eax
3621 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
3624 %res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4)
3625 %res2 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 3, i8 -1, i32 8)
3626 %res3 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 4, i8 %x3, i32 4)
3627 %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
3629 %res11 = or i8 %res1, %res2
3630 %res12 = or i8 %res3, %res4
3631 %res13 = or i8 %res11, %res12
3635 declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
3637 define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
3638 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss:
3640 ; CHECK-NEXT: andl $1, %edi
3641 ; CHECK-NEXT: kmovw %edi, %k1
3642 ; CHECK-NEXT: vcmpunordss %xmm1, %xmm0, %k0 {%k1}
3643 ; CHECK-NEXT: kmovw %k0, %eax
3644 ; CHECK-NEXT: andl $1, %eax
3645 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
3648 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4)
3653 define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
3654 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss_all:
3656 ; CHECK-NEXT: vcmpless %xmm1, %xmm0, %k1
3657 ; CHECK-NEXT: vcmpunordss {sae}, %xmm1, %xmm0, %k0 {%k1}
3658 ; CHECK-NEXT: andl $1, %edi
3659 ; CHECK-NEXT: kmovw %edi, %k1
3660 ; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k2 {%k1}
3661 ; CHECK-NEXT: kmovw %k2, %ecx
3662 ; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k1 {%k1}
3663 ; CHECK-NEXT: kmovw %k1, %edx
3664 ; CHECK-NEXT: andl $1, %edx
3665 ; CHECK-NEXT: kmovw %k0, %eax
3666 ; CHECK-NEXT: andb %cl, %al
3667 ; CHECK-NEXT: andb %dl, %al
3668 ; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
3670 %res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4)
3671 %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8)
3672 %res3 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 4, i8 %x3, i32 4)
3673 %res4 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 5, i8 %x3, i32 8)
3675 %res11 = and i8 %res1, %res2
3676 %res12 = and i8 %res3, %res4
3677 %res13 = and i8 %res11, %res12
3681 declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16)
3683 define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
3684 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
3686 ; CHECK-NEXT: kmovw %edi, %k1
3687 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
3688 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
3689 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
3691 %res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
3692 %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
3693 %res2 = fadd <16 x float> %res, %res1
3694 ret <16 x float> %res2
3697 declare <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double>, <8 x double>, i32, <8 x double>, i8)
3699 define <8 x double>@test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
3700 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
3702 ; CHECK-NEXT: kmovw %edi, %k1
3703 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
3704 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm3 {%k1} {z} = zmm0[4,5,2,3],zmm1[2,3,0,1]
3705 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
3706 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
3707 ; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
3709 %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
3710 %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
3711 %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
3713 %res3 = fadd <8 x double> %res, %res1
3714 %res4 = fadd <8 x double> %res3, %res2
3715 ret <8 x double> %res4
3718 declare <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
3720 define <16 x i32>@test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
3721 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
3723 ; CHECK-NEXT: kmovw %edi, %k1
3724 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
3725 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
3726 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
3728 %res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
3729 %res1 = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
3730 %res2 = add <16 x i32> %res, %res1
3731 ret <16 x i32> %res2
3734 declare <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
3736 define <8 x i64>@test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
3737 ; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
3739 ; CHECK-NEXT: kmovw %edi, %k1
3740 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
3741 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
3742 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
3744 %res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
3745 %res1 = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
3746 %res2 = add <8 x i64> %res, %res1
3750 declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
3752 define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
3753 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_512:
3755 ; CHECK-NEXT: kmovw %edi, %k1
3756 ; CHECK-NEXT: vgetmantpd $11, %zmm0, %zmm1 {%k1}
3757 ; CHECK-NEXT: vgetmantpd $11, {sae}, %zmm0, %zmm0
3758 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
3760 %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %x3, i32 4)
3761 %res1 = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 -1, i32 8)
3762 %res2 = fadd <8 x double> %res, %res1
3763 ret <8 x double> %res2
3766 declare <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
; Same pattern as the pd variant: masked getmant.ps.512 plus an unmasked call
; with rounding operand 8, which lowers to vgetmantps {sae}.
3768 define <16 x float>@test_int_x86_avx512_mask_getmant_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
3769 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_512:
3771 ; CHECK-NEXT: kmovw %edi, %k1
3772 ; CHECK-NEXT: vgetmantps $11, %zmm0, %zmm1 {%k1}
3773 ; CHECK-NEXT: vgetmantps $11, {sae}, %zmm0, %zmm0
3774 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
3776 %res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %x3, i32 4)
3777 %res1 = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 8)
3778 %res2 = fadd <16 x float> %res, %res1
3779 ret <16 x float> %res2
3782 declare <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double>, <2 x double>, i32, <2 x double>, i8, i32)
; Scalar getmant.sd in four flavors: merge-masked (%res), zero-masked via a
; zeroinitializer passthru (%res1), masked {sae} via rounding operand 8 (%res2),
; and unmasked (%res3). The andl $1 keeps only bit 0 of the i8 scalar mask.
3784 define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
3785 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sd:
3787 ; CHECK-NEXT: andl $1, %edi
3788 ; CHECK-NEXT: kmovw %edi, %k1
3789 ; CHECK-NEXT: vmovapd %xmm2, %xmm3
3790 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
3791 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
3792 ; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5
3793 ; CHECK-NEXT: vgetmantsd $11, {sae}, %xmm1, %xmm0, %xmm2 {%k1}
3794 ; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm0
3795 ; CHECK-NEXT: vaddpd %xmm5, %xmm2, %xmm1
3796 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
3798 %res = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 4)
3799 %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> zeroinitializer, i8 %x3, i32 4)
3800 %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 8)
3801 %res3 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 -1, i32 4)
3802 %res11 = fadd <2 x double> %res, %res1
3803 %res12 = fadd <2 x double> %res2, %res3
3804 %res13 = fadd <2 x double> %res11, %res12
3805 ret <2 x double> %res13
3808 declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i32, <4 x float>, i8, i32)
; Scalar getmant.ss: merge-masked, zero-masked (zeroinitializer passthru),
; unmasked {sae} (i8 -1 with rounding 8), and plain unmasked variants summed.
3810 define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
3811 ; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ss:
3813 ; CHECK-NEXT: andl $1, %edi
3814 ; CHECK-NEXT: kmovw %edi, %k1
3815 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm2 {%k1}
3816 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3 {%k1} {z}
3817 ; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm4
3818 ; CHECK-NEXT: vgetmantss $11, {sae}, %xmm1, %xmm0, %xmm0
3819 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1
3820 ; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0
3821 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
3823 %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 %x3, i32 4)
3824 %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> zeroinitializer, i8 %x3, i32 4)
3825 %res2 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 8)
3826 %res3 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 4)
3827 %res11 = fadd <4 x float> %res, %res1
3828 %res12 = fadd <4 x float> %res2, %res3
3829 %res13 = fadd <4 x float> %res11, %res12
3830 ret <4 x float> %res13
3833 declare <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double>, <8 x i64>)
; Unmasked vpermilvar.pd.512 lowers to a single vpermilpd.
3835 define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1) {
3836 ; CHECK-LABEL: test_int_x86_avx512_vpermilvar_pd_512:
3838 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0
3840 %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1)
3841 ret <8 x double> %res
; Unmasked intrinsic + IR-level select folds into a merge-masked vpermilpd.
3844 define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_mask(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %mask) {
3845 ; CHECK-LABEL: test_int_x86_avx512_vpermilvar_pd_512_mask:
3847 ; CHECK-NEXT: kmovw %edi, %k1
3848 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1}
3849 ; CHECK-NEXT: vmovapd %zmm2, %zmm0
3851 %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1)
3852 %mask.cast = bitcast i8 %mask to <8 x i1>
3853 %res2 = select <8 x i1> %mask.cast, <8 x double> %res, <8 x double> %x2
3854 ret <8 x double> %res2
; select-with-zeroinitializer folds into the zero-masking {z} form of vpermilpd.
3857 define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_maskz(<8 x double> %x0, <8 x i64> %x1, i8 %mask) {
3858 ; CHECK-LABEL: test_int_x86_avx512_vpermilvar_pd_512_maskz:
3860 ; CHECK-NEXT: kmovw %edi, %k1
3861 ; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0 {%k1} {z}
3863 %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1)
3864 %mask.cast = bitcast i8 %mask to <8 x i1>
3865 %res2 = select <8 x i1> %mask.cast, <8 x double> %res, <8 x double> zeroinitializer
3866 ret <8 x double> %res2
3869 declare <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float>, <16 x i32>)
; Unmasked vpermilvar.ps.512 lowers to a single vpermilps.
3871 define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1) {
3872 ; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512:
3874 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0
3876 %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1)
3877 ret <16 x float> %res
; IR select over a bitcast i16 mask folds into merge-masked vpermilps.
3880 define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_mask(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %mask) {
3881 ; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_mask:
3883 ; CHECK-NEXT: kmovw %edi, %k1
3884 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1}
3885 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3887 %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1)
3888 %mask.cast = bitcast i16 %mask to <16 x i1>
3889 %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> %x2
3890 ret <16 x float> %res2
; select-with-zeroinitializer folds into the zero-masking {z} form of vpermilps.
3893 define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_maskz(<16 x float> %x0, <16 x i32> %x1, i16 %mask) {
3894 ; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_maskz:
3896 ; CHECK-NEXT: kmovw %edi, %k1
3897 ; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0 {%k1} {z}
3899 %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1)
3900 %mask.cast = bitcast i16 %mask to <16 x i1>
3901 %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> zeroinitializer
3902 ret <16 x float> %res2
3905 ; Test case to make sure we can print shuffle decode comments for constant pool loads.
; Constant index vector: checks the shuffle-decode comment printed for the
; constant-pool-fed vpermilps (per-lane element indices in the asm comment).
3906 define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1) {
3907 ; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_constant_pool:
3909 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
3911 %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
3912 ret <16 x float> %res
; Same constant-pool shuffle decode, merge-masked through an IR select.
3915 define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool_mask(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %mask) {
3916 ; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_constant_pool_mask:
3918 ; CHECK-NEXT: kmovw %edi, %k1
3919 ; CHECK-NEXT: vpermilps {{.*#+}} zmm2 {%k1} = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
3920 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
3922 %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
3923 %mask.cast = bitcast i16 %mask to <16 x i1>
3924 %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> %x2
3925 ret <16 x float> %res2
; Same constant-pool shuffle decode, zero-masked ({z}) via select-with-zero.
3928 define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool_maskz(<16 x float> %x0, <16 x i32> %x1, i16 %mask) {
3929 ; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512_constant_pool_maskz:
3931 ; CHECK-NEXT: kmovw %edi, %k1
3932 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
3934 %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
3935 %mask.cast = bitcast i16 %mask to <16 x i1>
3936 %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> zeroinitializer
3937 ret <16 x float> %res2
3940 declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double>, <4 x float>, <2 x double>, i8, i32)
; Masked vcvtss2sd plus an unmasked call with rounding operand 8 ({sae} form).
3942 define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<2 x double> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) {
3943 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ss2sd_round:
3945 ; CHECK-NEXT: andl $1, %edi
3946 ; CHECK-NEXT: kmovw %edi, %k1
3947 ; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm2 {%k1}
3948 ; CHECK-NEXT: vcvtss2sd {sae}, %xmm1, %xmm0, %xmm0
3949 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
3951 %res = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %x0, <4 x float> %x1, <2 x double> %x2, i8 %x3, i32 4)
3952 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %x0, <4 x float> %x1, <2 x double> %x2, i8 -1, i32 8)
3953 %res2 = fadd <2 x double> %res, %res1
3954 ret <2 x double> %res2
3957 declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float>, <2 x double>, <4 x float>, i8, i32)
; Embedded-rounding encodings: operand 3 lowers to {rz-sae}, operand 8 to
; {rn-sae}, as shown in the CHECK lines for vcvtsd2ss.
3959 define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<4 x float> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) {
3960 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_sd2ss_round:
3962 ; CHECK-NEXT: andl $1, %edi
3963 ; CHECK-NEXT: kmovw %edi, %k1
3964 ; CHECK-NEXT: vcvtsd2ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
3965 ; CHECK-NEXT: vcvtsd2ss {rn-sae}, %xmm1, %xmm0, %xmm0
3966 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
3968 %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 3)
3969 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8)
3970 %res2 = fadd <4 x float> %res, %res1
3971 ret <4 x float> %res2
3974 declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
; Merge-masked vs. unmasked vpternlogd ($33 truth table); the masked form
; needs a copy of %x0 into zmm3 because vpternlog overwrites its destination.
3976 define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
3977 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
3979 ; CHECK-NEXT: kmovw %edi, %k1
3980 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
3981 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1}
3982 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
3983 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
3985 %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
3986 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
3987 %res2 = add <16 x i32> %res, %res1
3988 ret <16 x i32> %res2
3991 declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
; Zero-masking variant of the previous test: the maskz intrinsic lowers to
; vpternlogd with {k1} {z}.
3993 define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
3994 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
3996 ; CHECK-NEXT: kmovw %edi, %k1
3997 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
3998 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
3999 ; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
4000 ; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
4002 %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
4003 %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
4004 %res2 = add <16 x i32> %res, %res1
4005 ret <16 x i32> %res2
4008 declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
; 64-bit element counterpart of mask_pternlog_d_512: vpternlogq with an i8 mask.
4010 define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
4011 ; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
4013 ; CHECK-NEXT: kmovw %edi, %k1
4014 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
4015 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1}
4016 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
4017 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
4019 %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
4020 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
4021 %res2 = add <8 x i64> %res, %res1
4025 declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
; Zero-masking ({k1} {z}) counterpart of mask_pternlog_q_512.
4027 define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
4028 ; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
4030 ; CHECK-NEXT: kmovw %edi, %k1
4031 ; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
4032 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
4033 ; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
4034 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
4036 %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
4037 %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
4038 %res2 = add <8 x i64> %res, %res1
; vcomi.sd with predicate 0 (EQ) and sae operand 8 lowers to vcmpeqsd {sae}
; into a mask register, then kmovw to read the result.
4042 define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
4043 ; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae:
4045 ; CHECK-NEXT: vcmpeqsd {sae}, %xmm1, %xmm0, %k0
4046 ; CHECK-NEXT: kmovw %k0, %eax
4048 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8)
; Predicate 8 with sae lowers to the vcmpeq_uqsd {sae} encoding.
4052 define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
4053 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae:
4055 ; CHECK-NEXT: vcmpeq_uqsd {sae}, %xmm1, %xmm0, %k0
4056 ; CHECK-NEXT: kmovw %k0, %eax
4058 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8)
; Same EQ predicate without sae (rounding operand 4): plain vcmpeqsd.
4062 define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
4063 ; CHECK-LABEL: test_x86_avx512_comi_sd_eq:
4065 ; CHECK-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
4066 ; CHECK-NEXT: kmovw %k0, %eax
4068 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4)
; Predicate 8 without sae: plain vcmpeq_uqsd.
4072 define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
4073 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq:
4075 ; CHECK-NEXT: vcmpeq_uqsd %xmm1, %xmm0, %k0
4076 ; CHECK-NEXT: kmovw %k0, %eax
4078 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4)
; Predicate 1 (LT) with sae lowers to vcmpltsd {sae}.
4082 define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
4083 ; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae:
4085 ; CHECK-NEXT: vcmpltsd {sae}, %xmm1, %xmm0, %k0
4086 ; CHECK-NEXT: kmovw %k0, %eax
4088 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8)
; Predicate 9 with sae lowers to vcmpngesd {sae}.
4092 define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
4093 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae:
4095 ; CHECK-NEXT: vcmpngesd {sae}, %xmm1, %xmm0, %k0
4096 ; CHECK-NEXT: kmovw %k0, %eax
4098 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8)
; Predicate 1 (LT) without sae: plain vcmpltsd.
4102 define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
4103 ; CHECK-LABEL: test_x86_avx512_comi_sd_lt:
4105 ; CHECK-NEXT: vcmpltsd %xmm1, %xmm0, %k0
4106 ; CHECK-NEXT: kmovw %k0, %eax
4108 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4)
; Predicate 9 without sae: plain vcmpngesd.
4112 define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
4113 ; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt:
4115 ; CHECK-NEXT: vcmpngesd %xmm1, %xmm0, %k0
4116 ; CHECK-NEXT: kmovw %k0, %eax
4118 %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4)
4122 declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32)
; Single-precision counterpart: vcomi.ss with predicate 9 lowers to vcmpngess.
4124 define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) {
4125 ; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt:
4127 ; CHECK-NEXT: vcmpngess %xmm1, %xmm0, %k0
4128 ; CHECK-NEXT: kmovw %k0, %eax
4130 %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4)
4134 declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32)
4136 declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float>, <16 x float>, i16)
; broadcastf32x4 lowers to vshuff32x4 replicating elements [0,1,2,3] across the
; zmm; unmasked, merge-masked and zero-masked (zeroinitializer passthru)
; variants are exercised and summed.
4138 define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512(<4 x float> %x0, <16 x float> %x2, i16 %mask) {
4139 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
4141 ; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
4142 ; CHECK-NEXT: kmovw %edi, %k1
4143 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
4144 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
4145 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
4146 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
4147 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
4150 %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
4151 %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
4152 %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %mask)
4153 %res4 = fadd <16 x float> %res1, %res2
4154 %res5 = fadd <16 x float> %res3, %res4
4155 ret <16 x float> %res5
4158 declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double>, <8 x double>, i8)
; broadcastf64x4 lowers to vshuff64x2 replicating the ymm source; unmasked,
; merge-masked and zero-masked variants summed.
4160 define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512(<4 x double> %x0, <8 x double> %x2, i8 %mask) {
4161 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
4163 ; CHECK-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
4164 ; CHECK-NEXT: kmovw %edi, %k1
4165 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3]
4166 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3]
4167 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3]
4168 ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
4169 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
4172 %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 -1)
4173 %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
4174 %res3 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
4175 %res4 = fadd <8 x double> %res1, %res2
4176 %res5 = fadd <8 x double> %res3, %res4
4177 ret <8 x double> %res5
4180 declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32>, <16 x i32>, i16)
; Integer counterpart of broadcastf32x4: lowers to vshufi32x4 with the same
; three masking variants, combined with vpaddd.
4182 define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask) {
4183 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
4185 ; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
4186 ; CHECK-NEXT: kmovw %edi, %k1
4187 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
4188 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
4189 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
4190 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
4191 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4194 %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
4195 %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
4196 %res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
4197 %res4 = add <16 x i32> %res1, %res2
4198 %res5 = add <16 x i32> %res3, %res4
4199 ret <16 x i32> %res5
4202 declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64>, <8 x i64>, i8)
; Integer counterpart of broadcastf64x4: lowers to vshufi64x2, with unmasked,
; merge-masked and zero-masked variants combined via vpaddq.
4204 define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask) {
4205 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
4207 ; CHECK-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
4208 ; CHECK-NEXT: kmovw %edi, %k1
4209 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3]
4210 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3]
4211 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3]
4212 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
4213 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4216 %res1 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 -1)
4217 %res2 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
4218 %res3 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
4219 %res4 = add <8 x i64> %res1, %res2
4220 %res5 = add <8 x i64> %res3, %res4
4224 declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; mask.prorv.d.512 lowers to vprorvd: merge-masked, zero-masked (via
; zeroinitializer passthru) and unmasked (i16 -1) variants are summed.
4226 define <16 x i32>@test_int_x86_avx512_mask_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
4227 ; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_512:
4229 ; CHECK-NEXT: kmovw %edi, %k1
4230 ; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1}
4231 ; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm3 {%k1} {z}
4232 ; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm0
4233 ; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm1
4234 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
4236 %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4237 %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
4238 %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
4239 %res3 = add <16 x i32> %res, %res1
4240 %res4 = add <16 x i32> %res3, %res2
4241 ret <16 x i32> %res4
4244 declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; 64-bit element counterpart: mask.prorv.q.512 lowers to vprorvq with the same
; three masking variants.
4246 define <8 x i64>@test_int_x86_avx512_mask_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
4247 ; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_512:
4249 ; CHECK-NEXT: kmovw %edi, %k1
4250 ; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1}
4251 ; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm3 {%k1} {z}
4252 ; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm0
4253 ; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1
4254 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
4256 %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
4257 %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
4258 %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
4259 %res3 = add <8 x i64> %res, %res1
4260 %res4 = add <8 x i64> %res3, %res2
4264 declare <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32>, i32, <16 x i32>, i16)
; Immediate-count mask.prol.d.512 (count 3) lowers to vprold $3 in merge-,
; zero- and no-mask forms.
4266 define <16 x i32>@test_int_x86_avx512_mask_prol_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
4267 ; CHECK-LABEL: test_int_x86_avx512_mask_prol_d_512:
4269 ; CHECK-NEXT: kmovw %esi, %k1
4270 ; CHECK-NEXT: vprold $3, %zmm0, %zmm1 {%k1}
4271 ; CHECK-NEXT: vprold $3, %zmm0, %zmm2 {%k1} {z}
4272 ; CHECK-NEXT: vprold $3, %zmm0, %zmm0
4273 ; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
4274 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
4276 %res = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
4277 %res1 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3)
4278 %res2 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
4279 %res3 = add <16 x i32> %res, %res1
4280 %res4 = add <16 x i32> %res3, %res2
4281 ret <16 x i32> %res4
4284 declare <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64>, i32, <8 x i64>, i8)
; 64-bit element counterpart: mask.prol.q.512 lowers to vprolq $3.
4286 define <8 x i64>@test_int_x86_avx512_mask_prol_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
4287 ; CHECK-LABEL: test_int_x86_avx512_mask_prol_q_512:
4289 ; CHECK-NEXT: kmovw %esi, %k1
4290 ; CHECK-NEXT: vprolq $3, %zmm0, %zmm1 {%k1}
4291 ; CHECK-NEXT: vprolq $3, %zmm0, %zmm2 {%k1} {z}
4292 ; CHECK-NEXT: vprolq $3, %zmm0, %zmm0
4293 ; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1
4294 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
4296 %res = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
4297 %res1 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
4298 %res2 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
4299 %res3 = add <8 x i64> %res, %res1
4300 %res4 = add <8 x i64> %res3, %res2
4304 declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8)
; mask.permvar.df.512 lowers to vpermpd (variable-index form) in merge-,
; zero- and no-mask variants, summed with vaddpd.
4306 define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
4307 ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_512:
4309 ; CHECK-NEXT: kmovw %edi, %k1
4310 ; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm2 {%k1}
4311 ; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm3 {%k1} {z}
4312 ; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0
4313 ; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
4314 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
4316 %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
4317 %res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
4318 %res2 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
4319 %res3 = fadd <8 x double> %res, %res1
4320 %res4 = fadd <8 x double> %res3, %res2
4321 ret <8 x double> %res4
4324 declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
; Integer counterpart: mask.permvar.di.512 lowers to vpermq with the same
; three masking variants.
4326 define <8 x i64>@test_int_x86_avx512_mask_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
4327 ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_di_512:
4329 ; CHECK-NEXT: kmovw %edi, %k1
4330 ; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm2 {%k1}
4331 ; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm3 {%k1} {z}
4332 ; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0
4333 ; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1
4334 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
4336 %res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
4337 %res1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
4338 %res2 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
4339 %res3 = add <8 x i64> %res, %res1
4340 %res4 = add <8 x i64> %res3, %res2
4344 declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16)
; mask.permvar.sf.512 lowers to vpermps; merge-, zero- and no-mask variants.
4346 define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
4347 ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_512:
4349 ; CHECK-NEXT: kmovw %edi, %k1
4350 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm2 {%k1}
4351 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm3 {%k1} {z}
4352 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0
4353 ; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1
4354 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
4356 %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
4357 %res1 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
4358 %res2 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
4359 %res3 = fadd <16 x float> %res, %res1
4360 %res4 = fadd <16 x float> %res3, %res2
4361 ret <16 x float> %res4
4364 declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
; mask.permvar.si.512 lowers to vpermd; merge-, zero- and no-mask variants.
4366 define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
4367 ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_512:
4369 ; CHECK-NEXT: kmovw %edi, %k1
4370 ; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm2 {%k1}
4371 ; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm3 {%k1} {z}
4372 ; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0
4373 ; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm1
4374 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
4376 %res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
4377 %res1 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
4378 %res2 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
4379 %res3 = add <16 x i32> %res, %res1
4380 %res4 = add <16 x i32> %res3, %res2
4381 ret <16 x i32> %res4
4384 declare <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32)
; mask.fixupimm.pd.512: merge-masked ($4), masked with a zeroed destination
; operand (%res1, dst zeroinitializer -> vpxord'd zmm4), and unmasked {sae}
; ($3 with rounding 8). fixupimm reads its destination, hence the vmovapd copy.
4386 define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i8 %x4) {
4387 ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_512:
4389 ; CHECK-NEXT: kmovw %edi, %k1
4390 ; CHECK-NEXT: vmovapd %zmm0, %zmm3
4391 ; CHECK-NEXT: vfixupimmpd $4, %zmm2, %zmm1, %zmm3 {%k1}
4392 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
4393 ; CHECK-NEXT: vfixupimmpd $5, %zmm2, %zmm1, %zmm4 {%k1} {z}
4394 ; CHECK-NEXT: vfixupimmpd $3, {sae}, %zmm2, %zmm1, %zmm0
4395 ; CHECK-NEXT: vaddpd %zmm4, %zmm3, %zmm1
4396 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
4398 %res = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 4, i8 %x4, i32 4)
4399 %res1 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> zeroinitializer, <8 x double> %x1, <8 x i64> %x2, i32 5, i8 %x4, i32 4)
4400 %res2 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 -1, i32 8)
4401 %res3 = fadd <8 x double> %res, %res1
4402 %res4 = fadd <8 x double> %res3, %res2
4403 ret <8 x double> %res4
4406 declare <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32)
; maskz.fixupimm.pd.512: zero-masked forms ({k1} {z}), one with a zeroed
; third source (%res1, zeroinitializer -> vpxord'd zmm4), plus unmasked {sae}.
4408 define <8 x double>@test_int_x86_avx512_maskz_fixupimm_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i8 %x4) {
4409 ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_512:
4411 ; CHECK-NEXT: kmovw %edi, %k1
4412 ; CHECK-NEXT: vmovapd %zmm0, %zmm3
4413 ; CHECK-NEXT: vfixupimmpd $3, %zmm2, %zmm1, %zmm3 {%k1} {z}
4414 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
4415 ; CHECK-NEXT: vmovapd %zmm0, %zmm5
4416 ; CHECK-NEXT: vfixupimmpd $5, %zmm4, %zmm1, %zmm5 {%k1} {z}
4417 ; CHECK-NEXT: vfixupimmpd $2, {sae}, %zmm2, %zmm1, %zmm0
4418 ; CHECK-NEXT: vaddpd %zmm5, %zmm3, %zmm1
4419 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
4421 %res = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 %x4, i32 4)
4422 %res1 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> zeroinitializer, i32 5, i8 %x4, i32 4)
4423 %res2 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 2, i8 -1, i32 8)
4424 %res3 = fadd <8 x double> %res, %res1
4425 %res4 = fadd <8 x double> %res3, %res2
4426 ret <8 x double> %res4
4429 declare <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float>, <4 x float>, <4 x i32>, i32, i8, i32)
; Scalar mask.fixupimm.ss: merge-masked, masked with a zeroed i32 source
; (%res1, zeroinitializer -> vxorps'd xmm4), and unmasked {sae} (rounding 8).
; Only bit 0 of the i8 mask is used (andl $1).
4431 define <4 x float>@test_int_x86_avx512_mask_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) {
4432 ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ss:
4434 ; CHECK-NEXT: andl $1, %edi
4435 ; CHECK-NEXT: kmovw %edi, %k1
4436 ; CHECK-NEXT: vmovaps %xmm0, %xmm3
4437 ; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1}
4438 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
4439 ; CHECK-NEXT: vmovaps %xmm0, %xmm5
4440 ; CHECK-NEXT: vfixupimmss $5, %xmm4, %xmm1, %xmm5 {%k1}
4441 ; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0
4442 ; CHECK-NEXT: vaddps %xmm5, %xmm3, %xmm1
4443 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
4445 %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4)
4446 %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 4)
4447 %res2 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1, i32 8)
4448 %res3 = fadd <4 x float> %res, %res1
4449 %res4 = fadd <4 x float> %res3, %res2
4450 ret <4 x float> %res4
4453 declare <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float>, <4 x float>, <4 x i32>, i32, i8, i32)
4455 define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) {
4456 ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ss:
4458 ; CHECK-NEXT: andl $1, %edi
4459 ; CHECK-NEXT: kmovw %edi, %k1
4460 ; CHECK-NEXT: vmovaps %xmm0, %xmm3
4461 ; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
4462 ; CHECK-NEXT: vmovaps %xmm0, %xmm4
4463 ; CHECK-NEXT: vfixupimmss $5, %xmm2, %xmm1, %xmm4
4464 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
4465 ; CHECK-NEXT: vfixupimmss $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
4466 ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0
4467 ; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0
4469 %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4)
4470 %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 8)
4471 %res2 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1, i32 4)
4472 %res3 = fadd <4 x float> %res, %res1
4473 %res4 = fadd <4 x float> %res3, %res2
4474 ret <4 x float> %res4
4477 declare <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float>, <16 x float>, <16 x i32>, i32, i16, i32)
; Merge-masked packed-single fixupimm: variable i16 mask, zeroinitializer
; table operand, and all-ones mask with rounding operand 8 ({sae} in the
; expected assembly). The three results are summed so none is dead.
4479 define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i16 %x4) {
4480 ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_512:
4482 ; CHECK-NEXT: kmovw %edi, %k1
4483 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
4484 ; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1}
4485 ; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
4486 ; CHECK-NEXT: vmovaps %zmm0, %zmm5
4487 ; CHECK-NEXT: vfixupimmps $5, %zmm4, %zmm1, %zmm5 {%k1}
4488 ; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0
4489 ; CHECK-NEXT: vaddps %zmm5, %zmm3, %zmm1
4490 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
4492 %res = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4)
4493 %res1 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 4)
4494 %res2 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 8)
4495 %res3 = fadd <16 x float> %res, %res1
4496 %res4 = fadd <16 x float> %res3, %res2
4497 ret <16 x float> %res4
4500 declare <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float>, <16 x float>, <16 x i32>, i32, i16, i32)
; Zero-masked packed-single fixupimm: masked/round 4, masked with a
; zeroinitializer operand and rounding 8, and unmasked (i16 -1) with round 4.
4502 define <16 x float>@test_int_x86_avx512_maskz_fixupimm_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i16 %x4) {
4503 ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_512:
4505 ; CHECK-NEXT: kmovw %edi, %k1
4506 ; CHECK-NEXT: vmovaps %zmm0, %zmm3
4507 ; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm3 {%k1} {z}
4508 ; CHECK-NEXT: vmovaps %zmm0, %zmm4
4509 ; CHECK-NEXT: vfixupimmps $5, %zmm2, %zmm1, %zmm4
4510 ; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
4511 ; CHECK-NEXT: vfixupimmps $5, {sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
4512 ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
4513 ; CHECK-NEXT: vaddps %zmm4, %zmm0, %zmm0
4515 %res = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4)
4516 %res1 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 8)
4517 %res2 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 4)
4518 %res3 = fadd <16 x float> %res, %res1
4519 %res4 = fadd <16 x float> %res3, %res2
4520 ret <16 x float> %res4
4523 declare <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double>, <2 x double>, <2 x i64>, i32, i8, i32)
; Merge-masked scalar-double fixupimm: masked/round 4, masked with a
; zeroinitializer operand and rounding 8, and unmasked (i8 -1) with round 4.
; Scalar op uses only bit 0 of the mask, hence the andl $1.
4525 define <2 x double>@test_int_x86_avx512_mask_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {
4526 ; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_sd:
4528 ; CHECK-NEXT: andl $1, %edi
4529 ; CHECK-NEXT: kmovw %edi, %k1
4530 ; CHECK-NEXT: vmovapd %xmm0, %xmm3
4531 ; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1}
4532 ; CHECK-NEXT: vmovapd %xmm0, %xmm4
4533 ; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm4
4534 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
4535 ; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1}
4536 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
4537 ; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0
4539 %res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4)
4540 %res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8)
4541 %res2 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 -1, i32 4)
4542 %res3 = fadd <2 x double> %res, %res1
4543 %res4 = fadd <2 x double> %res3, %res2
4544 ret <2 x double> %res4
4547 declare <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double>, <2 x double>, <2 x i64>, i32, i8, i32)
; Zero-masked scalar-double fixupimm. NOTE(review): unlike the sibling tests,
; %res2 here passes the variable mask %x4 (not i8 -1) with rounding 8; this
; matches the masked {k1} {z} instruction in the expected assembly, so it
; appears deliberate — confirm an unmasked variant is covered elsewhere.
4549 define <2 x double>@test_int_x86_avx512_maskz_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {
4550 ; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_sd:
4552 ; CHECK-NEXT: andl $1, %edi
4553 ; CHECK-NEXT: kmovw %edi, %k1
4554 ; CHECK-NEXT: vmovapd %xmm0, %xmm3
4555 ; CHECK-NEXT: vfixupimmsd $5, %xmm2, %xmm1, %xmm3 {%k1} {z}
4556 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
4557 ; CHECK-NEXT: vmovapd %xmm0, %xmm5
4558 ; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm4, %xmm1, %xmm5 {%k1} {z}
4559 ; CHECK-NEXT: vfixupimmsd $5, {sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
4560 ; CHECK-NEXT: vaddpd %xmm5, %xmm3, %xmm1
4561 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
4563 %res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4)
4564 %res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8)
4565 %res2 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 8)
4566 %res3 = fadd <2 x double> %res, %res1
4567 %res4 = fadd <2 x double> %res3, %res2
4568 ret <2 x double> %res4
4571 declare i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32>, <16 x i32>, i16 %x2)
; VPTESTNMD on dwords: once with a variable write-mask and once unmasked
; (i16 -1); the two i16 results are added together.
4573 define i16@test_int_x86_avx512_ptestnm_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
4574 ; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_512:
4576 ; CHECK-NEXT: kmovw %edi, %k1
4577 ; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k0 {%k1}
4578 ; CHECK-NEXT: kmovw %k0, %ecx
4579 ; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k0
4580 ; CHECK-NEXT: kmovw %k0, %eax
4581 ; CHECK-NEXT: addl %ecx, %eax
4582 ; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
4584 %res = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
4585 %res1 = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16-1)
4586 %res2 = add i16 %res, %res1
4590 declare i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64>, <8 x i64>, i8 %x2)
; VPTESTNMQ on qwords: masked and unmasked (i8 -1) variants, results added.
4592 define i8@test_int_x86_avx512_ptestnm_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
4593 ; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_512:
4595 ; CHECK-NEXT: kmovw %edi, %k1
4596 ; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k0 {%k1}
4597 ; CHECK-NEXT: kmovw %k0, %ecx
4598 ; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k0
4599 ; CHECK-NEXT: kmovw %k0, %eax
4600 ; CHECK-NEXT: addb %cl, %al
4602 %res = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
4603 %res1 = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8-1)
4604 %res2 = add i8 %res, %res1
; VPBROADCASTD from a GPR: unmasked, merge-masked into %x1, and zero-masked
; (via a zeroinitializer passthru); the three results are added.
4608 define <16 x i32>@test_int_x86_avx512_mask_pbroadcastd_gpr_512(i32 %x0, <16 x i32> %x1, i16 %mask) {
4609 ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
4611 ; CHECK-NEXT: kmovw %esi, %k1
4612 ; CHECK-NEXT: vpbroadcastd %edi, %zmm0 {%k1}
4613 ; CHECK-NEXT: vpbroadcastd %edi, %zmm1 {%k1} {z}
4614 ; CHECK-NEXT: vpbroadcastd %edi, %zmm2
4615 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
4616 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
4618 %res = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 -1)
4619 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 %mask)
4620 %res2 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> zeroinitializer, i16 %mask)
4621 %res3 = add <16 x i32> %res, %res1
4622 %res4 = add <16 x i32> %res2, %res3
4623 ret <16 x i32> %res4
4626 declare <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32, <16 x i32>, i16)
; VPBROADCASTQ from a 64-bit GPR: same unmasked / merge / zero pattern as the
; dword test above.
4628 define <8 x i64>@test_int_x86_avx512_mask_pbroadcastq_gpr_512(i64 %x0, <8 x i64> %x1, i8 %mask) {
4629 ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512:
4631 ; CHECK-NEXT: kmovw %esi, %k1
4632 ; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 {%k1}
4633 ; CHECK-NEXT: vpbroadcastq %rdi, %zmm1 {%k1} {z}
4634 ; CHECK-NEXT: vpbroadcastq %rdi, %zmm2
4635 ; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
4636 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
4638 %res = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 -1)
4639 %res1 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 %mask)
4640 %res2 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> zeroinitializer,i8 %mask)
4641 %res3 = add <8 x i64> %res, %res1
4642 %res4 = add <8 x i64> %res2, %res3
4645 declare <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64, <8 x i64>, i8)
4647 declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
; Merge-masked scalar-double FMA: all four mask x rounding combinations
; (unmasked/masked, rounding 4 and 3 — the latter emitted as {rz-sae}).
; All four results are summed so each call stays live.
4649 define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
4650 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_sd:
4652 ; CHECK-NEXT: andl $1, %edi
4653 ; CHECK-NEXT: kmovw %edi, %k1
4654 ; CHECK-NEXT: vmovaps %xmm0, %xmm3
4655 ; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1}
4656 ; CHECK-NEXT: vmovaps %xmm0, %xmm4
4657 ; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm4
4658 ; CHECK-NEXT: vmovaps %xmm0, %xmm5
4659 ; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm5 {%k1}
4660 ; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0
4661 ; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm1
4662 ; CHECK-NEXT: vaddpd %xmm5, %xmm0, %xmm0
4663 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
4665 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
4666 %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
4667 %res2 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
4668 %res3 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
4669 %res4 = fadd <2 x double> %res, %res1
4670 %res5 = fadd <2 x double> %res2, %res3
4671 %res6 = fadd <2 x double> %res4, %res5
4672 ret <2 x double> %res6
4675 declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
; Merge-masked scalar-single FMA; same four-combination pattern as the
; scalar-double test above.
4677 define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
4678 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ss:
4680 ; CHECK-NEXT: andl $1, %edi
4681 ; CHECK-NEXT: kmovw %edi, %k1
4682 ; CHECK-NEXT: vmovaps %xmm0, %xmm3
4683 ; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm3 {%k1}
4684 ; CHECK-NEXT: vmovaps %xmm0, %xmm4
4685 ; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm4
4686 ; CHECK-NEXT: vmovaps %xmm0, %xmm5
4687 ; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm5 {%k1}
4688 ; CHECK-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm0
4689 ; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm1
4690 ; CHECK-NEXT: vaddps %xmm5, %xmm0, %xmm0
4691 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
4693 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
4694 %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
4695 %res2 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
4696 %res3 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
4697 %res4 = fadd <4 x float> %res, %res1
4698 %res5 = fadd <4 x float> %res2, %res3
4699 %res6 = fadd <4 x float> %res4, %res5
4700 ret <4 x float> %res6
4703 declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
; Zero-masked scalar-double FMA with rounding 4 and 3 ({rz-sae}); both results
; are summed and returned.
4705 define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
4706 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_sd:
4708 ; CHECK-NEXT: andl $1, %edi
4709 ; CHECK-NEXT: kmovw %edi, %k1
4710 ; CHECK-NEXT: vmovaps %xmm0, %xmm3
4711 ; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} {z}
4712 ; CHECK-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
4713 ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
4715 %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
4716 %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
4717 %res2 = fadd <2 x double> %res, %res1
4718 ret <2 x double> %res2
4721 declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
; Zero-masked scalar-single FMA.
; NOTE(review): this function returns %res, so %res1 (rounding 3) and %res2
; are dead — the expected assembly shows only the single rounding-4 vfmadd.
; If the intent was to also test rounding 3 (as the sd sibling above does),
; the ret should use %res2 and the assertions must be regenerated with
; update_llc_test_checks.py; left untouched here to keep IR and expected
; assembly in sync.
4723 define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
4724 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss:
4726 ; CHECK-NEXT: andl $1, %edi
4727 ; CHECK-NEXT: kmovw %edi, %k1
4728 ; CHECK-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z}
4730 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
4731 %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
4732 %res2 = fadd <4 x float> %res, %res1
4733 ret <4 x float> %res
4735 declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
; mask3 (accumulator-masked, vfmadd231 form) scalar-double FMA: the four
; unmasked/masked x rounding-4/rounding-3 combinations, summed together.
4737 define <2 x double>@test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
4738 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_sd:
4740 ; CHECK-NEXT: andl $1, %edi
4741 ; CHECK-NEXT: kmovw %edi, %k1
4742 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
4743 ; CHECK-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm3 {%k1}
4744 ; CHECK-NEXT: vmovaps %xmm2, %xmm4
4745 ; CHECK-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm4
4746 ; CHECK-NEXT: vmovaps %xmm2, %xmm5
4747 ; CHECK-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm5 {%k1}
4748 ; CHECK-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm2
4749 ; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm0
4750 ; CHECK-NEXT: vaddpd %xmm5, %xmm2, %xmm1
4751 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
4753 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
4754 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
4755 %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
4756 %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
4757 %res4 = fadd <2 x double> %res, %res1
4758 %res5 = fadd <2 x double> %res2, %res3
4759 %res6 = fadd <2 x double> %res4, %res5
4760 ret <2 x double> %res6
4763 declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
; mask3 scalar-single FMA; same four-combination pattern as the sd test above.
4765 define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
4766 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss:
4768 ; CHECK-NEXT: andl $1, %edi
4769 ; CHECK-NEXT: kmovw %edi, %k1
4770 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
4771 ; CHECK-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm3 {%k1}
4772 ; CHECK-NEXT: vmovaps %xmm2, %xmm4
4773 ; CHECK-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm4
4774 ; CHECK-NEXT: vmovaps %xmm2, %xmm5
4775 ; CHECK-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm5 {%k1}
4776 ; CHECK-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm2
4777 ; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm0
4778 ; CHECK-NEXT: vaddps %xmm5, %xmm2, %xmm1
4779 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
4781 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
4782 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
4783 %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
4784 %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
4785 %res4 = fadd <4 x float> %res, %res1
4786 %res5 = fadd <4 x float> %res2, %res3
4787 %res6 = fadd <4 x float> %res4, %res5
4788 ret <4 x float> %res6
4791 declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
; mask3 scalar-double fused multiply-subtract: four unmasked/masked x
; rounding-4/rounding-3 combinations, summed together.
4793 define <2 x double>@test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
4794 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_sd:
4796 ; CHECK-NEXT: andl $1, %edi
4797 ; CHECK-NEXT: kmovw %edi, %k1
4798 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
4799 ; CHECK-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm3 {%k1}
4800 ; CHECK-NEXT: vmovaps %xmm2, %xmm4
4801 ; CHECK-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm4
4802 ; CHECK-NEXT: vmovaps %xmm2, %xmm5
4803 ; CHECK-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm5 {%k1}
4804 ; CHECK-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2
4805 ; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm0
4806 ; CHECK-NEXT: vaddpd %xmm5, %xmm2, %xmm1
4807 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
4809 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
4810 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
4811 %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
4812 %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
4813 %res4 = fadd <2 x double> %res, %res1
4814 %res5 = fadd <2 x double> %res2, %res3
4815 %res6 = fadd <2 x double> %res4, %res5
4816 ret <2 x double> %res6
4819 declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
; mask3 scalar-single fused multiply-subtract; same pattern as the sd test.
4821 define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
4822 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ss:
4824 ; CHECK-NEXT: andl $1, %edi
4825 ; CHECK-NEXT: kmovw %edi, %k1
4826 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
4827 ; CHECK-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm3 {%k1}
4828 ; CHECK-NEXT: vmovaps %xmm2, %xmm4
4829 ; CHECK-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm4
4830 ; CHECK-NEXT: vmovaps %xmm2, %xmm5
4831 ; CHECK-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm5 {%k1}
4832 ; CHECK-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2
4833 ; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm0
4834 ; CHECK-NEXT: vaddps %xmm5, %xmm2, %xmm1
4835 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
4837 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
4838 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
4839 %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
4840 %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
4841 %res4 = fadd <4 x float> %res, %res1
4842 %res5 = fadd <4 x float> %res2, %res3
4843 %res6 = fadd <4 x float> %res4, %res5
4844 ret <4 x float> %res6
4847 declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
; mask3 scalar-double fused negate-multiply-subtract: four unmasked/masked x
; rounding-4/rounding-3 combinations, summed together.
4849 define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
4850 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd:
4852 ; CHECK-NEXT: andl $1, %edi
4853 ; CHECK-NEXT: kmovw %edi, %k1
4854 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
4855 ; CHECK-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm3 {%k1}
4856 ; CHECK-NEXT: vmovaps %xmm2, %xmm4
4857 ; CHECK-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm4
4858 ; CHECK-NEXT: vmovaps %xmm2, %xmm5
4859 ; CHECK-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm5 {%k1}
4860 ; CHECK-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2
4861 ; CHECK-NEXT: vaddpd %xmm3, %xmm4, %xmm0
4862 ; CHECK-NEXT: vaddpd %xmm5, %xmm2, %xmm1
4863 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
4865 %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
4866 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
4867 %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
4868 %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
4869 %res4 = fadd <2 x double> %res, %res1
4870 %res5 = fadd <2 x double> %res2, %res3
4871 %res6 = fadd <2 x double> %res4, %res5
4872 ret <2 x double> %res6
4875 declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
; mask3 scalar-single fused negate-multiply-subtract; same pattern as above.
4877 define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
4878 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss:
4880 ; CHECK-NEXT: andl $1, %edi
4881 ; CHECK-NEXT: kmovw %edi, %k1
4882 ; CHECK-NEXT: vmovaps %xmm2, %xmm3
4883 ; CHECK-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm3 {%k1}
4884 ; CHECK-NEXT: vmovaps %xmm2, %xmm4
4885 ; CHECK-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm4
4886 ; CHECK-NEXT: vmovaps %xmm2, %xmm5
4887 ; CHECK-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm5 {%k1}
4888 ; CHECK-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2
4889 ; CHECK-NEXT: vaddps %xmm3, %xmm4, %xmm0
4890 ; CHECK-NEXT: vaddps %xmm5, %xmm2, %xmm1
4891 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
4893 %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
4894 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
4895 %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
4896 %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
4897 %res4 = fadd <4 x float> %res, %res1
4898 %res5 = fadd <4 x float> %res2, %res3
4899 %res6 = fadd <4 x float> %res4, %res5
4900 ret <4 x float> %res6
; Register-memory form: the multiplicand scalar is loaded from %ptr_b and
; fed to mask3 vfmadd, checking the load folds into vfmadd231ss (%rdi).
4903 define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, float *%ptr_b ,i8 %x3,i32 %x4) {
4904 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm:
4906 ; CHECK-NEXT: andl $1, %esi
4907 ; CHECK-NEXT: kmovw %esi, %k1
4908 ; CHECK-NEXT: vfmadd231ss (%rdi), %xmm0, %xmm1 {%k1}
4909 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
4911 %q = load float, float* %ptr_b
4912 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
4913 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %vecinit.i, <4 x float> %x1, i8 %x3, i32 4)
4914 ret < 4 x float> %res
; Register-memory form for the merge-masked variant: the loaded scalar folds
; into vfmadd132ss (%rdi).
4917 define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,float *%ptr_b ,i8 %x3,i32 %x4) {
4918 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm:
4920 ; CHECK-NEXT: andl $1, %esi
4921 ; CHECK-NEXT: kmovw %esi, %k1
4922 ; CHECK-NEXT: vfmadd132ss (%rdi), %xmm1, %xmm0 {%k1}
4924 %q = load float, float* %ptr_b
4925 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
4926 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0,<4 x float> %vecinit.i, <4 x float> %x1, i8 %x3, i32 4)
4927 ret < 4 x float> %res
; Register-memory form for the zero-masked variant with a constant-zero mask
; (i8 0): the mask register is produced by kxorw rather than from a GPR.
4931 define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,float *%ptr_b ,i8 %x3,i32 %x4) {
4932 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss_rm:
4934 ; CHECK-NEXT: kxorw %k0, %k0, %k1
4935 ; CHECK-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 {%k1} {z}
4937 %q = load float, float* %ptr_b
4938 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
4939 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %vecinit.i, i8 0, i32 4)
4940 ret < 4 x float> %res
; VPSLLD by an xmm count: plain intrinsic call, then merge-masked and
; zero-masked variants expressed as the unmasked intrinsic plus a select on
; the bitcast i16 mask.
4943 define <16 x i32> @test_x86_avx512_psll_d_512(<16 x i32> %a0, <4 x i32> %a1) {
4944 ; CHECK-LABEL: test_x86_avx512_psll_d_512:
4946 ; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0
4948 %res = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
; Merge-masked: select keeps %passthru lanes where the mask bit is clear.
4951 define <16 x i32> @test_x86_avx512_mask_psll_d_512(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %passthru, i16 %mask) {
4952 ; CHECK-LABEL: test_x86_avx512_mask_psll_d_512:
4954 ; CHECK-NEXT: kmovw %edi, %k1
4955 ; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm2 {%k1}
4956 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
4958 %res = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
4959 %mask.cast = bitcast i16 %mask to <16 x i1>
4960 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
4961 ret <16 x i32> %res2
; Zero-masked: select against zeroinitializer folds to the {z} form.
4963 define <16 x i32> @test_x86_avx512_maskz_psll_d_512(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
4964 ; CHECK-LABEL: test_x86_avx512_maskz_psll_d_512:
4966 ; CHECK-NEXT: kmovw %edi, %k1
4967 ; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
4969 %res = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
4970 %mask.cast = bitcast i16 %mask to <16 x i1>
4971 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
4972 ret <16 x i32> %res2
4974 declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>) nounwind readnone
; VPSLLQ by an xmm count: plain, merge-masked, and zero-masked variants
; (unmasked intrinsic plus select on the bitcast i8 mask).
4977 define <8 x i64> @test_x86_avx512_psll_q_512(<8 x i64> %a0, <2 x i64> %a1) {
4978 ; CHECK-LABEL: test_x86_avx512_psll_q_512:
4980 ; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0
4982 %res = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
; Merge-masked: select keeps %passthru lanes where the mask bit is clear.
4985 define <8 x i64> @test_x86_avx512_mask_psll_q_512(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %passthru, i8 %mask) {
4986 ; CHECK-LABEL: test_x86_avx512_mask_psll_q_512:
4988 ; CHECK-NEXT: kmovw %edi, %k1
4989 ; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
4990 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
4992 %res = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
4993 %mask.cast = bitcast i8 %mask to <8 x i1>
4994 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
; Zero-masked: select against zeroinitializer folds to the {z} form.
4997 define <8 x i64> @test_x86_avx512_maskz_psll_q_512(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
4998 ; CHECK-LABEL: test_x86_avx512_maskz_psll_q_512:
5000 ; CHECK-NEXT: kmovw %edi, %k1
5001 ; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
5003 %res = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
5004 %mask.cast = bitcast i8 %mask to <8 x i1>
5005 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
5008 declare <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64>, <2 x i64>) nounwind readnone
; VPSLLD by immediate 7: plain, merge-masked, and zero-masked variants
; (unmasked intrinsic plus select on the bitcast i16 mask).
5011 define <16 x i32> @test_x86_avx512_pslli_d_512(<16 x i32> %a0) {
5012 ; CHECK-LABEL: test_x86_avx512_pslli_d_512:
5014 ; CHECK-NEXT: vpslld $7, %zmm0, %zmm0
5016 %res = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
; Merge-masked immediate shift.
5019 define <16 x i32> @test_x86_avx512_mask_pslli_d_512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
5020 ; CHECK-LABEL: test_x86_avx512_mask_pslli_d_512:
5022 ; CHECK-NEXT: kmovw %edi, %k1
5023 ; CHECK-NEXT: vpslld $7, %zmm0, %zmm1 {%k1}
5024 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
5026 %res = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
5027 %mask.cast = bitcast i16 %mask to <16 x i1>
5028 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
5029 ret <16 x i32> %res2
; Zero-masked immediate shift.
5031 define <16 x i32> @test_x86_avx512_maskz_pslli_d_512(<16 x i32> %a0, i16 %mask) {
5032 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d_512:
5034 ; CHECK-NEXT: kmovw %edi, %k1
5035 ; CHECK-NEXT: vpslld $7, %zmm0, %zmm0 {%k1} {z}
5037 %res = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
5038 %mask.cast = bitcast i16 %mask to <16 x i1>
5039 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
5040 ret <16 x i32> %res2
5042 declare <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32>, i32) nounwind readnone
; VPSLLQ by immediate 7: plain, merge-masked, and zero-masked variants.
; NOTE(review): the maskz variant's signature carries an unused %passthru
; parameter (unlike its maskz siblings); harmless, but could be dropped when
; the assertions are next regenerated.
5045 define <8 x i64> @test_x86_avx512_pslli_q_512(<8 x i64> %a0) {
5046 ; CHECK-LABEL: test_x86_avx512_pslli_q_512:
5048 ; CHECK-NEXT: vpsllq $7, %zmm0, %zmm0
5050 %res = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
; Merge-masked immediate shift.
5053 define <8 x i64> @test_x86_avx512_mask_pslli_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
5054 ; CHECK-LABEL: test_x86_avx512_mask_pslli_q_512:
5056 ; CHECK-NEXT: kmovw %edi, %k1
5057 ; CHECK-NEXT: vpsllq $7, %zmm0, %zmm1 {%k1}
5058 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
5060 %res = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
5061 %mask.cast = bitcast i8 %mask to <8 x i1>
5062 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
; Zero-masked immediate shift (%passthru unused — see note above the group).
5065 define <8 x i64> @test_x86_avx512_maskz_pslli_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
5066 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q_512:
5068 ; CHECK-NEXT: kmovw %edi, %k1
5069 ; CHECK-NEXT: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
5071 %res = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
5072 %mask.cast = bitcast i8 %mask to <8 x i1>
5073 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
5076 declare <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64>, i32) nounwind readnone
; --- llvm.x86.avx512.psra.q.512 (vpsraq with shift count in %xmm1, <8 x i64>): unmasked,
; merge-masked, and zero-masked lowerings. The count operand is a <2 x i64> in an xmm register.
5079 define <8 x i64> @test_x86_avx512_psra_q_512(<8 x i64> %a0, <2 x i64> %a1) {
5080 ; CHECK-LABEL: test_x86_avx512_psra_q_512:
5082 ; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0
5084 %res = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
5087 define <8 x i64> @test_x86_avx512_mask_psra_q_512(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %passthru, i8 %mask) {
5088 ; CHECK-LABEL: test_x86_avx512_mask_psra_q_512:
5090 ; CHECK-NEXT: kmovw %edi, %k1
5091 ; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
5092 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
5094 %res = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
5095 %mask.cast = bitcast i8 %mask to <8 x i1>
5096 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
5099 define <8 x i64> @test_x86_avx512_maskz_psra_q_512(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
5100 ; CHECK-LABEL: test_x86_avx512_maskz_psra_q_512:
5102 ; CHECK-NEXT: kmovw %edi, %k1
5103 ; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
5105 %res = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
5106 %mask.cast = bitcast i8 %mask to <8 x i1>
5107 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
5110 declare <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64>, <2 x i64>) nounwind readnone
; --- llvm.x86.avx512.psra.d.512 (vpsrad with shift count in %xmm1, <16 x i32>): unmasked,
; merge-masked, and zero-masked lowerings.
5113 define <16 x i32> @test_x86_avx512_psra_d_512(<16 x i32> %a0, <4 x i32> %a1) {
5114 ; CHECK-LABEL: test_x86_avx512_psra_d_512:
5116 ; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0
5118 %res = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
5121 define <16 x i32> @test_x86_avx512_mask_psra_d_512(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %passthru, i16 %mask) {
5122 ; CHECK-LABEL: test_x86_avx512_mask_psra_d_512:
5124 ; CHECK-NEXT: kmovw %edi, %k1
5125 ; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
5126 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
5128 %res = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
5129 %mask.cast = bitcast i16 %mask to <16 x i1>
5130 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
5131 ret <16 x i32> %res2
5133 define <16 x i32> @test_x86_avx512_maskz_psra_d_512(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
5134 ; CHECK-LABEL: test_x86_avx512_maskz_psra_d_512:
5136 ; CHECK-NEXT: kmovw %edi, %k1
5137 ; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
5139 %res = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
5140 %mask.cast = bitcast i16 %mask to <16 x i1>
5141 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
5142 ret <16 x i32> %res2
5144 declare <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32>, <4 x i32>) nounwind readnone
; --- llvm.x86.avx512.psrai.q.512 (vpsraq $imm on <8 x i64>): unmasked, merge-masked, and
; zero-masked lowerings of the arithmetic shift-right-by-immediate intrinsic.
5148 define <8 x i64> @test_x86_avx512_psrai_q_512(<8 x i64> %a0) {
5149 ; CHECK-LABEL: test_x86_avx512_psrai_q_512:
5151 ; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0
5153 %res = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
5156 define <8 x i64> @test_x86_avx512_mask_psrai_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
5157 ; CHECK-LABEL: test_x86_avx512_mask_psrai_q_512:
5159 ; CHECK-NEXT: kmovw %edi, %k1
5160 ; CHECK-NEXT: vpsraq $7, %zmm0, %zmm1 {%k1}
5161 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
5163 %res = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
5164 %mask.cast = bitcast i8 %mask to <8 x i1>
5165 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
5168 define <8 x i64> @test_x86_avx512_maskz_psrai_q_512(<8 x i64> %a0, i8 %mask) {
5169 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q_512:
5171 ; CHECK-NEXT: kmovw %edi, %k1
5172 ; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
5174 %res = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
5175 %mask.cast = bitcast i8 %mask to <8 x i1>
5176 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
5179 declare <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64>, i32) nounwind readnone
; --- llvm.x86.avx512.psrai.d.512 (vpsrad $imm on <16 x i32>): unmasked, merge-masked, and
; zero-masked lowerings.
5182 define <16 x i32> @test_x86_avx512_psrai_d_512(<16 x i32> %a0) {
5183 ; CHECK-LABEL: test_x86_avx512_psrai_d_512:
5185 ; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0
5187 %res = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
5190 define <16 x i32> @test_x86_avx512_mask_psrai_d_512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
5191 ; CHECK-LABEL: test_x86_avx512_mask_psrai_d_512:
5193 ; CHECK-NEXT: kmovw %edi, %k1
5194 ; CHECK-NEXT: vpsrad $7, %zmm0, %zmm1 {%k1}
5195 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
5197 %res = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
5198 %mask.cast = bitcast i16 %mask to <16 x i1>
5199 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
5200 ret <16 x i32> %res2
5202 define <16 x i32> @test_x86_avx512_maskz_psrai_d_512(<16 x i32> %a0, i16 %mask) {
5203 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d_512:
5205 ; CHECK-NEXT: kmovw %edi, %k1
5206 ; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
5208 %res = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
5209 %mask.cast = bitcast i16 %mask to <16 x i1>
5210 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
5211 ret <16 x i32> %res2
5213 declare <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32>, i32) nounwind readnone
; --- llvm.x86.avx512.psrl.d.512 (vpsrld with shift count in %xmm1, <16 x i32>): unmasked,
; merge-masked, and zero-masked lowerings of the logical shift-right intrinsic.
5217 define <16 x i32> @test_x86_avx512_psrl_d_512(<16 x i32> %a0, <4 x i32> %a1) {
5218 ; CHECK-LABEL: test_x86_avx512_psrl_d_512:
5220 ; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0
5222 %res = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
5225 define <16 x i32> @test_x86_avx512_mask_psrl_d_512(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %passthru, i16 %mask) {
5226 ; CHECK-LABEL: test_x86_avx512_mask_psrl_d_512:
5228 ; CHECK-NEXT: kmovw %edi, %k1
5229 ; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
5230 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
5232 %res = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
5233 %mask.cast = bitcast i16 %mask to <16 x i1>
5234 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
5235 ret <16 x i32> %res2
5237 define <16 x i32> @test_x86_avx512_maskz_psrl_d_512(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
5238 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d_512:
5240 ; CHECK-NEXT: kmovw %edi, %k1
5241 ; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
5243 %res = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
5244 %mask.cast = bitcast i16 %mask to <16 x i1>
5245 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
5246 ret <16 x i32> %res2
5248 declare <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32>, <4 x i32>) nounwind readnone
; --- llvm.x86.avx512.psrl.q.512 (vpsrlq with shift count in %xmm1, <8 x i64>): unmasked,
; merge-masked, and zero-masked lowerings.
5251 define <8 x i64> @test_x86_avx512_psrl_q_512(<8 x i64> %a0, <2 x i64> %a1) {
5252 ; CHECK-LABEL: test_x86_avx512_psrl_q_512:
5254 ; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0
5256 %res = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
5259 define <8 x i64> @test_x86_avx512_mask_psrl_q_512(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %passthru, i8 %mask) {
5260 ; CHECK-LABEL: test_x86_avx512_mask_psrl_q_512:
5262 ; CHECK-NEXT: kmovw %edi, %k1
5263 ; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
5264 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
5266 %res = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
5267 %mask.cast = bitcast i8 %mask to <8 x i1>
5268 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
5271 define <8 x i64> @test_x86_avx512_maskz_psrl_q_512(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
5272 ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q_512:
5274 ; CHECK-NEXT: kmovw %edi, %k1
5275 ; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
5277 %res = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
5278 %mask.cast = bitcast i8 %mask to <8 x i1>
5279 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
5282 declare <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64>, <2 x i64>) nounwind readnone
; --- llvm.x86.avx512.psrli.d.512 (vpsrld $imm on <16 x i32>): unmasked, merge-masked, and
; zero-masked lowerings of the logical shift-right-by-immediate intrinsic.
5285 define <16 x i32> @test_x86_avx512_psrli_d_512(<16 x i32> %a0) {
5286 ; CHECK-LABEL: test_x86_avx512_psrli_d_512:
5288 ; CHECK-NEXT: vpsrld $7, %zmm0, %zmm0
5290 %res = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
5293 define <16 x i32> @test_x86_avx512_mask_psrli_d_512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
5294 ; CHECK-LABEL: test_x86_avx512_mask_psrli_d_512:
5296 ; CHECK-NEXT: kmovw %edi, %k1
5297 ; CHECK-NEXT: vpsrld $7, %zmm0, %zmm1 {%k1}
5298 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
5300 %res = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
5301 %mask.cast = bitcast i16 %mask to <16 x i1>
5302 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
5303 ret <16 x i32> %res2
5305 define <16 x i32> @test_x86_avx512_maskz_psrli_d_512(<16 x i32> %a0, i16 %mask) {
5306 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d_512:
5308 ; CHECK-NEXT: kmovw %edi, %k1
5309 ; CHECK-NEXT: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
5311 %res = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
5312 %mask.cast = bitcast i16 %mask to <16 x i1>
5313 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
5314 ret <16 x i32> %res2
5316 declare <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32>, i32) nounwind readnone
; --- llvm.x86.avx512.psrli.q.512 (vpsrlq $imm on <8 x i64>): unmasked, merge-masked, and
; zero-masked lowerings.
5319 define <8 x i64> @test_x86_avx512_psrli_q_512(<8 x i64> %a0) {
5320 ; CHECK-LABEL: test_x86_avx512_psrli_q_512:
5322 ; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm0
5324 %res = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
5327 define <8 x i64> @test_x86_avx512_mask_psrli_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
5328 ; CHECK-LABEL: test_x86_avx512_mask_psrli_q_512:
5330 ; CHECK-NEXT: kmovw %edi, %k1
5331 ; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm1 {%k1}
5332 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
5334 %res = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
5335 %mask.cast = bitcast i8 %mask to <8 x i1>
5336 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
5339 define <8 x i64> @test_x86_avx512_maskz_psrli_q_512(<8 x i64> %a0, i8 %mask) {
5340 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q_512:
5342 ; CHECK-NEXT: kmovw %edi, %k1
5343 ; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
5345 %res = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
5346 %mask.cast = bitcast i8 %mask to <8 x i1>
5347 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
5350 declare <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64>, i32) nounwind readnone
; --- llvm.x86.avx512.psllv.d.512 (vpsllvd, per-element variable shift counts in %zmm1, <16 x i32>):
; unmasked, merge-masked, and zero-masked lowerings.
5352 define <16 x i32> @test_x86_avx512_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1) {
5353 ; CHECK-LABEL: test_x86_avx512_psllv_d_512:
5355 ; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
5357 %res = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1)
5361 define <16 x i32> @test_x86_avx512_mask_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
5362 ; CHECK-LABEL: test_x86_avx512_mask_psllv_d_512:
5364 ; CHECK-NEXT: kmovw %edi, %k1
5365 ; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
5366 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
5368 %res = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1)
5369 %mask.cast = bitcast i16 %mask to <16 x i1>
5370 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %a2
5371 ret <16 x i32> %res2
5374 define <16 x i32> @test_x86_avx512_maskz_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
5375 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d_512:
5377 ; CHECK-NEXT: kmovw %edi, %k1
5378 ; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
5380 %res = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1)
5381 %mask.cast = bitcast i16 %mask to <16 x i1>
5382 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
5383 ret <16 x i32> %res2
5386 declare <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32>, <16 x i32>) nounwind readnone
; --- llvm.x86.avx512.psllv.q.512 (vpsllvq, per-element variable shift counts in %zmm1, <8 x i64>):
; unmasked, merge-masked, and zero-masked lowerings.
5388 define <8 x i64> @test_x86_avx512_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1) {
5389 ; CHECK-LABEL: test_x86_avx512_psllv_q_512:
5391 ; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm0
5393 %res = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %a0, <8 x i64> %a1)
5397 define <8 x i64> @test_x86_avx512_mask_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
5398 ; CHECK-LABEL: test_x86_avx512_mask_psllv_q_512:
5400 ; CHECK-NEXT: kmovw %edi, %k1
5401 ; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
5402 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
5404 %res = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %a0, <8 x i64> %a1)
5405 %mask.cast = bitcast i8 %mask to <8 x i1>
5406 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %a2
5410 define <8 x i64> @test_x86_avx512_maskz_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
5411 ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q_512:
5413 ; CHECK-NEXT: kmovw %edi, %k1
5414 ; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
5416 %res = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %a0, <8 x i64> %a1)
5417 %mask.cast = bitcast i8 %mask to <8 x i1>
5418 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
5422 declare <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64>, <8 x i64>) nounwind readnone
; --- llvm.x86.avx512.psrav.d.512 (vpsravd, per-element variable arithmetic shift counts in %zmm1,
; <16 x i32>): unmasked, merge-masked, and zero-masked lowerings.
5424 define <16 x i32> @test_x86_avx512_psrav_d_512(<16 x i32> %a0, <16 x i32> %a1) {
5425 ; CHECK-LABEL: test_x86_avx512_psrav_d_512:
5427 ; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm0
5429 %res = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %a0, <16 x i32> %a1)
5433 define <16 x i32> @test_x86_avx512_mask_psrav_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
5434 ; CHECK-LABEL: test_x86_avx512_mask_psrav_d_512:
5436 ; CHECK-NEXT: kmovw %edi, %k1
5437 ; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
5438 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
5440 %res = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %a0, <16 x i32> %a1)
5441 %mask.cast = bitcast i16 %mask to <16 x i1>
5442 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %a2
5443 ret <16 x i32> %res2
5446 define <16 x i32> @test_x86_avx512_maskz_psrav_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
5447 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d_512:
5449 ; CHECK-NEXT: kmovw %edi, %k1
5450 ; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
5452 %res = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %a0, <16 x i32> %a1)
5453 %mask.cast = bitcast i16 %mask to <16 x i1>
5454 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
5455 ret <16 x i32> %res2
5458 declare <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32>, <16 x i32>) nounwind readnone
; --- llvm.x86.avx512.psrav.q.512 (vpsravq, per-element variable arithmetic shift counts in %zmm1,
; <8 x i64>): unmasked, merge-masked, and zero-masked lowerings.
5460 define <8 x i64> @test_x86_avx512_psrav_q_512(<8 x i64> %a0, <8 x i64> %a1) {
5461 ; CHECK-LABEL: test_x86_avx512_psrav_q_512:
5463 ; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm0
5465 %res = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %a0, <8 x i64> %a1)
5469 define <8 x i64> @test_x86_avx512_mask_psrav_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
5470 ; CHECK-LABEL: test_x86_avx512_mask_psrav_q_512:
5472 ; CHECK-NEXT: kmovw %edi, %k1
5473 ; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
5474 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
5476 %res = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %a0, <8 x i64> %a1)
5477 %mask.cast = bitcast i8 %mask to <8 x i1>
5478 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %a2
5482 define <8 x i64> @test_x86_avx512_maskz_psrav_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
5483 ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q_512:
5485 ; CHECK-NEXT: kmovw %edi, %k1
5486 ; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
5488 %res = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %a0, <8 x i64> %a1)
5489 %mask.cast = bitcast i8 %mask to <8 x i1>
5490 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
5494 declare <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64>, <8 x i64>) nounwind readnone
; --- llvm.x86.avx512.psrlv.d.512 (vpsrlvd, per-element variable logical shift counts in %zmm1,
; <16 x i32>): unmasked, merge-masked, and zero-masked lowerings.
5496 define <16 x i32> @test_x86_avx512_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1) {
5497 ; CHECK-LABEL: test_x86_avx512_psrlv_d_512:
5499 ; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
5501 %res = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %a0, <16 x i32> %a1)
5505 define <16 x i32> @test_x86_avx512_mask_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
5506 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d_512:
5508 ; CHECK-NEXT: kmovw %edi, %k1
5509 ; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
5510 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
5512 %res = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %a0, <16 x i32> %a1)
5513 %mask.cast = bitcast i16 %mask to <16 x i1>
5514 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %a2
5515 ret <16 x i32> %res2
5518 define <16 x i32> @test_x86_avx512_maskz_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
5519 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d_512:
5521 ; CHECK-NEXT: kmovw %edi, %k1
5522 ; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
5524 %res = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %a0, <16 x i32> %a1)
5525 %mask.cast = bitcast i16 %mask to <16 x i1>
5526 %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
5527 ret <16 x i32> %res2
5530 declare <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32>, <16 x i32>) nounwind readnone
; --- llvm.x86.avx512.psrlv.q.512 (vpsrlvq, per-element variable logical shift counts in %zmm1,
; <8 x i64>): unmasked, merge-masked, and zero-masked lowerings.
5532 define <8 x i64> @test_x86_avx512_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1) {
5533 ; CHECK-LABEL: test_x86_avx512_psrlv_q_512:
5535 ; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0
5537 %res = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %a0, <8 x i64> %a1)
5541 define <8 x i64> @test_x86_avx512_mask_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
5542 ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q_512:
5544 ; CHECK-NEXT: kmovw %edi, %k1
5545 ; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
5546 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
5548 %res = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %a0, <8 x i64> %a1)
5549 %mask.cast = bitcast i8 %mask to <8 x i1>
5550 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %a2
5554 define <8 x i64> @test_x86_avx512_maskz_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
5555 ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q_512:
5557 ; CHECK-NEXT: kmovw %edi, %k1
5558 ; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
5560 %res = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %a0, <8 x i64> %a1)
5561 %mask.cast = bitcast i8 %mask to <8 x i1>
5562 %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
5566 declare <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64>, <8 x i64>)nounwind readnone