1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=X32
3 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=X64
5 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512f-builtins.c
; Unmasked 32-bit broadcast: %a0 is viewed as <4 x i32>, element 0 is splatted
; to all 16 lanes by an all-zero shuffle mask, and the result is cast back to
; <8 x i64>. Both targets are expected to select vbroadcastss.
7 define <8 x i64> @test_mm512_broadcastd_epi32(<2 x i64> %a0) {
8 ; X32-LABEL: test_mm512_broadcastd_epi32:
10 ; X32-NEXT: vbroadcastss %xmm0, %zmm0
13 ; X64-LABEL: test_mm512_broadcastd_epi32:
15 ; X64-NEXT: vbroadcastss %xmm0, %zmm0
17 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
18 %res0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <16 x i32> zeroinitializer
19 %res1 = bitcast <16 x i32> %res0 to <8 x i64>
; Merge-masked 32-bit broadcast: the i16 %a1 is reinterpreted as a <16 x i1>
; lane mask; lanes whose mask bit is set take element 0 of %a2 (as i32), the
; other lanes keep the matching lane of %a0. The checks expect the mask to be
; moved into %k1 with kmovw, followed by vpbroadcastd predicated on {%k1}.
23 define <8 x i64> @test_mm512_mask_broadcastd_epi32(<8 x i64> %a0, i16 %a1, <2 x i64> %a2) {
24 ; X32-LABEL: test_mm512_mask_broadcastd_epi32:
26 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
27 ; X32-NEXT: kmovw %eax, %k1
28 ; X32-NEXT: vpbroadcastd %xmm1, %zmm0 {%k1}
31 ; X64-LABEL: test_mm512_mask_broadcastd_epi32:
33 ; X64-NEXT: kmovw %edi, %k1
34 ; X64-NEXT: vpbroadcastd %xmm1, %zmm0 {%k1}
36 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
37 %arg1 = bitcast i16 %a1 to <16 x i1>
38 %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
39 %res0 = shufflevector <4 x i32> %arg2, <4 x i32> undef, <16 x i32> zeroinitializer
40 %res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0
41 %res2 = bitcast <16 x i32> %res1 to <8 x i64>
; Zero-masked 32-bit broadcast: the i16 %a0 becomes a <16 x i1> lane mask;
; lanes whose mask bit is set take element 0 of %a1 (as i32), the other lanes
; are zero (select against zeroinitializer). The checks expect kmovw into %k1
; followed by vpbroadcastd with {%k1} {z}.
45 define <8 x i64> @test_mm512_maskz_broadcastd_epi32(i16 %a0, <2 x i64> %a1) {
46 ; X32-LABEL: test_mm512_maskz_broadcastd_epi32:
48 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
49 ; X32-NEXT: kmovw %eax, %k1
50 ; X32-NEXT: vpbroadcastd %xmm0, %zmm0 {%k1} {z}
53 ; X64-LABEL: test_mm512_maskz_broadcastd_epi32:
55 ; X64-NEXT: kmovw %edi, %k1
56 ; X64-NEXT: vpbroadcastd %xmm0, %zmm0 {%k1} {z}
58 %arg0 = bitcast i16 %a0 to <16 x i1>
59 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
60 %res0 = shufflevector <4 x i32> %arg1, <4 x i32> undef, <16 x i32> zeroinitializer
61 %res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer
62 %res2 = bitcast <16 x i32> %res1 to <8 x i64>
; Unmasked 64-bit broadcast: element 0 of the <2 x i64> argument is splatted to
; all 8 lanes by an all-zero shuffle mask. Both targets are expected to select
; vbroadcastsd.
66 define <8 x i64> @test_mm512_broadcastq_epi64(<2 x i64> %a0) {
67 ; X32-LABEL: test_mm512_broadcastq_epi64:
69 ; X32-NEXT: vbroadcastsd %xmm0, %zmm0
72 ; X64-LABEL: test_mm512_broadcastq_epi64:
74 ; X64-NEXT: vbroadcastsd %xmm0, %zmm0
76 %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <8 x i32> zeroinitializer
; Merge-masked 64-bit broadcast: the i8 %a1 is reinterpreted as an <8 x i1>
; lane mask; lanes whose mask bit is set take element 0 of %a2, the other lanes
; keep the matching lane of %a0. The checks expect kmovw into %k1 followed by
; vpbroadcastq predicated on {%k1}.
80 define <8 x i64> @test_mm512_mask_broadcastq_epi64(<8 x i64> %a0, i8 %a1, <2 x i64> %a2) {
81 ; X32-LABEL: test_mm512_mask_broadcastq_epi64:
83 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
84 ; X32-NEXT: kmovw %eax, %k1
85 ; X32-NEXT: vpbroadcastq %xmm1, %zmm0 {%k1}
88 ; X64-LABEL: test_mm512_mask_broadcastq_epi64:
90 ; X64-NEXT: kmovw %edi, %k1
91 ; X64-NEXT: vpbroadcastq %xmm1, %zmm0 {%k1}
93 %arg1 = bitcast i8 %a1 to <8 x i1>
94 %res0 = shufflevector <2 x i64> %a2, <2 x i64> undef, <8 x i32> zeroinitializer
95 %res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
; Zero-masked 64-bit broadcast: the i8 %a0 becomes an <8 x i1> lane mask; lanes
; whose mask bit is set take element 0 of %a1, the other lanes are zero (select
; against zeroinitializer). The checks expect kmovw into %k1 followed by
; vpbroadcastq with {%k1} {z}.
99 define <8 x i64> @test_mm512_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) {
100 ; X32-LABEL: test_mm512_maskz_broadcastq_epi64:
102 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
103 ; X32-NEXT: kmovw %eax, %k1
104 ; X32-NEXT: vpbroadcastq %xmm0, %zmm0 {%k1} {z}
107 ; X64-LABEL: test_mm512_maskz_broadcastq_epi64:
109 ; X64-NEXT: kmovw %edi, %k1
110 ; X64-NEXT: vpbroadcastq %xmm0, %zmm0 {%k1} {z}
112 %arg0 = bitcast i8 %a0 to <8 x i1>
113 %res0 = shufflevector <2 x i64> %a1, <2 x i64> undef, <8 x i32> zeroinitializer
114 %res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
; Unmasked double broadcast: element 0 of the <2 x double> argument is splatted
; to all 8 lanes by an all-zero shuffle mask. Both targets are expected to
; select vbroadcastsd.
118 define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a0) {
119 ; X32-LABEL: test_mm512_broadcastsd_pd:
121 ; X32-NEXT: vbroadcastsd %xmm0, %zmm0
124 ; X64-LABEL: test_mm512_broadcastsd_pd:
126 ; X64-NEXT: vbroadcastsd %xmm0, %zmm0
128 %res = shufflevector <2 x double> %a0, <2 x double> undef, <8 x i32> zeroinitializer
129 ret <8 x double> %res
; Merge-masked double broadcast: the i8 %a1 is reinterpreted as an <8 x i1>
; lane mask; lanes whose mask bit is set take element 0 of %a2, the other lanes
; keep the matching lane of %a0. The checks expect kmovw into %k1 followed by
; vbroadcastsd predicated on {%k1}.
132 define <8 x double> @test_mm512_mask_broadcastsd_pd(<8 x double> %a0, i8 %a1, <2 x double> %a2) {
133 ; X32-LABEL: test_mm512_mask_broadcastsd_pd:
135 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
136 ; X32-NEXT: kmovw %eax, %k1
137 ; X32-NEXT: vbroadcastsd %xmm1, %zmm0 {%k1}
140 ; X64-LABEL: test_mm512_mask_broadcastsd_pd:
142 ; X64-NEXT: kmovw %edi, %k1
143 ; X64-NEXT: vbroadcastsd %xmm1, %zmm0 {%k1}
145 %arg1 = bitcast i8 %a1 to <8 x i1>
146 %res0 = shufflevector <2 x double> %a2, <2 x double> undef, <8 x i32> zeroinitializer
147 %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
148 ret <8 x double> %res1
; Zero-masked double broadcast: the i8 %a0 becomes an <8 x i1> lane mask; lanes
; whose mask bit is set take element 0 of %a1, the other lanes are zero (select
; against zeroinitializer). The checks expect kmovw into %k1 followed by
; vbroadcastsd with {%k1} {z}.
151 define <8 x double> @test_mm512_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) {
152 ; X32-LABEL: test_mm512_maskz_broadcastsd_pd:
154 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
155 ; X32-NEXT: kmovw %eax, %k1
156 ; X32-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
159 ; X64-LABEL: test_mm512_maskz_broadcastsd_pd:
161 ; X64-NEXT: kmovw %edi, %k1
162 ; X64-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
164 %arg0 = bitcast i8 %a0 to <8 x i1>
165 %res0 = shufflevector <2 x double> %a1, <2 x double> undef, <8 x i32> zeroinitializer
166 %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
167 ret <8 x double> %res1
; Unmasked float broadcast: element 0 of the <4 x float> argument is splatted
; to all 16 lanes by an all-zero shuffle mask. Both targets are expected to
; select vbroadcastss.
170 define <16 x float> @test_mm512_broadcastss_ps(<4 x float> %a0) {
171 ; X32-LABEL: test_mm512_broadcastss_ps:
173 ; X32-NEXT: vbroadcastss %xmm0, %zmm0
176 ; X64-LABEL: test_mm512_broadcastss_ps:
178 ; X64-NEXT: vbroadcastss %xmm0, %zmm0
180 %res = shufflevector <4 x float> %a0, <4 x float> undef, <16 x i32> zeroinitializer
181 ret <16 x float> %res
; Merge-masked float broadcast: the i16 %a1 is reinterpreted as a <16 x i1>
; lane mask; lanes whose mask bit is set take element 0 of %a2, the other lanes
; keep the matching lane of %a0. The checks expect kmovw into %k1 followed by
; vbroadcastss predicated on {%k1}.
184 define <16 x float> @test_mm512_mask_broadcastss_ps(<16 x float> %a0, i16 %a1, <4 x float> %a2) {
185 ; X32-LABEL: test_mm512_mask_broadcastss_ps:
187 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
188 ; X32-NEXT: kmovw %eax, %k1
189 ; X32-NEXT: vbroadcastss %xmm1, %zmm0 {%k1}
192 ; X64-LABEL: test_mm512_mask_broadcastss_ps:
194 ; X64-NEXT: kmovw %edi, %k1
195 ; X64-NEXT: vbroadcastss %xmm1, %zmm0 {%k1}
197 %arg1 = bitcast i16 %a1 to <16 x i1>
198 %res0 = shufflevector <4 x float> %a2, <4 x float> undef, <16 x i32> zeroinitializer
199 %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
200 ret <16 x float> %res1
; Zero-masked float broadcast: the i16 %a0 becomes a <16 x i1> lane mask; lanes
; whose mask bit is set take element 0 of %a1, the other lanes are zero (select
; against zeroinitializer). The checks expect kmovw into %k1 followed by
; vbroadcastss with {%k1} {z}.
203 define <16 x float> @test_mm512_maskz_broadcastss_ps(i16 %a0, <4 x float> %a1) {
204 ; X32-LABEL: test_mm512_maskz_broadcastss_ps:
206 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
207 ; X32-NEXT: kmovw %eax, %k1
208 ; X32-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
211 ; X64-LABEL: test_mm512_maskz_broadcastss_ps:
213 ; X64-NEXT: kmovw %edi, %k1
214 ; X64-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
216 %arg0 = bitcast i16 %a0 to <16 x i1>
217 %res0 = shufflevector <4 x float> %a1, <4 x float> undef, <16 x i32> zeroinitializer
218 %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
219 ret <16 x float> %res1
; Unmasked even-element duplication of doubles: shuffle mask
; <0,0,2,2,4,4,6,6> copies each even-indexed element into the odd lane that
; follows it. Both targets are expected to select vmovddup.
222 define <8 x double> @test_mm512_movddup_pd(<8 x double> %a0) {
223 ; X32-LABEL: test_mm512_movddup_pd:
225 ; X32-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
228 ; X64-LABEL: test_mm512_movddup_pd:
230 ; X64-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
232 %res = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
233 ret <8 x double> %res
; Merge-masked even-element duplication of doubles: %a2 is shuffled with mask
; <0,0,2,2,4,4,6,6>; lanes whose bit in the <8 x i1> view of %a1 is set take the
; shuffled value, the other lanes keep %a0. The checks expect kmovw into %k1
; followed by vmovddup writing zmm0 under {%k1}.
236 define <8 x double> @test_mm512_mask_movddup_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
237 ; X32-LABEL: test_mm512_mask_movddup_pd:
239 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
240 ; X32-NEXT: kmovw %eax, %k1
241 ; X32-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6]
244 ; X64-LABEL: test_mm512_mask_movddup_pd:
246 ; X64-NEXT: kmovw %edi, %k1
247 ; X64-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6]
249 %arg1 = bitcast i8 %a1 to <8 x i1>
250 %res0 = shufflevector <8 x double> %a2, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
251 %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
252 ret <8 x double> %res1
; Zero-masked even-element duplication of doubles: %a1 is shuffled with mask
; <0,0,2,2,4,4,6,6>; lanes whose bit in the <8 x i1> view of %a0 is set take the
; shuffled value, the other lanes are zero. The checks expect kmovw into %k1
; followed by vmovddup with {%k1} {z}.
255 define <8 x double> @test_mm512_maskz_movddup_pd(i8 %a0, <8 x double> %a1) {
256 ; X32-LABEL: test_mm512_maskz_movddup_pd:
258 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
259 ; X32-NEXT: kmovw %eax, %k1
260 ; X32-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
263 ; X64-LABEL: test_mm512_maskz_movddup_pd:
265 ; X64-NEXT: kmovw %edi, %k1
266 ; X64-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
268 %arg0 = bitcast i8 %a0 to <8 x i1>
269 %res0 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
270 %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
271 ret <8 x double> %res1
; Unmasked odd-element duplication of floats: shuffle mask <1,1,3,3,...,15,15>
; copies each odd-indexed element into the even lane that precedes it. Both
; targets are expected to select vmovshdup.
274 define <16 x float> @test_mm512_movehdup_ps(<16 x float> %a0) {
275 ; X32-LABEL: test_mm512_movehdup_ps:
277 ; X32-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
280 ; X64-LABEL: test_mm512_movehdup_ps:
282 ; X64-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
284 %res = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
285 ret <16 x float> %res
; Merge-masked odd-element duplication of floats: %a2 is shuffled with mask
; <1,1,3,3,...,15,15>; lanes whose bit in the <16 x i1> view of %a1 is set take
; the shuffled value, the other lanes keep %a0. The checks expect kmovw into
; %k1 followed by vmovshdup writing zmm0 under {%k1}.
288 define <16 x float> @test_mm512_mask_movehdup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
289 ; X32-LABEL: test_mm512_mask_movehdup_ps:
291 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
292 ; X32-NEXT: kmovw %eax, %k1
293 ; X32-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
296 ; X64-LABEL: test_mm512_mask_movehdup_ps:
298 ; X64-NEXT: kmovw %edi, %k1
299 ; X64-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
301 %arg1 = bitcast i16 %a1 to <16 x i1>
302 %res0 = shufflevector <16 x float> %a2, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
303 %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
304 ret <16 x float> %res1
; Zero-masked odd-element duplication of floats: %a1 is shuffled with mask
; <1,1,3,3,...,15,15>; lanes whose bit in the <16 x i1> view of %a0 is set take
; the shuffled value, the other lanes are zero. The checks expect kmovw into
; %k1 followed by vmovshdup with {%k1} {z}.
307 define <16 x float> @test_mm512_maskz_movehdup_ps(i16 %a0, <16 x float> %a1) {
308 ; X32-LABEL: test_mm512_maskz_movehdup_ps:
310 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
311 ; X32-NEXT: kmovw %eax, %k1
312 ; X32-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
315 ; X64-LABEL: test_mm512_maskz_movehdup_ps:
317 ; X64-NEXT: kmovw %edi, %k1
318 ; X64-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
320 %arg0 = bitcast i16 %a0 to <16 x i1>
321 %res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
322 %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
323 ret <16 x float> %res1
; Unmasked even-element duplication of floats: shuffle mask <0,0,2,2,...,14,14>
; copies each even-indexed element into the odd lane that follows it. Both
; targets are expected to select vmovsldup.
326 define <16 x float> @test_mm512_moveldup_ps(<16 x float> %a0) {
327 ; X32-LABEL: test_mm512_moveldup_ps:
329 ; X32-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
332 ; X64-LABEL: test_mm512_moveldup_ps:
334 ; X64-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
336 %res = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
337 ret <16 x float> %res
; Merge-masked even-element duplication of floats: %a2 is shuffled with mask
; <0,0,2,2,...,14,14>; lanes whose bit in the <16 x i1> view of %a1 is set take
; the shuffled value, the other lanes keep %a0. The checks expect kmovw into
; %k1 followed by vmovsldup writing zmm0 under {%k1}.
340 define <16 x float> @test_mm512_mask_moveldup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
341 ; X32-LABEL: test_mm512_mask_moveldup_ps:
343 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
344 ; X32-NEXT: kmovw %eax, %k1
345 ; X32-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
348 ; X64-LABEL: test_mm512_mask_moveldup_ps:
350 ; X64-NEXT: kmovw %edi, %k1
351 ; X64-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
353 %arg1 = bitcast i16 %a1 to <16 x i1>
354 %res0 = shufflevector <16 x float> %a2, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
355 %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
356 ret <16 x float> %res1
; Zero-masked even-element duplication of floats: %a1 is shuffled with mask
; <0,0,2,2,...,14,14>; lanes whose bit in the <16 x i1> view of %a0 is set take
; the shuffled value, the other lanes are zero. The checks expect kmovw into
; %k1 followed by vmovsldup with {%k1} {z}.
359 define <16 x float> @test_mm512_maskz_moveldup_ps(i16 %a0, <16 x float> %a1) {
360 ; X32-LABEL: test_mm512_maskz_moveldup_ps:
362 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
363 ; X32-NEXT: kmovw %eax, %k1
364 ; X32-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
367 ; X64-LABEL: test_mm512_maskz_moveldup_ps:
369 ; X64-NEXT: kmovw %edi, %k1
370 ; X64-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
372 %arg0 = bitcast i16 %a0 to <16 x i1>
373 %res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
374 %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
375 ret <16 x float> %res1
; Unmasked permute of doubles within element pairs: shuffle mask
; <0,1,2,2,4,4,6,6> leaves the first pair unchanged and, in the other three
; pairs, copies the even element into the odd lane. Both targets are expected
; to select vpermilpd.
378 define <8 x double> @test_mm512_permute_pd(<8 x double> %a0) {
379 ; X32-LABEL: test_mm512_permute_pd:
381 ; X32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,2,4,4,6,6]
384 ; X64-LABEL: test_mm512_permute_pd:
386 ; X64-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,2,4,4,6,6]
388 %res = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
389 ret <8 x double> %res
; Merge-masked permute of doubles: %a2 is shuffled with mask <0,1,2,2,4,4,6,6>;
; lanes whose bit in the <8 x i1> view of %a1 is set take the shuffled value,
; the other lanes keep %a0. The checks expect kmovw into %k1 followed by
; vpermilpd writing zmm0 under {%k1}.
392 define <8 x double> @test_mm512_mask_permute_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
393 ; X32-LABEL: test_mm512_mask_permute_pd:
395 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
396 ; X32-NEXT: kmovw %eax, %k1
397 ; X32-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,2,4,4,6,6]
400 ; X64-LABEL: test_mm512_mask_permute_pd:
402 ; X64-NEXT: kmovw %edi, %k1
403 ; X64-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,2,4,4,6,6]
405 %arg1 = bitcast i8 %a1 to <8 x i1>
406 %res0 = shufflevector <8 x double> %a2, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
407 %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
408 ret <8 x double> %res1
; Zero-masked permute of doubles: %a1 is shuffled with mask <0,1,2,2,4,4,6,6>;
; lanes whose bit in the <8 x i1> view of %a0 is set take the shuffled value,
; the other lanes are zero. The checks expect kmovw into %k1 followed by
; vpermilpd with {%k1} {z}.
411 define <8 x double> @test_mm512_maskz_permute_pd(i8 %a0, <8 x double> %a1) {
412 ; X32-LABEL: test_mm512_maskz_permute_pd:
414 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
415 ; X32-NEXT: kmovw %eax, %k1
416 ; X32-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,2,4,4,6,6]
419 ; X64-LABEL: test_mm512_maskz_permute_pd:
421 ; X64-NEXT: kmovw %edi, %k1
422 ; X64-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,2,4,4,6,6]
424 %arg0 = bitcast i8 %a0 to <8 x i1>
425 %res0 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
426 %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
427 ret <8 x double> %res1
; Unmasked permute of floats within groups of four: the shuffle mask applies
; the pattern <2,0,0,0> to each group (offsets 0, 4, 8 and 12). Both targets
; are expected to select vpermilps.
430 define <16 x float> @test_mm512_permute_ps(<16 x float> %a0) {
431 ; X32-LABEL: test_mm512_permute_ps:
433 ; X32-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
436 ; X64-LABEL: test_mm512_permute_ps:
438 ; X64-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
440 %res = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
441 ret <16 x float> %res
; Merge-masked permute of floats: %a2 is shuffled with the per-group-of-four
; pattern <2,0,0,0>; lanes whose bit in the <16 x i1> view of %a1 is set take
; the shuffled value, the other lanes keep %a0. The checks expect kmovw into
; %k1 followed by vpermilps writing zmm0 under {%k1}.
444 define <16 x float> @test_mm512_mask_permute_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
445 ; X32-LABEL: test_mm512_mask_permute_ps:
447 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
448 ; X32-NEXT: kmovw %eax, %k1
449 ; X32-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm1[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
452 ; X64-LABEL: test_mm512_mask_permute_ps:
454 ; X64-NEXT: kmovw %edi, %k1
455 ; X64-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm1[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
457 %arg1 = bitcast i16 %a1 to <16 x i1>
458 %res0 = shufflevector <16 x float> %a2, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
459 %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
460 ret <16 x float> %res1
; Zero-masked permute of floats: %a1 is shuffled with the per-group-of-four
; pattern <2,0,0,0>; lanes whose bit in the <16 x i1> view of %a0 is set take
; the shuffled value, the other lanes are zero. The checks expect kmovw into
; %k1 followed by vpermilps with {%k1} {z}.
463 define <16 x float> @test_mm512_maskz_permute_ps(i16 %a0, <16 x float> %a1) {
464 ; X32-LABEL: test_mm512_maskz_permute_ps:
466 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
467 ; X32-NEXT: kmovw %eax, %k1
468 ; X32-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
471 ; X64-LABEL: test_mm512_maskz_permute_ps:
473 ; X64-NEXT: kmovw %edi, %k1
474 ; X64-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
476 %arg0 = bitcast i16 %a0 to <16 x i1>
477 %res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
478 %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
479 ret <16 x float> %res1
; Unmasked permute of i64 elements within each four-element half: shuffle mask
; <0,0,0,0,4,4,4,4> replicates the first element of each half across that half.
; Both targets are expected to select vpermq.
482 define <8 x i64> @test_mm512_permutex_epi64(<8 x i64> %a0) {
483 ; X32-LABEL: test_mm512_permutex_epi64:
485 ; X32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
488 ; X64-LABEL: test_mm512_permutex_epi64:
490 ; X64-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
492 %res = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
; Merge-masked i64 permute: %a2 is shuffled with mask <0,0,0,0,4,4,4,4>; lanes
; whose bit in the <8 x i1> view of %a1 is set take the shuffled value, the
; other lanes keep %a0. The checks expect kmovw into %k1 followed by vpermq
; writing zmm0 under {%k1}.
496 define <8 x i64> @test_mm512_mask_permutex_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2) {
497 ; X32-LABEL: test_mm512_mask_permutex_epi64:
499 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
500 ; X32-NEXT: kmovw %eax, %k1
501 ; X32-NEXT: vpermq {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
504 ; X64-LABEL: test_mm512_mask_permutex_epi64:
506 ; X64-NEXT: kmovw %edi, %k1
507 ; X64-NEXT: vpermq {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
509 %arg1 = bitcast i8 %a1 to <8 x i1>
510 %res0 = shufflevector <8 x i64> %a2, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
511 %res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
; Zero-masked i64 permute: %a1 is shuffled with mask <0,0,0,0,4,4,4,4>; lanes
; whose bit in the <8 x i1> view of %a0 is set take the shuffled value, the
; other lanes are zero. The checks expect kmovw into %k1 followed by vpermq
; with {%k1} {z}.
515 define <8 x i64> @test_mm512_maskz_permutex_epi64(i8 %a0, <8 x i64> %a1) {
516 ; X32-LABEL: test_mm512_maskz_permutex_epi64:
518 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
519 ; X32-NEXT: kmovw %eax, %k1
520 ; X32-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
523 ; X64-LABEL: test_mm512_maskz_permutex_epi64:
525 ; X64-NEXT: kmovw %edi, %k1
526 ; X64-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
528 %arg0 = bitcast i8 %a0 to <8 x i1>
529 %res0 = shufflevector <8 x i64> %a1, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
530 %res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
; Unmasked permute of doubles within each four-element half: shuffle mask
; <0,0,0,0,4,4,4,4> replicates the first element of each half across that half.
; Both targets are expected to select vpermpd.
534 define <8 x double> @test_mm512_permutex_pd(<8 x double> %a0) {
535 ; X32-LABEL: test_mm512_permutex_pd:
537 ; X32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
540 ; X64-LABEL: test_mm512_permutex_pd:
542 ; X64-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
544 %res = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
545 ret <8 x double> %res
; Merge-masked double permute: %a2 is shuffled with mask <0,0,0,0,4,4,4,4>;
; lanes whose bit in the <8 x i1> view of %a1 is set take the shuffled value,
; the other lanes keep %a0. The checks expect kmovw into %k1 followed by
; vpermpd writing zmm0 under {%k1}.
548 define <8 x double> @test_mm512_mask_permutex_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
549 ; X32-LABEL: test_mm512_mask_permutex_pd:
551 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
552 ; X32-NEXT: kmovw %eax, %k1
553 ; X32-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
556 ; X64-LABEL: test_mm512_mask_permutex_pd:
558 ; X64-NEXT: kmovw %edi, %k1
559 ; X64-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
561 %arg1 = bitcast i8 %a1 to <8 x i1>
562 %res0 = shufflevector <8 x double> %a2, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
563 %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
564 ret <8 x double> %res1
; Zero-masked double permute: %a1 is shuffled with mask <0,0,0,0,4,4,4,4>;
; lanes whose bit in the <8 x i1> view of %a0 is set take the shuffled value,
; the other lanes are zero. The checks expect kmovw into %k1 followed by
; vpermpd with {%k1} {z}.
567 define <8 x double> @test_mm512_maskz_permutex_pd(i8 %a0, <8 x double> %a1) {
568 ; X32-LABEL: test_mm512_maskz_permutex_pd:
570 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
571 ; X32-NEXT: kmovw %eax, %k1
572 ; X32-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
575 ; X64-LABEL: test_mm512_maskz_permutex_pd:
577 ; X64-NEXT: kmovw %edi, %k1
578 ; X64-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
580 %arg0 = bitcast i8 %a0 to <8 x i1>
581 %res0 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
582 %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
583 ret <8 x double> %res1
; Unmasked i32 shuffle within groups of four: %a0 is viewed as <16 x i32>, the
; pattern <1,0,0,0> is applied to each group (offsets 0, 4, 8 and 12), and the
; result is cast back to <8 x i64>. Both targets are expected to select vpshufd.
586 define <8 x i64> @test_mm512_shuffle_epi32(<8 x i64> %a0) {
587 ; X32-LABEL: test_mm512_shuffle_epi32:
589 ; X32-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
592 ; X64-LABEL: test_mm512_shuffle_epi32:
594 ; X64-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
596 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
597 %res0 = shufflevector <16 x i32> %arg0, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
598 %res1 = bitcast <16 x i32> %res0 to <8 x i64>
; Merge-masked i32 shuffle: %a2 (viewed as <16 x i32>) is shuffled with the
; per-group-of-four pattern <1,0,0,0>; lanes whose bit in the <16 x i1> view of
; %a1 is set take the shuffled value, the other lanes keep the i32 lanes of
; %a0. The checks expect kmovw into %k1 followed by vpshufd under {%k1}.
602 define <8 x i64> @test_mm512_mask_shuffle_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2) {
603 ; X32-LABEL: test_mm512_mask_shuffle_epi32:
605 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
606 ; X32-NEXT: kmovw %eax, %k1
607 ; X32-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = zmm1[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
610 ; X64-LABEL: test_mm512_mask_shuffle_epi32:
612 ; X64-NEXT: kmovw %edi, %k1
613 ; X64-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = zmm1[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
615 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
616 %arg1 = bitcast i16 %a1 to <16 x i1>
617 %arg2 = bitcast <8 x i64> %a2 to <16 x i32>
618 %res0 = shufflevector <16 x i32> %arg2, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
619 %res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0
620 %res2 = bitcast <16 x i32> %res1 to <8 x i64>
; Zero-masked i32 shuffle: %a1 (viewed as <16 x i32>) is shuffled with the
; per-group-of-four pattern <1,0,0,0>; lanes whose bit in the <16 x i1> view of
; %a0 is set take the shuffled value, the other lanes are zero. The checks
; expect kmovw into %k1 followed by vpshufd with {%k1} {z}.
624 define <8 x i64> @test_mm512_maskz_shuffle_epi32(i16 %a0, <8 x i64> %a1) {
625 ; X32-LABEL: test_mm512_maskz_shuffle_epi32:
627 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
628 ; X32-NEXT: kmovw %eax, %k1
629 ; X32-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
632 ; X64-LABEL: test_mm512_maskz_shuffle_epi32:
634 ; X64-NEXT: kmovw %edi, %k1
635 ; X64-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
637 %arg0 = bitcast i16 %a0 to <16 x i1>
638 %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
639 %res0 = shufflevector <16 x i32> %arg1, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
640 %res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer
641 %res2 = bitcast <16 x i32> %res1 to <8 x i64>
; Unmasked two-source double shuffle: mask <0,8,3,10,4,12,6,14> fills even
; result lanes from %a0 (elements 0,3,4,6) and odd result lanes from %a1
; (indices 8,10,12,14, i.e. its elements 0,2,4,6). Both targets are expected to
; select vshufpd.
645 define <8 x double> @test_mm512_shuffle_pd(<8 x double> %a0, <8 x double> %a1) {
646 ; X32-LABEL: test_mm512_shuffle_pd:
648 ; X32-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
651 ; X64-LABEL: test_mm512_shuffle_pd:
653 ; X64-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
655 %res = shufflevector <8 x double> %a0, <8 x double> %a1, <8 x i32> <i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
656 ret <8 x double> %res
; Merge-masked two-source double shuffle: %a2 and %a3 are combined with mask
; <0,8,3,10,4,12,6,14>; lanes whose bit in the <8 x i1> view of %a1 is set take
; the shuffled value, the other lanes keep %a0. The checks expect kmovw into
; %k1 followed by vshufpd writing zmm0 under {%k1}.
659 define <8 x double> @test_mm512_mask_shuffle_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
660 ; X32-LABEL: test_mm512_mask_shuffle_pd:
662 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
663 ; X32-NEXT: kmovw %eax, %k1
664 ; X32-NEXT: vshufpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[3],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
667 ; X64-LABEL: test_mm512_mask_shuffle_pd:
669 ; X64-NEXT: kmovw %edi, %k1
670 ; X64-NEXT: vshufpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[3],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
672 %arg1 = bitcast i8 %a1 to <8 x i1>
673 %res0 = shufflevector <8 x double> %a2, <8 x double> %a3, <8 x i32> <i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
674 %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
675 ret <8 x double> %res1
; Zero-masked two-source double shuffle: %a1 and %a2 are combined with mask
; <0,8,3,10,4,12,6,14>; lanes whose bit in the <8 x i1> view of %a0 is set take
; the shuffled value, the other lanes are zero. The checks expect kmovw into
; %k1 followed by vshufpd with {%k1} {z}.
678 define <8 x double> @test_mm512_maskz_shuffle_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
679 ; X32-LABEL: test_mm512_maskz_shuffle_pd:
681 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
682 ; X32-NEXT: kmovw %eax, %k1
683 ; X32-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
686 ; X64-LABEL: test_mm512_maskz_shuffle_pd:
688 ; X64-NEXT: kmovw %edi, %k1
689 ; X64-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
691 %arg0 = bitcast i8 %a0 to <8 x i1>
692 %res0 = shufflevector <8 x double> %a1, <8 x double> %a2, <8 x i32> <i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
693 %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
694 ret <8 x double> %res1
; Unmasked high-half i32 interleave: both arguments are viewed as <16 x i32>
; and, for each group of four, elements 2 and 3 of the first source are
; interleaved with elements 2 and 3 of the second (mask 2,18,3,19,...). The
; result is cast back to <8 x i64>. Both targets are expected to select
; vpunpckhdq.
697 define <8 x i64> @test_mm512_unpackhi_epi32(<8 x i64> %a0, <8 x i64> %a1) {
698 ; X32-LABEL: test_mm512_unpackhi_epi32:
700 ; X32-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
703 ; X64-LABEL: test_mm512_unpackhi_epi32:
705 ; X64-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
707 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
708 %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
709 %res0 = shufflevector <16 x i32> %arg0, <16 x i32> %arg1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
710 %res1 = bitcast <16 x i32> %res0 to <8 x i64>
; Merge-masked high-half i32 interleave: %a2 and %a3 (viewed as <16 x i32>) are
; interleaved with mask 2,18,3,19,...; lanes whose bit in the <16 x i1> view of
; %a1 is set take the interleaved value, the other lanes keep the i32 lanes of
; %a0. The checks expect kmovw into %k1 followed by vpunpckhdq under {%k1}.
714 define <8 x i64> @test_mm512_mask_unpackhi_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2, <8 x i64> %a3) {
715 ; X32-LABEL: test_mm512_mask_unpackhi_epi32:
717 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
718 ; X32-NEXT: kmovw %eax, %k1
719 ; X32-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
722 ; X64-LABEL: test_mm512_mask_unpackhi_epi32:
724 ; X64-NEXT: kmovw %edi, %k1
725 ; X64-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
727 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
728 %arg1 = bitcast i16 %a1 to <16 x i1>
729 %arg2 = bitcast <8 x i64> %a2 to <16 x i32>
730 %arg3 = bitcast <8 x i64> %a3 to <16 x i32>
731 %res0 = shufflevector <16 x i32> %arg2, <16 x i32> %arg3, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
732 %res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0
733 %res2 = bitcast <16 x i32> %res1 to <8 x i64>
; Zero-masked high-half i32 interleave: %a1 and %a2 (viewed as <16 x i32>) are
; interleaved with mask 2,18,3,19,...; lanes whose bit in the <16 x i1> view of
; %a0 is set take the interleaved value, the other lanes are zero. The checks
; expect kmovw into %k1 followed by vpunpckhdq with {%k1} {z}.
737 define <8 x i64> @test_mm512_maskz_unpackhi_epi32(i16 %a0, <8 x i64> %a1, <8 x i64> %a2) {
738 ; X32-LABEL: test_mm512_maskz_unpackhi_epi32:
740 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
741 ; X32-NEXT: kmovw %eax, %k1
742 ; X32-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
745 ; X64-LABEL: test_mm512_maskz_unpackhi_epi32:
747 ; X64-NEXT: kmovw %edi, %k1
748 ; X64-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
750 %arg0 = bitcast i16 %a0 to <16 x i1>
751 %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
752 %arg2 = bitcast <8 x i64> %a2 to <16 x i32>
753 %res0 = shufflevector <16 x i32> %arg1, <16 x i32> %arg2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
754 %res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer
755 %res2 = bitcast <16 x i32> %res1 to <8 x i64>
; Unmasked high-element i64 interleave: mask <1,9,3,11,5,13,7,15> pairs each
; odd-indexed element of %a0 with the same-indexed element of %a1. Both targets
; are expected to select vpunpckhqdq.
759 define <8 x i64> @test_mm512_unpackhi_epi64(<8 x i64> %a0, <8 x i64> %a1) {
760 ; X32-LABEL: test_mm512_unpackhi_epi64:
762 ; X32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
765 ; X64-LABEL: test_mm512_unpackhi_epi64:
767 ; X64-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
769 %res = shufflevector <8 x i64> %a0, <8 x i64> %a1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
; Merge-masked high-element i64 interleave: %a2 and %a3 are interleaved with
; mask <1,9,3,11,5,13,7,15>; lanes whose bit in the <8 x i1> view of %a1 is set
; take the interleaved value, the other lanes keep %a0. The checks expect kmovw
; into %k1 followed by vpunpckhqdq.
; NOTE(review): unlike the other merge-masked tests in this file, the
; vpunpckhqdq shuffle-comment checks below show no {%k1} annotation on zmm0.
; These lines are autogenerated, so this presumably mirrors what llc printed
; when they were produced - confirm by regenerating before editing by hand.
773 define <8 x i64> @test_mm512_mask_unpackhi_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2, <8 x i64> %a3) {
774 ; X32-LABEL: test_mm512_mask_unpackhi_epi64:
776 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
777 ; X32-NEXT: kmovw %eax, %k1
778 ; X32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
781 ; X64-LABEL: test_mm512_mask_unpackhi_epi64:
783 ; X64-NEXT: kmovw %edi, %k1
784 ; X64-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
786 %arg1 = bitcast i8 %a1 to <8 x i1>
787 %res0 = shufflevector <8 x i64> %a2, <8 x i64> %a3, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
788 %res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
; Zero-masked high-element i64 interleave: %a1 and %a2 are interleaved with
; mask <1,9,3,11,5,13,7,15>; lanes whose bit in the <8 x i1> view of %a0 is set
; take the interleaved value, the other lanes are zero. The checks expect kmovw
; into %k1 followed by vpunpckhqdq.
; NOTE(review): unlike the other zero-masked tests in this file, the
; vpunpckhqdq shuffle-comment checks below show no {%k1} {z} annotation on
; zmm0. These lines are autogenerated, so this presumably mirrors what llc
; printed when they were produced - confirm by regenerating before editing by
; hand.
792 define <8 x i64> @test_mm512_maskz_unpackhi_epi64(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
793 ; X32-LABEL: test_mm512_maskz_unpackhi_epi64:
795 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
796 ; X32-NEXT: kmovw %eax, %k1
797 ; X32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
800 ; X64-LABEL: test_mm512_maskz_unpackhi_epi64:
802 ; X64-NEXT: kmovw %edi, %k1
803 ; X64-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
805 %arg0 = bitcast i8 %a0 to <8 x i1>
806 %res0 = shufflevector <8 x i64> %a1, <8 x i64> %a2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
807 %res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
; Unmasked high-element double interleave: mask <1,9,3,11,5,13,7,15> pairs each
; odd-indexed element of %a0 with the same-indexed element of %a1. Both targets
; are expected to select vunpckhpd.
811 define <8 x double> @test_mm512_unpackhi_pd(<8 x double> %a0, <8 x double> %a1) {
812 ; X32-LABEL: test_mm512_unpackhi_pd:
814 ; X32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
817 ; X64-LABEL: test_mm512_unpackhi_pd:
819 ; X64-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
821 %res = shufflevector <8 x double> %a0, <8 x double> %a1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
822 ret <8 x double> %res
; Merge-masked high-element double interleave: %a2 and %a3 are interleaved with
; mask <1,9,3,11,5,13,7,15>; lanes whose bit in the <8 x i1> view of %a1 is set
; take the interleaved value, the other lanes keep %a0. The checks expect kmovw
; into %k1 followed by vunpckhpd writing zmm0 under {%k1}.
825 define <8 x double> @test_mm512_mask_unpackhi_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
826 ; X32-LABEL: test_mm512_mask_unpackhi_pd:
828 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
829 ; X32-NEXT: kmovw %eax, %k1
830 ; X32-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
833 ; X64-LABEL: test_mm512_mask_unpackhi_pd:
835 ; X64-NEXT: kmovw %edi, %k1
836 ; X64-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
838 %arg1 = bitcast i8 %a1 to <8 x i1>
839 %res0 = shufflevector <8 x double> %a2, <8 x double> %a3, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
840 %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
841 ret <8 x double> %res1
; Zero-masked high-element double interleave: %a1 and %a2 are interleaved with
; mask <1,9,3,11,5,13,7,15>; lanes whose bit in the <8 x i1> view of %a0 is set
; take the interleaved value, the other lanes are zero. The checks expect kmovw
; into %k1 followed by vunpckhpd with {%k1} {z}.
844 define <8 x double> @test_mm512_maskz_unpackhi_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
845 ; X32-LABEL: test_mm512_maskz_unpackhi_pd:
847 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
848 ; X32-NEXT: kmovw %eax, %k1
849 ; X32-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
852 ; X64-LABEL: test_mm512_maskz_unpackhi_pd:
854 ; X64-NEXT: kmovw %edi, %k1
855 ; X64-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
857 %arg0 = bitcast i8 %a0 to <8 x i1>
858 %res0 = shufflevector <8 x double> %a1, <8 x double> %a2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
859 %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
860 ret <8 x double> %res1
; Unmasked unpackhi_ps pattern. Within every group of four floats the shuffle
; interleaves elements 2 and 3 of %a0 and %a1 (mask indices 16-31 address %a1):
; a0[2],a1[2],a0[3],a1[3],a0[6],a1[6],...
; Both the i386 and the x86_64 configuration are expected to emit one vunpckhps
; on zmm registers.
863 define <16 x float> @test_mm512_unpackhi_ps(<16 x float> %a0, <16 x float> %a1) {
864 ; X32-LABEL: test_mm512_unpackhi_ps:
866 ; X32-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
869 ; X64-LABEL: test_mm512_unpackhi_ps:
871 ; X64-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
873 %res = shufflevector <16 x float> %a0, <16 x float> %a1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
874 ret <16 x float> %res
; Merge-masked unpackhi_ps pattern. %a1 is reinterpreted as sixteen i1 lane
; flags: lanes whose flag is set take the high-half interleave of %a2/%a3
; (elements 2 and 3 of every group of four), all other lanes keep %a0.
; Expected code moves the 16-bit mask into %k1 (loaded from the stack on i386,
; taken from %edi on x86_64) and emits one vunpckhps with {%k1} merge-masking.
877 define <16 x float> @test_mm512_mask_unpackhi_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2, <16 x float> %a3) {
878 ; X32-LABEL: test_mm512_mask_unpackhi_ps:
880 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
881 ; X32-NEXT: kmovw %eax, %k1
882 ; X32-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
885 ; X64-LABEL: test_mm512_mask_unpackhi_ps:
887 ; X64-NEXT: kmovw %edi, %k1
888 ; X64-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
; i16 mask -> one i1 per result lane.
890 %arg1 = bitcast i16 %a1 to <16 x i1>
891 %res0 = shufflevector <16 x float> %a2, <16 x float> %a3, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
; Set lanes take the shuffle result, clear lanes pass %a0 through.
892 %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
893 ret <16 x float> %res1
; Zero-masked unpackhi_ps pattern. %a0 is reinterpreted as sixteen i1 lane
; flags: lanes whose flag is set take the high-half interleave of %a1/%a2
; (elements 2 and 3 of every group of four), all other lanes become zero.
; Expected code moves the 16-bit mask into %k1 (loaded from the stack on i386,
; taken from %edi on x86_64) and emits one vunpckhps with {%k1} {z}.
896 define <16 x float> @test_mm512_maskz_unpackhi_ps(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
897 ; X32-LABEL: test_mm512_maskz_unpackhi_ps:
899 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
900 ; X32-NEXT: kmovw %eax, %k1
901 ; X32-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
904 ; X64-LABEL: test_mm512_maskz_unpackhi_ps:
906 ; X64-NEXT: kmovw %edi, %k1
907 ; X64-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; i16 mask -> one i1 per result lane.
909 %arg0 = bitcast i16 %a0 to <16 x i1>
910 %res0 = shufflevector <16 x float> %a1, <16 x float> %a2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
; Set lanes take the shuffle result, clear lanes are zeroed.
911 %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
912 ret <16 x float> %res1
; Unmasked unpacklo_epi32 pattern. Both <8 x i64> arguments are viewed as
; <16 x i32>; within every group of four dwords the shuffle interleaves
; elements 0 and 1 of the two sources (mask indices 16-31 address the second
; source), and the result is cast back to <8 x i64>.
; Both the i386 and the x86_64 configuration are expected to emit one
; vpunpckldq on zmm registers.
915 define <8 x i64> @test_mm512_unpacklo_epi32(<8 x i64> %a0, <8 x i64> %a1) {
916 ; X32-LABEL: test_mm512_unpacklo_epi32:
918 ; X32-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
921 ; X64-LABEL: test_mm512_unpacklo_epi32:
923 ; X64-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; Reinterpret the 64-bit lanes as 32-bit lanes so the shuffle works on dwords.
925 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
926 %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
927 %res0 = shufflevector <16 x i32> %arg0, <16 x i32> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
; Cast back to the <8 x i64> type named in the function signature.
928 %res1 = bitcast <16 x i32> %res0 to <8 x i64>
; Merge-masked unpacklo_epi32 pattern. All vector arguments are viewed as
; <16 x i32> and %a1 as sixteen i1 lane flags: lanes whose flag is set take the
; low-half dword interleave of %a2/%a3 (elements 0 and 1 of every group of
; four), all other lanes keep the corresponding dword of %a0.
; Expected code moves the 16-bit mask into %k1 (loaded from the stack on i386,
; taken from %edi on x86_64) and emits one vpunpckldq with {%k1} merge-masking.
932 define <8 x i64> @test_mm512_mask_unpacklo_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2, <8 x i64> %a3) {
933 ; X32-LABEL: test_mm512_mask_unpacklo_epi32:
935 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
936 ; X32-NEXT: kmovw %eax, %k1
937 ; X32-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
940 ; X64-LABEL: test_mm512_mask_unpacklo_epi32:
942 ; X64-NEXT: kmovw %edi, %k1
943 ; X64-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
; Pass-through value, mask and both sources are all recast to 16-lane form.
945 %arg0 = bitcast <8 x i64> %a0 to <16 x i32>
946 %arg1 = bitcast i16 %a1 to <16 x i1>
947 %arg2 = bitcast <8 x i64> %a2 to <16 x i32>
948 %arg3 = bitcast <8 x i64> %a3 to <16 x i32>
949 %res0 = shufflevector <16 x i32> %arg2, <16 x i32> %arg3, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
; Set lanes take the shuffle result, clear lanes pass %arg0 through.
950 %res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0
951 %res2 = bitcast <16 x i32> %res1 to <8 x i64>
; Zero-masked unpacklo_epi32 pattern. The vector arguments are viewed as
; <16 x i32> and %a0 as sixteen i1 lane flags: lanes whose flag is set take the
; low-half dword interleave of %a1/%a2 (elements 0 and 1 of every group of
; four), all other lanes become zero.
; Expected code moves the 16-bit mask into %k1 (loaded from the stack on i386,
; taken from %edi on x86_64) and emits one vpunpckldq with {%k1} {z}.
955 define <8 x i64> @test_mm512_maskz_unpacklo_epi32(i16 %a0, <8 x i64> %a1, <8 x i64> %a2) {
956 ; X32-LABEL: test_mm512_maskz_unpacklo_epi32:
958 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
959 ; X32-NEXT: kmovw %eax, %k1
960 ; X32-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
963 ; X64-LABEL: test_mm512_maskz_unpacklo_epi32:
965 ; X64-NEXT: kmovw %edi, %k1
966 ; X64-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; Mask and both sources are recast to 16-lane form.
968 %arg0 = bitcast i16 %a0 to <16 x i1>
969 %arg1 = bitcast <8 x i64> %a1 to <16 x i32>
970 %arg2 = bitcast <8 x i64> %a2 to <16 x i32>
971 %res0 = shufflevector <16 x i32> %arg1, <16 x i32> %arg2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
; Set lanes take the shuffle result, clear lanes are zeroed.
972 %res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer
973 %res2 = bitcast <16 x i32> %res1 to <8 x i64>
; Unmasked unpacklo_epi64 pattern. The shuffle interleaves the even-indexed
; quadwords of %a0 and %a1 (mask indices 8-15 address %a1):
; a0[0],a1[0],a0[2],a1[2],...
; Both the i386 and the x86_64 configuration are expected to emit one
; vpunpcklqdq on zmm registers.
977 define <8 x i64> @test_mm512_unpacklo_epi64(<8 x i64> %a0, <8 x i64> %a1) {
978 ; X32-LABEL: test_mm512_unpacklo_epi64:
980 ; X32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
983 ; X64-LABEL: test_mm512_unpacklo_epi64:
985 ; X64-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
987 %res = shufflevector <8 x i64> %a0, <8 x i64> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; Merge-masked unpacklo_epi64 pattern. %a1 is reinterpreted as eight i1 lane
; flags: lanes whose flag is set take the interleaved even quadwords of
; %a2/%a3, all other lanes keep the value from %a0.
; Expected code moves the mask into %k1 (loaded from the stack on i386, taken
; from %edi on x86_64) and emits one vpunpcklqdq.
; NOTE(review): unlike the pd/ps variants in this file, the expected shuffle
; comment here carries no {%k1} annotation. These assertions are autogenerated,
; so this presumably mirrors what llc printed at the time - regenerate with
; update_llc_test_checks.py rather than hand-editing, and confirm.
991 define <8 x i64> @test_mm512_mask_unpacklo_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2, <8 x i64> %a3) {
992 ; X32-LABEL: test_mm512_mask_unpacklo_epi64:
994 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
995 ; X32-NEXT: kmovw %eax, %k1
996 ; X32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
999 ; X64-LABEL: test_mm512_mask_unpacklo_epi64:
1001 ; X64-NEXT: kmovw %edi, %k1
1002 ; X64-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
; i8 mask -> one i1 per result lane.
1004 %arg1 = bitcast i8 %a1 to <8 x i1>
1005 %res0 = shufflevector <8 x i64> %a2, <8 x i64> %a3, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; Set lanes take the shuffle result, clear lanes pass %a0 through.
1006 %res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
; Zero-masked unpacklo_epi64 pattern. %a0 is reinterpreted as eight i1 lane
; flags: lanes whose flag is set take the interleaved even quadwords of
; %a1/%a2, all other lanes become zero.
; Expected code moves the mask into %k1 (loaded from the stack on i386, taken
; from %edi on x86_64) and emits one vpunpcklqdq.
; NOTE(review): unlike the pd/ps variants in this file, the expected shuffle
; comment here carries no {%k1} {z} annotation. These assertions are
; autogenerated, so this presumably mirrors what llc printed at the time -
; regenerate with update_llc_test_checks.py rather than hand-editing, and
; confirm.
1010 define <8 x i64> @test_mm512_maskz_unpacklo_epi64(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
1011 ; X32-LABEL: test_mm512_maskz_unpacklo_epi64:
1013 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1014 ; X32-NEXT: kmovw %eax, %k1
1015 ; X32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1018 ; X64-LABEL: test_mm512_maskz_unpacklo_epi64:
1020 ; X64-NEXT: kmovw %edi, %k1
1021 ; X64-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; i8 mask -> one i1 per result lane.
1023 %arg0 = bitcast i8 %a0 to <8 x i1>
1024 %res0 = shufflevector <8 x i64> %a1, <8 x i64> %a2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; Set lanes take the shuffle result, clear lanes are zeroed.
1025 %res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
; Unmasked unpacklo_pd pattern. The shuffle interleaves the even-indexed
; doubles of %a0 and %a1 (mask indices 8-15 address %a1):
; a0[0],a1[0],a0[2],a1[2],...
; Both the i386 and the x86_64 configuration are expected to emit one vunpcklpd
; on zmm registers.
1029 define <8 x double> @test_mm512_unpacklo_pd(<8 x double> %a0, <8 x double> %a1) {
1030 ; X32-LABEL: test_mm512_unpacklo_pd:
1032 ; X32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1035 ; X64-LABEL: test_mm512_unpacklo_pd:
1037 ; X64-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1039 %res = shufflevector <8 x double> %a0, <8 x double> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1040 ret <8 x double> %res
; Merge-masked unpacklo_pd pattern. %a1 is reinterpreted as eight i1 lane
; flags: lanes whose flag is set take the interleaved even doubles of %a2/%a3,
; all other lanes keep the value from %a0.
; Expected code moves the mask into %k1 (loaded from the stack on i386, taken
; from %edi on x86_64) and emits one vunpcklpd with {%k1} merge-masking.
1043 define <8 x double> @test_mm512_mask_unpacklo_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
1044 ; X32-LABEL: test_mm512_mask_unpacklo_pd:
1046 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1047 ; X32-NEXT: kmovw %eax, %k1
1048 ; X32-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
1051 ; X64-LABEL: test_mm512_mask_unpacklo_pd:
1053 ; X64-NEXT: kmovw %edi, %k1
1054 ; X64-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
; i8 mask -> one i1 per result lane.
1056 %arg1 = bitcast i8 %a1 to <8 x i1>
1057 %res0 = shufflevector <8 x double> %a2, <8 x double> %a3, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; Set lanes take the shuffle result, clear lanes pass %a0 through.
1058 %res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
1059 ret <8 x double> %res1
; Zero-masked unpacklo_pd pattern. %a0 is reinterpreted as eight i1 lane flags:
; lanes whose flag is set take the interleaved even doubles of %a1/%a2, all
; other lanes become zero.
; Expected code moves the mask into %k1 (loaded from the stack on i386, taken
; from %edi on x86_64) and emits one vunpcklpd with {%k1} {z} zero-masking.
1062 define <8 x double> @test_mm512_maskz_unpacklo_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
1063 ; X32-LABEL: test_mm512_maskz_unpacklo_pd:
1065 ; X32-NEXT: movb {{[0-9]+}}(%esp), %al
1066 ; X32-NEXT: kmovw %eax, %k1
1067 ; X32-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1070 ; X64-LABEL: test_mm512_maskz_unpacklo_pd:
1072 ; X64-NEXT: kmovw %edi, %k1
1073 ; X64-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; i8 mask -> one i1 per result lane.
1075 %arg0 = bitcast i8 %a0 to <8 x i1>
1076 %res0 = shufflevector <8 x double> %a1, <8 x double> %a2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; Set lanes take the shuffle result, clear lanes are zeroed.
1077 %res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
1078 ret <8 x double> %res1
; Unmasked unpacklo_ps pattern. Within every group of four floats the shuffle
; interleaves elements 0 and 1 of %a0 and %a1 (mask indices 16-31 address %a1):
; a0[0],a1[0],a0[1],a1[1],a0[4],a1[4],...
; Both the i386 and the x86_64 configuration are expected to emit one vunpcklps
; on zmm registers.
1081 define <16 x float> @test_mm512_unpacklo_ps(<16 x float> %a0, <16 x float> %a1) {
1082 ; X32-LABEL: test_mm512_unpacklo_ps:
1084 ; X32-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
1087 ; X64-LABEL: test_mm512_unpacklo_ps:
1089 ; X64-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
1091 %res = shufflevector <16 x float> %a0, <16 x float> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
1092 ret <16 x float> %res
; Merge-masked unpacklo_ps pattern. %a1 is reinterpreted as sixteen i1 lane
; flags: lanes whose flag is set take the low-half interleave of %a2/%a3
; (elements 0 and 1 of every group of four), all other lanes keep %a0.
; Expected code moves the 16-bit mask into %k1 (loaded from the stack on i386,
; taken from %edi on x86_64) and emits one vunpcklps with {%k1} merge-masking.
1095 define <16 x float> @test_mm512_mask_unpacklo_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2, <16 x float> %a3) {
1096 ; X32-LABEL: test_mm512_mask_unpacklo_ps:
1098 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
1099 ; X32-NEXT: kmovw %eax, %k1
1100 ; X32-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
1103 ; X64-LABEL: test_mm512_mask_unpacklo_ps:
1105 ; X64-NEXT: kmovw %edi, %k1
1106 ; X64-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
; i16 mask -> one i1 per result lane.
1108 %arg1 = bitcast i16 %a1 to <16 x i1>
1109 %res0 = shufflevector <16 x float> %a2, <16 x float> %a3, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
; Set lanes take the shuffle result, clear lanes pass %a0 through.
1110 %res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
1111 ret <16 x float> %res1
; Zero-masked unpacklo_ps pattern. %a0 is reinterpreted as sixteen i1 lane
; flags: lanes whose flag is set take the low-half interleave of %a1/%a2
; (elements 0 and 1 of every group of four), all other lanes become zero.
; Expected code moves the 16-bit mask into %k1 (loaded from the stack on i386,
; taken from %edi on x86_64) and emits one vunpcklps with {%k1} {z}.
1114 define <16 x float> @test_mm512_maskz_unpacklo_ps(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
1115 ; X32-LABEL: test_mm512_maskz_unpacklo_ps:
1117 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
1118 ; X32-NEXT: kmovw %eax, %k1
1119 ; X32-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
1122 ; X64-LABEL: test_mm512_maskz_unpacklo_ps:
1124 ; X64-NEXT: kmovw %edi, %k1
1125 ; X64-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; i16 mask -> one i1 per result lane.
1127 %arg0 = bitcast i16 %a0 to <16 x i1>
1128 %res0 = shufflevector <16 x float> %a1, <16 x float> %a2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
; Set lanes take the shuffle result, clear lanes are zeroed.
1129 %res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
1130 ret <16 x float> %res1