1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s
; Extracts elements 16-23 (the third 128-bit chunk) of a <32 x i16>; the check expects vextracti32x4 with immediate 2.
5 define <8 x i16> @extract_subvector128_v32i16(<32 x i16> %x) nounwind {
6 ; SKX-LABEL: extract_subvector128_v32i16:
8 ; SKX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
10 %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; Extracts elements 0-7 (the lowest 128 bits) of a <32 x i16>; the visible check is only a sub-register kill annotation, not an extract instruction.
14 define <8 x i16> @extract_subvector128_v32i16_first_element(<32 x i16> %x) nounwind {
15 ; SKX-LABEL: extract_subvector128_v32i16_first_element:
17 ; SKX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
19 %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Extracts elements 32-47 (the third 128-bit chunk) of a <64 x i8>; the check expects vextracti32x4 with immediate 2.
23 define <16 x i8> @extract_subvector128_v64i8(<64 x i8> %x) nounwind {
24 ; SKX-LABEL: extract_subvector128_v64i8:
26 ; SKX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
28 %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38,i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
; Extracts elements 0-15 (the lowest 128 bits) of a <64 x i8>; the visible check is only a sub-register kill annotation, not an extract instruction.
32 define <16 x i8> @extract_subvector128_v64i8_first_element(<64 x i8> %x) nounwind {
33 ; SKX-LABEL: extract_subvector128_v64i8_first_element:
35 ; SKX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
37 %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Extracts elements 16-31 (the upper 256 bits) of a <32 x i16>; the check expects vextracti64x4 with immediate 1.
42 define <16 x i16> @extract_subvector256_v32i16(<32 x i16> %x) nounwind {
43 ; SKX-LABEL: extract_subvector256_v32i16:
45 ; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
47 %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Extracts elements 32-63 (the upper 256 bits) of a <64 x i8>; the check expects vextracti64x4 with immediate 1.
51 define <32 x i8> @extract_subvector256_v64i8(<64 x i8> %x) nounwind {
52 ; SKX-LABEL: extract_subvector256_v64i8:
54 ; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
56 %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
; Stores elements 2-3 (the upper 128 bits) of a <4 x double> through an align-1 pointer; the check expects vextractf128 $1 writing directly to (%rdi).
; NOTE(review): the name says v8f64, but the vector operand is <4 x double>.
60 define void @extract_subvector256_v8f64_store(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
61 ; SKX-LABEL: extract_subvector256_v8f64_store:
62 ; SKX: ## BB#0: ## %entry
63 ; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi)
66 %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3>
67 %1 = bitcast double* %addr to <2 x double>*
68 store <2 x double> %0, <2 x double>* %1, align 1
; Stores elements 4-7 (the upper 128 bits) of an <8 x float> through an align-1 pointer; the check expects vextractf128 $1 writing directly to (%rdi).
72 define void @extract_subvector256_v8f32_store(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
73 ; SKX-LABEL: extract_subvector256_v8f32_store:
74 ; SKX: ## BB#0: ## %entry
75 ; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi)
78 %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
79 %1 = bitcast float* %addr to <4 x float>*
80 store <4 x float> %0, <4 x float>* %1, align 1
; Stores elements 2-3 (the upper 128 bits) of a <4 x i64> through an align-1 pointer; the check expects the integer form vextracti128 $1 writing directly to (%rdi).
84 define void @extract_subvector256_v4i64_store(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
85 ; SKX-LABEL: extract_subvector256_v4i64_store:
86 ; SKX: ## BB#0: ## %entry
87 ; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
90 %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
91 %1 = bitcast i64* %addr to <2 x i64>*
92 store <2 x i64> %0, <2 x i64>* %1, align 1
; Stores elements 4-7 (the upper 128 bits) of an <8 x i32> through an align-1 pointer; the check expects vextracti128 $1 writing directly to (%rdi).
96 define void @extract_subvector256_v8i32_store(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
97 ; SKX-LABEL: extract_subvector256_v8i32_store:
98 ; SKX: ## BB#0: ## %entry
99 ; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
102 %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
103 %1 = bitcast i32* %addr to <4 x i32>*
104 store <4 x i32> %0, <4 x i32>* %1, align 1
; Stores elements 8-15 (the upper 128 bits) of a <16 x i16> through an align-1 pointer; the check expects vextracti128 $1 writing directly to (%rdi).
108 define void @extract_subvector256_v16i16_store(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
109 ; SKX-LABEL: extract_subvector256_v16i16_store:
110 ; SKX: ## BB#0: ## %entry
111 ; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
114 %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
115 %1 = bitcast i16* %addr to <8 x i16>*
116 store <8 x i16> %0, <8 x i16>* %1, align 1
; Stores elements 16-31 (the upper 128 bits) of a <32 x i8> through an align-1 pointer; the check expects vextracti128 $1 writing directly to (%rdi).
120 define void @extract_subvector256_v32i8_store(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
121 ; SKX-LABEL: extract_subvector256_v32i8_store:
122 ; SKX: ## BB#0: ## %entry
123 ; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
126 %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
127 %1 = bitcast i8* %addr to <16 x i8>*
128 store <16 x i8> %0, <16 x i8>* %1, align 1
; Stores elements 0-1 (the low 128 bits) of a <4 x double> with align 1; the check expects a plain unaligned vmovups of %xmm0, with no extract instruction.
132 define void @extract_subvector256_v4f64_store_lo(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
133 ; SKX-LABEL: extract_subvector256_v4f64_store_lo:
134 ; SKX: ## BB#0: ## %entry
135 ; SKX-NEXT: vmovups %xmm0, (%rdi)
138 %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
139 %1 = bitcast double* %addr to <2 x double>*
140 store <2 x double> %0, <2 x double>* %1, align 1
; Same low-128-bit store of a <4 x double>, but with align 16; the check expects the aligned vmovaps of %xmm0.
144 define void @extract_subvector256_v4f64_store_lo_align_16(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
145 ; SKX-LABEL: extract_subvector256_v4f64_store_lo_align_16:
146 ; SKX: ## BB#0: ## %entry
147 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
150 %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
151 %1 = bitcast double* %addr to <2 x double>*
152 store <2 x double> %0, <2 x double>* %1, align 16
; Stores elements 0-3 (the low 128 bits) of an <8 x float> with align 1; the check expects an unaligned vmovups of %xmm0.
156 define void @extract_subvector256_v4f32_store_lo(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
157 ; SKX-LABEL: extract_subvector256_v4f32_store_lo:
158 ; SKX: ## BB#0: ## %entry
159 ; SKX-NEXT: vmovups %xmm0, (%rdi)
162 %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
163 %1 = bitcast float* %addr to <4 x float>*
164 store <4 x float> %0, <4 x float>* %1, align 1
; Same low-128-bit store of an <8 x float>, but with align 16; the check expects the aligned vmovaps of %xmm0.
168 define void @extract_subvector256_v4f32_store_lo_align_16(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
169 ; SKX-LABEL: extract_subvector256_v4f32_store_lo_align_16:
170 ; SKX: ## BB#0: ## %entry
171 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
174 %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
175 %1 = bitcast float* %addr to <4 x float>*
176 store <4 x float> %0, <4 x float>* %1, align 16
; Stores elements 0-1 (the low 128 bits) of a <4 x i64> with align 1; the check expects vmovups of %xmm0 (the FP-domain move, even for integer data).
180 define void @extract_subvector256_v2i64_store_lo(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
181 ; SKX-LABEL: extract_subvector256_v2i64_store_lo:
182 ; SKX: ## BB#0: ## %entry
183 ; SKX-NEXT: vmovups %xmm0, (%rdi)
186 %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
187 %1 = bitcast i64* %addr to <2 x i64>*
188 store <2 x i64> %0, <2 x i64>* %1, align 1
; Same low-128-bit store of a <4 x i64>, but with align 16; the check expects the aligned vmovaps of %xmm0.
192 define void @extract_subvector256_v2i64_store_lo_align_16(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
193 ; SKX-LABEL: extract_subvector256_v2i64_store_lo_align_16:
194 ; SKX: ## BB#0: ## %entry
195 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
198 %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
199 %1 = bitcast i64* %addr to <2 x i64>*
200 store <2 x i64> %0, <2 x i64>* %1, align 16
; Stores elements 0-3 (the low 128 bits) of an <8 x i32> with align 1; the check expects an unaligned vmovups of %xmm0.
204 define void @extract_subvector256_v4i32_store_lo(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
205 ; SKX-LABEL: extract_subvector256_v4i32_store_lo:
206 ; SKX: ## BB#0: ## %entry
207 ; SKX-NEXT: vmovups %xmm0, (%rdi)
210 %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
211 %1 = bitcast i32* %addr to <4 x i32>*
212 store <4 x i32> %0, <4 x i32>* %1, align 1
; Same low-128-bit store of an <8 x i32>, but with align 16; the check expects the aligned vmovaps of %xmm0.
216 define void @extract_subvector256_v4i32_store_lo_align_16(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
217 ; SKX-LABEL: extract_subvector256_v4i32_store_lo_align_16:
218 ; SKX: ## BB#0: ## %entry
219 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
222 %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
223 %1 = bitcast i32* %addr to <4 x i32>*
224 store <4 x i32> %0, <4 x i32>* %1, align 16
; Stores elements 0-7 (the low 128 bits) of a <16 x i16> with align 1; the check expects an unaligned vmovups of %xmm0.
228 define void @extract_subvector256_v8i16_store_lo(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
229 ; SKX-LABEL: extract_subvector256_v8i16_store_lo:
230 ; SKX: ## BB#0: ## %entry
231 ; SKX-NEXT: vmovups %xmm0, (%rdi)
234 %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
235 %1 = bitcast i16* %addr to <8 x i16>*
236 store <8 x i16> %0, <8 x i16>* %1, align 1
; Same low-128-bit store of a <16 x i16>, but with align 16; the check expects the aligned vmovaps of %xmm0.
240 define void @extract_subvector256_v8i16_store_lo_align_16(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
241 ; SKX-LABEL: extract_subvector256_v8i16_store_lo_align_16:
242 ; SKX: ## BB#0: ## %entry
243 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
246 %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
247 %1 = bitcast i16* %addr to <8 x i16>*
248 store <8 x i16> %0, <8 x i16>* %1, align 16
; Stores elements 0-15 (the low 128 bits) of a <32 x i8> with align 1; the check expects an unaligned vmovups of %xmm0.
252 define void @extract_subvector256_v16i8_store_lo(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
253 ; SKX-LABEL: extract_subvector256_v16i8_store_lo:
254 ; SKX: ## BB#0: ## %entry
255 ; SKX-NEXT: vmovups %xmm0, (%rdi)
258 %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
259 %1 = bitcast i8* %addr to <16 x i8>*
260 store <16 x i8> %0, <16 x i8>* %1, align 1
; Same low-128-bit store of a <32 x i8>, but with align 16; the check expects the aligned vmovaps of %xmm0.
264 define void @extract_subvector256_v16i8_store_lo_align_16(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
265 ; SKX-LABEL: extract_subvector256_v16i8_store_lo_align_16:
266 ; SKX: ## BB#0: ## %entry
267 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
270 %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
271 %1 = bitcast i8* %addr to <16 x i8>*
272 store <16 x i8> %0, <16 x i8>* %1, align 16
; Stores elements 0-1 (the low 128 bits) of an <8 x double> with align 1; the check expects an unaligned vmovups of %xmm0.
276 define void @extract_subvector512_v2f64_store_lo(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
277 ; SKX-LABEL: extract_subvector512_v2f64_store_lo:
278 ; SKX: ## BB#0: ## %entry
279 ; SKX-NEXT: vmovups %xmm0, (%rdi)
282 %0 = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
283 %1 = bitcast double* %addr to <2 x double>*
284 store <2 x double> %0, <2 x double>* %1, align 1
; Same low-128-bit store of an <8 x double>, but with align 16; the check expects the aligned vmovaps of %xmm0.
288 define void @extract_subvector512_v2f64_store_lo_align_16(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
289 ; SKX-LABEL: extract_subvector512_v2f64_store_lo_align_16:
290 ; SKX: ## BB#0: ## %entry
291 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
294 %0 = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
295 %1 = bitcast double* %addr to <2 x double>*
296 store <2 x double> %0, <2 x double>* %1, align 16
; Stores elements 0-3 (the low 128 bits) of a <16 x float> with align 1; the check expects an unaligned vmovups of %xmm0.
300 define void @extract_subvector512_v4f32_store_lo(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
301 ; SKX-LABEL: extract_subvector512_v4f32_store_lo:
302 ; SKX: ## BB#0: ## %entry
303 ; SKX-NEXT: vmovups %xmm0, (%rdi)
306 %0 = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
307 %1 = bitcast float* %addr to <4 x float>*
308 store <4 x float> %0, <4 x float>* %1, align 1
; Same low-128-bit store of a <16 x float>, but with align 16; the check expects the aligned vmovaps of %xmm0.
312 define void @extract_subvector512_v4f32_store_lo_align_16(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
313 ; SKX-LABEL: extract_subvector512_v4f32_store_lo_align_16:
314 ; SKX: ## BB#0: ## %entry
315 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
318 %0 = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
319 %1 = bitcast float* %addr to <4 x float>*
320 store <4 x float> %0, <4 x float>* %1, align 16
; Stores elements 0-1 (the low 128 bits) of an <8 x i64> with align 1; the check expects an unaligned vmovups of %xmm0.
324 define void @extract_subvector512_v2i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
325 ; SKX-LABEL: extract_subvector512_v2i64_store_lo:
326 ; SKX: ## BB#0: ## %entry
327 ; SKX-NEXT: vmovups %xmm0, (%rdi)
330 %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
331 %1 = bitcast i64* %addr to <2 x i64>*
332 store <2 x i64> %0, <2 x i64>* %1, align 1
; Same low-128-bit store of an <8 x i64>, but with align 16; the check expects the aligned vmovaps of %xmm0.
336 define void @extract_subvector512_v2i64_store_lo_align_16(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
337 ; SKX-LABEL: extract_subvector512_v2i64_store_lo_align_16:
338 ; SKX: ## BB#0: ## %entry
339 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
342 %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
343 %1 = bitcast i64* %addr to <2 x i64>*
344 store <2 x i64> %0, <2 x i64>* %1, align 16
; Stores elements 0-3 (the low 128 bits) of a <16 x i32> with align 1; the check expects an unaligned vmovups of %xmm0.
348 define void @extract_subvector512_v4i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
349 ; SKX-LABEL: extract_subvector512_v4i32_store_lo:
350 ; SKX: ## BB#0: ## %entry
351 ; SKX-NEXT: vmovups %xmm0, (%rdi)
354 %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
355 %1 = bitcast i32* %addr to <4 x i32>*
356 store <4 x i32> %0, <4 x i32>* %1, align 1
; Same low-128-bit store of a <16 x i32>, but with align 16; the check expects the aligned vmovaps of %xmm0.
360 define void @extract_subvector512_v4i32_store_lo_align_16(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
361 ; SKX-LABEL: extract_subvector512_v4i32_store_lo_align_16:
362 ; SKX: ## BB#0: ## %entry
363 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
366 %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
367 %1 = bitcast i32* %addr to <4 x i32>*
368 store <4 x i32> %0, <4 x i32>* %1, align 16
; Stores elements 0-7 (the low 128 bits) of a <32 x i16> with align 1; the check expects an unaligned vmovups of %xmm0.
372 define void @extract_subvector512_v8i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
373 ; SKX-LABEL: extract_subvector512_v8i16_store_lo:
374 ; SKX: ## BB#0: ## %entry
375 ; SKX-NEXT: vmovups %xmm0, (%rdi)
378 %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
379 %1 = bitcast i16* %addr to <8 x i16>*
380 store <8 x i16> %0, <8 x i16>* %1, align 1
; Stores elements 0-15 (the low 128 bits) of a <64 x i8> with align 1; the check expects an unaligned vmovups of %xmm0.
384 define void @extract_subvector512_v16i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
385 ; SKX-LABEL: extract_subvector512_v16i8_store_lo:
386 ; SKX: ## BB#0: ## %entry
387 ; SKX-NEXT: vmovups %xmm0, (%rdi)
390 %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
391 %1 = bitcast i8* %addr to <16 x i8>*
392 store <16 x i8> %0, <16 x i8>* %1, align 1
; Same low-128-bit store of a <64 x i8>, but with align 16; the check expects the aligned vmovaps of %xmm0.
396 define void @extract_subvector512_v16i8_store_lo_align_16(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
397 ; SKX-LABEL: extract_subvector512_v16i8_store_lo_align_16:
398 ; SKX: ## BB#0: ## %entry
399 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
402 %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
403 %1 = bitcast i8* %addr to <16 x i8>*
404 store <16 x i8> %0, <16 x i8>* %1, align 16
; Stores elements 0-3 (the low 256 bits) of an <8 x double> with align 1; the check expects an unaligned vmovups of %ymm0.
408 define void @extract_subvector512_v4f64_store_lo(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
409 ; SKX-LABEL: extract_subvector512_v4f64_store_lo:
410 ; SKX: ## BB#0: ## %entry
411 ; SKX-NEXT: vmovups %ymm0, (%rdi)
414 %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
415 %1 = bitcast double* %addr to <4 x double>*
416 store <4 x double> %0, <4 x double>* %1, align 1
; Low-256-bit store of an <8 x double> with align 16. The alignment is smaller than the 32-byte vector being stored, and the check still expects the unaligned vmovups of %ymm0.
420 define void @extract_subvector512_v4f64_store_lo_align_16(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
421 ; SKX-LABEL: extract_subvector512_v4f64_store_lo_align_16:
422 ; SKX: ## BB#0: ## %entry
423 ; SKX-NEXT: vmovups %ymm0, (%rdi)
426 %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
427 %1 = bitcast double* %addr to <4 x double>*
428 store <4 x double> %0, <4 x double>* %1, align 16
; Low-256-bit store of an <8 x double> with align 32 (matching the stored vector size); the check expects the aligned vmovaps of %ymm0.
432 define void @extract_subvector512_v4f64_store_lo_align_32(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
433 ; SKX-LABEL: extract_subvector512_v4f64_store_lo_align_32:
434 ; SKX: ## BB#0: ## %entry
435 ; SKX-NEXT: vmovaps %ymm0, (%rdi)
438 %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
439 %1 = bitcast double* %addr to <4 x double>*
440 store <4 x double> %0, <4 x double>* %1, align 32
; Stores elements 0-7 (the low 256 bits) of a <16 x float> with align 1; the check expects an unaligned vmovups of %ymm0.
444 define void @extract_subvector512_v8f32_store_lo(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
445 ; SKX-LABEL: extract_subvector512_v8f32_store_lo:
446 ; SKX: ## BB#0: ## %entry
447 ; SKX-NEXT: vmovups %ymm0, (%rdi)
450 %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
451 %1 = bitcast float* %addr to <8 x float>*
452 store <8 x float> %0, <8 x float>* %1, align 1
; Low-256-bit store of a <16 x float> with align 16. A 256-bit vmovaps requires a 32-byte-aligned address, so a 16-byte-aligned store must use the unaligned vmovups, as in the other *_store_lo_align_16 tests that store a ymm register.
456 define void @extract_subvector512_v8f32_store_lo_align_16(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
457 ; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_16:
458 ; SKX: ## BB#0: ## %entry
459 ; SKX-NEXT: vmovups %ymm0, (%rdi)
462 %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
463 %1 = bitcast float* %addr to <8 x float>*
464 store <8 x float> %0, <8 x float>* %1, align 16
; Low-256-bit store of a <16 x float> with align 32 (matching the stored vector size); the check expects the aligned vmovaps of %ymm0.
468 define void @extract_subvector512_v8f32_store_lo_align_32(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
469 ; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_32:
470 ; SKX: ## BB#0: ## %entry
471 ; SKX-NEXT: vmovaps %ymm0, (%rdi)
474 %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
475 %1 = bitcast float* %addr to <8 x float>*
476 store <8 x float> %0, <8 x float>* %1, align 32
; Stores elements 0-3 (the low 256 bits) of an <8 x i64> with align 1; the check expects an unaligned vmovups of %ymm0.
480 define void @extract_subvector512_v4i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
481 ; SKX-LABEL: extract_subvector512_v4i64_store_lo:
482 ; SKX: ## BB#0: ## %entry
483 ; SKX-NEXT: vmovups %ymm0, (%rdi)
486 %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
487 %1 = bitcast i64* %addr to <4 x i64>*
488 store <4 x i64> %0, <4 x i64>* %1, align 1
; Low-256-bit store of an <8 x i64> with align 16. The alignment is smaller than the 32-byte vector being stored, and the check still expects the unaligned vmovups of %ymm0.
492 define void @extract_subvector512_v4i64_store_lo_align_16(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
493 ; SKX-LABEL: extract_subvector512_v4i64_store_lo_align_16:
494 ; SKX: ## BB#0: ## %entry
495 ; SKX-NEXT: vmovups %ymm0, (%rdi)
498 %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
499 %1 = bitcast i64* %addr to <4 x i64>*
500 store <4 x i64> %0, <4 x i64>* %1, align 16
; Low-256-bit store of an <8 x i64> with align 32 (matching the stored vector size); the check expects the aligned vmovaps of %ymm0.
504 define void @extract_subvector512_v4i64_store_lo_align_32(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
505 ; SKX-LABEL: extract_subvector512_v4i64_store_lo_align_32:
506 ; SKX: ## BB#0: ## %entry
507 ; SKX-NEXT: vmovaps %ymm0, (%rdi)
510 %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
511 %1 = bitcast i64* %addr to <4 x i64>*
512 store <4 x i64> %0, <4 x i64>* %1, align 32
; Stores elements 0-7 (the low 256 bits) of a <16 x i32> with align 1; the check expects an unaligned vmovups of %ymm0.
516 define void @extract_subvector512_v8i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
517 ; SKX-LABEL: extract_subvector512_v8i32_store_lo:
518 ; SKX: ## BB#0: ## %entry
519 ; SKX-NEXT: vmovups %ymm0, (%rdi)
522 %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
523 %1 = bitcast i32* %addr to <8 x i32>*
524 store <8 x i32> %0, <8 x i32>* %1, align 1
; Low-256-bit store of a <16 x i32> with align 16. The alignment is smaller than the 32-byte vector being stored, and the check still expects the unaligned vmovups of %ymm0.
528 define void @extract_subvector512_v8i32_store_lo_align_16(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
529 ; SKX-LABEL: extract_subvector512_v8i32_store_lo_align_16:
530 ; SKX: ## BB#0: ## %entry
531 ; SKX-NEXT: vmovups %ymm0, (%rdi)
534 %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
535 %1 = bitcast i32* %addr to <8 x i32>*
536 store <8 x i32> %0, <8 x i32>* %1, align 16
; Low-256-bit store of a <16 x i32> with align 32 (matching the stored vector size); the check expects the aligned vmovaps of %ymm0.
540 define void @extract_subvector512_v8i32_store_lo_align_32(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
541 ; SKX-LABEL: extract_subvector512_v8i32_store_lo_align_32:
542 ; SKX: ## BB#0: ## %entry
543 ; SKX-NEXT: vmovaps %ymm0, (%rdi)
546 %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
547 %1 = bitcast i32* %addr to <8 x i32>*
548 store <8 x i32> %0, <8 x i32>* %1, align 32
; Stores elements 0-15 (the low 256 bits) of a <32 x i16> with align 1; the check expects an unaligned vmovups of %ymm0.
552 define void @extract_subvector512_v16i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
553 ; SKX-LABEL: extract_subvector512_v16i16_store_lo:
554 ; SKX: ## BB#0: ## %entry
555 ; SKX-NEXT: vmovups %ymm0, (%rdi)
558 %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
559 %1 = bitcast i16* %addr to <16 x i16>*
560 store <16 x i16> %0, <16 x i16>* %1, align 1
; Low-256-bit store of a <32 x i16> with align 16. The alignment is smaller than the 32-byte vector being stored, and the check still expects the unaligned vmovups of %ymm0.
564 define void @extract_subvector512_v16i16_store_lo_align_16(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
565 ; SKX-LABEL: extract_subvector512_v16i16_store_lo_align_16:
566 ; SKX: ## BB#0: ## %entry
567 ; SKX-NEXT: vmovups %ymm0, (%rdi)
570 %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
571 %1 = bitcast i16* %addr to <16 x i16>*
572 store <16 x i16> %0, <16 x i16>* %1, align 16
; Low-256-bit store of a <32 x i16> with align 32 (matching the stored vector size); the check expects the aligned vmovaps of %ymm0.
576 define void @extract_subvector512_v16i16_store_lo_align_32(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
577 ; SKX-LABEL: extract_subvector512_v16i16_store_lo_align_32:
578 ; SKX: ## BB#0: ## %entry
579 ; SKX-NEXT: vmovaps %ymm0, (%rdi)
582 %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
583 %1 = bitcast i16* %addr to <16 x i16>*
584 store <16 x i16> %0, <16 x i16>* %1, align 32
; Stores elements 0-31 (the low 256 bits) of a <64 x i8> with align 1; the check expects an unaligned vmovups of %ymm0.
588 define void @extract_subvector512_v32i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
589 ; SKX-LABEL: extract_subvector512_v32i8_store_lo:
590 ; SKX: ## BB#0: ## %entry
591 ; SKX-NEXT: vmovups %ymm0, (%rdi)
594 %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
595 %1 = bitcast i8* %addr to <32 x i8>*
596 store <32 x i8> %0, <32 x i8>* %1, align 1
; Low-256-bit store of a <64 x i8> with align 16. The alignment is smaller than the 32-byte vector being stored, and the check still expects the unaligned vmovups of %ymm0.
600 define void @extract_subvector512_v32i8_store_lo_align_16(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
601 ; SKX-LABEL: extract_subvector512_v32i8_store_lo_align_16:
602 ; SKX: ## BB#0: ## %entry
603 ; SKX-NEXT: vmovups %ymm0, (%rdi)
606 %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
607 %1 = bitcast i8* %addr to <32 x i8>*
608 store <32 x i8> %0, <32 x i8>* %1, align 16
; Low-256-bit store of a <64 x i8> with align 32 (matching the stored vector size); the check expects the aligned vmovaps of %ymm0.
612 define void @extract_subvector512_v32i8_store_lo_align_32(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
613 ; SKX-LABEL: extract_subvector512_v32i8_store_lo_align_32:
614 ; SKX: ## BB#0: ## %entry
615 ; SKX-NEXT: vmovaps %ymm0, (%rdi)
618 %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
619 %1 = bitcast i8* %addr to <32 x i8>*
620 store <32 x i8> %0, <32 x i8>* %1, align 32
; Merge-masked extract: takes elements 4-7 (upper 256 bits) of %__A and, per lane, selects between them and %__W using the low 4 bits of %__U.
; The checks expect the mask moved into %k1 followed by a single masked vextractf64x4 $1 into %ymm0.
624 define <4 x double> @test_mm512_mask_extractf64x4_pd(<4 x double> %__W, i8 %__U, <8 x double> %__A) {
625 ; SKX-LABEL: test_mm512_mask_extractf64x4_pd:
626 ; SKX: ## BB#0: ## %entry
627 ; SKX-NEXT: kmovb %edi, %k1
628 ; SKX-NEXT: vextractf64x4 $1, %zmm1, %ymm0 {%k1}
631 %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
632 %0 = bitcast i8 %__U to <8 x i1>
633 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
634 %1 = select <4 x i1> %extract, <4 x double> %shuffle, <4 x double> %__W
; Zero-masked extract: takes elements 4-7 (upper 256 bits) of %__A and zeroes the lanes whose bit in the low 4 bits of %__U is clear.
; The checks expect the mask moved into %k1 followed by a single vextractf64x4 $1 with {z}.
638 define <4 x double> @test_mm512_maskz_extractf64x4_pd(i8 %__U, <8 x double> %__A) {
639 ; SKX-LABEL: test_mm512_maskz_extractf64x4_pd:
640 ; SKX: ## BB#0: ## %entry
641 ; SKX-NEXT: kmovb %edi, %k1
642 ; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z}
645 %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
646 %0 = bitcast i8 %__U to <8 x i1>
647 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
648 %1 = select <4 x i1> %extract, <4 x double> %shuffle, <4 x double> zeroinitializer
; Merge-masked extract: reinterprets %__A as <16 x float>, takes elements 4-7 (the second 128-bit chunk) and selects per lane against %__W using the low 4 bits of %__U.
; The checks expect the mask moved into %k1 followed by a masked vextractf32x4 $1 into %xmm0.
652 define <4 x float> @test_mm512_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x double> %__A) {
653 ; SKX-LABEL: test_mm512_mask_extractf32x4_ps:
654 ; SKX: ## BB#0: ## %entry
655 ; SKX-NEXT: kmovb %edi, %k1
656 ; SKX-NEXT: vextractf32x4 $1, %zmm1, %xmm0 {%k1}
659 %0 = bitcast <8 x double> %__A to <16 x float>
660 %shuffle = shufflevector <16 x float> %0, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
661 %1 = bitcast i8 %__U to <8 x i1>
662 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
663 %2 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> %__W
; Zero-masked extract: reinterprets %__A as <16 x float>, takes elements 4-7 (the second 128-bit chunk) and zeroes lanes whose bit in the low 4 bits of %__U is clear.
; The checks expect the mask moved into %k1 followed by a vextractf32x4 $1 with {z}.
667 define <4 x float> @test_mm512_maskz_extractf32x4_ps(i8 %__U, <8 x double> %__A) {
668 ; SKX-LABEL: test_mm512_maskz_extractf32x4_ps:
669 ; SKX: ## BB#0: ## %entry
670 ; SKX-NEXT: kmovb %edi, %k1
671 ; SKX-NEXT: vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z}
674 %0 = bitcast <8 x double> %__A to <16 x float>
675 %shuffle = shufflevector <16 x float> %0, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
676 %1 = bitcast i8 %__U to <8 x i1>
677 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
678 %2 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> zeroinitializer
; Merge-masked extract: takes elements 2-3 (upper 128 bits) of a <4 x double> and selects per lane against %__W using the low 2 bits of %__U.
; The checks expect the mask moved into %k1 followed by a masked vextractf64x2 $1 into %xmm0.
682 define <2 x double> @test_mm256_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <4 x double> %__A) {
683 ; SKX-LABEL: test_mm256_mask_extractf64x2_pd:
684 ; SKX: ## BB#0: ## %entry
685 ; SKX-NEXT: kmovb %edi, %k1
686 ; SKX-NEXT: vextractf64x2 $1, %ymm1, %xmm0 {%k1}
689 %shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3>
690 %0 = bitcast i8 %__U to <8 x i1>
691 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
692 %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> %__W
; Zero-masked extract: takes elements 2-3 (upper 128 bits) of a <4 x double> and zeroes lanes whose bit in the low 2 bits of %__U is clear.
; The checks expect the mask moved into %k1 followed by a vextractf64x2 $1 with {z}.
696 define <2 x double> @test_mm256_maskz_extractf64x2_pd(i8 %__U, <4 x double> %__A) {
697 ; SKX-LABEL: test_mm256_maskz_extractf64x2_pd:
698 ; SKX: ## BB#0: ## %entry
699 ; SKX-NEXT: kmovb %edi, %k1
700 ; SKX-NEXT: vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z}
703 %shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3>
704 %0 = bitcast i8 %__U to <8 x i1>
705 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
706 %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> zeroinitializer
; Merge-masked integer extract: takes elements 2-3 (upper 128 bits) of a <4 x i64> and selects per lane against %__W using the low 2 bits of %__U.
; The checks expect the mask moved into %k1 followed by a masked vextracti64x2 $1 into %xmm0.
710 define <2 x i64> @test_mm256_mask_extracti64x2_epi64(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
711 ; SKX-LABEL: test_mm256_mask_extracti64x2_epi64:
712 ; SKX: ## BB#0: ## %entry
713 ; SKX-NEXT: kmovb %edi, %k1
714 ; SKX-NEXT: vextracti64x2 $1, %ymm1, %xmm0 {%k1}
717 %shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
718 %0 = bitcast i8 %__U to <8 x i1>
719 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
720 %1 = select <2 x i1> %extract, <2 x i64> %shuffle, <2 x i64> %__W
; Zero-masked integer extract: takes elements 2-3 (upper 128 bits) of a <4 x i64> and zeroes lanes whose bit in the low 2 bits of %__U is clear.
; The checks expect the mask moved into %k1 followed by a vextracti64x2 $1 with {z}.
724 define <2 x i64> @test_mm256_maskz_extracti64x2_epi64(i8 %__U, <4 x i64> %__A) {
725 ; SKX-LABEL: test_mm256_maskz_extracti64x2_epi64:
726 ; SKX: ## BB#0: ## %entry
727 ; SKX-NEXT: kmovb %edi, %k1
728 ; SKX-NEXT: vextracti64x2 $1, %ymm0, %xmm0 {%k1} {z}
731 %shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
732 %0 = bitcast i8 %__U to <8 x i1>
733 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
734 %1 = select <2 x i1> %extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
; Merge-masked extract: takes elements 4-7 (upper 128 bits) of an <8 x float> and selects per lane against %__W using the low 4 bits of %__U.
; The checks expect the mask moved into %k1 followed by a masked vextractf32x4 $1 into %xmm0.
738 define <4 x float> @test_mm256_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x float> %__A) {
739 ; SKX-LABEL: test_mm256_mask_extractf32x4_ps:
740 ; SKX: ## BB#0: ## %entry
741 ; SKX-NEXT: kmovb %edi, %k1
742 ; SKX-NEXT: vextractf32x4 $1, %ymm1, %xmm0 {%k1}
745 %shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
746 %0 = bitcast i8 %__U to <8 x i1>
747 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
748 %1 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> %__W
; Zero-masked extract: takes elements 4-7 (upper 128 bits) of an <8 x float> and zeroes lanes whose bit in the low 4 bits of %__U is clear.
; The checks expect the mask moved into %k1 followed by a vextractf32x4 $1 with {z}.
752 define <4 x float> @test_mm256_maskz_extractf32x4_ps(i8 %__U, <8 x float> %__A) {
753 ; SKX-LABEL: test_mm256_maskz_extractf32x4_ps:
754 ; SKX: ## BB#0: ## %entry
755 ; SKX-NEXT: kmovb %edi, %k1
756 ; SKX-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z}
759 %shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
760 %0 = bitcast i8 %__U to <8 x i1>
761 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
762 %1 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> zeroinitializer
; Merge-masked 32-bit-lane extract on i64-typed arguments: %__A and %__W are bitcast to i32 vectors, elements 4-7 of %__A are selected against %__W by the low 4 bits of %__U, and the result is bitcast back to <2 x i64>.
; The checks expect the mask moved into %k1 followed by a masked vextracti32x4 $1 into %xmm0.
766 define <2 x i64> @test_mm256_mask_extracti32x4_epi32(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
767 ; SKX-LABEL: test_mm256_mask_extracti32x4_epi32:
768 ; SKX: ## BB#0: ## %entry
769 ; SKX-NEXT: kmovb %edi, %k1
770 ; SKX-NEXT: vextracti32x4 $1, %ymm1, %xmm0 {%k1}
773 %0 = bitcast <4 x i64> %__A to <8 x i32>
774 %shuffle = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
775 %1 = bitcast <2 x i64> %__W to <4 x i32>
776 %2 = bitcast i8 %__U to <8 x i1>
777 %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
778 %3 = select <4 x i1> %extract, <4 x i32> %shuffle, <4 x i32> %1
779 %4 = bitcast <4 x i32> %3 to <2 x i64>
; Zero-masked 32-bit-lane extract on an i64-typed argument: %__A is bitcast to <8 x i32>, elements 4-7 are kept or zeroed according to the low 4 bits of %__U, and the result is bitcast back to <2 x i64>.
; The checks expect the mask moved into %k1 followed by a vextracti32x4 $1 with {z}.
783 define <2 x i64> @test_mm256_maskz_extracti32x4_epi32(i8 %__U, <4 x i64> %__A) {
784 ; SKX-LABEL: test_mm256_maskz_extracti32x4_epi32:
785 ; SKX: ## BB#0: ## %entry
786 ; SKX-NEXT: kmovb %edi, %k1
787 ; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0 {%k1} {z}
790 %0 = bitcast <4 x i64> %__A to <8 x i32>
791 %shuffle = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
792 %1 = bitcast i8 %__U to <8 x i1>
793 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
794 %2 = select <4 x i1> %extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
795 %3 = bitcast <4 x i32> %2 to <2 x i64>
; Merge-masked extract: takes elements 8-15 (upper 256 bits) of a <16 x float> and selects per lane against %__W using all 8 bits of %__U (no mask narrowing is needed).
; The checks expect the mask moved into %k1 followed by a masked vextractf32x8 $1 into %ymm0.
799 define <8 x float> @test_mm512_mask_extractf32x8_ps(<8 x float> %__W, i8 %__U, <16 x float> %__A) {
800 ; SKX-LABEL: test_mm512_mask_extractf32x8_ps:
801 ; SKX: ## BB#0: ## %entry
802 ; SKX-NEXT: kmovb %edi, %k1
803 ; SKX-NEXT: vextractf32x8 $1, %zmm1, %ymm0 {%k1}
806 %shuffle = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
807 %0 = bitcast i8 %__U to <8 x i1>
808 %1 = select <8 x i1> %0, <8 x float> %shuffle, <8 x float> %__W
; Zero-masked extract: takes elements 8-15 (upper 256 bits) of a <16 x float> and zeroes lanes whose bit in %__U is clear (all 8 mask bits are used).
; The checks expect the mask moved into %k1 followed by a vextractf32x8 $1 with {z}.
812 define <8 x float> @test_mm512_maskz_extractf32x8_ps(i8 %__U, <16 x float> %__A) {
813 ; SKX-LABEL: test_mm512_maskz_extractf32x8_ps:
814 ; SKX: ## BB#0: ## %entry
815 ; SKX-NEXT: kmovb %edi, %k1
816 ; SKX-NEXT: vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z}
819 %shuffle = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
820 %0 = bitcast i8 %__U to <8 x i1>
821 %1 = select <8 x i1> %0, <8 x float> %shuffle, <8 x float> zeroinitializer
; Merge-masked extract: takes elements 6-7 (the fourth 128-bit chunk) of an <8 x double> and selects per lane against %__W using the low 2 bits of %__U.
; The checks expect the mask moved into %k1 followed by a masked vextractf64x2 $3 into %xmm0.
825 define <2 x double> @test_mm512_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <8 x double> %__A) {
826 ; SKX-LABEL: test_mm512_mask_extractf64x2_pd:
827 ; SKX: ## BB#0: ## %entry
828 ; SKX-NEXT: kmovb %edi, %k1
829 ; SKX-NEXT: vextractf64x2 $3, %zmm1, %xmm0 {%k1}
832 %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7>
833 %0 = bitcast i8 %__U to <8 x i1>
834 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
835 %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> %__W
; Zero-masked extract: takes elements 6-7 (the fourth 128-bit chunk) of an <8 x double> and zeroes lanes whose bit in the low 2 bits of %__U is clear.
; The checks expect the mask moved into %k1 followed by a vextractf64x2 $3 with {z}.
839 define <2 x double> @test_mm512_maskz_extractf64x2_pd(i8 %__U, <8 x double> %__A) {
840 ; SKX-LABEL: test_mm512_maskz_extractf64x2_pd:
841 ; SKX: ## BB#0: ## %entry
842 ; SKX-NEXT: kmovb %edi, %k1
843 ; SKX-NEXT: vextractf64x2 $3, %zmm0, %xmm0 {%k1} {z}
846 %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7>
847 %0 = bitcast i8 %__U to <8 x i1>
848 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
849 %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> zeroinitializer