1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=VEX --check-prefix=AVX1
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=VEX --check-prefix=AVX2
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512DQ
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VLDQ
10 ; 32-bit runs (no FileCheck) to make sure we can still select these conversions without crashing.
11 ; RUN: llc < %s -mtriple=i686-unknown-unknown
12 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse
13 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2
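;
; These tests exercise lowering of vector sitofp/uitofp from i8/i16/i32/i64
; elements to <N x float> and <N x double>, with the expected assembly checked
; per feature level through the check prefixes set up in the RUN lines above.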
16 ; Signed Integer to Double
19 define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
20 ; SSE-LABEL: sitofp_2i64_to_2f64:
22 ; SSE-NEXT: movq %xmm0, %rax
23 ; SSE-NEXT: cvtsi2sdq %rax, %xmm1
24 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
25 ; SSE-NEXT: movq %xmm0, %rax
26 ; SSE-NEXT: xorps %xmm0, %xmm0
27 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0
28 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
29 ; SSE-NEXT: movapd %xmm1, %xmm0
32 ; VEX-LABEL: sitofp_2i64_to_2f64:
34 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
35 ; VEX-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
36 ; VEX-NEXT: vmovq %xmm0, %rax
37 ; VEX-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
38 ; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
41 ; AVX512F-LABEL: sitofp_2i64_to_2f64:
43 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
44 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
45 ; AVX512F-NEXT: vmovq %xmm0, %rax
46 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
47 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
50 ; AVX512VL-LABEL: sitofp_2i64_to_2f64:
52 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
53 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
54 ; AVX512VL-NEXT: vmovq %xmm0, %rax
55 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
56 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
59 ; AVX512DQ-LABEL: sitofp_2i64_to_2f64:
61 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
62 ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
63 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
64 ; AVX512DQ-NEXT: vzeroupper
67 ; AVX512VLDQ-LABEL: sitofp_2i64_to_2f64:
69 ; AVX512VLDQ-NEXT: vcvtqq2pd %xmm0, %xmm0
70 ; AVX512VLDQ-NEXT: retq
71 %cvt = sitofp <2 x i64> %a to <2 x double>
72 ret <2 x double> %cvt
73 }
75 define <2 x double> @sitofp_2i32_to_2f64(<4 x i32> %a) {
76 ; SSE-LABEL: sitofp_2i32_to_2f64:
78 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
81 ; AVX-LABEL: sitofp_2i32_to_2f64:
83 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
85 %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
86 %cvt = sitofp <2 x i32> %shuf to <2 x double>
87 ret <2 x double> %cvt
88 }
90 define <2 x double> @sitofp_4i32_to_2f64(<4 x i32> %a) {
91 ; SSE-LABEL: sitofp_4i32_to_2f64:
93 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
96 ; AVX-LABEL: sitofp_4i32_to_2f64:
98 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
99 ; AVX-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
100 ; AVX-NEXT: vzeroupper
102 %cvt = sitofp <4 x i32> %a to <4 x double>
103 %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1>
104 ret <2 x double> %shuf
105 }
107 define <2 x double> @sitofp_2i16_to_2f64(<8 x i16> %a) {
108 ; SSE-LABEL: sitofp_2i16_to_2f64:
110 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
111 ; SSE-NEXT: psrad $16, %xmm0
112 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
115 ; AVX-LABEL: sitofp_2i16_to_2f64:
117 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
118 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
120 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
121 %cvt = sitofp <2 x i16> %shuf to <2 x double>
122 ret <2 x double> %cvt
123 }
125 define <2 x double> @sitofp_8i16_to_2f64(<8 x i16> %a) {
126 ; SSE-LABEL: sitofp_8i16_to_2f64:
128 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
129 ; SSE-NEXT: psrad $16, %xmm0
130 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
133 ; AVX1-LABEL: sitofp_8i16_to_2f64:
135 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
136 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
137 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
138 ; AVX1-NEXT: vzeroupper
141 ; AVX2-LABEL: sitofp_8i16_to_2f64:
143 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
144 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
145 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
146 ; AVX2-NEXT: vzeroupper
149 ; AVX512-LABEL: sitofp_8i16_to_2f64:
151 ; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
152 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
153 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
154 ; AVX512-NEXT: vzeroupper
156 %cvt = sitofp <8 x i16> %a to <8 x double>
157 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1>
158 ret <2 x double> %shuf
159 }
161 define <2 x double> @sitofp_2i8_to_2f64(<16 x i8> %a) {
162 ; SSE-LABEL: sitofp_2i8_to_2f64:
164 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
165 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
166 ; SSE-NEXT: psrad $24, %xmm0
167 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
170 ; AVX-LABEL: sitofp_2i8_to_2f64:
172 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
173 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
175 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
176 %cvt = sitofp <2 x i8> %shuf to <2 x double>
177 ret <2 x double> %cvt
178 }
180 define <2 x double> @sitofp_16i8_to_2f64(<16 x i8> %a) {
181 ; SSE-LABEL: sitofp_16i8_to_2f64:
183 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
184 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
185 ; SSE-NEXT: psrad $24, %xmm0
186 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
189 ; AVX1-LABEL: sitofp_16i8_to_2f64:
191 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
192 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
193 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
194 ; AVX1-NEXT: vzeroupper
197 ; AVX2-LABEL: sitofp_16i8_to_2f64:
199 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
200 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
201 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
202 ; AVX2-NEXT: vzeroupper
205 ; AVX512-LABEL: sitofp_16i8_to_2f64:
207 ; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
208 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
209 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
210 ; AVX512-NEXT: vzeroupper
212 %cvt = sitofp <16 x i8> %a to <16 x double>
213 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1>
214 ret <2 x double> %shuf
215 }
217 define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
218 ; SSE-LABEL: sitofp_4i64_to_4f64:
220 ; SSE-NEXT: movq %xmm0, %rax
221 ; SSE-NEXT: cvtsi2sdq %rax, %xmm2
222 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
223 ; SSE-NEXT: movq %xmm0, %rax
224 ; SSE-NEXT: xorps %xmm0, %xmm0
225 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0
226 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
227 ; SSE-NEXT: movq %xmm1, %rax
228 ; SSE-NEXT: cvtsi2sdq %rax, %xmm3
229 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
230 ; SSE-NEXT: movq %xmm0, %rax
231 ; SSE-NEXT: xorps %xmm0, %xmm0
232 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0
233 ; SSE-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
234 ; SSE-NEXT: movapd %xmm2, %xmm0
235 ; SSE-NEXT: movapd %xmm3, %xmm1
238 ; AVX1-LABEL: sitofp_4i64_to_4f64:
240 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
241 ; AVX1-NEXT: vpextrq $1, %xmm1, %rax
242 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
243 ; AVX1-NEXT: vmovq %xmm1, %rax
244 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
245 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
246 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
247 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
248 ; AVX1-NEXT: vmovq %xmm0, %rax
249 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
250 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
251 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
254 ; AVX2-LABEL: sitofp_4i64_to_4f64:
256 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
257 ; AVX2-NEXT: vpextrq $1, %xmm1, %rax
258 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
259 ; AVX2-NEXT: vmovq %xmm1, %rax
260 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
261 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
262 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
263 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
264 ; AVX2-NEXT: vmovq %xmm0, %rax
265 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
266 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
267 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
270 ; AVX512F-LABEL: sitofp_4i64_to_4f64:
272 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
273 ; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
274 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
275 ; AVX512F-NEXT: vmovq %xmm1, %rax
276 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
277 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
278 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
279 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
280 ; AVX512F-NEXT: vmovq %xmm0, %rax
281 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
282 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
283 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
286 ; AVX512VL-LABEL: sitofp_4i64_to_4f64:
288 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
289 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
290 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
291 ; AVX512VL-NEXT: vmovq %xmm1, %rax
292 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
293 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
294 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
295 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
296 ; AVX512VL-NEXT: vmovq %xmm0, %rax
297 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
298 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
299 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
300 ; AVX512VL-NEXT: retq
302 ; AVX512DQ-LABEL: sitofp_4i64_to_4f64:
304 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
305 ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
306 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
307 ; AVX512DQ-NEXT: retq
309 ; AVX512VLDQ-LABEL: sitofp_4i64_to_4f64:
310 ; AVX512VLDQ: # BB#0:
311 ; AVX512VLDQ-NEXT: vcvtqq2pd %ymm0, %ymm0
312 ; AVX512VLDQ-NEXT: retq
313 %cvt = sitofp <4 x i64> %a to <4 x double>
314 ret <4 x double> %cvt
315 }
317 define <4 x double> @sitofp_4i32_to_4f64(<4 x i32> %a) {
318 ; SSE-LABEL: sitofp_4i32_to_4f64:
320 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm2
321 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
322 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm1
323 ; SSE-NEXT: movaps %xmm2, %xmm0
326 ; AVX-LABEL: sitofp_4i32_to_4f64:
328 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
330 %cvt = sitofp <4 x i32> %a to <4 x double>
331 ret <4 x double> %cvt
332 }
334 define <4 x double> @sitofp_4i16_to_4f64(<8 x i16> %a) {
335 ; SSE-LABEL: sitofp_4i16_to_4f64:
337 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
338 ; SSE-NEXT: psrad $16, %xmm1
339 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
340 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
341 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
344 ; AVX-LABEL: sitofp_4i16_to_4f64:
346 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
347 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
349 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
350 %cvt = sitofp <4 x i16> %shuf to <4 x double>
351 ret <4 x double> %cvt
352 }
354 define <4 x double> @sitofp_8i16_to_4f64(<8 x i16> %a) {
355 ; SSE-LABEL: sitofp_8i16_to_4f64:
357 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
358 ; SSE-NEXT: psrad $16, %xmm1
359 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
360 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
361 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
364 ; AVX1-LABEL: sitofp_8i16_to_4f64:
366 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
367 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
370 ; AVX2-LABEL: sitofp_8i16_to_4f64:
372 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
373 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
376 ; AVX512-LABEL: sitofp_8i16_to_4f64:
378 ; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
379 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
380 ; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
382 %cvt = sitofp <8 x i16> %a to <8 x double>
383 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
384 ret <4 x double> %shuf
385 }
387 define <4 x double> @sitofp_4i8_to_4f64(<16 x i8> %a) {
388 ; SSE-LABEL: sitofp_4i8_to_4f64:
390 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
391 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
392 ; SSE-NEXT: psrad $24, %xmm1
393 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
394 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
395 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
398 ; AVX-LABEL: sitofp_4i8_to_4f64:
400 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
401 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
403 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
404 %cvt = sitofp <4 x i8> %shuf to <4 x double>
405 ret <4 x double> %cvt
406 }
408 define <4 x double> @sitofp_16i8_to_4f64(<16 x i8> %a) {
409 ; SSE-LABEL: sitofp_16i8_to_4f64:
411 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
412 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
413 ; SSE-NEXT: psrad $24, %xmm1
414 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
415 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
416 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
419 ; AVX1-LABEL: sitofp_16i8_to_4f64:
421 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
422 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
425 ; AVX2-LABEL: sitofp_16i8_to_4f64:
427 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
428 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
431 ; AVX512-LABEL: sitofp_16i8_to_4f64:
433 ; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
434 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
435 ; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
437 %cvt = sitofp <16 x i8> %a to <16 x double>
438 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
439 ret <4 x double> %shuf
440 }
443 ; Unsigned Integer to Double
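; Note: without AVX-512, unsigned i64 -> f64 has no native instruction, so the
; checks below expect the bias trick: punpckldq the 32-bit halves against the
; 0x43300000/0x45300000 exponent words (1127219200/1160773632), subtract
; [2^52, 2^84], and add the two partial doubles back together.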
446 define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) {
447 ; SSE-LABEL: uitofp_2i64_to_2f64:
449 ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
450 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
451 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
452 ; SSE-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
453 ; SSE-NEXT: subpd %xmm3, %xmm0
454 ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
455 ; SSE-NEXT: addpd %xmm4, %xmm0
456 ; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
457 ; SSE-NEXT: subpd %xmm3, %xmm2
458 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
459 ; SSE-NEXT: addpd %xmm2, %xmm1
460 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
463 ; VEX-LABEL: uitofp_2i64_to_2f64:
465 ; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
466 ; VEX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
467 ; VEX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
468 ; VEX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
469 ; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
470 ; VEX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
471 ; VEX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
472 ; VEX-NEXT: vhaddpd %xmm0, %xmm2, %xmm0
475 ; AVX512F-LABEL: uitofp_2i64_to_2f64:
477 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
478 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
479 ; AVX512F-NEXT: vmovq %xmm0, %rax
480 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
481 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
484 ; AVX512VL-LABEL: uitofp_2i64_to_2f64:
486 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
487 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
488 ; AVX512VL-NEXT: vmovq %xmm0, %rax
489 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
490 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
491 ; AVX512VL-NEXT: retq
493 ; AVX512DQ-LABEL: uitofp_2i64_to_2f64:
495 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
496 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
497 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
498 ; AVX512DQ-NEXT: vzeroupper
499 ; AVX512DQ-NEXT: retq
501 ; AVX512VLDQ-LABEL: uitofp_2i64_to_2f64:
502 ; AVX512VLDQ: # BB#0:
503 ; AVX512VLDQ-NEXT: vcvtuqq2pd %xmm0, %xmm0
504 ; AVX512VLDQ-NEXT: retq
505 %cvt = uitofp <2 x i64> %a to <2 x double>
506 ret <2 x double> %cvt
507 }
509 define <2 x double> @uitofp_2i32_to_2f64(<4 x i32> %a) {
510 ; SSE-LABEL: uitofp_2i32_to_2f64:
512 ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0]
513 ; SSE-NEXT: pand %xmm0, %xmm1
514 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
515 ; SSE-NEXT: psrld $16, %xmm0
516 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
517 ; SSE-NEXT: mulpd {{.*}}(%rip), %xmm0
518 ; SSE-NEXT: addpd %xmm1, %xmm0
521 ; VEX-LABEL: uitofp_2i32_to_2f64:
523 ; VEX-NEXT: vpxor %xmm1, %xmm1, %xmm1
524 ; VEX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
525 ; VEX-NEXT: vcvtdq2pd %xmm1, %xmm1
526 ; VEX-NEXT: vpsrld $16, %xmm0, %xmm0
527 ; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
528 ; VEX-NEXT: vmulpd {{.*}}(%rip), %xmm0, %xmm0
529 ; VEX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
532 ; AVX512F-LABEL: uitofp_2i32_to_2f64:
534 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
535 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
536 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
537 ; AVX512F-NEXT: vzeroupper
540 ; AVX512VL-LABEL: uitofp_2i32_to_2f64:
542 ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
543 ; AVX512VL-NEXT: retq
545 ; AVX512DQ-LABEL: uitofp_2i32_to_2f64:
547 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
548 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
549 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
550 ; AVX512DQ-NEXT: vzeroupper
551 ; AVX512DQ-NEXT: retq
553 ; AVX512VLDQ-LABEL: uitofp_2i32_to_2f64:
554 ; AVX512VLDQ: # BB#0:
555 ; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0
556 ; AVX512VLDQ-NEXT: retq
557 %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
558 %cvt = uitofp <2 x i32> %shuf to <2 x double>
559 ret <2 x double> %cvt
560 }
562 define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) {
563 ; SSE-LABEL: uitofp_4i32_to_2f64:
565 ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0]
566 ; SSE-NEXT: pand %xmm0, %xmm1
567 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
568 ; SSE-NEXT: psrld $16, %xmm0
569 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
570 ; SSE-NEXT: mulpd {{.*}}(%rip), %xmm0
571 ; SSE-NEXT: addpd %xmm1, %xmm0
574 ; AVX1-LABEL: uitofp_4i32_to_2f64:
576 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
577 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
578 ; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1
579 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
580 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
581 ; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
582 ; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
583 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
584 ; AVX1-NEXT: vzeroupper
587 ; AVX2-LABEL: uitofp_4i32_to_2f64:
589 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
590 ; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
591 ; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
592 ; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
593 ; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
594 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
595 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
596 ; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
597 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
598 ; AVX2-NEXT: vzeroupper
601 ; AVX512F-LABEL: uitofp_4i32_to_2f64:
603 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
604 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
605 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
606 ; AVX512F-NEXT: vzeroupper
609 ; AVX512VL-LABEL: uitofp_4i32_to_2f64:
611 ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0
612 ; AVX512VL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
613 ; AVX512VL-NEXT: vzeroupper
614 ; AVX512VL-NEXT: retq
616 ; AVX512DQ-LABEL: uitofp_4i32_to_2f64:
618 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
619 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
620 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
621 ; AVX512DQ-NEXT: vzeroupper
622 ; AVX512DQ-NEXT: retq
624 ; AVX512VLDQ-LABEL: uitofp_4i32_to_2f64:
625 ; AVX512VLDQ: # BB#0:
626 ; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %ymm0
627 ; AVX512VLDQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
628 ; AVX512VLDQ-NEXT: vzeroupper
629 ; AVX512VLDQ-NEXT: retq
630 %cvt = uitofp <4 x i32> %a to <4 x double>
631 %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1>
632 ret <2 x double> %shuf
633 }
635 define <2 x double> @uitofp_2i16_to_2f64(<8 x i16> %a) {
636 ; SSE-LABEL: uitofp_2i16_to_2f64:
638 ; SSE-NEXT: pxor %xmm1, %xmm1
639 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
640 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
643 ; AVX-LABEL: uitofp_2i16_to_2f64:
645 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
646 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
648 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
649 %cvt = uitofp <2 x i16> %shuf to <2 x double>
650 ret <2 x double> %cvt
651 }
653 define <2 x double> @uitofp_8i16_to_2f64(<8 x i16> %a) {
654 ; SSE-LABEL: uitofp_8i16_to_2f64:
656 ; SSE-NEXT: pxor %xmm1, %xmm1
657 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
658 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
661 ; AVX1-LABEL: uitofp_8i16_to_2f64:
663 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
664 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
665 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
666 ; AVX1-NEXT: vzeroupper
669 ; AVX2-LABEL: uitofp_8i16_to_2f64:
671 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
672 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
673 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
674 ; AVX2-NEXT: vzeroupper
677 ; AVX512-LABEL: uitofp_8i16_to_2f64:
679 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
680 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
681 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
682 ; AVX512-NEXT: vzeroupper
684 %cvt = uitofp <8 x i16> %a to <8 x double>
685 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1>
686 ret <2 x double> %shuf
687 }
689 define <2 x double> @uitofp_2i8_to_2f64(<16 x i8> %a) {
690 ; SSE-LABEL: uitofp_2i8_to_2f64:
692 ; SSE-NEXT: pxor %xmm1, %xmm1
693 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
694 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
695 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
698 ; AVX-LABEL: uitofp_2i8_to_2f64:
700 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
701 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
703 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
704 %cvt = uitofp <2 x i8> %shuf to <2 x double>
705 ret <2 x double> %cvt
706 }
708 define <2 x double> @uitofp_16i8_to_2f64(<16 x i8> %a) {
709 ; SSE-LABEL: uitofp_16i8_to_2f64:
711 ; SSE-NEXT: pxor %xmm1, %xmm1
712 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
713 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
714 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
717 ; AVX1-LABEL: uitofp_16i8_to_2f64:
719 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
720 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
721 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
722 ; AVX1-NEXT: vzeroupper
725 ; AVX2-LABEL: uitofp_16i8_to_2f64:
727 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
728 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
729 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
730 ; AVX2-NEXT: vzeroupper
733 ; AVX512-LABEL: uitofp_16i8_to_2f64:
735 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
736 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
737 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
738 ; AVX512-NEXT: vzeroupper
740 %cvt = uitofp <16 x i8> %a to <16 x double>
741 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1>
742 ret <2 x double> %shuf
743 }
745 define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) {
746 ; SSE-LABEL: uitofp_4i64_to_4f64:
748 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
749 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
750 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
751 ; SSE-NEXT: movapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
752 ; SSE-NEXT: subpd %xmm4, %xmm0
753 ; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,0,1]
754 ; SSE-NEXT: addpd %xmm5, %xmm0
755 ; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
756 ; SSE-NEXT: subpd %xmm4, %xmm3
757 ; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm3[2,3,0,1]
758 ; SSE-NEXT: addpd %xmm3, %xmm5
759 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
760 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
761 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
762 ; SSE-NEXT: subpd %xmm4, %xmm1
763 ; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
764 ; SSE-NEXT: addpd %xmm5, %xmm1
765 ; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
766 ; SSE-NEXT: subpd %xmm4, %xmm3
767 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
768 ; SSE-NEXT: addpd %xmm3, %xmm2
769 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
772 ; AVX1-LABEL: uitofp_4i64_to_4f64:
774 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
775 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
776 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
777 ; AVX1-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
778 ; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
779 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
780 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
781 ; AVX1-NEXT: vsubpd %xmm4, %xmm1, %xmm1
782 ; AVX1-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
783 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
784 ; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
785 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
786 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
787 ; AVX1-NEXT: vsubpd %xmm4, %xmm0, %xmm0
788 ; AVX1-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
789 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
792 ; AVX2-LABEL: uitofp_4i64_to_4f64:
794 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
795 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
796 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
797 ; AVX2-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
798 ; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
799 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
800 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
801 ; AVX2-NEXT: vsubpd %xmm4, %xmm1, %xmm1
802 ; AVX2-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
803 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
804 ; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
805 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
806 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
807 ; AVX2-NEXT: vsubpd %xmm4, %xmm0, %xmm0
808 ; AVX2-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
809 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
812 ; AVX512F-LABEL: uitofp_4i64_to_4f64:
814 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
815 ; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
816 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
817 ; AVX512F-NEXT: vmovq %xmm1, %rax
818 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
819 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
820 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
821 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2
822 ; AVX512F-NEXT: vmovq %xmm0, %rax
823 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
824 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
825 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
828 ; AVX512VL-LABEL: uitofp_4i64_to_4f64:
830 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
831 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
832 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
833 ; AVX512VL-NEXT: vmovq %xmm1, %rax
834 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
835 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
836 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
837 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2
838 ; AVX512VL-NEXT: vmovq %xmm0, %rax
839 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
840 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
841 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
842 ; AVX512VL-NEXT: retq
844 ; AVX512DQ-LABEL: uitofp_4i64_to_4f64:
846 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
847 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
848 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
849 ; AVX512DQ-NEXT: retq
851 ; AVX512VLDQ-LABEL: uitofp_4i64_to_4f64:
852 ; AVX512VLDQ: # BB#0:
853 ; AVX512VLDQ-NEXT: vcvtuqq2pd %ymm0, %ymm0
854 ; AVX512VLDQ-NEXT: retq
855 %cvt = uitofp <4 x i64> %a to <4 x double>
856 ret <4 x double> %cvt
857 }
859 define <4 x double> @uitofp_4i32_to_4f64(<4 x i32> %a) {
860 ; SSE-LABEL: uitofp_4i32_to_4f64:
862 ; SSE-NEXT: movdqa %xmm0, %xmm1
863 ; SSE-NEXT: psrld $16, %xmm1
864 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
865 ; SSE-NEXT: movapd {{.*#+}} xmm2 = [6.553600e+04,6.553600e+04]
866 ; SSE-NEXT: mulpd %xmm2, %xmm1
867 ; SSE-NEXT: movdqa {{.*#+}} xmm3 = [65535,0,65535,0,65535,0,65535,0]
868 ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
869 ; SSE-NEXT: pand %xmm3, %xmm0
870 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
871 ; SSE-NEXT: addpd %xmm1, %xmm0
872 ; SSE-NEXT: movdqa %xmm4, %xmm1
873 ; SSE-NEXT: psrld $16, %xmm1
874 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm5
875 ; SSE-NEXT: mulpd %xmm2, %xmm5
876 ; SSE-NEXT: pand %xmm3, %xmm4
877 ; SSE-NEXT: cvtdq2pd %xmm4, %xmm1
878 ; SSE-NEXT: addpd %xmm5, %xmm1
881 ; AVX1-LABEL: uitofp_4i32_to_4f64:
883 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
884 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
885 ; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1
886 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
887 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
888 ; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
889 ; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
892 ; AVX2-LABEL: uitofp_4i32_to_4f64:
894 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
895 ; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
896 ; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
897 ; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
898 ; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
899 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
900 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
901 ; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
904 ; AVX512F-LABEL: uitofp_4i32_to_4f64:
906 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
907 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
908 ; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
911 ; AVX512VL-LABEL: uitofp_4i32_to_4f64:
913 ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0
914 ; AVX512VL-NEXT: retq
916 ; AVX512DQ-LABEL: uitofp_4i32_to_4f64:
918 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
919 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
920 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
921 ; AVX512DQ-NEXT: retq
923 ; AVX512VLDQ-LABEL: uitofp_4i32_to_4f64:
924 ; AVX512VLDQ: # BB#0:
925 ; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %ymm0
926 ; AVX512VLDQ-NEXT: retq
927 %cvt = uitofp <4 x i32> %a to <4 x double>
928 ret <4 x double> %cvt
929 }
931 define <4 x double> @uitofp_4i16_to_4f64(<8 x i16> %a) {
932 ; SSE-LABEL: uitofp_4i16_to_4f64:
934 ; SSE-NEXT: pxor %xmm1, %xmm1
935 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
936 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm2
937 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
938 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm1
939 ; SSE-NEXT: movaps %xmm2, %xmm0
942 ; AVX-LABEL: uitofp_4i16_to_4f64:
944 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
945 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
947 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
948 %cvt = uitofp <4 x i16> %shuf to <4 x double>
949 ret <4 x double> %cvt
950 }
952 define <4 x double> @uitofp_8i16_to_4f64(<8 x i16> %a) {
953 ; SSE-LABEL: uitofp_8i16_to_4f64:
955 ; SSE-NEXT: pxor %xmm1, %xmm1
956 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
957 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm2
958 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
959 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm1
960 ; SSE-NEXT: movaps %xmm2, %xmm0
963 ; AVX1-LABEL: uitofp_8i16_to_4f64:
965 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
966 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
969 ; AVX2-LABEL: uitofp_8i16_to_4f64:
971 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
972 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
975 ; AVX512-LABEL: uitofp_8i16_to_4f64:
977 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
978 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
979 ; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
981 %cvt = uitofp <8 x i16> %a to <8 x double>
982 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
983 ret <4 x double> %shuf
984 }
986 define <4 x double> @uitofp_4i8_to_4f64(<16 x i8> %a) {
987 ; SSE-LABEL: uitofp_4i8_to_4f64:
989 ; SSE-NEXT: pxor %xmm1, %xmm1
990 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
991 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
992 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm2
993 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
994 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm1
995 ; SSE-NEXT: movaps %xmm2, %xmm0
998 ; AVX-LABEL: uitofp_4i8_to_4f64:
1000 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1001 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
1003 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1004 %cvt = uitofp <4 x i8> %shuf to <4 x double>
1005 ret <4 x double> %cvt
1006 }
1008 define <4 x double> @uitofp_16i8_to_4f64(<16 x i8> %a) {
1009 ; SSE-LABEL: uitofp_16i8_to_4f64:
1011 ; SSE-NEXT: pxor %xmm1, %xmm1
1012 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1013 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1014 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm2
1015 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1016 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm1
1017 ; SSE-NEXT: movaps %xmm2, %xmm0
1020 ; AVX1-LABEL: uitofp_16i8_to_4f64:
1022 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1023 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
1026 ; AVX2-LABEL: uitofp_16i8_to_4f64:
1028 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1029 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
1032 ; AVX512-LABEL: uitofp_16i8_to_4f64:
1034 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1035 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
1036 ; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
1038 %cvt = uitofp <16 x i8> %a to <16 x double>
1039 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1040 ret <4 x double> %shuf
1041 }
1044 ; Signed Integer to Float
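; Note: packed i64 -> f32 only exists with AVX-512DQ (vcvtqq2ps); the other
; targets below scalarize through cvtsi2ssq and rebuild the vector with
; unpcklps/insertps.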
1047 define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
1048 ; SSE-LABEL: sitofp_2i64_to_4f32:
1050 ; SSE-NEXT: movq %xmm0, %rax
1051 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1052 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1053 ; SSE-NEXT: movq %xmm0, %rax
1054 ; SSE-NEXT: xorps %xmm0, %xmm0
1055 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1056 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1057 ; SSE-NEXT: movaps %xmm1, %xmm0
1060 ; VEX-LABEL: sitofp_2i64_to_4f32:
1062 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
1063 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1064 ; VEX-NEXT: vmovq %xmm0, %rax
1065 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1066 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1067 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
1068 ; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1071 ; AVX512F-LABEL: sitofp_2i64_to_4f32:
1073 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1074 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1075 ; AVX512F-NEXT: vmovq %xmm0, %rax
1076 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1077 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1078 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
1079 ; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1080 ; AVX512F-NEXT: retq
1082 ; AVX512VL-LABEL: sitofp_2i64_to_4f32:
1084 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1085 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1086 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1087 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1088 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1089 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
1090 ; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1091 ; AVX512VL-NEXT: retq
1093 ; AVX512DQ-LABEL: sitofp_2i64_to_4f32:
1095 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
1096 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
1097 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1098 ; AVX512DQ-NEXT: vzeroupper
1099 ; AVX512DQ-NEXT: retq
1101 ; AVX512VLDQ-LABEL: sitofp_2i64_to_4f32:
1102 ; AVX512VLDQ: # BB#0:
1103 ; AVX512VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0
1104 ; AVX512VLDQ-NEXT: retq
1105 %cvt = sitofp <2 x i64> %a to <2 x float>
1106 %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1107 ret <4 x float> %ext
1108 }
1110 define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) {
1111 ; SSE-LABEL: sitofp_2i64_to_4f32_zero:
1113 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1114 ; SSE-NEXT: movq %xmm1, %rax
1115 ; SSE-NEXT: xorps %xmm1, %xmm1
1116 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1117 ; SSE-NEXT: movq %xmm0, %rax
1118 ; SSE-NEXT: xorps %xmm0, %xmm0
1119 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1120 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1121 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1124 ; VEX-LABEL: sitofp_2i64_to_4f32_zero:
1126 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
1127 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1128 ; VEX-NEXT: vmovq %xmm0, %rax
1129 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1130 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1133 ; AVX512F-LABEL: sitofp_2i64_to_4f32_zero:
1135 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1136 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1137 ; AVX512F-NEXT: vmovq %xmm0, %rax
1138 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1139 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1140 ; AVX512F-NEXT: retq
1142 ; AVX512VL-LABEL: sitofp_2i64_to_4f32_zero:
1144 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1145 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1146 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1147 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1148 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1149 ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1150 ; AVX512VL-NEXT: retq
1152 ; AVX512DQ-LABEL: sitofp_2i64_to_4f32_zero:
1154 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
1155 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
1156 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1157 ; AVX512DQ-NEXT: vzeroupper
1158 ; AVX512DQ-NEXT: retq
1160 ; AVX512VLDQ-LABEL: sitofp_2i64_to_4f32_zero:
1161 ; AVX512VLDQ: # BB#0:
1162 ; AVX512VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0
1163 ; AVX512VLDQ-NEXT: retq
1164 %cvt = sitofp <2 x i64> %a to <2 x float>
1165 %ext = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1166 ret <4 x float> %ext
1167 }
1169 define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
1170 ; SSE-LABEL: sitofp_4i64_to_4f32_undef:
1172 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
1173 ; SSE-NEXT: movq %xmm0, %rax
1174 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1175 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1176 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1177 ; SSE-NEXT: movq %xmm0, %rax
1178 ; SSE-NEXT: xorps %xmm0, %xmm0
1179 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1180 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1181 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1182 ; SSE-NEXT: movaps %xmm1, %xmm0
1185 ; VEX-LABEL: sitofp_4i64_to_4f32_undef:
1187 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
1188 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1189 ; VEX-NEXT: vmovq %xmm0, %rax
1190 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1191 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1192 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
1193 ; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1196 ; AVX512F-LABEL: sitofp_4i64_to_4f32_undef:
1198 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1199 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1200 ; AVX512F-NEXT: vmovq %xmm0, %rax
1201 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1202 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1203 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
1204 ; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1205 ; AVX512F-NEXT: retq
1207 ; AVX512VL-LABEL: sitofp_4i64_to_4f32_undef:
1209 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1210 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1211 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1212 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1213 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1214 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
1215 ; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1216 ; AVX512VL-NEXT: retq
1218 ; AVX512DQ-LABEL: sitofp_4i64_to_4f32_undef:
1220 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
1221 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
1222 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1223 ; AVX512DQ-NEXT: vzeroupper
1224 ; AVX512DQ-NEXT: retq
1226 ; AVX512VLDQ-LABEL: sitofp_4i64_to_4f32_undef:
1227 ; AVX512VLDQ: # BB#0:
1228 ; AVX512VLDQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
1229 ; AVX512VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0
1230 ; AVX512VLDQ-NEXT: vzeroupper
1231 ; AVX512VLDQ-NEXT: retq
1232 %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1233 %cvt = sitofp <4 x i64> %ext to <4 x float>
1234 ret <4 x float> %cvt
1235 }
1237 define <4 x float> @sitofp_4i32_to_4f32(<4 x i32> %a) {
1238 ; SSE-LABEL: sitofp_4i32_to_4f32:
1240 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
1243 ; AVX-LABEL: sitofp_4i32_to_4f32:
1245 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
1247 %cvt = sitofp <4 x i32> %a to <4 x float>
1248 ret <4 x float> %cvt
1249 }
1251 define <4 x float> @sitofp_4i16_to_4f32(<8 x i16> %a) {
1252 ; SSE-LABEL: sitofp_4i16_to_4f32:
1254 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1255 ; SSE-NEXT: psrad $16, %xmm0
1256 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
1259 ; AVX-LABEL: sitofp_4i16_to_4f32:
1261 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
1262 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
1264 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1265 %cvt = sitofp <4 x i16> %shuf to <4 x float>
1266 ret <4 x float> %cvt
1267 }
1269 define <4 x float> @sitofp_8i16_to_4f32(<8 x i16> %a) {
1270 ; SSE-LABEL: sitofp_8i16_to_4f32:
1272 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1273 ; SSE-NEXT: psrad $16, %xmm0
1274 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
1277 ; AVX1-LABEL: sitofp_8i16_to_4f32:
1279 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
1280 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1281 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
1282 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1283 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
1284 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1285 ; AVX1-NEXT: vzeroupper
1288 ; AVX2-LABEL: sitofp_8i16_to_4f32:
1290 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
1291 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
1292 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1293 ; AVX2-NEXT: vzeroupper
1296 ; AVX512-LABEL: sitofp_8i16_to_4f32:
1298 ; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
1299 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
1300 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1301 ; AVX512-NEXT: vzeroupper
1303 %cvt = sitofp <8 x i16> %a to <8 x float>
1304 %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1305 ret <4 x float> %shuf
1306 }
1308 define <4 x float> @sitofp_4i8_to_4f32(<16 x i8> %a) {
1309 ; SSE-LABEL: sitofp_4i8_to_4f32:
1311 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1312 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1313 ; SSE-NEXT: psrad $24, %xmm0
1314 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
1317 ; AVX-LABEL: sitofp_4i8_to_4f32:
1319 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
1320 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
1322 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1323 %cvt = sitofp <4 x i8> %shuf to <4 x float>
1324 ret <4 x float> %cvt
1325 }
1327 define <4 x float> @sitofp_16i8_to_4f32(<16 x i8> %a) {
1328 ; SSE-LABEL: sitofp_16i8_to_4f32:
1330 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1331 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1332 ; SSE-NEXT: psrad $24, %xmm0
1333 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
1336 ; AVX1-LABEL: sitofp_16i8_to_4f32:
1338 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
1339 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1340 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
1341 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1342 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
1343 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1344 ; AVX1-NEXT: vzeroupper
1347 ; AVX2-LABEL: sitofp_16i8_to_4f32:
1349 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
1350 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
1351 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1352 ; AVX2-NEXT: vzeroupper
1355 ; AVX512-LABEL: sitofp_16i8_to_4f32:
1357 ; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
1358 ; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0
1359 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
1360 ; AVX512-NEXT: vzeroupper
1362 %cvt = sitofp <16 x i8> %a to <16 x float>
1363 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1364 ret <4 x float> %shuf
1365 }
1367 define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) {
1368 ; SSE-LABEL: sitofp_4i64_to_4f32:
1370 ; SSE-NEXT: movq %xmm1, %rax
1371 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3
1372 ; SSE-NEXT: movq %xmm0, %rax
1373 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
1374 ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1375 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1376 ; SSE-NEXT: movq %xmm1, %rax
1377 ; SSE-NEXT: xorps %xmm1, %xmm1
1378 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1379 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1380 ; SSE-NEXT: movq %xmm0, %rax
1381 ; SSE-NEXT: xorps %xmm0, %xmm0
1382 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1383 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1384 ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
1385 ; SSE-NEXT: movaps %xmm2, %xmm0
1388 ; AVX1-LABEL: sitofp_4i64_to_4f32:
1390 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
1391 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1392 ; AVX1-NEXT: vmovq %xmm0, %rax
1393 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
1394 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
1395 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1396 ; AVX1-NEXT: vmovq %xmm0, %rax
1397 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
1398 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
1399 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
1400 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
1401 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1402 ; AVX1-NEXT: vzeroupper
1405 ; AVX2-LABEL: sitofp_4i64_to_4f32:
1407 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
1408 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1409 ; AVX2-NEXT: vmovq %xmm0, %rax
1410 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
1411 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
1412 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1413 ; AVX2-NEXT: vmovq %xmm0, %rax
1414 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
1415 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
1416 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
1417 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
1418 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1419 ; AVX2-NEXT: vzeroupper
1422 ; AVX512F-LABEL: sitofp_4i64_to_4f32:
1424 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1425 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1426 ; AVX512F-NEXT: vmovq %xmm0, %rax
1427 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
1428 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
1429 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
1430 ; AVX512F-NEXT: vmovq %xmm0, %rax
1431 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
1432 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
1433 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1434 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
1435 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1436 ; AVX512F-NEXT: vzeroupper
1437 ; AVX512F-NEXT: retq
1439 ; AVX512VL-LABEL: sitofp_4i64_to_4f32:
1441 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1442 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1443 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1444 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
1445 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
1446 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
1447 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1448 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
1449 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
1450 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1451 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
1452 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1453 ; AVX512VL-NEXT: vzeroupper
1454 ; AVX512VL-NEXT: retq
1456 ; AVX512DQ-LABEL: sitofp_4i64_to_4f32:
1458 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
1459 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
1460 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1461 ; AVX512DQ-NEXT: vzeroupper
1462 ; AVX512DQ-NEXT: retq
1464 ; AVX512VLDQ-LABEL: sitofp_4i64_to_4f32:
1465 ; AVX512VLDQ: # BB#0:
1466 ; AVX512VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0
1467 ; AVX512VLDQ-NEXT: vzeroupper
1468 ; AVX512VLDQ-NEXT: retq
1469 %cvt = sitofp <4 x i64> %a to <4 x float>
1470 ret <4 x float> %cvt
1471 }
1473 define <8 x float> @sitofp_8i32_to_8f32(<8 x i32> %a) {
1474 ; SSE-LABEL: sitofp_8i32_to_8f32:
1476 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
1477 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
1480 ; AVX-LABEL: sitofp_8i32_to_8f32:
1482 ; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
1484 %cvt = sitofp <8 x i32> %a to <8 x float>
1485 ret <8 x float> %cvt
1486 }
1488 define <8 x float> @sitofp_8i16_to_8f32(<8 x i16> %a) {
1489 ; SSE-LABEL: sitofp_8i16_to_8f32:
1491 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1492 ; SSE-NEXT: psrad $16, %xmm1
1493 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm2
1494 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
1495 ; SSE-NEXT: psrad $16, %xmm0
1496 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
1497 ; SSE-NEXT: movaps %xmm2, %xmm0
1500 ; AVX1-LABEL: sitofp_8i16_to_8f32:
1502 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
1503 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1504 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
1505 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1506 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
1509 ; AVX2-LABEL: sitofp_8i16_to_8f32:
1511 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
1512 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
1515 ; AVX512-LABEL: sitofp_8i16_to_8f32:
1517 ; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
1518 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
1520 %cvt = sitofp <8 x i16> %a to <8 x float>
1521 ret <8 x float> %cvt
1522 }
1524 define <8 x float> @sitofp_8i8_to_8f32(<16 x i8> %a) {
1525 ; SSE-LABEL: sitofp_8i8_to_8f32:
1527 ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1528 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
1529 ; SSE-NEXT: psrad $24, %xmm1
1530 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm2
1531 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1532 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1533 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1534 ; SSE-NEXT: psrad $24, %xmm0
1535 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
1536 ; SSE-NEXT: movaps %xmm2, %xmm0
1539 ; AVX1-LABEL: sitofp_8i8_to_8f32:
1541 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
1542 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1543 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
1544 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1545 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
1548 ; AVX2-LABEL: sitofp_8i8_to_8f32:
1550 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
1551 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
1554 ; AVX512-LABEL: sitofp_8i8_to_8f32:
1556 ; AVX512-NEXT: vpmovsxbd %xmm0, %ymm0
1557 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
1559 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1560 %cvt = sitofp <8 x i8> %shuf to <8 x float>
1561 ret <8 x float> %cvt
1562 }
1564 define <8 x float> @sitofp_16i8_to_8f32(<16 x i8> %a) {
1565 ; SSE-LABEL: sitofp_16i8_to_8f32:
1567 ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1568 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
1569 ; SSE-NEXT: psrad $24, %xmm1
1570 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm2
1571 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1572 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1573 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1574 ; SSE-NEXT: psrad $24, %xmm0
1575 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
1576 ; SSE-NEXT: movaps %xmm2, %xmm0
1579 ; AVX1-LABEL: sitofp_16i8_to_8f32:
1581 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
1582 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1583 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
1584 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1585 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
1588 ; AVX2-LABEL: sitofp_16i8_to_8f32:
1590 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
1591 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
1594 ; AVX512-LABEL: sitofp_16i8_to_8f32:
1596 ; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
1597 ; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0
1598 ; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
1600 %cvt = sitofp <16 x i8> %a to <16 x float>
1601 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1602 ret <8 x float> %shuf
1606 ; Unsigned Integer to Float
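; Without AVX512 there is no unsigned i64 -> float conversion, so the checks
; below expect a scalar fallback per element: non-negative values go straight
; through cvtsi2ss, while values with the sign bit set are halved as
; (x >> 1) | (x & 1) (keeping the low bit for correct rounding), converted,
; and then doubled again with addss. AVX512F uses scalar vcvtusi2ss instead,
; and AVX512DQ targets get the packed vcvtuqq2ps (through ZMM plus '# kill'
; copies and vzeroupper when AVX512VL is not available).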
1609 define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
1610 ; SSE-LABEL: uitofp_2i64_to_4f32:
1612 ; SSE-NEXT: movdqa %xmm0, %xmm1
1613 ; SSE-NEXT: movq %xmm1, %rax
1614 ; SSE-NEXT: testq %rax, %rax
1615 ; SSE-NEXT: js .LBB39_1
1617 ; SSE-NEXT: xorps %xmm0, %xmm0
1618 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1619 ; SSE-NEXT: jmp .LBB39_3
1620 ; SSE-NEXT: .LBB39_1:
1621 ; SSE-NEXT: movq %rax, %rcx
1622 ; SSE-NEXT: shrq %rcx
1623 ; SSE-NEXT: andl $1, %eax
1624 ; SSE-NEXT: orq %rcx, %rax
1625 ; SSE-NEXT: xorps %xmm0, %xmm0
1626 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1627 ; SSE-NEXT: addss %xmm0, %xmm0
1628 ; SSE-NEXT: .LBB39_3:
1629 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1630 ; SSE-NEXT: movq %xmm1, %rax
1631 ; SSE-NEXT: testq %rax, %rax
1632 ; SSE-NEXT: js .LBB39_4
1634 ; SSE-NEXT: xorps %xmm1, %xmm1
1635 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1636 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1638 ; SSE-NEXT: .LBB39_4:
1639 ; SSE-NEXT: movq %rax, %rcx
1640 ; SSE-NEXT: shrq %rcx
1641 ; SSE-NEXT: andl $1, %eax
1642 ; SSE-NEXT: orq %rcx, %rax
1643 ; SSE-NEXT: xorps %xmm1, %xmm1
1644 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1645 ; SSE-NEXT: addss %xmm1, %xmm1
1646 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1649 ; VEX-LABEL: uitofp_2i64_to_4f32:
1651 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
1652 ; VEX-NEXT: testq %rax, %rax
1653 ; VEX-NEXT: js .LBB39_1
1655 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1656 ; VEX-NEXT: jmp .LBB39_3
1657 ; VEX-NEXT: .LBB39_1:
1658 ; VEX-NEXT: movq %rax, %rcx
1659 ; VEX-NEXT: shrq %rcx
1660 ; VEX-NEXT: andl $1, %eax
1661 ; VEX-NEXT: orq %rcx, %rax
1662 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1663 ; VEX-NEXT: vaddss %xmm1, %xmm1, %xmm1
1664 ; VEX-NEXT: .LBB39_3:
1665 ; VEX-NEXT: vmovq %xmm0, %rax
1666 ; VEX-NEXT: testq %rax, %rax
1667 ; VEX-NEXT: js .LBB39_4
1669 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1670 ; VEX-NEXT: jmp .LBB39_6
1671 ; VEX-NEXT: .LBB39_4:
1672 ; VEX-NEXT: movq %rax, %rcx
1673 ; VEX-NEXT: shrq %rcx
1674 ; VEX-NEXT: andl $1, %eax
1675 ; VEX-NEXT: orq %rcx, %rax
1676 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1677 ; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
1678 ; VEX-NEXT: .LBB39_6:
1679 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1680 ; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
1681 ; VEX-NEXT: testq %rax, %rax
1682 ; VEX-NEXT: js .LBB39_8
1684 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
1685 ; VEX-NEXT: .LBB39_8:
1686 ; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1689 ; AVX512F-LABEL: uitofp_2i64_to_4f32:
1691 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1692 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
1693 ; AVX512F-NEXT: vmovq %xmm0, %rax
1694 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0
1695 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1696 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm1
1697 ; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1698 ; AVX512F-NEXT: retq
1700 ; AVX512VL-LABEL: uitofp_2i64_to_4f32:
1702 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1703 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
1704 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1705 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0
1706 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1707 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm1
1708 ; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1709 ; AVX512VL-NEXT: retq
1711 ; AVX512DQ-LABEL: uitofp_2i64_to_4f32:
1713 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
1714 ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
1715 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1716 ; AVX512DQ-NEXT: vzeroupper
1717 ; AVX512DQ-NEXT: retq
1719 ; AVX512VLDQ-LABEL: uitofp_2i64_to_4f32:
1720 ; AVX512VLDQ: # BB#0:
1721 ; AVX512VLDQ-NEXT: vcvtuqq2ps %xmm0, %xmm0
1722 ; AVX512VLDQ-NEXT: retq
1723 %cvt = uitofp <2 x i64> %a to <2 x float>
1724 %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1725 ret <4 x float> %ext
1728 define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) {
1729 ; SSE-LABEL: uitofp_2i64_to_2f32:
1731 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1732 ; SSE-NEXT: movq %xmm1, %rax
1733 ; SSE-NEXT: testq %rax, %rax
1734 ; SSE-NEXT: js .LBB40_1
1736 ; SSE-NEXT: xorps %xmm1, %xmm1
1737 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1738 ; SSE-NEXT: jmp .LBB40_3
1739 ; SSE-NEXT: .LBB40_1:
1740 ; SSE-NEXT: movq %rax, %rcx
1741 ; SSE-NEXT: shrq %rcx
1742 ; SSE-NEXT: andl $1, %eax
1743 ; SSE-NEXT: orq %rcx, %rax
1744 ; SSE-NEXT: xorps %xmm1, %xmm1
1745 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1746 ; SSE-NEXT: addss %xmm1, %xmm1
1747 ; SSE-NEXT: .LBB40_3:
1748 ; SSE-NEXT: movq %xmm0, %rax
1749 ; SSE-NEXT: testq %rax, %rax
1750 ; SSE-NEXT: js .LBB40_4
1752 ; SSE-NEXT: xorps %xmm0, %xmm0
1753 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1754 ; SSE-NEXT: jmp .LBB40_6
1755 ; SSE-NEXT: .LBB40_4:
1756 ; SSE-NEXT: movq %rax, %rcx
1757 ; SSE-NEXT: shrq %rcx
1758 ; SSE-NEXT: andl $1, %eax
1759 ; SSE-NEXT: orq %rcx, %rax
1760 ; SSE-NEXT: xorps %xmm0, %xmm0
1761 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1762 ; SSE-NEXT: addss %xmm0, %xmm0
1763 ; SSE-NEXT: .LBB40_6:
1764 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1765 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1768 ; VEX-LABEL: uitofp_2i64_to_2f32:
1770 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
1771 ; VEX-NEXT: testq %rax, %rax
1772 ; VEX-NEXT: js .LBB40_1
1774 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1775 ; VEX-NEXT: jmp .LBB40_3
1776 ; VEX-NEXT: .LBB40_1:
1777 ; VEX-NEXT: movq %rax, %rcx
1778 ; VEX-NEXT: shrq %rcx
1779 ; VEX-NEXT: andl $1, %eax
1780 ; VEX-NEXT: orq %rcx, %rax
1781 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1782 ; VEX-NEXT: vaddss %xmm1, %xmm1, %xmm1
1783 ; VEX-NEXT: .LBB40_3:
1784 ; VEX-NEXT: vmovq %xmm0, %rax
1785 ; VEX-NEXT: testq %rax, %rax
1786 ; VEX-NEXT: js .LBB40_4
1788 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1789 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1791 ; VEX-NEXT: .LBB40_4:
1792 ; VEX-NEXT: movq %rax, %rcx
1793 ; VEX-NEXT: shrq %rcx
1794 ; VEX-NEXT: andl $1, %eax
1795 ; VEX-NEXT: orq %rcx, %rax
1796 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1797 ; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
1798 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1801 ; AVX512F-LABEL: uitofp_2i64_to_2f32:
1803 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1804 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
1805 ; AVX512F-NEXT: vmovq %xmm0, %rax
1806 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0
1807 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1808 ; AVX512F-NEXT: retq
1810 ; AVX512VL-LABEL: uitofp_2i64_to_2f32:
1812 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1813 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
1814 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1815 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0
1816 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1817 ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1818 ; AVX512VL-NEXT: retq
1820 ; AVX512DQ-LABEL: uitofp_2i64_to_2f32:
1822 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
1823 ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
1824 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1825 ; AVX512DQ-NEXT: vzeroupper
1826 ; AVX512DQ-NEXT: retq
1828 ; AVX512VLDQ-LABEL: uitofp_2i64_to_2f32:
1829 ; AVX512VLDQ: # BB#0:
1830 ; AVX512VLDQ-NEXT: vcvtuqq2ps %xmm0, %xmm0
1831 ; AVX512VLDQ-NEXT: retq
1832 %cvt = uitofp <2 x i64> %a to <2 x float>
1833 %ext = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1834 ret <4 x float> %ext
1837 define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
1838 ; SSE-LABEL: uitofp_4i64_to_4f32_undef:
1840 ; SSE-NEXT: movdqa %xmm0, %xmm1
1841 ; SSE-NEXT: testq %rax, %rax
1842 ; SSE-NEXT: xorps %xmm2, %xmm2
1843 ; SSE-NEXT: js .LBB41_2
1845 ; SSE-NEXT: xorps %xmm2, %xmm2
1846 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
1847 ; SSE-NEXT: .LBB41_2:
1848 ; SSE-NEXT: movq %xmm1, %rax
1849 ; SSE-NEXT: testq %rax, %rax
1850 ; SSE-NEXT: js .LBB41_3
1852 ; SSE-NEXT: xorps %xmm0, %xmm0
1853 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1854 ; SSE-NEXT: jmp .LBB41_5
1855 ; SSE-NEXT: .LBB41_3:
1856 ; SSE-NEXT: movq %rax, %rcx
1857 ; SSE-NEXT: shrq %rcx
1858 ; SSE-NEXT: andl $1, %eax
1859 ; SSE-NEXT: orq %rcx, %rax
1860 ; SSE-NEXT: xorps %xmm0, %xmm0
1861 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1862 ; SSE-NEXT: addss %xmm0, %xmm0
1863 ; SSE-NEXT: .LBB41_5:
1864 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1865 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1866 ; SSE-NEXT: movq %xmm1, %rax
1867 ; SSE-NEXT: testq %rax, %rax
1868 ; SSE-NEXT: js .LBB41_6
1870 ; SSE-NEXT: xorps %xmm1, %xmm1
1871 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1872 ; SSE-NEXT: jmp .LBB41_8
1873 ; SSE-NEXT: .LBB41_6:
1874 ; SSE-NEXT: movq %rax, %rcx
1875 ; SSE-NEXT: shrq %rcx
1876 ; SSE-NEXT: andl $1, %eax
1877 ; SSE-NEXT: orq %rcx, %rax
1878 ; SSE-NEXT: xorps %xmm1, %xmm1
1879 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1880 ; SSE-NEXT: addss %xmm1, %xmm1
1881 ; SSE-NEXT: .LBB41_8:
1882 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1883 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1886 ; VEX-LABEL: uitofp_4i64_to_4f32_undef:
1888 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
1889 ; VEX-NEXT: testq %rax, %rax
1890 ; VEX-NEXT: js .LBB41_1
1892 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1893 ; VEX-NEXT: jmp .LBB41_3
1894 ; VEX-NEXT: .LBB41_1:
1895 ; VEX-NEXT: movq %rax, %rcx
1896 ; VEX-NEXT: shrq %rcx
1897 ; VEX-NEXT: andl $1, %eax
1898 ; VEX-NEXT: orq %rcx, %rax
1899 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1900 ; VEX-NEXT: vaddss %xmm1, %xmm1, %xmm1
1901 ; VEX-NEXT: .LBB41_3:
1902 ; VEX-NEXT: vmovq %xmm0, %rax
1903 ; VEX-NEXT: testq %rax, %rax
1904 ; VEX-NEXT: js .LBB41_4
1906 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1907 ; VEX-NEXT: jmp .LBB41_6
1908 ; VEX-NEXT: .LBB41_4:
1909 ; VEX-NEXT: movq %rax, %rcx
1910 ; VEX-NEXT: shrq %rcx
1911 ; VEX-NEXT: andl $1, %eax
1912 ; VEX-NEXT: orq %rcx, %rax
1913 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1914 ; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
1915 ; VEX-NEXT: .LBB41_6:
1916 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1917 ; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
1918 ; VEX-NEXT: testq %rax, %rax
1919 ; VEX-NEXT: js .LBB41_8
1921 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
1922 ; VEX-NEXT: .LBB41_8:
1923 ; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1926 ; AVX512F-LABEL: uitofp_4i64_to_4f32_undef:
1928 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1929 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
1930 ; AVX512F-NEXT: vmovq %xmm0, %rax
1931 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0
1932 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1933 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm1
1934 ; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1935 ; AVX512F-NEXT: retq
1937 ; AVX512VL-LABEL: uitofp_4i64_to_4f32_undef:
1939 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1940 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
1941 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1942 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0
1943 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1944 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm1
1945 ; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1946 ; AVX512VL-NEXT: retq
1948 ; AVX512DQ-LABEL: uitofp_4i64_to_4f32_undef:
1950 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
1951 ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
1952 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1953 ; AVX512DQ-NEXT: vzeroupper
1954 ; AVX512DQ-NEXT: retq
1956 ; AVX512VLDQ-LABEL: uitofp_4i64_to_4f32_undef:
1957 ; AVX512VLDQ: # BB#0:
1958 ; AVX512VLDQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
1959 ; AVX512VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0
1960 ; AVX512VLDQ-NEXT: vzeroupper
1961 ; AVX512VLDQ-NEXT: retq
1962 %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1963 %cvt = uitofp <4 x i64> %ext to <4 x float>
1964 ret <4 x float> %cvt
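; For unsigned i32 -> float the SSE/AVX1/AVX2 checks below expect the
; split-halves trick: the low 16 bits are OR'd with 0x4B000000 (float 2^23)
; and the high 16 bits with 0x53000000 (float 2^39), turning each half into
; an exactly representable float; adding the constant -(2^39 + 2^23)
; (~ -5.497642e+11) and then the two halves recovers hi*65536 + lo.
; AVX512 targets simply use vcvtudq2ps.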
1967 define <4 x float> @uitofp_4i32_to_4f32(<4 x i32> %a) {
1968 ; SSE-LABEL: uitofp_4i32_to_4f32:
1970 ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
1971 ; SSE-NEXT: pand %xmm0, %xmm1
1972 ; SSE-NEXT: por {{.*}}(%rip), %xmm1
1973 ; SSE-NEXT: psrld $16, %xmm0
1974 ; SSE-NEXT: por {{.*}}(%rip), %xmm0
1975 ; SSE-NEXT: addps {{.*}}(%rip), %xmm0
1976 ; SSE-NEXT: addps %xmm1, %xmm0
1979 ; AVX1-LABEL: uitofp_4i32_to_4f32:
1981 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
1982 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
1983 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
1984 ; AVX1-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
1985 ; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0
1988 ; AVX2-LABEL: uitofp_4i32_to_4f32:
1990 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
1991 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1992 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
1993 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
1994 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
1995 ; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
1996 ; AVX2-NEXT: vaddps %xmm2, %xmm0, %xmm0
1997 ; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0
2000 ; AVX512F-LABEL: uitofp_4i32_to_4f32:
2002 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
2003 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
2004 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
2005 ; AVX512F-NEXT: vzeroupper
2006 ; AVX512F-NEXT: retq
2008 ; AVX512VL-LABEL: uitofp_4i32_to_4f32:
2010 ; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
2011 ; AVX512VL-NEXT: retq
2013 ; AVX512DQ-LABEL: uitofp_4i32_to_4f32:
2015 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
2016 ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
2017 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
2018 ; AVX512DQ-NEXT: vzeroupper
2019 ; AVX512DQ-NEXT: retq
2021 ; AVX512VLDQ-LABEL: uitofp_4i32_to_4f32:
2022 ; AVX512VLDQ: # BB#0:
2023 ; AVX512VLDQ-NEXT: vcvtudq2ps %xmm0, %xmm0
2024 ; AVX512VLDQ-NEXT: retq
2025 %cvt = uitofp <4 x i32> %a to <4 x float>
2026 ret <4 x float> %cvt
2029 define <4 x float> @uitofp_4i16_to_4f32(<8 x i16> %a) {
2030 ; SSE-LABEL: uitofp_4i16_to_4f32:
2032 ; SSE-NEXT: pxor %xmm1, %xmm1
2033 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2034 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
2037 ; AVX-LABEL: uitofp_4i16_to_4f32:
2039 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2040 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
2042 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2043 %cvt = uitofp <4 x i16> %shuf to <4 x float>
2044 ret <4 x float> %cvt
2047 define <4 x float> @uitofp_8i16_to_4f32(<8 x i16> %a) {
2048 ; SSE-LABEL: uitofp_8i16_to_4f32:
2050 ; SSE-NEXT: pxor %xmm1, %xmm1
2051 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2052 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
2055 ; AVX1-LABEL: uitofp_8i16_to_4f32:
2057 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2058 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
2059 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2060 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2061 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
2062 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2063 ; AVX1-NEXT: vzeroupper
2066 ; AVX2-LABEL: uitofp_8i16_to_4f32:
2068 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2069 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
2070 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2071 ; AVX2-NEXT: vzeroupper
2074 ; AVX512-LABEL: uitofp_8i16_to_4f32:
2076 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2077 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
2078 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2079 ; AVX512-NEXT: vzeroupper
2081 %cvt = uitofp <8 x i16> %a to <8 x float>
2082 %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2083 ret <4 x float> %shuf
2086 define <4 x float> @uitofp_4i8_to_4f32(<16 x i8> %a) {
2087 ; SSE-LABEL: uitofp_4i8_to_4f32:
2089 ; SSE-NEXT: pxor %xmm1, %xmm1
2090 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2091 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2092 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
2095 ; AVX-LABEL: uitofp_4i8_to_4f32:
2097 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2098 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
2100 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2101 %cvt = uitofp <4 x i8> %shuf to <4 x float>
2102 ret <4 x float> %cvt
2105 define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) {
2106 ; SSE-LABEL: uitofp_16i8_to_4f32:
2108 ; SSE-NEXT: pxor %xmm1, %xmm1
2109 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2110 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2111 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
2114 ; AVX1-LABEL: uitofp_16i8_to_4f32:
2116 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2117 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
2118 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2119 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2120 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
2121 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2122 ; AVX1-NEXT: vzeroupper
2125 ; AVX2-LABEL: uitofp_16i8_to_4f32:
2127 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2128 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
2129 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2130 ; AVX2-NEXT: vzeroupper
2133 ; AVX512-LABEL: uitofp_16i8_to_4f32:
2135 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2136 ; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0
2137 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
2138 ; AVX512-NEXT: vzeroupper
2140 %cvt = uitofp <16 x i8> %a to <16 x float>
2141 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2142 ret <4 x float> %shuf
2145 define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
2146 ; SSE-LABEL: uitofp_4i64_to_4f32:
2148 ; SSE-NEXT: movq %xmm1, %rax
2149 ; SSE-NEXT: testq %rax, %rax
2150 ; SSE-NEXT: js .LBB47_1
2152 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3
2153 ; SSE-NEXT: jmp .LBB47_3
2154 ; SSE-NEXT: .LBB47_1:
2155 ; SSE-NEXT: movq %rax, %rcx
2156 ; SSE-NEXT: shrq %rcx
2157 ; SSE-NEXT: andl $1, %eax
2158 ; SSE-NEXT: orq %rcx, %rax
2159 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3
2160 ; SSE-NEXT: addss %xmm3, %xmm3
2161 ; SSE-NEXT: .LBB47_3:
2162 ; SSE-NEXT: movq %xmm0, %rax
2163 ; SSE-NEXT: testq %rax, %rax
2164 ; SSE-NEXT: js .LBB47_4
2166 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
2167 ; SSE-NEXT: jmp .LBB47_6
2168 ; SSE-NEXT: .LBB47_4:
2169 ; SSE-NEXT: movq %rax, %rcx
2170 ; SSE-NEXT: shrq %rcx
2171 ; SSE-NEXT: andl $1, %eax
2172 ; SSE-NEXT: orq %rcx, %rax
2173 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
2174 ; SSE-NEXT: addss %xmm2, %xmm2
2175 ; SSE-NEXT: .LBB47_6:
2176 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
2177 ; SSE-NEXT: movq %xmm1, %rax
2178 ; SSE-NEXT: testq %rax, %rax
2179 ; SSE-NEXT: js .LBB47_7
2181 ; SSE-NEXT: xorps %xmm1, %xmm1
2182 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
2183 ; SSE-NEXT: jmp .LBB47_9
2184 ; SSE-NEXT: .LBB47_7:
2185 ; SSE-NEXT: movq %rax, %rcx
2186 ; SSE-NEXT: shrq %rcx
2187 ; SSE-NEXT: andl $1, %eax
2188 ; SSE-NEXT: orq %rcx, %rax
2189 ; SSE-NEXT: xorps %xmm1, %xmm1
2190 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
2191 ; SSE-NEXT: addss %xmm1, %xmm1
2192 ; SSE-NEXT: .LBB47_9:
2193 ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
2194 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
2195 ; SSE-NEXT: movq %xmm0, %rax
2196 ; SSE-NEXT: testq %rax, %rax
2197 ; SSE-NEXT: js .LBB47_10
2198 ; SSE-NEXT: # BB#11:
2199 ; SSE-NEXT: xorps %xmm0, %xmm0
2200 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
2201 ; SSE-NEXT: jmp .LBB47_12
2202 ; SSE-NEXT: .LBB47_10:
2203 ; SSE-NEXT: movq %rax, %rcx
2204 ; SSE-NEXT: shrq %rcx
2205 ; SSE-NEXT: andl $1, %eax
2206 ; SSE-NEXT: orq %rcx, %rax
2207 ; SSE-NEXT: xorps %xmm0, %xmm0
2208 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
2209 ; SSE-NEXT: addss %xmm0, %xmm0
2210 ; SSE-NEXT: .LBB47_12:
2211 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2212 ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
2213 ; SSE-NEXT: movaps %xmm2, %xmm0
2216 ; AVX1-LABEL: uitofp_4i64_to_4f32:
2218 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
2219 ; AVX1-NEXT: testq %rax, %rax
2220 ; AVX1-NEXT: js .LBB47_1
2221 ; AVX1-NEXT: # BB#2:
2222 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
2223 ; AVX1-NEXT: jmp .LBB47_3
2224 ; AVX1-NEXT: .LBB47_1:
2225 ; AVX1-NEXT: movq %rax, %rcx
2226 ; AVX1-NEXT: shrq %rcx
2227 ; AVX1-NEXT: andl $1, %eax
2228 ; AVX1-NEXT: orq %rcx, %rax
2229 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
2230 ; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
2231 ; AVX1-NEXT: .LBB47_3:
2232 ; AVX1-NEXT: vmovq %xmm0, %rax
2233 ; AVX1-NEXT: testq %rax, %rax
2234 ; AVX1-NEXT: js .LBB47_4
2235 ; AVX1-NEXT: # BB#5:
2236 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
2237 ; AVX1-NEXT: jmp .LBB47_6
2238 ; AVX1-NEXT: .LBB47_4:
2239 ; AVX1-NEXT: movq %rax, %rcx
2240 ; AVX1-NEXT: shrq %rcx
2241 ; AVX1-NEXT: andl $1, %eax
2242 ; AVX1-NEXT: orq %rcx, %rax
2243 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
2244 ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
2245 ; AVX1-NEXT: .LBB47_6:
2246 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
2247 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2248 ; AVX1-NEXT: vmovq %xmm0, %rax
2249 ; AVX1-NEXT: testq %rax, %rax
2250 ; AVX1-NEXT: js .LBB47_7
2251 ; AVX1-NEXT: # BB#8:
2252 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
2253 ; AVX1-NEXT: jmp .LBB47_9
2254 ; AVX1-NEXT: .LBB47_7:
2255 ; AVX1-NEXT: movq %rax, %rcx
2256 ; AVX1-NEXT: shrq %rcx
2257 ; AVX1-NEXT: andl $1, %eax
2258 ; AVX1-NEXT: orq %rcx, %rax
2259 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
2260 ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
2261 ; AVX1-NEXT: .LBB47_9:
2262 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
2263 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
2264 ; AVX1-NEXT: testq %rax, %rax
2265 ; AVX1-NEXT: js .LBB47_10
2266 ; AVX1-NEXT: # BB#11:
2267 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
2268 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
2269 ; AVX1-NEXT: vzeroupper
2271 ; AVX1-NEXT: .LBB47_10:
2272 ; AVX1-NEXT: movq %rax, %rcx
2273 ; AVX1-NEXT: shrq %rcx
2274 ; AVX1-NEXT: andl $1, %eax
2275 ; AVX1-NEXT: orq %rcx, %rax
2276 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
2277 ; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
2278 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
2279 ; AVX1-NEXT: vzeroupper
2282 ; AVX2-LABEL: uitofp_4i64_to_4f32:
2284 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
2285 ; AVX2-NEXT: testq %rax, %rax
2286 ; AVX2-NEXT: js .LBB47_1
2287 ; AVX2-NEXT: # BB#2:
2288 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
2289 ; AVX2-NEXT: jmp .LBB47_3
2290 ; AVX2-NEXT: .LBB47_1:
2291 ; AVX2-NEXT: movq %rax, %rcx
2292 ; AVX2-NEXT: shrq %rcx
2293 ; AVX2-NEXT: andl $1, %eax
2294 ; AVX2-NEXT: orq %rcx, %rax
2295 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
2296 ; AVX2-NEXT: vaddss %xmm1, %xmm1, %xmm1
2297 ; AVX2-NEXT: .LBB47_3:
2298 ; AVX2-NEXT: vmovq %xmm0, %rax
2299 ; AVX2-NEXT: testq %rax, %rax
2300 ; AVX2-NEXT: js .LBB47_4
2301 ; AVX2-NEXT: # BB#5:
2302 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
2303 ; AVX2-NEXT: jmp .LBB47_6
2304 ; AVX2-NEXT: .LBB47_4:
2305 ; AVX2-NEXT: movq %rax, %rcx
2306 ; AVX2-NEXT: shrq %rcx
2307 ; AVX2-NEXT: andl $1, %eax
2308 ; AVX2-NEXT: orq %rcx, %rax
2309 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
2310 ; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2
2311 ; AVX2-NEXT: .LBB47_6:
2312 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
2313 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
2314 ; AVX2-NEXT: vmovq %xmm0, %rax
2315 ; AVX2-NEXT: testq %rax, %rax
2316 ; AVX2-NEXT: js .LBB47_7
2317 ; AVX2-NEXT: # BB#8:
2318 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
2319 ; AVX2-NEXT: jmp .LBB47_9
2320 ; AVX2-NEXT: .LBB47_7:
2321 ; AVX2-NEXT: movq %rax, %rcx
2322 ; AVX2-NEXT: shrq %rcx
2323 ; AVX2-NEXT: andl $1, %eax
2324 ; AVX2-NEXT: orq %rcx, %rax
2325 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
2326 ; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2
2327 ; AVX2-NEXT: .LBB47_9:
2328 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
2329 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
2330 ; AVX2-NEXT: testq %rax, %rax
2331 ; AVX2-NEXT: js .LBB47_10
2332 ; AVX2-NEXT: # BB#11:
2333 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
2334 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
2335 ; AVX2-NEXT: vzeroupper
2337 ; AVX2-NEXT: .LBB47_10:
2338 ; AVX2-NEXT: movq %rax, %rcx
2339 ; AVX2-NEXT: shrq %rcx
2340 ; AVX2-NEXT: andl $1, %eax
2341 ; AVX2-NEXT: orq %rcx, %rax
2342 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
2343 ; AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm0
2344 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
2345 ; AVX2-NEXT: vzeroupper
2348 ; AVX512F-LABEL: uitofp_4i64_to_4f32:
2350 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
2351 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
2352 ; AVX512F-NEXT: vmovq %xmm0, %rax
2353 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
2354 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
2355 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
2356 ; AVX512F-NEXT: vmovq %xmm0, %rax
2357 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2
2358 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
2359 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
2360 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0
2361 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
2362 ; AVX512F-NEXT: vzeroupper
2363 ; AVX512F-NEXT: retq
2365 ; AVX512VL-LABEL: uitofp_4i64_to_4f32:
2367 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
2368 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
2369 ; AVX512VL-NEXT: vmovq %xmm0, %rax
2370 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
2371 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
2372 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
2373 ; AVX512VL-NEXT: vmovq %xmm0, %rax
2374 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2
2375 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
2376 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
2377 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0
2378 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
2379 ; AVX512VL-NEXT: vzeroupper
2380 ; AVX512VL-NEXT: retq
2382 ; AVX512DQ-LABEL: uitofp_4i64_to_4f32:
2384 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
2385 ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
2386 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2387 ; AVX512DQ-NEXT: vzeroupper
2388 ; AVX512DQ-NEXT: retq
2390 ; AVX512VLDQ-LABEL: uitofp_4i64_to_4f32:
2391 ; AVX512VLDQ: # BB#0:
2392 ; AVX512VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0
2393 ; AVX512VLDQ-NEXT: vzeroupper
2394 ; AVX512VLDQ-NEXT: retq
2395 %cvt = uitofp <4 x i64> %a to <4 x float>
2396 ret <4 x float> %cvt
2399 define <8 x float> @uitofp_8i32_to_8f32(<8 x i32> %a) {
2400 ; SSE-LABEL: uitofp_8i32_to_8f32:
2402 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
2403 ; SSE-NEXT: movdqa %xmm0, %xmm3
2404 ; SSE-NEXT: pand %xmm2, %xmm3
2405 ; SSE-NEXT: movdqa {{.*#+}} xmm4 = [1258291200,1258291200,1258291200,1258291200]
2406 ; SSE-NEXT: por %xmm4, %xmm3
2407 ; SSE-NEXT: psrld $16, %xmm0
2408 ; SSE-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928]
2409 ; SSE-NEXT: por %xmm5, %xmm0
2410 ; SSE-NEXT: movaps {{.*#+}} xmm6 = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
2411 ; SSE-NEXT: addps %xmm6, %xmm0
2412 ; SSE-NEXT: addps %xmm3, %xmm0
2413 ; SSE-NEXT: pand %xmm1, %xmm2
2414 ; SSE-NEXT: por %xmm4, %xmm2
2415 ; SSE-NEXT: psrld $16, %xmm1
2416 ; SSE-NEXT: por %xmm5, %xmm1
2417 ; SSE-NEXT: addps %xmm6, %xmm1
2418 ; SSE-NEXT: addps %xmm2, %xmm1
2421 ; AVX1-LABEL: uitofp_8i32_to_8f32:
2423 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
2424 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2425 ; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2
2426 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
2427 ; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1
2428 ; AVX1-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
2429 ; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
2430 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
2431 ; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0
2434 ; AVX2-LABEL: uitofp_8i32_to_8f32:
2436 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
2437 ; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
2438 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
2439 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
2440 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
2441 ; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
2442 ; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0
2443 ; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0
2446 ; AVX512F-LABEL: uitofp_8i32_to_8f32:
2448 ; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
2449 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
2450 ; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
2451 ; AVX512F-NEXT: retq
2453 ; AVX512VL-LABEL: uitofp_8i32_to_8f32:
2455 ; AVX512VL-NEXT: vcvtudq2ps %ymm0, %ymm0
2456 ; AVX512VL-NEXT: retq
2458 ; AVX512DQ-LABEL: uitofp_8i32_to_8f32:
2460 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
2461 ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
2462 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
2463 ; AVX512DQ-NEXT: retq
2465 ; AVX512VLDQ-LABEL: uitofp_8i32_to_8f32:
2466 ; AVX512VLDQ: # BB#0:
2467 ; AVX512VLDQ-NEXT: vcvtudq2ps %ymm0, %ymm0
2468 ; AVX512VLDQ-NEXT: retq
2469 %cvt = uitofp <8 x i32> %a to <8 x float>
2470 ret <8 x float> %cvt
2473 define <8 x float> @uitofp_8i16_to_8f32(<8 x i16> %a) {
2474 ; SSE-LABEL: uitofp_8i16_to_8f32:
2476 ; SSE-NEXT: pxor %xmm1, %xmm1
2477 ; SSE-NEXT: movdqa %xmm0, %xmm2
2478 ; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2479 ; SSE-NEXT: cvtdq2ps %xmm2, %xmm2
2480 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2481 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
2482 ; SSE-NEXT: movaps %xmm2, %xmm0
2485 ; AVX1-LABEL: uitofp_8i16_to_8f32:
2487 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2488 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
2489 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2490 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2491 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
2494 ; AVX2-LABEL: uitofp_8i16_to_8f32:
2496 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2497 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
2500 ; AVX512-LABEL: uitofp_8i16_to_8f32:
2502 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2503 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
2505 %cvt = uitofp <8 x i16> %a to <8 x float>
2506 ret <8 x float> %cvt
2509 define <8 x float> @uitofp_8i8_to_8f32(<16 x i8> %a) {
2510 ; SSE-LABEL: uitofp_8i8_to_8f32:
2512 ; SSE-NEXT: pxor %xmm1, %xmm1
2513 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2514 ; SSE-NEXT: movdqa %xmm0, %xmm2
2515 ; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2516 ; SSE-NEXT: cvtdq2ps %xmm2, %xmm2
2517 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2518 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
2519 ; SSE-NEXT: movaps %xmm2, %xmm0
2522 ; AVX1-LABEL: uitofp_8i8_to_8f32:
2524 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2525 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
2526 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2527 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2528 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
2531 ; AVX2-LABEL: uitofp_8i8_to_8f32:
2533 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2534 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
2537 ; AVX512-LABEL: uitofp_8i8_to_8f32:
2539 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2540 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
2542 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2543 %cvt = uitofp <8 x i8> %shuf to <8 x float>
2544 ret <8 x float> %cvt
2547 define <8 x float> @uitofp_16i8_to_8f32(<16 x i8> %a) {
2548 ; SSE-LABEL: uitofp_16i8_to_8f32:
2550 ; SSE-NEXT: pxor %xmm1, %xmm1
2551 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2552 ; SSE-NEXT: movdqa %xmm0, %xmm2
2553 ; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2554 ; SSE-NEXT: cvtdq2ps %xmm2, %xmm2
2555 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2556 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
2557 ; SSE-NEXT: movaps %xmm2, %xmm0
2560 ; AVX1-LABEL: uitofp_16i8_to_8f32:
2562 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2563 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
2564 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2565 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2566 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
2569 ; AVX2-LABEL: uitofp_16i8_to_8f32:
2571 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2572 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
2575 ; AVX512-LABEL: uitofp_16i8_to_8f32:
2577 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2578 ; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0
2579 ; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
2581 %cvt = uitofp <16 x i8> %a to <16 x float>
2582 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2583 ret <8 x float> %shuf
2587 ; Load Signed Integer to Double
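; These variants take the source vector from memory; the interesting part is
; whether the load folds into the conversion (e.g. cvtdq2pd (%rdi), or
; vcvtqq2pd (%rdi) on AVX512VLDQ) or has to be loaded first and converted
; element by element through a GPR, as in the i64 cases without DQ.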
2590 define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) {
2591 ; SSE-LABEL: sitofp_load_2i64_to_2f64:
2593 ; SSE-NEXT: movdqa (%rdi), %xmm1
2594 ; SSE-NEXT: movq %xmm1, %rax
2595 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0
2596 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
2597 ; SSE-NEXT: movq %xmm1, %rax
2598 ; SSE-NEXT: xorps %xmm1, %xmm1
2599 ; SSE-NEXT: cvtsi2sdq %rax, %xmm1
2600 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2603 ; VEX-LABEL: sitofp_load_2i64_to_2f64:
2605 ; VEX-NEXT: vmovdqa (%rdi), %xmm0
2606 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
2607 ; VEX-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
2608 ; VEX-NEXT: vmovq %xmm0, %rax
2609 ; VEX-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
2610 ; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2613 ; AVX512F-LABEL: sitofp_load_2i64_to_2f64:
2615 ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
2616 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
2617 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
2618 ; AVX512F-NEXT: vmovq %xmm0, %rax
2619 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
2620 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2621 ; AVX512F-NEXT: retq
2623 ; AVX512VL-LABEL: sitofp_load_2i64_to_2f64:
2625 ; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
2626 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
2627 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
2628 ; AVX512VL-NEXT: vmovq %xmm0, %rax
2629 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
2630 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2631 ; AVX512VL-NEXT: retq
2633 ; AVX512DQ-LABEL: sitofp_load_2i64_to_2f64:
2635 ; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
2636 ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
2637 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
2638 ; AVX512DQ-NEXT: vzeroupper
2639 ; AVX512DQ-NEXT: retq
2641 ; AVX512VLDQ-LABEL: sitofp_load_2i64_to_2f64:
2642 ; AVX512VLDQ: # BB#0:
2643 ; AVX512VLDQ-NEXT: vcvtqq2pd (%rdi), %xmm0
2644 ; AVX512VLDQ-NEXT: retq
2645 %ld = load <2 x i64>, <2 x i64> *%a
2646 %cvt = sitofp <2 x i64> %ld to <2 x double>
2647 ret <2 x double> %cvt
2650 define <2 x double> @sitofp_load_2i32_to_2f64(<2 x i32> *%a) {
2651 ; SSE-LABEL: sitofp_load_2i32_to_2f64:
2653 ; SSE-NEXT: cvtdq2pd (%rdi), %xmm0
2656 ; VEX-LABEL: sitofp_load_2i32_to_2f64:
2658 ; VEX-NEXT: vcvtdq2pd (%rdi), %xmm0
2661 ; AVX512F-LABEL: sitofp_load_2i32_to_2f64:
2663 ; AVX512F-NEXT: vcvtdq2pd (%rdi), %xmm0
2664 ; AVX512F-NEXT: retq
2666 ; AVX512VL-LABEL: sitofp_load_2i32_to_2f64:
2668 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
2669 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2670 ; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0
2671 ; AVX512VL-NEXT: retq
2673 ; AVX512DQ-LABEL: sitofp_load_2i32_to_2f64:
2675 ; AVX512DQ-NEXT: vcvtdq2pd (%rdi), %xmm0
2676 ; AVX512DQ-NEXT: retq
2678 ; AVX512VLDQ-LABEL: sitofp_load_2i32_to_2f64:
2679 ; AVX512VLDQ: # BB#0:
2680 ; AVX512VLDQ-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
2681 ; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2682 ; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
2683 ; AVX512VLDQ-NEXT: retq
2684 %ld = load <2 x i32>, <2 x i32> *%a
2685 %cvt = sitofp <2 x i32> %ld to <2 x double>
2686 ret <2 x double> %cvt
2689 define <2 x double> @sitofp_load_2i16_to_2f64(<2 x i16> *%a) {
2690 ; SSE-LABEL: sitofp_load_2i16_to_2f64:
2692 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2693 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
2694 ; SSE-NEXT: psrad $16, %xmm0
2695 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
2698 ; AVX-LABEL: sitofp_load_2i16_to_2f64:
2700 ; AVX-NEXT: vpmovsxwq (%rdi), %xmm0
2701 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2702 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
2704 %ld = load <2 x i16>, <2 x i16> *%a
2705 %cvt = sitofp <2 x i16> %ld to <2 x double>
2706 ret <2 x double> %cvt
2709 define <2 x double> @sitofp_load_2i8_to_2f64(<2 x i8> *%a) {
2710 ; SSE-LABEL: sitofp_load_2i8_to_2f64:
2712 ; SSE-NEXT: movzwl (%rdi), %eax
2713 ; SSE-NEXT: movd %eax, %xmm0
2714 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2715 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2716 ; SSE-NEXT: psrad $24, %xmm0
2717 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
2720 ; AVX-LABEL: sitofp_load_2i8_to_2f64:
2722 ; AVX-NEXT: vpmovsxbq (%rdi), %xmm0
2723 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2724 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
2726 %ld = load <2 x i8>, <2 x i8> *%a
2727 %cvt = sitofp <2 x i8> %ld to <2 x double>
2728 ret <2 x double> %cvt
2731 define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) {
2732 ; SSE-LABEL: sitofp_load_4i64_to_4f64:
2734 ; SSE-NEXT: movdqa (%rdi), %xmm1
2735 ; SSE-NEXT: movdqa 16(%rdi), %xmm2
2736 ; SSE-NEXT: movq %xmm1, %rax
2737 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0
2738 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
2739 ; SSE-NEXT: movq %xmm1, %rax
2740 ; SSE-NEXT: xorps %xmm1, %xmm1
2741 ; SSE-NEXT: cvtsi2sdq %rax, %xmm1
2742 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2743 ; SSE-NEXT: movq %xmm2, %rax
2744 ; SSE-NEXT: xorps %xmm1, %xmm1
2745 ; SSE-NEXT: cvtsi2sdq %rax, %xmm1
2746 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
2747 ; SSE-NEXT: movq %xmm2, %rax
2748 ; SSE-NEXT: xorps %xmm2, %xmm2
2749 ; SSE-NEXT: cvtsi2sdq %rax, %xmm2
2750 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2753 ; AVX1-LABEL: sitofp_load_4i64_to_4f64:
2755 ; AVX1-NEXT: vmovdqa (%rdi), %ymm0
2756 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2757 ; AVX1-NEXT: vpextrq $1, %xmm1, %rax
2758 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
2759 ; AVX1-NEXT: vmovq %xmm1, %rax
2760 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
2761 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2762 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
2763 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
2764 ; AVX1-NEXT: vmovq %xmm0, %rax
2765 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
2766 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2767 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2770 ; AVX2-LABEL: sitofp_load_4i64_to_4f64:
2772 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
2773 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2774 ; AVX2-NEXT: vpextrq $1, %xmm1, %rax
2775 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
2776 ; AVX2-NEXT: vmovq %xmm1, %rax
2777 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
2778 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2779 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
2780 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
2781 ; AVX2-NEXT: vmovq %xmm0, %rax
2782 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
2783 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2784 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2787 ; AVX512F-LABEL: sitofp_load_4i64_to_4f64:
2789 ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
2790 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
2791 ; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
2792 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
2793 ; AVX512F-NEXT: vmovq %xmm1, %rax
2794 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
2795 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2796 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
2797 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
2798 ; AVX512F-NEXT: vmovq %xmm0, %rax
2799 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
2800 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2801 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2802 ; AVX512F-NEXT: retq
2804 ; AVX512VL-LABEL: sitofp_load_4i64_to_4f64:
2806 ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
2807 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
2808 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
2809 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
2810 ; AVX512VL-NEXT: vmovq %xmm1, %rax
2811 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
2812 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2813 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
2814 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
2815 ; AVX512VL-NEXT: vmovq %xmm0, %rax
2816 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
2817 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2818 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2819 ; AVX512VL-NEXT: retq
2821 ; AVX512DQ-LABEL: sitofp_load_4i64_to_4f64:
2823 ; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0
2824 ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
2825 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
2826 ; AVX512DQ-NEXT: retq
2828 ; AVX512VLDQ-LABEL: sitofp_load_4i64_to_4f64:
2829 ; AVX512VLDQ: # BB#0:
2830 ; AVX512VLDQ-NEXT: vcvtqq2pd (%rdi), %ymm0
2831 ; AVX512VLDQ-NEXT: retq
2832 %ld = load <4 x i64>, <4 x i64> *%a
2833 %cvt = sitofp <4 x i64> %ld to <4 x double>
2834 ret <4 x double> %cvt
2837 define <4 x double> @sitofp_load_4i32_to_4f64(<4 x i32> *%a) {
2838 ; SSE-LABEL: sitofp_load_4i32_to_4f64:
2840 ; SSE-NEXT: movdqa (%rdi), %xmm1
2841 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
2842 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
2843 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
2846 ; AVX-LABEL: sitofp_load_4i32_to_4f64:
2848 ; AVX-NEXT: vcvtdq2pd (%rdi), %ymm0
2850 %ld = load <4 x i32>, <4 x i32> *%a
2851 %cvt = sitofp <4 x i32> %ld to <4 x double>
2852 ret <4 x double> %cvt
2855 define <4 x double> @sitofp_load_4i16_to_4f64(<4 x i16> *%a) {
2856 ; SSE-LABEL: sitofp_load_4i16_to_4f64:
2858 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2859 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2860 ; SSE-NEXT: psrad $16, %xmm1
2861 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
2862 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
2863 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
2866 ; AVX-LABEL: sitofp_load_4i16_to_4f64:
2868 ; AVX-NEXT: vpmovsxwd (%rdi), %xmm0
2869 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
2871 %ld = load <4 x i16>, <4 x i16> *%a
2872 %cvt = sitofp <4 x i16> %ld to <4 x double>
2873 ret <4 x double> %cvt
2876 define <4 x double> @sitofp_load_4i8_to_4f64(<4 x i8> *%a) {
2877 ; SSE-LABEL: sitofp_load_4i8_to_4f64:
2879 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2880 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2881 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2882 ; SSE-NEXT: psrad $24, %xmm1
2883 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
2884 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
2885 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
2888 ; AVX-LABEL: sitofp_load_4i8_to_4f64:
2890 ; AVX-NEXT: vpmovsxbd (%rdi), %xmm0
2891 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
2893 %ld = load <4 x i8>, <4 x i8> *%a
2894 %cvt = sitofp <4 x i8> %ld to <4 x double>
2895 ret <4 x double> %cvt
2899 ; Load Unsigned Integer to Double
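; Without AVX512 the u64 -> f64 cases below use the magic-number trick: each
; 64-bit lane is split into 32-bit halves and interleaved with 0x43300000 /
; 0x45300000 (the exponent words of 2^52 and 2^84), the biases
; [4.503600e+15, 1.934281e+25] are subtracted, and the two halves are added
; back together to give hi*2^32 + lo as a double.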
2902 define <2 x double> @uitofp_load_2i64_to_2f64(<2 x i64> *%a) {
2903 ; SSE-LABEL: uitofp_load_2i64_to_2f64:
2905 ; SSE-NEXT: movdqa (%rdi), %xmm1
2906 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
2907 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
2908 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
2909 ; SSE-NEXT: movapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
2910 ; SSE-NEXT: subpd %xmm4, %xmm1
2911 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
2912 ; SSE-NEXT: addpd %xmm1, %xmm0
2913 ; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
2914 ; SSE-NEXT: subpd %xmm4, %xmm3
2915 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
2916 ; SSE-NEXT: addpd %xmm3, %xmm1
2917 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2920 ; VEX-LABEL: uitofp_load_2i64_to_2f64:
2922 ; VEX-NEXT: vmovdqa (%rdi), %xmm0
2923 ; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
2924 ; VEX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2925 ; VEX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
2926 ; VEX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
2927 ; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
2928 ; VEX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2929 ; VEX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
2930 ; VEX-NEXT: vhaddpd %xmm0, %xmm2, %xmm0
2933 ; AVX512F-LABEL: uitofp_load_2i64_to_2f64:
2935 ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
2936 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
2937 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
2938 ; AVX512F-NEXT: vmovq %xmm0, %rax
2939 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
2940 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2941 ; AVX512F-NEXT: retq
2943 ; AVX512VL-LABEL: uitofp_load_2i64_to_2f64:
2945 ; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
2946 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
2947 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
2948 ; AVX512VL-NEXT: vmovq %xmm0, %rax
2949 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
2950 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2951 ; AVX512VL-NEXT: retq
2953 ; AVX512DQ-LABEL: uitofp_load_2i64_to_2f64:
2955 ; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
2956 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
2957 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
2958 ; AVX512DQ-NEXT: vzeroupper
2959 ; AVX512DQ-NEXT: retq
2961 ; AVX512VLDQ-LABEL: uitofp_load_2i64_to_2f64:
2962 ; AVX512VLDQ: # BB#0:
2963 ; AVX512VLDQ-NEXT: vcvtuqq2pd (%rdi), %xmm0
2964 ; AVX512VLDQ-NEXT: retq
2965 %ld = load <2 x i64>, <2 x i64> *%a
2966 %cvt = uitofp <2 x i64> %ld to <2 x double>
2967 ret <2 x double> %cvt
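; u32 -> f64 splits each element into 16-bit halves as well: both halves are
; exact in double after cvtdq2pd, so the result is simply hi*65536.0 + lo
; (the mulpd/addpd pair below). AVX512 converts directly with vcvtudq2pd.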
2970 define <2 x double> @uitofp_load_2i32_to_2f64(<2 x i32> *%a) {
2971 ; SSE-LABEL: uitofp_load_2i32_to_2f64:
2973 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2974 ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0]
2975 ; SSE-NEXT: pand %xmm0, %xmm1
2976 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
2977 ; SSE-NEXT: psrld $16, %xmm0
2978 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
2979 ; SSE-NEXT: mulpd {{.*}}(%rip), %xmm0
2980 ; SSE-NEXT: addpd %xmm1, %xmm0
2983 ; VEX-LABEL: uitofp_load_2i32_to_2f64:
2985 ; VEX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2986 ; VEX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2987 ; VEX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
2988 ; VEX-NEXT: vcvtdq2pd %xmm1, %xmm1
2989 ; VEX-NEXT: vpsrld $16, %xmm0, %xmm0
2990 ; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
2991 ; VEX-NEXT: vmulpd {{.*}}(%rip), %xmm0, %xmm0
2992 ; VEX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
2995 ; AVX512F-LABEL: uitofp_load_2i32_to_2f64:
2997 ; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2998 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
2999 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
3000 ; AVX512F-NEXT: vzeroupper
3001 ; AVX512F-NEXT: retq
3003 ; AVX512VL-LABEL: uitofp_load_2i32_to_2f64:
3005 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
3006 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
3007 ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
3008 ; AVX512VL-NEXT: retq
3010 ; AVX512DQ-LABEL: uitofp_load_2i32_to_2f64:
3012 ; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
3013 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
3014 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
3015 ; AVX512DQ-NEXT: vzeroupper
3016 ; AVX512DQ-NEXT: retq
3018 ; AVX512VLDQ-LABEL: uitofp_load_2i32_to_2f64:
3019 ; AVX512VLDQ: # BB#0:
3020 ; AVX512VLDQ-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
3021 ; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
3022 ; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0
3023 ; AVX512VLDQ-NEXT: retq
3024 %ld = load <2 x i32>, <2 x i32> *%a
3025 %cvt = uitofp <2 x i32> %ld to <2 x double>
3026 ret <2 x double> %cvt
3029 define <2 x double> @uitofp_load_2i16_to_2f64(<2 x i16> *%a) {
3030 ; SSE-LABEL: uitofp_load_2i16_to_2f64:
3032 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3033 ; SSE-NEXT: pxor %xmm1, %xmm1
3034 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3035 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
3038 ; VEX-LABEL: uitofp_load_2i16_to_2f64:
3040 ; VEX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3041 ; VEX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
3042 ; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
3045 ; AVX512F-LABEL: uitofp_load_2i16_to_2f64:
3047 ; AVX512F-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3048 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
3049 ; AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0
3050 ; AVX512F-NEXT: retq
3052 ; AVX512VL-LABEL: uitofp_load_2i16_to_2f64:
3054 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
3055 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
3056 ; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
3057 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
3058 ; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0
3059 ; AVX512VL-NEXT: retq
3061 ; AVX512DQ-LABEL: uitofp_load_2i16_to_2f64:
3063 ; AVX512DQ-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3064 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
3065 ; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %xmm0
3066 ; AVX512DQ-NEXT: retq
3068 ; AVX512VLDQ-LABEL: uitofp_load_2i16_to_2f64:
3069 ; AVX512VLDQ: # BB#0:
3070 ; AVX512VLDQ-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
3071 ; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
3072 ; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
3073 ; AVX512VLDQ-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
3074 ; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
3075 ; AVX512VLDQ-NEXT: retq
3076 %ld = load <2 x i16>, <2 x i16> *%a
3077 %cvt = uitofp <2 x i16> %ld to <2 x double>
3078 ret <2 x double> %cvt
3081 define <2 x double> @uitofp_load_2i8_to_2f64(<2 x i8> *%a) {
3082 ; SSE-LABEL: uitofp_load_2i8_to_2f64:
3084 ; SSE-NEXT: movzwl (%rdi), %eax
3085 ; SSE-NEXT: movd %eax, %xmm0
3086 ; SSE-NEXT: pxor %xmm1, %xmm1
3087 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3088 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3089 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
3092 ; VEX-LABEL: uitofp_load_2i8_to_2f64:
3094 ; VEX-NEXT: movzwl (%rdi), %eax
3095 ; VEX-NEXT: vmovd %eax, %xmm0
3096 ; VEX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3097 ; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
3100 ; AVX512F-LABEL: uitofp_load_2i8_to_2f64:
3102 ; AVX512F-NEXT: movzwl (%rdi), %eax
3103 ; AVX512F-NEXT: vmovd %eax, %xmm0
3104 ; AVX512F-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3105 ; AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0
3106 ; AVX512F-NEXT: retq
3108 ; AVX512VL-LABEL: uitofp_load_2i8_to_2f64:
3110 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
3111 ; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
3112 ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3113 ; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0
3114 ; AVX512VL-NEXT: retq
3116 ; AVX512DQ-LABEL: uitofp_load_2i8_to_2f64:
3118 ; AVX512DQ-NEXT: movzwl (%rdi), %eax
3119 ; AVX512DQ-NEXT: vmovd %eax, %xmm0
3120 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3121 ; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %xmm0
3122 ; AVX512DQ-NEXT: retq
3124 ; AVX512VLDQ-LABEL: uitofp_load_2i8_to_2f64:
3125 ; AVX512VLDQ: # BB#0:
3126 ; AVX512VLDQ-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
3127 ; AVX512VLDQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
3128 ; AVX512VLDQ-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3129 ; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
3130 ; AVX512VLDQ-NEXT: retq
3131 %ld = load <2 x i8>, <2 x i8> *%a
3132 %cvt = uitofp <2 x i8> %ld to <2 x double>
3133 ret <2 x double> %cvt
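; u64 -> f64 without AVX512DQ uses the magic-number expansion: each element's
; low and high 32-bit dwords are interleaved with the exponent words
; 0x43300000/0x45300000 (punpckldq with [1127219200,1160773632,0,0]), the
; biases 2^52 and 2^84 ([4.503600e+15,1.934281e+25]) are subtracted, and the
; two partial doubles are summed (pshufd+addpd on SSE, vhaddpd on AVX),
; reconstructing lo + 2^32*hi with a single final rounding. AVX512F converts
; each element with scalar vcvtusi2sdq; AVX512DQ has packed vcvtuqq2pd.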
3136 define <4 x double> @uitofp_load_4i64_to_4f64(<4 x i64> *%a) {
3137 ; SSE-LABEL: uitofp_load_4i64_to_4f64:
3139 ; SSE-NEXT: movdqa (%rdi), %xmm1
3140 ; SSE-NEXT: movdqa 16(%rdi), %xmm2
3141 ; SSE-NEXT: movdqa {{.*#+}} xmm3 = [1127219200,1160773632,0,0]
3142 ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
3143 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
3144 ; SSE-NEXT: movapd {{.*#+}} xmm5 = [4.503600e+15,1.934281e+25]
3145 ; SSE-NEXT: subpd %xmm5, %xmm1
3146 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
3147 ; SSE-NEXT: addpd %xmm1, %xmm0
3148 ; SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
3149 ; SSE-NEXT: subpd %xmm5, %xmm4
3150 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm4[2,3,0,1]
3151 ; SSE-NEXT: addpd %xmm4, %xmm1
3152 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3153 ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
3154 ; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
3155 ; SSE-NEXT: subpd %xmm5, %xmm2
3156 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
3157 ; SSE-NEXT: addpd %xmm2, %xmm1
3158 ; SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
3159 ; SSE-NEXT: subpd %xmm5, %xmm4
3160 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
3161 ; SSE-NEXT: addpd %xmm4, %xmm2
3162 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
3165 ; AVX1-LABEL: uitofp_load_4i64_to_4f64:
3167 ; AVX1-NEXT: vmovdqa (%rdi), %ymm0
3168 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3169 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
3170 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3171 ; AVX1-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
3172 ; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
3173 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
3174 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3175 ; AVX1-NEXT: vsubpd %xmm4, %xmm1, %xmm1
3176 ; AVX1-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
3177 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3178 ; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
3179 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
3180 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3181 ; AVX1-NEXT: vsubpd %xmm4, %xmm0, %xmm0
3182 ; AVX1-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
3183 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3186 ; AVX2-LABEL: uitofp_load_4i64_to_4f64:
3188 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
3189 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3190 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
3191 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3192 ; AVX2-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
3193 ; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
3194 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
3195 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3196 ; AVX2-NEXT: vsubpd %xmm4, %xmm1, %xmm1
3197 ; AVX2-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
3198 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3199 ; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
3200 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
3201 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3202 ; AVX2-NEXT: vsubpd %xmm4, %xmm0, %xmm0
3203 ; AVX2-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
3204 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3207 ; AVX512F-LABEL: uitofp_load_4i64_to_4f64:
3209 ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
3210 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
3211 ; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
3212 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
3213 ; AVX512F-NEXT: vmovq %xmm1, %rax
3214 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
3215 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
3216 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
3217 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2
3218 ; AVX512F-NEXT: vmovq %xmm0, %rax
3219 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
3220 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
3221 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3222 ; AVX512F-NEXT: retq
3224 ; AVX512VL-LABEL: uitofp_load_4i64_to_4f64:
3226 ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
3227 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
3228 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
3229 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
3230 ; AVX512VL-NEXT: vmovq %xmm1, %rax
3231 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
3232 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
3233 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
3234 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2
3235 ; AVX512VL-NEXT: vmovq %xmm0, %rax
3236 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
3237 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
3238 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3239 ; AVX512VL-NEXT: retq
3241 ; AVX512DQ-LABEL: uitofp_load_4i64_to_4f64:
3243 ; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0
3244 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
3245 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
3246 ; AVX512DQ-NEXT: retq
3248 ; AVX512VLDQ-LABEL: uitofp_load_4i64_to_4f64:
3249 ; AVX512VLDQ: # BB#0:
3250 ; AVX512VLDQ-NEXT: vcvtuqq2pd (%rdi), %ymm0
3251 ; AVX512VLDQ-NEXT: retq
3252 %ld = load <4 x i64>, <4 x i64> *%a
3253 %cvt = uitofp <4 x i64> %ld to <4 x double>
3254 ret <4 x double> %cvt
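; u32 -> f64: cvtdq2pd is signed-only, so SSE/AVX split each element into its
; high 16 bits (psrld $16 -> convert -> scale by 6.553600e+04) and low 16 bits
; (mask -> convert) and add the two exact doubles. AVX512 without VL widens to
; zmm for vcvtudq2pd (the '# kill' lines are register-liveness notes for that
; implicit widening); with VL the conversion folds the load directly.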
3257 define <4 x double> @uitofp_load_4i32_to_4f64(<4 x i32> *%a) {
3258 ; SSE-LABEL: uitofp_load_4i32_to_4f64:
3260 ; SSE-NEXT: movdqa (%rdi), %xmm0
3261 ; SSE-NEXT: movdqa %xmm0, %xmm1
3262 ; SSE-NEXT: psrld $16, %xmm1
3263 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
3264 ; SSE-NEXT: movapd {{.*#+}} xmm2 = [6.553600e+04,6.553600e+04]
3265 ; SSE-NEXT: mulpd %xmm2, %xmm1
3266 ; SSE-NEXT: movdqa {{.*#+}} xmm3 = [65535,0,65535,0,65535,0,65535,0]
3267 ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
3268 ; SSE-NEXT: pand %xmm3, %xmm0
3269 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
3270 ; SSE-NEXT: addpd %xmm1, %xmm0
3271 ; SSE-NEXT: movdqa %xmm4, %xmm1
3272 ; SSE-NEXT: psrld $16, %xmm1
3273 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm5
3274 ; SSE-NEXT: mulpd %xmm2, %xmm5
3275 ; SSE-NEXT: pand %xmm3, %xmm4
3276 ; SSE-NEXT: cvtdq2pd %xmm4, %xmm1
3277 ; SSE-NEXT: addpd %xmm5, %xmm1
3280 ; AVX1-LABEL: uitofp_load_4i32_to_4f64:
3282 ; AVX1-NEXT: vmovdqa (%rdi), %xmm0
3283 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
3284 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
3285 ; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1
3286 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
3287 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
3288 ; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
3289 ; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
3292 ; AVX2-LABEL: uitofp_load_4i32_to_4f64:
3294 ; AVX2-NEXT: vmovdqa (%rdi), %xmm0
3295 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
3296 ; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
3297 ; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
3298 ; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
3299 ; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
3300 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
3301 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
3302 ; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
3305 ; AVX512F-LABEL: uitofp_load_4i32_to_4f64:
3307 ; AVX512F-NEXT: vmovaps (%rdi), %xmm0
3308 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
3309 ; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
3310 ; AVX512F-NEXT: retq
3312 ; AVX512VL-LABEL: uitofp_load_4i32_to_4f64:
3314 ; AVX512VL-NEXT: vcvtudq2pd (%rdi), %ymm0
3315 ; AVX512VL-NEXT: retq
3317 ; AVX512DQ-LABEL: uitofp_load_4i32_to_4f64:
3319 ; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
3320 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
3321 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
3322 ; AVX512DQ-NEXT: retq
3324 ; AVX512VLDQ-LABEL: uitofp_load_4i32_to_4f64:
3325 ; AVX512VLDQ: # BB#0:
3326 ; AVX512VLDQ-NEXT: vcvtudq2pd (%rdi), %ymm0
3327 ; AVX512VLDQ-NEXT: retq
3328 %ld = load <4 x i32>, <4 x i32> *%a
3329 %cvt = uitofp <4 x i32> %ld to <4 x double>
3330 ret <4 x double> %cvt
3333 define <4 x double> @uitofp_load_4i16_to_4f64(<4 x i16> *%a) {
3334 ; SSE-LABEL: uitofp_load_4i16_to_4f64:
3336 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
3337 ; SSE-NEXT: pxor %xmm0, %xmm0
3338 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3339 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
3340 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
3341 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
3344 ; AVX-LABEL: uitofp_load_4i16_to_4f64:
3346 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
3347 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
3349 %ld = load <4 x i16>, <4 x i16> *%a
3350 %cvt = uitofp <4 x i16> %ld to <4 x double>
3351 ret <4 x double> %cvt
3354 define <4 x double> @uitofp_load_4i8_to_4f64(<4 x i8> *%a) {
3355 ; SSE-LABEL: uitofp_load_4i8_to_4f64:
3357 ; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
3358 ; SSE-NEXT: pxor %xmm0, %xmm0
3359 ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
3360 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3361 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
3362 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
3363 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
3366 ; AVX-LABEL: uitofp_load_4i8_to_4f64:
3368 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
3369 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
3371 %ld = load <4 x i8>, <4 x i8> *%a
3372 %cvt = uitofp <4 x i8> %ld to <4 x double>
3373 ret <4 x double> %cvt
3377 ; Load Signed Integer to Float
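; There is no packed i64 -> f32 conversion before AVX512DQ, so these checks
; expect each element to be extracted (movq/vpextrq), converted with cvtsi2ssq,
; and reassembled with unpcklps/vinsertps; AVX512DQ collapses the whole
; sequence to vcvtqq2ps. Narrower signed sources sign-extend to i32 and use
; cvtdq2ps.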
3380 define <4 x float> @sitofp_load_4i64_to_4f32(<4 x i64> *%a) {
3381 ; SSE-LABEL: sitofp_load_4i64_to_4f32:
3383 ; SSE-NEXT: movdqa (%rdi), %xmm1
3384 ; SSE-NEXT: movdqa 16(%rdi), %xmm2
3385 ; SSE-NEXT: movq %xmm2, %rax
3386 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3
3387 ; SSE-NEXT: movq %xmm1, %rax
3388 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
3389 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
3390 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
3391 ; SSE-NEXT: movq %xmm2, %rax
3392 ; SSE-NEXT: xorps %xmm2, %xmm2
3393 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
3394 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
3395 ; SSE-NEXT: movq %xmm1, %rax
3396 ; SSE-NEXT: xorps %xmm1, %xmm1
3397 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
3398 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3399 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3402 ; AVX1-LABEL: sitofp_load_4i64_to_4f32:
3404 ; AVX1-NEXT: vmovdqa (%rdi), %ymm0
3405 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
3406 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
3407 ; AVX1-NEXT: vmovq %xmm0, %rax
3408 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3409 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
3410 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3411 ; AVX1-NEXT: vmovq %xmm0, %rax
3412 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
3413 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
3414 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
3415 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
3416 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
3417 ; AVX1-NEXT: vzeroupper
3420 ; AVX2-LABEL: sitofp_load_4i64_to_4f32:
3422 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
3423 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
3424 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
3425 ; AVX2-NEXT: vmovq %xmm0, %rax
3426 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3427 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
3428 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
3429 ; AVX2-NEXT: vmovq %xmm0, %rax
3430 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
3431 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
3432 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
3433 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
3434 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
3435 ; AVX2-NEXT: vzeroupper
3438 ; AVX512F-LABEL: sitofp_load_4i64_to_4f32:
3440 ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
3441 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
3442 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
3443 ; AVX512F-NEXT: vmovq %xmm0, %rax
3444 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3445 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
3446 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
3447 ; AVX512F-NEXT: vmovq %xmm0, %rax
3448 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
3449 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
3450 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
3451 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
3452 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
3453 ; AVX512F-NEXT: vzeroupper
3454 ; AVX512F-NEXT: retq
3456 ; AVX512VL-LABEL: sitofp_load_4i64_to_4f32:
3458 ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
3459 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
3460 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
3461 ; AVX512VL-NEXT: vmovq %xmm0, %rax
3462 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3463 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
3464 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
3465 ; AVX512VL-NEXT: vmovq %xmm0, %rax
3466 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
3467 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
3468 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
3469 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
3470 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
3471 ; AVX512VL-NEXT: vzeroupper
3472 ; AVX512VL-NEXT: retq
3474 ; AVX512DQ-LABEL: sitofp_load_4i64_to_4f32:
3476 ; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0
3477 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
3478 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
3479 ; AVX512DQ-NEXT: vzeroupper
3480 ; AVX512DQ-NEXT: retq
3482 ; AVX512VLDQ-LABEL: sitofp_load_4i64_to_4f32:
3483 ; AVX512VLDQ: # BB#0:
3484 ; AVX512VLDQ-NEXT: vcvtqq2psy (%rdi), %xmm0
3485 ; AVX512VLDQ-NEXT: retq
3486 %ld = load <4 x i64>, <4 x i64> *%a
3487 %cvt = sitofp <4 x i64> %ld to <4 x float>
3488 ret <4 x float> %cvt
3491 define <4 x float> @sitofp_load_4i32_to_4f32(<4 x i32> *%a) {
3492 ; SSE-LABEL: sitofp_load_4i32_to_4f32:
3494 ; SSE-NEXT: cvtdq2ps (%rdi), %xmm0
3497 ; AVX-LABEL: sitofp_load_4i32_to_4f32:
3499 ; AVX-NEXT: vcvtdq2ps (%rdi), %xmm0
3501 %ld = load <4 x i32>, <4 x i32> *%a
3502 %cvt = sitofp <4 x i32> %ld to <4 x float>
3503 ret <4 x float> %cvt
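; Pre-SSE4.1 there is no pmovsx, so the SSE expansion sign-extends i16/i8 by
; unpacking the elements into the high half of each i32 lane and shifting back
; down arithmetically (punpcklwd + psrad $16, or punpcklbw + punpcklwd +
; psrad $24) before cvtdq2ps; the AVX targets use vpmovsxwd/vpmovsxbd.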
3506 define <4 x float> @sitofp_load_4i16_to_4f32(<4 x i16> *%a) {
3507 ; SSE-LABEL: sitofp_load_4i16_to_4f32:
3509 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3510 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
3511 ; SSE-NEXT: psrad $16, %xmm0
3512 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
3515 ; AVX-LABEL: sitofp_load_4i16_to_4f32:
3517 ; AVX-NEXT: vpmovsxwd (%rdi), %xmm0
3518 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
3520 %ld = load <4 x i16>, <4 x i16> *%a
3521 %cvt = sitofp <4 x i16> %ld to <4 x float>
3522 ret <4 x float> %cvt
3525 define <4 x float> @sitofp_load_4i8_to_4f32(<4 x i8> *%a) {
3526 ; SSE-LABEL: sitofp_load_4i8_to_4f32:
3528 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3529 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3530 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
3531 ; SSE-NEXT: psrad $24, %xmm0
3532 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
3535 ; AVX-LABEL: sitofp_load_4i8_to_4f32:
3537 ; AVX-NEXT: vpmovsxbd (%rdi), %xmm0
3538 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
3540 %ld = load <4 x i8>, <4 x i8> *%a
3541 %cvt = sitofp <4 x i8> %ld to <4 x float>
3542 ret <4 x float> %cvt
3545 define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) {
3546 ; SSE-LABEL: sitofp_load_8i64_to_8f32:
3548 ; SSE-NEXT: movdqa (%rdi), %xmm1
3549 ; SSE-NEXT: movdqa 16(%rdi), %xmm2
3550 ; SSE-NEXT: movdqa 32(%rdi), %xmm3
3551 ; SSE-NEXT: movdqa 48(%rdi), %xmm4
3552 ; SSE-NEXT: movq %xmm2, %rax
3553 ; SSE-NEXT: cvtsi2ssq %rax, %xmm5
3554 ; SSE-NEXT: movq %xmm1, %rax
3555 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
3556 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
3557 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
3558 ; SSE-NEXT: movq %xmm2, %rax
3559 ; SSE-NEXT: xorps %xmm2, %xmm2
3560 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
3561 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
3562 ; SSE-NEXT: movq %xmm1, %rax
3563 ; SSE-NEXT: xorps %xmm1, %xmm1
3564 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
3565 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3566 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3567 ; SSE-NEXT: movq %xmm4, %rax
3568 ; SSE-NEXT: xorps %xmm2, %xmm2
3569 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
3570 ; SSE-NEXT: movq %xmm3, %rax
3571 ; SSE-NEXT: xorps %xmm1, %xmm1
3572 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
3573 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3574 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
3575 ; SSE-NEXT: movq %xmm2, %rax
3576 ; SSE-NEXT: xorps %xmm2, %xmm2
3577 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
3578 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
3579 ; SSE-NEXT: movq %xmm3, %rax
3580 ; SSE-NEXT: xorps %xmm3, %xmm3
3581 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3
3582 ; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
3583 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
3586 ; AVX1-LABEL: sitofp_load_8i64_to_8f32:
3588 ; AVX1-NEXT: vmovdqa (%rdi), %ymm0
3589 ; AVX1-NEXT: vmovdqa 32(%rdi), %ymm1
3590 ; AVX1-NEXT: vpextrq $1, %xmm1, %rax
3591 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3592 ; AVX1-NEXT: vmovq %xmm1, %rax
3593 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm3
3594 ; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
3595 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
3596 ; AVX1-NEXT: vmovq %xmm1, %rax
3597 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3598 ; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
3599 ; AVX1-NEXT: vpextrq $1, %xmm1, %rax
3600 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm1
3601 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
3602 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
3603 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm2
3604 ; AVX1-NEXT: vmovq %xmm0, %rax
3605 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3606 ; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
3607 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3608 ; AVX1-NEXT: vmovq %xmm0, %rax
3609 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3610 ; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
3611 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
3612 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm0
3613 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
3614 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3617 ; AVX2-LABEL: sitofp_load_8i64_to_8f32:
3619 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
3620 ; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1
3621 ; AVX2-NEXT: vpextrq $1, %xmm1, %rax
3622 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3623 ; AVX2-NEXT: vmovq %xmm1, %rax
3624 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm3
3625 ; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
3626 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
3627 ; AVX2-NEXT: vmovq %xmm1, %rax
3628 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3629 ; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
3630 ; AVX2-NEXT: vpextrq $1, %xmm1, %rax
3631 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm1
3632 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
3633 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
3634 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm2
3635 ; AVX2-NEXT: vmovq %xmm0, %rax
3636 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3637 ; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
3638 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
3639 ; AVX2-NEXT: vmovq %xmm0, %rax
3640 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3641 ; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
3642 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
3643 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm0
3644 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
3645 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3648 ; AVX512F-LABEL: sitofp_load_8i64_to_8f32:
3650 ; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0
3651 ; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm1
3652 ; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
3653 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3654 ; AVX512F-NEXT: vmovq %xmm1, %rax
3655 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm1
3656 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
3657 ; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm2
3658 ; AVX512F-NEXT: vmovq %xmm2, %rax
3659 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm3
3660 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
3661 ; AVX512F-NEXT: vpextrq $1, %xmm2, %rax
3662 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm2
3663 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
3664 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
3665 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm2
3666 ; AVX512F-NEXT: vmovq %xmm0, %rax
3667 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3668 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
3669 ; AVX512F-NEXT: vextracti32x4 $1, %zmm0, %xmm0
3670 ; AVX512F-NEXT: vmovq %xmm0, %rax
3671 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3672 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
3673 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
3674 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm0
3675 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
3676 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3677 ; AVX512F-NEXT: retq
3679 ; AVX512VL-LABEL: sitofp_load_8i64_to_8f32:
3681 ; AVX512VL-NEXT: vmovdqa64 (%rdi), %zmm0
3682 ; AVX512VL-NEXT: vextracti32x4 $2, %zmm0, %xmm1
3683 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
3684 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3685 ; AVX512VL-NEXT: vmovq %xmm1, %rax
3686 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm1
3687 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
3688 ; AVX512VL-NEXT: vextracti32x4 $3, %zmm0, %xmm2
3689 ; AVX512VL-NEXT: vmovq %xmm2, %rax
3690 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm3
3691 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
3692 ; AVX512VL-NEXT: vpextrq $1, %xmm2, %rax
3693 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm2
3694 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
3695 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
3696 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm2
3697 ; AVX512VL-NEXT: vmovq %xmm0, %rax
3698 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3699 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
3700 ; AVX512VL-NEXT: vextracti32x4 $1, %zmm0, %xmm0
3701 ; AVX512VL-NEXT: vmovq %xmm0, %rax
3702 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3703 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
3704 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
3705 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm0
3706 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
3707 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3708 ; AVX512VL-NEXT: retq
3710 ; AVX512DQ-LABEL: sitofp_load_8i64_to_8f32:
3712 ; AVX512DQ-NEXT: vcvtqq2ps (%rdi), %ymm0
3713 ; AVX512DQ-NEXT: retq
3715 ; AVX512VLDQ-LABEL: sitofp_load_8i64_to_8f32:
3716 ; AVX512VLDQ: # BB#0:
3717 ; AVX512VLDQ-NEXT: vcvtqq2ps (%rdi), %ymm0
3718 ; AVX512VLDQ-NEXT: retq
3719 %ld = load <8 x i64>, <8 x i64> *%a
3720 %cvt = sitofp <8 x i64> %ld to <8 x float>
3721 ret <8 x float> %cvt
3724 define <8 x float> @sitofp_load_8i32_to_8f32(<8 x i32> *%a) {
3725 ; SSE-LABEL: sitofp_load_8i32_to_8f32:
3727 ; SSE-NEXT: cvtdq2ps (%rdi), %xmm0
3728 ; SSE-NEXT: cvtdq2ps 16(%rdi), %xmm1
3731 ; AVX-LABEL: sitofp_load_8i32_to_8f32:
3733 ; AVX-NEXT: vcvtdq2ps (%rdi), %ymm0
3735 %ld = load <8 x i32>, <8 x i32> *%a
3736 %cvt = sitofp <8 x i32> %ld to <8 x float>
3737 ret <8 x float> %cvt
3740 define <8 x float> @sitofp_load_8i16_to_8f32(<8 x i16> *%a) {
3741 ; SSE-LABEL: sitofp_load_8i16_to_8f32:
3743 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3744 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
3745 ; SSE-NEXT: psrad $16, %xmm0
3746 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
3747 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
3748 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
3749 ; SSE-NEXT: psrad $16, %xmm1
3750 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
3753 ; AVX1-LABEL: sitofp_load_8i16_to_8f32:
3755 ; AVX1-NEXT: vpmovsxwd (%rdi), %xmm0
3756 ; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm1
3757 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3758 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
3761 ; AVX2-LABEL: sitofp_load_8i16_to_8f32:
3763 ; AVX2-NEXT: vpmovsxwd (%rdi), %ymm0
3764 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
3767 ; AVX512-LABEL: sitofp_load_8i16_to_8f32:
3769 ; AVX512-NEXT: vpmovsxwd (%rdi), %ymm0
3770 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
3772 %ld = load <8 x i16>, <8 x i16> *%a
3773 %cvt = sitofp <8 x i16> %ld to <8 x float>
3774 ret <8 x float> %cvt
3777 define <8 x float> @sitofp_load_8i8_to_8f32(<8 x i8> *%a) {
3778 ; SSE-LABEL: sitofp_load_8i8_to_8f32:
3780 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3781 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3782 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
3783 ; SSE-NEXT: psrad $24, %xmm0
3784 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
3785 ; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
3786 ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3787 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
3788 ; SSE-NEXT: psrad $24, %xmm1
3789 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
3792 ; AVX1-LABEL: sitofp_load_8i8_to_8f32:
3794 ; AVX1-NEXT: vpmovsxbw (%rdi), %xmm0
3795 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
3796 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
3797 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
3798 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3799 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
3802 ; AVX2-LABEL: sitofp_load_8i8_to_8f32:
3804 ; AVX2-NEXT: vpmovsxbd (%rdi), %ymm0
3805 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
3808 ; AVX512-LABEL: sitofp_load_8i8_to_8f32:
3810 ; AVX512-NEXT: vpmovsxbd (%rdi), %ymm0
3811 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
3813 %ld = load <8 x i8>, <8 x i8> *%a
3814 %cvt = sitofp <8 x i8> %ld to <8 x float>
3815 ret <8 x float> %cvt
3819 ; Load Unsigned Integer to Float
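; u64 -> f32 without AVX512 branches on the sign bit of each element: a value
; below 2^63 converts directly with cvtsi2ssq, otherwise it is halved with the
; low bit ORed back in (shrq/andl $1/orq, preserving the sticky rounding bit),
; converted, and doubled with addss/vaddss, hence the .LBB*_N blocks below.
; AVX512F uses scalar vcvtusi2ssq, and AVX512DQ has packed vcvtuqq2ps.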
3822 define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
3823 ; SSE-LABEL: uitofp_load_4i64_to_4f32:
3825 ; SSE-NEXT: movdqa (%rdi), %xmm1
3826 ; SSE-NEXT: movdqa 16(%rdi), %xmm3
3827 ; SSE-NEXT: movq %xmm3, %rax
3828 ; SSE-NEXT: testq %rax, %rax
3829 ; SSE-NEXT: js .LBB76_1
3831 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
3832 ; SSE-NEXT: jmp .LBB76_3
3833 ; SSE-NEXT: .LBB76_1:
3834 ; SSE-NEXT: movq %rax, %rcx
3835 ; SSE-NEXT: shrq %rcx
3836 ; SSE-NEXT: andl $1, %eax
3837 ; SSE-NEXT: orq %rcx, %rax
3838 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
3839 ; SSE-NEXT: addss %xmm2, %xmm2
3840 ; SSE-NEXT: .LBB76_3:
3841 ; SSE-NEXT: movq %xmm1, %rax
3842 ; SSE-NEXT: testq %rax, %rax
3843 ; SSE-NEXT: js .LBB76_4
3845 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
3846 ; SSE-NEXT: jmp .LBB76_6
3847 ; SSE-NEXT: .LBB76_4:
3848 ; SSE-NEXT: movq %rax, %rcx
3849 ; SSE-NEXT: shrq %rcx
3850 ; SSE-NEXT: andl $1, %eax
3851 ; SSE-NEXT: orq %rcx, %rax
3852 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
3853 ; SSE-NEXT: addss %xmm0, %xmm0
3854 ; SSE-NEXT: .LBB76_6:
3855 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
3856 ; SSE-NEXT: movq %xmm3, %rax
3857 ; SSE-NEXT: testq %rax, %rax
3858 ; SSE-NEXT: js .LBB76_7
3860 ; SSE-NEXT: xorps %xmm3, %xmm3
3861 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3
3862 ; SSE-NEXT: jmp .LBB76_9
3863 ; SSE-NEXT: .LBB76_7:
3864 ; SSE-NEXT: movq %rax, %rcx
3865 ; SSE-NEXT: shrq %rcx
3866 ; SSE-NEXT: andl $1, %eax
3867 ; SSE-NEXT: orq %rcx, %rax
3868 ; SSE-NEXT: xorps %xmm3, %xmm3
3869 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3
3870 ; SSE-NEXT: addss %xmm3, %xmm3
3871 ; SSE-NEXT: .LBB76_9:
3872 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3873 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
3874 ; SSE-NEXT: movq %xmm1, %rax
3875 ; SSE-NEXT: testq %rax, %rax
3876 ; SSE-NEXT: js .LBB76_10
3877 ; SSE-NEXT: # BB#11:
3878 ; SSE-NEXT: xorps %xmm1, %xmm1
3879 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
3880 ; SSE-NEXT: jmp .LBB76_12
3881 ; SSE-NEXT: .LBB76_10:
3882 ; SSE-NEXT: movq %rax, %rcx
3883 ; SSE-NEXT: shrq %rcx
3884 ; SSE-NEXT: andl $1, %eax
3885 ; SSE-NEXT: orq %rcx, %rax
3886 ; SSE-NEXT: xorps %xmm1, %xmm1
3887 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
3888 ; SSE-NEXT: addss %xmm1, %xmm1
3889 ; SSE-NEXT: .LBB76_12:
3890 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
3891 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3894 ; AVX1-LABEL: uitofp_load_4i64_to_4f32:
3896 ; AVX1-NEXT: vmovdqa (%rdi), %ymm0
3897 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
3898 ; AVX1-NEXT: testq %rax, %rax
3899 ; AVX1-NEXT: js .LBB76_1
3900 ; AVX1-NEXT: # BB#2:
3901 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
3902 ; AVX1-NEXT: jmp .LBB76_3
3903 ; AVX1-NEXT: .LBB76_1:
3904 ; AVX1-NEXT: movq %rax, %rcx
3905 ; AVX1-NEXT: shrq %rcx
3906 ; AVX1-NEXT: andl $1, %eax
3907 ; AVX1-NEXT: orq %rcx, %rax
3908 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
3909 ; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
3910 ; AVX1-NEXT: .LBB76_3:
3911 ; AVX1-NEXT: vmovq %xmm0, %rax
3912 ; AVX1-NEXT: testq %rax, %rax
3913 ; AVX1-NEXT: js .LBB76_4
3914 ; AVX1-NEXT: # BB#5:
3915 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3916 ; AVX1-NEXT: jmp .LBB76_6
3917 ; AVX1-NEXT: .LBB76_4:
3918 ; AVX1-NEXT: movq %rax, %rcx
3919 ; AVX1-NEXT: shrq %rcx
3920 ; AVX1-NEXT: andl $1, %eax
3921 ; AVX1-NEXT: orq %rcx, %rax
3922 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3923 ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
3924 ; AVX1-NEXT: .LBB76_6:
3925 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
3926 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3927 ; AVX1-NEXT: vmovq %xmm0, %rax
3928 ; AVX1-NEXT: testq %rax, %rax
3929 ; AVX1-NEXT: js .LBB76_7
3930 ; AVX1-NEXT: # BB#8:
3931 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
3932 ; AVX1-NEXT: jmp .LBB76_9
3933 ; AVX1-NEXT: .LBB76_7:
3934 ; AVX1-NEXT: movq %rax, %rcx
3935 ; AVX1-NEXT: shrq %rcx
3936 ; AVX1-NEXT: andl $1, %eax
3937 ; AVX1-NEXT: orq %rcx, %rax
3938 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
3939 ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
3940 ; AVX1-NEXT: .LBB76_9:
3941 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
3942 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
3943 ; AVX1-NEXT: testq %rax, %rax
3944 ; AVX1-NEXT: js .LBB76_10
3945 ; AVX1-NEXT: # BB#11:
3946 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
3947 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
3948 ; AVX1-NEXT: vzeroupper
3950 ; AVX1-NEXT: .LBB76_10:
3951 ; AVX1-NEXT: movq %rax, %rcx
3952 ; AVX1-NEXT: shrq %rcx
3953 ; AVX1-NEXT: andl $1, %eax
3954 ; AVX1-NEXT: orq %rcx, %rax
3955 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
3956 ; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
3957 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
3958 ; AVX1-NEXT: vzeroupper
3961 ; AVX2-LABEL: uitofp_load_4i64_to_4f32:
3963 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
3964 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
3965 ; AVX2-NEXT: testq %rax, %rax
3966 ; AVX2-NEXT: js .LBB76_1
3967 ; AVX2-NEXT: # BB#2:
3968 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
3969 ; AVX2-NEXT: jmp .LBB76_3
3970 ; AVX2-NEXT: .LBB76_1:
3971 ; AVX2-NEXT: movq %rax, %rcx
3972 ; AVX2-NEXT: shrq %rcx
3973 ; AVX2-NEXT: andl $1, %eax
3974 ; AVX2-NEXT: orq %rcx, %rax
3975 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
3976 ; AVX2-NEXT: vaddss %xmm1, %xmm1, %xmm1
3977 ; AVX2-NEXT: .LBB76_3:
3978 ; AVX2-NEXT: vmovq %xmm0, %rax
3979 ; AVX2-NEXT: testq %rax, %rax
3980 ; AVX2-NEXT: js .LBB76_4
3981 ; AVX2-NEXT: # BB#5:
3982 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3983 ; AVX2-NEXT: jmp .LBB76_6
3984 ; AVX2-NEXT: .LBB76_4:
3985 ; AVX2-NEXT: movq %rax, %rcx
3986 ; AVX2-NEXT: shrq %rcx
3987 ; AVX2-NEXT: andl $1, %eax
3988 ; AVX2-NEXT: orq %rcx, %rax
3989 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3990 ; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2
3991 ; AVX2-NEXT: .LBB76_6:
3992 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
3993 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
3994 ; AVX2-NEXT: vmovq %xmm0, %rax
3995 ; AVX2-NEXT: testq %rax, %rax
3996 ; AVX2-NEXT: js .LBB76_7
3997 ; AVX2-NEXT: # BB#8:
3998 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
3999 ; AVX2-NEXT: jmp .LBB76_9
4000 ; AVX2-NEXT: .LBB76_7:
4001 ; AVX2-NEXT: movq %rax, %rcx
4002 ; AVX2-NEXT: shrq %rcx
4003 ; AVX2-NEXT: andl $1, %eax
4004 ; AVX2-NEXT: orq %rcx, %rax
4005 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
4006 ; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2
4007 ; AVX2-NEXT: .LBB76_9:
4008 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
4009 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
4010 ; AVX2-NEXT: testq %rax, %rax
4011 ; AVX2-NEXT: js .LBB76_10
4012 ; AVX2-NEXT: # BB#11:
4013 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
4014 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
4015 ; AVX2-NEXT: vzeroupper
4017 ; AVX2-NEXT: .LBB76_10:
4018 ; AVX2-NEXT: movq %rax, %rcx
4019 ; AVX2-NEXT: shrq %rcx
4020 ; AVX2-NEXT: andl $1, %eax
4021 ; AVX2-NEXT: orq %rcx, %rax
4022 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
4023 ; AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm0
4024 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
4025 ; AVX2-NEXT: vzeroupper
4028 ; AVX512F-LABEL: uitofp_load_4i64_to_4f32:
4030 ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
4031 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
4032 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
4033 ; AVX512F-NEXT: vmovq %xmm0, %rax
4034 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
4035 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
4036 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
4037 ; AVX512F-NEXT: vmovq %xmm0, %rax
4038 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2
4039 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
4040 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
4041 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0
4042 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
4043 ; AVX512F-NEXT: vzeroupper
4044 ; AVX512F-NEXT: retq
4046 ; AVX512VL-LABEL: uitofp_load_4i64_to_4f32:
4048 ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
4049 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
4050 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
4051 ; AVX512VL-NEXT: vmovq %xmm0, %rax
4052 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
4053 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
4054 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
4055 ; AVX512VL-NEXT: vmovq %xmm0, %rax
4056 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2
4057 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
4058 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
4059 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0
4060 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
4061 ; AVX512VL-NEXT: vzeroupper
4062 ; AVX512VL-NEXT: retq
4064 ; AVX512DQ-LABEL: uitofp_load_4i64_to_4f32:
4066 ; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0
4067 ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
4068 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
4069 ; AVX512DQ-NEXT: vzeroupper
4070 ; AVX512DQ-NEXT: retq
4072 ; AVX512VLDQ-LABEL: uitofp_load_4i64_to_4f32:
4073 ; AVX512VLDQ: # BB#0:
4074 ; AVX512VLDQ-NEXT: vcvtuqq2psy (%rdi), %xmm0
4075 ; AVX512VLDQ-NEXT: retq
4076 %ld = load <4 x i64>, <4 x i64> *%a
4077 %cvt = uitofp <4 x i64> %ld to <4 x float>
4078 ret <4 x float> %cvt
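; u32 -> f32 uses paired magic constants: the low 16 bits of each element are
; merged with a 0x4B000000 (2^23) exponent word and the high 16 bits with
; 0x53000000 (2^39); adding the negative combined bias (-5.497642e+11, i.e.
; -(2^39 + 2^23)) and then the two halves yields the original value. The
; constants are loaded from memory here ({{.*}}(%rip)), but they are spelled
; out in the 8 x i32 test further down.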
4081 define <4 x float> @uitofp_load_4i32_to_4f32(<4 x i32> *%a) {
4082 ; SSE-LABEL: uitofp_load_4i32_to_4f32:
4084 ; SSE-NEXT: movdqa (%rdi), %xmm0
4085 ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
4086 ; SSE-NEXT: pand %xmm0, %xmm1
4087 ; SSE-NEXT: por {{.*}}(%rip), %xmm1
4088 ; SSE-NEXT: psrld $16, %xmm0
4089 ; SSE-NEXT: por {{.*}}(%rip), %xmm0
4090 ; SSE-NEXT: addps {{.*}}(%rip), %xmm0
4091 ; SSE-NEXT: addps %xmm1, %xmm0
4094 ; AVX1-LABEL: uitofp_load_4i32_to_4f32:
4096 ; AVX1-NEXT: vmovdqa (%rdi), %xmm0
4097 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
4098 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
4099 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
4100 ; AVX1-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
4101 ; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0
4104 ; AVX2-LABEL: uitofp_load_4i32_to_4f32:
4106 ; AVX2-NEXT: vmovdqa (%rdi), %xmm0
4107 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
4108 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
4109 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
4110 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
4111 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
4112 ; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
4113 ; AVX2-NEXT: vaddps %xmm2, %xmm0, %xmm0
4114 ; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0
4117 ; AVX512F-LABEL: uitofp_load_4i32_to_4f32:
4119 ; AVX512F-NEXT: vmovaps (%rdi), %xmm0
4120 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
4121 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
4122 ; AVX512F-NEXT: vzeroupper
4123 ; AVX512F-NEXT: retq
4125 ; AVX512VL-LABEL: uitofp_load_4i32_to_4f32:
4127 ; AVX512VL-NEXT: vcvtudq2ps (%rdi), %xmm0
4128 ; AVX512VL-NEXT: retq
4130 ; AVX512DQ-LABEL: uitofp_load_4i32_to_4f32:
4132 ; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
4133 ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
4134 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
4135 ; AVX512DQ-NEXT: vzeroupper
4136 ; AVX512DQ-NEXT: retq
4138 ; AVX512VLDQ-LABEL: uitofp_load_4i32_to_4f32:
4139 ; AVX512VLDQ: # BB#0:
4140 ; AVX512VLDQ-NEXT: vcvtudq2ps (%rdi), %xmm0
4141 ; AVX512VLDQ-NEXT: retq
4142 %ld = load <4 x i32>, <4 x i32> *%a
4143 %cvt = uitofp <4 x i32> %ld to <4 x float>
4144 ret <4 x float> %cvt
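; As in the f64 cases above, u16/u8 sources cannot overflow the signed i32
; range, so they are simply zero-extended and handed to cvtdq2ps / vcvtdq2ps.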
4147 define <4 x float> @uitofp_load_4i16_to_4f32(<4 x i16> *%a) {
4148 ; SSE-LABEL: uitofp_load_4i16_to_4f32:
4150 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
4151 ; SSE-NEXT: pxor %xmm1, %xmm1
4152 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
4153 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
4156 ; AVX-LABEL: uitofp_load_4i16_to_4f32:
4158 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
4159 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
4161 %ld = load <4 x i16>, <4 x i16> *%a
4162 %cvt = uitofp <4 x i16> %ld to <4 x float>
4163 ret <4 x float> %cvt
4166 define <4 x float> @uitofp_load_4i8_to_4f32(<4 x i8> *%a) {
4167 ; SSE-LABEL: uitofp_load_4i8_to_4f32:
4169 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
4170 ; SSE-NEXT: pxor %xmm1, %xmm1
4171 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
4172 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
4173 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
4176 ; AVX-LABEL: uitofp_load_4i8_to_4f32:
4178 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
4179 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
4181 %ld = load <4 x i8>, <4 x i8> *%a
4182 %cvt = uitofp <4 x i8> %ld to <4 x float>
4183 ret <4 x float> %cvt
4186 define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
4187 ; SSE-LABEL: uitofp_load_8i64_to_8f32:
4189 ; SSE-NEXT: movdqa (%rdi), %xmm1
4190 ; SSE-NEXT: movdqa 16(%rdi), %xmm5
4191 ; SSE-NEXT: movdqa 32(%rdi), %xmm2
4192 ; SSE-NEXT: movdqa 48(%rdi), %xmm3
4193 ; SSE-NEXT: movq %xmm5, %rax
4194 ; SSE-NEXT: testq %rax, %rax
4195 ; SSE-NEXT: js .LBB80_1
4197 ; SSE-NEXT: cvtsi2ssq %rax, %xmm4
4198 ; SSE-NEXT: jmp .LBB80_3
4199 ; SSE-NEXT: .LBB80_1:
4200 ; SSE-NEXT: movq %rax, %rcx
4201 ; SSE-NEXT: shrq %rcx
4202 ; SSE-NEXT: andl $1, %eax
4203 ; SSE-NEXT: orq %rcx, %rax
4204 ; SSE-NEXT: cvtsi2ssq %rax, %xmm4
4205 ; SSE-NEXT: addss %xmm4, %xmm4
4206 ; SSE-NEXT: .LBB80_3:
4207 ; SSE-NEXT: movq %xmm1, %rax
4208 ; SSE-NEXT: testq %rax, %rax
4209 ; SSE-NEXT: js .LBB80_4
4211 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
4212 ; SSE-NEXT: jmp .LBB80_6
4213 ; SSE-NEXT: .LBB80_4:
4214 ; SSE-NEXT: movq %rax, %rcx
4215 ; SSE-NEXT: shrq %rcx
4216 ; SSE-NEXT: andl $1, %eax
4217 ; SSE-NEXT: orq %rcx, %rax
4218 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
4219 ; SSE-NEXT: addss %xmm0, %xmm0
4220 ; SSE-NEXT: .LBB80_6:
4221 ; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
4222 ; SSE-NEXT: movq %xmm5, %rax
4223 ; SSE-NEXT: testq %rax, %rax
4224 ; SSE-NEXT: js .LBB80_7
4226 ; SSE-NEXT: cvtsi2ssq %rax, %xmm6
4227 ; SSE-NEXT: jmp .LBB80_9
4228 ; SSE-NEXT: .LBB80_7:
4229 ; SSE-NEXT: movq %rax, %rcx
4230 ; SSE-NEXT: shrq %rcx
4231 ; SSE-NEXT: andl $1, %eax
4232 ; SSE-NEXT: orq %rcx, %rax
4233 ; SSE-NEXT: cvtsi2ssq %rax, %xmm6
4234 ; SSE-NEXT: addss %xmm6, %xmm6
4235 ; SSE-NEXT: .LBB80_9:
4236 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
4237 ; SSE-NEXT: movq %xmm1, %rax
4238 ; SSE-NEXT: testq %rax, %rax
4239 ; SSE-NEXT: js .LBB80_10
4240 ; SSE-NEXT: # BB#11:
4241 ; SSE-NEXT: xorps %xmm5, %xmm5
4242 ; SSE-NEXT: cvtsi2ssq %rax, %xmm5
4243 ; SSE-NEXT: jmp .LBB80_12
4244 ; SSE-NEXT: .LBB80_10:
4245 ; SSE-NEXT: movq %rax, %rcx
4246 ; SSE-NEXT: shrq %rcx
4247 ; SSE-NEXT: andl $1, %eax
4248 ; SSE-NEXT: orq %rcx, %rax
4249 ; SSE-NEXT: xorps %xmm5, %xmm5
4250 ; SSE-NEXT: cvtsi2ssq %rax, %xmm5
4251 ; SSE-NEXT: addss %xmm5, %xmm5
4252 ; SSE-NEXT: .LBB80_12:
4253 ; SSE-NEXT: movq %xmm3, %rax
4254 ; SSE-NEXT: testq %rax, %rax
4255 ; SSE-NEXT: js .LBB80_13
4256 ; SSE-NEXT: # BB#14:
4257 ; SSE-NEXT: cvtsi2ssq %rax, %xmm7
4258 ; SSE-NEXT: jmp .LBB80_15
4259 ; SSE-NEXT: .LBB80_13:
4260 ; SSE-NEXT: movq %rax, %rcx
4261 ; SSE-NEXT: shrq %rcx
4262 ; SSE-NEXT: andl $1, %eax
4263 ; SSE-NEXT: orq %rcx, %rax
4264 ; SSE-NEXT: cvtsi2ssq %rax, %xmm7
4265 ; SSE-NEXT: addss %xmm7, %xmm7
4266 ; SSE-NEXT: .LBB80_15:
4267 ; SSE-NEXT: movq %xmm2, %rax
4268 ; SSE-NEXT: testq %rax, %rax
4269 ; SSE-NEXT: js .LBB80_16
4270 ; SSE-NEXT: # BB#17:
4271 ; SSE-NEXT: xorps %xmm1, %xmm1
4272 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
4273 ; SSE-NEXT: jmp .LBB80_18
4274 ; SSE-NEXT: .LBB80_16:
4275 ; SSE-NEXT: movq %rax, %rcx
4276 ; SSE-NEXT: shrq %rcx
4277 ; SSE-NEXT: andl $1, %eax
4278 ; SSE-NEXT: orq %rcx, %rax
4279 ; SSE-NEXT: xorps %xmm1, %xmm1
4280 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
4281 ; SSE-NEXT: addss %xmm1, %xmm1
4282 ; SSE-NEXT: .LBB80_18:
4283 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
4284 ; SSE-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
4285 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
4286 ; SSE-NEXT: movq %xmm3, %rax
4287 ; SSE-NEXT: testq %rax, %rax
4288 ; SSE-NEXT: js .LBB80_19
4289 ; SSE-NEXT: # BB#20:
4290 ; SSE-NEXT: xorps %xmm3, %xmm3
4291 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3
4292 ; SSE-NEXT: jmp .LBB80_21
4293 ; SSE-NEXT: .LBB80_19:
4294 ; SSE-NEXT: movq %rax, %rcx
4295 ; SSE-NEXT: shrq %rcx
4296 ; SSE-NEXT: andl $1, %eax
4297 ; SSE-NEXT: orq %rcx, %rax
4298 ; SSE-NEXT: xorps %xmm3, %xmm3
4299 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3
4300 ; SSE-NEXT: addss %xmm3, %xmm3
4301 ; SSE-NEXT: .LBB80_21:
4302 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
4303 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1]
4304 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
4305 ; SSE-NEXT: movq %xmm2, %rax
4306 ; SSE-NEXT: testq %rax, %rax
4307 ; SSE-NEXT: js .LBB80_22
4308 ; SSE-NEXT: # BB#23:
4309 ; SSE-NEXT: xorps %xmm2, %xmm2
4310 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
4311 ; SSE-NEXT: jmp .LBB80_24
4312 ; SSE-NEXT: .LBB80_22:
4313 ; SSE-NEXT: movq %rax, %rcx
4314 ; SSE-NEXT: shrq %rcx
4315 ; SSE-NEXT: andl $1, %eax
4316 ; SSE-NEXT: orq %rcx, %rax
4317 ; SSE-NEXT: xorps %xmm2, %xmm2
4318 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
4319 ; SSE-NEXT: addss %xmm2, %xmm2
4320 ; SSE-NEXT: .LBB80_24:
4321 ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
4322 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
4325 ; AVX1-LABEL: uitofp_load_8i64_to_8f32:
4327 ; AVX1-NEXT: vmovdqa (%rdi), %ymm0
4328 ; AVX1-NEXT: vmovdqa 32(%rdi), %ymm2
4329 ; AVX1-NEXT: vpextrq $1, %xmm2, %rax
4330 ; AVX1-NEXT: testq %rax, %rax
4331 ; AVX1-NEXT: js .LBB80_1
4332 ; AVX1-NEXT: # BB#2:
4333 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
4334 ; AVX1-NEXT: jmp .LBB80_3
4335 ; AVX1-NEXT: .LBB80_1:
4336 ; AVX1-NEXT: movq %rax, %rcx
4337 ; AVX1-NEXT: shrq %rcx
4338 ; AVX1-NEXT: andl $1, %eax
4339 ; AVX1-NEXT: orq %rcx, %rax
4340 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
4341 ; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
4342 ; AVX1-NEXT: .LBB80_3:
4343 ; AVX1-NEXT: vmovq %xmm2, %rax
4344 ; AVX1-NEXT: testq %rax, %rax
4345 ; AVX1-NEXT: js .LBB80_4
4346 ; AVX1-NEXT: # BB#5:
4347 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm4
4348 ; AVX1-NEXT: jmp .LBB80_6
4349 ; AVX1-NEXT: .LBB80_4:
4350 ; AVX1-NEXT: movq %rax, %rcx
4351 ; AVX1-NEXT: shrq %rcx
4352 ; AVX1-NEXT: andl $1, %eax
4353 ; AVX1-NEXT: orq %rcx, %rax
4354 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm3
4355 ; AVX1-NEXT: vaddss %xmm3, %xmm3, %xmm4
4356 ; AVX1-NEXT: .LBB80_6:
4357 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
4358 ; AVX1-NEXT: vmovq %xmm2, %rax
4359 ; AVX1-NEXT: testq %rax, %rax
4360 ; AVX1-NEXT: js .LBB80_7
4361 ; AVX1-NEXT: # BB#8:
4362 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm3
4363 ; AVX1-NEXT: jmp .LBB80_9
4364 ; AVX1-NEXT: .LBB80_7:
4365 ; AVX1-NEXT: movq %rax, %rcx
4366 ; AVX1-NEXT: shrq %rcx
4367 ; AVX1-NEXT: andl $1, %eax
4368 ; AVX1-NEXT: orq %rcx, %rax
4369 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm3
4370 ; AVX1-NEXT: vaddss %xmm3, %xmm3, %xmm3
4371 ; AVX1-NEXT: .LBB80_9:
4372 ; AVX1-NEXT: vpextrq $1, %xmm2, %rax
4373 ; AVX1-NEXT: testq %rax, %rax
4374 ; AVX1-NEXT: js .LBB80_10
4375 ; AVX1-NEXT: # BB#11:
4376 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm2
4377 ; AVX1-NEXT: jmp .LBB80_12
4378 ; AVX1-NEXT: .LBB80_10:
4379 ; AVX1-NEXT: movq %rax, %rcx
4380 ; AVX1-NEXT: shrq %rcx
4381 ; AVX1-NEXT: andl $1, %eax
4382 ; AVX1-NEXT: orq %rcx, %rax
4383 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm2
4384 ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
4385 ; AVX1-NEXT: .LBB80_12:
4386 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
4387 ; AVX1-NEXT: testq %rax, %rax
4388 ; AVX1-NEXT: js .LBB80_13
4389 ; AVX1-NEXT: # BB#14:
4390 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm5
4391 ; AVX1-NEXT: jmp .LBB80_15
4392 ; AVX1-NEXT: .LBB80_13:
4393 ; AVX1-NEXT: movq %rax, %rcx
4394 ; AVX1-NEXT: shrq %rcx
4395 ; AVX1-NEXT: andl $1, %eax
4396 ; AVX1-NEXT: orq %rcx, %rax
4397 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm5
4398 ; AVX1-NEXT: vaddss %xmm5, %xmm5, %xmm5
4399 ; AVX1-NEXT: .LBB80_15:
4400 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[2,3]
4401 ; AVX1-NEXT: vmovq %xmm0, %rax
4402 ; AVX1-NEXT: testq %rax, %rax
4403 ; AVX1-NEXT: js .LBB80_16
4404 ; AVX1-NEXT: # BB#17:
4405 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm4
4406 ; AVX1-NEXT: jmp .LBB80_18
4407 ; AVX1-NEXT: .LBB80_16:
4408 ; AVX1-NEXT: movq %rax, %rcx
4409 ; AVX1-NEXT: shrq %rcx
4410 ; AVX1-NEXT: andl $1, %eax
4411 ; AVX1-NEXT: orq %rcx, %rax
4412 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm4
4413 ; AVX1-NEXT: vaddss %xmm4, %xmm4, %xmm4
4414 ; AVX1-NEXT: .LBB80_18:
4415 ; AVX1-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
4416 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
4417 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
4418 ; AVX1-NEXT: vmovq %xmm3, %rax
4419 ; AVX1-NEXT: testq %rax, %rax
4420 ; AVX1-NEXT: js .LBB80_19
4421 ; AVX1-NEXT: # BB#20:
4422 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm0
4423 ; AVX1-NEXT: jmp .LBB80_21
4424 ; AVX1-NEXT: .LBB80_19:
4425 ; AVX1-NEXT: movq %rax, %rcx
4426 ; AVX1-NEXT: shrq %rcx
4427 ; AVX1-NEXT: andl $1, %eax
4428 ; AVX1-NEXT: orq %rcx, %rax
4429 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm0
4430 ; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
4431 ; AVX1-NEXT: .LBB80_21:
4432 ; AVX1-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm0[0],xmm4[3]
4433 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm2[0]
4434 ; AVX1-NEXT: vpextrq $1, %xmm3, %rax
4435 ; AVX1-NEXT: testq %rax, %rax
4436 ; AVX1-NEXT: js .LBB80_22
4437 ; AVX1-NEXT: # BB#23:
4438 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm1
4439 ; AVX1-NEXT: jmp .LBB80_24
4440 ; AVX1-NEXT: .LBB80_22:
4441 ; AVX1-NEXT: movq %rax, %rcx
4442 ; AVX1-NEXT: shrq %rcx
4443 ; AVX1-NEXT: andl $1, %eax
4444 ; AVX1-NEXT: orq %rcx, %rax
4445 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm1
4446 ; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
4447 ; AVX1-NEXT: .LBB80_24:
4448 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm4[0,1,2],xmm1[0]
4449 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4452 ; AVX2-LABEL: uitofp_load_8i64_to_8f32:
4454 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
4455 ; AVX2-NEXT: vmovdqa 32(%rdi), %ymm2
4456 ; AVX2-NEXT: vpextrq $1, %xmm2, %rax
4457 ; AVX2-NEXT: testq %rax, %rax
4458 ; AVX2-NEXT: js .LBB80_1
4459 ; AVX2-NEXT: # BB#2:
4460 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
4461 ; AVX2-NEXT: jmp .LBB80_3
4462 ; AVX2-NEXT: .LBB80_1:
4463 ; AVX2-NEXT: movq %rax, %rcx
4464 ; AVX2-NEXT: shrq %rcx
4465 ; AVX2-NEXT: andl $1, %eax
4466 ; AVX2-NEXT: orq %rcx, %rax
4467 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
4468 ; AVX2-NEXT: vaddss %xmm1, %xmm1, %xmm1
4469 ; AVX2-NEXT: .LBB80_3:
4470 ; AVX2-NEXT: vmovq %xmm2, %rax
4471 ; AVX2-NEXT: testq %rax, %rax
4472 ; AVX2-NEXT: js .LBB80_4
4473 ; AVX2-NEXT: # BB#5:
4474 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm4
4475 ; AVX2-NEXT: jmp .LBB80_6
4476 ; AVX2-NEXT: .LBB80_4:
4477 ; AVX2-NEXT: movq %rax, %rcx
4478 ; AVX2-NEXT: shrq %rcx
4479 ; AVX2-NEXT: andl $1, %eax
4480 ; AVX2-NEXT: orq %rcx, %rax
4481 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm3
4482 ; AVX2-NEXT: vaddss %xmm3, %xmm3, %xmm4
4483 ; AVX2-NEXT: .LBB80_6:
4484 ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2
4485 ; AVX2-NEXT: vmovq %xmm2, %rax
4486 ; AVX2-NEXT: testq %rax, %rax
4487 ; AVX2-NEXT: js .LBB80_7
4488 ; AVX2-NEXT: # BB#8:
4489 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm3
4490 ; AVX2-NEXT: jmp .LBB80_9
4491 ; AVX2-NEXT: .LBB80_7:
4492 ; AVX2-NEXT: movq %rax, %rcx
4493 ; AVX2-NEXT: shrq %rcx
4494 ; AVX2-NEXT: andl $1, %eax
4495 ; AVX2-NEXT: orq %rcx, %rax
4496 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm3
4497 ; AVX2-NEXT: vaddss %xmm3, %xmm3, %xmm3
4498 ; AVX2-NEXT: .LBB80_9:
4499 ; AVX2-NEXT: vpextrq $1, %xmm2, %rax
4500 ; AVX2-NEXT: testq %rax, %rax
4501 ; AVX2-NEXT: js .LBB80_10
4502 ; AVX2-NEXT: # BB#11:
4503 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm2
4504 ; AVX2-NEXT: jmp .LBB80_12
4505 ; AVX2-NEXT: .LBB80_10:
4506 ; AVX2-NEXT: movq %rax, %rcx
4507 ; AVX2-NEXT: shrq %rcx
4508 ; AVX2-NEXT: andl $1, %eax
4509 ; AVX2-NEXT: orq %rcx, %rax
4510 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm2
4511 ; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2
4512 ; AVX2-NEXT: .LBB80_12:
4513 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
4514 ; AVX2-NEXT: testq %rax, %rax
4515 ; AVX2-NEXT: js .LBB80_13
4516 ; AVX2-NEXT: # BB#14:
4517 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm5
4518 ; AVX2-NEXT: jmp .LBB80_15
4519 ; AVX2-NEXT: .LBB80_13:
4520 ; AVX2-NEXT: movq %rax, %rcx
4521 ; AVX2-NEXT: shrq %rcx
4522 ; AVX2-NEXT: andl $1, %eax
4523 ; AVX2-NEXT: orq %rcx, %rax
4524 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm5
4525 ; AVX2-NEXT: vaddss %xmm5, %xmm5, %xmm5
4526 ; AVX2-NEXT: .LBB80_15:
4527 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[2,3]
4528 ; AVX2-NEXT: vmovq %xmm0, %rax
4529 ; AVX2-NEXT: testq %rax, %rax
4530 ; AVX2-NEXT: js .LBB80_16
4531 ; AVX2-NEXT: # BB#17:
4532 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm4
4533 ; AVX2-NEXT: jmp .LBB80_18
4534 ; AVX2-NEXT: .LBB80_16:
4535 ; AVX2-NEXT: movq %rax, %rcx
4536 ; AVX2-NEXT: shrq %rcx
4537 ; AVX2-NEXT: andl $1, %eax
4538 ; AVX2-NEXT: orq %rcx, %rax
4539 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm4
4540 ; AVX2-NEXT: vaddss %xmm4, %xmm4, %xmm4
4541 ; AVX2-NEXT: .LBB80_18:
4542 ; AVX2-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
4543 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
4544 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
4545 ; AVX2-NEXT: vmovq %xmm3, %rax
4546 ; AVX2-NEXT: testq %rax, %rax
4547 ; AVX2-NEXT: js .LBB80_19
4548 ; AVX2-NEXT: # BB#20:
4549 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm0
4550 ; AVX2-NEXT: jmp .LBB80_21
4551 ; AVX2-NEXT: .LBB80_19:
4552 ; AVX2-NEXT: movq %rax, %rcx
4553 ; AVX2-NEXT: shrq %rcx
4554 ; AVX2-NEXT: andl $1, %eax
4555 ; AVX2-NEXT: orq %rcx, %rax
4556 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm0
4557 ; AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm0
4558 ; AVX2-NEXT: .LBB80_21:
4559 ; AVX2-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm0[0],xmm4[3]
4560 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm2[0]
4561 ; AVX2-NEXT: vpextrq $1, %xmm3, %rax
4562 ; AVX2-NEXT: testq %rax, %rax
4563 ; AVX2-NEXT: js .LBB80_22
4564 ; AVX2-NEXT: # BB#23:
4565 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm1
4566 ; AVX2-NEXT: jmp .LBB80_24
4567 ; AVX2-NEXT: .LBB80_22:
4568 ; AVX2-NEXT: movq %rax, %rcx
4569 ; AVX2-NEXT: shrq %rcx
4570 ; AVX2-NEXT: andl $1, %eax
4571 ; AVX2-NEXT: orq %rcx, %rax
4572 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm1
4573 ; AVX2-NEXT: vaddss %xmm1, %xmm1, %xmm1
4574 ; AVX2-NEXT: .LBB80_24:
4575 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm4[0,1,2],xmm1[0]
4576 ; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4579 ; AVX512F-LABEL: uitofp_load_8i64_to_8f32:
4581 ; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0
4582 ; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm1
4583 ; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
4584 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
4585 ; AVX512F-NEXT: vmovq %xmm1, %rax
4586 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm1
4587 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
4588 ; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm2
4589 ; AVX512F-NEXT: vmovq %xmm2, %rax
4590 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm3
4591 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
4592 ; AVX512F-NEXT: vpextrq $1, %xmm2, %rax
4593 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm2
4594 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
4595 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
4596 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm2
4597 ; AVX512F-NEXT: vmovq %xmm0, %rax
4598 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm3
4599 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
4600 ; AVX512F-NEXT: vextracti32x4 $1, %zmm0, %xmm0
4601 ; AVX512F-NEXT: vmovq %xmm0, %rax
4602 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm3
4603 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
4604 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
4605 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm0
4606 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
4607 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4608 ; AVX512F-NEXT: retq
4610 ; AVX512VL-LABEL: uitofp_load_8i64_to_8f32:
4612 ; AVX512VL-NEXT: vmovdqa64 (%rdi), %zmm0
4613 ; AVX512VL-NEXT: vextracti32x4 $2, %zmm0, %xmm1
4614 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
4615 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
4616 ; AVX512VL-NEXT: vmovq %xmm1, %rax
4617 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm1
4618 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
4619 ; AVX512VL-NEXT: vextracti32x4 $3, %zmm0, %xmm2
4620 ; AVX512VL-NEXT: vmovq %xmm2, %rax
4621 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm3
4622 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
4623 ; AVX512VL-NEXT: vpextrq $1, %xmm2, %rax
4624 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm2
4625 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
4626 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
4627 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm2
4628 ; AVX512VL-NEXT: vmovq %xmm0, %rax
4629 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm3
4630 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
4631 ; AVX512VL-NEXT: vextracti32x4 $1, %zmm0, %xmm0
4632 ; AVX512VL-NEXT: vmovq %xmm0, %rax
4633 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm3
4634 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
4635 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
4636 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm0
4637 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
4638 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4639 ; AVX512VL-NEXT: retq
4641 ; AVX512DQ-LABEL: uitofp_load_8i64_to_8f32:
4643 ; AVX512DQ-NEXT: vcvtuqq2ps (%rdi), %ymm0
4644 ; AVX512DQ-NEXT: retq
4646 ; AVX512VLDQ-LABEL: uitofp_load_8i64_to_8f32:
4647 ; AVX512VLDQ: # BB#0:
4648 ; AVX512VLDQ-NEXT: vcvtuqq2ps (%rdi), %ymm0
4649 ; AVX512VLDQ-NEXT: retq
4650 %ld = load <8 x i64>, <8 x i64> *%a
4651 %cvt = uitofp <8 x i64> %ld to <8 x float>
4652 ret <8 x float> %cvt
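; Without AVX512, each unsigned 64-bit lane above is converted with scalar code:
; lanes whose sign bit is clear use cvtsi2ssq directly, while lanes with the
; sign bit set are halved with the low bit folded back in (shrq/andl/orq),
; converted, and then doubled (vaddss). AVX512F converts each lane with
; vcvtusi2ssq, and the DQ targets use a single vcvtuqq2ps. A minimal C sketch of
; the scalar path, assuming a helper name of our own choosing (u64_to_f32) that
; is not taken from this test:
;
;   #include <stdint.h>
;
;   /* hypothetical helper: mirrors the per-lane sequence checked above */
;   static float u64_to_f32(uint64_t x) {
;     if ((int64_t)x >= 0)                 /* high bit clear: signed convert is enough */
;       return (float)(int64_t)x;          /* cvtsi2ssq */
;     uint64_t half = (x >> 1) | (x & 1);  /* halve, keep low bit so rounding is unchanged */
;     float f = (float)(int64_t)half;      /* shrq / andl $1 / orq + cvtsi2ssq */
;     return f + f;                        /* vaddss doubles the result back */
;   }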
4655 define <8 x float> @uitofp_load_8i32_to_8f32(<8 x i32> *%a) {
4656 ; SSE-LABEL: uitofp_load_8i32_to_8f32:
4658 ; SSE-NEXT: movdqa (%rdi), %xmm0
4659 ; SSE-NEXT: movdqa 16(%rdi), %xmm1
4660 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
4661 ; SSE-NEXT: movdqa %xmm0, %xmm3
4662 ; SSE-NEXT: pand %xmm2, %xmm3
4663 ; SSE-NEXT: movdqa {{.*#+}} xmm4 = [1258291200,1258291200,1258291200,1258291200]
4664 ; SSE-NEXT: por %xmm4, %xmm3
4665 ; SSE-NEXT: psrld $16, %xmm0
4666 ; SSE-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928]
4667 ; SSE-NEXT: por %xmm5, %xmm0
4668 ; SSE-NEXT: movaps {{.*#+}} xmm6 = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
4669 ; SSE-NEXT: addps %xmm6, %xmm0
4670 ; SSE-NEXT: addps %xmm3, %xmm0
4671 ; SSE-NEXT: pand %xmm1, %xmm2
4672 ; SSE-NEXT: por %xmm4, %xmm2
4673 ; SSE-NEXT: psrld $16, %xmm1
4674 ; SSE-NEXT: por %xmm5, %xmm1
4675 ; SSE-NEXT: addps %xmm6, %xmm1
4676 ; SSE-NEXT: addps %xmm2, %xmm1
4679 ; AVX1-LABEL: uitofp_load_8i32_to_8f32:
4681 ; AVX1-NEXT: vmovdqa (%rdi), %ymm0
4682 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
4683 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4684 ; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2
4685 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
4686 ; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1
4687 ; AVX1-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
4688 ; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
4689 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
4690 ; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0
4693 ; AVX2-LABEL: uitofp_load_8i32_to_8f32:
4695 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
4696 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
4697 ; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
4698 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
4699 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
4700 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
4701 ; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
4702 ; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0
4703 ; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0
4706 ; AVX512F-LABEL: uitofp_load_8i32_to_8f32:
4708 ; AVX512F-NEXT: vmovaps (%rdi), %ymm0
4709 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
4710 ; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
4711 ; AVX512F-NEXT: retq
4713 ; AVX512VL-LABEL: uitofp_load_8i32_to_8f32:
4715 ; AVX512VL-NEXT: vcvtudq2ps (%rdi), %ymm0
4716 ; AVX512VL-NEXT: retq
4718 ; AVX512DQ-LABEL: uitofp_load_8i32_to_8f32:
4720 ; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0
4721 ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
4722 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
4723 ; AVX512DQ-NEXT: retq
4725 ; AVX512VLDQ-LABEL: uitofp_load_8i32_to_8f32:
4726 ; AVX512VLDQ: # BB#0:
4727 ; AVX512VLDQ-NEXT: vcvtudq2ps (%rdi), %ymm0
4728 ; AVX512VLDQ-NEXT: retq
4729 %ld = load <8 x i32>, <8 x i32> *%a
4730 %cvt = uitofp <8 x i32> %ld to <8 x float>
4731 ret <8 x float> %cvt
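; With no unsigned i32 converter before AVX512, the SSE and AVX2 runs above
; split each element into 16-bit halves that are merged (por / vpblendw) with
; known float exponent bit patterns (0x4B000000 = 2^23 for the low half,
; 0x53000000 = 2^39 for the high half) and recombined with an add against
; -(2^39 + 2^23) = -5.497642e+11; AVX1 instead converts the two halves with the
; signed cvtdq2ps and rescales the high half, and AVX512 uses vcvtudq2ps
; directly. A minimal per-element C sketch, assuming a helper name of our own
; choosing (u32_to_f32) that is not taken from this test:
;
;   #include <stdint.h>
;   #include <string.h>
;
;   /* hypothetical helper: mirrors the constant trick checked above */
;   static float u32_to_f32(uint32_t x) {
;     uint32_t lo_bits = (x & 0xFFFFu) | 0x4B000000u;  /* pand + por: 2^23 + lo16 */
;     uint32_t hi_bits = (x >> 16)     | 0x53000000u;  /* psrld + por: 2^39 + hi16*2^16 */
;     float lo, hi;
;     memcpy(&lo, &lo_bits, sizeof lo);                /* reinterpret the bit patterns */
;     memcpy(&hi, &hi_bits, sizeof hi);
;     return (hi - 549764202496.0f) + lo;              /* addps, addps; constant is 2^39 + 2^23 */
;   }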
4734 define <8 x float> @uitofp_load_8i16_to_8f32(<8 x i16> *%a) {
4735 ; SSE-LABEL: uitofp_load_8i16_to_8f32:
4737 ; SSE-NEXT: movdqa (%rdi), %xmm1
4738 ; SSE-NEXT: pxor %xmm2, %xmm2
4739 ; SSE-NEXT: movdqa %xmm1, %xmm0
4740 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
4741 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
4742 ; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
4743 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
4746 ; AVX1-LABEL: uitofp_load_8i16_to_8f32:
4748 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
4749 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
4750 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4751 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
4754 ; AVX2-LABEL: uitofp_load_8i16_to_8f32:
4756 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
4757 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
4760 ; AVX512-LABEL: uitofp_load_8i16_to_8f32:
4762 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
4763 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
4765 %ld = load <8 x i16>, <8 x i16> *%a
4766 %cvt = uitofp <8 x i16> %ld to <8 x float>
4767 ret <8 x float> %cvt
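; Unsigned i16 values always fit in a signed i32, so the checks above only
; expect a zero-extend (punpcklwd with zero / vpmovzxwd) followed by the signed
; cvtdq2ps; no unsigned-specific fixup is needed.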
4770 define <8 x float> @uitofp_load_8i8_to_8f32(<8 x i8> *%a) {
4771 ; SSE-LABEL: uitofp_load_8i8_to_8f32:
4773 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
4774 ; SSE-NEXT: pxor %xmm2, %xmm2
4775 ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
4776 ; SSE-NEXT: movdqa %xmm1, %xmm0
4777 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
4778 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
4779 ; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
4780 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
4783 ; AVX1-LABEL: uitofp_load_8i8_to_8f32:
4785 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
4786 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
4787 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4788 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
4791 ; AVX2-LABEL: uitofp_load_8i8_to_8f32:
4793 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
4794 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
4797 ; AVX512-LABEL: uitofp_load_8i8_to_8f32:
4799 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
4800 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
4802 %ld = load <8 x i8>, <8 x i8> *%a
4803 %cvt = uitofp <8 x i8> %ld to <8 x float>
4804 ret <8 x float> %cvt
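; Unsigned i8 elements are handled the same way: zero-extend to i32
; (punpcklbw/punpcklwd with zero, or vpmovzxbd) and convert with the signed
; cvtdq2ps.
;
; Aggregates
;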
4811 %Arguments = type <{ <8 x i8>, <8 x i16>, <8 x float>* }>
4812 define void @aggregate_sitofp_8i16_to_8f32(%Arguments* nocapture readonly %a0) {
4813 ; SSE-LABEL: aggregate_sitofp_8i16_to_8f32:
4815 ; SSE-NEXT: movq 24(%rdi), %rax
4816 ; SSE-NEXT: movdqu 8(%rdi), %xmm0
4817 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
4818 ; SSE-NEXT: psrad $16, %xmm1
4819 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
4820 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
4821 ; SSE-NEXT: psrad $16, %xmm0
4822 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
4823 ; SSE-NEXT: movaps %xmm0, 16(%rax)
4824 ; SSE-NEXT: movaps %xmm1, (%rax)
4827 ; AVX1-LABEL: aggregate_sitofp_8i16_to_8f32:
4829 ; AVX1-NEXT: movq 24(%rdi), %rax
4830 ; AVX1-NEXT: vmovdqu 8(%rdi), %xmm0
4831 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
4832 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
4833 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
4834 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4835 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
4836 ; AVX1-NEXT: vmovaps %ymm0, (%rax)
4837 ; AVX1-NEXT: vzeroupper
4840 ; AVX2-LABEL: aggregate_sitofp_8i16_to_8f32:
4842 ; AVX2-NEXT: movq 24(%rdi), %rax
4843 ; AVX2-NEXT: vpmovsxwd 8(%rdi), %ymm0
4844 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
4845 ; AVX2-NEXT: vmovaps %ymm0, (%rax)
4846 ; AVX2-NEXT: vzeroupper
4849 ; AVX512-LABEL: aggregate_sitofp_8i16_to_8f32:
4851 ; AVX512-NEXT: movq 24(%rdi), %rax
4852 ; AVX512-NEXT: vpmovsxwd 8(%rdi), %ymm0
4853 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
4854 ; AVX512-NEXT: vmovaps %ymm0, (%rax)
4855 ; AVX512-NEXT: vzeroupper
4857 %1 = load %Arguments, %Arguments* %a0, align 1
4858 %2 = extractvalue %Arguments %1, 1
4859 %3 = extractvalue %Arguments %1, 2
4860 %4 = sitofp <8 x i16> %2 to <8 x float>
4861 store <8 x float> %4, <8 x float>* %3, align 32
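; The remaining tests pass an existing vector together with a scalar and check
; that sitofp of the scalar inserted into element 0 selects the plain register
; forms of cvtsi2ss/cvtsi2sd, with the VEX encodings taking the pass-through
; vector as the first source operand.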
4865 define <2 x double> @sitofp_i32_to_2f64(<2 x double> %a0, i32 %a1) nounwind {
4866 ; SSE-LABEL: sitofp_i32_to_2f64:
4868 ; SSE-NEXT: cvtsi2sdl %edi, %xmm0
4871 ; AVX-LABEL: sitofp_i32_to_2f64:
4873 ; AVX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0
4875 %cvt = sitofp i32 %a1 to double
4876 %res = insertelement <2 x double> %a0, double %cvt, i32 0
4877 ret <2 x double> %res
4880 define <4 x float> @sitofp_i32_to_4f32(<4 x float> %a0, i32 %a1) nounwind {
4881 ; SSE-LABEL: sitofp_i32_to_4f32:
4883 ; SSE-NEXT: cvtsi2ssl %edi, %xmm0
4886 ; AVX-LABEL: sitofp_i32_to_4f32:
4888 ; AVX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0
4890 %cvt = sitofp i32 %a1 to float
4891 %res = insertelement <4 x float> %a0, float %cvt, i32 0
4892 ret <4 x float> %res
4895 define <2 x double> @sitofp_i64_to_2f64(<2 x double> %a0, i64 %a1) nounwind {
4896 ; SSE-LABEL: sitofp_i64_to_2f64:
4898 ; SSE-NEXT: cvtsi2sdq %rdi, %xmm0
4901 ; AVX-LABEL: sitofp_i64_to_2f64:
4903 ; AVX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0
4905 %cvt = sitofp i64 %a1 to double
4906 %res = insertelement <2 x double> %a0, double %cvt, i32 0
4907 ret <2 x double> %res
4910 define <4 x float> @sitofp_i64_to_4f32(<4 x float> %a0, i64 %a1) nounwind {
4911 ; SSE-LABEL: sitofp_i64_to_4f32:
4913 ; SSE-NEXT: cvtsi2ssq %rdi, %xmm0
4916 ; AVX-LABEL: sitofp_i64_to_4f32:
4918 ; AVX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0
4920 %cvt = sitofp i64 %a1 to float
4921 %res = insertelement <4 x float> %a0, float %cvt, i32 0
4922 ret <4 x float> %res