1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=VEX --check-prefix=AVX1
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=VEX --check-prefix=AVX2
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512DQ
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VLDQ
9 10 ; 32-bit (i686) smoke tests: these RUN lines have no FileCheck pipe, so they
9 10 ; only verify that llc compiles the file without crashing or erroring.
11 ; RUN: llc < %s -mtriple=i686-unknown-unknown
12 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse
13 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2
16 ; Signed Integer to Double
19 define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
20 ; SSE-LABEL: sitofp_2i64_to_2f64:
22 ; SSE-NEXT: movd %xmm0, %rax
23 ; SSE-NEXT: cvtsi2sdq %rax, %xmm1
24 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
25 ; SSE-NEXT: movd %xmm0, %rax
26 ; SSE-NEXT: xorps %xmm0, %xmm0
27 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0
28 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
29 ; SSE-NEXT: movapd %xmm1, %xmm0
32 ; VEX-LABEL: sitofp_2i64_to_2f64:
34 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
35 ; VEX-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
36 ; VEX-NEXT: vmovq %xmm0, %rax
37 ; VEX-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
38 ; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
41 ; AVX512F-LABEL: sitofp_2i64_to_2f64:
43 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
44 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
45 ; AVX512F-NEXT: vmovq %xmm0, %rax
46 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
47 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
50 ; AVX512VL-LABEL: sitofp_2i64_to_2f64:
52 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
53 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
54 ; AVX512VL-NEXT: vmovq %xmm0, %rax
55 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
56 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
59 ; AVX512DQ-LABEL: sitofp_2i64_to_2f64:
61 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
62 ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
63 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
66 ; AVX512VLDQ-LABEL: sitofp_2i64_to_2f64:
68 ; AVX512VLDQ-NEXT: vcvtqq2pd %xmm0, %xmm0
69 ; AVX512VLDQ-NEXT: retq
70 %cvt = sitofp <2 x i64> %a to <2 x double>
74 define <2 x double> @sitofp_2i32_to_2f64(<4 x i32> %a) {
75 ; SSE-LABEL: sitofp_2i32_to_2f64:
77 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
80 ; AVX-LABEL: sitofp_2i32_to_2f64:
82 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
84 %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
85 %cvt = sitofp <2 x i32> %shuf to <2 x double>
89 define <2 x double> @sitofp_4i32_to_2f64(<4 x i32> %a) {
90 ; SSE-LABEL: sitofp_4i32_to_2f64:
92 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
95 ; VEX-LABEL: sitofp_4i32_to_2f64:
97 ; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0
98 ; VEX-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
99 ; VEX-NEXT: vzeroupper
102 ; AVX512-LABEL: sitofp_4i32_to_2f64:
104 ; AVX512-NEXT: vcvtdq2pd %xmm0, %ymm0
105 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
107 %cvt = sitofp <4 x i32> %a to <4 x double>
108 %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1>
109 ret <2 x double> %shuf
112 define <2 x double> @sitofp_2i16_to_2f64(<8 x i16> %a) {
113 ; SSE-LABEL: sitofp_2i16_to_2f64:
115 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
116 ; SSE-NEXT: psrad $16, %xmm0
117 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
120 ; AVX-LABEL: sitofp_2i16_to_2f64:
122 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
123 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
125 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
126 %cvt = sitofp <2 x i16> %shuf to <2 x double>
127 ret <2 x double> %cvt
130 define <2 x double> @sitofp_8i16_to_2f64(<8 x i16> %a) {
131 ; SSE-LABEL: sitofp_8i16_to_2f64:
133 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
134 ; SSE-NEXT: psrad $16, %xmm0
135 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
138 ; AVX1-LABEL: sitofp_8i16_to_2f64:
140 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
141 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
142 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
143 ; AVX1-NEXT: vzeroupper
146 ; AVX2-LABEL: sitofp_8i16_to_2f64:
148 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
149 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
150 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
151 ; AVX2-NEXT: vzeroupper
154 ; AVX512-LABEL: sitofp_8i16_to_2f64:
156 ; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
157 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
158 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
160 %cvt = sitofp <8 x i16> %a to <8 x double>
161 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1>
162 ret <2 x double> %shuf
165 define <2 x double> @sitofp_2i8_to_2f64(<16 x i8> %a) {
166 ; SSE-LABEL: sitofp_2i8_to_2f64:
168 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
169 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
170 ; SSE-NEXT: psrad $24, %xmm0
171 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
174 ; AVX-LABEL: sitofp_2i8_to_2f64:
176 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
177 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
179 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
180 %cvt = sitofp <2 x i8> %shuf to <2 x double>
181 ret <2 x double> %cvt
184 define <2 x double> @sitofp_16i8_to_2f64(<16 x i8> %a) {
185 ; SSE-LABEL: sitofp_16i8_to_2f64:
187 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
188 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
189 ; SSE-NEXT: psrad $24, %xmm0
190 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
193 ; AVX1-LABEL: sitofp_16i8_to_2f64:
195 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
196 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
197 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
198 ; AVX1-NEXT: vzeroupper
201 ; AVX2-LABEL: sitofp_16i8_to_2f64:
203 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
204 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
205 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
206 ; AVX2-NEXT: vzeroupper
209 ; AVX512-LABEL: sitofp_16i8_to_2f64:
211 ; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
212 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
213 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
215 %cvt = sitofp <16 x i8> %a to <16 x double>
216 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1>
217 ret <2 x double> %shuf
220 define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
221 ; SSE-LABEL: sitofp_4i64_to_4f64:
223 ; SSE-NEXT: movd %xmm0, %rax
224 ; SSE-NEXT: cvtsi2sdq %rax, %xmm2
225 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
226 ; SSE-NEXT: movd %xmm0, %rax
227 ; SSE-NEXT: xorps %xmm0, %xmm0
228 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0
229 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
230 ; SSE-NEXT: movd %xmm1, %rax
231 ; SSE-NEXT: cvtsi2sdq %rax, %xmm3
232 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
233 ; SSE-NEXT: movd %xmm0, %rax
234 ; SSE-NEXT: xorps %xmm0, %xmm0
235 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0
236 ; SSE-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
237 ; SSE-NEXT: movapd %xmm2, %xmm0
238 ; SSE-NEXT: movapd %xmm3, %xmm1
241 ; AVX1-LABEL: sitofp_4i64_to_4f64:
243 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
244 ; AVX1-NEXT: vpextrq $1, %xmm1, %rax
245 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
246 ; AVX1-NEXT: vmovq %xmm1, %rax
247 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
248 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
249 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
250 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
251 ; AVX1-NEXT: vmovq %xmm0, %rax
252 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
253 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
254 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
257 ; AVX2-LABEL: sitofp_4i64_to_4f64:
259 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
260 ; AVX2-NEXT: vpextrq $1, %xmm1, %rax
261 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
262 ; AVX2-NEXT: vmovq %xmm1, %rax
263 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
264 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
265 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
266 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
267 ; AVX2-NEXT: vmovq %xmm0, %rax
268 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
269 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
270 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
273 ; AVX512F-LABEL: sitofp_4i64_to_4f64:
275 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
276 ; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
277 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
278 ; AVX512F-NEXT: vmovq %xmm1, %rax
279 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
280 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
281 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
282 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
283 ; AVX512F-NEXT: vmovq %xmm0, %rax
284 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
285 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
286 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
289 ; AVX512VL-LABEL: sitofp_4i64_to_4f64:
291 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
292 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
293 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
294 ; AVX512VL-NEXT: vmovq %xmm1, %rax
295 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
296 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
297 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
298 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
299 ; AVX512VL-NEXT: vmovq %xmm0, %rax
300 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
301 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
302 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
303 ; AVX512VL-NEXT: retq
305 ; AVX512DQ-LABEL: sitofp_4i64_to_4f64:
307 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
308 ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
309 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
310 ; AVX512DQ-NEXT: retq
312 ; AVX512VLDQ-LABEL: sitofp_4i64_to_4f64:
313 ; AVX512VLDQ: # BB#0:
314 ; AVX512VLDQ-NEXT: vcvtqq2pd %ymm0, %ymm0
315 ; AVX512VLDQ-NEXT: retq
316 %cvt = sitofp <4 x i64> %a to <4 x double>
317 ret <4 x double> %cvt
320 define <4 x double> @sitofp_4i32_to_4f64(<4 x i32> %a) {
321 ; SSE-LABEL: sitofp_4i32_to_4f64:
323 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm2
324 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
325 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm1
326 ; SSE-NEXT: movaps %xmm2, %xmm0
329 ; AVX-LABEL: sitofp_4i32_to_4f64:
331 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
333 %cvt = sitofp <4 x i32> %a to <4 x double>
334 ret <4 x double> %cvt
337 define <4 x double> @sitofp_4i16_to_4f64(<8 x i16> %a) {
338 ; SSE-LABEL: sitofp_4i16_to_4f64:
340 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
341 ; SSE-NEXT: psrad $16, %xmm1
342 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
343 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
344 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
347 ; AVX-LABEL: sitofp_4i16_to_4f64:
349 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
350 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
352 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
353 %cvt = sitofp <4 x i16> %shuf to <4 x double>
354 ret <4 x double> %cvt
357 define <4 x double> @sitofp_8i16_to_4f64(<8 x i16> %a) {
358 ; SSE-LABEL: sitofp_8i16_to_4f64:
360 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
361 ; SSE-NEXT: psrad $16, %xmm1
362 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
363 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
364 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
367 ; AVX1-LABEL: sitofp_8i16_to_4f64:
369 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
370 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
373 ; AVX2-LABEL: sitofp_8i16_to_4f64:
375 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
376 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
379 ; AVX512-LABEL: sitofp_8i16_to_4f64:
381 ; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
382 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
383 ; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
385 %cvt = sitofp <8 x i16> %a to <8 x double>
386 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
387 ret <4 x double> %shuf
390 define <4 x double> @sitofp_4i8_to_4f64(<16 x i8> %a) {
391 ; SSE-LABEL: sitofp_4i8_to_4f64:
393 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
394 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
395 ; SSE-NEXT: psrad $24, %xmm1
396 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
397 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
398 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
401 ; AVX-LABEL: sitofp_4i8_to_4f64:
403 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
404 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
406 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
407 %cvt = sitofp <4 x i8> %shuf to <4 x double>
408 ret <4 x double> %cvt
411 define <4 x double> @sitofp_16i8_to_4f64(<16 x i8> %a) {
412 ; SSE-LABEL: sitofp_16i8_to_4f64:
414 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
415 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
416 ; SSE-NEXT: psrad $24, %xmm1
417 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
418 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
419 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
422 ; AVX1-LABEL: sitofp_16i8_to_4f64:
424 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
425 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
428 ; AVX2-LABEL: sitofp_16i8_to_4f64:
430 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
431 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
434 ; AVX512-LABEL: sitofp_16i8_to_4f64:
436 ; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
437 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
438 ; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
440 %cvt = sitofp <16 x i8> %a to <16 x double>
441 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
442 ret <4 x double> %shuf
446 ; Unsigned Integer to Double
449 define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) {
450 ; SSE-LABEL: uitofp_2i64_to_2f64:
452 ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
453 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
454 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
455 ; SSE-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
456 ; SSE-NEXT: subpd %xmm3, %xmm0
457 ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
458 ; SSE-NEXT: addpd %xmm4, %xmm0
459 ; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
460 ; SSE-NEXT: subpd %xmm3, %xmm2
461 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
462 ; SSE-NEXT: addpd %xmm2, %xmm1
463 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
466 ; VEX-LABEL: uitofp_2i64_to_2f64:
468 ; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
469 ; VEX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
470 ; VEX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
471 ; VEX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
472 ; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
473 ; VEX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
474 ; VEX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
475 ; VEX-NEXT: vhaddpd %xmm0, %xmm2, %xmm0
478 ; AVX512F-LABEL: uitofp_2i64_to_2f64:
480 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
481 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
482 ; AVX512F-NEXT: vmovq %xmm0, %rax
483 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
484 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
487 ; AVX512VL-LABEL: uitofp_2i64_to_2f64:
489 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
490 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
491 ; AVX512VL-NEXT: vmovq %xmm0, %rax
492 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
493 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
494 ; AVX512VL-NEXT: retq
496 ; AVX512DQ-LABEL: uitofp_2i64_to_2f64:
498 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
499 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
500 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
501 ; AVX512DQ-NEXT: retq
503 ; AVX512VLDQ-LABEL: uitofp_2i64_to_2f64:
504 ; AVX512VLDQ: # BB#0:
505 ; AVX512VLDQ-NEXT: vcvtuqq2pd %xmm0, %xmm0
506 ; AVX512VLDQ-NEXT: retq
507 %cvt = uitofp <2 x i64> %a to <2 x double>
508 ret <2 x double> %cvt
511 define <2 x double> @uitofp_2i32_to_2f64(<4 x i32> %a) {
512 ; SSE-LABEL: uitofp_2i32_to_2f64:
514 ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0]
515 ; SSE-NEXT: pand %xmm0, %xmm1
516 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
517 ; SSE-NEXT: psrld $16, %xmm0
518 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
519 ; SSE-NEXT: mulpd {{.*}}(%rip), %xmm0
520 ; SSE-NEXT: addpd %xmm1, %xmm0
523 ; VEX-LABEL: uitofp_2i32_to_2f64:
525 ; VEX-NEXT: vpxor %xmm1, %xmm1, %xmm1
526 ; VEX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
527 ; VEX-NEXT: vcvtdq2pd %xmm1, %xmm1
528 ; VEX-NEXT: vpsrld $16, %xmm0, %xmm0
529 ; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
530 ; VEX-NEXT: vmulpd {{.*}}(%rip), %xmm0, %xmm0
531 ; VEX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
534 ; AVX512F-LABEL: uitofp_2i32_to_2f64:
536 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
537 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
538 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
541 ; AVX512VL-LABEL: uitofp_2i32_to_2f64:
543 ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
544 ; AVX512VL-NEXT: retq
546 ; AVX512DQ-LABEL: uitofp_2i32_to_2f64:
548 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
549 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
550 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
551 ; AVX512DQ-NEXT: retq
553 ; AVX512VLDQ-LABEL: uitofp_2i32_to_2f64:
554 ; AVX512VLDQ: # BB#0:
555 ; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0
556 ; AVX512VLDQ-NEXT: retq
557 %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
558 %cvt = uitofp <2 x i32> %shuf to <2 x double>
559 ret <2 x double> %cvt
562 define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) {
563 ; SSE-LABEL: uitofp_4i32_to_2f64:
565 ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0]
566 ; SSE-NEXT: pand %xmm0, %xmm1
567 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
568 ; SSE-NEXT: psrld $16, %xmm0
569 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
570 ; SSE-NEXT: mulpd {{.*}}(%rip), %xmm0
571 ; SSE-NEXT: addpd %xmm1, %xmm0
574 ; AVX1-LABEL: uitofp_4i32_to_2f64:
576 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
577 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
578 ; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1
579 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
580 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
581 ; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
582 ; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
583 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
584 ; AVX1-NEXT: vzeroupper
587 ; AVX2-LABEL: uitofp_4i32_to_2f64:
589 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
590 ; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
591 ; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
592 ; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
593 ; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
594 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
595 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
596 ; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
597 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
598 ; AVX2-NEXT: vzeroupper
601 ; AVX512F-LABEL: uitofp_4i32_to_2f64:
603 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
604 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
605 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
608 ; AVX512VL-LABEL: uitofp_4i32_to_2f64:
610 ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0
611 ; AVX512VL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
612 ; AVX512VL-NEXT: retq
614 ; AVX512DQ-LABEL: uitofp_4i32_to_2f64:
616 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
617 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
618 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
619 ; AVX512DQ-NEXT: retq
621 ; AVX512VLDQ-LABEL: uitofp_4i32_to_2f64:
622 ; AVX512VLDQ: # BB#0:
623 ; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %ymm0
624 ; AVX512VLDQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
625 ; AVX512VLDQ-NEXT: retq
626 %cvt = uitofp <4 x i32> %a to <4 x double>
627 %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1>
628 ret <2 x double> %shuf
631 define <2 x double> @uitofp_2i16_to_2f64(<8 x i16> %a) {
632 ; SSE-LABEL: uitofp_2i16_to_2f64:
634 ; SSE-NEXT: pxor %xmm1, %xmm1
635 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
636 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
639 ; AVX-LABEL: uitofp_2i16_to_2f64:
641 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
642 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
644 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
645 %cvt = uitofp <2 x i16> %shuf to <2 x double>
646 ret <2 x double> %cvt
649 define <2 x double> @uitofp_8i16_to_2f64(<8 x i16> %a) {
650 ; SSE-LABEL: uitofp_8i16_to_2f64:
652 ; SSE-NEXT: pxor %xmm1, %xmm1
653 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
654 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
657 ; AVX1-LABEL: uitofp_8i16_to_2f64:
659 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
660 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
661 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
662 ; AVX1-NEXT: vzeroupper
665 ; AVX2-LABEL: uitofp_8i16_to_2f64:
667 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
668 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
669 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
670 ; AVX2-NEXT: vzeroupper
673 ; AVX512-LABEL: uitofp_8i16_to_2f64:
675 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
676 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
677 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
679 %cvt = uitofp <8 x i16> %a to <8 x double>
680 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1>
681 ret <2 x double> %shuf
684 define <2 x double> @uitofp_2i8_to_2f64(<16 x i8> %a) {
685 ; SSE-LABEL: uitofp_2i8_to_2f64:
687 ; SSE-NEXT: pxor %xmm1, %xmm1
688 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
689 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
690 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
693 ; AVX-LABEL: uitofp_2i8_to_2f64:
695 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
696 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
698 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
699 %cvt = uitofp <2 x i8> %shuf to <2 x double>
700 ret <2 x double> %cvt
703 define <2 x double> @uitofp_16i8_to_2f64(<16 x i8> %a) {
704 ; SSE-LABEL: uitofp_16i8_to_2f64:
706 ; SSE-NEXT: pxor %xmm1, %xmm1
707 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
708 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
709 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
712 ; AVX1-LABEL: uitofp_16i8_to_2f64:
714 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
715 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
716 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
717 ; AVX1-NEXT: vzeroupper
720 ; AVX2-LABEL: uitofp_16i8_to_2f64:
722 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
723 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
724 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
725 ; AVX2-NEXT: vzeroupper
728 ; AVX512-LABEL: uitofp_16i8_to_2f64:
730 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
731 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
732 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
734 %cvt = uitofp <16 x i8> %a to <16 x double>
735 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1>
736 ret <2 x double> %shuf
739 define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) {
740 ; SSE-LABEL: uitofp_4i64_to_4f64:
742 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
743 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
744 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
745 ; SSE-NEXT: movapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
746 ; SSE-NEXT: subpd %xmm4, %xmm0
747 ; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,0,1]
748 ; SSE-NEXT: addpd %xmm5, %xmm0
749 ; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
750 ; SSE-NEXT: subpd %xmm4, %xmm3
751 ; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm3[2,3,0,1]
752 ; SSE-NEXT: addpd %xmm3, %xmm5
753 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
754 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
755 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
756 ; SSE-NEXT: subpd %xmm4, %xmm1
757 ; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
758 ; SSE-NEXT: addpd %xmm5, %xmm1
759 ; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
760 ; SSE-NEXT: subpd %xmm4, %xmm3
761 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
762 ; SSE-NEXT: addpd %xmm3, %xmm2
763 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
766 ; AVX1-LABEL: uitofp_4i64_to_4f64:
768 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
769 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
770 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
771 ; AVX1-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
772 ; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
773 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
774 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
775 ; AVX1-NEXT: vsubpd %xmm4, %xmm1, %xmm1
776 ; AVX1-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
777 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
778 ; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
779 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
780 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
781 ; AVX1-NEXT: vsubpd %xmm4, %xmm0, %xmm0
782 ; AVX1-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
783 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
786 ; AVX2-LABEL: uitofp_4i64_to_4f64:
788 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
789 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
790 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
791 ; AVX2-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
792 ; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
793 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
794 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
795 ; AVX2-NEXT: vsubpd %xmm4, %xmm1, %xmm1
796 ; AVX2-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
797 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
798 ; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
799 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
800 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
801 ; AVX2-NEXT: vsubpd %xmm4, %xmm0, %xmm0
802 ; AVX2-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
803 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
806 ; AVX512F-LABEL: uitofp_4i64_to_4f64:
808 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
809 ; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
810 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
811 ; AVX512F-NEXT: vmovq %xmm1, %rax
812 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
813 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
814 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
815 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2
816 ; AVX512F-NEXT: vmovq %xmm0, %rax
817 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
818 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
819 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
822 ; AVX512VL-LABEL: uitofp_4i64_to_4f64:
824 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
825 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
826 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
827 ; AVX512VL-NEXT: vmovq %xmm1, %rax
828 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
829 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
830 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
831 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2
832 ; AVX512VL-NEXT: vmovq %xmm0, %rax
833 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
834 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
835 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
836 ; AVX512VL-NEXT: retq
838 ; AVX512DQ-LABEL: uitofp_4i64_to_4f64:
840 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
841 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
842 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
843 ; AVX512DQ-NEXT: retq
845 ; AVX512VLDQ-LABEL: uitofp_4i64_to_4f64:
846 ; AVX512VLDQ: # BB#0:
847 ; AVX512VLDQ-NEXT: vcvtuqq2pd %ymm0, %ymm0
848 ; AVX512VLDQ-NEXT: retq
849 %cvt = uitofp <4 x i64> %a to <4 x double>
850 ret <4 x double> %cvt
; Unsigned <4 x i32> -> <4 x double>. SSE2/AVX1/AVX2 have no unsigned convert, so
; codegen splits each lane into low/high 16-bit halves (pand / psrld $16), converts
; both with signed cvtdq2pd, scales the high half by 65536 and sums (mulpd+addpd).
; AVX512F/DQ widen to zmm for vcvtudq2pd; the VL variants convert xmm->ymm directly.
853 define <4 x double> @uitofp_4i32_to_4f64(<4 x i32> %a) {
854 ; SSE-LABEL: uitofp_4i32_to_4f64:
856 ; SSE-NEXT: movdqa %xmm0, %xmm1
857 ; SSE-NEXT: psrld $16, %xmm1
858 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
859 ; SSE-NEXT: movapd {{.*#+}} xmm2 = [6.553600e+04,6.553600e+04]
860 ; SSE-NEXT: mulpd %xmm2, %xmm1
861 ; SSE-NEXT: movdqa {{.*#+}} xmm3 = [65535,0,65535,0,65535,0,65535,0]
862 ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
863 ; SSE-NEXT: pand %xmm3, %xmm0
864 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
865 ; SSE-NEXT: addpd %xmm1, %xmm0
866 ; SSE-NEXT: movdqa %xmm4, %xmm1
867 ; SSE-NEXT: psrld $16, %xmm1
868 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm5
869 ; SSE-NEXT: mulpd %xmm2, %xmm5
870 ; SSE-NEXT: pand %xmm3, %xmm4
871 ; SSE-NEXT: cvtdq2pd %xmm4, %xmm1
872 ; SSE-NEXT: addpd %xmm5, %xmm1
875 ; AVX1-LABEL: uitofp_4i32_to_4f64:
877 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
878 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
879 ; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1
880 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
881 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
882 ; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
883 ; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
886 ; AVX2-LABEL: uitofp_4i32_to_4f64:
888 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
889 ; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
890 ; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
891 ; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
892 ; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
893 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
894 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
895 ; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
898 ; AVX512F-LABEL: uitofp_4i32_to_4f64:
900 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
901 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
902 ; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
905 ; AVX512VL-LABEL: uitofp_4i32_to_4f64:
907 ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0
908 ; AVX512VL-NEXT: retq
910 ; AVX512DQ-LABEL: uitofp_4i32_to_4f64:
912 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
913 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
914 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
915 ; AVX512DQ-NEXT: retq
917 ; AVX512VLDQ-LABEL: uitofp_4i32_to_4f64:
918 ; AVX512VLDQ: # BB#0:
919 ; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %ymm0
920 ; AVX512VLDQ-NEXT: retq
921 %cvt = uitofp <4 x i32> %a to <4 x double>
922 ret <4 x double> %cvt
; Unsigned <4 x i16> -> <4 x double>. Zero-extension (punpcklwd with zero /
; vpmovzxwd) makes the values non-negative 32-bit ints, so the plain signed
; cvtdq2pd is correct on every subtarget; no unsigned-specific path is needed.
925 define <4 x double> @uitofp_4i16_to_4f64(<8 x i16> %a) {
926 ; SSE-LABEL: uitofp_4i16_to_4f64:
928 ; SSE-NEXT: pxor %xmm1, %xmm1
929 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
930 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm2
931 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
932 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm1
933 ; SSE-NEXT: movaps %xmm2, %xmm0
936 ; AVX-LABEL: uitofp_4i16_to_4f64:
938 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
939 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
941 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
942 %cvt = uitofp <4 x i16> %shuf to <4 x double>
943 ret <4 x double> %cvt
; <8 x i16> uitofp followed by a shuffle keeping lanes 0-3: checks the converter
; only materializes the low 4 results. SSE/AVX1 zero-extend just the low half;
; AVX2/AVX512 zero-extend all 8 lanes and drop the upper results via reg aliasing.
946 define <4 x double> @uitofp_8i16_to_4f64(<8 x i16> %a) {
947 ; SSE-LABEL: uitofp_8i16_to_4f64:
949 ; SSE-NEXT: pxor %xmm1, %xmm1
950 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
951 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm2
952 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
953 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm1
954 ; SSE-NEXT: movaps %xmm2, %xmm0
957 ; AVX1-LABEL: uitofp_8i16_to_4f64:
959 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
960 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
963 ; AVX2-LABEL: uitofp_8i16_to_4f64:
965 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
966 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
969 ; AVX512-LABEL: uitofp_8i16_to_4f64:
971 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
972 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
973 ; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
975 %cvt = uitofp <8 x i16> %a to <8 x double>
976 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
977 ret <4 x double> %shuf
; Unsigned <4 x i8> -> <4 x double>. SSE2 zero-extends in two steps
; (punpcklbw then punpcklwd with zero); AVX does it in one vpmovzxbd.
; The values are then non-negative, so signed cvtdq2pd is sufficient.
980 define <4 x double> @uitofp_4i8_to_4f64(<16 x i8> %a) {
981 ; SSE-LABEL: uitofp_4i8_to_4f64:
983 ; SSE-NEXT: pxor %xmm1, %xmm1
984 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
985 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
986 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm2
987 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
988 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm1
989 ; SSE-NEXT: movaps %xmm2, %xmm0
992 ; AVX-LABEL: uitofp_4i8_to_4f64:
994 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
995 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
997 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
998 %cvt = uitofp <4 x i8> %shuf to <4 x double>
999 ret <4 x double> %cvt
; <16 x i8> uitofp with only lanes 0-3 kept: verifies the wide convert is pruned
; to what the shuffle demands. SSE/AVX1 convert just the low 4 bytes; AVX2/AVX512
; use a wider vpmovzxbd and discard the upper double results.
1002 define <4 x double> @uitofp_16i8_to_4f64(<16 x i8> %a) {
1003 ; SSE-LABEL: uitofp_16i8_to_4f64:
1005 ; SSE-NEXT: pxor %xmm1, %xmm1
1006 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1007 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1008 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm2
1009 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1010 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm1
1011 ; SSE-NEXT: movaps %xmm2, %xmm0
1014 ; AVX1-LABEL: uitofp_16i8_to_4f64:
1016 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1017 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
1020 ; AVX2-LABEL: uitofp_16i8_to_4f64:
1022 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1023 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
1026 ; AVX512-LABEL: uitofp_16i8_to_4f64:
1028 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1029 ; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
1030 ; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
1032 %cvt = uitofp <16 x i8> %a to <16 x double>
1033 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1034 ret <4 x double> %shuf
1038 ; Signed Integer to Float
; Signed <2 x i64> -> 2 x float (upper 2 result lanes undef). Pre-AVX512DQ there
; is no packed i64->fp instruction, so each element is moved to a GPR and converted
; with scalar cvtsi2ssq, then the results are re-packed. DQ targets lower to a
; single vcvtqq2ps (zmm->ymm without VL, xmm->xmm with VL).
1041 define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
1042 ; SSE-LABEL: sitofp_2i64_to_4f32:
1044 ; SSE-NEXT: movd %xmm0, %rax
1045 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1046 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1047 ; SSE-NEXT: movd %xmm0, %rax
1048 ; SSE-NEXT: xorps %xmm0, %xmm0
1049 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1050 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1051 ; SSE-NEXT: movaps %xmm1, %xmm0
1054 ; VEX-LABEL: sitofp_2i64_to_4f32:
1056 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
1057 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1058 ; VEX-NEXT: vmovq %xmm0, %rax
1059 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1060 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1061 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
1062 ; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1065 ; AVX512F-LABEL: sitofp_2i64_to_4f32:
1067 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1068 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1069 ; AVX512F-NEXT: vmovq %xmm0, %rax
1070 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1071 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1072 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
1073 ; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1074 ; AVX512F-NEXT: retq
1076 ; AVX512VL-LABEL: sitofp_2i64_to_4f32:
1078 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1079 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1080 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1081 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1082 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1083 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
1084 ; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1085 ; AVX512VL-NEXT: retq
1087 ; AVX512DQ-LABEL: sitofp_2i64_to_4f32:
1089 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
1090 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
1091 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1092 ; AVX512DQ-NEXT: retq
1094 ; AVX512VLDQ-LABEL: sitofp_2i64_to_4f32:
1095 ; AVX512VLDQ: # BB#0:
1096 ; AVX512VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0
1097 ; AVX512VLDQ-NEXT: retq
1098 %cvt = sitofp <2 x i64> %a to <2 x float>
1099 %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1100 ret <4 x float> %ext
; Same as sitofp_2i64_to_4f32, but the upper 2 result lanes are zero instead of
; undef (shuffle with zeroinitializer). Codegen must therefore zero the high half,
; via movq/vmovq (low-qword move, zeroing upper) or vinsertps with zero lanes.
1103 define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) {
1104 ; SSE-LABEL: sitofp_2i64_to_4f32_zero:
1106 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1107 ; SSE-NEXT: movd %xmm1, %rax
1108 ; SSE-NEXT: xorps %xmm1, %xmm1
1109 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1110 ; SSE-NEXT: movd %xmm0, %rax
1111 ; SSE-NEXT: xorps %xmm0, %xmm0
1112 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1113 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1114 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1117 ; VEX-LABEL: sitofp_2i64_to_4f32_zero:
1119 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
1120 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1121 ; VEX-NEXT: vmovq %xmm0, %rax
1122 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1123 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1126 ; AVX512F-LABEL: sitofp_2i64_to_4f32_zero:
1128 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1129 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1130 ; AVX512F-NEXT: vmovq %xmm0, %rax
1131 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1132 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1133 ; AVX512F-NEXT: retq
1135 ; AVX512VL-LABEL: sitofp_2i64_to_4f32_zero:
1137 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1138 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1139 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1140 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1141 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1142 ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1143 ; AVX512VL-NEXT: retq
1145 ; AVX512DQ-LABEL: sitofp_2i64_to_4f32_zero:
1147 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
1148 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
1149 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1150 ; AVX512DQ-NEXT: retq
1152 ; AVX512VLDQ-LABEL: sitofp_2i64_to_4f32_zero:
1153 ; AVX512VLDQ: # BB#0:
1154 ; AVX512VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0
1155 ; AVX512VLDQ-NEXT: retq
1156 %cvt = sitofp <2 x i64> %a to <2 x float>
1157 %ext = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1158 ret <4 x float> %ext
; <2 x i64> widened to <4 x i64> with undef upper lanes, then sitofp to <4 x float>.
; Checks the undef lanes don't force extra work: the VEX/AVX512F paths match the
; plain 2i64->4f32 scalar-convert sequence; the SSE path includes converts of the
; undef lanes (note the first cvtsi2ssq reads %rax before any visible def here).
1161 define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
1162 ; SSE-LABEL: sitofp_4i64_to_4f32_undef:
1164 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
1165 ; SSE-NEXT: movd %xmm0, %rax
1166 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1167 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1168 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1169 ; SSE-NEXT: movd %xmm0, %rax
1170 ; SSE-NEXT: xorps %xmm0, %xmm0
1171 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1172 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1173 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1174 ; SSE-NEXT: movaps %xmm1, %xmm0
1177 ; VEX-LABEL: sitofp_4i64_to_4f32_undef:
1179 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
1180 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1181 ; VEX-NEXT: vmovq %xmm0, %rax
1182 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1183 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1184 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
1185 ; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1188 ; AVX512F-LABEL: sitofp_4i64_to_4f32_undef:
1190 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1191 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1192 ; AVX512F-NEXT: vmovq %xmm0, %rax
1193 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1194 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1195 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
1196 ; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1197 ; AVX512F-NEXT: retq
1199 ; AVX512VL-LABEL: sitofp_4i64_to_4f32_undef:
1201 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1202 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1203 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1204 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1205 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1206 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
1207 ; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1208 ; AVX512VL-NEXT: retq
1210 ; AVX512DQ-LABEL: sitofp_4i64_to_4f32_undef:
1212 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
1213 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
1214 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1215 ; AVX512DQ-NEXT: retq
1217 ; AVX512VLDQ-LABEL: sitofp_4i64_to_4f32_undef:
1218 ; AVX512VLDQ: # BB#0:
1219 ; AVX512VLDQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
1220 ; AVX512VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0
1221 ; AVX512VLDQ-NEXT: retq
1222 %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1223 %cvt = sitofp <4 x i64> %ext to <4 x float>
1224 ret <4 x float> %cvt
; Baseline case: signed <4 x i32> -> <4 x float> maps directly to one
; (v)cvtdq2ps on every subtarget.
1227 define <4 x float> @sitofp_4i32_to_4f32(<4 x i32> %a) {
1228 ; SSE-LABEL: sitofp_4i32_to_4f32:
1230 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
1233 ; AVX-LABEL: sitofp_4i32_to_4f32:
1235 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
1237 %cvt = sitofp <4 x i32> %a to <4 x float>
1238 ret <4 x float> %cvt
; Signed <4 x i16> -> <4 x float>. SSE2 sign-extends via punpcklwd + psrad $16
; (shift-based sign extension, no pmovsx pre-SSE4.1); AVX uses vpmovsxwd.
1241 define <4 x float> @sitofp_4i16_to_4f32(<8 x i16> %a) {
1242 ; SSE-LABEL: sitofp_4i16_to_4f32:
1244 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1245 ; SSE-NEXT: psrad $16, %xmm0
1246 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
1249 ; AVX-LABEL: sitofp_4i16_to_4f32:
1251 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
1252 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
1254 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1255 %cvt = sitofp <4 x i16> %shuf to <4 x float>
1256 ret <4 x float> %cvt
; <8 x i16> sitofp with only lanes 0-3 kept. SSE converts just the low half;
; AVX1/AVX2/AVX512 convert all 8 lanes in ymm and return the low xmm, emitting
; vzeroupper on VEX targets before returning to the SSE-state ABI.
1259 define <4 x float> @sitofp_8i16_to_4f32(<8 x i16> %a) {
1260 ; SSE-LABEL: sitofp_8i16_to_4f32:
1262 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1263 ; SSE-NEXT: psrad $16, %xmm0
1264 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
1267 ; AVX1-LABEL: sitofp_8i16_to_4f32:
1269 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
1270 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1271 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
1272 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1273 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
1274 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1275 ; AVX1-NEXT: vzeroupper
1278 ; AVX2-LABEL: sitofp_8i16_to_4f32:
1280 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
1281 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
1282 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1283 ; AVX2-NEXT: vzeroupper
1286 ; AVX512-LABEL: sitofp_8i16_to_4f32:
1288 ; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
1289 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
1290 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1292 %cvt = sitofp <8 x i16> %a to <8 x float>
1293 %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1294 ret <4 x float> %shuf
; Signed <4 x i8> -> <4 x float>. SSE2 sign-extends with two unpacks plus
; psrad $24; AVX collapses the extension into a single vpmovsxbd.
1297 define <4 x float> @sitofp_4i8_to_4f32(<16 x i8> %a) {
1298 ; SSE-LABEL: sitofp_4i8_to_4f32:
1300 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1301 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1302 ; SSE-NEXT: psrad $24, %xmm0
1303 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
1306 ; AVX-LABEL: sitofp_4i8_to_4f32:
1308 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
1309 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
1311 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1312 %cvt = sitofp <4 x i8> %shuf to <4 x float>
1313 ret <4 x float> %cvt
; <16 x i8> sitofp with only lanes 0-3 kept. SSE converts only the low 4 bytes;
; AVX1/AVX2 widen to ymm, AVX512 to zmm (vcvtdq2ps %zmm0), all returning the
; low xmm of the wide result.
1316 define <4 x float> @sitofp_16i8_to_4f32(<16 x i8> %a) {
1317 ; SSE-LABEL: sitofp_16i8_to_4f32:
1319 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1320 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1321 ; SSE-NEXT: psrad $24, %xmm0
1322 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
1325 ; AVX1-LABEL: sitofp_16i8_to_4f32:
1327 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
1328 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1329 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
1330 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1331 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
1332 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1333 ; AVX1-NEXT: vzeroupper
1336 ; AVX2-LABEL: sitofp_16i8_to_4f32:
1338 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
1339 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
1340 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1341 ; AVX2-NEXT: vzeroupper
1344 ; AVX512-LABEL: sitofp_16i8_to_4f32:
1346 ; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
1347 ; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0
1348 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
1350 %cvt = sitofp <16 x i8> %a to <16 x float>
1351 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1352 ret <4 x float> %shuf
; Signed <4 x i64> -> <4 x float>. Without AVX512DQ all four elements go through
; GPRs and scalar cvtsi2ssq, assembled with unpcklps (SSE) or vinsertps (VEX/
; AVX512F/VL); the upper pair is pulled out with vextract{f,i}128. DQ targets
; use packed vcvtqq2ps (zmm->ymm, or ymm->xmm with VL).
1355 define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) {
1356 ; SSE-LABEL: sitofp_4i64_to_4f32:
1358 ; SSE-NEXT: movd %xmm1, %rax
1359 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3
1360 ; SSE-NEXT: movd %xmm0, %rax
1361 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
1362 ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1363 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1364 ; SSE-NEXT: movd %xmm1, %rax
1365 ; SSE-NEXT: xorps %xmm1, %xmm1
1366 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1367 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1368 ; SSE-NEXT: movd %xmm0, %rax
1369 ; SSE-NEXT: xorps %xmm0, %xmm0
1370 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1371 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1372 ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
1373 ; SSE-NEXT: movaps %xmm2, %xmm0
1376 ; AVX1-LABEL: sitofp_4i64_to_4f32:
1378 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
1379 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1380 ; AVX1-NEXT: vmovq %xmm0, %rax
1381 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
1382 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
1383 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1384 ; AVX1-NEXT: vmovq %xmm0, %rax
1385 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
1386 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
1387 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
1388 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
1389 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1390 ; AVX1-NEXT: vzeroupper
1393 ; AVX2-LABEL: sitofp_4i64_to_4f32:
1395 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
1396 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1397 ; AVX2-NEXT: vmovq %xmm0, %rax
1398 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
1399 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
1400 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
1401 ; AVX2-NEXT: vmovq %xmm0, %rax
1402 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
1403 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
1404 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
1405 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
1406 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1407 ; AVX2-NEXT: vzeroupper
1410 ; AVX512F-LABEL: sitofp_4i64_to_4f32:
1412 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1413 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1414 ; AVX512F-NEXT: vmovq %xmm0, %rax
1415 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
1416 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
1417 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
1418 ; AVX512F-NEXT: vmovq %xmm0, %rax
1419 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
1420 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
1421 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1422 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
1423 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1424 ; AVX512F-NEXT: retq
1426 ; AVX512VL-LABEL: sitofp_4i64_to_4f32:
1428 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1429 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1430 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1431 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
1432 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
1433 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
1434 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1435 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
1436 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
1437 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1438 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
1439 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1440 ; AVX512VL-NEXT: retq
1442 ; AVX512DQ-LABEL: sitofp_4i64_to_4f32:
1444 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
1445 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
1446 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1447 ; AVX512DQ-NEXT: retq
1449 ; AVX512VLDQ-LABEL: sitofp_4i64_to_4f32:
1450 ; AVX512VLDQ: # BB#0:
1451 ; AVX512VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0
1452 ; AVX512VLDQ-NEXT: retq
1453 %cvt = sitofp <4 x i64> %a to <4 x float>
1454 ret <4 x float> %cvt
; Signed <8 x i32> -> <8 x float>: two cvtdq2ps on SSE (one per 128-bit half),
; a single 256-bit vcvtdq2ps on all AVX subtargets.
1457 define <8 x float> @sitofp_8i32_to_8f32(<8 x i32> %a) {
1458 ; SSE-LABEL: sitofp_8i32_to_8f32:
1460 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
1461 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
1464 ; AVX-LABEL: sitofp_8i32_to_8f32:
1466 ; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
1468 %cvt = sitofp <8 x i32> %a to <8 x float>
1469 ret <8 x float> %cvt
; Signed <8 x i16> -> <8 x float>. SSE sign-extends both halves with
; punpck{l,h}wd + psrad $16 and converts each; AVX1 builds the ymm from two
; vpmovsxwd halves; AVX2/AVX512 use one wide vpmovsxwd + vcvtdq2ps.
1472 define <8 x float> @sitofp_8i16_to_8f32(<8 x i16> %a) {
1473 ; SSE-LABEL: sitofp_8i16_to_8f32:
1475 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1476 ; SSE-NEXT: psrad $16, %xmm1
1477 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm2
1478 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
1479 ; SSE-NEXT: psrad $16, %xmm0
1480 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
1481 ; SSE-NEXT: movaps %xmm2, %xmm0
1484 ; AVX1-LABEL: sitofp_8i16_to_8f32:
1486 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
1487 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1488 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
1489 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1490 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
1493 ; AVX2-LABEL: sitofp_8i16_to_8f32:
1495 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
1496 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
1499 ; AVX512-LABEL: sitofp_8i16_to_8f32:
1501 ; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
1502 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
1504 %cvt = sitofp <8 x i16> %a to <8 x float>
1505 ret <8 x float> %cvt
; Signed <8 x i8> -> <8 x float> (low 8 bytes of a <16 x i8> input). SSE does
; two unpack + psrad $24 sign-extension sequences, one per 4-element half;
; AVX1 assembles two vpmovsxbd halves; AVX2/AVX512 use a single ymm vpmovsxbd.
1508 define <8 x float> @sitofp_8i8_to_8f32(<16 x i8> %a) {
1509 ; SSE-LABEL: sitofp_8i8_to_8f32:
1511 ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1512 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
1513 ; SSE-NEXT: psrad $24, %xmm1
1514 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm2
1515 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1516 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1517 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1518 ; SSE-NEXT: psrad $24, %xmm0
1519 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
1520 ; SSE-NEXT: movaps %xmm2, %xmm0
1523 ; AVX1-LABEL: sitofp_8i8_to_8f32:
1525 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
1526 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1527 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
1528 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1529 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
1532 ; AVX2-LABEL: sitofp_8i8_to_8f32:
1534 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
1535 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
1538 ; AVX512-LABEL: sitofp_8i8_to_8f32:
1540 ; AVX512-NEXT: vpmovsxbd %xmm0, %ymm0
1541 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
1543 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1544 %cvt = sitofp <8 x i8> %shuf to <8 x float>
1545 ret <8 x float> %cvt
; <16 x i8> sitofp with only lanes 0-7 kept. SSE/AVX1/AVX2 convert only the low
; 8 bytes (identical code to sitofp_8i8_to_8f32); AVX512 converts all 16 in zmm
; and returns the low ymm half.
1548 define <8 x float> @sitofp_16i8_to_8f32(<16 x i8> %a) {
1549 ; SSE-LABEL: sitofp_16i8_to_8f32:
1551 ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1552 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
1553 ; SSE-NEXT: psrad $24, %xmm1
1554 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm2
1555 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1556 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1557 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1558 ; SSE-NEXT: psrad $24, %xmm0
1559 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
1560 ; SSE-NEXT: movaps %xmm2, %xmm0
1563 ; AVX1-LABEL: sitofp_16i8_to_8f32:
1565 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
1566 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1567 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
1568 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1569 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
1572 ; AVX2-LABEL: sitofp_16i8_to_8f32:
1574 ; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
1575 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
1578 ; AVX512-LABEL: sitofp_16i8_to_8f32:
1580 ; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
1581 ; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0
1582 ; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
1584 %cvt = sitofp <16 x i8> %a to <16 x float>
1585 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1586 ret <8 x float> %shuf
1590 ; Unsigned Integer to Float
; Unsigned <2 x i64> -> 2 x float (upper lanes undef). Without AVX512 there is no
; unsigned scalar convert, so each element branches on sign (testq/js): values with
; the top bit clear use cvtsi2ssq directly; otherwise the value is halved with the
; low bit ORed in (shrq/andl/orq round-to-odd trick), converted, and doubled with
; addss to recover magnitude. AVX512F/VL use vcvtusi2ssq; DQ uses packed vcvtuqq2ps.
1593 define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
1594 ; SSE-LABEL: uitofp_2i64_to_4f32:
1596 ; SSE-NEXT: movdqa %xmm0, %xmm1
1597 ; SSE-NEXT: movd %xmm1, %rax
1598 ; SSE-NEXT: testq %rax, %rax
1599 ; SSE-NEXT: js .LBB39_1
1601 ; SSE-NEXT: xorps %xmm0, %xmm0
1602 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1603 ; SSE-NEXT: jmp .LBB39_3
1604 ; SSE-NEXT: .LBB39_1:
1605 ; SSE-NEXT: movq %rax, %rcx
1606 ; SSE-NEXT: shrq %rcx
1607 ; SSE-NEXT: andl $1, %eax
1608 ; SSE-NEXT: orq %rcx, %rax
1609 ; SSE-NEXT: xorps %xmm0, %xmm0
1610 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1611 ; SSE-NEXT: addss %xmm0, %xmm0
1612 ; SSE-NEXT: .LBB39_3:
1613 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1614 ; SSE-NEXT: movd %xmm1, %rax
1615 ; SSE-NEXT: testq %rax, %rax
1616 ; SSE-NEXT: js .LBB39_4
1618 ; SSE-NEXT: xorps %xmm1, %xmm1
1619 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1620 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1622 ; SSE-NEXT: .LBB39_4:
1623 ; SSE-NEXT: movq %rax, %rcx
1624 ; SSE-NEXT: shrq %rcx
1625 ; SSE-NEXT: andl $1, %eax
1626 ; SSE-NEXT: orq %rcx, %rax
1627 ; SSE-NEXT: xorps %xmm1, %xmm1
1628 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1629 ; SSE-NEXT: addss %xmm1, %xmm1
1630 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1633 ; VEX-LABEL: uitofp_2i64_to_4f32:
1635 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
1636 ; VEX-NEXT: testq %rax, %rax
1637 ; VEX-NEXT: js .LBB39_1
1639 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1640 ; VEX-NEXT: jmp .LBB39_3
1641 ; VEX-NEXT: .LBB39_1:
1642 ; VEX-NEXT: movq %rax, %rcx
1643 ; VEX-NEXT: shrq %rcx
1644 ; VEX-NEXT: andl $1, %eax
1645 ; VEX-NEXT: orq %rcx, %rax
1646 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1647 ; VEX-NEXT: vaddss %xmm1, %xmm1, %xmm1
1648 ; VEX-NEXT: .LBB39_3:
1649 ; VEX-NEXT: vmovq %xmm0, %rax
1650 ; VEX-NEXT: testq %rax, %rax
1651 ; VEX-NEXT: js .LBB39_4
1653 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1654 ; VEX-NEXT: jmp .LBB39_6
1655 ; VEX-NEXT: .LBB39_4:
1656 ; VEX-NEXT: movq %rax, %rcx
1657 ; VEX-NEXT: shrq %rcx
1658 ; VEX-NEXT: andl $1, %eax
1659 ; VEX-NEXT: orq %rcx, %rax
1660 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1661 ; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
1662 ; VEX-NEXT: .LBB39_6:
1663 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1664 ; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
1665 ; VEX-NEXT: testq %rax, %rax
1666 ; VEX-NEXT: js .LBB39_8
1668 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
1669 ; VEX-NEXT: .LBB39_8:
1670 ; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1673 ; AVX512F-LABEL: uitofp_2i64_to_4f32:
1675 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1676 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
1677 ; AVX512F-NEXT: vmovq %xmm0, %rax
1678 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0
1679 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1680 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm1
1681 ; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1682 ; AVX512F-NEXT: retq
1684 ; AVX512VL-LABEL: uitofp_2i64_to_4f32:
1686 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1687 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
1688 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1689 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0
1690 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1691 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm1
1692 ; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1693 ; AVX512VL-NEXT: retq
1695 ; AVX512DQ-LABEL: uitofp_2i64_to_4f32:
1697 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
1698 ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
1699 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1700 ; AVX512DQ-NEXT: retq
1702 ; AVX512VLDQ-LABEL: uitofp_2i64_to_4f32:
1703 ; AVX512VLDQ: # BB#0:
1704 ; AVX512VLDQ-NEXT: vcvtuqq2ps %xmm0, %xmm0
1705 ; AVX512VLDQ-NEXT: retq
1706 %cvt = uitofp <2 x i64> %a to <2 x float>
1707 %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1708 ret <4 x float> %ext
; Unsigned <2 x i64> -> 2 x float with the upper 2 result lanes zeroed (shuffle
; with zeroinitializer). Same sign-branch + halve/or/double fallback as
; uitofp_2i64_to_4f32 on pre-AVX512 targets, followed by movq/vmovq or
; zero-lane vinsertps to clear the high half. AVX512 uses vcvtusi2ssq/vcvtuqq2ps.
1711 define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) {
1712 ; SSE-LABEL: uitofp_2i64_to_2f32:
1714 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1715 ; SSE-NEXT: movd %xmm1, %rax
1716 ; SSE-NEXT: testq %rax, %rax
1717 ; SSE-NEXT: js .LBB40_1
1719 ; SSE-NEXT: xorps %xmm1, %xmm1
1720 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1721 ; SSE-NEXT: jmp .LBB40_3
1722 ; SSE-NEXT: .LBB40_1:
1723 ; SSE-NEXT: movq %rax, %rcx
1724 ; SSE-NEXT: shrq %rcx
1725 ; SSE-NEXT: andl $1, %eax
1726 ; SSE-NEXT: orq %rcx, %rax
1727 ; SSE-NEXT: xorps %xmm1, %xmm1
1728 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1729 ; SSE-NEXT: addss %xmm1, %xmm1
1730 ; SSE-NEXT: .LBB40_3:
1731 ; SSE-NEXT: movd %xmm0, %rax
1732 ; SSE-NEXT: testq %rax, %rax
1733 ; SSE-NEXT: js .LBB40_4
1735 ; SSE-NEXT: xorps %xmm0, %xmm0
1736 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1737 ; SSE-NEXT: jmp .LBB40_6
1738 ; SSE-NEXT: .LBB40_4:
1739 ; SSE-NEXT: movq %rax, %rcx
1740 ; SSE-NEXT: shrq %rcx
1741 ; SSE-NEXT: andl $1, %eax
1742 ; SSE-NEXT: orq %rcx, %rax
1743 ; SSE-NEXT: xorps %xmm0, %xmm0
1744 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1745 ; SSE-NEXT: addss %xmm0, %xmm0
1746 ; SSE-NEXT: .LBB40_6:
1747 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1748 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1751 ; VEX-LABEL: uitofp_2i64_to_2f32:
1753 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
1754 ; VEX-NEXT: testq %rax, %rax
1755 ; VEX-NEXT: js .LBB40_1
1757 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1758 ; VEX-NEXT: jmp .LBB40_3
1759 ; VEX-NEXT: .LBB40_1:
1760 ; VEX-NEXT: movq %rax, %rcx
1761 ; VEX-NEXT: shrq %rcx
1762 ; VEX-NEXT: andl $1, %eax
1763 ; VEX-NEXT: orq %rcx, %rax
1764 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1765 ; VEX-NEXT: vaddss %xmm1, %xmm1, %xmm1
1766 ; VEX-NEXT: .LBB40_3:
1767 ; VEX-NEXT: vmovq %xmm0, %rax
1768 ; VEX-NEXT: testq %rax, %rax
1769 ; VEX-NEXT: js .LBB40_4
1771 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1772 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1774 ; VEX-NEXT: .LBB40_4:
1775 ; VEX-NEXT: movq %rax, %rcx
1776 ; VEX-NEXT: shrq %rcx
1777 ; VEX-NEXT: andl $1, %eax
1778 ; VEX-NEXT: orq %rcx, %rax
1779 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1780 ; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
1781 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1784 ; AVX512F-LABEL: uitofp_2i64_to_2f32:
1786 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1787 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
1788 ; AVX512F-NEXT: vmovq %xmm0, %rax
1789 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0
1790 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
1791 ; AVX512F-NEXT: retq
1793 ; AVX512VL-LABEL: uitofp_2i64_to_2f32:
1795 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1796 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
1797 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1798 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0
1799 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1800 ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1801 ; AVX512VL-NEXT: retq
1803 ; AVX512DQ-LABEL: uitofp_2i64_to_2f32:
1805 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
1806 ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
1807 ; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1808 ; AVX512DQ-NEXT: retq
1810 ; AVX512VLDQ-LABEL: uitofp_2i64_to_2f32:
1811 ; AVX512VLDQ: # BB#0:
1812 ; AVX512VLDQ-NEXT: vcvtuqq2ps %xmm0, %xmm0
1813 ; AVX512VLDQ-NEXT: retq
1814 %cvt = uitofp <2 x i64> %a to <2 x float>
1815 %ext = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1816 ret <4 x float> %ext
1819 define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
1820 ; SSE-LABEL: uitofp_4i64_to_4f32_undef:
1822 ; SSE-NEXT: movdqa %xmm0, %xmm1
1823 ; SSE-NEXT: testq %rax, %rax
1824 ; SSE-NEXT: xorps %xmm2, %xmm2
1825 ; SSE-NEXT: js .LBB41_2
1827 ; SSE-NEXT: xorps %xmm2, %xmm2
1828 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
1829 ; SSE-NEXT: .LBB41_2:
1830 ; SSE-NEXT: movd %xmm1, %rax
1831 ; SSE-NEXT: testq %rax, %rax
1832 ; SSE-NEXT: js .LBB41_3
1834 ; SSE-NEXT: xorps %xmm0, %xmm0
1835 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1836 ; SSE-NEXT: jmp .LBB41_5
1837 ; SSE-NEXT: .LBB41_3:
1838 ; SSE-NEXT: movq %rax, %rcx
1839 ; SSE-NEXT: shrq %rcx
1840 ; SSE-NEXT: andl $1, %eax
1841 ; SSE-NEXT: orq %rcx, %rax
1842 ; SSE-NEXT: xorps %xmm0, %xmm0
1843 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
1844 ; SSE-NEXT: addss %xmm0, %xmm0
1845 ; SSE-NEXT: .LBB41_5:
1846 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1847 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1848 ; SSE-NEXT: movd %xmm1, %rax
1849 ; SSE-NEXT: testq %rax, %rax
1850 ; SSE-NEXT: js .LBB41_6
1852 ; SSE-NEXT: xorps %xmm1, %xmm1
1853 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1854 ; SSE-NEXT: jmp .LBB41_8
1855 ; SSE-NEXT: .LBB41_6:
1856 ; SSE-NEXT: movq %rax, %rcx
1857 ; SSE-NEXT: shrq %rcx
1858 ; SSE-NEXT: andl $1, %eax
1859 ; SSE-NEXT: orq %rcx, %rax
1860 ; SSE-NEXT: xorps %xmm1, %xmm1
1861 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
1862 ; SSE-NEXT: addss %xmm1, %xmm1
1863 ; SSE-NEXT: .LBB41_8:
1864 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1865 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1868 ; VEX-LABEL: uitofp_4i64_to_4f32_undef:
1870 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
1871 ; VEX-NEXT: testq %rax, %rax
1872 ; VEX-NEXT: js .LBB41_1
1874 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1875 ; VEX-NEXT: jmp .LBB41_3
1876 ; VEX-NEXT: .LBB41_1:
1877 ; VEX-NEXT: movq %rax, %rcx
1878 ; VEX-NEXT: shrq %rcx
1879 ; VEX-NEXT: andl $1, %eax
1880 ; VEX-NEXT: orq %rcx, %rax
1881 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
1882 ; VEX-NEXT: vaddss %xmm1, %xmm1, %xmm1
1883 ; VEX-NEXT: .LBB41_3:
1884 ; VEX-NEXT: vmovq %xmm0, %rax
1885 ; VEX-NEXT: testq %rax, %rax
1886 ; VEX-NEXT: js .LBB41_4
1888 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1889 ; VEX-NEXT: jmp .LBB41_6
1890 ; VEX-NEXT: .LBB41_4:
1891 ; VEX-NEXT: movq %rax, %rcx
1892 ; VEX-NEXT: shrq %rcx
1893 ; VEX-NEXT: andl $1, %eax
1894 ; VEX-NEXT: orq %rcx, %rax
1895 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
1896 ; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
1897 ; VEX-NEXT: .LBB41_6:
1898 ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1899 ; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
1900 ; VEX-NEXT: testq %rax, %rax
1901 ; VEX-NEXT: js .LBB41_8
1903 ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
1904 ; VEX-NEXT: .LBB41_8:
1905 ; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1908 ; AVX512F-LABEL: uitofp_4i64_to_4f32_undef:
1910 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
1911 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
1912 ; AVX512F-NEXT: vmovq %xmm0, %rax
1913 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0
1914 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1915 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm1
1916 ; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1917 ; AVX512F-NEXT: retq
1919 ; AVX512VL-LABEL: uitofp_4i64_to_4f32_undef:
1921 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
1922 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
1923 ; AVX512VL-NEXT: vmovq %xmm0, %rax
1924 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0
1925 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1926 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm1
1927 ; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1928 ; AVX512VL-NEXT: retq
1930 ; AVX512DQ-LABEL: uitofp_4i64_to_4f32_undef:
1932 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
1933 ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
1934 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
1935 ; AVX512DQ-NEXT: retq
1937 ; AVX512VLDQ-LABEL: uitofp_4i64_to_4f32_undef:
1938 ; AVX512VLDQ: # BB#0:
1939 ; AVX512VLDQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
1940 ; AVX512VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0
1941 ; AVX512VLDQ-NEXT: retq
1942 %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1943 %cvt = uitofp <4 x i64> %ext to <4 x float>
1944 ret <4 x float> %cvt
1947 define <4 x float> @uitofp_4i32_to_4f32(<4 x i32> %a) {
1948 ; SSE-LABEL: uitofp_4i32_to_4f32:
1950 ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
1951 ; SSE-NEXT: pand %xmm0, %xmm1
1952 ; SSE-NEXT: por {{.*}}(%rip), %xmm1
1953 ; SSE-NEXT: psrld $16, %xmm0
1954 ; SSE-NEXT: por {{.*}}(%rip), %xmm0
1955 ; SSE-NEXT: addps {{.*}}(%rip), %xmm0
1956 ; SSE-NEXT: addps %xmm1, %xmm0
1959 ; AVX1-LABEL: uitofp_4i32_to_4f32:
1961 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
1962 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
1963 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
1964 ; AVX1-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
1965 ; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0
1968 ; AVX2-LABEL: uitofp_4i32_to_4f32:
1970 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
1971 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1972 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
1973 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
1974 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
1975 ; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
1976 ; AVX2-NEXT: vaddps %xmm2, %xmm0, %xmm0
1977 ; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0
1980 ; AVX512F-LABEL: uitofp_4i32_to_4f32:
1982 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
1983 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
1984 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
1985 ; AVX512F-NEXT: retq
1987 ; AVX512VL-LABEL: uitofp_4i32_to_4f32:
1989 ; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
1990 ; AVX512VL-NEXT: retq
1992 ; AVX512DQ-LABEL: uitofp_4i32_to_4f32:
1994 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
1995 ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
1996 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
1997 ; AVX512DQ-NEXT: retq
1999 ; AVX512VLDQ-LABEL: uitofp_4i32_to_4f32:
2000 ; AVX512VLDQ: # BB#0:
2001 ; AVX512VLDQ-NEXT: vcvtudq2ps %xmm0, %xmm0
2002 ; AVX512VLDQ-NEXT: retq
2003 %cvt = uitofp <4 x i32> %a to <4 x float>
2004 ret <4 x float> %cvt
2007 define <4 x float> @uitofp_4i16_to_4f32(<8 x i16> %a) {
2008 ; SSE-LABEL: uitofp_4i16_to_4f32:
2010 ; SSE-NEXT: pxor %xmm1, %xmm1
2011 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2012 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
2015 ; AVX-LABEL: uitofp_4i16_to_4f32:
2017 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2018 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
2020 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2021 %cvt = uitofp <4 x i16> %shuf to <4 x float>
2022 ret <4 x float> %cvt
2025 define <4 x float> @uitofp_8i16_to_4f32(<8 x i16> %a) {
2026 ; SSE-LABEL: uitofp_8i16_to_4f32:
2028 ; SSE-NEXT: pxor %xmm1, %xmm1
2029 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2030 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
2033 ; AVX1-LABEL: uitofp_8i16_to_4f32:
2035 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
2036 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2037 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2038 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2039 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
2040 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2041 ; AVX1-NEXT: vzeroupper
2044 ; AVX2-LABEL: uitofp_8i16_to_4f32:
2046 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2047 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
2048 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2049 ; AVX2-NEXT: vzeroupper
2052 ; AVX512-LABEL: uitofp_8i16_to_4f32:
2054 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2055 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
2056 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2058 %cvt = uitofp <8 x i16> %a to <8 x float>
2059 %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2060 ret <4 x float> %shuf
2063 define <4 x float> @uitofp_4i8_to_4f32(<16 x i8> %a) {
2064 ; SSE-LABEL: uitofp_4i8_to_4f32:
2066 ; SSE-NEXT: pxor %xmm1, %xmm1
2067 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2068 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2069 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
2072 ; AVX-LABEL: uitofp_4i8_to_4f32:
2074 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2075 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
2077 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2078 %cvt = uitofp <4 x i8> %shuf to <4 x float>
2079 ret <4 x float> %cvt
2082 define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) {
2083 ; SSE-LABEL: uitofp_16i8_to_4f32:
2085 ; SSE-NEXT: pxor %xmm1, %xmm1
2086 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2087 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2088 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
2091 ; AVX1-LABEL: uitofp_16i8_to_4f32:
2093 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2094 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
2095 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2096 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2097 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
2098 ; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2099 ; AVX1-NEXT: vzeroupper
2102 ; AVX2-LABEL: uitofp_16i8_to_4f32:
2104 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2105 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
2106 ; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2107 ; AVX2-NEXT: vzeroupper
2110 ; AVX512-LABEL: uitofp_16i8_to_4f32:
2112 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2113 ; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0
2114 ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
2116 %cvt = uitofp <16 x i8> %a to <16 x float>
2117 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2118 ret <4 x float> %shuf
2121 define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
2122 ; SSE-LABEL: uitofp_4i64_to_4f32:
2124 ; SSE-NEXT: movd %xmm1, %rax
2125 ; SSE-NEXT: testq %rax, %rax
2126 ; SSE-NEXT: js .LBB47_1
2128 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3
2129 ; SSE-NEXT: jmp .LBB47_3
2130 ; SSE-NEXT: .LBB47_1:
2131 ; SSE-NEXT: movq %rax, %rcx
2132 ; SSE-NEXT: shrq %rcx
2133 ; SSE-NEXT: andl $1, %eax
2134 ; SSE-NEXT: orq %rcx, %rax
2135 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3
2136 ; SSE-NEXT: addss %xmm3, %xmm3
2137 ; SSE-NEXT: .LBB47_3:
2138 ; SSE-NEXT: movd %xmm0, %rax
2139 ; SSE-NEXT: testq %rax, %rax
2140 ; SSE-NEXT: js .LBB47_4
2142 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
2143 ; SSE-NEXT: jmp .LBB47_6
2144 ; SSE-NEXT: .LBB47_4:
2145 ; SSE-NEXT: movq %rax, %rcx
2146 ; SSE-NEXT: shrq %rcx
2147 ; SSE-NEXT: andl $1, %eax
2148 ; SSE-NEXT: orq %rcx, %rax
2149 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
2150 ; SSE-NEXT: addss %xmm2, %xmm2
2151 ; SSE-NEXT: .LBB47_6:
2152 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
2153 ; SSE-NEXT: movd %xmm1, %rax
2154 ; SSE-NEXT: testq %rax, %rax
2155 ; SSE-NEXT: js .LBB47_7
2157 ; SSE-NEXT: xorps %xmm1, %xmm1
2158 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
2159 ; SSE-NEXT: jmp .LBB47_9
2160 ; SSE-NEXT: .LBB47_7:
2161 ; SSE-NEXT: movq %rax, %rcx
2162 ; SSE-NEXT: shrq %rcx
2163 ; SSE-NEXT: andl $1, %eax
2164 ; SSE-NEXT: orq %rcx, %rax
2165 ; SSE-NEXT: xorps %xmm1, %xmm1
2166 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
2167 ; SSE-NEXT: addss %xmm1, %xmm1
2168 ; SSE-NEXT: .LBB47_9:
2169 ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
2170 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
2171 ; SSE-NEXT: movd %xmm0, %rax
2172 ; SSE-NEXT: testq %rax, %rax
2173 ; SSE-NEXT: js .LBB47_10
2174 ; SSE-NEXT: # BB#11:
2175 ; SSE-NEXT: xorps %xmm0, %xmm0
2176 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
2177 ; SSE-NEXT: jmp .LBB47_12
2178 ; SSE-NEXT: .LBB47_10:
2179 ; SSE-NEXT: movq %rax, %rcx
2180 ; SSE-NEXT: shrq %rcx
2181 ; SSE-NEXT: andl $1, %eax
2182 ; SSE-NEXT: orq %rcx, %rax
2183 ; SSE-NEXT: xorps %xmm0, %xmm0
2184 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
2185 ; SSE-NEXT: addss %xmm0, %xmm0
2186 ; SSE-NEXT: .LBB47_12:
2187 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2188 ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
2189 ; SSE-NEXT: movaps %xmm2, %xmm0
2192 ; AVX1-LABEL: uitofp_4i64_to_4f32:
2194 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
2195 ; AVX1-NEXT: testq %rax, %rax
2196 ; AVX1-NEXT: js .LBB47_1
2197 ; AVX1-NEXT: # BB#2:
2198 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
2199 ; AVX1-NEXT: jmp .LBB47_3
2200 ; AVX1-NEXT: .LBB47_1:
2201 ; AVX1-NEXT: movq %rax, %rcx
2202 ; AVX1-NEXT: shrq %rcx
2203 ; AVX1-NEXT: andl $1, %eax
2204 ; AVX1-NEXT: orq %rcx, %rax
2205 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
2206 ; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
2207 ; AVX1-NEXT: .LBB47_3:
2208 ; AVX1-NEXT: vmovq %xmm0, %rax
2209 ; AVX1-NEXT: testq %rax, %rax
2210 ; AVX1-NEXT: js .LBB47_4
2211 ; AVX1-NEXT: # BB#5:
2212 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
2213 ; AVX1-NEXT: jmp .LBB47_6
2214 ; AVX1-NEXT: .LBB47_4:
2215 ; AVX1-NEXT: movq %rax, %rcx
2216 ; AVX1-NEXT: shrq %rcx
2217 ; AVX1-NEXT: andl $1, %eax
2218 ; AVX1-NEXT: orq %rcx, %rax
2219 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
2220 ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
2221 ; AVX1-NEXT: .LBB47_6:
2222 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
2223 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2224 ; AVX1-NEXT: vmovq %xmm0, %rax
2225 ; AVX1-NEXT: testq %rax, %rax
2226 ; AVX1-NEXT: js .LBB47_7
2227 ; AVX1-NEXT: # BB#8:
2228 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
2229 ; AVX1-NEXT: jmp .LBB47_9
2230 ; AVX1-NEXT: .LBB47_7:
2231 ; AVX1-NEXT: movq %rax, %rcx
2232 ; AVX1-NEXT: shrq %rcx
2233 ; AVX1-NEXT: andl $1, %eax
2234 ; AVX1-NEXT: orq %rcx, %rax
2235 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
2236 ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
2237 ; AVX1-NEXT: .LBB47_9:
2238 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
2239 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
2240 ; AVX1-NEXT: testq %rax, %rax
2241 ; AVX1-NEXT: js .LBB47_10
2242 ; AVX1-NEXT: # BB#11:
2243 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
2244 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
2245 ; AVX1-NEXT: vzeroupper
2247 ; AVX1-NEXT: .LBB47_10:
2248 ; AVX1-NEXT: movq %rax, %rcx
2249 ; AVX1-NEXT: shrq %rcx
2250 ; AVX1-NEXT: andl $1, %eax
2251 ; AVX1-NEXT: orq %rcx, %rax
2252 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
2253 ; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
2254 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
2255 ; AVX1-NEXT: vzeroupper
2258 ; AVX2-LABEL: uitofp_4i64_to_4f32:
2260 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
2261 ; AVX2-NEXT: testq %rax, %rax
2262 ; AVX2-NEXT: js .LBB47_1
2263 ; AVX2-NEXT: # BB#2:
2264 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
2265 ; AVX2-NEXT: jmp .LBB47_3
2266 ; AVX2-NEXT: .LBB47_1:
2267 ; AVX2-NEXT: movq %rax, %rcx
2268 ; AVX2-NEXT: shrq %rcx
2269 ; AVX2-NEXT: andl $1, %eax
2270 ; AVX2-NEXT: orq %rcx, %rax
2271 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
2272 ; AVX2-NEXT: vaddss %xmm1, %xmm1, %xmm1
2273 ; AVX2-NEXT: .LBB47_3:
2274 ; AVX2-NEXT: vmovq %xmm0, %rax
2275 ; AVX2-NEXT: testq %rax, %rax
2276 ; AVX2-NEXT: js .LBB47_4
2277 ; AVX2-NEXT: # BB#5:
2278 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
2279 ; AVX2-NEXT: jmp .LBB47_6
2280 ; AVX2-NEXT: .LBB47_4:
2281 ; AVX2-NEXT: movq %rax, %rcx
2282 ; AVX2-NEXT: shrq %rcx
2283 ; AVX2-NEXT: andl $1, %eax
2284 ; AVX2-NEXT: orq %rcx, %rax
2285 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
2286 ; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2
2287 ; AVX2-NEXT: .LBB47_6:
2288 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
2289 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
2290 ; AVX2-NEXT: vmovq %xmm0, %rax
2291 ; AVX2-NEXT: testq %rax, %rax
2292 ; AVX2-NEXT: js .LBB47_7
2293 ; AVX2-NEXT: # BB#8:
2294 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
2295 ; AVX2-NEXT: jmp .LBB47_9
2296 ; AVX2-NEXT: .LBB47_7:
2297 ; AVX2-NEXT: movq %rax, %rcx
2298 ; AVX2-NEXT: shrq %rcx
2299 ; AVX2-NEXT: andl $1, %eax
2300 ; AVX2-NEXT: orq %rcx, %rax
2301 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
2302 ; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2
2303 ; AVX2-NEXT: .LBB47_9:
2304 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
2305 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
2306 ; AVX2-NEXT: testq %rax, %rax
2307 ; AVX2-NEXT: js .LBB47_10
2308 ; AVX2-NEXT: # BB#11:
2309 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
2310 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
2311 ; AVX2-NEXT: vzeroupper
2313 ; AVX2-NEXT: .LBB47_10:
2314 ; AVX2-NEXT: movq %rax, %rcx
2315 ; AVX2-NEXT: shrq %rcx
2316 ; AVX2-NEXT: andl $1, %eax
2317 ; AVX2-NEXT: orq %rcx, %rax
2318 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
2319 ; AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm0
2320 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
2321 ; AVX2-NEXT: vzeroupper
2324 ; AVX512F-LABEL: uitofp_4i64_to_4f32:
2326 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
2327 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
2328 ; AVX512F-NEXT: vmovq %xmm0, %rax
2329 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
2330 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
2331 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
2332 ; AVX512F-NEXT: vmovq %xmm0, %rax
2333 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2
2334 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
2335 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
2336 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0
2337 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
2338 ; AVX512F-NEXT: retq
2340 ; AVX512VL-LABEL: uitofp_4i64_to_4f32:
2342 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
2343 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
2344 ; AVX512VL-NEXT: vmovq %xmm0, %rax
2345 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
2346 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
2347 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
2348 ; AVX512VL-NEXT: vmovq %xmm0, %rax
2349 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2
2350 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
2351 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
2352 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0
2353 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
2354 ; AVX512VL-NEXT: retq
2356 ; AVX512DQ-LABEL: uitofp_4i64_to_4f32:
2358 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
2359 ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
2360 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
2361 ; AVX512DQ-NEXT: retq
2363 ; AVX512VLDQ-LABEL: uitofp_4i64_to_4f32:
2364 ; AVX512VLDQ: # BB#0:
2365 ; AVX512VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0
2366 ; AVX512VLDQ-NEXT: retq
2367 %cvt = uitofp <4 x i64> %a to <4 x float>
2368 ret <4 x float> %cvt
2371 define <8 x float> @uitofp_8i32_to_8f32(<8 x i32> %a) {
2372 ; SSE-LABEL: uitofp_8i32_to_8f32:
2374 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
2375 ; SSE-NEXT: movdqa %xmm0, %xmm3
2376 ; SSE-NEXT: pand %xmm2, %xmm3
2377 ; SSE-NEXT: movdqa {{.*#+}} xmm4 = [1258291200,1258291200,1258291200,1258291200]
2378 ; SSE-NEXT: por %xmm4, %xmm3
2379 ; SSE-NEXT: psrld $16, %xmm0
2380 ; SSE-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928]
2381 ; SSE-NEXT: por %xmm5, %xmm0
2382 ; SSE-NEXT: movaps {{.*#+}} xmm6 = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
2383 ; SSE-NEXT: addps %xmm6, %xmm0
2384 ; SSE-NEXT: addps %xmm3, %xmm0
2385 ; SSE-NEXT: pand %xmm1, %xmm2
2386 ; SSE-NEXT: por %xmm4, %xmm2
2387 ; SSE-NEXT: psrld $16, %xmm1
2388 ; SSE-NEXT: por %xmm5, %xmm1
2389 ; SSE-NEXT: addps %xmm6, %xmm1
2390 ; SSE-NEXT: addps %xmm2, %xmm1
2393 ; AVX1-LABEL: uitofp_8i32_to_8f32:
2395 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
2396 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2397 ; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2
2398 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
2399 ; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1
2400 ; AVX1-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
2401 ; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
2402 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
2403 ; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0
2406 ; AVX2-LABEL: uitofp_8i32_to_8f32:
2408 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
2409 ; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
2410 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
2411 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
2412 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
2413 ; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
2414 ; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0
2415 ; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0
2418 ; AVX512F-LABEL: uitofp_8i32_to_8f32:
2420 ; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
2421 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
2422 ; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
2423 ; AVX512F-NEXT: retq
2425 ; AVX512VL-LABEL: uitofp_8i32_to_8f32:
2427 ; AVX512VL-NEXT: vcvtudq2ps %ymm0, %ymm0
2428 ; AVX512VL-NEXT: retq
2430 ; AVX512DQ-LABEL: uitofp_8i32_to_8f32:
2432 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
2433 ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
2434 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
2435 ; AVX512DQ-NEXT: retq
2437 ; AVX512VLDQ-LABEL: uitofp_8i32_to_8f32:
2438 ; AVX512VLDQ: # BB#0:
2439 ; AVX512VLDQ-NEXT: vcvtudq2ps %ymm0, %ymm0
2440 ; AVX512VLDQ-NEXT: retq
2441 %cvt = uitofp <8 x i32> %a to <8 x float>
2442 ret <8 x float> %cvt
2445 define <8 x float> @uitofp_8i16_to_8f32(<8 x i16> %a) {
2446 ; SSE-LABEL: uitofp_8i16_to_8f32:
2448 ; SSE-NEXT: pxor %xmm1, %xmm1
2449 ; SSE-NEXT: movdqa %xmm0, %xmm2
2450 ; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2451 ; SSE-NEXT: cvtdq2ps %xmm2, %xmm2
2452 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2453 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
2454 ; SSE-NEXT: movaps %xmm2, %xmm0
2457 ; AVX1-LABEL: uitofp_8i16_to_8f32:
2459 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
2460 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2461 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2462 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2463 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
2466 ; AVX2-LABEL: uitofp_8i16_to_8f32:
2468 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2469 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
2472 ; AVX512-LABEL: uitofp_8i16_to_8f32:
2474 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2475 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
2477 %cvt = uitofp <8 x i16> %a to <8 x float>
2478 ret <8 x float> %cvt
2481 define <8 x float> @uitofp_8i8_to_8f32(<16 x i8> %a) {
2482 ; SSE-LABEL: uitofp_8i8_to_8f32:
2484 ; SSE-NEXT: pxor %xmm1, %xmm1
2485 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2486 ; SSE-NEXT: movdqa %xmm0, %xmm2
2487 ; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2488 ; SSE-NEXT: cvtdq2ps %xmm2, %xmm2
2489 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2490 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
2491 ; SSE-NEXT: movaps %xmm2, %xmm0
2494 ; AVX1-LABEL: uitofp_8i8_to_8f32:
2496 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2497 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
2498 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2499 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2500 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
2503 ; AVX2-LABEL: uitofp_8i8_to_8f32:
2505 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2506 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
2509 ; AVX512-LABEL: uitofp_8i8_to_8f32:
2511 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2512 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
2514 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2515 %cvt = uitofp <8 x i8> %shuf to <8 x float>
2516 ret <8 x float> %cvt
2519 define <8 x float> @uitofp_16i8_to_8f32(<16 x i8> %a) {
2520 ; SSE-LABEL: uitofp_16i8_to_8f32:
2522 ; SSE-NEXT: pxor %xmm1, %xmm1
2523 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2524 ; SSE-NEXT: movdqa %xmm0, %xmm2
2525 ; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2526 ; SSE-NEXT: cvtdq2ps %xmm2, %xmm2
2527 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2528 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
2529 ; SSE-NEXT: movaps %xmm2, %xmm0
2532 ; AVX1-LABEL: uitofp_16i8_to_8f32:
2534 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2535 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
2536 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2537 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2538 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
2541 ; AVX2-LABEL: uitofp_16i8_to_8f32:
2543 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
2544 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
2547 ; AVX512-LABEL: uitofp_16i8_to_8f32:
2549 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2550 ; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0
2551 ; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
2553 %cvt = uitofp <16 x i8> %a to <16 x float>
2554 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2555 ret <8 x float> %shuf
2559 ; Load Signed Integer to Double
2562 define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) {
2563 ; SSE-LABEL: sitofp_load_2i64_to_2f64:
2565 ; SSE-NEXT: movdqa (%rdi), %xmm1
2566 ; SSE-NEXT: movd %xmm1, %rax
2567 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0
2568 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
2569 ; SSE-NEXT: movd %xmm1, %rax
2570 ; SSE-NEXT: xorps %xmm1, %xmm1
2571 ; SSE-NEXT: cvtsi2sdq %rax, %xmm1
2572 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2575 ; VEX-LABEL: sitofp_load_2i64_to_2f64:
2577 ; VEX-NEXT: vmovdqa (%rdi), %xmm0
2578 ; VEX-NEXT: vpextrq $1, %xmm0, %rax
2579 ; VEX-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
2580 ; VEX-NEXT: vmovq %xmm0, %rax
2581 ; VEX-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
2582 ; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2585 ; AVX512F-LABEL: sitofp_load_2i64_to_2f64:
2587 ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
2588 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
2589 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
2590 ; AVX512F-NEXT: vmovq %xmm0, %rax
2591 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
2592 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2593 ; AVX512F-NEXT: retq
2595 ; AVX512VL-LABEL: sitofp_load_2i64_to_2f64:
2597 ; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
2598 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
2599 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
2600 ; AVX512VL-NEXT: vmovq %xmm0, %rax
2601 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
2602 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2603 ; AVX512VL-NEXT: retq
2605 ; AVX512DQ-LABEL: sitofp_load_2i64_to_2f64:
2607 ; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
2608 ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
2609 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
2610 ; AVX512DQ-NEXT: retq
2612 ; AVX512VLDQ-LABEL: sitofp_load_2i64_to_2f64:
2613 ; AVX512VLDQ: # BB#0:
2614 ; AVX512VLDQ-NEXT: vcvtqq2pd (%rdi), %xmm0
2615 ; AVX512VLDQ-NEXT: retq
2616 %ld = load <2 x i64>, <2 x i64> *%a
2617 %cvt = sitofp <2 x i64> %ld to <2 x double>
2618 ret <2 x double> %cvt
; sitofp of a loaded <2 x i32> to <2 x double>. Most configs fold the load into
; cvtdq2pd; the VL configs first widen/shuffle the pair. CHECK lines are
; autogenerated by utils/update_llc_test_checks.py -- regenerate, don't hand-edit.
2621 define <2 x double> @sitofp_load_2i32_to_2f64(<2 x i32> *%a) {
2622 ; SSE-LABEL: sitofp_load_2i32_to_2f64:
2624 ; SSE-NEXT: cvtdq2pd (%rdi), %xmm0
2627 ; VEX-LABEL: sitofp_load_2i32_to_2f64:
2629 ; VEX-NEXT: vcvtdq2pd (%rdi), %xmm0
2632 ; AVX512F-LABEL: sitofp_load_2i32_to_2f64:
2634 ; AVX512F-NEXT: vcvtdq2pd (%rdi), %xmm0
2635 ; AVX512F-NEXT: retq
2637 ; AVX512VL-LABEL: sitofp_load_2i32_to_2f64:
2639 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
2640 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2641 ; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0
2642 ; AVX512VL-NEXT: retq
2644 ; AVX512DQ-LABEL: sitofp_load_2i32_to_2f64:
2646 ; AVX512DQ-NEXT: vcvtdq2pd (%rdi), %xmm0
2647 ; AVX512DQ-NEXT: retq
2649 ; AVX512VLDQ-LABEL: sitofp_load_2i32_to_2f64:
2650 ; AVX512VLDQ: # BB#0:
2651 ; AVX512VLDQ-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
2652 ; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2653 ; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
2654 ; AVX512VLDQ-NEXT: retq
2655 %ld = load <2 x i32>, <2 x i32> *%a
2656 %cvt = sitofp <2 x i32> %ld to <2 x double>
2657 ret <2 x double> %cvt
; sitofp of a loaded <2 x i16>: SSE sign-extends via punpcklwd+psrad, AVX uses
; vpmovsxwq+shuffle before cvtdq2pd. Autogenerated CHECK lines -- do not hand-edit.
2660 define <2 x double> @sitofp_load_2i16_to_2f64(<2 x i16> *%a) {
2661 ; SSE-LABEL: sitofp_load_2i16_to_2f64:
2663 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2664 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2665 ; SSE-NEXT: psrad $16, %xmm0
2666 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
2669 ; AVX-LABEL: sitofp_load_2i16_to_2f64:
2671 ; AVX-NEXT: vpmovsxwq (%rdi), %xmm0
2672 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2673 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
2675 %ld = load <2 x i16>, <2 x i16> *%a
2676 %cvt = sitofp <2 x i16> %ld to <2 x double>
2677 ret <2 x double> %cvt
; sitofp of a loaded <2 x i8>: the 2-byte pair is loaded as a scalar (movzwl) on
; SSE, then sign-extended by unpack+psrad $24; AVX uses vpmovsxbq.
; Autogenerated CHECK lines -- do not hand-edit.
2680 define <2 x double> @sitofp_load_2i8_to_2f64(<2 x i8> *%a) {
2681 ; SSE-LABEL: sitofp_load_2i8_to_2f64:
2683 ; SSE-NEXT: movzwl (%rdi), %eax
2684 ; SSE-NEXT: movd %eax, %xmm0
2685 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2686 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2687 ; SSE-NEXT: psrad $24, %xmm0
2688 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
2691 ; AVX-LABEL: sitofp_load_2i8_to_2f64:
2693 ; AVX-NEXT: vpmovsxbq (%rdi), %xmm0
2694 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2695 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
2697 %ld = load <2 x i8>, <2 x i8> *%a
2698 %cvt = sitofp <2 x i8> %ld to <2 x double>
2699 ret <2 x double> %cvt
; sitofp of a loaded <4 x i64>: without AVX512DQ there is no packed i64->f64
; conversion, so each element goes through a GPR and scalar cvtsi2sd; the DQ
; configs use vcvtqq2pd. Autogenerated CHECK lines -- do not hand-edit.
2702 define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) {
2703 ; SSE-LABEL: sitofp_load_4i64_to_4f64:
2705 ; SSE-NEXT: movdqa (%rdi), %xmm1
2706 ; SSE-NEXT: movdqa 16(%rdi), %xmm2
2707 ; SSE-NEXT: movd %xmm1, %rax
2708 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0
2709 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
2710 ; SSE-NEXT: movd %xmm1, %rax
2711 ; SSE-NEXT: xorps %xmm1, %xmm1
2712 ; SSE-NEXT: cvtsi2sdq %rax, %xmm1
2713 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2714 ; SSE-NEXT: movd %xmm2, %rax
2715 ; SSE-NEXT: xorps %xmm1, %xmm1
2716 ; SSE-NEXT: cvtsi2sdq %rax, %xmm1
2717 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
2718 ; SSE-NEXT: movd %xmm2, %rax
2719 ; SSE-NEXT: xorps %xmm2, %xmm2
2720 ; SSE-NEXT: cvtsi2sdq %rax, %xmm2
2721 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2724 ; AVX1-LABEL: sitofp_load_4i64_to_4f64:
2726 ; AVX1-NEXT: vmovaps (%rdi), %ymm0
2727 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2728 ; AVX1-NEXT: vpextrq $1, %xmm1, %rax
2729 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
2730 ; AVX1-NEXT: vmovq %xmm1, %rax
2731 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
2732 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2733 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
2734 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
2735 ; AVX1-NEXT: vmovq %xmm0, %rax
2736 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
2737 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2738 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2741 ; AVX2-LABEL: sitofp_load_4i64_to_4f64:
2743 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
2744 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2745 ; AVX2-NEXT: vpextrq $1, %xmm1, %rax
2746 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
2747 ; AVX2-NEXT: vmovq %xmm1, %rax
2748 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
2749 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2750 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
2751 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
2752 ; AVX2-NEXT: vmovq %xmm0, %rax
2753 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
2754 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2755 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2758 ; AVX512F-LABEL: sitofp_load_4i64_to_4f64:
2760 ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
2761 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
2762 ; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
2763 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
2764 ; AVX512F-NEXT: vmovq %xmm1, %rax
2765 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
2766 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2767 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
2768 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
2769 ; AVX512F-NEXT: vmovq %xmm0, %rax
2770 ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
2771 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2772 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2773 ; AVX512F-NEXT: retq
2775 ; AVX512VL-LABEL: sitofp_load_4i64_to_4f64:
2777 ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
2778 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
2779 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
2780 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
2781 ; AVX512VL-NEXT: vmovq %xmm1, %rax
2782 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
2783 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
2784 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
2785 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
2786 ; AVX512VL-NEXT: vmovq %xmm0, %rax
2787 ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
2788 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2789 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2790 ; AVX512VL-NEXT: retq
2792 ; AVX512DQ-LABEL: sitofp_load_4i64_to_4f64:
2794 ; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0
2795 ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
2796 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
2797 ; AVX512DQ-NEXT: retq
2799 ; AVX512VLDQ-LABEL: sitofp_load_4i64_to_4f64:
2800 ; AVX512VLDQ: # BB#0:
2801 ; AVX512VLDQ-NEXT: vcvtqq2pd (%rdi), %ymm0
2802 ; AVX512VLDQ-NEXT: retq
2803 %ld = load <4 x i64>, <4 x i64> *%a
2804 %cvt = sitofp <4 x i64> %ld to <4 x double>
2805 ret <4 x double> %cvt
; sitofp of a loaded <4 x i32>: SSE needs two cvtdq2pd (each converts 2 lanes);
; AVX converts all four in one ymm op with the load folded.
; Autogenerated CHECK lines -- do not hand-edit.
2808 define <4 x double> @sitofp_load_4i32_to_4f64(<4 x i32> *%a) {
2809 ; SSE-LABEL: sitofp_load_4i32_to_4f64:
2811 ; SSE-NEXT: movdqa (%rdi), %xmm1
2812 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
2813 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
2814 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
2817 ; AVX-LABEL: sitofp_load_4i32_to_4f64:
2819 ; AVX-NEXT: vcvtdq2pd (%rdi), %ymm0
2821 %ld = load <4 x i32>, <4 x i32> *%a
2822 %cvt = sitofp <4 x i32> %ld to <4 x double>
2823 ret <4 x double> %cvt
; sitofp of a loaded <4 x i16>: SSE sign-extends via punpcklwd+psrad $16, AVX
; via vpmovsxwd, before the i32->f64 conversion.
; Autogenerated CHECK lines -- do not hand-edit.
2826 define <4 x double> @sitofp_load_4i16_to_4f64(<4 x i16> *%a) {
2827 ; SSE-LABEL: sitofp_load_4i16_to_4f64:
2829 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2830 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2831 ; SSE-NEXT: psrad $16, %xmm1
2832 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
2833 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
2834 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
2837 ; AVX-LABEL: sitofp_load_4i16_to_4f64:
2839 ; AVX-NEXT: vpmovsxwd (%rdi), %xmm0
2840 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
2842 %ld = load <4 x i16>, <4 x i16> *%a
2843 %cvt = sitofp <4 x i16> %ld to <4 x double>
2844 ret <4 x double> %cvt
; sitofp of a loaded <4 x i8>: SSE sign-extends via byte/word unpacks + psrad
; $24, AVX via vpmovsxbd, before the i32->f64 conversion.
; Autogenerated CHECK lines -- do not hand-edit.
2847 define <4 x double> @sitofp_load_4i8_to_4f64(<4 x i8> *%a) {
2848 ; SSE-LABEL: sitofp_load_4i8_to_4f64:
2850 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2851 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2852 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2853 ; SSE-NEXT: psrad $24, %xmm1
2854 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
2855 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
2856 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
2859 ; AVX-LABEL: sitofp_load_4i8_to_4f64:
2861 ; AVX-NEXT: vpmovsxbd (%rdi), %xmm0
2862 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
2864 %ld = load <4 x i8>, <4 x i8> *%a
2865 %cvt = sitofp <4 x i8> %ld to <4 x double>
2866 ret <4 x double> %cvt
2870 ; Load Unsigned Integer to Double
; uitofp of a loaded <2 x i64>: pre-AVX512 uses the classic magic-constant
; trick (punpckldq with exponent bits, subpd, horizontal add); AVX512F/VL use
; scalar vcvtusi2sd per element; DQ configs use vcvtuqq2pd.
; Autogenerated CHECK lines -- do not hand-edit.
2873 define <2 x double> @uitofp_load_2i64_to_2f64(<2 x i64> *%a) {
2874 ; SSE-LABEL: uitofp_load_2i64_to_2f64:
2876 ; SSE-NEXT: movdqa (%rdi), %xmm1
2877 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
2878 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
2879 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
2880 ; SSE-NEXT: movapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
2881 ; SSE-NEXT: subpd %xmm4, %xmm1
2882 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
2883 ; SSE-NEXT: addpd %xmm1, %xmm0
2884 ; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
2885 ; SSE-NEXT: subpd %xmm4, %xmm3
2886 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
2887 ; SSE-NEXT: addpd %xmm3, %xmm1
2888 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2891 ; VEX-LABEL: uitofp_load_2i64_to_2f64:
2893 ; VEX-NEXT: vmovdqa (%rdi), %xmm0
2894 ; VEX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
2895 ; VEX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2896 ; VEX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
2897 ; VEX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
2898 ; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
2899 ; VEX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2900 ; VEX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
2901 ; VEX-NEXT: vhaddpd %xmm0, %xmm2, %xmm0
2904 ; AVX512F-LABEL: uitofp_load_2i64_to_2f64:
2906 ; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
2907 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
2908 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
2909 ; AVX512F-NEXT: vmovq %xmm0, %rax
2910 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
2911 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2912 ; AVX512F-NEXT: retq
2914 ; AVX512VL-LABEL: uitofp_load_2i64_to_2f64:
2916 ; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
2917 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
2918 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
2919 ; AVX512VL-NEXT: vmovq %xmm0, %rax
2920 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
2921 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2922 ; AVX512VL-NEXT: retq
2924 ; AVX512DQ-LABEL: uitofp_load_2i64_to_2f64:
2926 ; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
2927 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
2928 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
2929 ; AVX512DQ-NEXT: retq
2931 ; AVX512VLDQ-LABEL: uitofp_load_2i64_to_2f64:
2932 ; AVX512VLDQ: # BB#0:
2933 ; AVX512VLDQ-NEXT: vcvtuqq2pd (%rdi), %xmm0
2934 ; AVX512VLDQ-NEXT: retq
2935 %ld = load <2 x i64>, <2 x i64> *%a
2936 %cvt = uitofp <2 x i64> %ld to <2 x double>
2937 ret <2 x double> %cvt
; uitofp of a loaded <2 x i32>: pre-AVX512 splits each u32 into lo/hi 16-bit
; halves, converts both signed, and recombines with mul+add; AVX512 configs use
; vcvtudq2pd directly. Autogenerated CHECK lines -- do not hand-edit.
2940 define <2 x double> @uitofp_load_2i32_to_2f64(<2 x i32> *%a) {
2941 ; SSE-LABEL: uitofp_load_2i32_to_2f64:
2943 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2944 ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0]
2945 ; SSE-NEXT: pand %xmm0, %xmm1
2946 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
2947 ; SSE-NEXT: psrld $16, %xmm0
2948 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
2949 ; SSE-NEXT: mulpd {{.*}}(%rip), %xmm0
2950 ; SSE-NEXT: addpd %xmm1, %xmm0
2953 ; VEX-LABEL: uitofp_load_2i32_to_2f64:
2955 ; VEX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2956 ; VEX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2957 ; VEX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
2958 ; VEX-NEXT: vcvtdq2pd %xmm1, %xmm1
2959 ; VEX-NEXT: vpsrld $16, %xmm0, %xmm0
2960 ; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
2961 ; VEX-NEXT: vmulpd {{.*}}(%rip), %xmm0, %xmm0
2962 ; VEX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
2965 ; AVX512F-LABEL: uitofp_load_2i32_to_2f64:
2967 ; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2968 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
2969 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
2970 ; AVX512F-NEXT: retq
2972 ; AVX512VL-LABEL: uitofp_load_2i32_to_2f64:
2974 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
2975 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2976 ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
2977 ; AVX512VL-NEXT: retq
2979 ; AVX512DQ-LABEL: uitofp_load_2i32_to_2f64:
2981 ; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
2982 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
2983 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
2984 ; AVX512DQ-NEXT: retq
2986 ; AVX512VLDQ-LABEL: uitofp_load_2i32_to_2f64:
2987 ; AVX512VLDQ: # BB#0:
2988 ; AVX512VLDQ-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
2989 ; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2990 ; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0
2991 ; AVX512VLDQ-NEXT: retq
2992 %ld = load <2 x i32>, <2 x i32> *%a
2993 %cvt = uitofp <2 x i32> %ld to <2 x double>
2994 ret <2 x double> %cvt
; uitofp of a loaded <2 x i16>: zero-extension (unpack with zero / vpmovzxwd /
; vpmovzxwq+blend) followed by a signed i32->f64 conversion, which is safe once
; the value is zero-extended. Autogenerated CHECK lines -- do not hand-edit.
2997 define <2 x double> @uitofp_load_2i16_to_2f64(<2 x i16> *%a) {
2998 ; SSE-LABEL: uitofp_load_2i16_to_2f64:
3000 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3001 ; SSE-NEXT: pxor %xmm1, %xmm1
3002 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3003 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
3006 ; VEX-LABEL: uitofp_load_2i16_to_2f64:
3008 ; VEX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3009 ; VEX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
3010 ; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
3013 ; AVX512F-LABEL: uitofp_load_2i16_to_2f64:
3015 ; AVX512F-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3016 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
3017 ; AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0
3018 ; AVX512F-NEXT: retq
3020 ; AVX512VL-LABEL: uitofp_load_2i16_to_2f64:
3022 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
3023 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
3024 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
3025 ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3,4,5,6,7]
3026 ; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0
3027 ; AVX512VL-NEXT: retq
3029 ; AVX512DQ-LABEL: uitofp_load_2i16_to_2f64:
3031 ; AVX512DQ-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3032 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
3033 ; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %xmm0
3034 ; AVX512DQ-NEXT: retq
3036 ; AVX512VLDQ-LABEL: uitofp_load_2i16_to_2f64:
3037 ; AVX512VLDQ: # BB#0:
3038 ; AVX512VLDQ-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
3039 ; AVX512VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
3040 ; AVX512VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
3041 ; AVX512VLDQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3,4,5,6,7]
3042 ; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
3043 ; AVX512VLDQ-NEXT: retq
3044 %ld = load <2 x i16>, <2 x i16> *%a
3045 %cvt = uitofp <2 x i16> %ld to <2 x double>
3046 ret <2 x double> %cvt
; uitofp of a loaded <2 x i8>: the byte pair is loaded as a 16-bit scalar and
; zero-extended (unpacks / vpmovzxbd / vpmovzxbq+shuffle) before a signed
; i32->f64 conversion. Autogenerated CHECK lines -- do not hand-edit.
3049 define <2 x double> @uitofp_load_2i8_to_2f64(<2 x i8> *%a) {
3050 ; SSE-LABEL: uitofp_load_2i8_to_2f64:
3052 ; SSE-NEXT: movzwl (%rdi), %eax
3053 ; SSE-NEXT: movd %eax, %xmm0
3054 ; SSE-NEXT: pxor %xmm1, %xmm1
3055 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3056 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3057 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
3060 ; VEX-LABEL: uitofp_load_2i8_to_2f64:
3062 ; VEX-NEXT: movzwl (%rdi), %eax
3063 ; VEX-NEXT: vmovd %eax, %xmm0
3064 ; VEX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3065 ; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
3068 ; AVX512F-LABEL: uitofp_load_2i8_to_2f64:
3070 ; AVX512F-NEXT: movzwl (%rdi), %eax
3071 ; AVX512F-NEXT: vmovd %eax, %xmm0
3072 ; AVX512F-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3073 ; AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0
3074 ; AVX512F-NEXT: retq
3076 ; AVX512VL-LABEL: uitofp_load_2i8_to_2f64:
3078 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
3079 ; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[u],zero,zero,zero,xmm0[u],zero,zero,zero
3080 ; AVX512VL-NEXT: vcvtdq2pd %xmm0, %xmm0
3081 ; AVX512VL-NEXT: retq
3083 ; AVX512DQ-LABEL: uitofp_load_2i8_to_2f64:
3085 ; AVX512DQ-NEXT: movzwl (%rdi), %eax
3086 ; AVX512DQ-NEXT: vmovd %eax, %xmm0
3087 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
3088 ; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %xmm0
3089 ; AVX512DQ-NEXT: retq
3091 ; AVX512VLDQ-LABEL: uitofp_load_2i8_to_2f64:
3092 ; AVX512VLDQ: # BB#0:
3093 ; AVX512VLDQ-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
3094 ; AVX512VLDQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[u],zero,zero,zero,xmm0[u],zero,zero,zero
3095 ; AVX512VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
3096 ; AVX512VLDQ-NEXT: retq
3097 %ld = load <2 x i8>, <2 x i8> *%a
3098 %cvt = uitofp <2 x i8> %ld to <2 x double>
3099 ret <2 x double> %cvt
; uitofp of a loaded <4 x i64>: pre-AVX512 repeats the magic-constant
; subpd/haddpd trick per 128-bit half; AVX512F/VL use scalar vcvtusi2sd per
; element; DQ configs use vcvtuqq2pd. Autogenerated CHECK lines -- do not
; hand-edit.
3102 define <4 x double> @uitofp_load_4i64_to_4f64(<4 x i64> *%a) {
3103 ; SSE-LABEL: uitofp_load_4i64_to_4f64:
3105 ; SSE-NEXT: movdqa (%rdi), %xmm1
3106 ; SSE-NEXT: movdqa 16(%rdi), %xmm2
3107 ; SSE-NEXT: movdqa {{.*#+}} xmm3 = [1127219200,1160773632,0,0]
3108 ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
3109 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
3110 ; SSE-NEXT: movapd {{.*#+}} xmm5 = [4.503600e+15,1.934281e+25]
3111 ; SSE-NEXT: subpd %xmm5, %xmm1
3112 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
3113 ; SSE-NEXT: addpd %xmm1, %xmm0
3114 ; SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
3115 ; SSE-NEXT: subpd %xmm5, %xmm4
3116 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm4[2,3,0,1]
3117 ; SSE-NEXT: addpd %xmm4, %xmm1
3118 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3119 ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
3120 ; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
3121 ; SSE-NEXT: subpd %xmm5, %xmm2
3122 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
3123 ; SSE-NEXT: addpd %xmm2, %xmm1
3124 ; SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
3125 ; SSE-NEXT: subpd %xmm5, %xmm4
3126 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
3127 ; SSE-NEXT: addpd %xmm4, %xmm2
3128 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
3131 ; AVX1-LABEL: uitofp_load_4i64_to_4f64:
3133 ; AVX1-NEXT: vmovaps (%rdi), %ymm0
3134 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3135 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
3136 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3137 ; AVX1-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
3138 ; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
3139 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
3140 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3141 ; AVX1-NEXT: vsubpd %xmm4, %xmm1, %xmm1
3142 ; AVX1-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
3143 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3144 ; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3
3145 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
3146 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3147 ; AVX1-NEXT: vsubpd %xmm4, %xmm0, %xmm0
3148 ; AVX1-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
3149 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3152 ; AVX2-LABEL: uitofp_load_4i64_to_4f64:
3154 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
3155 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
3156 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
3157 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3158 ; AVX2-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
3159 ; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
3160 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
3161 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3162 ; AVX2-NEXT: vsubpd %xmm4, %xmm1, %xmm1
3163 ; AVX2-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
3164 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3165 ; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3
3166 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
3167 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3168 ; AVX2-NEXT: vsubpd %xmm4, %xmm0, %xmm0
3169 ; AVX2-NEXT: vhaddpd %xmm0, %xmm3, %xmm0
3170 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3173 ; AVX512F-LABEL: uitofp_load_4i64_to_4f64:
3175 ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
3176 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
3177 ; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
3178 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
3179 ; AVX512F-NEXT: vmovq %xmm1, %rax
3180 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
3181 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
3182 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
3183 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2
3184 ; AVX512F-NEXT: vmovq %xmm0, %rax
3185 ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
3186 ; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
3187 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3188 ; AVX512F-NEXT: retq
3190 ; AVX512VL-LABEL: uitofp_load_4i64_to_4f64:
3192 ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
3193 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
3194 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
3195 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
3196 ; AVX512VL-NEXT: vmovq %xmm1, %rax
3197 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
3198 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
3199 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
3200 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2
3201 ; AVX512VL-NEXT: vmovq %xmm0, %rax
3202 ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0
3203 ; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
3204 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3205 ; AVX512VL-NEXT: retq
3207 ; AVX512DQ-LABEL: uitofp_load_4i64_to_4f64:
3209 ; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0
3210 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
3211 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
3212 ; AVX512DQ-NEXT: retq
3214 ; AVX512VLDQ-LABEL: uitofp_load_4i64_to_4f64:
3215 ; AVX512VLDQ: # BB#0:
3216 ; AVX512VLDQ-NEXT: vcvtuqq2pd (%rdi), %ymm0
3217 ; AVX512VLDQ-NEXT: retq
3218 %ld = load <4 x i64>, <4 x i64> *%a
3219 %cvt = uitofp <4 x i64> %ld to <4 x double>
3220 ret <4 x double> %cvt
; uitofp of a loaded <4 x i32>: pre-AVX512 converts lo/hi 16-bit halves
; separately and recombines with mul-by-65536 + add; AVX512 configs use
; vcvtudq2pd (VL folds the load). Autogenerated CHECK lines -- do not hand-edit.
3223 define <4 x double> @uitofp_load_4i32_to_4f64(<4 x i32> *%a) {
3224 ; SSE-LABEL: uitofp_load_4i32_to_4f64:
3226 ; SSE-NEXT: movdqa (%rdi), %xmm0
3227 ; SSE-NEXT: movdqa %xmm0, %xmm1
3228 ; SSE-NEXT: psrld $16, %xmm1
3229 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
3230 ; SSE-NEXT: movapd {{.*#+}} xmm2 = [6.553600e+04,6.553600e+04]
3231 ; SSE-NEXT: mulpd %xmm2, %xmm1
3232 ; SSE-NEXT: movdqa {{.*#+}} xmm3 = [65535,0,65535,0,65535,0,65535,0]
3233 ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
3234 ; SSE-NEXT: pand %xmm3, %xmm0
3235 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
3236 ; SSE-NEXT: addpd %xmm1, %xmm0
3237 ; SSE-NEXT: movdqa %xmm4, %xmm1
3238 ; SSE-NEXT: psrld $16, %xmm1
3239 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm5
3240 ; SSE-NEXT: mulpd %xmm2, %xmm5
3241 ; SSE-NEXT: pand %xmm3, %xmm4
3242 ; SSE-NEXT: cvtdq2pd %xmm4, %xmm1
3243 ; SSE-NEXT: addpd %xmm5, %xmm1
3246 ; AVX1-LABEL: uitofp_load_4i32_to_4f64:
3248 ; AVX1-NEXT: vmovdqa (%rdi), %xmm0
3249 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
3250 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
3251 ; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1
3252 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
3253 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
3254 ; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
3255 ; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
3258 ; AVX2-LABEL: uitofp_load_4i32_to_4f64:
3260 ; AVX2-NEXT: vmovdqa (%rdi), %xmm0
3261 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
3262 ; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
3263 ; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
3264 ; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
3265 ; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
3266 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
3267 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
3268 ; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
3271 ; AVX512F-LABEL: uitofp_load_4i32_to_4f64:
3273 ; AVX512F-NEXT: vmovaps (%rdi), %xmm0
3274 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
3275 ; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
3276 ; AVX512F-NEXT: retq
3278 ; AVX512VL-LABEL: uitofp_load_4i32_to_4f64:
3280 ; AVX512VL-NEXT: vcvtudq2pd (%rdi), %ymm0
3281 ; AVX512VL-NEXT: retq
3283 ; AVX512DQ-LABEL: uitofp_load_4i32_to_4f64:
3285 ; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
3286 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
3287 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
3288 ; AVX512DQ-NEXT: retq
3290 ; AVX512VLDQ-LABEL: uitofp_load_4i32_to_4f64:
3291 ; AVX512VLDQ: # BB#0:
3292 ; AVX512VLDQ-NEXT: vcvtudq2pd (%rdi), %ymm0
3293 ; AVX512VLDQ-NEXT: retq
3294 %ld = load <4 x i32>, <4 x i32> *%a
3295 %cvt = uitofp <4 x i32> %ld to <4 x double>
3296 ret <4 x double> %cvt
; uitofp of a loaded <4 x i16>: zero-extend (punpcklwd with zero / vpmovzxwd)
; then a signed i32->f64 conversion suffices.
; Autogenerated CHECK lines -- do not hand-edit.
3299 define <4 x double> @uitofp_load_4i16_to_4f64(<4 x i16> *%a) {
3300 ; SSE-LABEL: uitofp_load_4i16_to_4f64:
3302 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
3303 ; SSE-NEXT: pxor %xmm0, %xmm0
3304 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3305 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
3306 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
3307 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
3310 ; AVX-LABEL: uitofp_load_4i16_to_4f64:
3312 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
3313 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
3315 %ld = load <4 x i16>, <4 x i16> *%a
3316 %cvt = uitofp <4 x i16> %ld to <4 x double>
3317 ret <4 x double> %cvt
; uitofp of a loaded <4 x i8>: zero-extend (byte/word unpacks with zero /
; vpmovzxbd) then a signed i32->f64 conversion suffices.
; Autogenerated CHECK lines -- do not hand-edit.
3320 define <4 x double> @uitofp_load_4i8_to_4f64(<4 x i8> *%a) {
3321 ; SSE-LABEL: uitofp_load_4i8_to_4f64:
3323 ; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
3324 ; SSE-NEXT: pxor %xmm0, %xmm0
3325 ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
3326 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
3327 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
3328 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
3329 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
3332 ; AVX-LABEL: uitofp_load_4i8_to_4f64:
3334 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
3335 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
3337 %ld = load <4 x i8>, <4 x i8> *%a
3338 %cvt = uitofp <4 x i8> %ld to <4 x double>
3339 ret <4 x double> %cvt
3343 ; Load Signed Integer to Float
; sitofp of a loaded <4 x i64> to <4 x float>: without DQ each element goes
; through a GPR and scalar cvtsi2ss, assembled with unpcklps/vinsertps; DQ
; configs use vcvtqq2ps. Autogenerated CHECK lines -- do not hand-edit.
3346 define <4 x float> @sitofp_load_4i64_to_4f32(<4 x i64> *%a) {
3347 ; SSE-LABEL: sitofp_load_4i64_to_4f32:
3349 ; SSE-NEXT: movdqa (%rdi), %xmm1
3350 ; SSE-NEXT: movdqa 16(%rdi), %xmm2
3351 ; SSE-NEXT: movd %xmm2, %rax
3352 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3
3353 ; SSE-NEXT: movd %xmm1, %rax
3354 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
3355 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
3356 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
3357 ; SSE-NEXT: movd %xmm2, %rax
3358 ; SSE-NEXT: xorps %xmm2, %xmm2
3359 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
3360 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
3361 ; SSE-NEXT: movd %xmm1, %rax
3362 ; SSE-NEXT: xorps %xmm1, %xmm1
3363 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
3364 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3365 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3368 ; AVX1-LABEL: sitofp_load_4i64_to_4f32:
3370 ; AVX1-NEXT: vmovdqa (%rdi), %ymm0
3371 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
3372 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
3373 ; AVX1-NEXT: vmovq %xmm0, %rax
3374 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3375 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
3376 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3377 ; AVX1-NEXT: vmovq %xmm0, %rax
3378 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
3379 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
3380 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
3381 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
3382 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
3383 ; AVX1-NEXT: vzeroupper
3386 ; AVX2-LABEL: sitofp_load_4i64_to_4f32:
3388 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
3389 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
3390 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
3391 ; AVX2-NEXT: vmovq %xmm0, %rax
3392 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3393 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
3394 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
3395 ; AVX2-NEXT: vmovq %xmm0, %rax
3396 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
3397 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
3398 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
3399 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
3400 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
3401 ; AVX2-NEXT: vzeroupper
3404 ; AVX512F-LABEL: sitofp_load_4i64_to_4f32:
3406 ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
3407 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
3408 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
3409 ; AVX512F-NEXT: vmovq %xmm0, %rax
3410 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3411 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
3412 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
3413 ; AVX512F-NEXT: vmovq %xmm0, %rax
3414 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
3415 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
3416 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
3417 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
3418 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
3419 ; AVX512F-NEXT: retq
3421 ; AVX512VL-LABEL: sitofp_load_4i64_to_4f32:
3423 ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
3424 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
3425 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
3426 ; AVX512VL-NEXT: vmovq %xmm0, %rax
3427 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3428 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
3429 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
3430 ; AVX512VL-NEXT: vmovq %xmm0, %rax
3431 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
3432 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
3433 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
3434 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
3435 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
3436 ; AVX512VL-NEXT: retq
3438 ; AVX512DQ-LABEL: sitofp_load_4i64_to_4f32:
3440 ; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0
3441 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
3442 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
3443 ; AVX512DQ-NEXT: retq
3445 ; AVX512VLDQ-LABEL: sitofp_load_4i64_to_4f32:
3446 ; AVX512VLDQ: # BB#0:
3447 ; AVX512VLDQ-NEXT: vcvtqq2psy (%rdi), %xmm0
3448 ; AVX512VLDQ-NEXT: retq
3449 %ld = load <4 x i64>, <4 x i64> *%a
3450 %cvt = sitofp <4 x i64> %ld to <4 x float>
3451 ret <4 x float> %cvt
; sitofp of a loaded <4 x i32> to <4 x float>: a single cvtdq2ps with the load
; folded on every config. Autogenerated CHECK lines -- do not hand-edit.
3454 define <4 x float> @sitofp_load_4i32_to_4f32(<4 x i32> *%a) {
3455 ; SSE-LABEL: sitofp_load_4i32_to_4f32:
3457 ; SSE-NEXT: cvtdq2ps (%rdi), %xmm0
3460 ; AVX-LABEL: sitofp_load_4i32_to_4f32:
3462 ; AVX-NEXT: vcvtdq2ps (%rdi), %xmm0
3464 %ld = load <4 x i32>, <4 x i32> *%a
3465 %cvt = sitofp <4 x i32> %ld to <4 x float>
3466 ret <4 x float> %cvt
3469 define <4 x float> @sitofp_load_4i16_to_4f32(<4 x i16> *%a) {
3470 ; SSE-LABEL: sitofp_load_4i16_to_4f32:
3472 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3473 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
3474 ; SSE-NEXT: psrad $16, %xmm0
3475 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
3478 ; AVX-LABEL: sitofp_load_4i16_to_4f32:
3480 ; AVX-NEXT: vpmovsxwd (%rdi), %xmm0
3481 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
3483 %ld = load <4 x i16>, <4 x i16> *%a
3484 %cvt = sitofp <4 x i16> %ld to <4 x float>
3485 ret <4 x float> %cvt
3488 define <4 x float> @sitofp_load_4i8_to_4f32(<4 x i8> *%a) {
3489 ; SSE-LABEL: sitofp_load_4i8_to_4f32:
3491 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3492 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3493 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
3494 ; SSE-NEXT: psrad $24, %xmm0
3495 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
3498 ; AVX-LABEL: sitofp_load_4i8_to_4f32:
3500 ; AVX-NEXT: vpmovsxbd (%rdi), %xmm0
3501 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
3503 %ld = load <4 x i8>, <4 x i8> *%a
3504 %cvt = sitofp <4 x i8> %ld to <4 x float>
3505 ret <4 x float> %cvt
3508 define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) {
3509 ; SSE-LABEL: sitofp_load_8i64_to_8f32:
3511 ; SSE-NEXT: movdqa (%rdi), %xmm1
3512 ; SSE-NEXT: movdqa 16(%rdi), %xmm2
3513 ; SSE-NEXT: movdqa 32(%rdi), %xmm3
3514 ; SSE-NEXT: movdqa 48(%rdi), %xmm4
3515 ; SSE-NEXT: movd %xmm2, %rax
3516 ; SSE-NEXT: cvtsi2ssq %rax, %xmm5
3517 ; SSE-NEXT: movd %xmm1, %rax
3518 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
3519 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
3520 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
3521 ; SSE-NEXT: movd %xmm2, %rax
3522 ; SSE-NEXT: xorps %xmm2, %xmm2
3523 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
3524 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
3525 ; SSE-NEXT: movd %xmm1, %rax
3526 ; SSE-NEXT: xorps %xmm1, %xmm1
3527 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
3528 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3529 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3530 ; SSE-NEXT: movd %xmm4, %rax
3531 ; SSE-NEXT: xorps %xmm2, %xmm2
3532 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
3533 ; SSE-NEXT: movd %xmm3, %rax
3534 ; SSE-NEXT: xorps %xmm1, %xmm1
3535 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
3536 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
3537 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
3538 ; SSE-NEXT: movd %xmm2, %rax
3539 ; SSE-NEXT: xorps %xmm2, %xmm2
3540 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
3541 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
3542 ; SSE-NEXT: movd %xmm3, %rax
3543 ; SSE-NEXT: xorps %xmm3, %xmm3
3544 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3
3545 ; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
3546 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
3549 ; AVX1-LABEL: sitofp_load_8i64_to_8f32:
3551 ; AVX1-NEXT: vmovdqa (%rdi), %ymm0
3552 ; AVX1-NEXT: vmovdqa 32(%rdi), %ymm1
3553 ; AVX1-NEXT: vpextrq $1, %xmm1, %rax
3554 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3555 ; AVX1-NEXT: vmovq %xmm1, %rax
3556 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm3
3557 ; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
3558 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
3559 ; AVX1-NEXT: vmovq %xmm1, %rax
3560 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3561 ; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
3562 ; AVX1-NEXT: vpextrq $1, %xmm1, %rax
3563 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm1
3564 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
3565 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
3566 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm2
3567 ; AVX1-NEXT: vmovq %xmm0, %rax
3568 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3569 ; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
3570 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3571 ; AVX1-NEXT: vmovq %xmm0, %rax
3572 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3573 ; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
3574 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
3575 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm0
3576 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
3577 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3580 ; AVX2-LABEL: sitofp_load_8i64_to_8f32:
3582 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
3583 ; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1
3584 ; AVX2-NEXT: vpextrq $1, %xmm1, %rax
3585 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3586 ; AVX2-NEXT: vmovq %xmm1, %rax
3587 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm3
3588 ; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
3589 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
3590 ; AVX2-NEXT: vmovq %xmm1, %rax
3591 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3592 ; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
3593 ; AVX2-NEXT: vpextrq $1, %xmm1, %rax
3594 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm1
3595 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
3596 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
3597 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm2
3598 ; AVX2-NEXT: vmovq %xmm0, %rax
3599 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3600 ; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
3601 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
3602 ; AVX2-NEXT: vmovq %xmm0, %rax
3603 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3604 ; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
3605 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
3606 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm0
3607 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
3608 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3611 ; AVX512F-LABEL: sitofp_load_8i64_to_8f32:
3613 ; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0
3614 ; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm1
3615 ; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
3616 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3617 ; AVX512F-NEXT: vmovq %xmm1, %rax
3618 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm1
3619 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
3620 ; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm2
3621 ; AVX512F-NEXT: vmovq %xmm2, %rax
3622 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm3
3623 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
3624 ; AVX512F-NEXT: vpextrq $1, %xmm2, %rax
3625 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm2
3626 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
3627 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
3628 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm2
3629 ; AVX512F-NEXT: vmovq %xmm0, %rax
3630 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3631 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
3632 ; AVX512F-NEXT: vextracti32x4 $1, %zmm0, %xmm0
3633 ; AVX512F-NEXT: vmovq %xmm0, %rax
3634 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3635 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
3636 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
3637 ; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm0
3638 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
3639 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3640 ; AVX512F-NEXT: retq
3642 ; AVX512VL-LABEL: sitofp_load_8i64_to_8f32:
3644 ; AVX512VL-NEXT: vmovdqa64 (%rdi), %zmm0
3645 ; AVX512VL-NEXT: vextracti32x4 $2, %zmm0, %xmm1
3646 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
3647 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3648 ; AVX512VL-NEXT: vmovq %xmm1, %rax
3649 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm1
3650 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
3651 ; AVX512VL-NEXT: vextracti32x4 $3, %zmm0, %xmm2
3652 ; AVX512VL-NEXT: vmovq %xmm2, %rax
3653 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm3
3654 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
3655 ; AVX512VL-NEXT: vpextrq $1, %xmm2, %rax
3656 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm2
3657 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
3658 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
3659 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm2
3660 ; AVX512VL-NEXT: vmovq %xmm0, %rax
3661 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3662 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
3663 ; AVX512VL-NEXT: vextracti32x4 $1, %zmm0, %xmm0
3664 ; AVX512VL-NEXT: vmovq %xmm0, %rax
3665 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm3
3666 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
3667 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
3668 ; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm0
3669 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
3670 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3671 ; AVX512VL-NEXT: retq
3673 ; AVX512DQ-LABEL: sitofp_load_8i64_to_8f32:
3675 ; AVX512DQ-NEXT: vcvtqq2ps (%rdi), %ymm0
3676 ; AVX512DQ-NEXT: retq
3678 ; AVX512VLDQ-LABEL: sitofp_load_8i64_to_8f32:
3679 ; AVX512VLDQ: # BB#0:
3680 ; AVX512VLDQ-NEXT: vcvtqq2ps (%rdi), %ymm0
3681 ; AVX512VLDQ-NEXT: retq
3682 %ld = load <8 x i64>, <8 x i64> *%a
3683 %cvt = sitofp <8 x i64> %ld to <8 x float>
3684 ret <8 x float> %cvt
3687 define <8 x float> @sitofp_load_8i32_to_8f32(<8 x i32> *%a) {
3688 ; SSE-LABEL: sitofp_load_8i32_to_8f32:
3690 ; SSE-NEXT: cvtdq2ps (%rdi), %xmm0
3691 ; SSE-NEXT: cvtdq2ps 16(%rdi), %xmm1
3694 ; AVX-LABEL: sitofp_load_8i32_to_8f32:
3696 ; AVX-NEXT: vcvtdq2ps (%rdi), %ymm0
3698 %ld = load <8 x i32>, <8 x i32> *%a
3699 %cvt = sitofp <8 x i32> %ld to <8 x float>
3700 ret <8 x float> %cvt
3703 define <8 x float> @sitofp_load_8i16_to_8f32(<8 x i16> *%a) {
3704 ; SSE-LABEL: sitofp_load_8i16_to_8f32:
3706 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
3707 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
3708 ; SSE-NEXT: psrad $16, %xmm0
3709 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
3710 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
3711 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
3712 ; SSE-NEXT: psrad $16, %xmm1
3713 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
3716 ; AVX1-LABEL: sitofp_load_8i16_to_8f32:
3718 ; AVX1-NEXT: vpmovsxwd (%rdi), %xmm0
3719 ; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm1
3720 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3721 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
3724 ; AVX2-LABEL: sitofp_load_8i16_to_8f32:
3726 ; AVX2-NEXT: vpmovsxwd (%rdi), %ymm0
3727 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
3730 ; AVX512-LABEL: sitofp_load_8i16_to_8f32:
3732 ; AVX512-NEXT: vpmovsxwd (%rdi), %ymm0
3733 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
3735 %ld = load <8 x i16>, <8 x i16> *%a
3736 %cvt = sitofp <8 x i16> %ld to <8 x float>
3737 ret <8 x float> %cvt
3740 define <8 x float> @sitofp_load_8i8_to_8f32(<8 x i8> *%a) {
3741 ; SSE-LABEL: sitofp_load_8i8_to_8f32:
3743 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
3744 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3745 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
3746 ; SSE-NEXT: psrad $24, %xmm0
3747 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
3748 ; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
3749 ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
3750 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
3751 ; SSE-NEXT: psrad $24, %xmm1
3752 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
3755 ; AVX1-LABEL: sitofp_load_8i8_to_8f32:
3757 ; AVX1-NEXT: vpmovsxbw (%rdi), %xmm0
3758 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
3759 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
3760 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
3761 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3762 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
3765 ; AVX2-LABEL: sitofp_load_8i8_to_8f32:
3767 ; AVX2-NEXT: vpmovsxbd (%rdi), %ymm0
3768 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
3771 ; AVX512-LABEL: sitofp_load_8i8_to_8f32:
3773 ; AVX512-NEXT: vpmovsxbd (%rdi), %ymm0
3774 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
3776 %ld = load <8 x i8>, <8 x i8> *%a
3777 %cvt = sitofp <8 x i8> %ld to <8 x float>
3778 ret <8 x float> %cvt
3782 ; Load Unsigned Integer to Float
3785 define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
3786 ; SSE-LABEL: uitofp_load_4i64_to_4f32:
3788 ; SSE-NEXT: movdqa (%rdi), %xmm1
3789 ; SSE-NEXT: movdqa 16(%rdi), %xmm3
3790 ; SSE-NEXT: movd %xmm3, %rax
3791 ; SSE-NEXT: testq %rax, %rax
3792 ; SSE-NEXT: js .LBB76_1
3794 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
3795 ; SSE-NEXT: jmp .LBB76_3
3796 ; SSE-NEXT: .LBB76_1:
3797 ; SSE-NEXT: movq %rax, %rcx
3798 ; SSE-NEXT: shrq %rcx
3799 ; SSE-NEXT: andl $1, %eax
3800 ; SSE-NEXT: orq %rcx, %rax
3801 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
3802 ; SSE-NEXT: addss %xmm2, %xmm2
3803 ; SSE-NEXT: .LBB76_3:
3804 ; SSE-NEXT: movd %xmm1, %rax
3805 ; SSE-NEXT: testq %rax, %rax
3806 ; SSE-NEXT: js .LBB76_4
3808 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
3809 ; SSE-NEXT: jmp .LBB76_6
3810 ; SSE-NEXT: .LBB76_4:
3811 ; SSE-NEXT: movq %rax, %rcx
3812 ; SSE-NEXT: shrq %rcx
3813 ; SSE-NEXT: andl $1, %eax
3814 ; SSE-NEXT: orq %rcx, %rax
3815 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
3816 ; SSE-NEXT: addss %xmm0, %xmm0
3817 ; SSE-NEXT: .LBB76_6:
3818 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
3819 ; SSE-NEXT: movd %xmm3, %rax
3820 ; SSE-NEXT: testq %rax, %rax
3821 ; SSE-NEXT: js .LBB76_7
3823 ; SSE-NEXT: xorps %xmm3, %xmm3
3824 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3
3825 ; SSE-NEXT: jmp .LBB76_9
3826 ; SSE-NEXT: .LBB76_7:
3827 ; SSE-NEXT: movq %rax, %rcx
3828 ; SSE-NEXT: shrq %rcx
3829 ; SSE-NEXT: andl $1, %eax
3830 ; SSE-NEXT: orq %rcx, %rax
3831 ; SSE-NEXT: xorps %xmm3, %xmm3
3832 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3
3833 ; SSE-NEXT: addss %xmm3, %xmm3
3834 ; SSE-NEXT: .LBB76_9:
3835 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
3836 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
3837 ; SSE-NEXT: movd %xmm1, %rax
3838 ; SSE-NEXT: testq %rax, %rax
3839 ; SSE-NEXT: js .LBB76_10
3840 ; SSE-NEXT: # BB#11:
3841 ; SSE-NEXT: xorps %xmm1, %xmm1
3842 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
3843 ; SSE-NEXT: jmp .LBB76_12
3844 ; SSE-NEXT: .LBB76_10:
3845 ; SSE-NEXT: movq %rax, %rcx
3846 ; SSE-NEXT: shrq %rcx
3847 ; SSE-NEXT: andl $1, %eax
3848 ; SSE-NEXT: orq %rcx, %rax
3849 ; SSE-NEXT: xorps %xmm1, %xmm1
3850 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
3851 ; SSE-NEXT: addss %xmm1, %xmm1
3852 ; SSE-NEXT: .LBB76_12:
3853 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
3854 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3857 ; AVX1-LABEL: uitofp_load_4i64_to_4f32:
3859 ; AVX1-NEXT: vmovdqa (%rdi), %ymm0
3860 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
3861 ; AVX1-NEXT: testq %rax, %rax
3862 ; AVX1-NEXT: js .LBB76_1
3863 ; AVX1-NEXT: # BB#2:
3864 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
3865 ; AVX1-NEXT: jmp .LBB76_3
3866 ; AVX1-NEXT: .LBB76_1:
3867 ; AVX1-NEXT: movq %rax, %rcx
3868 ; AVX1-NEXT: shrq %rcx
3869 ; AVX1-NEXT: andl $1, %eax
3870 ; AVX1-NEXT: orq %rcx, %rax
3871 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
3872 ; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
3873 ; AVX1-NEXT: .LBB76_3:
3874 ; AVX1-NEXT: vmovq %xmm0, %rax
3875 ; AVX1-NEXT: testq %rax, %rax
3876 ; AVX1-NEXT: js .LBB76_4
3877 ; AVX1-NEXT: # BB#5:
3878 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3879 ; AVX1-NEXT: jmp .LBB76_6
3880 ; AVX1-NEXT: .LBB76_4:
3881 ; AVX1-NEXT: movq %rax, %rcx
3882 ; AVX1-NEXT: shrq %rcx
3883 ; AVX1-NEXT: andl $1, %eax
3884 ; AVX1-NEXT: orq %rcx, %rax
3885 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3886 ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
3887 ; AVX1-NEXT: .LBB76_6:
3888 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
3889 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3890 ; AVX1-NEXT: vmovq %xmm0, %rax
3891 ; AVX1-NEXT: testq %rax, %rax
3892 ; AVX1-NEXT: js .LBB76_7
3893 ; AVX1-NEXT: # BB#8:
3894 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
3895 ; AVX1-NEXT: jmp .LBB76_9
3896 ; AVX1-NEXT: .LBB76_7:
3897 ; AVX1-NEXT: movq %rax, %rcx
3898 ; AVX1-NEXT: shrq %rcx
3899 ; AVX1-NEXT: andl $1, %eax
3900 ; AVX1-NEXT: orq %rcx, %rax
3901 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
3902 ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
3903 ; AVX1-NEXT: .LBB76_9:
3904 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
3905 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
3906 ; AVX1-NEXT: testq %rax, %rax
3907 ; AVX1-NEXT: js .LBB76_10
3908 ; AVX1-NEXT: # BB#11:
3909 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
3910 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
3911 ; AVX1-NEXT: vzeroupper
3913 ; AVX1-NEXT: .LBB76_10:
3914 ; AVX1-NEXT: movq %rax, %rcx
3915 ; AVX1-NEXT: shrq %rcx
3916 ; AVX1-NEXT: andl $1, %eax
3917 ; AVX1-NEXT: orq %rcx, %rax
3918 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
3919 ; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
3920 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
3921 ; AVX1-NEXT: vzeroupper
3924 ; AVX2-LABEL: uitofp_load_4i64_to_4f32:
3926 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
3927 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
3928 ; AVX2-NEXT: testq %rax, %rax
3929 ; AVX2-NEXT: js .LBB76_1
3930 ; AVX2-NEXT: # BB#2:
3931 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
3932 ; AVX2-NEXT: jmp .LBB76_3
3933 ; AVX2-NEXT: .LBB76_1:
3934 ; AVX2-NEXT: movq %rax, %rcx
3935 ; AVX2-NEXT: shrq %rcx
3936 ; AVX2-NEXT: andl $1, %eax
3937 ; AVX2-NEXT: orq %rcx, %rax
3938 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
3939 ; AVX2-NEXT: vaddss %xmm1, %xmm1, %xmm1
3940 ; AVX2-NEXT: .LBB76_3:
3941 ; AVX2-NEXT: vmovq %xmm0, %rax
3942 ; AVX2-NEXT: testq %rax, %rax
3943 ; AVX2-NEXT: js .LBB76_4
3944 ; AVX2-NEXT: # BB#5:
3945 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3946 ; AVX2-NEXT: jmp .LBB76_6
3947 ; AVX2-NEXT: .LBB76_4:
3948 ; AVX2-NEXT: movq %rax, %rcx
3949 ; AVX2-NEXT: shrq %rcx
3950 ; AVX2-NEXT: andl $1, %eax
3951 ; AVX2-NEXT: orq %rcx, %rax
3952 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
3953 ; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2
3954 ; AVX2-NEXT: .LBB76_6:
3955 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
3956 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
3957 ; AVX2-NEXT: vmovq %xmm0, %rax
3958 ; AVX2-NEXT: testq %rax, %rax
3959 ; AVX2-NEXT: js .LBB76_7
3960 ; AVX2-NEXT: # BB#8:
3961 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
3962 ; AVX2-NEXT: jmp .LBB76_9
3963 ; AVX2-NEXT: .LBB76_7:
3964 ; AVX2-NEXT: movq %rax, %rcx
3965 ; AVX2-NEXT: shrq %rcx
3966 ; AVX2-NEXT: andl $1, %eax
3967 ; AVX2-NEXT: orq %rcx, %rax
3968 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
3969 ; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2
3970 ; AVX2-NEXT: .LBB76_9:
3971 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
3972 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
3973 ; AVX2-NEXT: testq %rax, %rax
3974 ; AVX2-NEXT: js .LBB76_10
3975 ; AVX2-NEXT: # BB#11:
3976 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
3977 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
3978 ; AVX2-NEXT: vzeroupper
3980 ; AVX2-NEXT: .LBB76_10:
3981 ; AVX2-NEXT: movq %rax, %rcx
3982 ; AVX2-NEXT: shrq %rcx
3983 ; AVX2-NEXT: andl $1, %eax
3984 ; AVX2-NEXT: orq %rcx, %rax
3985 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
3986 ; AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm0
3987 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
3988 ; AVX2-NEXT: vzeroupper
3991 ; AVX512F-LABEL: uitofp_load_4i64_to_4f32:
3993 ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
3994 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
3995 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
3996 ; AVX512F-NEXT: vmovq %xmm0, %rax
3997 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
3998 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
3999 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
4000 ; AVX512F-NEXT: vmovq %xmm0, %rax
4001 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2
4002 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
4003 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
4004 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0
4005 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
4006 ; AVX512F-NEXT: retq
4008 ; AVX512VL-LABEL: uitofp_load_4i64_to_4f32:
4010 ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
4011 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
4012 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
4013 ; AVX512VL-NEXT: vmovq %xmm0, %rax
4014 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
4015 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
4016 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
4017 ; AVX512VL-NEXT: vmovq %xmm0, %rax
4018 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2
4019 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
4020 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
4021 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0
4022 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
4023 ; AVX512VL-NEXT: retq
4025 ; AVX512DQ-LABEL: uitofp_load_4i64_to_4f32:
4027 ; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0
4028 ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
4029 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
4030 ; AVX512DQ-NEXT: retq
4032 ; AVX512VLDQ-LABEL: uitofp_load_4i64_to_4f32:
4033 ; AVX512VLDQ: # BB#0:
4034 ; AVX512VLDQ-NEXT: vcvtuqq2psy (%rdi), %xmm0
4035 ; AVX512VLDQ-NEXT: retq
4036 %ld = load <4 x i64>, <4 x i64> *%a
4037 %cvt = uitofp <4 x i64> %ld to <4 x float>
4038 ret <4 x float> %cvt
4041 define <4 x float> @uitofp_load_4i32_to_4f32(<4 x i32> *%a) {
4042 ; SSE-LABEL: uitofp_load_4i32_to_4f32:
4044 ; SSE-NEXT: movdqa (%rdi), %xmm0
4045 ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
4046 ; SSE-NEXT: pand %xmm0, %xmm1
4047 ; SSE-NEXT: por {{.*}}(%rip), %xmm1
4048 ; SSE-NEXT: psrld $16, %xmm0
4049 ; SSE-NEXT: por {{.*}}(%rip), %xmm0
4050 ; SSE-NEXT: addps {{.*}}(%rip), %xmm0
4051 ; SSE-NEXT: addps %xmm1, %xmm0
4054 ; AVX1-LABEL: uitofp_load_4i32_to_4f32:
4056 ; AVX1-NEXT: vmovdqa (%rdi), %xmm0
4057 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
4058 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
4059 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
4060 ; AVX1-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
4061 ; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0
4064 ; AVX2-LABEL: uitofp_load_4i32_to_4f32:
4066 ; AVX2-NEXT: vmovdqa (%rdi), %xmm0
4067 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
4068 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
4069 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
4070 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
4071 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
4072 ; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
4073 ; AVX2-NEXT: vaddps %xmm2, %xmm0, %xmm0
4074 ; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0
4077 ; AVX512F-LABEL: uitofp_load_4i32_to_4f32:
4079 ; AVX512F-NEXT: vmovaps (%rdi), %xmm0
4080 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
4081 ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
4082 ; AVX512F-NEXT: retq
4084 ; AVX512VL-LABEL: uitofp_load_4i32_to_4f32:
4086 ; AVX512VL-NEXT: vcvtudq2ps (%rdi), %xmm0
4087 ; AVX512VL-NEXT: retq
4089 ; AVX512DQ-LABEL: uitofp_load_4i32_to_4f32:
4091 ; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
4092 ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
4093 ; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
4094 ; AVX512DQ-NEXT: retq
4096 ; AVX512VLDQ-LABEL: uitofp_load_4i32_to_4f32:
4097 ; AVX512VLDQ: # BB#0:
4098 ; AVX512VLDQ-NEXT: vcvtudq2ps (%rdi), %xmm0
4099 ; AVX512VLDQ-NEXT: retq
4100 %ld = load <4 x i32>, <4 x i32> *%a
4101 %cvt = uitofp <4 x i32> %ld to <4 x float>
4102 ret <4 x float> %cvt
4105 define <4 x float> @uitofp_load_4i16_to_4f32(<4 x i16> *%a) {
4106 ; SSE-LABEL: uitofp_load_4i16_to_4f32:
4108 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
4109 ; SSE-NEXT: pxor %xmm1, %xmm1
4110 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
4111 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
4114 ; AVX-LABEL: uitofp_load_4i16_to_4f32:
4116 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
4117 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
4119 %ld = load <4 x i16>, <4 x i16> *%a
4120 %cvt = uitofp <4 x i16> %ld to <4 x float>
4121 ret <4 x float> %cvt
4124 define <4 x float> @uitofp_load_4i8_to_4f32(<4 x i8> *%a) {
4125 ; SSE-LABEL: uitofp_load_4i8_to_4f32:
4127 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
4128 ; SSE-NEXT: pxor %xmm1, %xmm1
4129 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
4130 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
4131 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
4134 ; AVX-LABEL: uitofp_load_4i8_to_4f32:
4136 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
4137 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
4139 %ld = load <4 x i8>, <4 x i8> *%a
4140 %cvt = uitofp <4 x i8> %ld to <4 x float>
4141 ret <4 x float> %cvt
4144 define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
4145 ; SSE-LABEL: uitofp_load_8i64_to_8f32:
4147 ; SSE-NEXT: movdqa (%rdi), %xmm1
4148 ; SSE-NEXT: movdqa 16(%rdi), %xmm5
4149 ; SSE-NEXT: movdqa 32(%rdi), %xmm2
4150 ; SSE-NEXT: movdqa 48(%rdi), %xmm3
4151 ; SSE-NEXT: movd %xmm5, %rax
4152 ; SSE-NEXT: testq %rax, %rax
4153 ; SSE-NEXT: js .LBB80_1
4155 ; SSE-NEXT: cvtsi2ssq %rax, %xmm4
4156 ; SSE-NEXT: jmp .LBB80_3
4157 ; SSE-NEXT: .LBB80_1:
4158 ; SSE-NEXT: movq %rax, %rcx
4159 ; SSE-NEXT: shrq %rcx
4160 ; SSE-NEXT: andl $1, %eax
4161 ; SSE-NEXT: orq %rcx, %rax
4162 ; SSE-NEXT: cvtsi2ssq %rax, %xmm4
4163 ; SSE-NEXT: addss %xmm4, %xmm4
4164 ; SSE-NEXT: .LBB80_3:
4165 ; SSE-NEXT: movd %xmm1, %rax
4166 ; SSE-NEXT: testq %rax, %rax
4167 ; SSE-NEXT: js .LBB80_4
4169 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
4170 ; SSE-NEXT: jmp .LBB80_6
4171 ; SSE-NEXT: .LBB80_4:
4172 ; SSE-NEXT: movq %rax, %rcx
4173 ; SSE-NEXT: shrq %rcx
4174 ; SSE-NEXT: andl $1, %eax
4175 ; SSE-NEXT: orq %rcx, %rax
4176 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
4177 ; SSE-NEXT: addss %xmm0, %xmm0
4178 ; SSE-NEXT: .LBB80_6:
4179 ; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
4180 ; SSE-NEXT: movd %xmm5, %rax
4181 ; SSE-NEXT: testq %rax, %rax
4182 ; SSE-NEXT: js .LBB80_7
4184 ; SSE-NEXT: cvtsi2ssq %rax, %xmm6
4185 ; SSE-NEXT: jmp .LBB80_9
4186 ; SSE-NEXT: .LBB80_7:
4187 ; SSE-NEXT: movq %rax, %rcx
4188 ; SSE-NEXT: shrq %rcx
4189 ; SSE-NEXT: andl $1, %eax
4190 ; SSE-NEXT: orq %rcx, %rax
4191 ; SSE-NEXT: cvtsi2ssq %rax, %xmm6
4192 ; SSE-NEXT: addss %xmm6, %xmm6
4193 ; SSE-NEXT: .LBB80_9:
4194 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
4195 ; SSE-NEXT: movd %xmm1, %rax
4196 ; SSE-NEXT: testq %rax, %rax
4197 ; SSE-NEXT: js .LBB80_10
4198 ; SSE-NEXT: # BB#11:
4199 ; SSE-NEXT: xorps %xmm5, %xmm5
4200 ; SSE-NEXT: cvtsi2ssq %rax, %xmm5
4201 ; SSE-NEXT: jmp .LBB80_12
4202 ; SSE-NEXT: .LBB80_10:
4203 ; SSE-NEXT: movq %rax, %rcx
4204 ; SSE-NEXT: shrq %rcx
4205 ; SSE-NEXT: andl $1, %eax
4206 ; SSE-NEXT: orq %rcx, %rax
4207 ; SSE-NEXT: xorps %xmm5, %xmm5
4208 ; SSE-NEXT: cvtsi2ssq %rax, %xmm5
4209 ; SSE-NEXT: addss %xmm5, %xmm5
4210 ; SSE-NEXT: .LBB80_12:
4211 ; SSE-NEXT: movd %xmm3, %rax
4212 ; SSE-NEXT: testq %rax, %rax
4213 ; SSE-NEXT: js .LBB80_13
4214 ; SSE-NEXT: # BB#14:
4215 ; SSE-NEXT: cvtsi2ssq %rax, %xmm7
4216 ; SSE-NEXT: jmp .LBB80_15
4217 ; SSE-NEXT: .LBB80_13:
4218 ; SSE-NEXT: movq %rax, %rcx
4219 ; SSE-NEXT: shrq %rcx
4220 ; SSE-NEXT: andl $1, %eax
4221 ; SSE-NEXT: orq %rcx, %rax
4222 ; SSE-NEXT: cvtsi2ssq %rax, %xmm7
4223 ; SSE-NEXT: addss %xmm7, %xmm7
4224 ; SSE-NEXT: .LBB80_15:
4225 ; SSE-NEXT: movd %xmm2, %rax
4226 ; SSE-NEXT: testq %rax, %rax
4227 ; SSE-NEXT: js .LBB80_16
4228 ; SSE-NEXT: # BB#17:
4229 ; SSE-NEXT: xorps %xmm1, %xmm1
4230 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
4231 ; SSE-NEXT: jmp .LBB80_18
4232 ; SSE-NEXT: .LBB80_16:
4233 ; SSE-NEXT: movq %rax, %rcx
4234 ; SSE-NEXT: shrq %rcx
4235 ; SSE-NEXT: andl $1, %eax
4236 ; SSE-NEXT: orq %rcx, %rax
4237 ; SSE-NEXT: xorps %xmm1, %xmm1
4238 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1
4239 ; SSE-NEXT: addss %xmm1, %xmm1
4240 ; SSE-NEXT: .LBB80_18:
4241 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
4242 ; SSE-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
4243 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
4244 ; SSE-NEXT: movd %xmm3, %rax
4245 ; SSE-NEXT: testq %rax, %rax
4246 ; SSE-NEXT: js .LBB80_19
4247 ; SSE-NEXT: # BB#20:
4248 ; SSE-NEXT: xorps %xmm3, %xmm3
4249 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3
4250 ; SSE-NEXT: jmp .LBB80_21
4251 ; SSE-NEXT: .LBB80_19:
4252 ; SSE-NEXT: movq %rax, %rcx
4253 ; SSE-NEXT: shrq %rcx
4254 ; SSE-NEXT: andl $1, %eax
4255 ; SSE-NEXT: orq %rcx, %rax
4256 ; SSE-NEXT: xorps %xmm3, %xmm3
4257 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3
4258 ; SSE-NEXT: addss %xmm3, %xmm3
4259 ; SSE-NEXT: .LBB80_21:
4260 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
4261 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1]
4262 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
4263 ; SSE-NEXT: movd %xmm2, %rax
4264 ; SSE-NEXT: testq %rax, %rax
4265 ; SSE-NEXT: js .LBB80_22
4266 ; SSE-NEXT: # BB#23:
4267 ; SSE-NEXT: xorps %xmm2, %xmm2
4268 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
4269 ; SSE-NEXT: jmp .LBB80_24
4270 ; SSE-NEXT: .LBB80_22:
4271 ; SSE-NEXT: movq %rax, %rcx
4272 ; SSE-NEXT: shrq %rcx
4273 ; SSE-NEXT: andl $1, %eax
4274 ; SSE-NEXT: orq %rcx, %rax
4275 ; SSE-NEXT: xorps %xmm2, %xmm2
4276 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2
4277 ; SSE-NEXT: addss %xmm2, %xmm2
4278 ; SSE-NEXT: .LBB80_24:
4279 ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
4280 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
4283 ; AVX1-LABEL: uitofp_load_8i64_to_8f32:
4285 ; AVX1-NEXT: vmovdqa (%rdi), %ymm0
4286 ; AVX1-NEXT: vmovdqa 32(%rdi), %ymm2
4287 ; AVX1-NEXT: vpextrq $1, %xmm2, %rax
4288 ; AVX1-NEXT: testq %rax, %rax
4289 ; AVX1-NEXT: js .LBB80_1
4290 ; AVX1-NEXT: # BB#2:
4291 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
4292 ; AVX1-NEXT: jmp .LBB80_3
4293 ; AVX1-NEXT: .LBB80_1:
4294 ; AVX1-NEXT: movq %rax, %rcx
4295 ; AVX1-NEXT: shrq %rcx
4296 ; AVX1-NEXT: andl $1, %eax
4297 ; AVX1-NEXT: orq %rcx, %rax
4298 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
4299 ; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
4300 ; AVX1-NEXT: .LBB80_3:
4301 ; AVX1-NEXT: vmovq %xmm2, %rax
4302 ; AVX1-NEXT: testq %rax, %rax
4303 ; AVX1-NEXT: js .LBB80_4
4304 ; AVX1-NEXT: # BB#5:
4305 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm3
4306 ; AVX1-NEXT: jmp .LBB80_6
4307 ; AVX1-NEXT: .LBB80_4:
4308 ; AVX1-NEXT: movq %rax, %rcx
4309 ; AVX1-NEXT: shrq %rcx
4310 ; AVX1-NEXT: andl $1, %eax
4311 ; AVX1-NEXT: orq %rcx, %rax
4312 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm3
4313 ; AVX1-NEXT: vaddss %xmm3, %xmm3, %xmm3
4314 ; AVX1-NEXT: .LBB80_6:
4315 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
4316 ; AVX1-NEXT: vmovq %xmm2, %rax
4317 ; AVX1-NEXT: testq %rax, %rax
4318 ; AVX1-NEXT: js .LBB80_7
4319 ; AVX1-NEXT: # BB#8:
4320 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm4
4321 ; AVX1-NEXT: jmp .LBB80_9
4322 ; AVX1-NEXT: .LBB80_7:
4323 ; AVX1-NEXT: movq %rax, %rcx
4324 ; AVX1-NEXT: shrq %rcx
4325 ; AVX1-NEXT: andl $1, %eax
4326 ; AVX1-NEXT: orq %rcx, %rax
4327 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm4
4328 ; AVX1-NEXT: vaddss %xmm4, %xmm4, %xmm4
4329 ; AVX1-NEXT: .LBB80_9:
4330 ; AVX1-NEXT: vpextrq $1, %xmm2, %rax
4331 ; AVX1-NEXT: testq %rax, %rax
4332 ; AVX1-NEXT: js .LBB80_10
4333 ; AVX1-NEXT: # BB#11:
4334 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm2
4335 ; AVX1-NEXT: jmp .LBB80_12
4336 ; AVX1-NEXT: .LBB80_10:
4337 ; AVX1-NEXT: movq %rax, %rcx
4338 ; AVX1-NEXT: shrq %rcx
4339 ; AVX1-NEXT: andl $1, %eax
4340 ; AVX1-NEXT: orq %rcx, %rax
4341 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm2
4342 ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
4343 ; AVX1-NEXT: .LBB80_12:
4344 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
4345 ; AVX1-NEXT: testq %rax, %rax
4346 ; AVX1-NEXT: js .LBB80_13
4347 ; AVX1-NEXT: # BB#14:
4348 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm5
4349 ; AVX1-NEXT: jmp .LBB80_15
4350 ; AVX1-NEXT: .LBB80_13:
4351 ; AVX1-NEXT: movq %rax, %rcx
4352 ; AVX1-NEXT: shrq %rcx
4353 ; AVX1-NEXT: andl $1, %eax
4354 ; AVX1-NEXT: orq %rcx, %rax
4355 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm5
4356 ; AVX1-NEXT: vaddss %xmm5, %xmm5, %xmm5
4357 ; AVX1-NEXT: .LBB80_15:
4358 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[2,3]
4359 ; AVX1-NEXT: vmovq %xmm0, %rax
4360 ; AVX1-NEXT: testq %rax, %rax
4361 ; AVX1-NEXT: js .LBB80_16
4362 ; AVX1-NEXT: # BB#17:
4363 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm3
4364 ; AVX1-NEXT: jmp .LBB80_18
4365 ; AVX1-NEXT: .LBB80_16:
4366 ; AVX1-NEXT: movq %rax, %rcx
4367 ; AVX1-NEXT: shrq %rcx
4368 ; AVX1-NEXT: andl $1, %eax
4369 ; AVX1-NEXT: orq %rcx, %rax
4370 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm3
4371 ; AVX1-NEXT: vaddss %xmm3, %xmm3, %xmm3
4372 ; AVX1-NEXT: .LBB80_18:
4373 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm4[0],xmm1[3]
4374 ; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[2,3]
4375 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
4376 ; AVX1-NEXT: vmovq %xmm4, %rax
4377 ; AVX1-NEXT: testq %rax, %rax
4378 ; AVX1-NEXT: js .LBB80_19
4379 ; AVX1-NEXT: # BB#20:
4380 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm5
4381 ; AVX1-NEXT: jmp .LBB80_21
4382 ; AVX1-NEXT: .LBB80_19:
4383 ; AVX1-NEXT: movq %rax, %rcx
4384 ; AVX1-NEXT: shrq %rcx
4385 ; AVX1-NEXT: andl $1, %eax
4386 ; AVX1-NEXT: orq %rcx, %rax
4387 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm0
4388 ; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm5
4389 ; AVX1-NEXT: .LBB80_21:
4390 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm2[0]
4391 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1],xmm5[0],xmm3[3]
4392 ; AVX1-NEXT: vpextrq $1, %xmm4, %rax
4393 ; AVX1-NEXT: testq %rax, %rax
4394 ; AVX1-NEXT: js .LBB80_22
4395 ; AVX1-NEXT: # BB#23:
4396 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm2
4397 ; AVX1-NEXT: jmp .LBB80_24
4398 ; AVX1-NEXT: .LBB80_22:
4399 ; AVX1-NEXT: movq %rax, %rcx
4400 ; AVX1-NEXT: shrq %rcx
4401 ; AVX1-NEXT: andl $1, %eax
4402 ; AVX1-NEXT: orq %rcx, %rax
4403 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm2
4404 ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
4405 ; AVX1-NEXT: .LBB80_24:
4406 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
4407 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4410 ; AVX2-LABEL: uitofp_load_8i64_to_8f32:
4412 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
4413 ; AVX2-NEXT: vmovdqa 32(%rdi), %ymm2
4414 ; AVX2-NEXT: vpextrq $1, %xmm2, %rax
4415 ; AVX2-NEXT: testq %rax, %rax
4416 ; AVX2-NEXT: js .LBB80_1
4417 ; AVX2-NEXT: # BB#2:
4418 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
4419 ; AVX2-NEXT: jmp .LBB80_3
4420 ; AVX2-NEXT: .LBB80_1:
4421 ; AVX2-NEXT: movq %rax, %rcx
4422 ; AVX2-NEXT: shrq %rcx
4423 ; AVX2-NEXT: andl $1, %eax
4424 ; AVX2-NEXT: orq %rcx, %rax
4425 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
4426 ; AVX2-NEXT: vaddss %xmm1, %xmm1, %xmm1
4427 ; AVX2-NEXT: .LBB80_3:
4428 ; AVX2-NEXT: vmovq %xmm2, %rax
4429 ; AVX2-NEXT: testq %rax, %rax
4430 ; AVX2-NEXT: js .LBB80_4
4431 ; AVX2-NEXT: # BB#5:
4432 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm3
4433 ; AVX2-NEXT: jmp .LBB80_6
4434 ; AVX2-NEXT: .LBB80_4:
4435 ; AVX2-NEXT: movq %rax, %rcx
4436 ; AVX2-NEXT: shrq %rcx
4437 ; AVX2-NEXT: andl $1, %eax
4438 ; AVX2-NEXT: orq %rcx, %rax
4439 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm3
4440 ; AVX2-NEXT: vaddss %xmm3, %xmm3, %xmm3
4441 ; AVX2-NEXT: .LBB80_6:
4442 ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2
4443 ; AVX2-NEXT: vmovq %xmm2, %rax
4444 ; AVX2-NEXT: testq %rax, %rax
4445 ; AVX2-NEXT: js .LBB80_7
4446 ; AVX2-NEXT: # BB#8:
4447 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm4
4448 ; AVX2-NEXT: jmp .LBB80_9
4449 ; AVX2-NEXT: .LBB80_7:
4450 ; AVX2-NEXT: movq %rax, %rcx
4451 ; AVX2-NEXT: shrq %rcx
4452 ; AVX2-NEXT: andl $1, %eax
4453 ; AVX2-NEXT: orq %rcx, %rax
4454 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm4
4455 ; AVX2-NEXT: vaddss %xmm4, %xmm4, %xmm4
4456 ; AVX2-NEXT: .LBB80_9:
4457 ; AVX2-NEXT: vpextrq $1, %xmm2, %rax
4458 ; AVX2-NEXT: testq %rax, %rax
4459 ; AVX2-NEXT: js .LBB80_10
4460 ; AVX2-NEXT: # BB#11:
4461 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm2
4462 ; AVX2-NEXT: jmp .LBB80_12
4463 ; AVX2-NEXT: .LBB80_10:
4464 ; AVX2-NEXT: movq %rax, %rcx
4465 ; AVX2-NEXT: shrq %rcx
4466 ; AVX2-NEXT: andl $1, %eax
4467 ; AVX2-NEXT: orq %rcx, %rax
4468 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm2
4469 ; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2
4470 ; AVX2-NEXT: .LBB80_12:
4471 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax
4472 ; AVX2-NEXT: testq %rax, %rax
4473 ; AVX2-NEXT: js .LBB80_13
4474 ; AVX2-NEXT: # BB#14:
4475 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm5
4476 ; AVX2-NEXT: jmp .LBB80_15
4477 ; AVX2-NEXT: .LBB80_13:
4478 ; AVX2-NEXT: movq %rax, %rcx
4479 ; AVX2-NEXT: shrq %rcx
4480 ; AVX2-NEXT: andl $1, %eax
4481 ; AVX2-NEXT: orq %rcx, %rax
4482 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm5
4483 ; AVX2-NEXT: vaddss %xmm5, %xmm5, %xmm5
4484 ; AVX2-NEXT: .LBB80_15:
4485 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[2,3]
4486 ; AVX2-NEXT: vmovq %xmm0, %rax
4487 ; AVX2-NEXT: testq %rax, %rax
4488 ; AVX2-NEXT: js .LBB80_16
4489 ; AVX2-NEXT: # BB#17:
4490 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm3
4491 ; AVX2-NEXT: jmp .LBB80_18
4492 ; AVX2-NEXT: .LBB80_16:
4493 ; AVX2-NEXT: movq %rax, %rcx
4494 ; AVX2-NEXT: shrq %rcx
4495 ; AVX2-NEXT: andl $1, %eax
4496 ; AVX2-NEXT: orq %rcx, %rax
4497 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm3
4498 ; AVX2-NEXT: vaddss %xmm3, %xmm3, %xmm3
4499 ; AVX2-NEXT: .LBB80_18:
4500 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm4[0],xmm1[3]
4501 ; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[2,3]
4502 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
4503 ; AVX2-NEXT: vmovq %xmm4, %rax
4504 ; AVX2-NEXT: testq %rax, %rax
4505 ; AVX2-NEXT: js .LBB80_19
4506 ; AVX2-NEXT: # BB#20:
4507 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm5
4508 ; AVX2-NEXT: jmp .LBB80_21
4509 ; AVX2-NEXT: .LBB80_19:
4510 ; AVX2-NEXT: movq %rax, %rcx
4511 ; AVX2-NEXT: shrq %rcx
4512 ; AVX2-NEXT: andl $1, %eax
4513 ; AVX2-NEXT: orq %rcx, %rax
4514 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm0
4515 ; AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm5
4516 ; AVX2-NEXT: .LBB80_21:
4517 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm2[0]
4518 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1],xmm5[0],xmm3[3]
4519 ; AVX2-NEXT: vpextrq $1, %xmm4, %rax
4520 ; AVX2-NEXT: testq %rax, %rax
4521 ; AVX2-NEXT: js .LBB80_22
4522 ; AVX2-NEXT: # BB#23:
4523 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm2
4524 ; AVX2-NEXT: jmp .LBB80_24
4525 ; AVX2-NEXT: .LBB80_22:
4526 ; AVX2-NEXT: movq %rax, %rcx
4527 ; AVX2-NEXT: shrq %rcx
4528 ; AVX2-NEXT: andl $1, %eax
4529 ; AVX2-NEXT: orq %rcx, %rax
4530 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm2
4531 ; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2
4532 ; AVX2-NEXT: .LBB80_24:
4533 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
4534 ; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4537 ; AVX512F-LABEL: uitofp_load_8i64_to_8f32:
4539 ; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0
4540 ; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm1
4541 ; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
4542 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
4543 ; AVX512F-NEXT: vmovq %xmm1, %rax
4544 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm1
4545 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
4546 ; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm2
4547 ; AVX512F-NEXT: vmovq %xmm2, %rax
4548 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm3
4549 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
4550 ; AVX512F-NEXT: vpextrq $1, %xmm2, %rax
4551 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm2
4552 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
4553 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
4554 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm2
4555 ; AVX512F-NEXT: vmovq %xmm0, %rax
4556 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm3
4557 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
4558 ; AVX512F-NEXT: vextracti32x4 $1, %zmm0, %xmm0
4559 ; AVX512F-NEXT: vmovq %xmm0, %rax
4560 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm3
4561 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
4562 ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
4563 ; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm0
4564 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
4565 ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4566 ; AVX512F-NEXT: retq
4568 ; AVX512VL-LABEL: uitofp_load_8i64_to_8f32:
4570 ; AVX512VL-NEXT: vmovdqa64 (%rdi), %zmm0
4571 ; AVX512VL-NEXT: vextracti32x4 $2, %zmm0, %xmm1
4572 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
4573 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
4574 ; AVX512VL-NEXT: vmovq %xmm1, %rax
4575 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm1
4576 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
4577 ; AVX512VL-NEXT: vextracti32x4 $3, %zmm0, %xmm2
4578 ; AVX512VL-NEXT: vmovq %xmm2, %rax
4579 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm3
4580 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
4581 ; AVX512VL-NEXT: vpextrq $1, %xmm2, %rax
4582 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm2
4583 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
4584 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
4585 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm2
4586 ; AVX512VL-NEXT: vmovq %xmm0, %rax
4587 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm3
4588 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
4589 ; AVX512VL-NEXT: vextracti32x4 $1, %zmm0, %xmm0
4590 ; AVX512VL-NEXT: vmovq %xmm0, %rax
4591 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm3
4592 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
4593 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
4594 ; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm0
4595 ; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
4596 ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4597 ; AVX512VL-NEXT: retq
4599 ; AVX512DQ-LABEL: uitofp_load_8i64_to_8f32:
4601 ; AVX512DQ-NEXT: vcvtuqq2ps (%rdi), %ymm0
4602 ; AVX512DQ-NEXT: retq
4604 ; AVX512VLDQ-LABEL: uitofp_load_8i64_to_8f32:
4605 ; AVX512VLDQ: # BB#0:
4606 ; AVX512VLDQ-NEXT: vcvtuqq2ps (%rdi), %ymm0
4607 ; AVX512VLDQ-NEXT: retq
4608 %ld = load <8 x i64>, <8 x i64> *%a
4609 %cvt = uitofp <8 x i64> %ld to <8 x float>
4610 ret <8 x float> %cvt
; Unsigned <8 x i32> load -> <8 x float>.
; SSE/AVX1/AVX2 have no unsigned dword convert, so the assertions show the
; bias trick: split each u32 into low/high 16-bit halves, convert each half
; and recombine with add/mul (note the large bias constant on the SSE path).
; AVX512 targets select the native vcvtudq2ps instruction directly.
; NOTE(review): assertion lines are autogenerated — regenerate with
; update_llc_test_checks.py rather than editing by hand.
4613 define <8 x float> @uitofp_load_8i32_to_8f32(<8 x i32> *%a) {
4614 ; SSE-LABEL: uitofp_load_8i32_to_8f32:
4616 ; SSE-NEXT: movdqa (%rdi), %xmm0
4617 ; SSE-NEXT: movdqa 16(%rdi), %xmm1
4618 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
4619 ; SSE-NEXT: movdqa %xmm0, %xmm3
4620 ; SSE-NEXT: pand %xmm2, %xmm3
4621 ; SSE-NEXT: movdqa {{.*#+}} xmm4 = [1258291200,1258291200,1258291200,1258291200]
4622 ; SSE-NEXT: por %xmm4, %xmm3
4623 ; SSE-NEXT: psrld $16, %xmm0
4624 ; SSE-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928]
4625 ; SSE-NEXT: por %xmm5, %xmm0
4626 ; SSE-NEXT: movaps {{.*#+}} xmm6 = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
4627 ; SSE-NEXT: addps %xmm6, %xmm0
4628 ; SSE-NEXT: addps %xmm3, %xmm0
4629 ; SSE-NEXT: pand %xmm1, %xmm2
4630 ; SSE-NEXT: por %xmm4, %xmm2
4631 ; SSE-NEXT: psrld $16, %xmm1
4632 ; SSE-NEXT: por %xmm5, %xmm1
4633 ; SSE-NEXT: addps %xmm6, %xmm1
4634 ; SSE-NEXT: addps %xmm2, %xmm1
4637 ; AVX1-LABEL: uitofp_load_8i32_to_8f32:
4639 ; AVX1-NEXT: vmovdqa (%rdi), %ymm0
4640 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
4641 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4642 ; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2
4643 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
4644 ; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1
4645 ; AVX1-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
4646 ; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
4647 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
4648 ; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0
4651 ; AVX2-LABEL: uitofp_load_8i32_to_8f32:
4653 ; AVX2-NEXT: vmovdqa (%rdi), %ymm0
4654 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
4655 ; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
4656 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
4657 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
4658 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
4659 ; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
4660 ; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0
4661 ; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0
4664 ; AVX512F-LABEL: uitofp_load_8i32_to_8f32:
4666 ; AVX512F-NEXT: vmovaps (%rdi), %ymm0
4667 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
4668 ; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
4669 ; AVX512F-NEXT: retq
4671 ; AVX512VL-LABEL: uitofp_load_8i32_to_8f32:
4673 ; AVX512VL-NEXT: vcvtudq2ps (%rdi), %ymm0
4674 ; AVX512VL-NEXT: retq
4676 ; AVX512DQ-LABEL: uitofp_load_8i32_to_8f32:
4678 ; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0
4679 ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
4680 ; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
4681 ; AVX512DQ-NEXT: retq
4683 ; AVX512VLDQ-LABEL: uitofp_load_8i32_to_8f32:
4684 ; AVX512VLDQ: # BB#0:
4685 ; AVX512VLDQ-NEXT: vcvtudq2ps (%rdi), %ymm0
4686 ; AVX512VLDQ-NEXT: retq
4687 %ld = load <8 x i32>, <8 x i32> *%a
4688 %cvt = uitofp <8 x i32> %ld to <8 x float>
4689 ret <8 x float> %cvt
; Unsigned <8 x i16> load -> <8 x float>.
; u16 values fit losslessly in i32, so lowering zero-extends
; (punpck*wd with a zero register on SSE, vpmovzxwd on AVX) and then uses
; the signed cvtdq2ps, which is exact for this range.
4692 define <8 x float> @uitofp_load_8i16_to_8f32(<8 x i16> *%a) {
4693 ; SSE-LABEL: uitofp_load_8i16_to_8f32:
4695 ; SSE-NEXT: movdqa (%rdi), %xmm1
4696 ; SSE-NEXT: pxor %xmm2, %xmm2
4697 ; SSE-NEXT: movdqa %xmm1, %xmm0
4698 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
4699 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
4700 ; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
4701 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
4704 ; AVX1-LABEL: uitofp_load_8i16_to_8f32:
4706 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
4707 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
4708 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4709 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
4712 ; AVX2-LABEL: uitofp_load_8i16_to_8f32:
4714 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
4715 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
4718 ; AVX512-LABEL: uitofp_load_8i16_to_8f32:
4720 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
4721 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
4723 %ld = load <8 x i16>, <8 x i16> *%a
4724 %cvt = uitofp <8 x i16> %ld to <8 x float>
4725 ret <8 x float> %cvt
; Unsigned <8 x i8> load -> <8 x float>.
; Same scheme as the i16 case, one widening step earlier: zero-extend bytes
; to dwords (punpcklbw+punpck*wd on SSE, vpmovzxbd on AVX) then cvtdq2ps,
; which is exact since u8 fits in i32.
4728 define <8 x float> @uitofp_load_8i8_to_8f32(<8 x i8> *%a) {
4729 ; SSE-LABEL: uitofp_load_8i8_to_8f32:
4731 ; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
4732 ; SSE-NEXT: pxor %xmm2, %xmm2
4733 ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
4734 ; SSE-NEXT: movdqa %xmm1, %xmm0
4735 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
4736 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
4737 ; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
4738 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
4741 ; AVX1-LABEL: uitofp_load_8i8_to_8f32:
4743 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
4744 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
4745 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4746 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
4749 ; AVX2-LABEL: uitofp_load_8i8_to_8f32:
4751 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
4752 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
4755 ; AVX512-LABEL: uitofp_load_8i8_to_8f32:
4757 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
4758 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
4760 %ld = load <8 x i8>, <8 x i8> *%a
4761 %cvt = uitofp <8 x i8> %ld to <8 x float>
4762 ret <8 x float> %cvt
; Aggregate special case: load a packed struct <{ <8 x i8>, <8 x i16>, <8 x float>* }>,
; sitofp the <8 x i16> member and store the result through the pointer member.
; The packed (align 1) struct forces unaligned member loads — note the
; movdqu 8(%rdi) for the i16 vector and movq 24(%rdi) for the pointer.
; AVX paths shown here end with vzeroupper before returning to the caller.
4769 %Arguments = type <{ <8 x i8>, <8 x i16>, <8 x float>* }>
4770 define void @aggregate_sitofp_8i16_to_8f32(%Arguments* nocapture readonly %a0) {
4771 ; SSE-LABEL: aggregate_sitofp_8i16_to_8f32:
4773 ; SSE-NEXT: movq 24(%rdi), %rax
4774 ; SSE-NEXT: movdqu 8(%rdi), %xmm0
4775 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
4776 ; SSE-NEXT: psrad $16, %xmm1
4777 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
4778 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
4779 ; SSE-NEXT: psrad $16, %xmm0
4780 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
4781 ; SSE-NEXT: movaps %xmm0, 16(%rax)
4782 ; SSE-NEXT: movaps %xmm1, (%rax)
4785 ; AVX1-LABEL: aggregate_sitofp_8i16_to_8f32:
4787 ; AVX1-NEXT: movq 24(%rdi), %rax
4788 ; AVX1-NEXT: vmovdqu 8(%rdi), %xmm0
4789 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
4790 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
4791 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
4792 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4793 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
4794 ; AVX1-NEXT: vmovaps %ymm0, (%rax)
4795 ; AVX1-NEXT: vzeroupper
4798 ; AVX2-LABEL: aggregate_sitofp_8i16_to_8f32:
4800 ; AVX2-NEXT: movq 24(%rdi), %rax
4801 ; AVX2-NEXT: vpmovsxwd 8(%rdi), %ymm0
4802 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
4803 ; AVX2-NEXT: vmovaps %ymm0, (%rax)
4804 ; AVX2-NEXT: vzeroupper
4807 ; AVX512-LABEL: aggregate_sitofp_8i16_to_8f32:
4809 ; AVX512-NEXT: movq 24(%rdi), %rax
4810 ; AVX512-NEXT: vpmovsxwd 8(%rdi), %ymm0
4811 ; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
4812 ; AVX512-NEXT: vmovaps %ymm0, (%rax)
4814 %1 = load %Arguments, %Arguments* %a0, align 1
4815 %2 = extractvalue %Arguments %1, 1
4816 %3 = extractvalue %Arguments %1, 2
4817 %4 = sitofp <8 x i16> %2 to <8 x float>
4818 store <8 x float> %4, <8 x float>* %3, align 32
; Scalar i32 sitofp inserted into lane 0 of an existing <2 x double>:
; should fold to a single cvtsi2sdl on the incoming vector register.
4822 define <2 x double> @sitofp_i32_to_2f64(<2 x double> %a0, i32 %a1) nounwind {
4823 ; SSE-LABEL: sitofp_i32_to_2f64:
4825 ; SSE-NEXT: cvtsi2sdl %edi, %xmm0
4828 ; AVX-LABEL: sitofp_i32_to_2f64:
4830 ; AVX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0
4832 %cvt = sitofp i32 %a1 to double
4833 %res = insertelement <2 x double> %a0, double %cvt, i32 0
4834 ret <2 x double> %res
; Scalar i32 sitofp inserted into lane 0 of an existing <4 x float>:
; should fold to a single cvtsi2ssl on the incoming vector register.
4837 define <4 x float> @sitofp_i32_to_4f32(<4 x float> %a0, i32 %a1) nounwind {
4838 ; SSE-LABEL: sitofp_i32_to_4f32:
4840 ; SSE-NEXT: cvtsi2ssl %edi, %xmm0
4843 ; AVX-LABEL: sitofp_i32_to_4f32:
4845 ; AVX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0
4847 %cvt = sitofp i32 %a1 to float
4848 %res = insertelement <4 x float> %a0, float %cvt, i32 0
4849 ret <4 x float> %res
; Scalar i64 sitofp inserted into lane 0 of an existing <2 x double>:
; should fold to a single cvtsi2sdq on the incoming vector register.
4852 define <2 x double> @sitofp_i64_to_2f64(<2 x double> %a0, i64 %a1) nounwind {
4853 ; SSE-LABEL: sitofp_i64_to_2f64:
4855 ; SSE-NEXT: cvtsi2sdq %rdi, %xmm0
4858 ; AVX-LABEL: sitofp_i64_to_2f64:
4860 ; AVX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0
4862 %cvt = sitofp i64 %a1 to double
4863 %res = insertelement <2 x double> %a0, double %cvt, i32 0
4864 ret <2 x double> %res
; Scalar i64 sitofp inserted into lane 0 of an existing <4 x float>:
; should fold to a single cvtsi2ssq on the incoming vector register.
4867 define <4 x float> @sitofp_i64_to_4f32(<4 x float> %a0, i64 %a1) nounwind {
4868 ; SSE-LABEL: sitofp_i64_to_4f32:
4870 ; SSE-NEXT: cvtsi2ssq %rdi, %xmm0
4873 ; AVX-LABEL: sitofp_i64_to_4f32:
4875 ; AVX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0
4877 %cvt = sitofp i64 %a1 to float
4878 %res = insertelement <4 x float> %a0, float %cvt, i32 0
4879 ret <4 x float> %res