1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL
3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=DQ --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX
4 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL
5 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=DQ --check-prefix=AVX512DQ
6 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW
7 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512vl,avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=DQ --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ
8 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512vl,avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW
11 define <16 x float> @sitof32(<16 x i32> %a) nounwind {
14 ; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
16 %b = sitofp <16 x i32> %a to <16 x float>
20 define <8 x double> @sltof864(<8 x i64> %a) {
21 ; NODQ-LABEL: sltof864:
23 ; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm1
24 ; NODQ-NEXT: vpextrq $1, %xmm1, %rax
25 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
26 ; NODQ-NEXT: vmovq %xmm1, %rax
27 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
28 ; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
29 ; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2
30 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax
31 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm3
32 ; NODQ-NEXT: vmovq %xmm2, %rax
33 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
34 ; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
35 ; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
36 ; NODQ-NEXT: vextracti32x4 $1, %zmm0, %xmm2
37 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax
38 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm3
39 ; NODQ-NEXT: vmovq %xmm2, %rax
40 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
41 ; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
42 ; NODQ-NEXT: vpextrq $1, %xmm0, %rax
43 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm3
44 ; NODQ-NEXT: vmovq %xmm0, %rax
45 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm0
46 ; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
47 ; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
48 ; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
53 ; DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
55 %b = sitofp <8 x i64> %a to <8 x double>
59 define <4 x double> @sltof464(<4 x i64> %a) {
60 ; NODQ-LABEL: sltof464:
62 ; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm1
63 ; NODQ-NEXT: vpextrq $1, %xmm1, %rax
64 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
65 ; NODQ-NEXT: vmovq %xmm1, %rax
66 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
67 ; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
68 ; NODQ-NEXT: vpextrq $1, %xmm0, %rax
69 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
70 ; NODQ-NEXT: vmovq %xmm0, %rax
71 ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
72 ; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
73 ; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
76 ; VLDQ-LABEL: sltof464:
78 ; VLDQ-NEXT: vcvtqq2pd %ymm0, %ymm0
81 ; AVX512DQ-LABEL: sltof464:
83 ; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
84 ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
85 ; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
87 %b = sitofp <4 x i64> %a to <4 x double>
91 define <2 x float> @sltof2f32(<2 x i64> %a) {
92 ; NODQ-LABEL: sltof2f32:
94 ; NODQ-NEXT: vpextrq $1, %xmm0, %rax
95 ; NODQ-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
96 ; NODQ-NEXT: vmovq %xmm0, %rax
97 ; NODQ-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
98 ; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
99 ; NODQ-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
100 ; NODQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
103 ; VLDQ-LABEL: sltof2f32:
105 ; VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0
108 ; AVX512DQ-LABEL: sltof2f32:
110 ; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
111 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
112 ; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
113 ; AVX512DQ-NEXT: retq
114 %b = sitofp <2 x i64> %a to <2 x float>
118 define <4 x float> @sltof4f32_mem(<4 x i64>* %a) {
119 ; NODQ-LABEL: sltof4f32_mem:
121 ; NODQ-NEXT: vmovdqu (%rdi), %ymm0
122 ; NODQ-NEXT: vpextrq $1, %xmm0, %rax
123 ; NODQ-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
124 ; NODQ-NEXT: vmovq %xmm0, %rax
125 ; NODQ-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
126 ; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
127 ; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
128 ; NODQ-NEXT: vmovq %xmm0, %rax
129 ; NODQ-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
130 ; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
131 ; NODQ-NEXT: vpextrq $1, %xmm0, %rax
132 ; NODQ-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
133 ; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
136 ; VLDQ-LABEL: sltof4f32_mem:
138 ; VLDQ-NEXT: vcvtqq2psy (%rdi), %xmm0
141 ; AVX512DQ-LABEL: sltof4f32_mem:
143 ; AVX512DQ-NEXT: vmovups (%rdi), %ymm0
144 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
145 ; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
146 ; AVX512DQ-NEXT: retq
147 %a1 = load <4 x i64>, <4 x i64>* %a, align 8
148 %b = sitofp <4 x i64> %a1 to <4 x float>
152 define <4 x i64> @f64tosl(<4 x double> %a) {
153 ; NODQ-LABEL: f64tosl:
155 ; NODQ-NEXT: vextractf128 $1, %ymm0, %xmm1
156 ; NODQ-NEXT: vcvttsd2si %xmm1, %rax
157 ; NODQ-NEXT: vmovq %rax, %xmm2
158 ; NODQ-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
159 ; NODQ-NEXT: vcvttsd2si %xmm1, %rax
160 ; NODQ-NEXT: vmovq %rax, %xmm1
161 ; NODQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
162 ; NODQ-NEXT: vcvttsd2si %xmm0, %rax
163 ; NODQ-NEXT: vmovq %rax, %xmm2
164 ; NODQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
165 ; NODQ-NEXT: vcvttsd2si %xmm0, %rax
166 ; NODQ-NEXT: vmovq %rax, %xmm0
167 ; NODQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
168 ; NODQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
171 ; VLDQ-LABEL: f64tosl:
173 ; VLDQ-NEXT: vcvttpd2qq %ymm0, %ymm0
176 ; AVX512DQ-LABEL: f64tosl:
178 ; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
179 ; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
180 ; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
181 ; AVX512DQ-NEXT: retq
182 %b = fptosi <4 x double> %a to <4 x i64>
186 define <4 x i64> @f32tosl(<4 x float> %a) {
187 ; NODQ-LABEL: f32tosl:
189 ; NODQ-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
190 ; NODQ-NEXT: vcvttss2si %xmm1, %rax
191 ; NODQ-NEXT: vmovq %rax, %xmm1
192 ; NODQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
193 ; NODQ-NEXT: vcvttss2si %xmm2, %rax
194 ; NODQ-NEXT: vmovq %rax, %xmm2
195 ; NODQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
196 ; NODQ-NEXT: vcvttss2si %xmm0, %rax
197 ; NODQ-NEXT: vmovq %rax, %xmm2
198 ; NODQ-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
199 ; NODQ-NEXT: vcvttss2si %xmm0, %rax
200 ; NODQ-NEXT: vmovq %rax, %xmm0
201 ; NODQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
202 ; NODQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
205 ; VLDQ-LABEL: f32tosl:
207 ; VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0
210 ; AVX512DQ-LABEL: f32tosl:
212 ; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
213 ; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
214 ; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
215 ; AVX512DQ-NEXT: retq
216 %b = fptosi <4 x float> %a to <4 x i64>
220 define <4 x float> @sltof432(<4 x i64> %a) {
221 ; NODQ-LABEL: sltof432:
223 ; NODQ-NEXT: vpextrq $1, %xmm0, %rax
224 ; NODQ-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
225 ; NODQ-NEXT: vmovq %xmm0, %rax
226 ; NODQ-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
227 ; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
228 ; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
229 ; NODQ-NEXT: vmovq %xmm0, %rax
230 ; NODQ-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
231 ; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
232 ; NODQ-NEXT: vpextrq $1, %xmm0, %rax
233 ; NODQ-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
234 ; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
237 ; VLDQ-LABEL: sltof432:
239 ; VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0
242 ; AVX512DQ-LABEL: sltof432:
244 ; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
245 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
246 ; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
247 ; AVX512DQ-NEXT: retq
248 %b = sitofp <4 x i64> %a to <4 x float>
252 define <4 x float> @ultof432(<4 x i64> %a) {
253 ; NODQ-LABEL: ultof432:
255 ; NODQ-NEXT: vpextrq $1, %xmm0, %rax
256 ; NODQ-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
257 ; NODQ-NEXT: vmovq %xmm0, %rax
258 ; NODQ-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2
259 ; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
260 ; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
261 ; NODQ-NEXT: vmovq %xmm0, %rax
262 ; NODQ-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm2
263 ; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
264 ; NODQ-NEXT: vpextrq $1, %xmm0, %rax
265 ; NODQ-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm0
266 ; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
269 ; VLDQ-LABEL: ultof432:
271 ; VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0
274 ; AVX512DQ-LABEL: ultof432:
276 ; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
277 ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
278 ; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
279 ; AVX512DQ-NEXT: retq
280 %b = uitofp <4 x i64> %a to <4 x float>
284 define <8 x double> @ultof64(<8 x i64> %a) {
285 ; NODQ-LABEL: ultof64:
287 ; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm1
288 ; NODQ-NEXT: vpextrq $1, %xmm1, %rax
289 ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
290 ; NODQ-NEXT: vmovq %xmm1, %rax
291 ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1
292 ; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
293 ; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2
294 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax
295 ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm3
296 ; NODQ-NEXT: vmovq %xmm2, %rax
297 ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm2
298 ; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
299 ; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
300 ; NODQ-NEXT: vextracti32x4 $1, %zmm0, %xmm2
301 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax
302 ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm3
303 ; NODQ-NEXT: vmovq %xmm2, %rax
304 ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm2
305 ; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
306 ; NODQ-NEXT: vpextrq $1, %xmm0, %rax
307 ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm3
308 ; NODQ-NEXT: vmovq %xmm0, %rax
309 ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm0
310 ; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
311 ; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
312 ; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
317 ; DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
319 %b = uitofp <8 x i64> %a to <8 x double>
323 define <16 x i32> @fptosi00(<16 x float> %a) nounwind {
324 ; ALL-LABEL: fptosi00:
326 ; ALL-NEXT: vcvttps2dq %zmm0, %zmm0
328 %b = fptosi <16 x float> %a to <16 x i32>
332 define <16 x i32> @fptoui00(<16 x float> %a) nounwind {
333 ; ALL-LABEL: fptoui00:
335 ; ALL-NEXT: vcvttps2udq %zmm0, %zmm0
337 %b = fptoui <16 x float> %a to <16 x i32>
341 define <8 x i32> @fptoui_256(<8 x float> %a) nounwind {
342 ; NOVL-LABEL: fptoui_256:
344 ; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
345 ; NOVL-NEXT: vcvttps2udq %zmm0, %zmm0
346 ; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
349 ; VL-LABEL: fptoui_256:
351 ; VL-NEXT: vcvttps2udq %ymm0, %ymm0
353 %b = fptoui <8 x float> %a to <8 x i32>
357 define <4 x i32> @fptoui_128(<4 x float> %a) nounwind {
358 ; NOVL-LABEL: fptoui_128:
360 ; NOVL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
361 ; NOVL-NEXT: vcvttps2udq %zmm0, %zmm0
362 ; NOVL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
365 ; VL-LABEL: fptoui_128:
367 ; VL-NEXT: vcvttps2udq %xmm0, %xmm0
369 %b = fptoui <4 x float> %a to <4 x i32>
373 define <8 x i32> @fptoui01(<8 x double> %a) nounwind {
374 ; ALL-LABEL: fptoui01:
376 ; ALL-NEXT: vcvttpd2udq %zmm0, %ymm0
378 %b = fptoui <8 x double> %a to <8 x i32>
382 define <4 x i32> @fptoui_256d(<4 x double> %a) nounwind {
383 ; NOVL-LABEL: fptoui_256d:
385 ; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
386 ; NOVL-NEXT: vcvttpd2udq %zmm0, %ymm0
387 ; NOVL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
390 ; VL-LABEL: fptoui_256d:
392 ; VL-NEXT: vcvttpd2udq %ymm0, %xmm0
394 %b = fptoui <4 x double> %a to <4 x i32>
398 define <8 x double> @sitof64(<8 x i32> %a) {
399 ; ALL-LABEL: sitof64:
401 ; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0
403 %b = sitofp <8 x i32> %a to <8 x double>
406 define <8 x double> @sitof64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
407 ; NODQ-LABEL: sitof64_mask:
409 ; NODQ-NEXT: kmovw %edi, %k1
410 ; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
413 ; DQ-LABEL: sitof64_mask:
415 ; DQ-NEXT: kmovb %edi, %k1
416 ; DQ-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
418 %1 = bitcast i8 %c to <8 x i1>
419 %2 = sitofp <8 x i32> %b to <8 x double>
420 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
423 define <8 x double> @sitof64_maskz(<8 x i32> %a, i8 %b) nounwind {
424 ; NODQ-LABEL: sitof64_maskz:
426 ; NODQ-NEXT: kmovw %edi, %k1
427 ; NODQ-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
430 ; DQ-LABEL: sitof64_maskz:
432 ; DQ-NEXT: kmovb %edi, %k1
433 ; DQ-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
435 %1 = bitcast i8 %b to <8 x i1>
436 %2 = sitofp <8 x i32> %a to <8 x double>
437 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
441 define <8 x i32> @fptosi01(<8 x double> %a) {
442 ; ALL-LABEL: fptosi01:
444 ; ALL-NEXT: vcvttpd2dq %zmm0, %ymm0
446 %b = fptosi <8 x double> %a to <8 x i32>
450 define <4 x i32> @fptosi03(<4 x double> %a) {
451 ; ALL-LABEL: fptosi03:
453 ; ALL-NEXT: vcvttpd2dq %ymm0, %xmm0
455 %b = fptosi <4 x double> %a to <4 x i32>
459 define <16 x float> @fptrunc00(<16 x double> %b) nounwind {
460 ; NODQ-LABEL: fptrunc00:
462 ; NODQ-NEXT: vcvtpd2ps %zmm0, %ymm0
463 ; NODQ-NEXT: vcvtpd2ps %zmm1, %ymm1
464 ; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
467 ; DQ-LABEL: fptrunc00:
469 ; DQ-NEXT: vcvtpd2ps %zmm0, %ymm0
470 ; DQ-NEXT: vcvtpd2ps %zmm1, %ymm1
471 ; DQ-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm0
473 %a = fptrunc <16 x double> %b to <16 x float>
477 define <4 x float> @fptrunc01(<4 x double> %b) {
478 ; ALL-LABEL: fptrunc01:
480 ; ALL-NEXT: vcvtpd2ps %ymm0, %xmm0
482 %a = fptrunc <4 x double> %b to <4 x float>
486 define <4 x float> @fptrunc02(<4 x double> %b, <4 x i1> %mask) {
487 ; NOVL-LABEL: fptrunc02:
489 ; NOVL-NEXT: vpslld $31, %xmm1, %xmm1
490 ; NOVL-NEXT: vpsrad $31, %xmm1, %xmm1
491 ; NOVL-NEXT: vcvtpd2ps %ymm0, %xmm0
492 ; NOVL-NEXT: vpand %xmm0, %xmm1, %xmm0
495 ; VL-LABEL: fptrunc02:
497 ; VL-NEXT: vpslld $31, %xmm1, %xmm1
498 ; VL-NEXT: vptestmd %xmm1, %xmm1, %k1
499 ; VL-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z}
501 %a = fptrunc <4 x double> %b to <4 x float>
502 %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer
506 define <4 x float> @fptrunc03(<2 x double> %a0, <4 x float> %a1) nounwind {
507 ; ALL-LABEL: fptrunc03:
509 ; ALL-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0
511 %ext = extractelement <2 x double> %a0, i32 0
512 %cvt = fptrunc double %ext to float
513 %res = insertelement <4 x float> %a1, float %cvt, i32 0
517 define <8 x double> @fpext00(<8 x float> %b) nounwind {
518 ; ALL-LABEL: fpext00:
520 ; ALL-NEXT: vcvtps2pd %ymm0, %zmm0
522 %a = fpext <8 x float> %b to <8 x double>
526 define <4 x double> @fpext01(<4 x float> %b, <4 x double>%b1, <4 x double>%a1) {
527 ; NOVL-LABEL: fpext01:
529 ; NOVL-NEXT: vcvtps2pd %xmm0, %ymm0
530 ; NOVL-NEXT: vcmpltpd %ymm2, %ymm1, %ymm1
531 ; NOVL-NEXT: vandpd %ymm0, %ymm1, %ymm0
536 ; VL-NEXT: vcmpltpd %ymm2, %ymm1, %k1
537 ; VL-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z}
539 %a = fpext <4 x float> %b to <4 x double>
540 %mask = fcmp ogt <4 x double>%a1, %b1
541 %c = select <4 x i1>%mask, <4 x double>%a, <4 x double>zeroinitializer
545 define <2 x double> @fpext02(<2 x double> %a0, <4 x float> %a1) nounwind {
546 ; ALL-LABEL: fpext02:
548 ; ALL-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0
550 %ext = extractelement <4 x float> %a1, i32 0
551 %cvt = fpext float %ext to double
552 %res = insertelement <2 x double> %a0, double %cvt, i32 0
553 ret <2 x double> %res
556 define double @funcA(i64* nocapture %e) {
558 ; ALL: ## BB#0: ## %entry
559 ; ALL-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
562 %tmp1 = load i64, i64* %e, align 8
563 %conv = sitofp i64 %tmp1 to double
567 define double @funcB(i32* %e) {
569 ; ALL: ## BB#0: ## %entry
570 ; ALL-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0
573 %tmp1 = load i32, i32* %e, align 4
574 %conv = sitofp i32 %tmp1 to double
578 define float @funcC(i32* %e) {
580 ; ALL: ## BB#0: ## %entry
581 ; ALL-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0
584 %tmp1 = load i32, i32* %e, align 4
585 %conv = sitofp i32 %tmp1 to float
589 define float @i64tof32(i64* %e) {
590 ; ALL-LABEL: i64tof32:
591 ; ALL: ## BB#0: ## %entry
592 ; ALL-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0
595 %tmp1 = load i64, i64* %e, align 8
596 %conv = sitofp i64 %tmp1 to float
600 define void @fpext() {
602 ; ALL: ## BB#0: ## %entry
603 ; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
604 ; ALL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
605 ; ALL-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp)
608 %f = alloca float, align 4
609 %d = alloca double, align 8
610 %tmp = load float, float* %f, align 4
611 %conv = fpext float %tmp to double
612 store double %conv, double* %d, align 8
616 define void @fpround_scalar() nounwind uwtable {
617 ; ALL-LABEL: fpround_scalar:
618 ; ALL: ## BB#0: ## %entry
619 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
620 ; ALL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
621 ; ALL-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp)
624 %f = alloca float, align 4
625 %d = alloca double, align 8
626 %tmp = load double, double* %d, align 8
627 %conv = fptrunc double %tmp to float
628 store float %conv, float* %f, align 4
632 define double @long_to_double(i64 %x) {
633 ; ALL-LABEL: long_to_double:
635 ; ALL-NEXT: vmovq %rdi, %xmm0
637 %res = bitcast i64 %x to double
641 define i64 @double_to_long(double %x) {
642 ; ALL-LABEL: double_to_long:
644 ; ALL-NEXT: vmovq %xmm0, %rax
646 %res = bitcast double %x to i64
650 define float @int_to_float(i32 %x) {
651 ; ALL-LABEL: int_to_float:
653 ; ALL-NEXT: vmovd %edi, %xmm0
655 %res = bitcast i32 %x to float
659 define i32 @float_to_int(float %x) {
660 ; ALL-LABEL: float_to_int:
662 ; ALL-NEXT: vmovd %xmm0, %eax
664 %res = bitcast float %x to i32
668 define <16 x double> @uitof64(<16 x i32> %a) nounwind {
669 ; NODQ-LABEL: uitof64:
671 ; NODQ-NEXT: vcvtudq2pd %ymm0, %zmm2
672 ; NODQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
673 ; NODQ-NEXT: vcvtudq2pd %ymm0, %zmm1
674 ; NODQ-NEXT: vmovaps %zmm2, %zmm0
679 ; DQ-NEXT: vcvtudq2pd %ymm0, %zmm2
680 ; DQ-NEXT: vextracti32x8 $1, %zmm0, %ymm0
681 ; DQ-NEXT: vcvtudq2pd %ymm0, %zmm1
682 ; DQ-NEXT: vmovaps %zmm2, %zmm0
684 %b = uitofp <16 x i32> %a to <16 x double>
687 define <8 x double> @uitof64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
688 ; NODQ-LABEL: uitof64_mask:
690 ; NODQ-NEXT: kmovw %edi, %k1
691 ; NODQ-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
694 ; DQ-LABEL: uitof64_mask:
696 ; DQ-NEXT: kmovb %edi, %k1
697 ; DQ-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
699 %1 = bitcast i8 %c to <8 x i1>
700 %2 = uitofp <8 x i32> %b to <8 x double>
701 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
704 define <8 x double> @uitof64_maskz(<8 x i32> %a, i8 %b) nounwind {
705 ; NODQ-LABEL: uitof64_maskz:
707 ; NODQ-NEXT: kmovw %edi, %k1
708 ; NODQ-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
711 ; DQ-LABEL: uitof64_maskz:
713 ; DQ-NEXT: kmovb %edi, %k1
714 ; DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
716 %1 = bitcast i8 %b to <8 x i1>
717 %2 = uitofp <8 x i32> %a to <8 x double>
718 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
722 define <4 x double> @uitof64_256(<4 x i32> %a) nounwind {
723 ; NOVL-LABEL: uitof64_256:
725 ; NOVL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
726 ; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0
727 ; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
730 ; VL-LABEL: uitof64_256:
732 ; VL-NEXT: vcvtudq2pd %xmm0, %ymm0
734 %b = uitofp <4 x i32> %a to <4 x double>
738 define <16 x float> @uitof32(<16 x i32> %a) nounwind {
739 ; ALL-LABEL: uitof32:
741 ; ALL-NEXT: vcvtudq2ps %zmm0, %zmm0
743 %b = uitofp <16 x i32> %a to <16 x float>
747 define <8 x float> @uitof32_256(<8 x i32> %a) nounwind {
748 ; NOVL-LABEL: uitof32_256:
750 ; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
751 ; NOVL-NEXT: vcvtudq2ps %zmm0, %zmm0
752 ; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
755 ; VL-LABEL: uitof32_256:
757 ; VL-NEXT: vcvtudq2ps %ymm0, %ymm0
759 %b = uitofp <8 x i32> %a to <8 x float>
763 define <4 x float> @uitof32_128(<4 x i32> %a) nounwind {
764 ; NOVL-LABEL: uitof32_128:
766 ; NOVL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
767 ; NOVL-NEXT: vcvtudq2ps %zmm0, %zmm0
768 ; NOVL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
771 ; VL-LABEL: uitof32_128:
773 ; VL-NEXT: vcvtudq2ps %xmm0, %xmm0
775 %b = uitofp <4 x i32> %a to <4 x float>
779 define i32 @fptosi02(float %a) nounwind {
780 ; ALL-LABEL: fptosi02:
782 ; ALL-NEXT: vcvttss2si %xmm0, %eax
784 %b = fptosi float %a to i32
788 define i32 @fptoui02(float %a) nounwind {
789 ; ALL-LABEL: fptoui02:
791 ; ALL-NEXT: vcvttss2usi %xmm0, %eax
793 %b = fptoui float %a to i32
797 define float @uitofp02(i32 %a) nounwind {
798 ; ALL-LABEL: uitofp02:
800 ; ALL-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0
802 %b = uitofp i32 %a to float
806 define double @uitofp03(i32 %a) nounwind {
807 ; ALL-LABEL: uitofp03:
809 ; ALL-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0
811 %b = uitofp i32 %a to double
815 define <16 x float> @sitofp_16i1_float(<16 x i32> %a) {
816 ; NODQ-LABEL: sitofp_16i1_float:
818 ; NODQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
819 ; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
820 ; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
821 ; NODQ-NEXT: vcvtdq2ps %zmm0, %zmm0
824 ; DQ-LABEL: sitofp_16i1_float:
826 ; DQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
827 ; DQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
828 ; DQ-NEXT: vpmovm2d %k0, %zmm0
829 ; DQ-NEXT: vcvtdq2ps %zmm0, %zmm0
831 %mask = icmp slt <16 x i32> %a, zeroinitializer
832 %1 = sitofp <16 x i1> %mask to <16 x float>
836 define <16 x float> @sitofp_16i8_float(<16 x i8> %a) {
837 ; ALL-LABEL: sitofp_16i8_float:
839 ; ALL-NEXT: vpmovsxbd %xmm0, %zmm0
840 ; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
842 %1 = sitofp <16 x i8> %a to <16 x float>
846 define <16 x float> @sitofp_16i16_float(<16 x i16> %a) {
847 ; ALL-LABEL: sitofp_16i16_float:
849 ; ALL-NEXT: vpmovsxwd %ymm0, %zmm0
850 ; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
852 %1 = sitofp <16 x i16> %a to <16 x float>
856 define <8 x double> @sitofp_8i16_double(<8 x i16> %a) {
857 ; ALL-LABEL: sitofp_8i16_double:
859 ; ALL-NEXT: vpmovsxwd %xmm0, %ymm0
860 ; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0
862 %1 = sitofp <8 x i16> %a to <8 x double>
866 define <8 x double> @sitofp_8i8_double(<8 x i8> %a) {
867 ; ALL-LABEL: sitofp_8i8_double:
869 ; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
870 ; ALL-NEXT: vpslld $24, %ymm0, %ymm0
871 ; ALL-NEXT: vpsrad $24, %ymm0, %ymm0
872 ; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0
874 %1 = sitofp <8 x i8> %a to <8 x double>
878 define <16 x double> @sitofp_16i1_double(<16 x double> %a) {
879 ; NOVLDQ-LABEL: sitofp_16i1_double:
881 ; NOVLDQ-NEXT: vpxord %zmm2, %zmm2, %zmm2
882 ; NOVLDQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
883 ; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm2, %k2
884 ; NOVLDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
885 ; NOVLDQ-NEXT: vpmovqd %zmm0, %ymm0
886 ; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
887 ; NOVLDQ-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
888 ; NOVLDQ-NEXT: vpmovqd %zmm1, %ymm1
889 ; NOVLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1
892 ; VLDQ-LABEL: sitofp_16i1_double:
894 ; VLDQ-NEXT: vxorpd %zmm2, %zmm2, %zmm2
895 ; VLDQ-NEXT: vcmpltpd %zmm1, %zmm2, %k0
896 ; VLDQ-NEXT: vcmpltpd %zmm0, %zmm2, %k1
897 ; VLDQ-NEXT: vpmovm2d %k1, %ymm0
898 ; VLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
899 ; VLDQ-NEXT: vpmovm2d %k0, %ymm1
900 ; VLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1
903 ; VLNODQ-LABEL: sitofp_16i1_double:
905 ; VLNODQ-NEXT: vpxord %zmm2, %zmm2, %zmm2
906 ; VLNODQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
907 ; VLNODQ-NEXT: vcmpltpd %zmm0, %zmm2, %k2
908 ; VLNODQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
909 ; VLNODQ-NEXT: vmovdqa32 %ymm1, %ymm0 {%k2} {z}
910 ; VLNODQ-NEXT: vcvtdq2pd %ymm0, %zmm0
911 ; VLNODQ-NEXT: vmovdqa32 %ymm1, %ymm1 {%k1} {z}
912 ; VLNODQ-NEXT: vcvtdq2pd %ymm1, %zmm1
915 ; AVX512DQ-LABEL: sitofp_16i1_double:
917 ; AVX512DQ-NEXT: vxorpd %zmm2, %zmm2, %zmm2
918 ; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm2, %k0
919 ; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm2, %k1
920 ; AVX512DQ-NEXT: vpmovm2q %k1, %zmm0
921 ; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
922 ; AVX512DQ-NEXT: vcvtdq2pd %ymm0, %zmm0
923 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm1
924 ; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
925 ; AVX512DQ-NEXT: vcvtdq2pd %ymm1, %zmm1
926 ; AVX512DQ-NEXT: retq
927 %cmpres = fcmp ogt <16 x double> %a, zeroinitializer
928 %1 = sitofp <16 x i1> %cmpres to <16 x double>
932 define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
933 ; NOVLDQ-LABEL: sitofp_8i1_double:
935 ; NOVLDQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
936 ; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
937 ; NOVLDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
938 ; NOVLDQ-NEXT: vpmovqd %zmm0, %ymm0
939 ; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
942 ; VLDQ-LABEL: sitofp_8i1_double:
944 ; VLDQ-NEXT: vxorpd %zmm1, %zmm1, %zmm1
945 ; VLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0
946 ; VLDQ-NEXT: vpmovm2d %k0, %ymm0
947 ; VLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
950 ; VLNODQ-LABEL: sitofp_8i1_double:
952 ; VLNODQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
953 ; VLNODQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
954 ; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
955 ; VLNODQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
956 ; VLNODQ-NEXT: vcvtdq2pd %ymm0, %zmm0
959 ; AVX512DQ-LABEL: sitofp_8i1_double:
961 ; AVX512DQ-NEXT: vxorpd %zmm1, %zmm1, %zmm1
962 ; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0
963 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
964 ; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
965 ; AVX512DQ-NEXT: vcvtdq2pd %ymm0, %zmm0
966 ; AVX512DQ-NEXT: retq
967 %cmpres = fcmp ogt <8 x double> %a, zeroinitializer
968 %1 = sitofp <8 x i1> %cmpres to <8 x double>
972 define <8 x float> @sitofp_8i1_float(<8 x float> %a) {
973 ; NOVLDQ-LABEL: sitofp_8i1_float:
975 ; NOVLDQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
976 ; NOVLDQ-NEXT: vxorps %ymm1, %ymm1, %ymm1
977 ; NOVLDQ-NEXT: vcmpltps %zmm0, %zmm1, %k1
978 ; NOVLDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
979 ; NOVLDQ-NEXT: vpmovqd %zmm0, %ymm0
980 ; NOVLDQ-NEXT: vcvtdq2ps %ymm0, %ymm0
983 ; VLDQ-LABEL: sitofp_8i1_float:
985 ; VLDQ-NEXT: vxorps %ymm1, %ymm1, %ymm1
986 ; VLDQ-NEXT: vcmpltps %ymm0, %ymm1, %k0
987 ; VLDQ-NEXT: vpmovm2d %k0, %ymm0
988 ; VLDQ-NEXT: vcvtdq2ps %ymm0, %ymm0
991 ; VLNODQ-LABEL: sitofp_8i1_float:
993 ; VLNODQ-NEXT: vpxor %ymm1, %ymm1, %ymm1
994 ; VLNODQ-NEXT: vcmpltps %ymm0, %ymm1, %k1
995 ; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
996 ; VLNODQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
997 ; VLNODQ-NEXT: vcvtdq2ps %ymm0, %ymm0
1000 ; AVX512DQ-LABEL: sitofp_8i1_float:
1001 ; AVX512DQ: ## BB#0:
1002 ; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
1003 ; AVX512DQ-NEXT: vxorps %ymm1, %ymm1, %ymm1
1004 ; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm1, %k0
1005 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
1006 ; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
1007 ; AVX512DQ-NEXT: vcvtdq2ps %ymm0, %ymm0
1008 ; AVX512DQ-NEXT: retq
1009 %cmpres = fcmp ogt <8 x float> %a, zeroinitializer
1010 %1 = sitofp <8 x i1> %cmpres to <8 x float>
1014 define <4 x float> @sitofp_4i1_float(<4 x float> %a) {
1015 ; NOVL-LABEL: sitofp_4i1_float:
1017 ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
1018 ; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
1019 ; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
1022 ; VLDQ-LABEL: sitofp_4i1_float:
1024 ; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
1025 ; VLDQ-NEXT: vcmpltps %xmm0, %xmm1, %k0
1026 ; VLDQ-NEXT: vpmovm2d %k0, %xmm0
1027 ; VLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
1030 ; VLNODQ-LABEL: sitofp_4i1_float:
1032 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1033 ; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %k1
1034 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
1035 ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
1036 ; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
1038 %cmpres = fcmp ogt <4 x float> %a, zeroinitializer
1039 %1 = sitofp <4 x i1> %cmpres to <4 x float>
1043 define <4 x double> @sitofp_4i1_double(<4 x double> %a) {
1044 ; NOVL-LABEL: sitofp_4i1_double:
1046 ; NOVL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
1047 ; NOVL-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
1048 ; NOVL-NEXT: vpmovqd %zmm0, %ymm0
1049 ; NOVL-NEXT: vcvtdq2pd %xmm0, %ymm0
1052 ; VLDQ-LABEL: sitofp_4i1_double:
1054 ; VLDQ-NEXT: vxorpd %ymm1, %ymm1, %ymm1
1055 ; VLDQ-NEXT: vcmpltpd %ymm0, %ymm1, %k0
1056 ; VLDQ-NEXT: vpmovm2d %k0, %xmm0
1057 ; VLDQ-NEXT: vcvtdq2pd %xmm0, %ymm0
1060 ; VLNODQ-LABEL: sitofp_4i1_double:
1062 ; VLNODQ-NEXT: vpxor %ymm1, %ymm1, %ymm1
1063 ; VLNODQ-NEXT: vcmpltpd %ymm0, %ymm1, %k1
1064 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
1065 ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
1066 ; VLNODQ-NEXT: vcvtdq2pd %xmm0, %ymm0
1068 %cmpres = fcmp ogt <4 x double> %a, zeroinitializer
1069 %1 = sitofp <4 x i1> %cmpres to <4 x double>
1073 define <2 x float> @sitofp_2i1_float(<2 x float> %a) {
1074 ; NOVL-LABEL: sitofp_2i1_float:
1076 ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
1077 ; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
1078 ; NOVL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[1]
1079 ; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
1082 ; VLDQ-LABEL: sitofp_2i1_float:
1084 ; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
1085 ; VLDQ-NEXT: vcmpltps %xmm0, %xmm1, %k0
1086 ; VLDQ-NEXT: vpmovm2d %k0, %xmm0
1087 ; VLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
1090 ; VLNODQ-LABEL: sitofp_2i1_float:
1092 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1093 ; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %k1
1094 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
1095 ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
1096 ; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
1098 %cmpres = fcmp ogt <2 x float> %a, zeroinitializer
1099 %1 = sitofp <2 x i1> %cmpres to <2 x float>
1103 define <2 x double> @sitofp_2i1_double(<2 x double> %a) {
1104 ; NOVL-LABEL: sitofp_2i1_double:
1106 ; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1107 ; NOVL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
1108 ; NOVL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
1109 ; NOVL-NEXT: vcvtdq2pd %xmm0, %xmm0
1112 ; VLDQ-LABEL: sitofp_2i1_double:
1114 ; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1115 ; VLDQ-NEXT: vcmpltpd %xmm0, %xmm1, %k0
1116 ; VLDQ-NEXT: vpmovm2q %k0, %xmm0
1117 ; VLDQ-NEXT: vcvtqq2pd %xmm0, %xmm0
1120 ; VLNODQ-LABEL: sitofp_2i1_double:
1122 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1123 ; VLNODQ-NEXT: vcmpltpd %xmm0, %xmm1, %k1
1124 ; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
1125 ; VLNODQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
1126 ; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
1127 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm1
1128 ; VLNODQ-NEXT: vmovq %xmm0, %rax
1129 ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
1130 ; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1132 %cmpres = fcmp ogt <2 x double> %a, zeroinitializer
1133 %1 = sitofp <2 x i1> %cmpres to <2 x double>
; uitofp <16 x i8> -> <16 x float>. All targets zero-extend the bytes to
; dwords (vpmovzxbd) and then use the *signed* vcvtdq2ps, which is correct
; because the zero-extended values are non-negative.
1137 define <16 x float> @uitofp_16i8(<16 x i8>%a) {
1138 ; ALL-LABEL: uitofp_16i8:
1140 ; ALL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1141 ; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
1143 %b = uitofp <16 x i8> %a to <16 x float>
; uitofp <16 x i16> -> <16 x float>. Same strategy as uitofp_16i8: zero-extend
; words to dwords (vpmovzxwd) and convert with the signed vcvtdq2ps, safe
; because zero-extended values cannot be negative.
1147 define <16 x float> @uitofp_16i16(<16 x i16>%a) {
1148 ; ALL-LABEL: uitofp_16i16:
1150 ; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
1151 ; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
1153 %b = uitofp <16 x i16> %a to <16 x float>
; uitofp <16 x i1> -> <16 x float>. The i1 mask (icmp slt %a, 0) lands in a
; k-register; a constant from memory is broadcast under {%k1} {z} to
; materialize the 0/1 integer vector, then vcvtudq2ps converts it.
1157 define <16 x float> @uitofp_16i1_float(<16 x i32> %a) {
1158 ; ALL-LABEL: uitofp_16i1_float:
1160 ; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1
1161 ; ALL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1162 ; ALL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
1163 ; ALL-NEXT: vcvtudq2ps %zmm0, %zmm0
1165 %mask = icmp slt <16 x i32> %a, zeroinitializer
1166 %1 = uitofp <16 x i1> %mask to <16 x float>
; uitofp <16 x i1> -> <16 x double>. The result needs two zmm registers, so
; both lowerings split the 16-bit k-mask in half with kshiftrw $8 and convert
; each 8-element half separately. NOVL broadcasts a qword constant and
; truncates (vpmovqd) before vcvtudq2pd; VL broadcasts a dword constant
; directly into ymm halves.
1170 define <16 x double> @uitofp_16i1_double(<16 x i32> %a) {
1171 ; NOVL-LABEL: uitofp_16i1_double:
1173 ; NOVL-NEXT: vpxord %zmm1, %zmm1, %zmm1
1174 ; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1175 ; NOVL-NEXT: movq {{.*}}(%rip), %rax
1176 ; NOVL-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
1177 ; NOVL-NEXT: vpmovqd %zmm0, %ymm0
1178 ; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0
1179 ; NOVL-NEXT: kshiftrw $8, %k1, %k1
1180 ; NOVL-NEXT: vpbroadcastq %rax, %zmm1 {%k1} {z}
1181 ; NOVL-NEXT: vpmovqd %zmm1, %ymm1
1182 ; NOVL-NEXT: vcvtudq2pd %ymm1, %zmm1
1185 ; VL-LABEL: uitofp_16i1_double:
1187 ; VL-NEXT: vpxord %zmm1, %zmm1, %zmm1
1188 ; VL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1189 ; VL-NEXT: movl {{.*}}(%rip), %eax
1190 ; VL-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z}
1191 ; VL-NEXT: vcvtudq2pd %ymm0, %zmm0
1192 ; VL-NEXT: kshiftrw $8, %k1, %k1
1193 ; VL-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z}
1194 ; VL-NEXT: vcvtudq2pd %ymm1, %zmm1
1196 %mask = icmp slt <16 x i32> %a, zeroinitializer
1197 %1 = uitofp <16 x i1> %mask to <16 x double>
1198 ret <16 x double> %1
; uitofp <8 x i1> -> <8 x float>. Without VL, 256-bit ops are widened to zmm
; (note the implicit-def/kill annotations around ymm0) and the mask expands via
; a qword broadcast + vpmovqd truncate; with VL the compare, masked broadcast
; and vcvtudq2ps all stay in ymm registers.
1201 define <8 x float> @uitofp_8i1_float(<8 x i32> %a) {
1202 ; NOVL-LABEL: uitofp_8i1_float:
1204 ; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
1205 ; NOVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
1206 ; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1207 ; NOVL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
1208 ; NOVL-NEXT: vpmovqd %zmm0, %ymm0
1209 ; NOVL-NEXT: vcvtudq2ps %zmm0, %zmm0
1210 ; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
1213 ; VL-LABEL: uitofp_8i1_float:
1215 ; VL-NEXT: vpxor %ymm1, %ymm1, %ymm1
1216 ; VL-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
1217 ; VL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
1218 ; VL-NEXT: vcvtudq2ps %ymm0, %ymm0
1220 %mask = icmp slt <8 x i32> %a, zeroinitializer
1221 %1 = uitofp <8 x i1> %mask to <8 x float>
; uitofp <8 x i1> -> <8 x double>. Mirrors uitofp_8i1_float: NOVL widens to
; zmm and goes through a qword broadcast + vpmovqd truncate, VL does a ymm
; compare and masked dword broadcast; both finish with vcvtudq2pd into zmm.
1225 define <8 x double> @uitofp_8i1_double(<8 x i32> %a) {
1226 ; NOVL-LABEL: uitofp_8i1_double:
1228 ; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
1229 ; NOVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
1230 ; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
1231 ; NOVL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
1232 ; NOVL-NEXT: vpmovqd %zmm0, %ymm0
1233 ; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0
1236 ; VL-LABEL: uitofp_8i1_double:
1238 ; VL-NEXT: vpxor %ymm1, %ymm1, %ymm1
1239 ; VL-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
1240 ; VL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
1241 ; VL-NEXT: vcvtudq2pd %ymm0, %zmm0
1243 %mask = icmp slt <8 x i32> %a, zeroinitializer
1244 %1 = uitofp <8 x i1> %mask to <8 x double>
; uitofp <4 x i1> -> <4 x float>. NOVL keeps the all-ones compare result in
; xmm and masks it down to 0/1 with a broadcast constant + vpand (the final
; int->fp convert for this path is checked past these lines); VL uses the
; k-register compare + masked broadcast + vcvtudq2ps pattern.
1248 define <4 x float> @uitofp_4i1_float(<4 x i32> %a) {
1249 ; NOVL-LABEL: uitofp_4i1_float:
1251 ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1252 ; NOVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
1253 ; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
1254 ; NOVL-NEXT: vpand %xmm1, %xmm0, %xmm0
1257 ; VL-LABEL: uitofp_4i1_float:
1259 ; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1260 ; VL-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
1261 ; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
1262 ; VL-NEXT: vcvtudq2ps %xmm0, %xmm0
1264 %mask = icmp slt <4 x i32> %a, zeroinitializer
1265 %1 = uitofp <4 x i1> %mask to <4 x float>
; uitofp <4 x i1> -> <4 x double>. NOVL turns the all-ones compare mask into
; 0/1 by shifting the sign bit down (vpsrld $31) and can then use the signed
; vcvtdq2pd; VL uses the masked broadcast + vcvtudq2pd pattern.
1269 define <4 x double> @uitofp_4i1_double(<4 x i32> %a) {
1270 ; NOVL-LABEL: uitofp_4i1_double:
1272 ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1273 ; NOVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
1274 ; NOVL-NEXT: vpsrld $31, %xmm0, %xmm0
1275 ; NOVL-NEXT: vcvtdq2pd %xmm0, %ymm0
1278 ; VL-LABEL: uitofp_4i1_double:
1280 ; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1281 ; VL-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
1282 ; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
1283 ; VL-NEXT: vcvtudq2pd %xmm0, %ymm0
1285 %mask = icmp slt <4 x i32> %a, zeroinitializer
1286 %1 = uitofp <4 x i1> %mask to <4 x double>
; uitofp <2 x i1> -> <2 x float>. The <2 x i32> input is promoted to <2 x i64>
; (vpblendd zeroes the odd dword lanes). NOVL has no unsigned qword compare,
; so it flips the sign bits with the 2^63 constant and uses signed vpcmpgtq,
; then converts the two mask bits one at a time with scalar vcvtsi2ssl; VL
; compares unsigned straight into a k-register (vpcmpltuq).
; NOTE(review): `icmp ult %a, 0` is never true, so the mask is constant false;
; the test still exercises the uitofp-of-i1 lowering path, but the input could
; be made non-trivial -- confirm intent before changing.
1290 define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
1291 ; NOVL-LABEL: uitofp_2i1_float:
1293 ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1294 ; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1295 ; NOVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
1296 ; NOVL-NEXT: vpxor %xmm1, %xmm0, %xmm0
1297 ; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
1298 ; NOVL-NEXT: vpextrq $1, %xmm0, %rax
1299 ; NOVL-NEXT: andl $1, %eax
1300 ; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1
1301 ; NOVL-NEXT: vmovq %xmm0, %rax
1302 ; NOVL-NEXT: andl $1, %eax
1303 ; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
1304 ; NOVL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
1307 ; VL-LABEL: uitofp_2i1_float:
1309 ; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1310 ; VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1311 ; VL-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
1312 ; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
1313 ; VL-NEXT: vcvtudq2ps %xmm0, %xmm0
1315 %mask = icmp ult <2 x i32> %a, zeroinitializer
1316 %1 = uitofp <2 x i1> %mask to <2 x float>
1320 define <2 x double> @uitofp_2i1_double(<2 x i32> %a) {
1321 ; NOVL-LABEL: uitofp_2i1_double:
1323 ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1324 ; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1325 ; NOVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
1326 ; NOVL-NEXT: vpxor %xmm1, %xmm0, %xmm0
1327 ; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
1328 ; NOVL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
1331 ; VLDQ-LABEL: uitofp_2i1_double:
1333 ; VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1334 ; VLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1335 ; VLDQ-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
1336 ; VLDQ-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
1337 ; VLDQ-NEXT: vcvtuqq2pd %xmm0, %xmm0
1340 ; VLNODQ-LABEL: uitofp_2i1_double:
1342 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
1343 ; VLNODQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1344 ; VLNODQ-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
1345 ; VLNODQ-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
1346 ; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
1347 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm1
1348 ; VLNODQ-NEXT: vmovq %xmm0, %rax
1349 ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
1350 ; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1352 %mask = icmp ult <2 x i32> %a, zeroinitializer
1353 %1 = uitofp <2 x i1> %mask to <2 x double>