1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512 --check-prefixes=AVX512F
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512 --check-prefixes=AVX512BW
; v8i16: signed greater-than compare of two <8 x i16> vectors. The <8 x i1>
; result is bitcast to an i8 mask (%res), matching the i8 return type.
; The checks below expect compare + pack-to-bytes + pmovmskb without AVX-512,
; a widen-to-dwords + vptestmd sequence for AVX-512 without BW, and a direct
; compare into mask register k0 when BW is enabled.
9 define i8 @v8i16(<8 x i16> %a, <8 x i16> %b) {
10 ; SSE2-SSSE3-LABEL: v8i16:
11 ; SSE2-SSSE3: # %bb.0:
12 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
13 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
14 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
15 ; SSE2-SSSE3-NEXT: # kill: def %al killed %al killed %eax
16 ; SSE2-SSSE3-NEXT: retq
20 ; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
21 ; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
22 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
23 ; AVX12-NEXT: # kill: def %al killed %al killed %eax
26 ; AVX512F-LABEL: v8i16:
28 ; AVX512F-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
29 ; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
30 ; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
31 ; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0
32 ; AVX512F-NEXT: kmovw %k0, %eax
33 ; AVX512F-NEXT: # kill: def %al killed %al killed %eax
34 ; AVX512F-NEXT: vzeroupper
37 ; AVX512BW-LABEL: v8i16:
39 ; AVX512BW-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
40 ; AVX512BW-NEXT: kmovd %k0, %eax
41 ; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
43 %x = icmp sgt <8 x i16> %a, %b
44 %res = bitcast <8 x i1> %x to i8
; v4i32: signed greater-than compare of two <4 x i32> vectors. The <4 x i1>
; result is bitcast to an i4 mask (%res), matching the i4 return type.
; The checks below expect pcmpgtd + movmskps without AVX-512. With AVX-512 the
; compare writes mask register k0, which is moved to eax and then its low byte
; is stored to and reloaded from a stack slot.
48 define i4 @v4i32(<4 x i32> %a, <4 x i32> %b) {
49 ; SSE2-SSSE3-LABEL: v4i32:
50 ; SSE2-SSSE3: # %bb.0:
51 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
52 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
53 ; SSE2-SSSE3-NEXT: # kill: def %al killed %al killed %eax
54 ; SSE2-SSSE3-NEXT: retq
58 ; AVX12-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
59 ; AVX12-NEXT: vmovmskps %xmm0, %eax
60 ; AVX12-NEXT: # kill: def %al killed %al killed %eax
63 ; AVX512F-LABEL: v4i32:
65 ; AVX512F-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
66 ; AVX512F-NEXT: kmovw %k0, %eax
67 ; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
68 ; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
71 ; AVX512BW-LABEL: v4i32:
73 ; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
74 ; AVX512BW-NEXT: kmovd %k0, %eax
75 ; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
76 ; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
78 %x = icmp sgt <4 x i32> %a, %b
79 %res = bitcast <4 x i1> %x to i4
; v4f32: ordered greater-than (fcmp ogt) compare of two <4 x float> vectors.
; The <4 x i1> result is bitcast to an i4 mask (%res), matching the i4 return
; type.
; The checks below expect the compare to be emitted as a less-than with the
; operands swapped (cmpltps %xmm0, %xmm1), followed by movmskps without
; AVX-512, or by a mask-register read plus a stack store/reload of the low
; byte with AVX-512.
83 define i4 @v4f32(<4 x float> %a, <4 x float> %b) {
84 ; SSE2-SSSE3-LABEL: v4f32:
85 ; SSE2-SSSE3: # %bb.0:
86 ; SSE2-SSSE3-NEXT: cmpltps %xmm0, %xmm1
87 ; SSE2-SSSE3-NEXT: movmskps %xmm1, %eax
88 ; SSE2-SSSE3-NEXT: # kill: def %al killed %al killed %eax
89 ; SSE2-SSSE3-NEXT: retq
93 ; AVX12-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
94 ; AVX12-NEXT: vmovmskps %xmm0, %eax
95 ; AVX12-NEXT: # kill: def %al killed %al killed %eax
98 ; AVX512F-LABEL: v4f32:
100 ; AVX512F-NEXT: vcmpltps %xmm0, %xmm1, %k0
101 ; AVX512F-NEXT: kmovw %k0, %eax
102 ; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
103 ; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
106 ; AVX512BW-LABEL: v4f32:
108 ; AVX512BW-NEXT: vcmpltps %xmm0, %xmm1, %k0
109 ; AVX512BW-NEXT: kmovd %k0, %eax
110 ; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
111 ; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
112 ; AVX512BW-NEXT: retq
113 %x = fcmp ogt <4 x float> %a, %b
114 %res = bitcast <4 x i1> %x to i4
; v16i8: signed greater-than compare of two <16 x i8> vectors. The <16 x i1>
; result is bitcast to an i16 mask (%res), matching the i16 return type.
; The checks below expect pcmpgtb + pmovmskb without AVX-512. AVX-512 without
; BW sign-extends the byte compare result to zmm dwords and uses vptestmd to
; build k0; with BW the compare writes k0 directly.
118 define i16 @v16i8(<16 x i8> %a, <16 x i8> %b) {
119 ; SSE2-SSSE3-LABEL: v16i8:
120 ; SSE2-SSSE3: # %bb.0:
121 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
122 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
123 ; SSE2-SSSE3-NEXT: # kill: def %ax killed %ax killed %eax
124 ; SSE2-SSSE3-NEXT: retq
126 ; AVX12-LABEL: v16i8:
128 ; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
129 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
130 ; AVX12-NEXT: # kill: def %ax killed %ax killed %eax
133 ; AVX512F-LABEL: v16i8:
135 ; AVX512F-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
136 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
137 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
138 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
139 ; AVX512F-NEXT: kmovw %k0, %eax
140 ; AVX512F-NEXT: # kill: def %ax killed %ax killed %eax
141 ; AVX512F-NEXT: vzeroupper
144 ; AVX512BW-LABEL: v16i8:
146 ; AVX512BW-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
147 ; AVX512BW-NEXT: kmovd %k0, %eax
148 ; AVX512BW-NEXT: # kill: def %ax killed %ax killed %eax
149 ; AVX512BW-NEXT: retq
150 %x = icmp sgt <16 x i8> %a, %b
151 %res = bitcast <16 x i1> %x to i16
; v2i8: signed greater-than compare of two <2 x i8> vectors. The <2 x i1>
; result is bitcast to an i2 mask (%res), matching the i2 return type.
; The checks below expect each operand to be shifted left by 56 and then
; arithmetic-shifted right before a 64-bit-lane compare: a vpsllq/vpsraq pair
; with AVX-512, psrad plus shuffles/blends on the other targets.
; NOTE(review): this looks like in-register sign extension of i8 values held
; in 64-bit lanes - inferred from the shift amounts, confirm against the
; type legalizer.
155 define i2 @v2i8(<2 x i8> %a, <2 x i8> %b) {
156 ; SSE2-SSSE3-LABEL: v2i8:
157 ; SSE2-SSSE3: # %bb.0:
158 ; SSE2-SSSE3-NEXT: psllq $56, %xmm0
159 ; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm2
160 ; SSE2-SSSE3-NEXT: psrad $31, %xmm2
161 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
162 ; SSE2-SSSE3-NEXT: psrad $24, %xmm0
163 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
164 ; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
165 ; SSE2-SSSE3-NEXT: psllq $56, %xmm1
166 ; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm2
167 ; SSE2-SSSE3-NEXT: psrad $31, %xmm2
168 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
169 ; SSE2-SSSE3-NEXT: psrad $24, %xmm1
170 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
171 ; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
172 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
173 ; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1
174 ; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0
175 ; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm2
176 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
177 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
178 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
179 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
180 ; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
181 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
182 ; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
183 ; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax
184 ; SSE2-SSSE3-NEXT: # kill: def %al killed %al killed %eax
185 ; SSE2-SSSE3-NEXT: retq
189 ; AVX1-NEXT: vpsllq $56, %xmm1, %xmm1
190 ; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
191 ; AVX1-NEXT: vpsrad $24, %xmm1, %xmm1
192 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
193 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
194 ; AVX1-NEXT: vpsllq $56, %xmm0, %xmm0
195 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
196 ; AVX1-NEXT: vpsrad $24, %xmm0, %xmm0
197 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
198 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
199 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
200 ; AVX1-NEXT: vmovmskpd %xmm0, %eax
201 ; AVX1-NEXT: # kill: def %al killed %al killed %eax
206 ; AVX2-NEXT: vpsllq $56, %xmm1, %xmm1
207 ; AVX2-NEXT: vpsrad $31, %xmm1, %xmm2
208 ; AVX2-NEXT: vpsrad $24, %xmm1, %xmm1
209 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
210 ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
211 ; AVX2-NEXT: vpsllq $56, %xmm0, %xmm0
212 ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
213 ; AVX2-NEXT: vpsrad $24, %xmm0, %xmm0
214 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
215 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
216 ; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
217 ; AVX2-NEXT: vmovmskpd %xmm0, %eax
218 ; AVX2-NEXT: # kill: def %al killed %al killed %eax
221 ; AVX512F-LABEL: v2i8:
223 ; AVX512F-NEXT: vpsllq $56, %xmm1, %xmm1
224 ; AVX512F-NEXT: vpsraq $56, %xmm1, %xmm1
225 ; AVX512F-NEXT: vpsllq $56, %xmm0, %xmm0
226 ; AVX512F-NEXT: vpsraq $56, %xmm0, %xmm0
227 ; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
228 ; AVX512F-NEXT: kmovw %k0, %eax
229 ; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
230 ; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
233 ; AVX512BW-LABEL: v2i8:
235 ; AVX512BW-NEXT: vpsllq $56, %xmm1, %xmm1
236 ; AVX512BW-NEXT: vpsraq $56, %xmm1, %xmm1
237 ; AVX512BW-NEXT: vpsllq $56, %xmm0, %xmm0
238 ; AVX512BW-NEXT: vpsraq $56, %xmm0, %xmm0
239 ; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
240 ; AVX512BW-NEXT: kmovd %k0, %eax
241 ; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
242 ; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
243 ; AVX512BW-NEXT: retq
244 %x = icmp sgt <2 x i8> %a, %b
245 %res = bitcast <2 x i1> %x to i2
; v2i16: signed greater-than compare of two <2 x i16> vectors. The <2 x i1>
; result is bitcast to an i2 mask (%res), matching the i2 return type.
; The checks below expect each operand to be shifted left by 48 and then
; arithmetic-shifted right before a 64-bit-lane compare: a vpsllq/vpsraq pair
; with AVX-512, psrad plus shuffles/blends on the other targets.
; NOTE(review): this looks like in-register sign extension of i16 values held
; in 64-bit lanes - inferred from the shift amounts, confirm against the
; type legalizer.
249 define i2 @v2i16(<2 x i16> %a, <2 x i16> %b) {
250 ; SSE2-SSSE3-LABEL: v2i16:
251 ; SSE2-SSSE3: # %bb.0:
252 ; SSE2-SSSE3-NEXT: psllq $48, %xmm0
253 ; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm2
254 ; SSE2-SSSE3-NEXT: psrad $31, %xmm2
255 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
256 ; SSE2-SSSE3-NEXT: psrad $16, %xmm0
257 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
258 ; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
259 ; SSE2-SSSE3-NEXT: psllq $48, %xmm1
260 ; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm2
261 ; SSE2-SSSE3-NEXT: psrad $31, %xmm2
262 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
263 ; SSE2-SSSE3-NEXT: psrad $16, %xmm1
264 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
265 ; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
266 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
267 ; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1
268 ; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0
269 ; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm2
270 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
271 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
272 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
273 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
274 ; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
275 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
276 ; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
277 ; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax
278 ; SSE2-SSSE3-NEXT: # kill: def %al killed %al killed %eax
279 ; SSE2-SSSE3-NEXT: retq
283 ; AVX1-NEXT: vpsllq $48, %xmm1, %xmm1
284 ; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
285 ; AVX1-NEXT: vpsrad $16, %xmm1, %xmm1
286 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
287 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
288 ; AVX1-NEXT: vpsllq $48, %xmm0, %xmm0
289 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
290 ; AVX1-NEXT: vpsrad $16, %xmm0, %xmm0
291 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
292 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
293 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
294 ; AVX1-NEXT: vmovmskpd %xmm0, %eax
295 ; AVX1-NEXT: # kill: def %al killed %al killed %eax
300 ; AVX2-NEXT: vpsllq $48, %xmm1, %xmm1
301 ; AVX2-NEXT: vpsrad $31, %xmm1, %xmm2
302 ; AVX2-NEXT: vpsrad $16, %xmm1, %xmm1
303 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
304 ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
305 ; AVX2-NEXT: vpsllq $48, %xmm0, %xmm0
306 ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
307 ; AVX2-NEXT: vpsrad $16, %xmm0, %xmm0
308 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
309 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
310 ; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
311 ; AVX2-NEXT: vmovmskpd %xmm0, %eax
312 ; AVX2-NEXT: # kill: def %al killed %al killed %eax
315 ; AVX512F-LABEL: v2i16:
317 ; AVX512F-NEXT: vpsllq $48, %xmm1, %xmm1
318 ; AVX512F-NEXT: vpsraq $48, %xmm1, %xmm1
319 ; AVX512F-NEXT: vpsllq $48, %xmm0, %xmm0
320 ; AVX512F-NEXT: vpsraq $48, %xmm0, %xmm0
321 ; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
322 ; AVX512F-NEXT: kmovw %k0, %eax
323 ; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
324 ; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
327 ; AVX512BW-LABEL: v2i16:
329 ; AVX512BW-NEXT: vpsllq $48, %xmm1, %xmm1
330 ; AVX512BW-NEXT: vpsraq $48, %xmm1, %xmm1
331 ; AVX512BW-NEXT: vpsllq $48, %xmm0, %xmm0
332 ; AVX512BW-NEXT: vpsraq $48, %xmm0, %xmm0
333 ; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
334 ; AVX512BW-NEXT: kmovd %k0, %eax
335 ; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
336 ; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
337 ; AVX512BW-NEXT: retq
338 %x = icmp sgt <2 x i16> %a, %b
339 %res = bitcast <2 x i1> %x to i2
; v2i32: signed greater-than compare of two <2 x i32> vectors. The <2 x i1>
; result is bitcast to an i2 mask (%res), matching the i2 return type.
; The checks below expect each operand to be shifted left by 32 and then
; arithmetic-shifted right before a 64-bit-lane compare: a vpsllq/vpsraq pair
; with AVX-512, psrad $31 plus shuffles/blends on the other targets.
; NOTE(review): this looks like in-register sign extension of i32 values held
; in 64-bit lanes - inferred from the shift amounts, confirm against the
; type legalizer.
343 define i2 @v2i32(<2 x i32> %a, <2 x i32> %b) {
344 ; SSE2-SSSE3-LABEL: v2i32:
345 ; SSE2-SSSE3: # %bb.0:
346 ; SSE2-SSSE3-NEXT: psllq $32, %xmm0
347 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
348 ; SSE2-SSSE3-NEXT: psrad $31, %xmm0
349 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
350 ; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
351 ; SSE2-SSSE3-NEXT: psllq $32, %xmm1
352 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,3,2,3]
353 ; SSE2-SSSE3-NEXT: psrad $31, %xmm1
354 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
355 ; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
356 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,0,2147483648,0]
357 ; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm0
358 ; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm2
359 ; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1
360 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
361 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,0,2,2]
362 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm2
363 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
364 ; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
365 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
366 ; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
367 ; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax
368 ; SSE2-SSSE3-NEXT: # kill: def %al killed %al killed %eax
369 ; SSE2-SSSE3-NEXT: retq
373 ; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1
374 ; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
375 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
376 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
377 ; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
378 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
379 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
380 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
381 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
382 ; AVX1-NEXT: vmovmskpd %xmm0, %eax
383 ; AVX1-NEXT: # kill: def %al killed %al killed %eax
388 ; AVX2-NEXT: vpsllq $32, %xmm1, %xmm1
389 ; AVX2-NEXT: vpsrad $31, %xmm1, %xmm2
390 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
391 ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
392 ; AVX2-NEXT: vpsllq $32, %xmm0, %xmm0
393 ; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
394 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
395 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
396 ; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
397 ; AVX2-NEXT: vmovmskpd %xmm0, %eax
398 ; AVX2-NEXT: # kill: def %al killed %al killed %eax
401 ; AVX512F-LABEL: v2i32:
403 ; AVX512F-NEXT: vpsllq $32, %xmm1, %xmm1
404 ; AVX512F-NEXT: vpsraq $32, %xmm1, %xmm1
405 ; AVX512F-NEXT: vpsllq $32, %xmm0, %xmm0
406 ; AVX512F-NEXT: vpsraq $32, %xmm0, %xmm0
407 ; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
408 ; AVX512F-NEXT: kmovw %k0, %eax
409 ; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
410 ; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
413 ; AVX512BW-LABEL: v2i32:
415 ; AVX512BW-NEXT: vpsllq $32, %xmm1, %xmm1
416 ; AVX512BW-NEXT: vpsraq $32, %xmm1, %xmm1
417 ; AVX512BW-NEXT: vpsllq $32, %xmm0, %xmm0
418 ; AVX512BW-NEXT: vpsraq $32, %xmm0, %xmm0
419 ; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
420 ; AVX512BW-NEXT: kmovd %k0, %eax
421 ; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
422 ; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
423 ; AVX512BW-NEXT: retq
424 %x = icmp sgt <2 x i32> %a, %b
425 %res = bitcast <2 x i1> %x to i2
; v2i64: signed greater-than compare of two <2 x i64> vectors. The <2 x i1>
; result is bitcast to an i2 mask (%res), matching the i2 return type.
; The checks below expect no operand shifts here. The SSE runs build the
; 64-bit compare from pxor with a constant, pcmpgtd, pcmpeqd, pand and por,
; then read the mask with movmskpd; the AVX runs use vpcmpgtq + vmovmskpd;
; the AVX-512 runs compare straight into mask register k0.
429 define i2 @v2i64(<2 x i64> %a, <2 x i64> %b) {
430 ; SSE2-SSSE3-LABEL: v2i64:
431 ; SSE2-SSSE3: # %bb.0:
432 ; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
433 ; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1
434 ; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0
435 ; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm2
436 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
437 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
438 ; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
439 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
440 ; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
441 ; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
442 ; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
443 ; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax
444 ; SSE2-SSSE3-NEXT: # kill: def %al killed %al killed %eax
445 ; SSE2-SSSE3-NEXT: retq
447 ; AVX12-LABEL: v2i64:
449 ; AVX12-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
450 ; AVX12-NEXT: vmovmskpd %xmm0, %eax
451 ; AVX12-NEXT: # kill: def %al killed %al killed %eax
454 ; AVX512F-LABEL: v2i64:
456 ; AVX512F-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
457 ; AVX512F-NEXT: kmovw %k0, %eax
458 ; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
459 ; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
462 ; AVX512BW-LABEL: v2i64:
464 ; AVX512BW-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
465 ; AVX512BW-NEXT: kmovd %k0, %eax
466 ; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
467 ; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
468 ; AVX512BW-NEXT: retq
469 %x = icmp sgt <2 x i64> %a, %b
470 %res = bitcast <2 x i1> %x to i2
; v2f64: ordered greater-than (fcmp ogt) compare of two <2 x double> vectors.
; The <2 x i1> result is bitcast to an i2 mask (%res), matching the i2 return
; type.
; The checks below expect the compare to be emitted as a less-than with the
; operands swapped (cmpltpd %xmm0, %xmm1), followed by movmskpd without
; AVX-512, or by a mask-register read plus a stack store/reload of the low
; byte with AVX-512.
474 define i2 @v2f64(<2 x double> %a, <2 x double> %b) {
475 ; SSE2-SSSE3-LABEL: v2f64:
476 ; SSE2-SSSE3: # %bb.0:
477 ; SSE2-SSSE3-NEXT: cmpltpd %xmm0, %xmm1
478 ; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax
479 ; SSE2-SSSE3-NEXT: # kill: def %al killed %al killed %eax
480 ; SSE2-SSSE3-NEXT: retq
482 ; AVX12-LABEL: v2f64:
484 ; AVX12-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
485 ; AVX12-NEXT: vmovmskpd %xmm0, %eax
486 ; AVX12-NEXT: # kill: def %al killed %al killed %eax
489 ; AVX512F-LABEL: v2f64:
491 ; AVX512F-NEXT: vcmpltpd %xmm0, %xmm1, %k0
492 ; AVX512F-NEXT: kmovw %k0, %eax
493 ; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
494 ; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
497 ; AVX512BW-LABEL: v2f64:
499 ; AVX512BW-NEXT: vcmpltpd %xmm0, %xmm1, %k0
500 ; AVX512BW-NEXT: kmovd %k0, %eax
501 ; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
502 ; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
503 ; AVX512BW-NEXT: retq
504 %x = fcmp ogt <2 x double> %a, %b
505 %res = bitcast <2 x i1> %x to i2
; v4i8: signed greater-than compare of two <4 x i8> vectors. The <4 x i1>
; result is bitcast to an i4 mask (%res), matching the i4 return type.
; The checks below expect both operands to go through a pslld $24 / psrad $24
; pair before a dword compare; the mask is then read with movmskps, or from
; mask register k0 with AVX-512.
; NOTE(review): the shift pair looks like in-register sign extension of i8
; values held in 32-bit lanes - inferred from the shift amounts, confirm.
509 define i4 @v4i8(<4 x i8> %a, <4 x i8> %b) {
510 ; SSE2-SSSE3-LABEL: v4i8:
511 ; SSE2-SSSE3: # %bb.0:
512 ; SSE2-SSSE3-NEXT: pslld $24, %xmm1
513 ; SSE2-SSSE3-NEXT: psrad $24, %xmm1
514 ; SSE2-SSSE3-NEXT: pslld $24, %xmm0
515 ; SSE2-SSSE3-NEXT: psrad $24, %xmm0
516 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
517 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
518 ; SSE2-SSSE3-NEXT: # kill: def %al killed %al killed %eax
519 ; SSE2-SSSE3-NEXT: retq
523 ; AVX12-NEXT: vpslld $24, %xmm1, %xmm1
524 ; AVX12-NEXT: vpsrad $24, %xmm1, %xmm1
525 ; AVX12-NEXT: vpslld $24, %xmm0, %xmm0
526 ; AVX12-NEXT: vpsrad $24, %xmm0, %xmm0
527 ; AVX12-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
528 ; AVX12-NEXT: vmovmskps %xmm0, %eax
529 ; AVX12-NEXT: # kill: def %al killed %al killed %eax
532 ; AVX512F-LABEL: v4i8:
534 ; AVX512F-NEXT: vpslld $24, %xmm1, %xmm1
535 ; AVX512F-NEXT: vpsrad $24, %xmm1, %xmm1
536 ; AVX512F-NEXT: vpslld $24, %xmm0, %xmm0
537 ; AVX512F-NEXT: vpsrad $24, %xmm0, %xmm0
538 ; AVX512F-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
539 ; AVX512F-NEXT: kmovw %k0, %eax
540 ; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
541 ; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
544 ; AVX512BW-LABEL: v4i8:
546 ; AVX512BW-NEXT: vpslld $24, %xmm1, %xmm1
547 ; AVX512BW-NEXT: vpsrad $24, %xmm1, %xmm1
548 ; AVX512BW-NEXT: vpslld $24, %xmm0, %xmm0
549 ; AVX512BW-NEXT: vpsrad $24, %xmm0, %xmm0
550 ; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
551 ; AVX512BW-NEXT: kmovd %k0, %eax
552 ; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
553 ; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
554 ; AVX512BW-NEXT: retq
555 %x = icmp sgt <4 x i8> %a, %b
556 %res = bitcast <4 x i1> %x to i4
; v4i16: signed greater-than compare of two <4 x i16> vectors. The <4 x i1>
; result is bitcast to an i4 mask (%res), matching the i4 return type.
; The checks below expect both operands to go through a pslld $16 / psrad $16
; pair before a dword compare; the mask is then read with movmskps, or from
; mask register k0 with AVX-512.
; NOTE(review): the shift pair looks like in-register sign extension of i16
; values held in 32-bit lanes - inferred from the shift amounts, confirm.
560 define i4 @v4i16(<4 x i16> %a, <4 x i16> %b) {
561 ; SSE2-SSSE3-LABEL: v4i16:
562 ; SSE2-SSSE3: # %bb.0:
563 ; SSE2-SSSE3-NEXT: pslld $16, %xmm1
564 ; SSE2-SSSE3-NEXT: psrad $16, %xmm1
565 ; SSE2-SSSE3-NEXT: pslld $16, %xmm0
566 ; SSE2-SSSE3-NEXT: psrad $16, %xmm0
567 ; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
568 ; SSE2-SSSE3-NEXT: movmskps %xmm0, %eax
569 ; SSE2-SSSE3-NEXT: # kill: def %al killed %al killed %eax
570 ; SSE2-SSSE3-NEXT: retq
572 ; AVX12-LABEL: v4i16:
574 ; AVX12-NEXT: vpslld $16, %xmm1, %xmm1
575 ; AVX12-NEXT: vpsrad $16, %xmm1, %xmm1
576 ; AVX12-NEXT: vpslld $16, %xmm0, %xmm0
577 ; AVX12-NEXT: vpsrad $16, %xmm0, %xmm0
578 ; AVX12-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
579 ; AVX12-NEXT: vmovmskps %xmm0, %eax
580 ; AVX12-NEXT: # kill: def %al killed %al killed %eax
583 ; AVX512F-LABEL: v4i16:
585 ; AVX512F-NEXT: vpslld $16, %xmm1, %xmm1
586 ; AVX512F-NEXT: vpsrad $16, %xmm1, %xmm1
587 ; AVX512F-NEXT: vpslld $16, %xmm0, %xmm0
588 ; AVX512F-NEXT: vpsrad $16, %xmm0, %xmm0
589 ; AVX512F-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
590 ; AVX512F-NEXT: kmovw %k0, %eax
591 ; AVX512F-NEXT: movb %al, -{{[0-9]+}}(%rsp)
592 ; AVX512F-NEXT: movb -{{[0-9]+}}(%rsp), %al
595 ; AVX512BW-LABEL: v4i16:
597 ; AVX512BW-NEXT: vpslld $16, %xmm1, %xmm1
598 ; AVX512BW-NEXT: vpsrad $16, %xmm1, %xmm1
599 ; AVX512BW-NEXT: vpslld $16, %xmm0, %xmm0
600 ; AVX512BW-NEXT: vpsrad $16, %xmm0, %xmm0
601 ; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
602 ; AVX512BW-NEXT: kmovd %k0, %eax
603 ; AVX512BW-NEXT: movb %al, -{{[0-9]+}}(%rsp)
604 ; AVX512BW-NEXT: movb -{{[0-9]+}}(%rsp), %al
605 ; AVX512BW-NEXT: retq
606 %x = icmp sgt <4 x i16> %a, %b
607 %res = bitcast <4 x i1> %x to i4
; v8i8: signed greater-than compare of two <8 x i8> vectors. The <8 x i1>
; result is bitcast to an i8 mask (%res), matching the i8 return type.
; The checks below expect both operands to go through a psllw $8 / psraw $8
; pair before a word compare. The mask is then produced by pack + pmovmskb
; without AVX-512, by widen-to-dwords + vptestmd for AVX-512 without BW, and
; by a direct compare into k0 with BW.
; NOTE(review): the shift pair looks like in-register sign extension of i8
; values held in 16-bit lanes - inferred from the shift amounts, confirm.
611 define i8 @v8i8(<8 x i8> %a, <8 x i8> %b) {
612 ; SSE2-SSSE3-LABEL: v8i8:
613 ; SSE2-SSSE3: # %bb.0:
614 ; SSE2-SSSE3-NEXT: psllw $8, %xmm1
615 ; SSE2-SSSE3-NEXT: psraw $8, %xmm1
616 ; SSE2-SSSE3-NEXT: psllw $8, %xmm0
617 ; SSE2-SSSE3-NEXT: psraw $8, %xmm0
618 ; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
619 ; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
620 ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
621 ; SSE2-SSSE3-NEXT: # kill: def %al killed %al killed %eax
622 ; SSE2-SSSE3-NEXT: retq
626 ; AVX12-NEXT: vpsllw $8, %xmm1, %xmm1
627 ; AVX12-NEXT: vpsraw $8, %xmm1, %xmm1
628 ; AVX12-NEXT: vpsllw $8, %xmm0, %xmm0
629 ; AVX12-NEXT: vpsraw $8, %xmm0, %xmm0
630 ; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
631 ; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
632 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
633 ; AVX12-NEXT: # kill: def %al killed %al killed %eax
636 ; AVX512F-LABEL: v8i8:
638 ; AVX512F-NEXT: vpsllw $8, %xmm1, %xmm1
639 ; AVX512F-NEXT: vpsraw $8, %xmm1, %xmm1
640 ; AVX512F-NEXT: vpsllw $8, %xmm0, %xmm0
641 ; AVX512F-NEXT: vpsraw $8, %xmm0, %xmm0
642 ; AVX512F-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
643 ; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
644 ; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
645 ; AVX512F-NEXT: vptestmd %ymm0, %ymm0, %k0
646 ; AVX512F-NEXT: kmovw %k0, %eax
647 ; AVX512F-NEXT: # kill: def %al killed %al killed %eax
648 ; AVX512F-NEXT: vzeroupper
651 ; AVX512BW-LABEL: v8i8:
653 ; AVX512BW-NEXT: vpsllw $8, %xmm1, %xmm1
654 ; AVX512BW-NEXT: vpsraw $8, %xmm1, %xmm1
655 ; AVX512BW-NEXT: vpsllw $8, %xmm0, %xmm0
656 ; AVX512BW-NEXT: vpsraw $8, %xmm0, %xmm0
657 ; AVX512BW-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
658 ; AVX512BW-NEXT: kmovd %k0, %eax
659 ; AVX512BW-NEXT: # kill: def %al killed %al killed %eax
660 ; AVX512BW-NEXT: retq
661 %x = icmp sgt <8 x i8> %a, %b
662 %res = bitcast <8 x i1> %x to i8