1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32
3 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X64
5 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c
7 define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
8 ; X32-LABEL: test_mm_add_epi8:
10 ; X32-NEXT: paddb %xmm1, %xmm0
13 ; X64-LABEL: test_mm_add_epi8:
15 ; X64-NEXT: paddb %xmm1, %xmm0
17 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
18 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
19 %res = add <16 x i8> %arg0, %arg1
20 %bc = bitcast <16 x i8> %res to <2 x i64>
24 define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
25 ; X32-LABEL: test_mm_add_epi16:
27 ; X32-NEXT: paddw %xmm1, %xmm0
30 ; X64-LABEL: test_mm_add_epi16:
32 ; X64-NEXT: paddw %xmm1, %xmm0
34 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
35 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
36 %res = add <8 x i16> %arg0, %arg1
37 %bc = bitcast <8 x i16> %res to <2 x i64>
41 define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
42 ; X32-LABEL: test_mm_add_epi32:
44 ; X32-NEXT: paddd %xmm1, %xmm0
47 ; X64-LABEL: test_mm_add_epi32:
49 ; X64-NEXT: paddd %xmm1, %xmm0
51 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
52 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
53 %res = add <4 x i32> %arg0, %arg1
54 %bc = bitcast <4 x i32> %res to <2 x i64>
58 define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
59 ; X32-LABEL: test_mm_add_epi64:
61 ; X32-NEXT: paddq %xmm1, %xmm0
64 ; X64-LABEL: test_mm_add_epi64:
66 ; X64-NEXT: paddq %xmm1, %xmm0
68 %res = add <2 x i64> %a0, %a1
72 define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
73 ; X32-LABEL: test_mm_add_pd:
75 ; X32-NEXT: addpd %xmm1, %xmm0
78 ; X64-LABEL: test_mm_add_pd:
80 ; X64-NEXT: addpd %xmm1, %xmm0
82 %res = fadd <2 x double> %a0, %a1
86 define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
87 ; X32-LABEL: test_mm_add_sd:
89 ; X32-NEXT: addsd %xmm1, %xmm0
92 ; X64-LABEL: test_mm_add_sd:
94 ; X64-NEXT: addsd %xmm1, %xmm0
96 %ext0 = extractelement <2 x double> %a0, i32 0
97 %ext1 = extractelement <2 x double> %a1, i32 0
98 %fadd = fadd double %ext0, %ext1
99 %res = insertelement <2 x double> %a0, double %fadd, i32 0
100 ret <2 x double> %res
103 define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
104 ; X32-LABEL: test_mm_adds_epi8:
106 ; X32-NEXT: paddsb %xmm1, %xmm0
109 ; X64-LABEL: test_mm_adds_epi8:
111 ; X64-NEXT: paddsb %xmm1, %xmm0
113 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
114 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
115 %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %arg0, <16 x i8> %arg1)
116 %bc = bitcast <16 x i8> %res to <2 x i64>
119 declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
121 define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
122 ; X32-LABEL: test_mm_adds_epi16:
124 ; X32-NEXT: paddsw %xmm1, %xmm0
127 ; X64-LABEL: test_mm_adds_epi16:
129 ; X64-NEXT: paddsw %xmm1, %xmm0
131 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
132 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
133 %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %arg0, <8 x i16> %arg1)
134 %bc = bitcast <8 x i16> %res to <2 x i64>
137 declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
139 define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
140 ; X32-LABEL: test_mm_adds_epu8:
142 ; X32-NEXT: paddusb %xmm1, %xmm0
145 ; X64-LABEL: test_mm_adds_epu8:
147 ; X64-NEXT: paddusb %xmm1, %xmm0
149 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
150 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
151 %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %arg0, <16 x i8> %arg1)
152 %bc = bitcast <16 x i8> %res to <2 x i64>
155 declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
157 define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
158 ; X32-LABEL: test_mm_adds_epu16:
160 ; X32-NEXT: paddusw %xmm1, %xmm0
163 ; X64-LABEL: test_mm_adds_epu16:
165 ; X64-NEXT: paddusw %xmm1, %xmm0
167 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
168 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
169 %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %arg0, <8 x i16> %arg1)
170 %bc = bitcast <8 x i16> %res to <2 x i64>
173 declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
175 define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
176 ; X32-LABEL: test_mm_and_pd:
178 ; X32-NEXT: andps %xmm1, %xmm0
181 ; X64-LABEL: test_mm_and_pd:
183 ; X64-NEXT: andps %xmm1, %xmm0
185 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
186 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
187 %res = and <4 x i32> %arg0, %arg1
188 %bc = bitcast <4 x i32> %res to <2 x double>
192 define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
193 ; X32-LABEL: test_mm_and_si128:
195 ; X32-NEXT: andps %xmm1, %xmm0
198 ; X64-LABEL: test_mm_and_si128:
200 ; X64-NEXT: andps %xmm1, %xmm0
202 %res = and <2 x i64> %a0, %a1
206 define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
207 ; X32-LABEL: test_mm_andnot_pd:
209 ; X32-NEXT: andnps %xmm1, %xmm0
212 ; X64-LABEL: test_mm_andnot_pd:
214 ; X64-NEXT: andnps %xmm1, %xmm0
216 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
217 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
218 %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
219 %res = and <4 x i32> %not, %arg1
220 %bc = bitcast <4 x i32> %res to <2 x double>
224 define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
225 ; X32-LABEL: test_mm_andnot_si128:
227 ; X32-NEXT: pcmpeqd %xmm2, %xmm2
228 ; X32-NEXT: pxor %xmm2, %xmm0
229 ; X32-NEXT: pand %xmm1, %xmm0
232 ; X64-LABEL: test_mm_andnot_si128:
234 ; X64-NEXT: pcmpeqd %xmm2, %xmm2
235 ; X64-NEXT: pxor %xmm2, %xmm0
236 ; X64-NEXT: pand %xmm1, %xmm0
238 %not = xor <2 x i64> %a0, <i64 -1, i64 -1>
239 %res = and <2 x i64> %not, %a1
243 define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
244 ; X32-LABEL: test_mm_avg_epu8:
246 ; X32-NEXT: pavgb %xmm1, %xmm0
249 ; X64-LABEL: test_mm_avg_epu8:
251 ; X64-NEXT: pavgb %xmm1, %xmm0
253 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
254 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
255 %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1)
256 %bc = bitcast <16 x i8> %res to <2 x i64>
259 declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1) nounwind readnone
261 define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
262 ; X32-LABEL: test_mm_avg_epu16:
264 ; X32-NEXT: pavgw %xmm1, %xmm0
267 ; X64-LABEL: test_mm_avg_epu16:
269 ; X64-NEXT: pavgw %xmm1, %xmm0
271 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
272 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
273 %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %arg0, <8 x i16> %arg1)
274 %bc = bitcast <8 x i16> %res to <2 x i64>
277 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
279 define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
280 ; X32-LABEL: test_mm_bslli_si128:
282 ; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
285 ; X64-LABEL: test_mm_bslli_si128:
287 ; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
289 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
290 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
291 %bc = bitcast <16 x i8> %res to <2 x i64>
295 define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
296 ; X32-LABEL: test_mm_bsrli_si128:
298 ; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
301 ; X64-LABEL: test_mm_bsrli_si128:
303 ; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
305 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
306 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
307 %bc = bitcast <16 x i8> %res to <2 x i64>
311 define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind {
312 ; X32-LABEL: test_mm_castpd_ps:
316 ; X64-LABEL: test_mm_castpd_ps:
319 %res = bitcast <2 x double> %a0 to <4 x float>
323 define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind {
324 ; X32-LABEL: test_mm_castpd_si128:
328 ; X64-LABEL: test_mm_castpd_si128:
331 %res = bitcast <2 x double> %a0 to <2 x i64>
335 define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind {
336 ; X32-LABEL: test_mm_castps_pd:
340 ; X64-LABEL: test_mm_castps_pd:
343 %res = bitcast <4 x float> %a0 to <2 x double>
344 ret <2 x double> %res
347 define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind {
348 ; X32-LABEL: test_mm_castps_si128:
352 ; X64-LABEL: test_mm_castps_si128:
355 %res = bitcast <4 x float> %a0 to <2 x i64>
359 define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind {
360 ; X32-LABEL: test_mm_castsi128_pd:
364 ; X64-LABEL: test_mm_castsi128_pd:
367 %res = bitcast <2 x i64> %a0 to <2 x double>
368 ret <2 x double> %res
371 define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind {
372 ; X32-LABEL: test_mm_castsi128_ps:
376 ; X64-LABEL: test_mm_castsi128_ps:
379 %res = bitcast <2 x i64> %a0 to <4 x float>
383 define void @test_mm_clflush(i8* %a0) nounwind {
384 ; X32-LABEL: test_mm_clflush:
386 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
387 ; X32-NEXT: clflush (%eax)
390 ; X64-LABEL: test_mm_clflush:
392 ; X64-NEXT: clflush (%rdi)
394 call void @llvm.x86.sse2.clflush(i8* %a0)
397 declare void @llvm.x86.sse2.clflush(i8*) nounwind readnone
399 define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
400 ; X32-LABEL: test_mm_cmpeq_epi8:
402 ; X32-NEXT: pcmpeqb %xmm1, %xmm0
405 ; X64-LABEL: test_mm_cmpeq_epi8:
407 ; X64-NEXT: pcmpeqb %xmm1, %xmm0
409 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
410 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
411 %cmp = icmp eq <16 x i8> %arg0, %arg1
412 %res = sext <16 x i1> %cmp to <16 x i8>
413 %bc = bitcast <16 x i8> %res to <2 x i64>
417 define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
418 ; X32-LABEL: test_mm_cmpeq_epi16:
420 ; X32-NEXT: pcmpeqw %xmm1, %xmm0
423 ; X64-LABEL: test_mm_cmpeq_epi16:
425 ; X64-NEXT: pcmpeqw %xmm1, %xmm0
427 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
428 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
429 %cmp = icmp eq <8 x i16> %arg0, %arg1
430 %res = sext <8 x i1> %cmp to <8 x i16>
431 %bc = bitcast <8 x i16> %res to <2 x i64>
435 define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
436 ; X32-LABEL: test_mm_cmpeq_epi32:
438 ; X32-NEXT: pcmpeqd %xmm1, %xmm0
441 ; X64-LABEL: test_mm_cmpeq_epi32:
443 ; X64-NEXT: pcmpeqd %xmm1, %xmm0
445 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
446 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
447 %cmp = icmp eq <4 x i32> %arg0, %arg1
448 %res = sext <4 x i1> %cmp to <4 x i32>
449 %bc = bitcast <4 x i32> %res to <2 x i64>
453 define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
454 ; X32-LABEL: test_mm_cmpeq_pd:
456 ; X32-NEXT: cmpeqpd %xmm1, %xmm0
459 ; X64-LABEL: test_mm_cmpeq_pd:
461 ; X64-NEXT: cmpeqpd %xmm1, %xmm0
463 %fcmp = fcmp oeq <2 x double> %a0, %a1
464 %sext = sext <2 x i1> %fcmp to <2 x i64>
465 %res = bitcast <2 x i64> %sext to <2 x double>
466 ret <2 x double> %res
469 define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
470 ; X32-LABEL: test_mm_cmpeq_sd:
472 ; X32-NEXT: cmpeqsd %xmm1, %xmm0
475 ; X64-LABEL: test_mm_cmpeq_sd:
477 ; X64-NEXT: cmpeqsd %xmm1, %xmm0
479 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
480 ret <2 x double> %res
482 declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
484 define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
485 ; X32-LABEL: test_mm_cmpge_pd:
487 ; X32-NEXT: cmplepd %xmm0, %xmm1
488 ; X32-NEXT: movapd %xmm1, %xmm0
491 ; X64-LABEL: test_mm_cmpge_pd:
493 ; X64-NEXT: cmplepd %xmm0, %xmm1
494 ; X64-NEXT: movapd %xmm1, %xmm0
496 %fcmp = fcmp ole <2 x double> %a1, %a0
497 %sext = sext <2 x i1> %fcmp to <2 x i64>
498 %res = bitcast <2 x i64> %sext to <2 x double>
499 ret <2 x double> %res
502 define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
503 ; X32-LABEL: test_mm_cmpge_sd:
505 ; X32-NEXT: cmplesd %xmm0, %xmm1
506 ; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
509 ; X64-LABEL: test_mm_cmpge_sd:
511 ; X64-NEXT: cmplesd %xmm0, %xmm1
512 ; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
514 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2)
515 %ext0 = extractelement <2 x double> %cmp, i32 0
516 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
517 %ext1 = extractelement <2 x double> %a0, i32 1
518 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
519 ret <2 x double> %ins1
522 define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
523 ; X32-LABEL: test_mm_cmpgt_epi8:
525 ; X32-NEXT: pcmpgtb %xmm1, %xmm0
528 ; X64-LABEL: test_mm_cmpgt_epi8:
530 ; X64-NEXT: pcmpgtb %xmm1, %xmm0
532 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
533 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
534 %cmp = icmp sgt <16 x i8> %arg0, %arg1
535 %res = sext <16 x i1> %cmp to <16 x i8>
536 %bc = bitcast <16 x i8> %res to <2 x i64>
540 define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
541 ; X32-LABEL: test_mm_cmpgt_epi16:
543 ; X32-NEXT: pcmpgtw %xmm1, %xmm0
546 ; X64-LABEL: test_mm_cmpgt_epi16:
548 ; X64-NEXT: pcmpgtw %xmm1, %xmm0
550 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
551 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
552 %cmp = icmp sgt <8 x i16> %arg0, %arg1
553 %res = sext <8 x i1> %cmp to <8 x i16>
554 %bc = bitcast <8 x i16> %res to <2 x i64>
558 define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
559 ; X32-LABEL: test_mm_cmpgt_epi32:
561 ; X32-NEXT: pcmpgtd %xmm1, %xmm0
564 ; X64-LABEL: test_mm_cmpgt_epi32:
566 ; X64-NEXT: pcmpgtd %xmm1, %xmm0
568 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
569 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
570 %cmp = icmp sgt <4 x i32> %arg0, %arg1
571 %res = sext <4 x i1> %cmp to <4 x i32>
572 %bc = bitcast <4 x i32> %res to <2 x i64>
576 define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
577 ; X32-LABEL: test_mm_cmpgt_pd:
579 ; X32-NEXT: cmpltpd %xmm0, %xmm1
580 ; X32-NEXT: movapd %xmm1, %xmm0
583 ; X64-LABEL: test_mm_cmpgt_pd:
585 ; X64-NEXT: cmpltpd %xmm0, %xmm1
586 ; X64-NEXT: movapd %xmm1, %xmm0
588 %fcmp = fcmp olt <2 x double> %a1, %a0
589 %sext = sext <2 x i1> %fcmp to <2 x i64>
590 %res = bitcast <2 x i64> %sext to <2 x double>
591 ret <2 x double> %res
594 define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
595 ; X32-LABEL: test_mm_cmpgt_sd:
597 ; X32-NEXT: cmpltsd %xmm0, %xmm1
598 ; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
601 ; X64-LABEL: test_mm_cmpgt_sd:
603 ; X64-NEXT: cmpltsd %xmm0, %xmm1
604 ; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
606 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1)
607 %ext0 = extractelement <2 x double> %cmp, i32 0
608 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
609 %ext1 = extractelement <2 x double> %a0, i32 1
610 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
611 ret <2 x double> %ins1
614 define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
615 ; X32-LABEL: test_mm_cmple_pd:
617 ; X32-NEXT: cmplepd %xmm1, %xmm0
620 ; X64-LABEL: test_mm_cmple_pd:
622 ; X64-NEXT: cmplepd %xmm1, %xmm0
624 %fcmp = fcmp ole <2 x double> %a0, %a1
625 %sext = sext <2 x i1> %fcmp to <2 x i64>
626 %res = bitcast <2 x i64> %sext to <2 x double>
627 ret <2 x double> %res
630 define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
631 ; X32-LABEL: test_mm_cmple_sd:
633 ; X32-NEXT: cmplesd %xmm1, %xmm0
636 ; X64-LABEL: test_mm_cmple_sd:
638 ; X64-NEXT: cmplesd %xmm1, %xmm0
640 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2)
641 ret <2 x double> %res
644 define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
645 ; X32-LABEL: test_mm_cmplt_epi8:
647 ; X32-NEXT: pcmpgtb %xmm0, %xmm1
648 ; X32-NEXT: movdqa %xmm1, %xmm0
651 ; X64-LABEL: test_mm_cmplt_epi8:
653 ; X64-NEXT: pcmpgtb %xmm0, %xmm1
654 ; X64-NEXT: movdqa %xmm1, %xmm0
656 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
657 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
658 %cmp = icmp sgt <16 x i8> %arg1, %arg0
659 %res = sext <16 x i1> %cmp to <16 x i8>
660 %bc = bitcast <16 x i8> %res to <2 x i64>
664 define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
665 ; X32-LABEL: test_mm_cmplt_epi16:
667 ; X32-NEXT: pcmpgtw %xmm0, %xmm1
668 ; X32-NEXT: movdqa %xmm1, %xmm0
671 ; X64-LABEL: test_mm_cmplt_epi16:
673 ; X64-NEXT: pcmpgtw %xmm0, %xmm1
674 ; X64-NEXT: movdqa %xmm1, %xmm0
676 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
677 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
678 %cmp = icmp sgt <8 x i16> %arg1, %arg0
679 %res = sext <8 x i1> %cmp to <8 x i16>
680 %bc = bitcast <8 x i16> %res to <2 x i64>
684 define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
685 ; X32-LABEL: test_mm_cmplt_epi32:
687 ; X32-NEXT: pcmpgtd %xmm0, %xmm1
688 ; X32-NEXT: movdqa %xmm1, %xmm0
691 ; X64-LABEL: test_mm_cmplt_epi32:
693 ; X64-NEXT: pcmpgtd %xmm0, %xmm1
694 ; X64-NEXT: movdqa %xmm1, %xmm0
696 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
697 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
698 %cmp = icmp sgt <4 x i32> %arg1, %arg0
699 %res = sext <4 x i1> %cmp to <4 x i32>
700 %bc = bitcast <4 x i32> %res to <2 x i64>
704 define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
705 ; X32-LABEL: test_mm_cmplt_pd:
707 ; X32-NEXT: cmpltpd %xmm1, %xmm0
710 ; X64-LABEL: test_mm_cmplt_pd:
712 ; X64-NEXT: cmpltpd %xmm1, %xmm0
714 %fcmp = fcmp olt <2 x double> %a0, %a1
715 %sext = sext <2 x i1> %fcmp to <2 x i64>
716 %res = bitcast <2 x i64> %sext to <2 x double>
717 ret <2 x double> %res
720 define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
721 ; X32-LABEL: test_mm_cmplt_sd:
723 ; X32-NEXT: cmpltsd %xmm1, %xmm0
726 ; X64-LABEL: test_mm_cmplt_sd:
728 ; X64-NEXT: cmpltsd %xmm1, %xmm0
730 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1)
731 ret <2 x double> %res
734 define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
735 ; X32-LABEL: test_mm_cmpneq_pd:
737 ; X32-NEXT: cmpneqpd %xmm1, %xmm0
740 ; X64-LABEL: test_mm_cmpneq_pd:
742 ; X64-NEXT: cmpneqpd %xmm1, %xmm0
744 %fcmp = fcmp une <2 x double> %a0, %a1
745 %sext = sext <2 x i1> %fcmp to <2 x i64>
746 %res = bitcast <2 x i64> %sext to <2 x double>
747 ret <2 x double> %res
750 define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
751 ; X32-LABEL: test_mm_cmpneq_sd:
753 ; X32-NEXT: cmpneqsd %xmm1, %xmm0
756 ; X64-LABEL: test_mm_cmpneq_sd:
758 ; X64-NEXT: cmpneqsd %xmm1, %xmm0
760 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4)
761 ret <2 x double> %res
764 define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
765 ; X32-LABEL: test_mm_cmpnge_pd:
767 ; X32-NEXT: cmpnlepd %xmm0, %xmm1
768 ; X32-NEXT: movapd %xmm1, %xmm0
771 ; X64-LABEL: test_mm_cmpnge_pd:
773 ; X64-NEXT: cmpnlepd %xmm0, %xmm1
774 ; X64-NEXT: movapd %xmm1, %xmm0
776 %fcmp = fcmp ugt <2 x double> %a1, %a0
777 %sext = sext <2 x i1> %fcmp to <2 x i64>
778 %res = bitcast <2 x i64> %sext to <2 x double>
779 ret <2 x double> %res
782 define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
783 ; X32-LABEL: test_mm_cmpnge_sd:
785 ; X32-NEXT: cmpnlesd %xmm0, %xmm1
786 ; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
789 ; X64-LABEL: test_mm_cmpnge_sd:
791 ; X64-NEXT: cmpnlesd %xmm0, %xmm1
792 ; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
794 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6)
795 %ext0 = extractelement <2 x double> %cmp, i32 0
796 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
797 %ext1 = extractelement <2 x double> %a0, i32 1
798 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
799 ret <2 x double> %ins1
802 define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
803 ; X32-LABEL: test_mm_cmpngt_pd:
805 ; X32-NEXT: cmpnltpd %xmm0, %xmm1
806 ; X32-NEXT: movapd %xmm1, %xmm0
809 ; X64-LABEL: test_mm_cmpngt_pd:
811 ; X64-NEXT: cmpnltpd %xmm0, %xmm1
812 ; X64-NEXT: movapd %xmm1, %xmm0
814 %fcmp = fcmp uge <2 x double> %a1, %a0
815 %sext = sext <2 x i1> %fcmp to <2 x i64>
816 %res = bitcast <2 x i64> %sext to <2 x double>
817 ret <2 x double> %res
820 define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
821 ; X32-LABEL: test_mm_cmpngt_sd:
823 ; X32-NEXT: cmpnltsd %xmm0, %xmm1
824 ; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
827 ; X64-LABEL: test_mm_cmpngt_sd:
829 ; X64-NEXT: cmpnltsd %xmm0, %xmm1
830 ; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
832 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5)
833 %ext0 = extractelement <2 x double> %cmp, i32 0
834 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
835 %ext1 = extractelement <2 x double> %a0, i32 1
836 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
837 ret <2 x double> %ins1
840 define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
841 ; X32-LABEL: test_mm_cmpnle_pd:
843 ; X32-NEXT: cmpnlepd %xmm1, %xmm0
846 ; X64-LABEL: test_mm_cmpnle_pd:
848 ; X64-NEXT: cmpnlepd %xmm1, %xmm0
850 %fcmp = fcmp ugt <2 x double> %a0, %a1
851 %sext = sext <2 x i1> %fcmp to <2 x i64>
852 %res = bitcast <2 x i64> %sext to <2 x double>
853 ret <2 x double> %res
856 define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
857 ; X32-LABEL: test_mm_cmpnle_sd:
859 ; X32-NEXT: cmpnlesd %xmm1, %xmm0
862 ; X64-LABEL: test_mm_cmpnle_sd:
864 ; X64-NEXT: cmpnlesd %xmm1, %xmm0
866 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6)
867 ret <2 x double> %res
870 define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
871 ; X32-LABEL: test_mm_cmpnlt_pd:
873 ; X32-NEXT: cmpnltpd %xmm1, %xmm0
876 ; X64-LABEL: test_mm_cmpnlt_pd:
878 ; X64-NEXT: cmpnltpd %xmm1, %xmm0
880 %fcmp = fcmp uge <2 x double> %a0, %a1
881 %sext = sext <2 x i1> %fcmp to <2 x i64>
882 %res = bitcast <2 x i64> %sext to <2 x double>
883 ret <2 x double> %res
886 define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
887 ; X32-LABEL: test_mm_cmpnlt_sd:
889 ; X32-NEXT: cmpnltsd %xmm1, %xmm0
892 ; X64-LABEL: test_mm_cmpnlt_sd:
894 ; X64-NEXT: cmpnltsd %xmm1, %xmm0
896 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5)
897 ret <2 x double> %res
900 define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
901 ; X32-LABEL: test_mm_cmpord_pd:
903 ; X32-NEXT: cmpordpd %xmm1, %xmm0
906 ; X64-LABEL: test_mm_cmpord_pd:
908 ; X64-NEXT: cmpordpd %xmm1, %xmm0
910 %fcmp = fcmp ord <2 x double> %a0, %a1
911 %sext = sext <2 x i1> %fcmp to <2 x i64>
912 %res = bitcast <2 x i64> %sext to <2 x double>
913 ret <2 x double> %res
916 define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
917 ; X32-LABEL: test_mm_cmpord_sd:
919 ; X32-NEXT: cmpordsd %xmm1, %xmm0
922 ; X64-LABEL: test_mm_cmpord_sd:
924 ; X64-NEXT: cmpordsd %xmm1, %xmm0
926 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
927 ret <2 x double> %res
930 define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
931 ; X32-LABEL: test_mm_cmpunord_pd:
933 ; X32-NEXT: cmpunordpd %xmm1, %xmm0
936 ; X64-LABEL: test_mm_cmpunord_pd:
938 ; X64-NEXT: cmpunordpd %xmm1, %xmm0
940 %fcmp = fcmp uno <2 x double> %a0, %a1
941 %sext = sext <2 x i1> %fcmp to <2 x i64>
942 %res = bitcast <2 x i64> %sext to <2 x double>
943 ret <2 x double> %res
946 define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
947 ; X32-LABEL: test_mm_cmpunord_sd:
949 ; X32-NEXT: cmpunordsd %xmm1, %xmm0
952 ; X64-LABEL: test_mm_cmpunord_sd:
954 ; X64-NEXT: cmpunordsd %xmm1, %xmm0
956 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 3)
957 ret <2 x double> %res
960 define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
961 ; X32-LABEL: test_mm_comieq_sd:
963 ; X32-NEXT: comisd %xmm1, %xmm0
964 ; X32-NEXT: setnp %al
966 ; X32-NEXT: andb %al, %cl
967 ; X32-NEXT: movzbl %cl, %eax
970 ; X64-LABEL: test_mm_comieq_sd:
972 ; X64-NEXT: comisd %xmm1, %xmm0
973 ; X64-NEXT: setnp %al
975 ; X64-NEXT: andb %al, %cl
976 ; X64-NEXT: movzbl %cl, %eax
978 %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
981 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
983 define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
984 ; X32-LABEL: test_mm_comige_sd:
986 ; X32-NEXT: xorl %eax, %eax
987 ; X32-NEXT: comisd %xmm1, %xmm0
988 ; X32-NEXT: setae %al
991 ; X64-LABEL: test_mm_comige_sd:
993 ; X64-NEXT: xorl %eax, %eax
994 ; X64-NEXT: comisd %xmm1, %xmm0
995 ; X64-NEXT: setae %al
997 %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
1000 declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
1002 define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1003 ; X32-LABEL: test_mm_comigt_sd:
1005 ; X32-NEXT: xorl %eax, %eax
1006 ; X32-NEXT: comisd %xmm1, %xmm0
1007 ; X32-NEXT: seta %al
1010 ; X64-LABEL: test_mm_comigt_sd:
1012 ; X64-NEXT: xorl %eax, %eax
1013 ; X64-NEXT: comisd %xmm1, %xmm0
1014 ; X64-NEXT: seta %al
1016 %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
1019 declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
1021 define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1022 ; X32-LABEL: test_mm_comile_sd:
1024 ; X32-NEXT: xorl %eax, %eax
1025 ; X32-NEXT: comisd %xmm0, %xmm1
1026 ; X32-NEXT: setae %al
1029 ; X64-LABEL: test_mm_comile_sd:
1031 ; X64-NEXT: xorl %eax, %eax
1032 ; X64-NEXT: comisd %xmm0, %xmm1
1033 ; X64-NEXT: setae %al
1035 %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
1038 declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
1040 define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1041 ; X32-LABEL: test_mm_comilt_sd:
1043 ; X32-NEXT: xorl %eax, %eax
1044 ; X32-NEXT: comisd %xmm0, %xmm1
1045 ; X32-NEXT: seta %al
1048 ; X64-LABEL: test_mm_comilt_sd:
1050 ; X64-NEXT: xorl %eax, %eax
1051 ; X64-NEXT: comisd %xmm0, %xmm1
1052 ; X64-NEXT: seta %al
1054 %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
1057 declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
1059 define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1060 ; X32-LABEL: test_mm_comineq_sd:
1062 ; X32-NEXT: comisd %xmm1, %xmm0
1063 ; X32-NEXT: setp %al
1064 ; X32-NEXT: setne %cl
1065 ; X32-NEXT: orb %al, %cl
1066 ; X32-NEXT: movzbl %cl, %eax
1069 ; X64-LABEL: test_mm_comineq_sd:
1071 ; X64-NEXT: comisd %xmm1, %xmm0
1072 ; X64-NEXT: setp %al
1073 ; X64-NEXT: setne %cl
1074 ; X64-NEXT: orb %al, %cl
1075 ; X64-NEXT: movzbl %cl, %eax
1077 %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
1080 declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
1082 define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind {
1083 ; X32-LABEL: test_mm_cvtepi32_pd:
1085 ; X32-NEXT: cvtdq2pd %xmm0, %xmm0
1088 ; X64-LABEL: test_mm_cvtepi32_pd:
1090 ; X64-NEXT: cvtdq2pd %xmm0, %xmm0
1092 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1093 %ext = shufflevector <4 x i32> %arg0, <4 x i32> %arg0, <2 x i32> <i32 0, i32 1>
1094 %res = sitofp <2 x i32> %ext to <2 x double>
1095 ret <2 x double> %res
1098 define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind {
1099 ; X32-LABEL: test_mm_cvtepi32_ps:
1101 ; X32-NEXT: cvtdq2ps %xmm0, %xmm0
1104 ; X64-LABEL: test_mm_cvtepi32_ps:
1106 ; X64-NEXT: cvtdq2ps %xmm0, %xmm0
1108 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1109 %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %arg0)
1110 ret <4 x float> %res
1112 declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
1114 define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind {
1115 ; X32-LABEL: test_mm_cvtpd_epi32:
1117 ; X32-NEXT: cvtpd2dq %xmm0, %xmm0
1120 ; X64-LABEL: test_mm_cvtpd_epi32:
1122 ; X64-NEXT: cvtpd2dq %xmm0, %xmm0
1124 %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
1125 %bc = bitcast <4 x i32> %res to <2 x i64>
1128 declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
1130 define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind {
1131 ; X32-LABEL: test_mm_cvtpd_ps:
1133 ; X32-NEXT: cvtpd2ps %xmm0, %xmm0
1136 ; X64-LABEL: test_mm_cvtpd_ps:
1138 ; X64-NEXT: cvtpd2ps %xmm0, %xmm0
1140 %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
1141 ret <4 x float> %res
1143 declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
1145 define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind {
1146 ; X32-LABEL: test_mm_cvtps_epi32:
1148 ; X32-NEXT: cvtps2dq %xmm0, %xmm0
1151 ; X64-LABEL: test_mm_cvtps_epi32:
1153 ; X64-NEXT: cvtps2dq %xmm0, %xmm0
1155 %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
1156 %bc = bitcast <4 x i32> %res to <2 x i64>
1159 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
1161 define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind {
1162 ; X32-LABEL: test_mm_cvtps_pd:
1164 ; X32-NEXT: cvtps2pd %xmm0, %xmm0
1167 ; X64-LABEL: test_mm_cvtps_pd:
1169 ; X64-NEXT: cvtps2pd %xmm0, %xmm0
1171 %ext = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
1172 %res = fpext <2 x float> %ext to <2 x double>
1173 ret <2 x double> %res
1176 define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind {
1177 ; X32-LABEL: test_mm_cvtsd_f64:
1179 ; X32-NEXT: pushl %ebp
1180 ; X32-NEXT: movl %esp, %ebp
1181 ; X32-NEXT: andl $-8, %esp
1182 ; X32-NEXT: subl $8, %esp
1183 ; X32-NEXT: movlps %xmm0, (%esp)
1184 ; X32-NEXT: fldl (%esp)
1185 ; X32-NEXT: movl %ebp, %esp
1186 ; X32-NEXT: popl %ebp
1189 ; X64-LABEL: test_mm_cvtsd_f64:
1192 %res = extractelement <2 x double> %a0, i32 0
1196 define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind {
1197 ; X32-LABEL: test_mm_cvtsd_si32:
1199 ; X32-NEXT: cvtsd2si %xmm0, %eax
1202 ; X64-LABEL: test_mm_cvtsd_si32:
1204 ; X64-NEXT: cvtsd2si %xmm0, %eax
1206 %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
1209 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
1211 define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) {
1212 ; X32-LABEL: test_mm_cvtsd_ss:
1214 ; X32-NEXT: cvtsd2ss %xmm1, %xmm0
1217 ; X64-LABEL: test_mm_cvtsd_ss:
1219 ; X64-NEXT: cvtsd2ss %xmm1, %xmm0
1221 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
1222 ret <4 x float> %res
1224 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
1226 define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) {
1227 ; X32-LABEL: test_mm_cvtsd_ss_load:
1229 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1230 ; X32-NEXT: cvtsd2ss (%eax), %xmm0
1233 ; X64-LABEL: test_mm_cvtsd_ss_load:
1235 ; X64-NEXT: cvtsd2ss (%rdi), %xmm0
1237 %a1 = load <2 x double>, <2 x double>* %p1
1238 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
1239 ret <4 x float> %res
1242 define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
1243 ; X32-LABEL: test_mm_cvtsi128_si32:
1245 ; X32-NEXT: movd %xmm0, %eax
1248 ; X64-LABEL: test_mm_cvtsi128_si32:
1250 ; X64-NEXT: movd %xmm0, %eax
1252 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1253 %res = extractelement <4 x i32> %arg0, i32 0
1257 define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
1258 ; X32-LABEL: test_mm_cvtsi32_sd:
1260 ; X32-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0
1263 ; X64-LABEL: test_mm_cvtsi32_sd:
1265 ; X64-NEXT: cvtsi2sdl %edi, %xmm0
1267 %cvt = sitofp i32 %a1 to double
1268 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1269 ret <2 x double> %res
1272 define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
1273 ; X32-LABEL: test_mm_cvtsi32_si128:
1275 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1278 ; X64-LABEL: test_mm_cvtsi32_si128:
1280 ; X64-NEXT: movd %edi, %xmm0
1282 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
1283 %res1 = insertelement <4 x i32> %res0, i32 0, i32 1
1284 %res2 = insertelement <4 x i32> %res1, i32 0, i32 2
1285 %res3 = insertelement <4 x i32> %res2, i32 0, i32 3
1286 %res = bitcast <4 x i32> %res3 to <2 x i64>
1290 define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
1291 ; X32-LABEL: test_mm_cvtss_sd:
1293 ; X32-NEXT: cvtss2sd %xmm1, %xmm0
1296 ; X64-LABEL: test_mm_cvtss_sd:
1298 ; X64-NEXT: cvtss2sd %xmm1, %xmm0
1300 %ext = extractelement <4 x float> %a1, i32 0
1301 %cvt = fpext float %ext to double
1302 %res = insertelement <2 x double> %a0, double %cvt, i32 0
1303 ret <2 x double> %res
1306 define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind {
1307 ; X32-LABEL: test_mm_cvttpd_epi32:
1309 ; X32-NEXT: cvttpd2dq %xmm0, %xmm0
1312 ; X64-LABEL: test_mm_cvttpd_epi32:
1314 ; X64-NEXT: cvttpd2dq %xmm0, %xmm0
1316 %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
1317 %bc = bitcast <4 x i32> %res to <2 x i64>
1320 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
1322 define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind {
1323 ; X32-LABEL: test_mm_cvttps_epi32:
1325 ; X32-NEXT: cvttps2dq %xmm0, %xmm0
1328 ; X64-LABEL: test_mm_cvttps_epi32:
1330 ; X64-NEXT: cvttps2dq %xmm0, %xmm0
1332 %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
1333 %bc = bitcast <4 x i32> %res to <2 x i64>
1336 declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
1338 define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
1339 ; X32-LABEL: test_mm_cvttsd_si32:
1341 ; X32-NEXT: cvttsd2si %xmm0, %eax
1344 ; X64-LABEL: test_mm_cvttsd_si32:
1346 ; X64-NEXT: cvttsd2si %xmm0, %eax
1348 %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
1351 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
1353 define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1354 ; X32-LABEL: test_mm_div_pd:
1356 ; X32-NEXT: divpd %xmm1, %xmm0
1359 ; X64-LABEL: test_mm_div_pd:
1361 ; X64-NEXT: divpd %xmm1, %xmm0
1363 %res = fdiv <2 x double> %a0, %a1
1364 ret <2 x double> %res
1367 define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1368 ; X32-LABEL: test_mm_div_sd:
1370 ; X32-NEXT: divsd %xmm1, %xmm0
1373 ; X64-LABEL: test_mm_div_sd:
1375 ; X64-NEXT: divsd %xmm1, %xmm0
1377 %ext0 = extractelement <2 x double> %a0, i32 0
1378 %ext1 = extractelement <2 x double> %a1, i32 0
1379 %fdiv = fdiv double %ext0, %ext1
1380 %res = insertelement <2 x double> %a0, double %fdiv, i32 0
1381 ret <2 x double> %res
1384 define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind {
1385 ; X32-LABEL: test_mm_extract_epi16:
1387 ; X32-NEXT: pextrw $1, %xmm0, %eax
1388 ; X32-NEXT: movzwl %ax, %eax
1391 ; X64-LABEL: test_mm_extract_epi16:
1393 ; X64-NEXT: pextrw $1, %xmm0, %eax
1394 ; X64-NEXT: movzwl %ax, %eax
1396 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1397 %ext = extractelement <8 x i16> %arg0, i32 1
1398 %res = zext i16 %ext to i32
1402 define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind {
1403 ; X32-LABEL: test_mm_insert_epi16:
1405 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
1406 ; X32-NEXT: pinsrw $1, %eax, %xmm0
1409 ; X64-LABEL: test_mm_insert_epi16:
1411 ; X64-NEXT: pinsrw $1, %edi, %xmm0
1413 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1414 %res = insertelement <8 x i16> %arg0, i16 %a1,i32 1
1415 %bc = bitcast <8 x i16> %res to <2 x i64>
1419 define void @test_mm_lfence() nounwind {
1420 ; X32-LABEL: test_mm_lfence:
1425 ; X64-LABEL: test_mm_lfence:
1429 call void @llvm.x86.sse2.lfence()
1432 declare void @llvm.x86.sse2.lfence() nounwind readnone
1434 define <2 x double> @test_mm_load_pd(double* %a0) nounwind {
1435 ; X32-LABEL: test_mm_load_pd:
1437 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1438 ; X32-NEXT: movaps (%eax), %xmm0
1441 ; X64-LABEL: test_mm_load_pd:
1443 ; X64-NEXT: movaps (%rdi), %xmm0
1445 %arg0 = bitcast double* %a0 to <2 x double>*
1446 %res = load <2 x double>, <2 x double>* %arg0, align 16
1447 ret <2 x double> %res
1450 define <2 x double> @test_mm_load_sd(double* %a0) nounwind {
1451 ; X32-LABEL: test_mm_load_sd:
1453 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1454 ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1457 ; X64-LABEL: test_mm_load_sd:
1459 ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1461 %ld = load double, double* %a0, align 1
1462 %res0 = insertelement <2 x double> undef, double %ld, i32 0
1463 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
1464 ret <2 x double> %res1
1467 define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind {
1468 ; X32-LABEL: test_mm_load_si128:
1470 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1471 ; X32-NEXT: movaps (%eax), %xmm0
1474 ; X64-LABEL: test_mm_load_si128:
1476 ; X64-NEXT: movaps (%rdi), %xmm0
1478 %res = load <2 x i64>, <2 x i64>* %a0, align 16
1482 define <2 x double> @test_mm_load1_pd(double* %a0) nounwind {
1483 ; X32-LABEL: test_mm_load1_pd:
1485 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1486 ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1487 ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1490 ; X64-LABEL: test_mm_load1_pd:
1492 ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1493 ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
1495 %ld = load double, double* %a0, align 8
1496 %res0 = insertelement <2 x double> undef, double %ld, i32 0
1497 %res1 = insertelement <2 x double> %res0, double %ld, i32 1
1498 ret <2 x double> %res1
1501 define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind {
1502 ; X32-LABEL: test_mm_loadh_pd:
1504 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1505 ; X32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1508 ; X64-LABEL: test_mm_loadh_pd:
1510 ; X64-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
1512 %ld = load double, double* %a1, align 8
1513 %res = insertelement <2 x double> %a0, double %ld, i32 1
1514 ret <2 x double> %res
1517 define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind {
1518 ; X32-LABEL: test_mm_loadl_epi64:
1520 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1521 ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1524 ; X64-LABEL: test_mm_loadl_epi64:
1526 ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1528 %bc = bitcast <2 x i64>* %a1 to i64*
1529 %ld = load i64, i64* %bc, align 1
1530 %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0
1531 %res1 = insertelement <2 x i64> %res0, i64 0, i32 1
1535 define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind {
1536 ; X32-LABEL: test_mm_loadl_pd:
1538 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1539 ; X32-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1542 ; X64-LABEL: test_mm_loadl_pd:
1544 ; X64-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
1546 %ld = load double, double* %a1, align 8
1547 %res = insertelement <2 x double> %a0, double %ld, i32 0
1548 ret <2 x double> %res
1551 define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
1552 ; X32-LABEL: test_mm_loadr_pd:
1554 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1555 ; X32-NEXT: movapd (%eax), %xmm0
1556 ; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1559 ; X64-LABEL: test_mm_loadr_pd:
1561 ; X64-NEXT: movapd (%rdi), %xmm0
1562 ; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
1564 %arg0 = bitcast double* %a0 to <2 x double>*
1565 %ld = load <2 x double>, <2 x double>* %arg0, align 16
1566 %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1567 ret <2 x double> %res
1570 define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind {
1571 ; X32-LABEL: test_mm_loadu_pd:
1573 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1574 ; X32-NEXT: movups (%eax), %xmm0
1577 ; X64-LABEL: test_mm_loadu_pd:
1579 ; X64-NEXT: movups (%rdi), %xmm0
1581 %arg0 = bitcast double* %a0 to <2 x double>*
1582 %res = load <2 x double>, <2 x double>* %arg0, align 1
1583 ret <2 x double> %res
1586 define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind {
1587 ; X32-LABEL: test_mm_loadu_si128:
1589 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
1590 ; X32-NEXT: movups (%eax), %xmm0
1593 ; X64-LABEL: test_mm_loadu_si128:
1595 ; X64-NEXT: movups (%rdi), %xmm0
1597 %res = load <2 x i64>, <2 x i64>* %a0, align 1
1601 define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1602 ; X32-LABEL: test_mm_madd_epi16:
1604 ; X32-NEXT: pmaddwd %xmm1, %xmm0
1607 ; X64-LABEL: test_mm_madd_epi16:
1609 ; X64-NEXT: pmaddwd %xmm1, %xmm0
1611 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1612 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1613 %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1)
1614 %bc = bitcast <4 x i32> %res to <2 x i64>
1617 declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
1619 define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) nounwind {
1620 ; X32-LABEL: test_mm_maskmoveu_si128:
1622 ; X32-NEXT: pushl %edi
1623 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
1624 ; X32-NEXT: maskmovdqu %xmm1, %xmm0
1625 ; X32-NEXT: popl %edi
1628 ; X64-LABEL: test_mm_maskmoveu_si128:
1630 ; X64-NEXT: maskmovdqu %xmm1, %xmm0
1632 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1633 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1634 call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2)
1637 declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
1639 define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1640 ; X32-LABEL: test_mm_max_epi16:
1642 ; X32-NEXT: pmaxsw %xmm1, %xmm0
1645 ; X64-LABEL: test_mm_max_epi16:
1647 ; X64-NEXT: pmaxsw %xmm1, %xmm0
1649 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1650 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1651 %cmp = icmp sgt <8 x i16> %arg0, %arg1
1652 %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
1653 %bc = bitcast <8 x i16> %sel to <2 x i64>
1657 define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1658 ; X32-LABEL: test_mm_max_epu8:
1660 ; X32-NEXT: pmaxub %xmm1, %xmm0
1663 ; X64-LABEL: test_mm_max_epu8:
1665 ; X64-NEXT: pmaxub %xmm1, %xmm0
1667 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1668 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1669 %cmp = icmp ugt <16 x i8> %arg0, %arg1
1670 %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
1671 %bc = bitcast <16 x i8> %sel to <2 x i64>
1675 define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1676 ; X32-LABEL: test_mm_max_pd:
1678 ; X32-NEXT: maxpd %xmm1, %xmm0
1681 ; X64-LABEL: test_mm_max_pd:
1683 ; X64-NEXT: maxpd %xmm1, %xmm0
1685 %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
1686 ret <2 x double> %res
1688 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
1690 define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1691 ; X32-LABEL: test_mm_max_sd:
1693 ; X32-NEXT: maxsd %xmm1, %xmm0
1696 ; X64-LABEL: test_mm_max_sd:
1698 ; X64-NEXT: maxsd %xmm1, %xmm0
1700 %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
1701 ret <2 x double> %res
1703 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
1705 define void @test_mm_mfence() nounwind {
1706 ; X32-LABEL: test_mm_mfence:
1711 ; X64-LABEL: test_mm_mfence:
1715 call void @llvm.x86.sse2.mfence()
1718 declare void @llvm.x86.sse2.mfence() nounwind readnone
1720 define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1721 ; X32-LABEL: test_mm_min_epi16:
1723 ; X32-NEXT: pminsw %xmm1, %xmm0
1726 ; X64-LABEL: test_mm_min_epi16:
1728 ; X64-NEXT: pminsw %xmm1, %xmm0
1730 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1731 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1732 %cmp = icmp slt <8 x i16> %arg0, %arg1
1733 %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
1734 %bc = bitcast <8 x i16> %sel to <2 x i64>
1738 define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1739 ; X32-LABEL: test_mm_min_epu8:
1741 ; X32-NEXT: pminub %xmm1, %xmm0
1744 ; X64-LABEL: test_mm_min_epu8:
1746 ; X64-NEXT: pminub %xmm1, %xmm0
1748 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1749 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
1750 %cmp = icmp ult <16 x i8> %arg0, %arg1
1751 %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
1752 %bc = bitcast <16 x i8> %sel to <2 x i64>
1756 define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1757 ; X32-LABEL: test_mm_min_pd:
1759 ; X32-NEXT: minpd %xmm1, %xmm0
1762 ; X64-LABEL: test_mm_min_pd:
1764 ; X64-NEXT: minpd %xmm1, %xmm0
1766 %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
1767 ret <2 x double> %res
1769 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
1771 define <2 x double> @test_mm_min_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1772 ; X32-LABEL: test_mm_min_sd:
1774 ; X32-NEXT: minsd %xmm1, %xmm0
1777 ; X64-LABEL: test_mm_min_sd:
1779 ; X64-NEXT: minsd %xmm1, %xmm0
1781 %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
1782 ret <2 x double> %res
1784 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
1786 define <2 x i64> @test_mm_move_epi64(<2 x i64> %a0) nounwind {
1787 ; X32-LABEL: test_mm_move_epi64:
1789 ; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1792 ; X64-LABEL: test_mm_move_epi64:
1794 ; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
1796 %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
1800 define <2 x double> @test_mm_move_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1801 ; X32-LABEL: test_mm_move_sd:
1803 ; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1806 ; X64-LABEL: test_mm_move_sd:
1808 ; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1810 %ext0 = extractelement <2 x double> %a1, i32 0
1811 %res0 = insertelement <2 x double> undef, double %ext0, i32 0
1812 %ext1 = extractelement <2 x double> %a0, i32 1
1813 %res1 = insertelement <2 x double> %res0, double %ext1, i32 1
1814 ret <2 x double> %res1
1817 define i32 @test_mm_movemask_epi8(<2 x i64> %a0) nounwind {
1818 ; X32-LABEL: test_mm_movemask_epi8:
1820 ; X32-NEXT: pmovmskb %xmm0, %eax
1823 ; X64-LABEL: test_mm_movemask_epi8:
1825 ; X64-NEXT: pmovmskb %xmm0, %eax
1827 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
1828 %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %arg0)
1831 declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
1833 define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind {
1834 ; X32-LABEL: test_mm_movemask_pd:
1836 ; X32-NEXT: movmskpd %xmm0, %eax
1839 ; X64-LABEL: test_mm_movemask_pd:
1841 ; X64-NEXT: movmskpd %xmm0, %eax
1843 %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
1846 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
1848 define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) {
1849 ; X32-LABEL: test_mm_mul_epu32:
1851 ; X32-NEXT: pmuludq %xmm1, %xmm0
1854 ; X64-LABEL: test_mm_mul_epu32:
1856 ; X64-NEXT: pmuludq %xmm1, %xmm0
1858 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1859 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1860 %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %arg0, <4 x i32> %arg1)
1863 declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
1865 define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1866 ; X32-LABEL: test_mm_mul_pd:
1868 ; X32-NEXT: mulpd %xmm1, %xmm0
1871 ; X64-LABEL: test_mm_mul_pd:
1873 ; X64-NEXT: mulpd %xmm1, %xmm0
1875 %res = fmul <2 x double> %a0, %a1
1876 ret <2 x double> %res
1879 define <2 x double> @test_mm_mul_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
1880 ; X32-LABEL: test_mm_mul_sd:
1882 ; X32-NEXT: mulsd %xmm1, %xmm0
1885 ; X64-LABEL: test_mm_mul_sd:
1887 ; X64-NEXT: mulsd %xmm1, %xmm0
1889 %ext0 = extractelement <2 x double> %a0, i32 0
1890 %ext1 = extractelement <2 x double> %a1, i32 0
1891 %fmul = fmul double %ext0, %ext1
1892 %res = insertelement <2 x double> %a0, double %fmul, i32 0
1893 ret <2 x double> %res
1896 define <2 x i64> @test_mm_mulhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1897 ; X32-LABEL: test_mm_mulhi_epi16:
1899 ; X32-NEXT: pmulhw %xmm1, %xmm0
1902 ; X64-LABEL: test_mm_mulhi_epi16:
1904 ; X64-NEXT: pmulhw %xmm1, %xmm0
1906 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1907 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1908 %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %arg0, <8 x i16> %arg1)
1909 %bc = bitcast <8 x i16> %res to <2 x i64>
1912 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
1914 define <2 x i64> @test_mm_mulhi_epu16(<2 x i64> %a0, <2 x i64> %a1) {
1915 ; X32-LABEL: test_mm_mulhi_epu16:
1917 ; X32-NEXT: pmulhuw %xmm1, %xmm0
1920 ; X64-LABEL: test_mm_mulhi_epu16:
1922 ; X64-NEXT: pmulhuw %xmm1, %xmm0
1924 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1925 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1926 %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %arg0, <8 x i16> %arg1)
1927 %bc = bitcast <8 x i16> %res to <2 x i64>
1930 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
1932 define <2 x i64> @test_mm_mullo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1933 ; X32-LABEL: test_mm_mullo_epi16:
1935 ; X32-NEXT: pmullw %xmm1, %xmm0
1938 ; X64-LABEL: test_mm_mullo_epi16:
1940 ; X64-NEXT: pmullw %xmm1, %xmm0
1942 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1943 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1944 %res = mul <8 x i16> %arg0, %arg1
1945 %bc = bitcast <8 x i16> %res to <2 x i64>
1949 define <2 x double> @test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
1950 ; X32-LABEL: test_mm_or_pd:
1952 ; X32-NEXT: orps %xmm1, %xmm0
1955 ; X64-LABEL: test_mm_or_pd:
1957 ; X64-NEXT: orps %xmm1, %xmm0
1959 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
1960 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
1961 %res = or <4 x i32> %arg0, %arg1
1962 %bc = bitcast <4 x i32> %res to <2 x double>
1963 ret <2 x double> %bc
1966 define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
1967 ; X32-LABEL: test_mm_or_si128:
1969 ; X32-NEXT: orps %xmm1, %xmm0
1972 ; X64-LABEL: test_mm_or_si128:
1974 ; X64-NEXT: orps %xmm1, %xmm0
1976 %res = or <2 x i64> %a0, %a1
1980 define <2 x i64> @test_mm_packs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
1981 ; X32-LABEL: test_mm_packs_epi16:
1983 ; X32-NEXT: packsswb %xmm1, %xmm0
1986 ; X64-LABEL: test_mm_packs_epi16:
1988 ; X64-NEXT: packsswb %xmm1, %xmm0
1990 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
1991 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
1992 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
1993 %bc = bitcast <16 x i8> %res to <2 x i64>
1996 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
1998 define <2 x i64> @test_mm_packs_epi32(<2 x i64> %a0, <2 x i64> %a1) {
1999 ; X32-LABEL: test_mm_packs_epi32:
2001 ; X32-NEXT: packssdw %xmm1, %xmm0
2004 ; X64-LABEL: test_mm_packs_epi32:
2006 ; X64-NEXT: packssdw %xmm1, %xmm0
2008 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2009 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2010 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %arg0, <4 x i32> %arg1)
2011 %bc = bitcast <8 x i16> %res to <2 x i64>
2014 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
2016 define <2 x i64> @test_mm_packus_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2017 ; X32-LABEL: test_mm_packus_epi16:
2019 ; X32-NEXT: packuswb %xmm1, %xmm0
2022 ; X64-LABEL: test_mm_packus_epi16:
2024 ; X64-NEXT: packuswb %xmm1, %xmm0
2026 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2027 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2028 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
2029 %bc = bitcast <16 x i8> %res to <2 x i64>
2032 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
2034 define void @test_mm_pause() nounwind {
2035 ; X32-LABEL: test_mm_pause:
2040 ; X64-LABEL: test_mm_pause:
2044 call void @llvm.x86.sse2.pause()
2047 declare void @llvm.x86.sse2.pause() nounwind readnone
2049 define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
2050 ; X32-LABEL: test_mm_sad_epu8:
2052 ; X32-NEXT: psadbw %xmm1, %xmm0
2055 ; X64-LABEL: test_mm_sad_epu8:
2057 ; X64-NEXT: psadbw %xmm1, %xmm0
2059 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2060 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
2061 %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1)
2064 declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
2066 define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
2067 ; X32-LABEL: test_mm_set_epi8:
2069 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2070 ; X32-NEXT: movd %eax, %xmm0
2071 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2072 ; X32-NEXT: movd %eax, %xmm1
2073 ; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2074 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2075 ; X32-NEXT: movd %eax, %xmm0
2076 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2077 ; X32-NEXT: movd %eax, %xmm2
2078 ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2079 ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2080 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2081 ; X32-NEXT: movd %eax, %xmm0
2082 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2083 ; X32-NEXT: movd %eax, %xmm3
2084 ; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2085 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2086 ; X32-NEXT: movd %eax, %xmm0
2087 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2088 ; X32-NEXT: movd %eax, %xmm1
2089 ; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2090 ; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2091 ; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2092 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2093 ; X32-NEXT: movd %eax, %xmm0
2094 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2095 ; X32-NEXT: movd %eax, %xmm2
2096 ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2097 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2098 ; X32-NEXT: movd %eax, %xmm0
2099 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2100 ; X32-NEXT: movd %eax, %xmm3
2101 ; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2102 ; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2103 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2104 ; X32-NEXT: movd %eax, %xmm0
2105 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2106 ; X32-NEXT: movd %eax, %xmm2
2107 ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2108 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2109 ; X32-NEXT: movd %eax, %xmm4
2110 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2111 ; X32-NEXT: movd %eax, %xmm0
2112 ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2113 ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2114 ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2115 ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2118 ; X64-LABEL: test_mm_set_epi8:
2120 ; X64-NEXT: movzbl %dil, %eax
2121 ; X64-NEXT: movd %eax, %xmm0
2122 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2123 ; X64-NEXT: movd %eax, %xmm1
2124 ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2125 ; X64-NEXT: movzbl %r8b, %eax
2126 ; X64-NEXT: movd %eax, %xmm0
2127 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2128 ; X64-NEXT: movd %eax, %xmm2
2129 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2130 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2131 ; X64-NEXT: movzbl %dl, %eax
2132 ; X64-NEXT: movd %eax, %xmm0
2133 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2134 ; X64-NEXT: movd %eax, %xmm3
2135 ; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2136 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2137 ; X64-NEXT: movd %eax, %xmm0
2138 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2139 ; X64-NEXT: movd %eax, %xmm1
2140 ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2141 ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2142 ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2143 ; X64-NEXT: movzbl %sil, %eax
2144 ; X64-NEXT: movd %eax, %xmm0
2145 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2146 ; X64-NEXT: movd %eax, %xmm2
2147 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2148 ; X64-NEXT: movzbl %r9b, %eax
2149 ; X64-NEXT: movd %eax, %xmm0
2150 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2151 ; X64-NEXT: movd %eax, %xmm3
2152 ; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2153 ; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2154 ; X64-NEXT: movzbl %cl, %eax
2155 ; X64-NEXT: movd %eax, %xmm0
2156 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2157 ; X64-NEXT: movd %eax, %xmm2
2158 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2159 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2160 ; X64-NEXT: movd %eax, %xmm4
2161 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2162 ; X64-NEXT: movd %eax, %xmm0
2163 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2164 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2165 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2166 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2168 %res0 = insertelement <16 x i8> undef, i8 %a15, i32 0
2169 %res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1
2170 %res2 = insertelement <16 x i8> %res1, i8 %a13, i32 2
2171 %res3 = insertelement <16 x i8> %res2, i8 %a12, i32 3
2172 %res4 = insertelement <16 x i8> %res3, i8 %a11, i32 4
2173 %res5 = insertelement <16 x i8> %res4, i8 %a10, i32 5
2174 %res6 = insertelement <16 x i8> %res5, i8 %a9 , i32 6
2175 %res7 = insertelement <16 x i8> %res6, i8 %a8 , i32 7
2176 %res8 = insertelement <16 x i8> %res7, i8 %a7 , i32 8
2177 %res9 = insertelement <16 x i8> %res8, i8 %a6 , i32 9
2178 %res10 = insertelement <16 x i8> %res9, i8 %a5 , i32 10
2179 %res11 = insertelement <16 x i8> %res10, i8 %a4 , i32 11
2180 %res12 = insertelement <16 x i8> %res11, i8 %a3 , i32 12
2181 %res13 = insertelement <16 x i8> %res12, i8 %a2 , i32 13
2182 %res14 = insertelement <16 x i8> %res13, i8 %a1 , i32 14
2183 %res15 = insertelement <16 x i8> %res14, i8 %a0 , i32 15
2184 %res = bitcast <16 x i8> %res15 to <2 x i64>
2188 define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
2189 ; X32-LABEL: test_mm_set_epi16:
2191 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2192 ; X32-NEXT: movd %eax, %xmm1
2193 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2194 ; X32-NEXT: movd %eax, %xmm2
2195 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2196 ; X32-NEXT: movd %eax, %xmm3
2197 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2198 ; X32-NEXT: movd %eax, %xmm4
2199 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2200 ; X32-NEXT: movd %eax, %xmm5
2201 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2202 ; X32-NEXT: movd %eax, %xmm6
2203 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2204 ; X32-NEXT: movd %eax, %xmm7
2205 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2206 ; X32-NEXT: movd %eax, %xmm0
2207 ; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2208 ; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2209 ; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2210 ; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
2211 ; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
2212 ; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
2213 ; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
2216 ; X64-LABEL: test_mm_set_epi16:
2218 ; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w
2219 ; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax
2220 ; X64-NEXT: movd %edi, %xmm0
2221 ; X64-NEXT: movd %r8d, %xmm1
2222 ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2223 ; X64-NEXT: movd %edx, %xmm0
2224 ; X64-NEXT: movd %eax, %xmm2
2225 ; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2226 ; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2227 ; X64-NEXT: movd %esi, %xmm0
2228 ; X64-NEXT: movd %r9d, %xmm1
2229 ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2230 ; X64-NEXT: movd %ecx, %xmm3
2231 ; X64-NEXT: movd %r10d, %xmm0
2232 ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
2233 ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2234 ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2236 %res0 = insertelement <8 x i16> undef, i16 %a7, i32 0
2237 %res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1
2238 %res2 = insertelement <8 x i16> %res1, i16 %a5, i32 2
2239 %res3 = insertelement <8 x i16> %res2, i16 %a4, i32 3
2240 %res4 = insertelement <8 x i16> %res3, i16 %a3, i32 4
2241 %res5 = insertelement <8 x i16> %res4, i16 %a2, i32 5
2242 %res6 = insertelement <8 x i16> %res5, i16 %a1, i32 6
2243 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7
2244 %res = bitcast <8 x i16> %res7 to <2 x i64>
2248 define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
2249 ; X32-LABEL: test_mm_set_epi32:
2251 ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2252 ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2253 ; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2254 ; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2255 ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2256 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2257 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2260 ; X64-LABEL: test_mm_set_epi32:
2262 ; X64-NEXT: movd %edi, %xmm0
2263 ; X64-NEXT: movd %edx, %xmm1
2264 ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2265 ; X64-NEXT: movd %esi, %xmm2
2266 ; X64-NEXT: movd %ecx, %xmm0
2267 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2268 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2270 %res0 = insertelement <4 x i32> undef, i32 %a3, i32 0
2271 %res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1
2272 %res2 = insertelement <4 x i32> %res1, i32 %a1, i32 2
2273 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
2274 %res = bitcast <4 x i32> %res3 to <2 x i64>
2278 ; TODO test_mm_set_epi64
2280 define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
2281 ; X32-LABEL: test_mm_set_epi64x:
2283 ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2284 ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2285 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2286 ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2287 ; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2288 ; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2289 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2292 ; X64-LABEL: test_mm_set_epi64x:
2294 ; X64-NEXT: movd %rdi, %xmm1
2295 ; X64-NEXT: movd %rsi, %xmm0
2296 ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2298 %res0 = insertelement <2 x i64> undef, i64 %a1, i32 0
2299 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
2303 define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind {
2304 ; X32-LABEL: test_mm_set_pd:
2306 ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2307 ; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2308 ; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2311 ; X64-LABEL: test_mm_set_pd:
2313 ; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2314 ; X64-NEXT: movapd %xmm1, %xmm0
2316 %res0 = insertelement <2 x double> undef, double %a1, i32 0
2317 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
2318 ret <2 x double> %res1
2321 define <2 x double> @test_mm_set_sd(double %a0) nounwind {
2322 ; X32-LABEL: test_mm_set_sd:
2324 ; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2325 ; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2328 ; X64-LABEL: test_mm_set_sd:
2330 ; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2332 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2333 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
2334 ret <2 x double> %res1
2337 define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind {
2338 ; X32-LABEL: test_mm_set1_epi8:
2340 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2341 ; X32-NEXT: movd %eax, %xmm0
2342 ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2343 ; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2344 ; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2347 ; X64-LABEL: test_mm_set1_epi8:
2349 ; X64-NEXT: movzbl %dil, %eax
2350 ; X64-NEXT: movd %eax, %xmm0
2351 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2352 ; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2353 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2355 %res0 = insertelement <16 x i8> undef, i8 %a0, i32 0
2356 %res1 = insertelement <16 x i8> %res0, i8 %a0, i32 1
2357 %res2 = insertelement <16 x i8> %res1, i8 %a0, i32 2
2358 %res3 = insertelement <16 x i8> %res2, i8 %a0, i32 3
2359 %res4 = insertelement <16 x i8> %res3, i8 %a0, i32 4
2360 %res5 = insertelement <16 x i8> %res4, i8 %a0, i32 5
2361 %res6 = insertelement <16 x i8> %res5, i8 %a0, i32 6
2362 %res7 = insertelement <16 x i8> %res6, i8 %a0, i32 7
2363 %res8 = insertelement <16 x i8> %res7, i8 %a0, i32 8
2364 %res9 = insertelement <16 x i8> %res8, i8 %a0, i32 9
2365 %res10 = insertelement <16 x i8> %res9, i8 %a0, i32 10
2366 %res11 = insertelement <16 x i8> %res10, i8 %a0, i32 11
2367 %res12 = insertelement <16 x i8> %res11, i8 %a0, i32 12
2368 %res13 = insertelement <16 x i8> %res12, i8 %a0, i32 13
2369 %res14 = insertelement <16 x i8> %res13, i8 %a0, i32 14
2370 %res15 = insertelement <16 x i8> %res14, i8 %a0, i32 15
2371 %res = bitcast <16 x i8> %res15 to <2 x i64>
2375 define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind {
2376 ; X32-LABEL: test_mm_set1_epi16:
2378 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2379 ; X32-NEXT: movd %eax, %xmm0
2380 ; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2381 ; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2384 ; X64-LABEL: test_mm_set1_epi16:
2386 ; X64-NEXT: movd %edi, %xmm0
2387 ; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2388 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2390 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
2391 %res1 = insertelement <8 x i16> %res0, i16 %a0, i32 1
2392 %res2 = insertelement <8 x i16> %res1, i16 %a0, i32 2
2393 %res3 = insertelement <8 x i16> %res2, i16 %a0, i32 3
2394 %res4 = insertelement <8 x i16> %res3, i16 %a0, i32 4
2395 %res5 = insertelement <8 x i16> %res4, i16 %a0, i32 5
2396 %res6 = insertelement <8 x i16> %res5, i16 %a0, i32 6
2397 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7
2398 %res = bitcast <8 x i16> %res7 to <2 x i64>
2402 define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind {
2403 ; X32-LABEL: test_mm_set1_epi32:
2405 ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2406 ; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2409 ; X64-LABEL: test_mm_set1_epi32:
2411 ; X64-NEXT: movd %edi, %xmm0
2412 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2414 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
2415 %res1 = insertelement <4 x i32> %res0, i32 %a0, i32 1
2416 %res2 = insertelement <4 x i32> %res1, i32 %a0, i32 2
2417 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3
2418 %res = bitcast <4 x i32> %res3 to <2 x i64>
2422 ; TODO test_mm_set1_epi64
2424 define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
2425 ; X32-LABEL: test_mm_set1_epi64x:
2427 ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2428 ; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2429 ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2430 ; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
2431 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2434 ; X64-LABEL: test_mm_set1_epi64x:
2436 ; X64-NEXT: movd %rdi, %xmm0
2437 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
2439 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
2440 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1
2444 define <2 x double> @test_mm_set1_pd(double %a0) nounwind {
2445 ; X32-LABEL: test_mm_set1_pd:
2447 ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2448 ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2451 ; X64-LABEL: test_mm_set1_pd:
2453 ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2455 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2456 %res1 = insertelement <2 x double> %res0, double %a0, i32 1
2457 ret <2 x double> %res1
2460 define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
2461 ; X32-LABEL: test_mm_setr_epi8:
2463 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2464 ; X32-NEXT: movd %eax, %xmm0
2465 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2466 ; X32-NEXT: movd %eax, %xmm1
2467 ; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2468 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2469 ; X32-NEXT: movd %eax, %xmm0
2470 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2471 ; X32-NEXT: movd %eax, %xmm2
2472 ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2473 ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2474 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2475 ; X32-NEXT: movd %eax, %xmm0
2476 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2477 ; X32-NEXT: movd %eax, %xmm3
2478 ; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2479 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2480 ; X32-NEXT: movd %eax, %xmm0
2481 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2482 ; X32-NEXT: movd %eax, %xmm1
2483 ; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2484 ; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2485 ; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2486 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2487 ; X32-NEXT: movd %eax, %xmm0
2488 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2489 ; X32-NEXT: movd %eax, %xmm2
2490 ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2491 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2492 ; X32-NEXT: movd %eax, %xmm0
2493 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2494 ; X32-NEXT: movd %eax, %xmm3
2495 ; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2496 ; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2497 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2498 ; X32-NEXT: movd %eax, %xmm0
2499 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2500 ; X32-NEXT: movd %eax, %xmm2
2501 ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2502 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2503 ; X32-NEXT: movd %eax, %xmm4
2504 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
2505 ; X32-NEXT: movd %eax, %xmm0
2506 ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2507 ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2508 ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2509 ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2512 ; X64-LABEL: test_mm_setr_epi8:
2514 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2515 ; X64-NEXT: movd %eax, %xmm0
2516 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2517 ; X64-NEXT: movd %eax, %xmm1
2518 ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2519 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2520 ; X64-NEXT: movd %eax, %xmm0
2521 ; X64-NEXT: movzbl %cl, %eax
2522 ; X64-NEXT: movd %eax, %xmm2
2523 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2524 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2525 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2526 ; X64-NEXT: movd %eax, %xmm0
2527 ; X64-NEXT: movzbl %r9b, %eax
2528 ; X64-NEXT: movd %eax, %xmm3
2529 ; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2530 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2531 ; X64-NEXT: movd %eax, %xmm0
2532 ; X64-NEXT: movzbl %sil, %eax
2533 ; X64-NEXT: movd %eax, %xmm1
2534 ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2535 ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2536 ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2537 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2538 ; X64-NEXT: movd %eax, %xmm0
2539 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2540 ; X64-NEXT: movd %eax, %xmm2
2541 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2542 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2543 ; X64-NEXT: movd %eax, %xmm0
2544 ; X64-NEXT: movzbl %dl, %eax
2545 ; X64-NEXT: movd %eax, %xmm3
2546 ; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
2547 ; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2548 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2549 ; X64-NEXT: movd %eax, %xmm0
2550 ; X64-NEXT: movzbl %r8b, %eax
2551 ; X64-NEXT: movd %eax, %xmm2
2552 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2553 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
2554 ; X64-NEXT: movd %eax, %xmm4
2555 ; X64-NEXT: movzbl %dil, %eax
2556 ; X64-NEXT: movd %eax, %xmm0
2557 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
2558 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2559 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2560 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2562 %res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0
2563 %res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1
2564 %res2 = insertelement <16 x i8> %res1, i8 %a2 , i32 2
2565 %res3 = insertelement <16 x i8> %res2, i8 %a3 , i32 3
2566 %res4 = insertelement <16 x i8> %res3, i8 %a4 , i32 4
2567 %res5 = insertelement <16 x i8> %res4, i8 %a5 , i32 5
2568 %res6 = insertelement <16 x i8> %res5, i8 %a6 , i32 6
2569 %res7 = insertelement <16 x i8> %res6, i8 %a7 , i32 7
2570 %res8 = insertelement <16 x i8> %res7, i8 %a8 , i32 8
2571 %res9 = insertelement <16 x i8> %res8, i8 %a9 , i32 9
2572 %res10 = insertelement <16 x i8> %res9, i8 %a10, i32 10
2573 %res11 = insertelement <16 x i8> %res10, i8 %a11, i32 11
2574 %res12 = insertelement <16 x i8> %res11, i8 %a12, i32 12
2575 %res13 = insertelement <16 x i8> %res12, i8 %a13, i32 13
2576 %res14 = insertelement <16 x i8> %res13, i8 %a14, i32 14
2577 %res15 = insertelement <16 x i8> %res14, i8 %a15, i32 15
2578 %res = bitcast <16 x i8> %res15 to <2 x i64>
2582 define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
2583 ; X32-LABEL: test_mm_setr_epi16:
2585 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2586 ; X32-NEXT: movd %eax, %xmm1
2587 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2588 ; X32-NEXT: movd %eax, %xmm2
2589 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2590 ; X32-NEXT: movd %eax, %xmm3
2591 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2592 ; X32-NEXT: movd %eax, %xmm4
2593 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2594 ; X32-NEXT: movd %eax, %xmm5
2595 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2596 ; X32-NEXT: movd %eax, %xmm6
2597 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2598 ; X32-NEXT: movd %eax, %xmm7
2599 ; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
2600 ; X32-NEXT: movd %eax, %xmm0
2601 ; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2602 ; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2603 ; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2604 ; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
2605 ; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
2606 ; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
2607 ; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
2610 ; X64-LABEL: test_mm_setr_epi16:
2612 ; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax
2613 ; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w
2614 ; X64-NEXT: movd %eax, %xmm0
2615 ; X64-NEXT: movd %ecx, %xmm1
2616 ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2617 ; X64-NEXT: movd %r9d, %xmm0
2618 ; X64-NEXT: movd %esi, %xmm2
2619 ; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2620 ; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2621 ; X64-NEXT: movd %r10d, %xmm0
2622 ; X64-NEXT: movd %edx, %xmm1
2623 ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2624 ; X64-NEXT: movd %r8d, %xmm3
2625 ; X64-NEXT: movd %edi, %xmm0
2626 ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
2627 ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2628 ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2630 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
2631 %res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1
2632 %res2 = insertelement <8 x i16> %res1, i16 %a2, i32 2
2633 %res3 = insertelement <8 x i16> %res2, i16 %a3, i32 3
2634 %res4 = insertelement <8 x i16> %res3, i16 %a4, i32 4
2635 %res5 = insertelement <8 x i16> %res4, i16 %a5, i32 5
2636 %res6 = insertelement <8 x i16> %res5, i16 %a6, i32 6
2637 %res7 = insertelement <8 x i16> %res6, i16 %a7, i32 7
2638 %res = bitcast <8 x i16> %res7 to <2 x i64>
2642 define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
2643 ; X32-LABEL: test_mm_setr_epi32:
2645 ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2646 ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2647 ; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2648 ; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2649 ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2650 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2651 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2654 ; X64-LABEL: test_mm_setr_epi32:
2656 ; X64-NEXT: movd %ecx, %xmm0
2657 ; X64-NEXT: movd %esi, %xmm1
2658 ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
2659 ; X64-NEXT: movd %edx, %xmm2
2660 ; X64-NEXT: movd %edi, %xmm0
2661 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2662 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2664 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
2665 %res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1
2666 %res2 = insertelement <4 x i32> %res1, i32 %a2, i32 2
2667 %res3 = insertelement <4 x i32> %res2, i32 %a3, i32 3
2668 %res = bitcast <4 x i32> %res3 to <2 x i64>
2672 ; TODO test_mm_setr_epi64
2674 define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
2675 ; X32-LABEL: test_mm_setr_epi64x:
2677 ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2678 ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2679 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2680 ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2681 ; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
2682 ; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2683 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2686 ; X64-LABEL: test_mm_setr_epi64x:
2688 ; X64-NEXT: movd %rsi, %xmm1
2689 ; X64-NEXT: movd %rdi, %xmm0
2690 ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2692 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0
2693 %res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1
2697 define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind {
2698 ; X32-LABEL: test_mm_setr_pd:
2700 ; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
2701 ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
2702 ; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2705 ; X64-LABEL: test_mm_setr_pd:
2707 ; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2709 %res0 = insertelement <2 x double> undef, double %a0, i32 0
2710 %res1 = insertelement <2 x double> %res0, double %a1, i32 1
2711 ret <2 x double> %res1
2714 define <2 x double> @test_mm_setzero_pd() {
2715 ; X32-LABEL: test_mm_setzero_pd:
2717 ; X32-NEXT: xorps %xmm0, %xmm0
2720 ; X64-LABEL: test_mm_setzero_pd:
2722 ; X64-NEXT: xorps %xmm0, %xmm0
2724 ret <2 x double> zeroinitializer
2727 define <2 x i64> @test_mm_setzero_si128() {
2728 ; X32-LABEL: test_mm_setzero_si128:
2730 ; X32-NEXT: xorps %xmm0, %xmm0
2733 ; X64-LABEL: test_mm_setzero_si128:
2735 ; X64-NEXT: xorps %xmm0, %xmm0
2737 ret <2 x i64> zeroinitializer
2740 define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) {
2741 ; X32-LABEL: test_mm_shuffle_epi32:
2743 ; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2746 ; X64-LABEL: test_mm_shuffle_epi32:
2748 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2750 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2751 %res = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer
2752 %bc = bitcast <4 x i32> %res to <2 x i64>
2756 define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) {
2757 ; X32-LABEL: test_mm_shuffle_pd:
2759 ; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
2762 ; X64-LABEL: test_mm_shuffle_pd:
2764 ; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
2766 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
2767 ret <2 x double> %res
2770 define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) {
2771 ; X32-LABEL: test_mm_shufflehi_epi16:
2773 ; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2776 ; X64-LABEL: test_mm_shufflehi_epi16:
2778 ; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
2780 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2781 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
2782 %bc = bitcast <8 x i16> %res to <2 x i64>
2786 define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) {
2787 ; X32-LABEL: test_mm_shufflelo_epi16:
2789 ; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2792 ; X64-LABEL: test_mm_shufflelo_epi16:
2794 ; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2796 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2797 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
2798 %bc = bitcast <8 x i16> %res to <2 x i64>
2802 define <2 x i64> @test_mm_sll_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2803 ; X32-LABEL: test_mm_sll_epi16:
2805 ; X32-NEXT: psllw %xmm1, %xmm0
2808 ; X64-LABEL: test_mm_sll_epi16:
2810 ; X64-NEXT: psllw %xmm1, %xmm0
2812 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2813 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2814 %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %arg0, <8 x i16> %arg1)
2815 %bc = bitcast <8 x i16> %res to <2 x i64>
2818 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
2820 define <2 x i64> @test_mm_sll_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2821 ; X32-LABEL: test_mm_sll_epi32:
2823 ; X32-NEXT: pslld %xmm1, %xmm0
2826 ; X64-LABEL: test_mm_sll_epi32:
2828 ; X64-NEXT: pslld %xmm1, %xmm0
2830 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2831 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2832 %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %arg0, <4 x i32> %arg1)
2833 %bc = bitcast <4 x i32> %res to <2 x i64>
2836 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
2838 define <2 x i64> @test_mm_sll_epi64(<2 x i64> %a0, <2 x i64> %a1) {
2839 ; X32-LABEL: test_mm_sll_epi64:
2841 ; X32-NEXT: psllq %xmm1, %xmm0
2844 ; X64-LABEL: test_mm_sll_epi64:
2846 ; X64-NEXT: psllq %xmm1, %xmm0
2848 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
2851 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
2853 define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) {
2854 ; X32-LABEL: test_mm_slli_epi16:
2856 ; X32-NEXT: psllw $1, %xmm0
2859 ; X64-LABEL: test_mm_slli_epi16:
2861 ; X64-NEXT: psllw $1, %xmm0
2863 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2864 %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1)
2865 %bc = bitcast <8 x i16> %res to <2 x i64>
2868 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
2870 define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) {
2871 ; X32-LABEL: test_mm_slli_epi32:
2873 ; X32-NEXT: pslld $1, %xmm0
2876 ; X64-LABEL: test_mm_slli_epi32:
2878 ; X64-NEXT: pslld $1, %xmm0
2880 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2881 %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1)
2882 %bc = bitcast <4 x i32> %res to <2 x i64>
2885 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
2887 define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) {
2888 ; X32-LABEL: test_mm_slli_epi64:
2890 ; X32-NEXT: psllq $1, %xmm0
2893 ; X64-LABEL: test_mm_slli_epi64:
2895 ; X64-NEXT: psllq $1, %xmm0
2897 %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1)
2900 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
2902 define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind {
2903 ; X32-LABEL: test_mm_slli_si128:
2905 ; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
2908 ; X64-LABEL: test_mm_slli_si128:
2910 ; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
2912 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
2913 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
2914 %bc = bitcast <16 x i8> %res to <2 x i64>
2918 define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind {
2919 ; X32-LABEL: test_mm_sqrt_pd:
2921 ; X32-NEXT: sqrtpd %xmm0, %xmm0
2924 ; X64-LABEL: test_mm_sqrt_pd:
2926 ; X64-NEXT: sqrtpd %xmm0, %xmm0
2928 %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
2929 ret <2 x double> %res
2931 declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
2933 define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
2934 ; X32-LABEL: test_mm_sqrt_sd:
2936 ; X32-NEXT: sqrtsd %xmm0, %xmm1
2937 ; X32-NEXT: movaps %xmm1, %xmm0
2940 ; X64-LABEL: test_mm_sqrt_sd:
2942 ; X64-NEXT: sqrtsd %xmm0, %xmm1
2943 ; X64-NEXT: movaps %xmm1, %xmm0
2945 %call = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
2946 %ext0 = extractelement <2 x double> %call, i32 0
2947 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
2948 %ext1 = extractelement <2 x double> %a1, i32 1
2949 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
2950 ret <2 x double> %ins1
2952 declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
2954 define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) {
2955 ; X32-LABEL: test_mm_sra_epi16:
2957 ; X32-NEXT: psraw %xmm1, %xmm0
2960 ; X64-LABEL: test_mm_sra_epi16:
2962 ; X64-NEXT: psraw %xmm1, %xmm0
2964 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
2965 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2966 %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %arg0, <8 x i16> %arg1)
2967 %bc = bitcast <8 x i16> %res to <2 x i64>
2970 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
2972 define <2 x i64> @test_mm_sra_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2973 ; X32-LABEL: test_mm_sra_epi32:
2975 ; X32-NEXT: psrad %xmm1, %xmm0
2978 ; X64-LABEL: test_mm_sra_epi32:
2980 ; X64-NEXT: psrad %xmm1, %xmm0
2982 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2983 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2984 %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %arg0, <4 x i32> %arg1)
2985 %bc = bitcast <4 x i32> %res to <2 x i64>
2988 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
2990 define <2 x i64> @test_mm_srai_epi16(<2 x i64> %a0) {
2991 ; X32-LABEL: test_mm_srai_epi16:
2993 ; X32-NEXT: psraw $1, %xmm0
2996 ; X64-LABEL: test_mm_srai_epi16:
2998 ; X64-NEXT: psraw $1, %xmm0
3000 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3001 %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %arg0, i32 1)
3002 %bc = bitcast <8 x i16> %res to <2 x i64>
3005 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
3007 define <2 x i64> @test_mm_srai_epi32(<2 x i64> %a0) {
3008 ; X32-LABEL: test_mm_srai_epi32:
3010 ; X32-NEXT: psrad $1, %xmm0
3013 ; X64-LABEL: test_mm_srai_epi32:
3015 ; X64-NEXT: psrad $1, %xmm0
3017 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3018 %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %arg0, i32 1)
3019 %bc = bitcast <4 x i32> %res to <2 x i64>
3022 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
3024 define <2 x i64> @test_mm_srl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
3025 ; X32-LABEL: test_mm_srl_epi16:
3027 ; X32-NEXT: psrlw %xmm1, %xmm0
3030 ; X64-LABEL: test_mm_srl_epi16:
3032 ; X64-NEXT: psrlw %xmm1, %xmm0
3034 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3035 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3036 %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %arg0, <8 x i16> %arg1)
3037 %bc = bitcast <8 x i16> %res to <2 x i64>
3040 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
3042 define <2 x i64> @test_mm_srl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
3043 ; X32-LABEL: test_mm_srl_epi32:
3045 ; X32-NEXT: psrld %xmm1, %xmm0
3048 ; X64-LABEL: test_mm_srl_epi32:
3050 ; X64-NEXT: psrld %xmm1, %xmm0
3052 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3053 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3054 %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %arg0, <4 x i32> %arg1)
3055 %bc = bitcast <4 x i32> %res to <2 x i64>
3058 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
3060 define <2 x i64> @test_mm_srl_epi64(<2 x i64> %a0, <2 x i64> %a1) {
3061 ; X32-LABEL: test_mm_srl_epi64:
3063 ; X32-NEXT: psrlq %xmm1, %xmm0
3066 ; X64-LABEL: test_mm_srl_epi64:
3068 ; X64-NEXT: psrlq %xmm1, %xmm0
3070 %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
3073 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
3075 define <2 x i64> @test_mm_srli_epi16(<2 x i64> %a0) {
3076 ; X32-LABEL: test_mm_srli_epi16:
3078 ; X32-NEXT: psrlw $1, %xmm0
3081 ; X64-LABEL: test_mm_srli_epi16:
3083 ; X64-NEXT: psrlw $1, %xmm0
3085 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3086 %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %arg0, i32 1)
3087 %bc = bitcast <8 x i16> %res to <2 x i64>
3090 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
3092 define <2 x i64> @test_mm_srli_epi32(<2 x i64> %a0) {
3093 ; X32-LABEL: test_mm_srli_epi32:
3095 ; X32-NEXT: psrld $1, %xmm0
3098 ; X64-LABEL: test_mm_srli_epi32:
3100 ; X64-NEXT: psrld $1, %xmm0
3102 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3103 %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %arg0, i32 1)
3104 %bc = bitcast <4 x i32> %res to <2 x i64>
3107 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
3109 define <2 x i64> @test_mm_srli_epi64(<2 x i64> %a0) {
3110 ; X32-LABEL: test_mm_srli_epi64:
3112 ; X32-NEXT: psrlq $1, %xmm0
3115 ; X64-LABEL: test_mm_srli_epi64:
3117 ; X64-NEXT: psrlq $1, %xmm0
3119 %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 1)
3122 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
3124 define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind {
3125 ; X32-LABEL: test_mm_srli_si128:
3127 ; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
3130 ; X64-LABEL: test_mm_srli_si128:
3132 ; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
3134 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3135 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
3136 %bc = bitcast <16 x i8> %res to <2 x i64>
3140 define void @test_mm_store_pd(double *%a0, <2 x double> %a1) {
3141 ; X32-LABEL: test_mm_store_pd:
3143 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3144 ; X32-NEXT: movaps %xmm0, (%eax)
3147 ; X64-LABEL: test_mm_store_pd:
3149 ; X64-NEXT: movaps %xmm0, (%rdi)
3151 %arg0 = bitcast double* %a0 to <2 x double>*
3152 store <2 x double> %a1, <2 x double>* %arg0, align 16
3156 define void @test_mm_store_pd1(double *%a0, <2 x double> %a1) {
3157 ; X32-LABEL: test_mm_store_pd1:
3159 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3160 ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3161 ; X32-NEXT: movaps %xmm0, (%eax)
3164 ; X64-LABEL: test_mm_store_pd1:
3166 ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3167 ; X64-NEXT: movaps %xmm0, (%rdi)
3169 %arg0 = bitcast double * %a0 to <2 x double>*
3170 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
3171 store <2 x double> %shuf, <2 x double>* %arg0, align 16
3175 define void @test_mm_store_sd(double *%a0, <2 x double> %a1) {
3176 ; X32-LABEL: test_mm_store_sd:
3178 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3179 ; X32-NEXT: movsd %xmm0, (%eax)
3182 ; X64-LABEL: test_mm_store_sd:
3184 ; X64-NEXT: movsd %xmm0, (%rdi)
3186 %ext = extractelement <2 x double> %a1, i32 0
3187 store double %ext, double* %a0, align 1
3191 define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) {
3192 ; X32-LABEL: test_mm_store_si128:
3194 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3195 ; X32-NEXT: movaps %xmm0, (%eax)
3198 ; X64-LABEL: test_mm_store_si128:
3200 ; X64-NEXT: movaps %xmm0, (%rdi)
3202 store <2 x i64> %a1, <2 x i64>* %a0, align 16
3206 define void @test_mm_store1_pd(double *%a0, <2 x double> %a1) {
3207 ; X32-LABEL: test_mm_store1_pd:
3209 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3210 ; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3211 ; X32-NEXT: movaps %xmm0, (%eax)
3214 ; X64-LABEL: test_mm_store1_pd:
3216 ; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
3217 ; X64-NEXT: movaps %xmm0, (%rdi)
3219 %arg0 = bitcast double * %a0 to <2 x double>*
3220 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
3221 store <2 x double> %shuf, <2 x double>* %arg0, align 16
3225 define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) {
3226 ; X32-LABEL: test_mm_storeh_sd:
3228 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3229 ; X32-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
3230 ; X32-NEXT: movsd %xmm0, (%eax)
3233 ; X64-LABEL: test_mm_storeh_sd:
3235 ; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
3236 ; X64-NEXT: movsd %xmm0, (%rdi)
3238 %ext = extractelement <2 x double> %a1, i32 1
3239 store double %ext, double* %a0, align 8
3243 define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) {
3244 ; X32-LABEL: test_mm_storel_epi64:
3246 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3247 ; X32-NEXT: movlps %xmm0, (%eax)
3250 ; X64-LABEL: test_mm_storel_epi64:
3252 ; X64-NEXT: movd %xmm0, %rax
3253 ; X64-NEXT: movq %rax, (%rdi)
3255 %ext = extractelement <2 x i64> %a1, i32 0
3256 %bc = bitcast <2 x i64> *%a0 to i64*
3257 store i64 %ext, i64* %bc, align 8
3261 define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) {
3262 ; X32-LABEL: test_mm_storel_sd:
3264 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3265 ; X32-NEXT: movsd %xmm0, (%eax)
3268 ; X64-LABEL: test_mm_storel_sd:
3270 ; X64-NEXT: movsd %xmm0, (%rdi)
3272 %ext = extractelement <2 x double> %a1, i32 0
3273 store double %ext, double* %a0, align 8
3277 define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) {
3278 ; X32-LABEL: test_mm_storer_pd:
3280 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3281 ; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3282 ; X32-NEXT: movapd %xmm0, (%eax)
3285 ; X64-LABEL: test_mm_storer_pd:
3287 ; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
3288 ; X64-NEXT: movapd %xmm0, (%rdi)
3290 %arg0 = bitcast double* %a0 to <2 x double>*
3291 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
3292 store <2 x double> %shuf, <2 x double>* %arg0, align 16
3296 define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) {
3297 ; X32-LABEL: test_mm_storeu_pd:
3299 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3300 ; X32-NEXT: movups %xmm0, (%eax)
3303 ; X64-LABEL: test_mm_storeu_pd:
3305 ; X64-NEXT: movups %xmm0, (%rdi)
3307 %arg0 = bitcast double* %a0 to <2 x double>*
3308 store <2 x double> %a1, <2 x double>* %arg0, align 1
3312 define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) {
3313 ; X32-LABEL: test_mm_storeu_si128:
3315 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3316 ; X32-NEXT: movups %xmm0, (%eax)
3319 ; X64-LABEL: test_mm_storeu_si128:
3321 ; X64-NEXT: movups %xmm0, (%rdi)
3323 store <2 x i64> %a1, <2 x i64>* %a0, align 1
3327 define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) {
3328 ; X32-LABEL: test_mm_stream_pd:
3330 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3331 ; X32-NEXT: movntps %xmm0, (%eax)
3334 ; X64-LABEL: test_mm_stream_pd:
3336 ; X64-NEXT: movntps %xmm0, (%rdi)
3338 %arg0 = bitcast double* %a0 to <2 x double>*
3339 store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0
3343 define void @test_mm_stream_si32(i32 *%a0, i32 %a1) {
3344 ; X32-LABEL: test_mm_stream_si32:
3346 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3347 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
3348 ; X32-NEXT: movntil %eax, (%ecx)
3351 ; X64-LABEL: test_mm_stream_si32:
3353 ; X64-NEXT: movntil %esi, (%rdi)
3355 store i32 %a1, i32* %a0, align 1, !nontemporal !0
3359 define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) {
3360 ; X32-LABEL: test_mm_stream_si128:
3362 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
3363 ; X32-NEXT: movntps %xmm0, (%eax)
3366 ; X64-LABEL: test_mm_stream_si128:
3368 ; X64-NEXT: movntps %xmm0, (%rdi)
3370 store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0
3374 define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3375 ; X32-LABEL: test_mm_sub_epi8:
3377 ; X32-NEXT: psubb %xmm1, %xmm0
3380 ; X64-LABEL: test_mm_sub_epi8:
3382 ; X64-NEXT: psubb %xmm1, %xmm0
3384 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3385 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3386 %res = sub <16 x i8> %arg0, %arg1
3387 %bc = bitcast <16 x i8> %res to <2 x i64>
3391 define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3392 ; X32-LABEL: test_mm_sub_epi16:
3394 ; X32-NEXT: psubw %xmm1, %xmm0
3397 ; X64-LABEL: test_mm_sub_epi16:
3399 ; X64-NEXT: psubw %xmm1, %xmm0
3401 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3402 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3403 %res = sub <8 x i16> %arg0, %arg1
3404 %bc = bitcast <8 x i16> %res to <2 x i64>
3408 define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3409 ; X32-LABEL: test_mm_sub_epi32:
3411 ; X32-NEXT: psubd %xmm1, %xmm0
3414 ; X64-LABEL: test_mm_sub_epi32:
3416 ; X64-NEXT: psubd %xmm1, %xmm0
3418 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3419 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3420 %res = sub <4 x i32> %arg0, %arg1
3421 %bc = bitcast <4 x i32> %res to <2 x i64>
3425 define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3426 ; X32-LABEL: test_mm_sub_epi64:
3428 ; X32-NEXT: psubq %xmm1, %xmm0
3431 ; X64-LABEL: test_mm_sub_epi64:
3433 ; X64-NEXT: psubq %xmm1, %xmm0
3435 %res = sub <2 x i64> %a0, %a1
3439 define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
3440 ; X32-LABEL: test_mm_sub_pd:
3442 ; X32-NEXT: subpd %xmm1, %xmm0
3445 ; X64-LABEL: test_mm_sub_pd:
3447 ; X64-NEXT: subpd %xmm1, %xmm0
3449 %res = fsub <2 x double> %a0, %a1
3450 ret <2 x double> %res
3453 define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3454 ; X32-LABEL: test_mm_sub_sd:
3456 ; X32-NEXT: subsd %xmm1, %xmm0
3459 ; X64-LABEL: test_mm_sub_sd:
3461 ; X64-NEXT: subsd %xmm1, %xmm0
3463 %ext0 = extractelement <2 x double> %a0, i32 0
3464 %ext1 = extractelement <2 x double> %a1, i32 0
3465 %fsub = fsub double %ext0, %ext1
3466 %res = insertelement <2 x double> %a0, double %fsub, i32 0
3467 ret <2 x double> %res
3470 define <2 x i64> @test_mm_subs_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3471 ; X32-LABEL: test_mm_subs_epi8:
3473 ; X32-NEXT: psubsb %xmm1, %xmm0
3476 ; X64-LABEL: test_mm_subs_epi8:
3478 ; X64-NEXT: psubsb %xmm1, %xmm0
3480 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3481 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3482 %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %arg0, <16 x i8> %arg1)
3483 %bc = bitcast <16 x i8> %res to <2 x i64>
3486 declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
3488 define <2 x i64> @test_mm_subs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3489 ; X32-LABEL: test_mm_subs_epi16:
3491 ; X32-NEXT: psubsw %xmm1, %xmm0
3494 ; X64-LABEL: test_mm_subs_epi16:
3496 ; X64-NEXT: psubsw %xmm1, %xmm0
3498 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3499 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3500 %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %arg0, <8 x i16> %arg1)
3501 %bc = bitcast <8 x i16> %res to <2 x i64>
3504 declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
3506 define <2 x i64> @test_mm_subs_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3507 ; X32-LABEL: test_mm_subs_epu8:
3509 ; X32-NEXT: psubusb %xmm1, %xmm0
3512 ; X64-LABEL: test_mm_subs_epu8:
3514 ; X64-NEXT: psubusb %xmm1, %xmm0
3516 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3517 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3518 %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %arg0, <16 x i8> %arg1)
3519 %bc = bitcast <16 x i8> %res to <2 x i64>
3522 declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
3524 define <2 x i64> @test_mm_subs_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3525 ; X32-LABEL: test_mm_subs_epu16:
3527 ; X32-NEXT: psubusw %xmm1, %xmm0
3530 ; X64-LABEL: test_mm_subs_epu16:
3532 ; X64-NEXT: psubusw %xmm1, %xmm0
3534 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3535 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3536 %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %arg0, <8 x i16> %arg1)
3537 %bc = bitcast <8 x i16> %res to <2 x i64>
3540 declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
3542 define i32 @test_mm_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3543 ; X32-LABEL: test_mm_ucomieq_sd:
3545 ; X32-NEXT: ucomisd %xmm1, %xmm0
3546 ; X32-NEXT: setnp %al
3547 ; X32-NEXT: sete %cl
3548 ; X32-NEXT: andb %al, %cl
3549 ; X32-NEXT: movzbl %cl, %eax
3552 ; X64-LABEL: test_mm_ucomieq_sd:
3554 ; X64-NEXT: ucomisd %xmm1, %xmm0
3555 ; X64-NEXT: setnp %al
3556 ; X64-NEXT: sete %cl
3557 ; X64-NEXT: andb %al, %cl
3558 ; X64-NEXT: movzbl %cl, %eax
3560 %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
3563 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
3565 define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3566 ; X32-LABEL: test_mm_ucomige_sd:
3568 ; X32-NEXT: xorl %eax, %eax
3569 ; X32-NEXT: ucomisd %xmm1, %xmm0
3570 ; X32-NEXT: setae %al
3573 ; X64-LABEL: test_mm_ucomige_sd:
3575 ; X64-NEXT: xorl %eax, %eax
3576 ; X64-NEXT: ucomisd %xmm1, %xmm0
3577 ; X64-NEXT: setae %al
3579 %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
3582 declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
3584 define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3585 ; X32-LABEL: test_mm_ucomigt_sd:
3587 ; X32-NEXT: xorl %eax, %eax
3588 ; X32-NEXT: ucomisd %xmm1, %xmm0
3589 ; X32-NEXT: seta %al
3592 ; X64-LABEL: test_mm_ucomigt_sd:
3594 ; X64-NEXT: xorl %eax, %eax
3595 ; X64-NEXT: ucomisd %xmm1, %xmm0
3596 ; X64-NEXT: seta %al
3598 %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
3601 declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
3603 define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3604 ; X32-LABEL: test_mm_ucomile_sd:
3606 ; X32-NEXT: xorl %eax, %eax
3607 ; X32-NEXT: ucomisd %xmm0, %xmm1
3608 ; X32-NEXT: setae %al
3611 ; X64-LABEL: test_mm_ucomile_sd:
3613 ; X64-NEXT: xorl %eax, %eax
3614 ; X64-NEXT: ucomisd %xmm0, %xmm1
3615 ; X64-NEXT: setae %al
3617 %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
3620 declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
3622 define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3623 ; X32-LABEL: test_mm_ucomilt_sd:
3625 ; X32-NEXT: xorl %eax, %eax
3626 ; X32-NEXT: ucomisd %xmm0, %xmm1
3627 ; X32-NEXT: seta %al
3630 ; X64-LABEL: test_mm_ucomilt_sd:
3632 ; X64-NEXT: xorl %eax, %eax
3633 ; X64-NEXT: ucomisd %xmm0, %xmm1
3634 ; X64-NEXT: seta %al
3636 %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
3639 declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
3641 define i32 @test_mm_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
3642 ; X32-LABEL: test_mm_ucomineq_sd:
3644 ; X32-NEXT: ucomisd %xmm1, %xmm0
3645 ; X32-NEXT: setp %al
3646 ; X32-NEXT: setne %cl
3647 ; X32-NEXT: orb %al, %cl
3648 ; X32-NEXT: movzbl %cl, %eax
3651 ; X64-LABEL: test_mm_ucomineq_sd:
3653 ; X64-NEXT: ucomisd %xmm1, %xmm0
3654 ; X64-NEXT: setp %al
3655 ; X64-NEXT: setne %cl
3656 ; X64-NEXT: orb %al, %cl
3657 ; X64-NEXT: movzbl %cl, %eax
3659 %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
3662 declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
3664 define <2 x double> @test_mm_undefined_pd() {
3665 ; X32-LABEL: test_mm_undefined_pd:
3669 ; X64-LABEL: test_mm_undefined_pd:
3672 ret <2 x double> undef
3675 define <2 x i64> @test_mm_undefined_si128() {
3676 ; X32-LABEL: test_mm_undefined_si128:
3680 ; X64-LABEL: test_mm_undefined_si128:
3686 define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
3687 ; X32-LABEL: test_mm_unpackhi_epi8:
3689 ; X32-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
3692 ; X64-LABEL: test_mm_unpackhi_epi8:
3694 ; X64-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
3696 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3697 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3698 %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
3699 %bc = bitcast <16 x i8> %res to <2 x i64>
3703 define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
3704 ; X32-LABEL: test_mm_unpackhi_epi16:
3706 ; X32-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3709 ; X64-LABEL: test_mm_unpackhi_epi16:
3711 ; X64-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3713 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3714 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3715 %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
3716 %bc = bitcast <8 x i16> %res to <2 x i64>
3720 define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
3721 ; X32-LABEL: test_mm_unpackhi_epi32:
3723 ; X32-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3726 ; X64-LABEL: test_mm_unpackhi_epi32:
3728 ; X64-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3730 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3731 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3732 %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
3733 %bc = bitcast <4 x i32> %res to <2 x i64>
3737 define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
3738 ; X32-LABEL: test_mm_unpackhi_epi64:
3740 ; X32-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
3743 ; X64-LABEL: test_mm_unpackhi_epi64:
3745 ; X64-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
3747 %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
3751 define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) {
3752 ; X32-LABEL: test_mm_unpackhi_pd:
3754 ; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
3757 ; X64-LABEL: test_mm_unpackhi_pd:
3759 ; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
3761 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
3762 ret <2 x double> %res
3765 define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) {
3766 ; X32-LABEL: test_mm_unpacklo_epi8:
3768 ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3771 ; X64-LABEL: test_mm_unpacklo_epi8:
3773 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3775 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
3776 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
3777 %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
3778 %bc = bitcast <16 x i8> %res to <2 x i64>
3782 define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
3783 ; X32-LABEL: test_mm_unpacklo_epi16:
3785 ; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3788 ; X64-LABEL: test_mm_unpacklo_epi16:
3790 ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
3792 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
3793 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
3794 %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
3795 %bc = bitcast <8 x i16> %res to <2 x i64>
3799 define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
3800 ; X32-LABEL: test_mm_unpacklo_epi32:
3802 ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3805 ; X64-LABEL: test_mm_unpacklo_epi32:
3807 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
3809 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3810 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3811 %res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
3812 %bc = bitcast <4 x i32> %res to <2 x i64>
3816 define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
3817 ; X32-LABEL: test_mm_unpacklo_epi64:
3819 ; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3822 ; X64-LABEL: test_mm_unpacklo_epi64:
3824 ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3826 %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
3830 define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
3831 ; X32-LABEL: test_mm_unpacklo_pd:
3833 ; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3836 ; X64-LABEL: test_mm_unpacklo_pd:
3838 ; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
3840 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
3841 ret <2 x double> %res
3844 define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
3845 ; X32-LABEL: test_mm_xor_pd:
3847 ; X32-NEXT: xorps %xmm1, %xmm0
3850 ; X64-LABEL: test_mm_xor_pd:
3852 ; X64-NEXT: xorps %xmm1, %xmm0
3854 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
3855 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
3856 %res = xor <4 x i32> %arg0, %arg1
3857 %bc = bitcast <4 x i32> %res to <2 x double>
3858 ret <2 x double> %bc
3861 define <2 x i64> @test_mm_xor_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
3862 ; X32-LABEL: test_mm_xor_si128:
3864 ; X32-NEXT: xorps %xmm1, %xmm0
3867 ; X64-LABEL: test_mm_xor_si128:
3869 ; X64-NEXT: xorps %xmm1, %xmm0
3871 %res = xor <2 x i64> %a0, %a1