1 // RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Werror | FileCheck %s
2 // RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s
4 // Don't include mm_malloc.h, it's system specific.
9 // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
// Expects _mm_add_epi8 to be emitted as a generic IR add on <16 x i8>.
11 __m128i test_mm_add_epi8(__m128i A, __m128i B) {
12 // CHECK-LABEL: test_mm_add_epi8
13 // CHECK: add <16 x i8>
14 return _mm_add_epi8(A, B);
// Expects _mm_add_epi16 to be emitted as a generic IR add on <8 x i16>.
17 __m128i test_mm_add_epi16(__m128i A, __m128i B) {
18 // CHECK-LABEL: test_mm_add_epi16
19 // CHECK: add <8 x i16>
20 return _mm_add_epi16(A, B);
// Expects _mm_add_epi32 to be emitted as a generic IR add on <4 x i32>.
23 __m128i test_mm_add_epi32(__m128i A, __m128i B) {
24 // CHECK-LABEL: test_mm_add_epi32
25 // CHECK: add <4 x i32>
26 return _mm_add_epi32(A, B);
// Expects _mm_add_epi64 to be emitted as a generic IR add on <2 x i64>.
29 __m128i test_mm_add_epi64(__m128i A, __m128i B) {
30 // CHECK-LABEL: test_mm_add_epi64
31 // CHECK: add <2 x i64>
32 return _mm_add_epi64(A, B);
// Expects _mm_add_pd to be emitted as a generic IR fadd on <2 x double>.
35 __m128d test_mm_add_pd(__m128d A, __m128d B) {
36 // CHECK-LABEL: test_mm_add_pd
37 // CHECK: fadd <2 x double>
38 return _mm_add_pd(A, B);
// Expects _mm_add_sd as: extract element 0 (twice), a scalar fadd on double,
// then an insert of a double into lane 0 of a <2 x double>.
// The scalar fadd is verified explicitly, mirroring the fdiv/fmul expectations
// in the _mm_div_sd and _mm_mul_sd tests; without it the test would still pass
// if no addition were emitted between the extracts and the insert.
41 __m128d test_mm_add_sd(__m128d A, __m128d B) {
42 // CHECK-LABEL: test_mm_add_sd
43 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
44 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
// CHECK: fadd double
46 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
47 return _mm_add_sd(A, B);
// Expects _mm_adds_epi8 to call llvm.x86.sse2.padds.b on two <16 x i8> operands.
50 __m128i test_mm_adds_epi8(__m128i A, __m128i B) {
51 // CHECK-LABEL: test_mm_adds_epi8
52 // CHECK: call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
53 return _mm_adds_epi8(A, B);
// Expects _mm_adds_epi16 to call llvm.x86.sse2.padds.w on two <8 x i16> operands.
56 __m128i test_mm_adds_epi16(__m128i A, __m128i B) {
57 // CHECK-LABEL: test_mm_adds_epi16
58 // CHECK: call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
59 return _mm_adds_epi16(A, B);
// Expects _mm_adds_epu8 to call llvm.x86.sse2.paddus.b on two <16 x i8> operands.
62 __m128i test_mm_adds_epu8(__m128i A, __m128i B) {
63 // CHECK-LABEL: test_mm_adds_epu8
64 // CHECK: call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
65 return _mm_adds_epu8(A, B);
// Expects _mm_adds_epu16 to call llvm.x86.sse2.paddus.w on two <8 x i16> operands.
68 __m128i test_mm_adds_epu16(__m128i A, __m128i B) {
69 // CHECK-LABEL: test_mm_adds_epu16
70 // CHECK: call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
71 return _mm_adds_epu16(A, B);
// Expects _mm_and_pd to be emitted as an integer `and` on <4 x i32>.
74 __m128d test_mm_and_pd(__m128d A, __m128d B) {
75 // CHECK-LABEL: test_mm_and_pd
76 // CHECK: and <4 x i32>
77 return _mm_and_pd(A, B);
// Expects _mm_and_si128 to be emitted as an `and` on <2 x i64>.
80 __m128i test_mm_and_si128(__m128i A, __m128i B) {
81 // CHECK-LABEL: test_mm_and_si128
82 // CHECK: and <2 x i64>
83 return _mm_and_si128(A, B);
// Expects _mm_andnot_pd as an xor with all-ones (a bitwise not) followed by an
// `and`, both on <4 x i32>.
86 __m128d test_mm_andnot_pd(__m128d A, __m128d B) {
87 // CHECK-LABEL: test_mm_andnot_pd
88 // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
89 // CHECK: and <4 x i32>
90 return _mm_andnot_pd(A, B);
// Expects _mm_andnot_si128 as an xor with all-ones (a bitwise not) followed by
// an `and`, both on <2 x i64>.
93 __m128i test_mm_andnot_si128(__m128i A, __m128i B) {
94 // CHECK-LABEL: test_mm_andnot_si128
95 // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
96 // CHECK: and <2 x i64>
97 return _mm_andnot_si128(A, B);
// Expects _mm_avg_epu8 to call llvm.x86.sse2.pavg.b on two <16 x i8> operands.
100 __m128i test_mm_avg_epu8(__m128i A, __m128i B) {
101 // CHECK-LABEL: test_mm_avg_epu8
102 // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
103 return _mm_avg_epu8(A, B);
// Expects _mm_avg_epu16 to call llvm.x86.sse2.pavg.w on two <8 x i16> operands.
106 __m128i test_mm_avg_epu16(__m128i A, __m128i B) {
107 // CHECK-LABEL: test_mm_avg_epu16
108 // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
109 return _mm_avg_epu16(A, B);
// Expects _mm_bslli_si128(A, 5) as a two-input <16 x i8> shufflevector whose
// mask selects the consecutive indices 11 through 26.
112 __m128i test_mm_bslli_si128(__m128i A) {
113 // CHECK-LABEL: test_mm_bslli_si128
114 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
115 return _mm_bslli_si128(A, 5);
// Expects _mm_bsrli_si128(A, 5) as a two-input <16 x i8> shufflevector whose
// mask selects the consecutive indices 5 through 20.
118 __m128i test_mm_bsrli_si128(__m128i A) {
119 // CHECK-LABEL: test_mm_bsrli_si128
120 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
121 return _mm_bsrli_si128(A, 5);
// Expects _mm_castpd_ps to produce a bitcast from <2 x double> to <4 x float>.
124 __m128 test_mm_castpd_ps(__m128d A) {
125 // CHECK-LABEL: test_mm_castpd_ps
126 // CHECK: bitcast <2 x double> %{{.*}} to <4 x float>
127 return _mm_castpd_ps(A);
// Expects _mm_castpd_si128 to produce a bitcast from <2 x double> to <2 x i64>.
130 __m128i test_mm_castpd_si128(__m128d A) {
131 // CHECK-LABEL: test_mm_castpd_si128
132 // CHECK: bitcast <2 x double> %{{.*}} to <2 x i64>
133 return _mm_castpd_si128(A);
// Expects _mm_castps_pd to produce a bitcast from <4 x float> to <2 x double>.
136 __m128d test_mm_castps_pd(__m128 A) {
137 // CHECK-LABEL: test_mm_castps_pd
138 // CHECK: bitcast <4 x float> %{{.*}} to <2 x double>
139 return _mm_castps_pd(A);
// Expects _mm_castps_si128 to produce a bitcast from <4 x float> to <2 x i64>.
142 __m128i test_mm_castps_si128(__m128 A) {
143 // CHECK-LABEL: test_mm_castps_si128
144 // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64>
145 return _mm_castps_si128(A);
// Expects _mm_castsi128_pd to produce a bitcast from <2 x i64> to <2 x double>.
148 __m128d test_mm_castsi128_pd(__m128i A) {
149 // CHECK-LABEL: test_mm_castsi128_pd
150 // CHECK: bitcast <2 x i64> %{{.*}} to <2 x double>
151 return _mm_castsi128_pd(A);
// Expects _mm_castsi128_ps to produce a bitcast from <2 x i64> to <4 x float>.
154 __m128 test_mm_castsi128_ps(__m128i A) {
155 // CHECK-LABEL: test_mm_castsi128_ps
156 // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
157 return _mm_castsi128_ps(A);
// Expects _mm_clflush to call llvm.x86.sse2.clflush with an i8* argument.
// The intrinsic has to be invoked in the body: with an empty body no such call
// is emitted and the expectation below can never match.
160 void test_mm_clflush(void* A) {
161 // CHECK-LABEL: test_mm_clflush
162 // CHECK: call void @llvm.x86.sse2.clflush(i8* %{{.*}})
_mm_clflush(A);
// Expects _mm_cmpeq_epi8 to produce an icmp eq on <16 x i8>.
166 __m128i test_mm_cmpeq_epi8(__m128i A, __m128i B) {
167 // CHECK-LABEL: test_mm_cmpeq_epi8
168 // CHECK: icmp eq <16 x i8>
169 return _mm_cmpeq_epi8(A, B);
// Expects _mm_cmpeq_epi16 to produce an icmp eq on <8 x i16>.
172 __m128i test_mm_cmpeq_epi16(__m128i A, __m128i B) {
173 // CHECK-LABEL: test_mm_cmpeq_epi16
174 // CHECK: icmp eq <8 x i16>
175 return _mm_cmpeq_epi16(A, B);
// Expects _mm_cmpeq_epi32 to produce an icmp eq on <4 x i32>.
178 __m128i test_mm_cmpeq_epi32(__m128i A, __m128i B) {
179 // CHECK-LABEL: test_mm_cmpeq_epi32
180 // CHECK: icmp eq <4 x i32>
181 return _mm_cmpeq_epi32(A, B);
// Expects _mm_cmpeq_pd as an fcmp oeq on <2 x double>, then on the immediately
// following lines: sext of the <2 x i1> result to <2 x i64>, bitcast to
// <2 x double>, and a ret of that value.
184 __m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
185 // CHECK-LABEL: test_mm_cmpeq_pd
186 // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
187 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
188 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
189 // CHECK-NEXT: ret <2 x double> [[BC]]
190 return _mm_cmpeq_pd(A, B);
// Expects _mm_cmpeq_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 0.
193 __m128d test_mm_cmpeq_sd(__m128d A, __m128d B) {
194 // CHECK-LABEL: test_mm_cmpeq_sd
195 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
196 return _mm_cmpeq_sd(A, B);
// Expects _mm_cmpge_pd as an fcmp ole on <2 x double>, then on the immediately
// following lines: sext to <2 x i64>, bitcast to <2 x double>, and ret.
// NOTE(review): the predicate is `ole`, the same one the _mm_cmple_pd test
// expects; presumably the header swaps the operands, but the pattern does not
// pin operand order -- confirm against the header.
199 __m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
200 // CHECK-LABEL: test_mm_cmpge_pd
201 // CHECK: [[CMP:%.*]] = fcmp ole <2 x double>
202 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
203 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
204 // CHECK-NEXT: ret <2 x double> [[BC]]
205 return _mm_cmpge_pd(A, B);
// Expects _mm_cmpge_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 2,
// followed by extract/insert pairs for lane 0 (into an undef vector) and lane 1.
// NOTE(review): immediate 2 is also what the _mm_cmple_sd test expects;
// presumably the operands are swapped and the result is rebuilt afterwards --
// the pattern does not show which vectors feed the extracts.
208 __m128d test_mm_cmpge_sd(__m128d A, __m128d B) {
209 // CHECK-LABEL: test_mm_cmpge_sd
210 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
211 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
212 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
213 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
214 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
215 return _mm_cmpge_sd(A, B);
// Expects _mm_cmpgt_epi8 to produce a signed icmp sgt on <16 x i8>.
218 __m128i test_mm_cmpgt_epi8(__m128i A, __m128i B) {
219 // CHECK-LABEL: test_mm_cmpgt_epi8
220 // CHECK: icmp sgt <16 x i8>
221 return _mm_cmpgt_epi8(A, B);
// Expects _mm_cmpgt_epi16 to produce a signed icmp sgt on <8 x i16>.
224 __m128i test_mm_cmpgt_epi16(__m128i A, __m128i B) {
225 // CHECK-LABEL: test_mm_cmpgt_epi16
226 // CHECK: icmp sgt <8 x i16>
227 return _mm_cmpgt_epi16(A, B);
// Expects _mm_cmpgt_epi32 to produce a signed icmp sgt on <4 x i32>.
230 __m128i test_mm_cmpgt_epi32(__m128i A, __m128i B) {
231 // CHECK-LABEL: test_mm_cmpgt_epi32
232 // CHECK: icmp sgt <4 x i32>
233 return _mm_cmpgt_epi32(A, B);
// Expects _mm_cmpgt_pd as an fcmp olt on <2 x double>, then on the immediately
// following lines: sext to <2 x i64>, bitcast to <2 x double>, and ret.
// NOTE(review): the predicate is `olt`, the same one the _mm_cmplt_pd test
// expects; presumably the operands are swapped -- the pattern does not pin
// operand order.
236 __m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
237 // CHECK-LABEL: test_mm_cmpgt_pd
238 // CHECK: [[CMP:%.*]] = fcmp olt <2 x double>
239 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
240 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
241 // CHECK-NEXT: ret <2 x double> [[BC]]
242 return _mm_cmpgt_pd(A, B);
// Expects _mm_cmpgt_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 1,
// followed by extract/insert pairs for lane 0 (into an undef vector) and lane 1.
// NOTE(review): immediate 1 is also what the _mm_cmplt_sd test expects;
// presumably the operands are swapped and the result rebuilt -- not visible in
// the pattern.
245 __m128d test_mm_cmpgt_sd(__m128d A, __m128d B) {
246 // CHECK-LABEL: test_mm_cmpgt_sd
247 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
248 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
249 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
250 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
251 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
252 return _mm_cmpgt_sd(A, B);
// Expects _mm_cmple_pd as an fcmp ole on <2 x double>, then on the immediately
// following lines: sext to <2 x i64>, bitcast to <2 x double>, and ret.
255 __m128d test_mm_cmple_pd(__m128d A, __m128d B) {
256 // CHECK-LABEL: test_mm_cmple_pd
257 // CHECK: [[CMP:%.*]] = fcmp ole <2 x double>
258 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
259 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
260 // CHECK-NEXT: ret <2 x double> [[BC]]
261 return _mm_cmple_pd(A, B);
// Expects _mm_cmple_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 2.
264 __m128d test_mm_cmple_sd(__m128d A, __m128d B) {
265 // CHECK-LABEL: test_mm_cmple_sd
266 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
267 return _mm_cmple_sd(A, B);
// Expects _mm_cmplt_epi8 to produce an icmp sgt on <16 x i8>.
// NOTE(review): same `sgt` predicate as the _mm_cmpgt_epi8 test; presumably
// the operands are swapped -- the pattern does not pin operand order.
270 __m128i test_mm_cmplt_epi8(__m128i A, __m128i B) {
271 // CHECK-LABEL: test_mm_cmplt_epi8
272 // CHECK: icmp sgt <16 x i8>
273 return _mm_cmplt_epi8(A, B);
// Expects _mm_cmplt_epi16 to produce an icmp sgt on <8 x i16>.
// NOTE(review): same `sgt` predicate as the _mm_cmpgt_epi16 test; presumably
// the operands are swapped -- the pattern does not pin operand order.
276 __m128i test_mm_cmplt_epi16(__m128i A, __m128i B) {
277 // CHECK-LABEL: test_mm_cmplt_epi16
278 // CHECK: icmp sgt <8 x i16>
279 return _mm_cmplt_epi16(A, B);
// Expects _mm_cmplt_epi32 to produce an icmp sgt on <4 x i32>.
// NOTE(review): same `sgt` predicate as the _mm_cmpgt_epi32 test; presumably
// the operands are swapped -- the pattern does not pin operand order.
282 __m128i test_mm_cmplt_epi32(__m128i A, __m128i B) {
283 // CHECK-LABEL: test_mm_cmplt_epi32
284 // CHECK: icmp sgt <4 x i32>
285 return _mm_cmplt_epi32(A, B);
// Expects _mm_cmplt_pd as an fcmp olt on <2 x double>, then on the immediately
// following lines: sext to <2 x i64>, bitcast to <2 x double>, and ret.
288 __m128d test_mm_cmplt_pd(__m128d A, __m128d B) {
289 // CHECK-LABEL: test_mm_cmplt_pd
290 // CHECK: [[CMP:%.*]] = fcmp olt <2 x double>
291 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
292 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
293 // CHECK-NEXT: ret <2 x double> [[BC]]
294 return _mm_cmplt_pd(A, B);
// Expects _mm_cmplt_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 1.
297 __m128d test_mm_cmplt_sd(__m128d A, __m128d B) {
298 // CHECK-LABEL: test_mm_cmplt_sd
299 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
300 return _mm_cmplt_sd(A, B);
// Expects _mm_cmpneq_pd as an fcmp une on <2 x double>, then on the immediately
// following lines: sext to <2 x i64>, bitcast to <2 x double>, and ret.
303 __m128d test_mm_cmpneq_pd(__m128d A, __m128d B) {
304 // CHECK-LABEL: test_mm_cmpneq_pd
305 // CHECK: [[CMP:%.*]] = fcmp une <2 x double>
306 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
307 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
308 // CHECK-NEXT: ret <2 x double> [[BC]]
309 return _mm_cmpneq_pd(A, B);
// Expects _mm_cmpneq_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 4.
312 __m128d test_mm_cmpneq_sd(__m128d A, __m128d B) {
313 // CHECK-LABEL: test_mm_cmpneq_sd
314 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
315 return _mm_cmpneq_sd(A, B);
// Expects _mm_cmpnge_pd as an fcmp ugt on <2 x double>, then on the immediately
// following lines: sext to <2 x i64>, bitcast to <2 x double>, and ret.
// NOTE(review): `ugt` is also what the _mm_cmpnle_pd test expects; presumably
// the operands are swapped -- the pattern does not pin operand order.
318 __m128d test_mm_cmpnge_pd(__m128d A, __m128d B) {
319 // CHECK-LABEL: test_mm_cmpnge_pd
320 // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double>
321 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
322 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
323 // CHECK-NEXT: ret <2 x double> [[BC]]
324 return _mm_cmpnge_pd(A, B);
// Expects _mm_cmpnge_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 6,
// followed by extract/insert pairs for lane 0 (into an undef vector) and lane 1.
// NOTE(review): immediate 6 is also what the _mm_cmpnle_sd test expects;
// presumably the operands are swapped and the result rebuilt -- not visible in
// the pattern.
327 __m128d test_mm_cmpnge_sd(__m128d A, __m128d B) {
328 // CHECK-LABEL: test_mm_cmpnge_sd
329 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
330 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
331 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
332 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
333 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
334 return _mm_cmpnge_sd(A, B);
// Expects _mm_cmpngt_pd as an fcmp uge on <2 x double>, then on the immediately
// following lines: sext to <2 x i64>, bitcast to <2 x double>, and ret.
// NOTE(review): `uge` is also what the _mm_cmpnlt_pd test expects; presumably
// the operands are swapped -- the pattern does not pin operand order.
337 __m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
338 // CHECK-LABEL: test_mm_cmpngt_pd
339 // CHECK: [[CMP:%.*]] = fcmp uge <2 x double>
340 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
341 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
342 // CHECK-NEXT: ret <2 x double> [[BC]]
343 return _mm_cmpngt_pd(A, B);
// Expects _mm_cmpngt_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 5,
// followed by extract/insert pairs for lane 0 (into an undef vector) and lane 1.
// NOTE(review): immediate 5 is also what the _mm_cmpnlt_sd test expects;
// presumably the operands are swapped and the result rebuilt -- not visible in
// the pattern.
346 __m128d test_mm_cmpngt_sd(__m128d A, __m128d B) {
347 // CHECK-LABEL: test_mm_cmpngt_sd
348 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
349 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
350 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
351 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
352 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
353 return _mm_cmpngt_sd(A, B);
// Expects _mm_cmpnle_pd as an fcmp ugt on <2 x double>, then on the immediately
// following lines: sext to <2 x i64>, bitcast to <2 x double>, and ret.
356 __m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
357 // CHECK-LABEL: test_mm_cmpnle_pd
358 // CHECK: [[CMP:%.*]] = fcmp ugt <2 x double>
359 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
360 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
361 // CHECK-NEXT: ret <2 x double> [[BC]]
362 return _mm_cmpnle_pd(A, B);
// Expects _mm_cmpnle_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 6.
365 __m128d test_mm_cmpnle_sd(__m128d A, __m128d B) {
366 // CHECK-LABEL: test_mm_cmpnle_sd
367 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
368 return _mm_cmpnle_sd(A, B);
// Expects _mm_cmpnlt_pd as an fcmp uge on <2 x double>, then on the immediately
// following lines: sext to <2 x i64>, bitcast to <2 x double>, and ret.
371 __m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
372 // CHECK-LABEL: test_mm_cmpnlt_pd
373 // CHECK: [[CMP:%.*]] = fcmp uge <2 x double>
374 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
375 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
376 // CHECK-NEXT: ret <2 x double> [[BC]]
377 return _mm_cmpnlt_pd(A, B);
// Expects _mm_cmpnlt_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 5.
380 __m128d test_mm_cmpnlt_sd(__m128d A, __m128d B) {
381 // CHECK-LABEL: test_mm_cmpnlt_sd
382 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
383 return _mm_cmpnlt_sd(A, B);
// Expects _mm_cmpord_pd as an fcmp ord on <2 x double>, then on the immediately
// following lines: sext to <2 x i64>, bitcast to <2 x double>, and ret.
386 __m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
387 // CHECK-LABEL: test_mm_cmpord_pd
388 // CHECK: [[CMP:%.*]] = fcmp ord <2 x double>
389 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
390 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
391 // CHECK-NEXT: ret <2 x double> [[BC]]
392 return _mm_cmpord_pd(A, B);
// Expects _mm_cmpord_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 7.
395 __m128d test_mm_cmpord_sd(__m128d A, __m128d B) {
396 // CHECK-LABEL: test_mm_cmpord_sd
397 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
398 return _mm_cmpord_sd(A, B);
// Expects _mm_cmpunord_pd as an fcmp uno on <2 x double>, then on the
// immediately following lines: sext to <2 x i64>, bitcast to <2 x double>, ret.
401 __m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
402 // CHECK-LABEL: test_mm_cmpunord_pd
403 // CHECK: [[CMP:%.*]] = fcmp uno <2 x double>
404 // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
405 // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
406 // CHECK-NEXT: ret <2 x double> [[BC]]
407 return _mm_cmpunord_pd(A, B);
// Expects _mm_cmpunord_sd to call llvm.x86.sse2.cmp.sd with predicate immediate 3.
410 __m128d test_mm_cmpunord_sd(__m128d A, __m128d B) {
411 // CHECK-LABEL: test_mm_cmpunord_sd
412 // CHECK: call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
413 return _mm_cmpunord_sd(A, B);
// Expects _mm_comieq_sd to call llvm.x86.sse2.comieq.sd, which yields an i32.
416 int test_mm_comieq_sd(__m128d A, __m128d B) {
417 // CHECK-LABEL: test_mm_comieq_sd
418 // CHECK: call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
419 return _mm_comieq_sd(A, B);
// Expects _mm_comige_sd to call llvm.x86.sse2.comige.sd, which yields an i32.
422 int test_mm_comige_sd(__m128d A, __m128d B) {
423 // CHECK-LABEL: test_mm_comige_sd
424 // CHECK: call i32 @llvm.x86.sse2.comige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
425 return _mm_comige_sd(A, B);
// Expects _mm_comigt_sd to call llvm.x86.sse2.comigt.sd, which yields an i32.
428 int test_mm_comigt_sd(__m128d A, __m128d B) {
429 // CHECK-LABEL: test_mm_comigt_sd
430 // CHECK: call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
431 return _mm_comigt_sd(A, B);
// Expects _mm_comile_sd to call llvm.x86.sse2.comile.sd, which yields an i32.
434 int test_mm_comile_sd(__m128d A, __m128d B) {
435 // CHECK-LABEL: test_mm_comile_sd
436 // CHECK: call i32 @llvm.x86.sse2.comile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
437 return _mm_comile_sd(A, B);
// Expects _mm_comilt_sd to call llvm.x86.sse2.comilt.sd, which yields an i32.
440 int test_mm_comilt_sd(__m128d A, __m128d B) {
441 // CHECK-LABEL: test_mm_comilt_sd
442 // CHECK: call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
443 return _mm_comilt_sd(A, B);
// Expects _mm_comineq_sd to call llvm.x86.sse2.comineq.sd, which yields an i32.
446 int test_mm_comineq_sd(__m128d A, __m128d B) {
447 // CHECK-LABEL: test_mm_comineq_sd
448 // CHECK: call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
449 return _mm_comineq_sd(A, B);
// Expects _mm_cvtepi32_pd as a shufflevector keeping lanes 0 and 1 of a
// <4 x i32>, then a sitofp from <2 x i32> to <2 x double>.
452 __m128d test_mm_cvtepi32_pd(__m128i A) {
453 // CHECK-LABEL: test_mm_cvtepi32_pd
454 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i32> <i32 0, i32 1>
455 // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
456 return _mm_cvtepi32_pd(A);
// Expects _mm_cvtepi32_ps to call llvm.x86.sse2.cvtdq2ps on a <4 x i32>.
459 __m128 test_mm_cvtepi32_ps(__m128i A) {
460 // CHECK-LABEL: test_mm_cvtepi32_ps
461 // CHECK: call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %{{.*}})
462 return _mm_cvtepi32_ps(A);
// Expects _mm_cvtpd_epi32 to call llvm.x86.sse2.cvtpd2dq on a <2 x double>.
465 __m128i test_mm_cvtpd_epi32(__m128d A) {
466 // CHECK-LABEL: test_mm_cvtpd_epi32
467 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %{{.*}})
468 return _mm_cvtpd_epi32(A);
// Expects _mm_cvtpd_ps to call llvm.x86.sse2.cvtpd2ps on a <2 x double>.
471 __m128 test_mm_cvtpd_ps(__m128d A) {
472 // CHECK-LABEL: test_mm_cvtpd_ps
473 // CHECK: call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %{{.*}})
474 return _mm_cvtpd_ps(A);
// Expects _mm_cvtps_epi32 to call llvm.x86.sse2.cvtps2dq on a <4 x float>.
477 __m128i test_mm_cvtps_epi32(__m128 A) {
478 // CHECK-LABEL: test_mm_cvtps_epi32
479 // CHECK: call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %{{.*}})
480 return _mm_cvtps_epi32(A);
// Expects _mm_cvtps_pd as a shufflevector keeping lanes 0 and 1 of a
// <4 x float>, then an fpext from <2 x float> to <2 x double>.
483 __m128d test_mm_cvtps_pd(__m128 A) {
484 // CHECK-LABEL: test_mm_cvtps_pd
485 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
486 // CHECK: fpext <2 x float> %{{.*}} to <2 x double>
487 return _mm_cvtps_pd(A);
// Expects _mm_cvtsd_f64 to extract element 0 of a <2 x double>.
490 double test_mm_cvtsd_f64(__m128d A) {
491 // CHECK-LABEL: test_mm_cvtsd_f64
492 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
493 return _mm_cvtsd_f64(A);
// Expects _mm_cvtsd_si32 to call llvm.x86.sse2.cvtsd2si, which yields an i32.
496 int test_mm_cvtsd_si32(__m128d A) {
497 // CHECK-LABEL: test_mm_cvtsd_si32
498 // CHECK: call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %{{.*}})
499 return _mm_cvtsd_si32(A);
// Expects _mm_cvtsd_si64 to call llvm.x86.sse2.cvtsd2si64, which yields an i64.
502 long long test_mm_cvtsd_si64(__m128d A) {
503 // CHECK-LABEL: test_mm_cvtsd_si64
504 // CHECK: call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %{{.*}})
505 return _mm_cvtsd_si64(A);
// Expects _mm_cvtsd_ss to call llvm.x86.sse2.cvtsd2ss with a <4 x float> and a
// <2 x double> operand.
508 __m128 test_mm_cvtsd_ss(__m128 A, __m128d B) {
509 // CHECK-LABEL: test_mm_cvtsd_ss
510 // CHECK: call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %{{.*}}, <2 x double> %{{.*}})
511 return _mm_cvtsd_ss(A, B);
// Expects _mm_cvtsi128_si32 to extract element 0 of a <4 x i32>.
514 int test_mm_cvtsi128_si32(__m128i A) {
515 // CHECK-LABEL: test_mm_cvtsi128_si32
516 // CHECK: extractelement <4 x i32> %{{.*}}, i32 0
517 return _mm_cvtsi128_si32(A);
// Expects _mm_cvtsi128_si64 to extract element 0 of a <2 x i64>.
520 long long test_mm_cvtsi128_si64(__m128i A) {
521 // CHECK-LABEL: test_mm_cvtsi128_si64
522 // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
523 return _mm_cvtsi128_si64(A);
// Expects _mm_cvtsi32_sd as a scalar sitofp from i32 to double, then an insert
// of a double into lane 0 of a <2 x double>.
526 __m128d test_mm_cvtsi32_sd(__m128d A, int B) {
527 // CHECK-LABEL: test_mm_cvtsi32_sd
528 // CHECK: sitofp i32 %{{.*}} to double
529 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
530 return _mm_cvtsi32_sd(A, B);
// Expects _mm_cvtsi32_si128 to build a <4 x i32> from undef: a variable value
// in lane 0 and constant zeros in lanes 1, 2 and 3.
533 __m128i test_mm_cvtsi32_si128(int A) {
534 // CHECK-LABEL: test_mm_cvtsi32_si128
535 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
536 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 1
537 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 2
538 // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 3
539 return _mm_cvtsi32_si128(A);
// Expects _mm_cvtsi64_sd as a scalar sitofp from i64 to double, then an insert
// of a double into lane 0 of a <2 x double>.
542 __m128d test_mm_cvtsi64_sd(__m128d A, long long B) {
543 // CHECK-LABEL: test_mm_cvtsi64_sd
544 // CHECK: sitofp i64 %{{.*}} to double
545 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
546 return _mm_cvtsi64_sd(A, B);
// Expects _mm_cvtsi64_si128 to build a <2 x i64> from undef: a variable value
// in lane 0 and a constant zero in lane 1.
549 __m128i test_mm_cvtsi64_si128(long long A) {
550 // CHECK-LABEL: test_mm_cvtsi64_si128
551 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
552 // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
553 return _mm_cvtsi64_si128(A);
// Expects _mm_cvtss_sd as: extract element 0 of a <4 x float>, fpext it to
// double, then insert a double into lane 0 of a <2 x double>.
556 __m128d test_mm_cvtss_sd(__m128d A, __m128 B) {
557 // CHECK-LABEL: test_mm_cvtss_sd
558 // CHECK: extractelement <4 x float> %{{.*}}, i32 0
559 // CHECK: fpext float %{{.*}} to double
560 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
561 return _mm_cvtss_sd(A, B);
// Expects _mm_cvttpd_epi32 to call llvm.x86.sse2.cvttpd2dq on a <2 x double>.
564 __m128i test_mm_cvttpd_epi32(__m128d A) {
565 // CHECK-LABEL: test_mm_cvttpd_epi32
566 // CHECK: call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %{{.*}})
567 return _mm_cvttpd_epi32(A);
// Expects _mm_cvttps_epi32 to call llvm.x86.sse2.cvttps2dq on a <4 x float>.
570 __m128i test_mm_cvttps_epi32(__m128 A) {
571 // CHECK-LABEL: test_mm_cvttps_epi32
572 // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}})
573 return _mm_cvttps_epi32(A);
// Expects _mm_cvttsd_si32 to call llvm.x86.sse2.cvttsd2si, which yields an i32.
576 int test_mm_cvttsd_si32(__m128d A) {
577 // CHECK-LABEL: test_mm_cvttsd_si32
578 // CHECK: call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %{{.*}})
579 return _mm_cvttsd_si32(A);
// Expects _mm_cvttsd_si64 to call llvm.x86.sse2.cvttsd2si64, which yields an i64.
582 long long test_mm_cvttsd_si64(__m128d A) {
583 // CHECK-LABEL: test_mm_cvttsd_si64
584 // CHECK: call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %{{.*}})
585 return _mm_cvttsd_si64(A);
// Expects _mm_div_pd to be emitted as a generic IR fdiv on <2 x double>.
588 __m128d test_mm_div_pd(__m128d A, __m128d B) {
589 // CHECK-LABEL: test_mm_div_pd
590 // CHECK: fdiv <2 x double>
591 return _mm_div_pd(A, B);
// Expects _mm_div_sd as: extract element 0 (twice), a scalar fdiv on double,
// then an insert of a double into lane 0 of a <2 x double>.
594 __m128d test_mm_div_sd(__m128d A, __m128d B) {
595 // CHECK-LABEL: test_mm_div_sd
596 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
597 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
598 // CHECK: fdiv double
599 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
600 return _mm_div_sd(A, B);
603 // Lowering to pextrw requires optimization.
// Expects _mm_extract_epi16 as: an `and` of the index with 7, an extractelement
// from <8 x i16> at that masked index, then a zext of the i16 to i32.
// The call passes index 9, which is outside 0..7, so the mask is what keeps
// the lane index in range.
604 int test_mm_extract_epi16(__m128i A) {
605 // CHECK-LABEL: test_mm_extract_epi16
606 // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
607 // CHECK: extractelement <8 x i16> %{{.*}}, i32 [[x]]
608 // CHECK: zext i16 %{{.*}} to i32
609 return _mm_extract_epi16(A, 9);
// Expects _mm_insert_epi16 as an `and` of the index with 7 followed by an
// insertelement into <8 x i16> at that masked index.
// The call passes index 8, which is outside 0..7, so the mask is what keeps
// the lane index in range.
612 __m128i test_mm_insert_epi16(__m128i A, int B) {
613 // CHECK-LABEL: test_mm_insert_epi16
614 // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
615 // CHECK: insertelement <8 x i16> %{{.*}}, i32 [[x]]
616 return _mm_insert_epi16(A, B, 8);
// Expects _mm_lfence to call llvm.x86.sse2.lfence.
// The intrinsic has to be invoked in the body: with an empty body no such call
// is emitted and the expectation below can never match.
619 void test_mm_lfence() {
620 // CHECK-LABEL: test_mm_lfence
621 // CHECK: call void @llvm.x86.sse2.lfence()
_mm_lfence();
// Expects _mm_load_pd to be a <2 x double> load with 16-byte alignment.
625 __m128d test_mm_load_pd(double const* A) {
626 // CHECK-LABEL: test_mm_load_pd
627 // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
628 return _mm_load_pd(A);
// Expects _mm_load_pd1 as a scalar double load (align 8) followed by inserts
// of a double into lane 0 (of undef) and lane 1 of a <2 x double>.
631 __m128d test_mm_load_pd1(double const* A) {
632 // CHECK-LABEL: test_mm_load_pd1
633 // CHECK: load double, double* %{{.*}}, align 8
634 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
635 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
636 return _mm_load_pd1(A);
// Expects _mm_load_sd to be a scalar double load with alignment exactly 1; the
// end-of-line anchor stops `align 1` from matching a longer value such as 16.
639 __m128d test_mm_load_sd(double const* A) {
640 // CHECK-LABEL: test_mm_load_sd
641 // CHECK: load double, double* %{{.*}}, align 1{{$}}
642 return _mm_load_sd(A);
// Expects _mm_load_si128 to be a <2 x i64> load with 16-byte alignment.
645 __m128i test_mm_load_si128(__m128i const* A) {
646 // CHECK-LABEL: test_mm_load_si128
647 // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 16
648 return _mm_load_si128(A);
// Expects _mm_load1_pd as a scalar double load (align 8) followed by inserts
// of a double into lane 0 (of undef) and lane 1 of a <2 x double>.
651 __m128d test_mm_load1_pd(double const* A) {
652 // CHECK-LABEL: test_mm_load1_pd
653 // CHECK: load double, double* %{{.*}}, align 8
654 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
655 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
656 return _mm_load1_pd(A);
// Expects _mm_loadh_pd as a scalar double load with alignment exactly 1,
// followed by an insert of a double into lane 1 of a <2 x double>.
659 __m128d test_mm_loadh_pd(__m128d x, void* y) {
660 // CHECK-LABEL: test_mm_loadh_pd
661 // CHECK: load double, double* %{{.*}}, align 1{{$}}
662 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
663 return _mm_loadh_pd(x, y);
// Expects _mm_loadl_epi64 as an i64 load with alignment exactly 1, then a
// <2 x i64> built from undef with the loaded value in lane 0 and a constant
// zero in lane 1.
// The function name is matched with a label directive, like every other test
// in this file, so the expectations below are confined to this function's IR
// and cannot be satisfied by output belonging to a neighbouring function.
666 __m128i test_mm_loadl_epi64(__m128i* y) {
667 // CHECK-LABEL: test_mm_loadl_epi64
668 // CHECK: load i64, i64* {{.*}}, align 1{{$}}
669 // CHECK: insertelement <2 x i64> undef, i64 {{.*}}, i32 0
670 // CHECK: insertelement <2 x i64> {{.*}}, i64 0, i32 1
671 return _mm_loadl_epi64(y);
// Expects _mm_loadl_pd as a scalar double load with alignment exactly 1, an
// insert into lane 0 of an undef <2 x double>, an extract of element 1, and an
// insert of a double into lane 1.
674 __m128d test_mm_loadl_pd(__m128d x, void* y) {
675 // CHECK-LABEL: test_mm_loadl_pd
676 // CHECK: load double, double* %{{.*}}, align 1{{$}}
677 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
678 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
679 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
680 return _mm_loadl_pd(x, y);
// Expects _mm_loadr_pd as a 16-byte-aligned <2 x double> load followed by a
// shufflevector with mask <1, 0> (the two lanes swapped).
683 __m128d test_mm_loadr_pd(double const* A) {
684 // CHECK-LABEL: test_mm_loadr_pd
685 // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 16
686 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
687 return _mm_loadr_pd(A);
// Expects _mm_loadu_pd to be a <2 x double> load with alignment exactly 1.
690 __m128d test_mm_loadu_pd(double const* A) {
691 // CHECK-LABEL: test_mm_loadu_pd
692 // CHECK: load <2 x double>, <2 x double>* %{{.*}}, align 1{{$}}
693 return _mm_loadu_pd(A);
// Expects _mm_loadu_si128 to be a <2 x i64> load with alignment exactly 1.
696 __m128i test_mm_loadu_si128(__m128i const* A) {
697 // CHECK-LABEL: test_mm_loadu_si128
698 // CHECK: load <2 x i64>, <2 x i64>* %{{.*}}, align 1{{$}}
699 return _mm_loadu_si128(A);
// Expects _mm_loadu_si64 as an i64 load with alignment exactly 1, then a
// <2 x i64> built from undef with a variable value in lane 0 and zero in lane 1.
702 __m128i test_mm_loadu_si64(void const* A) {
703 // CHECK-LABEL: test_mm_loadu_si64
704 // CHECK: load i64, i64* %{{.*}}, align 1{{$}}
705 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
706 // CHECK: insertelement <2 x i64> %{{.*}}, i64 0, i32 1
707 return _mm_loadu_si64(A);
// Expects _mm_madd_epi16 to call llvm.x86.sse2.pmadd.wd: two <8 x i16> operands
// in, a <4 x i32> out.
710 __m128i test_mm_madd_epi16(__m128i A, __m128i B) {
711 // CHECK-LABEL: test_mm_madd_epi16
712 // CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
713 return _mm_madd_epi16(A, B);
// Expects _mm_maskmoveu_si128 to call llvm.x86.sse2.maskmov.dqu with two
// <16 x i8> operands and an i8* destination.
716 void test_mm_maskmoveu_si128(__m128i A, __m128i B, char* C) {
717 // CHECK-LABEL: test_mm_maskmoveu_si128
718 // CHECK: call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8* %{{.*}})
719 _mm_maskmoveu_si128(A, B, C);
// Expects _mm_max_epi16 as a signed icmp sgt on <8 x i16> and, on the very next
// line, a select between the same two operands driven by that compare.
722 __m128i test_mm_max_epi16(__m128i A, __m128i B) {
723 // CHECK-LABEL: test_mm_max_epi16
724 // CHECK: [[CMP:%.*]] = icmp sgt <8 x i16> [[X:%.*]], [[Y:%.*]]
725 // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]]
726 return _mm_max_epi16(A, B);
// Expects _mm_max_epu8 as an unsigned icmp ugt on <16 x i8> and, on the very
// next line, a select between the same two operands driven by that compare.
729 __m128i test_mm_max_epu8(__m128i A, __m128i B) {
730 // CHECK-LABEL: test_mm_max_epu8
731 // CHECK: [[CMP:%.*]] = icmp ugt <16 x i8> [[X:%.*]], [[Y:%.*]]
732 // CHECK-NEXT: select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]]
733 return _mm_max_epu8(A, B);
// Expects _mm_max_pd to call llvm.x86.sse2.max.pd on two <2 x double> operands.
736 __m128d test_mm_max_pd(__m128d A, __m128d B) {
737 // CHECK-LABEL: test_mm_max_pd
738 // CHECK: call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
739 return _mm_max_pd(A, B);
// Expects _mm_max_sd to call llvm.x86.sse2.max.sd on two <2 x double> operands.
742 __m128d test_mm_max_sd(__m128d A, __m128d B) {
743 // CHECK-LABEL: test_mm_max_sd
744 // CHECK: call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
745 return _mm_max_sd(A, B);
// Expects _mm_mfence to call llvm.x86.sse2.mfence.
// The intrinsic has to be invoked in the body: with an empty body no such call
// is emitted and the expectation below can never match.
748 void test_mm_mfence() {
749 // CHECK-LABEL: test_mm_mfence
750 // CHECK: call void @llvm.x86.sse2.mfence()
_mm_mfence();
// Expects _mm_min_epi16 as a signed icmp slt on <8 x i16> and, on the very next
// line, a select between the same two operands driven by that compare.
754 __m128i test_mm_min_epi16(__m128i A, __m128i B) {
755 // CHECK-LABEL: test_mm_min_epi16
756 // CHECK: [[CMP:%.*]] = icmp slt <8 x i16> [[X:%.*]], [[Y:%.*]]
757 // CHECK-NEXT: select <8 x i1> [[CMP]], <8 x i16> [[X]], <8 x i16> [[Y]]
758 return _mm_min_epi16(A, B);
// Expects _mm_min_epu8 as an unsigned icmp ult on <16 x i8> and, on the very
// next line, a select between the same two operands driven by that compare.
761 __m128i test_mm_min_epu8(__m128i A, __m128i B) {
762 // CHECK-LABEL: test_mm_min_epu8
763 // CHECK: [[CMP:%.*]] = icmp ult <16 x i8> [[X:%.*]], [[Y:%.*]]
764 // CHECK-NEXT: select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]]
765 return _mm_min_epu8(A, B);
// Expects _mm_min_pd to call llvm.x86.sse2.min.pd on two <2 x double> operands.
768 __m128d test_mm_min_pd(__m128d A, __m128d B) {
769 // CHECK-LABEL: test_mm_min_pd
770 // CHECK: call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
771 return _mm_min_pd(A, B);
// Expects _mm_min_sd to call llvm.x86.sse2.min.sd on two <2 x double> operands.
774 __m128d test_mm_min_sd(__m128d A, __m128d B) {
775 // CHECK-LABEL: test_mm_min_sd
776 // CHECK: call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
777 return _mm_min_sd(A, B);
// Expects _mm_move_epi64 as a two-input <2 x i64> shufflevector with mask
// <0, 2>: lane 0 of the first input and lane 0 of the second input.
780 __m128i test_mm_move_epi64(__m128i A) {
781 // CHECK-LABEL: test_mm_move_epi64
782 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
783 return _mm_move_epi64(A);
// Expects _mm_move_sd to rebuild a <2 x double> from undef: an extract of
// element 0 inserted into lane 0, then an extract of element 1 inserted into
// lane 1. The pattern does not pin which operand each extract reads from.
786 __m128d test_mm_move_sd(__m128d A, __m128d B) {
787 // CHECK-LABEL: test_mm_move_sd
788 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
789 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
790 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
791 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
792 return _mm_move_sd(A, B);
// Expects _mm_movemask_epi8 to call llvm.x86.sse2.pmovmskb.128 on a <16 x i8>,
// yielding an i32.
795 int test_mm_movemask_epi8(__m128i A) {
796 // CHECK-LABEL: test_mm_movemask_epi8
797 // CHECK: call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}})
798 return _mm_movemask_epi8(A);
// Expects _mm_movemask_pd to call llvm.x86.sse2.movmsk.pd on a <2 x double>,
// yielding an i32.
801 int test_mm_movemask_pd(__m128d A) {
802 // CHECK-LABEL: test_mm_movemask_pd
803 // CHECK: call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}})
804 return _mm_movemask_pd(A);
// Expects _mm_mul_epu32 to call llvm.x86.sse2.pmulu.dq: two <4 x i32> operands
// in, a <2 x i64> out.
807 __m128i test_mm_mul_epu32(__m128i A, __m128i B) {
808 // CHECK-LABEL: test_mm_mul_epu32
809 // CHECK: call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
810 return _mm_mul_epu32(A, B);
// Expects _mm_mul_pd to be emitted as a generic IR fmul on <2 x double>.
813 __m128d test_mm_mul_pd(__m128d A, __m128d B) {
814 // CHECK-LABEL: test_mm_mul_pd
815 // CHECK: fmul <2 x double> %{{.*}}, %{{.*}}
816 return _mm_mul_pd(A, B);
// Expects _mm_mul_sd as: extract element 0 (twice), a scalar fmul on double,
// then an insert of a double into lane 0 of a <2 x double>.
819 __m128d test_mm_mul_sd(__m128d A, __m128d B) {
820 // CHECK-LABEL: test_mm_mul_sd
821 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
822 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
823 // CHECK: fmul double
824 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
825 return _mm_mul_sd(A, B);
// Expects _mm_mulhi_epi16 to call llvm.x86.sse2.pmulh.w on two <8 x i16> operands.
828 __m128i test_mm_mulhi_epi16(__m128i A, __m128i B) {
829 // CHECK-LABEL: test_mm_mulhi_epi16
830 // CHECK: call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
831 return _mm_mulhi_epi16(A, B);
// Expects _mm_mulhi_epu16 to call llvm.x86.sse2.pmulhu.w on two <8 x i16> operands.
834 __m128i test_mm_mulhi_epu16(__m128i A, __m128i B) {
835 // CHECK-LABEL: test_mm_mulhi_epu16
836 // CHECK: call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
837 return _mm_mulhi_epu16(A, B);
// Expects _mm_mullo_epi16 to be emitted as a generic IR mul on <8 x i16>.
840 __m128i test_mm_mullo_epi16(__m128i A, __m128i B) {
841 // CHECK-LABEL: test_mm_mullo_epi16
842 // CHECK: mul <8 x i16> %{{.*}}, %{{.*}}
843 return _mm_mullo_epi16(A, B);
// Expects _mm_or_pd to be emitted as an integer `or` on <4 x i32>.
846 __m128d test_mm_or_pd(__m128d A, __m128d B) {
847 // CHECK-LABEL: test_mm_or_pd
848 // CHECK: or <4 x i32> %{{.*}}, %{{.*}}
849 return _mm_or_pd(A, B);
// Expects _mm_or_si128 to be emitted as an `or` on <2 x i64>.
852 __m128i test_mm_or_si128(__m128i A, __m128i B) {
853 // CHECK-LABEL: test_mm_or_si128
854 // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
855 return _mm_or_si128(A, B);
// Expects _mm_packs_epi16 to call llvm.x86.sse2.packsswb.128: two <8 x i16>
// operands in, a <16 x i8> out.
858 __m128i test_mm_packs_epi16(__m128i A, __m128i B) {
859 // CHECK-LABEL: test_mm_packs_epi16
860 // CHECK: call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
861 return _mm_packs_epi16(A, B);
// Expects _mm_packs_epi32 to call llvm.x86.sse2.packssdw.128: two <4 x i32>
// operands in, an <8 x i16> out.
864 __m128i test_mm_packs_epi32(__m128i A, __m128i B) {
865 // CHECK-LABEL: test_mm_packs_epi32
866 // CHECK: call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
867 return _mm_packs_epi32(A, B);
// Expects _mm_packus_epi16 to call llvm.x86.sse2.packuswb.128: two <8 x i16>
// operands in, a <16 x i8> out.
870 __m128i test_mm_packus_epi16(__m128i A, __m128i B) {
871 // CHECK-LABEL: test_mm_packus_epi16
872 // CHECK: call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
873 return _mm_packus_epi16(A, B);
// Expects _mm_pause to call llvm.x86.sse2.pause.
// The intrinsic has to be invoked in the body: with an empty body no such call
// is emitted and the expectation below can never match.
876 void test_mm_pause() {
877 // CHECK-LABEL: test_mm_pause
878 // CHECK: call void @llvm.x86.sse2.pause()
_mm_pause();
// Expects _mm_sad_epu8 to call llvm.x86.sse2.psad.bw: two <16 x i8> operands
// in, a <2 x i64> out.
882 __m128i test_mm_sad_epu8(__m128i A, __m128i B) {
883 // CHECK-LABEL: test_mm_sad_epu8
884 // CHECK: call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
885 return _mm_sad_epu8(A, B);
// Expects _mm_set_epi8 to build a <16 x i8> with sixteen insertelement
// instructions, lanes 0 through 15 in ascending order, starting from undef.
// The pattern does not pin which argument lands in which lane.
888 __m128i test_mm_set_epi8(char A, char B, char C, char D,
889 char E, char F, char G, char H,
890 char I, char J, char K, char L,
891 char M, char N, char O, char P) {
892 // CHECK-LABEL: test_mm_set_epi8
893 // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
894 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
895 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
896 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
897 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
898 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
899 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
900 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
901 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
902 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
903 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
904 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
905 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
906 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
907 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
908 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
909 return _mm_set_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
// Test: _mm_set_epi16 with eight short arguments is expected to build its
// result through eight insertelement instructions on <8 x i16>, beginning
// from undef and filling lane indices 0 through 7 in ascending order.
912 __m128i test_mm_set_epi16(short A, short B, short C, short D,
913 short E, short F, short G, short H) {
914 // CHECK-LABEL: test_mm_set_epi16
915 // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
916 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
917 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
918 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
919 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
920 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
921 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
922 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
923 return _mm_set_epi16(A, B, C, D, E, F, G, H);
// Test: _mm_set_epi32 with four int arguments is expected to produce four
// insertelement instructions on <4 x i32>, starting from undef, lanes 0..3.
926 __m128i test_mm_set_epi32(int A, int B, int C, int D) {
927 // CHECK-LABEL: test_mm_set_epi32
928 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
929 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
930 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
931 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
932 return _mm_set_epi32(A, B, C, D);
// Test: _mm_set_epi64 with two __m64 arguments is expected to produce two
// insertelement instructions on <2 x i64>, starting from undef, lanes 0 and 1.
935 __m128i test_mm_set_epi64(__m64 A, __m64 B) {
936 // CHECK-LABEL: test_mm_set_epi64
937 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
938 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
939 return _mm_set_epi64(A, B);
// Test: _mm_set_epi64x with two long long arguments is expected to produce two
// insertelement instructions on <2 x i64>, starting from undef, lanes 0 and 1.
942 __m128i test_mm_set_epi64x(long long A, long long B) {
943 // CHECK-LABEL: test_mm_set_epi64x
944 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
945 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
946 return _mm_set_epi64x(A, B);
// Test: _mm_set_pd with two double arguments is expected to produce two
// insertelement instructions on <2 x double>, starting from undef, lanes 0 and 1.
949 __m128d test_mm_set_pd(double A, double B) {
950 // CHECK-LABEL: test_mm_set_pd
951 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
952 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
953 return _mm_set_pd(A, B);
// Test: _mm_set_sd is expected to insert a double value into lane 0 of an
// undef <2 x double> and then the constant 0.0 into lane 1.
956 __m128d test_mm_set_sd(double A) {
957 // CHECK-LABEL: test_mm_set_sd
958 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
959 // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1
960 return _mm_set_sd(A);
// Test: _mm_set1_epi8 with a single char argument is expected to produce
// sixteen insertelement instructions on <16 x i8>, starting from undef and
// covering lane indices 0 through 15 in ascending order.
963 __m128i test_mm_set1_epi8(char A) {
964 // CHECK-LABEL: test_mm_set1_epi8
965 // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
966 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
967 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
968 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
969 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
970 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
971 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
972 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
973 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
974 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
975 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
976 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
977 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
978 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
979 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
980 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
981 return _mm_set1_epi8(A);
// Test: _mm_set1_epi16 with a single short argument is expected to produce
// eight insertelement instructions on <8 x i16>, starting from undef, lanes 0..7.
984 __m128i test_mm_set1_epi16(short A) {
985 // CHECK-LABEL: test_mm_set1_epi16
986 // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
987 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
988 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
989 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
990 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
991 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
992 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
993 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
994 return _mm_set1_epi16(A);
// Test: _mm_set1_epi32 with a single int argument is expected to produce four
// insertelement instructions on <4 x i32>, starting from undef, lanes 0..3.
997 __m128i test_mm_set1_epi32(int A) {
998 // CHECK-LABEL: test_mm_set1_epi32
999 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
1000 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
1001 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
1002 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
1003 return _mm_set1_epi32(A);
// Test: _mm_set1_epi64 with a single __m64 argument is expected to produce two
// insertelement instructions on <2 x i64>, starting from undef, lanes 0 and 1.
1006 __m128i test_mm_set1_epi64(__m64 A) {
1007 // CHECK-LABEL: test_mm_set1_epi64
1008 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
1009 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
1010 return _mm_set1_epi64(A);
// Test: _mm_set1_epi64x with a single long long argument is expected to produce
// two insertelement instructions on <2 x i64>, starting from undef, lanes 0 and 1.
1013 __m128i test_mm_set1_epi64x(long long A) {
1014 // CHECK-LABEL: test_mm_set1_epi64x
1015 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
1016 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
1017 return _mm_set1_epi64x(A);
// Test: _mm_set1_pd with a single double argument is expected to produce two
// insertelement instructions on <2 x double>, starting from undef, lanes 0 and 1.
1020 __m128d test_mm_set1_pd(double A) {
1021 // CHECK-LABEL: test_mm_set1_pd
1022 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
1023 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
1024 return _mm_set1_pd(A);
// Test: _mm_setr_epi8 with sixteen char arguments is expected to build its
// result through sixteen insertelement instructions on <16 x i8>, beginning
// from undef and filling lane indices 0 through 15 in ascending order.
1027 __m128i test_mm_setr_epi8(char A, char B, char C, char D,
1028 char E, char F, char G, char H,
1029 char I, char J, char K, char L,
1030 char M, char N, char O, char P) {
1031 // CHECK-LABEL: test_mm_setr_epi8
1032 // CHECK: insertelement <16 x i8> undef, i8 %{{.*}}, i32 0
1033 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1
1034 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 2
1035 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 3
1036 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 4
1037 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 5
1038 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 6
1039 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 7
1040 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 8
1041 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 9
1042 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 10
1043 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 11
1044 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 12
1045 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 13
1046 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 14
1047 // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
1048 return _mm_setr_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
// Test: _mm_setr_epi16 with eight short arguments is expected to produce eight
// insertelement instructions on <8 x i16>, starting from undef, lanes 0..7.
1051 __m128i test_mm_setr_epi16(short A, short B, short C, short D,
1052 short E, short F, short G, short H) {
1053 // CHECK-LABEL: test_mm_setr_epi16
1054 // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
1055 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 1
1056 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 2
1057 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 3
1058 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 4
1059 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 5
1060 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 6
1061 // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
1062 return _mm_setr_epi16(A, B, C, D, E, F, G, H);
// Test: _mm_setr_epi32 with four int arguments is expected to produce four
// insertelement instructions on <4 x i32>, starting from undef, lanes 0..3.
1065 __m128i test_mm_setr_epi32(int A, int B, int C, int D) {
1066 // CHECK-LABEL: test_mm_setr_epi32
1067 // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
1068 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1
1069 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2
1070 // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
1071 return _mm_setr_epi32(A, B, C, D);
// Test: _mm_setr_epi64 with two __m64 arguments is expected to produce two
// insertelement instructions on <2 x i64>, starting from undef, lanes 0 and 1.
1074 __m128i test_mm_setr_epi64(__m64 A, __m64 B) {
1075 // CHECK-LABEL: test_mm_setr_epi64
1076 // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0
1077 // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
1078 return _mm_setr_epi64(A, B);
// Test: _mm_setr_pd with two double arguments is expected to produce two
// insertelement instructions on <2 x double>, starting from undef, lanes 0 and 1.
1081 __m128d test_mm_setr_pd(double A, double B) {
1082 // CHECK-LABEL: test_mm_setr_pd
1083 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
1084 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
1085 return _mm_setr_pd(A, B);
// Test: _mm_setzero_pd is expected to result in a store of a
// <2 x double> zeroinitializer value.
1088 __m128d test_mm_setzero_pd() {
1089 // CHECK-LABEL: test_mm_setzero_pd
1090 // CHECK: store <2 x double> zeroinitializer
1091 return _mm_setzero_pd();
// Test: _mm_setzero_si128 is expected to result in a store of a
// <2 x i64> zeroinitializer value.
1094 __m128i test_mm_setzero_si128() {
1095 // CHECK-LABEL: test_mm_setzero_si128
1096 // CHECK: store <2 x i64> zeroinitializer
1097 return _mm_setzero_si128();
// Test: _mm_shuffle_epi32 with immediate 0 is expected to become a
// shufflevector on <4 x i32> whose mask is zeroinitializer (every result
// lane selects index 0).
1100 __m128i test_mm_shuffle_epi32(__m128i A) {
1101 // CHECK-LABEL: test_mm_shuffle_epi32
1102 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer
1103 return _mm_shuffle_epi32(A, 0);
// Test: _mm_shuffle_pd with immediate 1 is expected to become a shufflevector
// on two <2 x double> operands with mask <1, 2>.
1106 __m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
1107 // CHECK-LABEL: test_mm_shuffle_pd
1108 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 2>
1109 return _mm_shuffle_pd(A, B, 1);
// Test: _mm_shufflehi_epi16 with immediate 0 is expected to become a
// shufflevector on <8 x i16> that keeps indices 0..3 and selects index 4 for
// each of the upper four result lanes.
1112 __m128i test_mm_shufflehi_epi16(__m128i A) {
1113 // CHECK-LABEL: test_mm_shufflehi_epi16
1114 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
1115 return _mm_shufflehi_epi16(A, 0);
// Test: _mm_shufflelo_epi16 with immediate 0 is expected to become a
// shufflevector on <8 x i16> that selects index 0 for each of the lower four
// result lanes and keeps indices 4..7.
1118 __m128i test_mm_shufflelo_epi16(__m128i A) {
1119 // CHECK-LABEL: test_mm_shufflelo_epi16
1120 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
1121 return _mm_shufflelo_epi16(A, 0);
// Test: _mm_sll_epi16 is expected to emit a call to @llvm.x86.sse2.psll.w
// with two <8 x i16> operands.
1124 __m128i test_mm_sll_epi16(__m128i A, __m128i B) {
1125 // CHECK-LABEL: test_mm_sll_epi16
1126 // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
1127 return _mm_sll_epi16(A, B);
// Test: _mm_sll_epi32 is expected to emit a call to @llvm.x86.sse2.psll.d
// with two <4 x i32> operands.
1130 __m128i test_mm_sll_epi32(__m128i A, __m128i B) {
1131 // CHECK-LABEL: test_mm_sll_epi32
1132 // CHECK: call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
1133 return _mm_sll_epi32(A, B);
// Test: _mm_sll_epi64 is expected to emit a call to @llvm.x86.sse2.psll.q
// with two <2 x i64> operands.
1136 __m128i test_mm_sll_epi64(__m128i A, __m128i B) {
1137 // CHECK-LABEL: test_mm_sll_epi64
1138 // CHECK: call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
1139 return _mm_sll_epi64(A, B);
// Test: _mm_slli_epi16 with shift count 1 is expected to emit a call to
// @llvm.x86.sse2.pslli.w taking an <8 x i16> vector and an i32 count.
1142 __m128i test_mm_slli_epi16(__m128i A) {
1143 // CHECK-LABEL: test_mm_slli_epi16
1144 // CHECK: call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1145 return _mm_slli_epi16(A, 1);
// Test: _mm_slli_epi32 with shift count 1 is expected to emit a call to
// @llvm.x86.sse2.pslli.d taking a <4 x i32> vector and an i32 count.
1148 __m128i test_mm_slli_epi32(__m128i A) {
1149 // CHECK-LABEL: test_mm_slli_epi32
1150 // CHECK: call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1151 return _mm_slli_epi32(A, 1);
// Test: _mm_slli_epi64 with shift count 1 is expected to emit a call to
// @llvm.x86.sse2.pslli.q taking a <2 x i64> vector and an i32 count.
1154 __m128i test_mm_slli_epi64(__m128i A) {
1155 // CHECK-LABEL: test_mm_slli_epi64
1156 // CHECK: call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
1157 return _mm_slli_epi64(A, 1);
// Test: _mm_slli_si128 with a byte count of 5 is expected to become a
// shufflevector on two <16 x i8> operands with mask indices 11 through 26.
1160 __m128i test_mm_slli_si128(__m128i A) {
1161 // CHECK-LABEL: test_mm_slli_si128
1162 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
1163 return _mm_slli_si128(A, 5);
// Test: _mm_slli_si128 with a byte count of 17 (larger than the 16-byte vector)
// is expected to become a shufflevector whose mask is indices 0 through 15,
// i.e. every result lane comes from the first shufflevector operand.
1166 __m128i test_mm_slli_si128_2(__m128i A) {
1167 // CHECK-LABEL: test_mm_slli_si128_2
1168 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1169 return _mm_slli_si128(A, 17);
// Test: _mm_sqrt_pd is expected to emit a call to @llvm.x86.sse2.sqrt.pd
// on a single <2 x double> operand.
1172 __m128d test_mm_sqrt_pd(__m128d A) {
1173 // CHECK-LABEL: test_mm_sqrt_pd
1174 // CHECK: call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %{{.*}})
1175 return _mm_sqrt_pd(A);
// Test: _mm_sqrt_sd is expected to emit a call to @llvm.x86.sse2.sqrt.sd on one
// <2 x double> operand, followed by an extract of lane 0 inserted into lane 0
// of an undef vector, then an extract of lane 1 inserted into lane 1.
1178 __m128d test_mm_sqrt_sd(__m128d A, __m128d B) {
1179 // CHECK-LABEL: test_mm_sqrt_sd
1180 // CHECK: call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %{{.*}})
1181 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
1182 // CHECK: insertelement <2 x double> undef, double %{{.*}}, i32 0
1183 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
1184 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
1185 return _mm_sqrt_sd(A, B);
// Test: _mm_sra_epi16 is expected to emit a call to @llvm.x86.sse2.psra.w
// with two <8 x i16> operands.
1188 __m128i test_mm_sra_epi16(__m128i A, __m128i B) {
1189 // CHECK-LABEL: test_mm_sra_epi16
1190 // CHECK: call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
1191 return _mm_sra_epi16(A, B);
// Test: _mm_sra_epi32 is expected to emit a call to @llvm.x86.sse2.psra.d
// with two <4 x i32> operands.
1194 __m128i test_mm_sra_epi32(__m128i A, __m128i B) {
1195 // CHECK-LABEL: test_mm_sra_epi32
1196 // CHECK: call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
1197 return _mm_sra_epi32(A, B);
// Test: _mm_srai_epi16 with shift count 1 is expected to emit a call to
// @llvm.x86.sse2.psrai.w taking an <8 x i16> vector and an i32 count.
1200 __m128i test_mm_srai_epi16(__m128i A) {
1201 // CHECK-LABEL: test_mm_srai_epi16
1202 // CHECK: call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1203 return _mm_srai_epi16(A, 1);
// Test: _mm_srai_epi32 with shift count 1 is expected to emit a call to
// @llvm.x86.sse2.psrai.d taking a <4 x i32> vector and an i32 count.
1206 __m128i test_mm_srai_epi32(__m128i A) {
1207 // CHECK-LABEL: test_mm_srai_epi32
1208 // CHECK: call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1209 return _mm_srai_epi32(A, 1);
// Test: _mm_srl_epi16 is expected to emit a call to @llvm.x86.sse2.psrl.w
// with two <8 x i16> operands.
1212 __m128i test_mm_srl_epi16(__m128i A, __m128i B) {
1213 // CHECK-LABEL: test_mm_srl_epi16
1214 // CHECK: call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
1215 return _mm_srl_epi16(A, B);
// Test: _mm_srl_epi32 is expected to emit a call to @llvm.x86.sse2.psrl.d
// with two <4 x i32> operands.
1218 __m128i test_mm_srl_epi32(__m128i A, __m128i B) {
1219 // CHECK-LABEL: test_mm_srl_epi32
1220 // CHECK: call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
1221 return _mm_srl_epi32(A, B);
// Test: _mm_srl_epi64 is expected to emit a call to @llvm.x86.sse2.psrl.q
// with two <2 x i64> operands.
1224 __m128i test_mm_srl_epi64(__m128i A, __m128i B) {
1225 // CHECK-LABEL: test_mm_srl_epi64
1226 // CHECK: call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
1227 return _mm_srl_epi64(A, B);
// Test: _mm_srli_epi16 with shift count 1 is expected to emit a call to
// @llvm.x86.sse2.psrli.w taking an <8 x i16> vector and an i32 count.
1230 __m128i test_mm_srli_epi16(__m128i A) {
1231 // CHECK-LABEL: test_mm_srli_epi16
1232 // CHECK: call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %{{.*}}, i32 %{{.*}})
1233 return _mm_srli_epi16(A, 1);
// Test: _mm_srli_epi32 with shift count 1 is expected to emit a call to
// @llvm.x86.sse2.psrli.d taking a <4 x i32> vector and an i32 count.
1236 __m128i test_mm_srli_epi32(__m128i A) {
1237 // CHECK-LABEL: test_mm_srli_epi32
1238 // CHECK: call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %{{.*}}, i32 %{{.*}})
1239 return _mm_srli_epi32(A, 1);
// Test: _mm_srli_epi64 with shift count 1 is expected to emit a call to
// @llvm.x86.sse2.psrli.q taking a <2 x i64> vector and an i32 count.
1242 __m128i test_mm_srli_epi64(__m128i A) {
1243 // CHECK-LABEL: test_mm_srli_epi64
1244 // CHECK: call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %{{.*}}, i32 %{{.*}})
1245 return _mm_srli_epi64(A, 1);
// Test: _mm_srli_si128 with a byte count of 5 is expected to become a
// shufflevector on two <16 x i8> operands with mask indices 5 through 20.
1248 __m128i test_mm_srli_si128(__m128i A) {
1249 // CHECK-LABEL: test_mm_srli_si128
1250 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
1251 return _mm_srli_si128(A, 5);
// Test: _mm_srli_si128 with a byte count of 17 (larger than the 16-byte vector)
// is expected to become a shufflevector whose mask is indices 16 through 31,
// i.e. every result lane comes from the second shufflevector operand.
1254 __m128i test_mm_srli_si128_2(__m128i A) {
1255 // CHECK-LABEL: test_mm_srli_si128_2
1256 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
1257 return _mm_srli_si128(A, 17);
// Test: expects a store of a <2 x double> value through a <2 x double>*
// pointer with 16-byte alignment.
// NOTE(review): the statement invoking the intrinsic is not visible in this
// excerpt (presumably _mm_store_pd(A, B)) -- confirm against the full file.
1260 void test_mm_store_pd(double* A, __m128d B) {
1261 // CHECK-LABEL: test_mm_store_pd
1262 // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
// Test: _mm_store_pd1 is expected to emit a shufflevector with a
// zeroinitializer mask (both result lanes select index 0) followed by a
// 16-byte-aligned store of the <2 x double> value.
1266 void test_mm_store_pd1(double* x, __m128d y) {
1267 // CHECK-LABEL: test_mm_store_pd1
1268 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
1269 // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
1270 _mm_store_pd1(x, y);
// Test: expects lane 0 to be extracted from a <2 x double> and stored as a
// double with alignment 1; the trailing end-of-line anchor rejects any larger
// alignment such as 16 and any text after the alignment.
// NOTE(review): the statement invoking the intrinsic is not visible in this
// excerpt (presumably _mm_store_sd(A, B)) -- confirm against the full file.
1273 void test_mm_store_sd(double* A, __m128d B) {
1274 // CHECK-LABEL: test_mm_store_sd
1275 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
1276 // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
// Test: _mm_store_si128 is expected to emit a 16-byte-aligned store of a
// <2 x i64> value through a <2 x i64>* pointer.
1280 void test_mm_store_si128(__m128i* A, __m128i B) {
1281 // CHECK-LABEL: test_mm_store_si128
1282 // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16
1283 _mm_store_si128(A, B);
// Test: _mm_store1_pd is expected to emit a shufflevector with a
// zeroinitializer mask (both result lanes select index 0) followed by a
// 16-byte-aligned store of the <2 x double> value.
1286 void test_mm_store1_pd(double* x, __m128d y) {
1287 // CHECK-LABEL: test_mm_store1_pd
1288 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
1289 // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
1290 _mm_store1_pd(x, y);
// Test: _mm_storeh_pd is expected to extract lane 1 of a <2 x double> and store
// it as a double with alignment 1 (end-of-line anchored, so no larger
// alignment is accepted).
1293 void test_mm_storeh_pd(double* A, __m128d B) {
1294 // CHECK-LABEL: test_mm_storeh_pd
1295 // CHECK: extractelement <2 x double> %{{.*}}, i32 1
1296 // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
1297 _mm_storeh_pd(A, B);
// Test: _mm_storel_epi64 is expected to extract lane 0 of a <2 x i64> and store
// through an i64* with alignment 1 (end-of-line anchored).
// Note the wrapper takes (vector, pointer) while the intrinsic is called with
// (pointer, vector).
1300 void test_mm_storel_epi64(__m128i x, void* y) {
1301 // CHECK-LABEL: test_mm_storel_epi64
1302 // CHECK: extractelement <2 x i64> %{{.*}}, i32 0
1303 // CHECK: store {{.*}} i64* {{.*}}, align 1{{$}}
1304 _mm_storel_epi64(y, x);
// Test: _mm_storel_pd is expected to extract lane 0 of a <2 x double> and store
// it as a double with alignment 1 (end-of-line anchored, so no larger
// alignment is accepted).
1307 void test_mm_storel_pd(double* A, __m128d B) {
1308 // CHECK-LABEL: test_mm_storel_pd
1309 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
1310 // CHECK: store double %{{.*}}, double* %{{.*}}, align 1{{$}}
1311 _mm_storel_pd(A, B);
// Test: _mm_storer_pd is expected to emit a shufflevector with mask <1, 0>
// (the two lanes swapped) followed by a store through a <2 x double>* with
// alignment exactly 16 (end-of-line anchored).
// Note the wrapper takes (vector, pointer) while the intrinsic is called with
// (pointer, vector).
1314 void test_mm_storer_pd(__m128d A, double* B) {
1315 // CHECK-LABEL: test_mm_storer_pd
1316 // CHECK: shufflevector <2 x double> {{.*}}, <2 x double> {{.*}}, <2 x i32> <i32 1, i32 0>
1317 // CHECK: store {{.*}} <2 x double>* {{.*}}, align 16{{$}}
1318 _mm_storer_pd(B, A);
// Test: _mm_storeu_pd is expected to emit a store through a <2 x double>* with
// alignment 1 (end-of-line anchored), and the very next emitted line must be
// `ret void`.
1321 void test_mm_storeu_pd(double* A, __m128d B) {
1322 // CHECK-LABEL: test_mm_storeu_pd
1323 // CHECK: store {{.*}} <2 x double>* {{.*}}, align 1{{$}}
1324 // CHECK-NEXT: ret void
1325 _mm_storeu_pd(A, B);
// Test: _mm_storeu_si128 is expected to emit a store of a <2 x i64> value with
// alignment 1 (end-of-line anchored), and the very next emitted line must be
// `ret void`.
1328 void test_mm_storeu_si128(__m128i* A, __m128i B) {
1329 // CHECK-LABEL: test_mm_storeu_si128
1330 // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 1{{$}}
1331 // CHECK-NEXT: ret void
1332 _mm_storeu_si128(A, B);
// Test: _mm_stream_pd is expected to emit a 16-byte-aligned store of a
// <2 x double> value carrying !nontemporal metadata.
1335 void test_mm_stream_pd(double *A, __m128d B) {
1336 // CHECK-LABEL: test_mm_stream_pd
1337 // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16, !nontemporal
1338 _mm_stream_pd(A, B);
// Test: _mm_stream_si32 is expected to emit a store of an i32 value with
// alignment 1 carrying !nontemporal metadata.
1341 void test_mm_stream_si32(int *A, int B) {
1342 // CHECK-LABEL: test_mm_stream_si32
1343 // CHECK: store i32 %{{.*}}, i32* %{{.*}}, align 1, !nontemporal
1344 _mm_stream_si32(A, B);
// Test: _mm_stream_si64 is expected to emit a store of an i64 value with
// alignment 1 carrying !nontemporal metadata.
1347 void test_mm_stream_si64(long long *A, long long B) {
1348 // CHECK-LABEL: test_mm_stream_si64
1349 // CHECK: store i64 %{{.*}}, i64* %{{.*}}, align 1, !nontemporal
1350 _mm_stream_si64(A, B);
// Test: _mm_stream_si128 is expected to emit a 16-byte-aligned store of a
// <2 x i64> value carrying !nontemporal metadata.
1353 void test_mm_stream_si128(__m128i *A, __m128i B) {
1354 // CHECK-LABEL: test_mm_stream_si128
1355 // CHECK: store <2 x i64> %{{.*}}, <2 x i64>* %{{.*}}, align 16, !nontemporal
1356 _mm_stream_si128(A, B);
// Test: _mm_sub_epi8 is expected to become a plain IR `sub` on <16 x i8>.
1359 __m128i test_mm_sub_epi8(__m128i A, __m128i B) {
1360 // CHECK-LABEL: test_mm_sub_epi8
1361 // CHECK: sub <16 x i8>
1362 return _mm_sub_epi8(A, B);
// Test: _mm_sub_epi16 is expected to become a plain IR `sub` on <8 x i16>.
1365 __m128i test_mm_sub_epi16(__m128i A, __m128i B) {
1366 // CHECK-LABEL: test_mm_sub_epi16
1367 // CHECK: sub <8 x i16>
1368 return _mm_sub_epi16(A, B);
// Test: _mm_sub_epi32 is expected to become a plain IR `sub` on <4 x i32>.
1371 __m128i test_mm_sub_epi32(__m128i A, __m128i B) {
1372 // CHECK-LABEL: test_mm_sub_epi32
1373 // CHECK: sub <4 x i32>
1374 return _mm_sub_epi32(A, B);
// Test: _mm_sub_epi64 is expected to become a plain IR `sub` on <2 x i64>.
1377 __m128i test_mm_sub_epi64(__m128i A, __m128i B) {
1378 // CHECK-LABEL: test_mm_sub_epi64
1379 // CHECK: sub <2 x i64>
1380 return _mm_sub_epi64(A, B);
// Test: _mm_sub_pd is expected to become a plain IR `fsub` on <2 x double>.
1383 __m128d test_mm_sub_pd(__m128d A, __m128d B) {
1384 // CHECK-LABEL: test_mm_sub_pd
1385 // CHECK: fsub <2 x double>
1386 return _mm_sub_pd(A, B);
// Test: _mm_sub_sd is expected to extract lane 0 twice (once per operand,
// presumably), perform a scalar `fsub double`, and insert the result back
// into lane 0 of a <2 x double>.
1389 __m128d test_mm_sub_sd(__m128d A, __m128d B) {
1390 // CHECK-LABEL: test_mm_sub_sd
1391 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
1392 // CHECK: extractelement <2 x double> %{{.*}}, i32 0
1393 // CHECK: fsub double
1394 // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
1395 return _mm_sub_sd(A, B);
// Test: _mm_subs_epi8 is expected to emit a call to @llvm.x86.sse2.psubs.b
// with two <16 x i8> operands.
1398 __m128i test_mm_subs_epi8(__m128i A, __m128i B) {
1399 // CHECK-LABEL: test_mm_subs_epi8
1400 // CHECK: call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
1401 return _mm_subs_epi8(A, B);
// Test: _mm_subs_epi16 is expected to emit a call to @llvm.x86.sse2.psubs.w
// with two <8 x i16> operands.
1404 __m128i test_mm_subs_epi16(__m128i A, __m128i B) {
1405 // CHECK-LABEL: test_mm_subs_epi16
1406 // CHECK: call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
1407 return _mm_subs_epi16(A, B);
// Test: _mm_subs_epu8 is expected to emit a call to @llvm.x86.sse2.psubus.b
// with two <16 x i8> operands.
1410 __m128i test_mm_subs_epu8(__m128i A, __m128i B) {
1411 // CHECK-LABEL: test_mm_subs_epu8
1412 // CHECK: call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
1413 return _mm_subs_epu8(A, B);
// Test: _mm_subs_epu16 is expected to emit a call to @llvm.x86.sse2.psubus.w
// with two <8 x i16> operands.
1416 __m128i test_mm_subs_epu16(__m128i A, __m128i B) {
1417 // CHECK-LABEL: test_mm_subs_epu16
1418 // CHECK: call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
1419 return _mm_subs_epu16(A, B);
// Test: _mm_ucomieq_sd is expected to emit a call to @llvm.x86.sse2.ucomieq.sd
// on two <2 x double> operands, yielding an i32.
1422 int test_mm_ucomieq_sd(__m128d A, __m128d B) {
1423 // CHECK-LABEL: test_mm_ucomieq_sd
1424 // CHECK: call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
1425 return _mm_ucomieq_sd(A, B);
// Test: _mm_ucomige_sd is expected to emit a call to @llvm.x86.sse2.ucomige.sd
// on two <2 x double> operands, yielding an i32.
1428 int test_mm_ucomige_sd(__m128d A, __m128d B) {
1429 // CHECK-LABEL: test_mm_ucomige_sd
1430 // CHECK: call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
1431 return _mm_ucomige_sd(A, B);
// Test: _mm_ucomigt_sd is expected to emit a call to @llvm.x86.sse2.ucomigt.sd
// on two <2 x double> operands, yielding an i32.
1434 int test_mm_ucomigt_sd(__m128d A, __m128d B) {
1435 // CHECK-LABEL: test_mm_ucomigt_sd
1436 // CHECK: call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
1437 return _mm_ucomigt_sd(A, B);
// Test: _mm_ucomile_sd is expected to emit a call to @llvm.x86.sse2.ucomile.sd
// on two <2 x double> operands, yielding an i32.
1440 int test_mm_ucomile_sd(__m128d A, __m128d B) {
1441 // CHECK-LABEL: test_mm_ucomile_sd
1442 // CHECK: call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
1443 return _mm_ucomile_sd(A, B);
// Test: _mm_ucomilt_sd is expected to emit a call to @llvm.x86.sse2.ucomilt.sd
// on two <2 x double> operands, yielding an i32.
1446 int test_mm_ucomilt_sd(__m128d A, __m128d B) {
1447 // CHECK-LABEL: test_mm_ucomilt_sd
1448 // CHECK: call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
1449 return _mm_ucomilt_sd(A, B);
// Test: _mm_ucomineq_sd is expected to emit a call to
// @llvm.x86.sse2.ucomineq.sd on two <2 x double> operands, yielding an i32.
1452 int test_mm_ucomineq_sd(__m128d A, __m128d B) {
1453 // CHECK-LABEL: test_mm_ucomineq_sd
1454 // CHECK: call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
1455 return _mm_ucomineq_sd(A, B);
// Test: _mm_undefined_pd is expected to compile down to a direct
// `ret <2 x double> undef`. The label match includes the leading '@' of the
// function symbol.
1458 __m128d test_mm_undefined_pd() {
1459 // CHECK-LABEL: @test_mm_undefined_pd
1460 // CHECK: ret <2 x double> undef
1461 return _mm_undefined_pd();
// Test: _mm_undefined_si128 is expected to compile down to a direct
// `ret <2 x i64> undef`. The label match includes the leading '@' of the
// function symbol.
1464 __m128i test_mm_undefined_si128() {
1465 // CHECK-LABEL: @test_mm_undefined_si128
1466 // CHECK: ret <2 x i64> undef
1467 return _mm_undefined_si128();
// Test: _mm_unpackhi_epi8 is expected to become a shufflevector on <16 x i8>
// that interleaves indices 8..15 of the first operand with indices 24..31
// (the same positions of the second operand).
1470 __m128i test_mm_unpackhi_epi8(__m128i A, __m128i B) {
1471 // CHECK-LABEL: test_mm_unpackhi_epi8
1472 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1473 return _mm_unpackhi_epi8(A, B);
// Test: _mm_unpackhi_epi16 is expected to become a shufflevector on <8 x i16>
// that interleaves indices 4..7 with indices 12..15.
1476 __m128i test_mm_unpackhi_epi16(__m128i A, __m128i B) {
1477 // CHECK-LABEL: test_mm_unpackhi_epi16
1478 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1479 return _mm_unpackhi_epi16(A, B);
// Test: _mm_unpackhi_epi32 is expected to become a shufflevector on <4 x i32>
// with mask <2, 6, 3, 7>.
1482 __m128i test_mm_unpackhi_epi32(__m128i A, __m128i B) {
1483 // CHECK-LABEL: test_mm_unpackhi_epi32
1484 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1485 return _mm_unpackhi_epi32(A, B);
// Test: _mm_unpackhi_epi64 is expected to become a shufflevector on <2 x i64>
// with mask <1, 3>.
1488 __m128i test_mm_unpackhi_epi64(__m128i A, __m128i B) {
1489 // CHECK-LABEL: test_mm_unpackhi_epi64
1490 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
1491 return _mm_unpackhi_epi64(A, B);
// Test: _mm_unpackhi_pd is expected to become a shufflevector on <2 x double>
// with mask <1, 3>.
1494 __m128d test_mm_unpackhi_pd(__m128d A, __m128d B) {
1495 // CHECK-LABEL: test_mm_unpackhi_pd
1496 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
1497 return _mm_unpackhi_pd(A, B);
// Test: _mm_unpacklo_epi8 is expected to become a shufflevector on <16 x i8>
// that interleaves indices 0..7 of the first operand with indices 16..23
// (the same positions of the second operand).
1500 __m128i test_mm_unpacklo_epi8(__m128i A, __m128i B) {
1501 // CHECK-LABEL: test_mm_unpacklo_epi8
1502 // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1503 return _mm_unpacklo_epi8(A, B);
// Test: _mm_unpacklo_epi16 is expected to become a shufflevector on <8 x i16>
// that interleaves indices 0..3 with indices 8..11.
1506 __m128i test_mm_unpacklo_epi16(__m128i A, __m128i B) {
1507 // CHECK-LABEL: test_mm_unpacklo_epi16
1508 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1509 return _mm_unpacklo_epi16(A, B);
// Test: _mm_unpacklo_epi32 is expected to become a shufflevector on <4 x i32>
// with mask <0, 4, 1, 5>.
1512 __m128i test_mm_unpacklo_epi32(__m128i A, __m128i B) {
1513 // CHECK-LABEL: test_mm_unpacklo_epi32
1514 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1515 return _mm_unpacklo_epi32(A, B);
// Test: _mm_unpacklo_epi64 is expected to become a shufflevector on <2 x i64>
// with mask <0, 2>.
1518 __m128i test_mm_unpacklo_epi64(__m128i A, __m128i B) {
1519 // CHECK-LABEL: test_mm_unpacklo_epi64
1520 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 0, i32 2>
1521 return _mm_unpacklo_epi64(A, B);
// Test: _mm_unpacklo_pd is expected to become a shufflevector on <2 x double>
// with mask <0, 2>.
1524 __m128d test_mm_unpacklo_pd(__m128d A, __m128d B) {
1525 // CHECK-LABEL: test_mm_unpacklo_pd
1526 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
1527 return _mm_unpacklo_pd(A, B);
// Test: _mm_xor_pd is expected to become an IR `xor` performed on <4 x i32>
// values, even though the C-level operands are __m128d.
1530 __m128d test_mm_xor_pd(__m128d A, __m128d B) {
1531 // CHECK-LABEL: test_mm_xor_pd
1532 // CHECK: xor <4 x i32> %{{.*}}, %{{.*}}
1533 return _mm_xor_pd(A, B);
// Test: _mm_xor_si128 is expected to become an IR `xor` on <2 x i64> values.
1536 __m128i test_mm_xor_si128(__m128i A, __m128i B) {
1537 // CHECK-LABEL: test_mm_xor_si128
1538 // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
1539 return _mm_xor_si128(A, B);