1 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \
2 // RUN: FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=NO-AVX512
3 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | \
4 // RUN: FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=NO-AVX512
5 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f | \
6 // RUN: FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX512
9 // CHECK-LABEL: define signext i8 @f0()
14 // CHECK-LABEL: define signext i16 @f1()
19 // CHECK-LABEL: define i32 @f2()
24 // CHECK-LABEL: define float @f3()
29 // CHECK-LABEL: define double @f4()
34 // CHECK-LABEL: define x86_fp80 @f5()
35 long double f5(void) {
39 // CHECK-LABEL: define void @f6(i8 signext %a0, i16 signext %a1, i32 %a2, i64 %a3, i8* %a4)
40 void f6(char a0, short a1, int a2, long long a3, void *a4) {
43 // CHECK-LABEL: define void @f7(i32 %a0)
44 typedef enum { A, B, C } e7;
48 // Test merging/passing of upper eightbyte with X87 class.
50 // CHECK-LABEL: define void @f8_1(%union.u8* noalias sret %agg.result)
51 // CHECK-LABEL: define void @f8_2(%union.u8* byval align 16 %a0)
// Neither body does any work: f8_1 loops forever and f8_2 is empty. The
// expectations above require union u8 to be returned through an sret pointer
// (f8_1) and passed byval with 16-byte alignment (f8_2).
56 union u8 f8_1() { while (1) {} }
57 void f8_2(union u8 a0) {}
59 // CHECK-LABEL: define i64 @f9()
// Declares struct s9 (two ints followed by a zero-width bit-field) and, in the
// same declaration, defines f9 returning it. f9 never returns; the expected
// signature above has the struct coming back as a single i64.
60 struct s9 { int a; int b; int : 0; } f9(void) { while (1) {} }
62 // CHECK-LABEL: define void @f10(i64 %a0.coerce)
// s10 is two ints followed by a zero-width bit-field, used here as a by-value
// argument of an empty function. The expected signature above passes it as a
// single coerced i64.
63 struct s10 { int a; int b; int : 0; };
64 void f10(struct s10 a0) {}
66 // CHECK-LABEL: define void @f11(%union.anon* noalias sret %agg.result)
// Anonymous union of a long double and a float, used as the return type of a
// function that never returns. The expected signature above returns it
// indirectly through an sret pointer.
67 union { long double a; float b; } f11() { while (1) {} }
69 // CHECK-LABEL: define i32 @f12_0()
70 // CHECK-LABEL: define void @f12_1(i32 %a0.coerce)
// s12 holds a single int whose alignment is raised to 16 bytes. Per the
// expectations above it is returned as a plain i32 (f12_0) and passed as a
// coerced i32 (f12_1).
71 struct s12 { int a __attribute__((aligned(16))); };
72 struct s12 f12_0(void) { while (1) {} }
73 void f12_1(struct s12 a0) {}
75 // Check that sret parameter is accounted for when checking available integer
77 // CHECK: define void @f13(%struct.s13_0* noalias sret %agg.result, i32 %a, i32 %b, i32 %c, i32 %d, {{.*}}* byval align 8 %e, i32 %f)
// s13_0 (three long longs) is the return type of f13 and s13_1 (two long
// longs) is its fifth argument. In the expected signature above the return
// value goes through an sret pointer, e is passed byval with 8-byte alignment,
// and the int arguments a-d and f stay i32. f13 never returns.
79 struct s13_0 { long long f0[3]; };
80 struct s13_1 { long long f0[2]; };
81 struct s13_0 f13(int a, int b, int c, int d,
82 struct s13_1 e, int f) { while (1) {} }
84 // CHECK: define void @f14({{.*}}, i8 signext %X)
// Empty function with six int arguments followed by a char. The expectation
// above requires the trailing char X to be passed as i8 signext.
85 void f14(int a, int b, int c, int d, int e, int f, char X) {}
87 // CHECK: define void @f15({{.*}}, i8* %X)
// Empty function with six int arguments followed by a void pointer. The
// expectation above requires the trailing pointer X to be passed as i8*.
88 void f15(int a, int b, int c, int d, int e, int f, void *X) {}
90 // CHECK: define void @f16({{.*}}, float %X)
91 void f16(float a, float b, float c, float d, float e, float f, float g, float h,
94 // CHECK: define void @f17({{.*}}, x86_fp80 %X)
95 void f17(float a, float b, float c, float d, float e, float f, float g, float h,
98 // Check for valid coercion. The struct should be passed/returned as i32, not
99 // as i64 for better code quality.
101 // CHECK-LABEL: define void @f18(i32 %a, i32 %f18_arg1.coerce)
// f18_s0 wraps a single int. f18 takes an int and one f18_s0 by value and
// never returns; the expected signature above coerces the struct to i32.
102 struct f18_s0 { int f0; };
103 void f18(int a, struct f18_s0 f18_arg1) { while (1) {} }
105 // Check byval alignment.
107 // CHECK-LABEL: define void @f19(%struct.s19* byval align 16 %x)
// Empty function taking struct s19 by value (s19 is defined outside this
// excerpt). The expectation above requires byval with 16-byte alignment.
111 void f19(struct s19 x) {}
113 // CHECK-LABEL: define void @f20(%struct.s20* byval align 32 %x)
114 struct __attribute__((aligned(32))) s20 {
// Empty function taking struct s20 by value. The expectation above requires
// byval with 32-byte alignment, matching the aligned(32) attribute on s20.
118 void f20(struct s20 x) {}
126 // CHECK-LABEL: define i8* @f21(i64 %S.coerce0, i8* %S.coerce1)
// Returns S.x + S.Ptr. The expected signature above passes the by-value
// StringRef as two separate pieces, an i64 and an i8*.
127 const char *f21(struct StringRef S) { return S.x+S.Ptr; }
// L is struct f22s (two unsigned long longs) with the typedef aligned to 16
// bytes. f22 takes two of them by value and does nothing; the expectations
// below require the allocas for x and y to be 16-byte aligned.
130 typedef __attribute__ ((aligned(16))) struct f22s { unsigned long long x[2]; } L;
131 void f22(L x, L y) { }
133 // CHECK: %x = alloca{{.*}}, align 16
134 // CHECK: %y = alloca{{.*}}, align 16
146 void f23(int A, struct f23S B) {
147 // CHECK-LABEL: define void @f23(i32 %A, i64 %B.coerce0, i32 %B.coerce1)
// A long followed by an int. f24 below takes a pointer to one of these as its
// second parameter (P2).
150 struct f24s { long a; int b; };
152 struct f23S f24(struct f23S *X, struct f24s *P2) {
155 // CHECK: define { i64, i32 } @f24(%struct.f23S* %X, %struct.f24s* %P2)
159 typedef float v4f32 __attribute__((__vector_size__(16)));
161 // CHECK-LABEL: define <4 x float> @f25(<4 x float> %X)
163 // CHECK: alloca <4 x float>
165 // CHECK: store <4 x float> %X, <4 x float>*
167 // CHECK: ret <4 x float>
176 struct foo26 f26(struct foo26 *P) {
177 // CHECK: define { i32*, float* } @f26(%struct.foo26* %P)
182 struct v4f32wrapper {
186 struct v4f32wrapper f27(struct v4f32wrapper X) {
187 // CHECK-LABEL: define <4 x float> @f27(<4 x float> %X.coerce)
191 // PR22563 - We should unwrap simple structs and arrays to pass
192 // and return them in the appropriate vector registers if possible.
194 typedef float v8f32 __attribute__((__vector_size__(32)));
195 struct v8f32wrapper {
199 struct v8f32wrapper f27a(struct v8f32wrapper X) {
200 // AVX-LABEL: define <8 x float> @f27a(<8 x float> %X.coerce)
204 struct v8f32wrapper_wrapper {
208 struct v8f32wrapper_wrapper f27b(struct v8f32wrapper_wrapper X) {
209 // AVX-LABEL: define <8 x float> @f27b(<8 x float> %X.coerce)
218 void f28(struct f28c C) {
219 // CHECK-LABEL: define void @f28(double %C.coerce0, i32 %C.coerce1)
229 void f29a(struct f29a A) {
230 // CHECK-LABEL: define void @f29a(double %A.coerce0, i32 %A.coerce1)
// Eleven chars in total: an 8-element array followed by three single chars.
// The expected signature of f30 below splits it into an i64 and an i24.
234 struct S0 { char f0[8]; char f2; char f3; char f4; };
235 void f30(struct S0 p_4) {
236 // CHECK-LABEL: define void @f30(i64 %p_4.coerce0, i24 %p_4.coerce1)
239 // Pass the third element as a float when followed by tail padding.
// Three floats. The expected signature of f31 below passes the first two
// together as <2 x float> and the third as a separate float.
241 struct f31foo { float a, b, c; };
242 float f31(struct f31foo X) {
243 // CHECK-LABEL: define float @f31(<2 x float> %X.coerce0, float %X.coerce1)
247 _Complex float f32(_Complex float A, _Complex float B) {
249 // CHECK-LABEL: define <2 x float> @f32(<2 x float> %A.coerce, <2 x float> %B.coerce)
// A long followed by two floats; f33 below fetches one of these with va_arg.
255 struct f33s { long x; float c,d; };
257 void f33(va_list X) {
258 va_arg(X, struct f33s);
261 typedef unsigned long long v1i64 __attribute__((__vector_size__(8)));
264 // CHECK-LABEL: define double @f34(double %arg.coerce)
// Returns its v1i64 argument unchanged. The expected signature above uses
// double for both the coerced argument and the return value.
265 v1i64 f34(v1i64 arg) { return arg; }
269 // CHECK-LABEL: define double @f35(double %arg.coerce)
// v1i64_2 is an 8-byte vector of unsigned long. f35 returns arg + arg; the
// expected signature above uses double for both the coerced argument and the
// return value.
270 typedef unsigned long v1i64_2 __attribute__((__vector_size__(8)));
271 v1i64_2 f35(v1i64_2 arg) { return arg+arg; }
274 // CHECK: declare void @func(%struct._str* byval align 16)
275 typedef struct _str {
289 // CHECK-LABEL: define double @f36(double %arg.coerce)
// v2i32 is an 8-byte vector of unsigned. f36 returns its argument unchanged;
// the expected signature above uses double for both the coerced argument and
// the return value.
290 typedef unsigned v2i32 __attribute((__vector_size__(8)));
291 v2i32 f36(v2i32 arg) { return arg; }
293 // AVX: declare void @f38(<8 x float>)
294 // AVX: declare void @f37(<8 x float>)
295 // SSE: declare void @f38(%struct.s256* byval align 32)
296 // SSE: declare void @f37(<8 x float>* byval align 32)
297 typedef float __m256 __attribute__ ((__vector_size__ (32)));
// Calls f38(x38) and then f37(x37). The expectations above are on the
// declarations of those two callees: <8 x float> arguments when the avx
// feature is enabled, byval with 32-byte alignment otherwise.
307 void f39() { f38(x38); f37(x37); }
309 // The next two tests make sure that the struct below is passed
310 // in the same way regardless of whether AVX is being used.
312 // CHECK: declare void @func40(%struct.t128* byval align 16)
313 typedef float __m128 __attribute__ ((__vector_size__ (16)));
314 typedef struct t128 {
319 extern void func40(two128 s);
320 void func41(two128 s) {
324 // CHECK: declare void @func42(%struct.t128_2* byval align 16)
328 typedef struct t128_2 {
332 extern void func42(SA s);
337 // CHECK-LABEL: define i32 @f44
339 // CHECK-NEXT: add i64 %{{[0-9]+}}, 31
340 // CHECK-NEXT: and i64 %{{[0-9]+}}, -32
341 // CHECK-NEXT: inttoptr
// T44 is a 32-byte vector of int and s44 pairs one with a trailing int. f44
// below pulls an s44 out of its variadic arguments.
342 typedef int T44 __attribute((vector_size(32)));
343 struct s44 { T44 x; int y; };
344 int f44(int i, ...) {
345 __builtin_va_list ap;
346 __builtin_va_start(ap, i);
347 struct s44 s = __builtin_va_arg(ap, struct s44);
348 __builtin_va_end(ap);
352 // Test that vec3 returns the correct LLVM IR type.
353 // AVX-LABEL: define i32 @foo(<3 x i64> %X)
354 typedef long long3 __attribute((ext_vector_type(3)));
360 // Make sure we don't use a varargs convention for a function without a
361 // prototype where AVX types are involved.
363 // AVX: call i32 bitcast (i32 (...)* @f45 to i32 (<8 x float>)*)
// Calls f45 with x45. When the avx feature is enabled, the expectation above
// requires the call to go through a bitcast of f45 to the non-variadic
// function type i32 (<8 x float>).
366 void test45() { f45(x45); }
368 // Make sure we use byval to pass 64-bit vectors in memory; the LLVM call
369 // lowering can't handle this case correctly because it runs after legalization.
371 // CHECK: call void @f46({{.*}}<2 x float>* byval align 8 {{.*}}, <2 x float>* byval align 8 {{.*}})
// v46 is an 8-byte vector of float and f46 takes ten of them. test46 builds
// the vector {1,2} and passes it in all ten positions; the expected call above
// requires the last two arguments to be <2 x float>* byval with 8-byte
// alignment.
372 typedef float v46 __attribute((vector_size(8)));
373 void f46(v46,v46,v46,v46,v46,v46,v46,v46,v46,v46);
374 void test46() { v46 x = {1,2}; f46(x,x,x,x,x,x,x,x,x,x); }
376 // Check that we pass the struct below without using byval, which helps out
380 // CHECK: call void @f47(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
// s47 wraps a single unsigned. test47 passes its int argument six times
// followed by the struct; the expected call above has seven i32 arguments, so
// the struct travels as an i32 and not byval.
381 struct s47 { unsigned a; };
382 void f47(int,int,int,int,int,int,struct s47);
383 void test47(int a, struct s47 b) { f47(a, a, a, a, a, a, b); }
386 // In the following example, there are holes in T4 at the 3rd byte and the 4th
387 // byte, however, T2 does not have those holes. T4 is chosen to be the
388 // representing type for union T1, but we can't use load or store of T4 since
389 // it will skip the 3rd byte and the 4th byte.
390 // In general, since we don't accurately represent the data fields of a union,
391 // do not use load or store of the representing llvm type for the union.
// Types for the union case described above. T2 and T7 are both _Complex int
// and T5 is _Complex char; T4 is a struct of a T5 followed by a T7, and T1 is
// a union whose members are a T2 and a T4.
392 typedef _Complex int T2;
393 typedef _Complex char T5;
394 typedef _Complex int T7;
395 typedef struct T4 { T5 field0; T7 field1; } T4;
396 typedef union T1 { T2 field0; T4 field1; } T1;
405 void test49_helper(double, ...);
406 void test49(double d, double e) {
409 // CHECK-LABEL: define void @test49(
410 // CHECK: [[T0:%.*]] = load double, double*
411 // CHECK-NEXT: [[T1:%.*]] = load double, double*
412 // CHECK-NEXT: call void (double, ...) @test49_helper(double [[T0]], double [[T1]])
414 void test50_helper();
415 void test50(double d, double e) {
418 // CHECK-LABEL: define void @test50(
419 // CHECK: [[T0:%.*]] = load double, double*
420 // CHECK-NEXT: [[T1:%.*]] = load double, double*
421 // CHECK-NEXT: call void (double, double, ...) bitcast (void (...)* @test50_helper to void (double, double, ...)*)(double [[T0]], double [[T1]])
// Wraps a single __uint128_t. test51 below reads one of these from a va_list
// and stores it through s.
423 struct test51_s { __uint128_t intval; };
424 void test51(struct test51_s *s, __builtin_va_list argList) {
425 *s = __builtin_va_arg(argList, struct test51_s);
428 // CHECK-LABEL: define void @test51
429 // CHECK: [[TMP_ADDR:%.*]] = alloca [[STRUCT_TEST51:%.*]], align 16
431 // CHECK: [[REG_SAVE_AREA_PTR:%.*]] = getelementptr inbounds {{.*}}, i32 0, i32 3
432 // CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load i8*, i8** [[REG_SAVE_AREA_PTR]]
433 // CHECK-NEXT: [[VALUE_ADDR:%.*]] = getelementptr i8, i8* [[REG_SAVE_AREA]], i32 {{.*}}
434 // CHECK-NEXT: [[CASTED_VALUE_ADDR:%.*]] = bitcast i8* [[VALUE_ADDR]] to [[STRUCT_TEST51]]
435 // CHECK-NEXT: [[CASTED_TMP_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[TMP_ADDR]] to i8*
436 // CHECK-NEXT: [[RECASTED_VALUE_ADDR:%.*]] = bitcast [[STRUCT_TEST51]]* [[CASTED_VALUE_ADDR]] to i8*
437 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[CASTED_TMP_ADDR]], i8* align 8 [[RECASTED_VALUE_ADDR]], i64 16, i1 false)
438 // CHECK-NEXT: add i32 {{.*}}, 16
439 // CHECK-NEXT: store i32 {{.*}}, i32* {{.*}}
440 // CHECK-NEXT: br label
442 void test52_helper(int, ...);
445 test52_helper(0, x52, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
447 // AVX: @test52_helper(i32 0, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
449 void test53(__m256 *m, __builtin_va_list argList) {
450 *m = __builtin_va_arg(argList, __m256);
452 // AVX-LABEL: define void @test53
456 void test54_helper(__m256, ...);
459 test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
460 test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
462 // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
463 // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[^)]+}})
465 typedef float __m512 __attribute__ ((__vector_size__ (64)));
473 // On AVX512, aggregates which contain a __m512 type are classified as SSE/SSEUP
474 // as per https://github.com/hjl-tools/x86-psABI/commit/30f9c9 3.2.3p2 Rule 1
476 // AVX512: declare void @f55(<16 x float>)
477 // NO-AVX512: declare void @f55(%struct.s512* byval align 64)
480 // __m512 has type SSE/SSEUP on AVX512.
482 // AVX512: declare void @f56(<16 x float>)
483 // NO-AVX512: declare void @f56(<16 x float>* byval align 64)
// Calls f55(x55) and then f56(x56). The expectations above are on the
// declarations of those two callees: <16 x float> arguments when the avx512f
// feature is enabled, byval with 64-byte alignment otherwise.
485 void f57() { f55(x55); f56(x56); }
487 // Like for __m128 on AVX, check that the struct below is passed
488 // in the same way regardless of AVX512 being used.
490 // CHECK: declare void @f58(%struct.t256* byval align 32)
491 typedef struct t256 {
496 extern void f58(two256 s);
501 // CHECK: declare void @f60(%struct.sat256* byval align 32)
502 typedef struct at256 {
505 typedef struct sat256 {
509 extern void f60(SAtwo256 s);
510 void f61(SAtwo256 s) {
514 // AVX512: @f62_helper(i32 0, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
515 void f62_helper(int, ...);
518 f62_helper(0, x62, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
521 // Like for __m256 on AVX, we always pass __m512 in memory, and don't
522 // need to use the register save area.
524 // AVX512-LABEL: define void @f63
527 void f63(__m512 *m, __builtin_va_list argList) {
528 *m = __builtin_va_arg(argList, __m512);
531 // AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
532 // AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[^)]+}})
533 void f64_helper(__m512, ...);
536 f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
537 f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
544 // SSE-LABEL: @f65(%struct.t65* byval align 32 %{{[^,)]+}})
545 // AVX: @f65(<8 x float> %{{[^,)]+}})
546 void f65(struct t65 a0) {