; Cost-model test for vector compare instructions (fcmp / icmp) on x86-64.
;
; Each invocation line below feeds this file to opt with -cost-model -analyze
; for the x86_64-apple-macosx10.8.0 triple at a different -mcpu level and pipes
; the printed costs into FileCheck. Every invocation enables three prefixes:
; the shared CHECK prefix, a family prefix (SSE or AVX), and one CPU-specific
; prefix, mapped as follows:
;
;   pentium4   -> SSE2        corei7     -> SSE42
;   yonah      -> SSE3        corei7-avx -> AVX1
;   core2      -> SSSE3       core-avx2  -> AVX2
;   penryn     -> SSE41       knl        -> AVX512
;
; NOTE(review): the bare SSE family prefix is enabled on the first five
; invocations, but no check line in the visible part of this file uses it;
; confirm whether it is still needed.
1 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=pentium4 | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE2 %s
2 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=yonah | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE3 %s
3 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSSE3 %s
4 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=penryn | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE41 %s
5 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE42 %s
6 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1 %s
7 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2 %s
8 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 %s
; Module-level data layout and target triple. The triple here is the same
; value passed via -mtriple on every opt command line above.
10 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
11 target triple = "x86_64-apple-macosx10.8.0"
13 define i32 @cmp(i32 %arg) {
15 ;SSE2: cost of 3 {{.*}} fcmp
16 ;SSE3: cost of 3 {{.*}} fcmp
17 ;SSSE3: cost of 3 {{.*}} fcmp
18 ;SSE41: cost of 3 {{.*}} fcmp
19 ;SSE42: cost of 1 {{.*}} fcmp
20 ;AVX: cost of 1 {{.*}} fcmp
21 %A = fcmp olt <2 x float> undef, undef
23 ;SSE2: cost of 7 {{.*}} fcmp
24 ;SSE3: cost of 7 {{.*}} fcmp
25 ;SSSE3: cost of 7 {{.*}} fcmp
26 ;SSE41: cost of 7 {{.*}} fcmp
27 ;SSE42: cost of 1 {{.*}} fcmp
28 ;AVX: cost of 1 {{.*}} fcmp
29 %B = fcmp olt <4 x float> undef, undef
31 ;SSE2: cost of 14 {{.*}} fcmp
32 ;SSE3: cost of 14 {{.*}} fcmp
33 ;SSSE3: cost of 14 {{.*}} fcmp
34 ;SSE41: cost of 14 {{.*}} fcmp
35 ;SSE42: cost of 2 {{.*}} fcmp
36 ;AVX: cost of 1 {{.*}} fcmp
37 %C = fcmp olt <8 x float> undef, undef
39 ;SSE2: cost of 3 {{.*}} fcmp
40 ;SSE3: cost of 3 {{.*}} fcmp
41 ;SSSE3: cost of 3 {{.*}} fcmp
42 ;SSE41: cost of 3 {{.*}} fcmp
43 ;SSE42: cost of 1 {{.*}} fcmp
44 ;AVX: cost of 1 {{.*}} fcmp
45 %D = fcmp olt <2 x double> undef, undef
47 ;SSE2: cost of 6 {{.*}} fcmp
48 ;SSE3: cost of 6 {{.*}} fcmp
49 ;SSSE3: cost of 6 {{.*}} fcmp
50 ;SSE41: cost of 6 {{.*}} fcmp
51 ;SSE42: cost of 2 {{.*}} fcmp
52 ;AVX: cost of 1 {{.*}} fcmp
53 %E = fcmp olt <4 x double> undef, undef
55 ; AVX512: cost of 1 {{.*}} %E1 = fcmp
56 %E1 = fcmp olt <16 x float> undef, undef
58 ; AVX512: cost of 1 {{.*}} %E2 = fcmp
59 %E2 = fcmp olt <8 x double> undef, undef
61 ; AVX512: cost of 2 {{.*}} %E3 = fcmp
62 %E3 = fcmp olt <16 x double> undef, undef
66 ;SSE2: cost of 1 {{.*}} icmp
67 ;SSE3: cost of 1 {{.*}} icmp
68 ;SSSE3: cost of 1 {{.*}} icmp
69 ;SSE41: cost of 1 {{.*}} icmp
70 ;SSE42: cost of 1 {{.*}} icmp
71 ;AVX: cost of 1 {{.*}} icmp
72 %F = icmp eq <16 x i8> undef, undef
74 ;SSE2: cost of 1 {{.*}} icmp
75 ;SSE3: cost of 1 {{.*}} icmp
76 ;SSSE3: cost of 1 {{.*}} icmp
77 ;SSE41: cost of 1 {{.*}} icmp
78 ;SSE42: cost of 1 {{.*}} icmp
79 ;AVX: cost of 1 {{.*}} icmp
80 %G = icmp eq <8 x i16> undef, undef
82 ;SSE2: cost of 1 {{.*}} icmp
83 ;SSE3: cost of 1 {{.*}} icmp
84 ;SSSE3: cost of 1 {{.*}} icmp
85 ;SSE41: cost of 1 {{.*}} icmp
86 ;SSE42: cost of 1 {{.*}} icmp
87 ;AVX: cost of 1 {{.*}} icmp
88 %H = icmp eq <4 x i32> undef, undef
90 ;SSE2: cost of 8 {{.*}} icmp
91 ;SSE3: cost of 8 {{.*}} icmp
92 ;SSSE3: cost of 8 {{.*}} icmp
93 ;SSE41: cost of 8 {{.*}} icmp
94 ;SSE42: cost of 1 {{.*}} icmp
95 ;AVX: cost of 1 {{.*}} icmp
96 %I = icmp eq <2 x i64> undef, undef
98 ;SSE2: cost of 16 {{.*}} icmp
99 ;SSE3: cost of 16 {{.*}} icmp
100 ;SSSE3: cost of 16 {{.*}} icmp
101 ;SSE41: cost of 16 {{.*}} icmp
102 ;SSE42: cost of 2 {{.*}} icmp
103 ;AVX1: cost of 4 {{.*}} icmp
104 ;AVX2: cost of 1 {{.*}} icmp
105 %J = icmp eq <4 x i64> undef, undef
107 ;SSE2: cost of 2 {{.*}} icmp
108 ;SSE3: cost of 2 {{.*}} icmp
109 ;SSSE3: cost of 2 {{.*}} icmp
110 ;SSE41: cost of 2 {{.*}} icmp
111 ;SSE42: cost of 2 {{.*}} icmp
112 ;AVX1: cost of 4 {{.*}} icmp
113 ;AVX2: cost of 1 {{.*}} icmp
114 %K = icmp eq <8 x i32> undef, undef
116 ;SSE2: cost of 2 {{.*}} icmp
117 ;SSE3: cost of 2 {{.*}} icmp
118 ;SSSE3: cost of 2 {{.*}} icmp
119 ;SSE41: cost of 2 {{.*}} icmp
120 ;SSE42: cost of 2 {{.*}} icmp
121 ;AVX1: cost of 4 {{.*}} icmp
122 ;AVX2: cost of 1 {{.*}} icmp
123 %L = icmp eq <16 x i16> undef, undef
125 ;SSE2: cost of 2 {{.*}} icmp
126 ;SSE3: cost of 2 {{.*}} icmp
127 ;SSSE3: cost of 2 {{.*}} icmp
128 ;SSE41: cost of 2 {{.*}} icmp
129 ;SSE42: cost of 2 {{.*}} icmp
130 ;AVX1: cost of 4 {{.*}} icmp
131 ;AVX2: cost of 1 {{.*}} icmp
132 %M = icmp eq <32 x i8> undef, undef
134 ; AVX512: cost of 1 {{.*}} %M1 = icmp
135 %M1 = icmp eq <16 x i32> undef, undef
137 ; AVX512: cost of 1 {{.*}} %M2 = icmp
138 %M2 = icmp eq <8 x i64> undef, undef
140 ; AVX512: cost of 2 {{.*}} %M3 = icmp
141 %M3 = icmp eq <16 x i64> undef, undef
143 ;CHECK: cost of 0 {{.*}} ret