1 ; RUN: opt < %s -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefix=SLM
3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
4 target triple = "x86_64-unknown-linux-gnu"
; Cost-model test: scalar i8 multiply (nsw) of the two arguments.
4 define i8 @slm-costs_8_scalar_mul(i8 %a, i8 %b) {
9 ; SLM: cost of 1 {{.*}} mul nsw i8
10 %res = mul nsw i8 %a, %b
; Cost-model test: 2-lane i8 vector multiply (nsw) of the two arguments.
14 define <2 x i8> @slm-costs_8_v2_mul(<2 x i8> %a, <2 x i8> %b) {
16 ; SLM: cost of 11 {{.*}} mul nsw <2 x i8>
17 %res = mul nsw <2 x i8> %a, %b
; Cost-model test: 4-lane i8 vector multiply (nsw) of the two arguments.
21 define <4 x i8> @slm-costs_8_v4_mul(<4 x i8> %a, <4 x i8> %b) {
23 ; SLM: cost of 3 {{.*}} mul nsw <4 x i8>
24 %res = mul nsw <4 x i8> %a, %b
; Cost-model test: 4-lane i32 multiply whose left operand is zero-extended
; from i8 lanes and whose right operand is the constant 255 in every lane
; (255 is the largest unsigned 8-bit value).
28 define <4 x i32> @slm-costs_8_v4_zext_mul(<4 x i8> %a) {
30 ; SLM: cost of 3 {{.*}} mul nsw <4 x i32>
31 %zext = zext <4 x i8> %a to <4 x i32>
32 %res = mul nsw <4 x i32> %zext, <i32 255, i32 255, i32 255, i32 255>
; Cost-model test: zext-from-i8 multiply where the third constant lane is -1
; rather than 255, and a higher cost is expected than for the all-255 case.
; NOTE(review): presumably -1 does not qualify as an unsigned 8-bit constant,
; so the narrow-multiply costing is not applied -- confirm against the X86
; cost model.
36 define <4 x i32> @slm-costs_8_v4_zext_mul_fail(<4 x i8> %a) {
38 ; SLM: cost of 5 {{.*}} mul nsw <4 x i32>
39 %zext = zext <4 x i8> %a to <4 x i32>
40 %res = mul nsw <4 x i32> %zext, <i32 255, i32 255, i32 -1, i32 255>
; Cost-model test: zext-from-i8 multiply where the second constant lane is
; 256, one past the largest unsigned 8-bit value; a higher cost is expected
; than for the all-255 case.
44 define <4 x i32> @slm-costs_8_v4_zext_mul_fail_2(<4 x i8> %a) {
46 ; SLM: cost of 5 {{.*}} mul nsw <4 x i32>
47 %zext = zext <4 x i8> %a to <4 x i32>
48 %res = mul nsw <4 x i32> %zext, <i32 255, i32 256, i32 255, i32 255>
; Cost-model test: 4-lane i32 multiply whose left operand is sign-extended
; from i8 lanes and whose constant lanes alternate between 127 and -128
; (the signed 8-bit extremes).
52 define <4 x i32> @slm-costs_8_v4_sext_mul(<4 x i8> %a) {
54 ; SLM: cost of 3 {{.*}} mul nsw <4 x i32>
55 %sext = sext <4 x i8> %a to <4 x i32>
56 %res = mul nsw <4 x i32> %sext, <i32 127, i32 -128, i32 127, i32 -128>
; Cost-model test: sext-from-i8 multiply where the third constant lane is
; 128, one above the signed 8-bit maximum; a higher cost is expected than
; for the in-range case.
60 define <4 x i32> @slm-costs_8_v4_sext_mul_fail(<4 x i8> %a) {
62 ; SLM: cost of 5 {{.*}} mul nsw <4 x i32>
63 %sext = sext <4 x i8> %a to <4 x i32>
64 %res = mul nsw <4 x i32> %sext, <i32 127, i32 -128, i32 128, i32 -128>
; Cost-model test: sext-from-i8 multiply where the second constant lane is
; -129, one below the signed 8-bit minimum; a higher cost is expected than
; for the in-range case.
68 define <4 x i32> @slm-costs_8_v4_sext_mul_fail_2(<4 x i8> %a) {
70 ; SLM: cost of 5 {{.*}} mul nsw <4 x i32>
71 %sext = sext <4 x i8> %a to <4 x i32>
72 %res = mul nsw <4 x i32> %sext, <i32 127, i32 -129, i32 127, i32 -128>
; Cost-model test: 8-lane i8 vector multiply (nsw) of the two arguments.
76 define <8 x i8> @slm-costs_8_v8_mul(<8 x i8> %a, <8 x i8> %b) {
78 ; SLM: cost of 2 {{.*}} mul nsw <8 x i8>
79 %res = mul nsw <8 x i8> %a, %b
; Cost-model test: 16-lane i8 vector multiply (nsw) of the two arguments.
83 define <16 x i8> @slm-costs_8_v16_mul(<16 x i8> %a, <16 x i8> %b) {
85 ; SLM: cost of 14 {{.*}} mul nsw <16 x i8>
86 %res = mul nsw <16 x i8> %a, %b
; Cost-model test: scalar i16 multiply (nsw) of the two arguments.
91 define i16 @slm-costs_16_scalar_mul(i16 %a, i16 %b) {
93 ; SLM: cost of 1 {{.*}} mul nsw i16
94 %res = mul nsw i16 %a, %b
; Cost-model test: 2-lane i16 vector multiply (nsw) of the two arguments.
98 define <2 x i16> @slm-costs_16_v2_mul(<2 x i16> %a, <2 x i16> %b) {
100 ; SLM: cost of 11 {{.*}} mul nsw <2 x i16>
101 %res = mul nsw <2 x i16> %a, %b
; Cost-model test: 4-lane i16 vector multiply (nsw) of the two arguments.
105 define <4 x i16> @slm-costs_16_v4_mul(<4 x i16> %a, <4 x i16> %b) {
107 ; SLM: cost of 5 {{.*}} mul nsw <4 x i16>
108 %res = mul nsw <4 x i16> %a, %b
; Cost-model test: 4-lane i32 multiply whose left operand is zero-extended
; from i16 lanes and whose right operand is the constant 65535 in every lane
; (65535 is the largest unsigned 16-bit value).
112 define <4 x i32> @slm-costs_16_v4_zext_mul(<4 x i16> %a) {
114 ; SLM: cost of 5 {{.*}} mul nsw <4 x i32>
115 %zext = zext <4 x i16> %a to <4 x i32>
116 %res = mul nsw <4 x i32> %zext, <i32 65535, i32 65535, i32 65535, i32 65535>
; Cost-model test: zext-from-i16 multiply where the first constant lane is -1
; rather than 65535, and a higher cost is expected than for the all-65535
; case.
; NOTE(review): presumably -1 does not qualify as an unsigned 16-bit
; constant -- confirm against the X86 cost model.
120 define <4 x i32> @slm-costs_16_v4_zext_mul_fail(<4 x i16> %a) {
122 ; SLM: cost of 11 {{.*}} mul nsw <4 x i32>
123 %zext = zext <4 x i16> %a to <4 x i32>
124 %res = mul nsw <4 x i32> %zext, <i32 -1, i32 65535, i32 65535, i32 65535>
; Cost-model test: zext-from-i16 multiply where the first constant lane is
; 65536, one past the largest unsigned 16-bit value; a higher cost is
; expected than for the all-65535 case.
128 define <4 x i32> @slm-costs_16_v4_zext_mul_fail_2(<4 x i16> %a) {
130 ; SLM: cost of 11 {{.*}} mul nsw <4 x i32>
131 %zext = zext <4 x i16> %a to <4 x i32>
132 %res = mul nsw <4 x i32> %zext, <i32 65536, i32 65535, i32 65535, i32 65535>
; Cost-model test: 4-lane i32 multiply whose left operand is sign-extended
; from i16 lanes and whose constant lanes alternate between 32767 and -32768
; (the signed 16-bit extremes).
136 define <4 x i32> @slm-costs_16_v4_sext_mul(<4 x i16> %a) {
138 ; SLM: cost of 5 {{.*}} mul nsw <4 x i32>
139 %sext = sext <4 x i16> %a to <4 x i32>
140 %res = mul nsw <4 x i32> %sext, <i32 32767, i32 -32768, i32 32767, i32 -32768>
; Cost-model test: sext-from-i16 multiply where the third constant lane is
; 32768, one above the signed 16-bit maximum; a higher cost is expected than
; for the in-range case.
144 define <4 x i32> @slm-costs_16_v4_sext_mul_fail(<4 x i16> %a) {
146 ; SLM: cost of 11 {{.*}} mul nsw <4 x i32>
147 %sext = sext <4 x i16> %a to <4 x i32>
148 %res = mul nsw <4 x i32> %sext, <i32 32767, i32 -32768, i32 32768, i32 -32768>
; Cost-model test: sext-from-i16 multiply where the fourth constant lane is
; -32769, one below the signed 16-bit minimum; a higher cost is expected than
; for the in-range case.
152 define <4 x i32> @slm-costs_16_v4_sext_mul_fail_2(<4 x i16> %a) {
154 ; SLM: cost of 11 {{.*}} mul nsw <4 x i32>
155 %sext = sext <4 x i16> %a to <4 x i32>
156 %res = mul nsw <4 x i32> %sext, <i32 32767, i32 -32768, i32 32767, i32 -32769>
; Cost-model test: 8-lane i16 vector multiply (nsw) of the two arguments.
160 define <8 x i16> @slm-costs_16_v8_mul(<8 x i16> %a, <8 x i16> %b) {
162 ; SLM: cost of 2 {{.*}} mul nsw <8 x i16>
163 %res = mul nsw <8 x i16> %a, %b
; Cost-model test: 16-lane i16 vector multiply (nsw) of the two arguments.
167 define <16 x i16> @slm-costs_16_v16_mul(<16 x i16> %a, <16 x i16> %b) {
169 ; SLM: cost of 4 {{.*}} mul nsw <16 x i16>
170 %res = mul nsw <16 x i16> %a, %b
; Cost-model test: scalar i32 multiply (nsw) of the two arguments.
175 define i32 @slm-costs_32_scalar_mul(i32 %a, i32 %b) {
177 ; SLM: cost of 1 {{.*}} mul nsw i32
178 %res = mul nsw i32 %a, %b
; Cost-model test: 2-lane i32 vector multiply (nsw) of the two arguments.
182 define <2 x i32> @slm-costs_32_v2_mul(<2 x i32> %a, <2 x i32> %b) {
184 ; SLM: cost of 11 {{.*}} mul nsw <2 x i32>
185 %res = mul nsw <2 x i32> %a, %b
; Cost-model test: 4-lane i32 vector multiply (nsw) of two arbitrary
; (non-extended, non-constant) arguments.
189 define <4 x i32> @slm-costs_32_v4_mul(<4 x i32> %a, <4 x i32> %b) {
191 ; SLM: cost of 11 {{.*}} mul nsw <4 x i32>
192 %res = mul nsw <4 x i32> %a, %b
; Cost-model test: 8-lane i32 vector multiply (nsw) of the two arguments.
196 define <8 x i32> @slm-costs_32_v8_mul(<8 x i32> %a, <8 x i32> %b) {
198 ; SLM: cost of 22 {{.*}} mul nsw <8 x i32>
199 %res = mul nsw <8 x i32> %a, %b
; Cost-model test: 16-lane i32 vector multiply (nsw) of the two arguments.
203 define <16 x i32> @slm-costs_32_v16_mul(<16 x i32> %a, <16 x i32> %b) {
205 ; SLM: cost of 44 {{.*}} mul nsw <16 x i32>
206 %res = mul nsw <16 x i32> %a, %b
; Cost-model test: scalar i64 multiply (nsw) of the two arguments.
211 define i64 @slm-costs_64_scalar_mul(i64 %a, i64 %b) {
213 ; SLM: cost of 1 {{.*}} mul nsw i64
214 %res = mul nsw i64 %a, %b
; Cost-model test: 2-lane i64 vector multiply (nsw) of the two arguments.
218 define <2 x i64> @slm-costs_64_v2_mul(<2 x i64> %a, <2 x i64> %b) {
220 ; SLM: cost of 11 {{.*}} mul nsw <2 x i64>
221 %res = mul nsw <2 x i64> %a, %b
; Cost-model test: 4-lane i64 vector multiply (nsw) of the two arguments.
225 define <4 x i64> @slm-costs_64_v4_mul(<4 x i64> %a, <4 x i64> %b) {
227 ; SLM: cost of 22 {{.*}} mul nsw <4 x i64>
228 %res = mul nsw <4 x i64> %a, %b
; Cost-model test: 8-lane i64 vector multiply (nsw) of the two arguments.
232 define <8 x i64> @slm-costs_64_v8_mul(<8 x i64> %a, <8 x i64> %b) {
234 ; SLM: cost of 44 {{.*}} mul nsw <8 x i64>
235 %res = mul nsw <8 x i64> %a, %b
; Cost-model test: 16-lane i64 vector multiply (nsw) of the two arguments.
239 define <16 x i64> @slm-costs_64_v16_mul(<16 x i64> %a, <16 x i64> %b) {
241 ; SLM: cost of 88 {{.*}} mul nsw <16 x i64>
242 %res = mul nsw <16 x i64> %a, %b
; Cost-model test: scalar double-precision floating-point multiply.
247 define double @slm-costs_mulsd(double %a, double %b) {
249 ; SLM: cost of 2 {{.*}} fmul double
250 %res = fmul double %a, %b
; Cost-model test: 2-lane double-precision floating-point vector multiply.
255 define <2 x double> @slm-costs_mulpd(<2 x double> %a, <2 x double> %b) {
257 ; SLM: cost of 4 {{.*}} fmul <2 x double>
258 %res = fmul <2 x double> %a, %b
259 ret <2 x double> %res
; Cost-model test: 4-lane single-precision floating-point vector multiply.
263 define <4 x float> @slm-costs_mulps(<4 x float> %a, <4 x float> %b) {
265 ; SLM: cost of 2 {{.*}} fmul <4 x float>
266 %res = fmul <4 x float> %a, %b
; Cost-model test: scalar single-precision floating-point divide.
271 define float @slm-costs_divss(float %a, float %b) {
273 ; SLM: cost of 17 {{.*}} fdiv float
274 %res = fdiv float %a, %b
; Cost-model test: 4-lane single-precision floating-point vector divide.
279 define <4 x float> @slm-costs_divps(<4 x float> %a, <4 x float> %b) {
281 ; SLM: cost of 39 {{.*}} fdiv <4 x float>
282 %res = fdiv <4 x float> %a, %b
; Cost-model test: scalar double-precision floating-point divide.
287 define double @slm-costs_divsd(double %a, double %b) {
289 ; SLM: cost of 32 {{.*}} fdiv double
290 %res = fdiv double %a, %b
; Cost-model test: 2-lane double-precision floating-point vector divide.
295 define <2 x double> @slm-costs_divpd(<2 x double> %a, <2 x double> %b) {
297 ; SLM: cost of 69 {{.*}} fdiv <2 x double>
298 %res = fdiv <2 x double> %a, %b
299 ret <2 x double> %res
; Cost-model test: 2-lane double-precision floating-point vector add.
303 define <2 x double> @slm-costs_addpd(<2 x double> %a, <2 x double> %b) {
305 ; SLM: cost of 2 {{.*}} fadd <2 x double>
306 %res = fadd <2 x double> %a, %b
307 ret <2 x double> %res
; Cost-model test: 2-lane double-precision floating-point vector subtract.
311 define <2 x double> @slm-costs_subpd(<2 x double> %a, <2 x double> %b) {
313 ; SLM: cost of 2 {{.*}} fsub <2 x double>
314 %res = fsub <2 x double> %a, %b
315 ret <2 x double> %res