; Each invocation below runs the cost-model analysis on this file for one x86
; feature set and verifies the output with that feature set's FileCheck prefixes.
; All prefix options use the same double-dash spelling.
1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41
3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
4 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
5 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX
6 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX2
7 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
8 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512dq -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
9 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
11 ; Verify the cost of vector arithmetic shift right instructions.
; Arithmetic shift right of <2 x i64> where each lane takes its own run-time
; shift amount from %b. The directives pin the cost reported for %shift under
; each tested feature set.
17 define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
18 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v2i64':
19 ; SSE2: Found an estimated cost of 12 for instruction: %shift
20 ; SSE41: Found an estimated cost of 12 for instruction: %shift
21 ; AVX: Found an estimated cost of 12 for instruction: %shift
22 ; AVX2: Found an estimated cost of 4 for instruction: %shift
23 ; AVX512: Found an estimated cost of 4 for instruction: %shift
24 ; XOP: Found an estimated cost of 2 for instruction: %shift
25 %shift = ashr <2 x i64> %a, %b
; Arithmetic shift right of <4 x i64> where each lane takes its own run-time
; shift amount from %b. The directives pin the cost reported for %shift under
; each tested feature set.
29 define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
30 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
31 ; SSE2: Found an estimated cost of 24 for instruction: %shift
32 ; SSE41: Found an estimated cost of 24 for instruction: %shift
33 ; AVX: Found an estimated cost of 24 for instruction: %shift
34 ; AVX2: Found an estimated cost of 4 for instruction: %shift
35 ; AVX512: Found an estimated cost of 4 for instruction: %shift
36 ; XOP: Found an estimated cost of 4 for instruction: %shift
37 %shift = ashr <4 x i64> %a, %b
; Arithmetic shift right of <8 x i64> where each lane takes its own run-time
; shift amount from %b. The directives pin the cost reported for %shift under
; each tested feature set.
41 define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
42 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i64':
43 ; SSE2: Found an estimated cost of 48 for instruction: %shift
44 ; SSE41: Found an estimated cost of 48 for instruction: %shift
45 ; AVX: Found an estimated cost of 48 for instruction: %shift
46 ; AVX2: Found an estimated cost of 8 for instruction: %shift
47 ; AVX512: Found an estimated cost of 1 for instruction: %shift
48 ; XOP: Found an estimated cost of 8 for instruction: %shift
49 %shift = ashr <8 x i64> %a, %b
; Arithmetic shift right of <4 x i32> where each lane takes its own run-time
; shift amount from %b. The two XOP configurations are checked with separate
; prefixes here because their expected costs differ.
53 define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
54 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
55 ; SSE2: Found an estimated cost of 16 for instruction: %shift
56 ; SSE41: Found an estimated cost of 12 for instruction: %shift
57 ; AVX: Found an estimated cost of 12 for instruction: %shift
58 ; AVX2: Found an estimated cost of 1 for instruction: %shift
59 ; AVX512: Found an estimated cost of 1 for instruction: %shift
60 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
61 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
62 %shift = ashr <4 x i32> %a, %b
; Arithmetic shift right of <8 x i32> where each lane takes its own run-time
; shift amount from %b. The two XOP configurations are checked with separate
; prefixes here because their expected costs differ.
66 define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
67 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
68 ; SSE2: Found an estimated cost of 32 for instruction: %shift
69 ; SSE41: Found an estimated cost of 24 for instruction: %shift
70 ; AVX: Found an estimated cost of 24 for instruction: %shift
71 ; AVX2: Found an estimated cost of 1 for instruction: %shift
72 ; AVX512: Found an estimated cost of 1 for instruction: %shift
73 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
74 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
75 %shift = ashr <8 x i32> %a, %b
; Arithmetic shift right of <16 x i32> where each lane takes its own run-time
; shift amount from %b. The two XOP configurations are checked with separate
; prefixes here because their expected costs differ.
79 define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
80 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32':
81 ; SSE2: Found an estimated cost of 64 for instruction: %shift
82 ; SSE41: Found an estimated cost of 48 for instruction: %shift
83 ; AVX: Found an estimated cost of 48 for instruction: %shift
84 ; AVX2: Found an estimated cost of 2 for instruction: %shift
85 ; AVX512: Found an estimated cost of 1 for instruction: %shift
86 ; XOPAVX: Found an estimated cost of 8 for instruction: %shift
87 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
88 %shift = ashr <16 x i32> %a, %b
; Arithmetic shift right of <8 x i16> where each lane takes its own run-time
; shift amount from %b. The directives pin the cost reported for %shift under
; each tested feature set.
92 define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
93 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
94 ; SSE2: Found an estimated cost of 32 for instruction: %shift
95 ; SSE41: Found an estimated cost of 14 for instruction: %shift
96 ; AVX: Found an estimated cost of 14 for instruction: %shift
97 ; AVX2: Found an estimated cost of 14 for instruction: %shift
98 ; AVX512: Found an estimated cost of 14 for instruction: %shift
99 ; XOP: Found an estimated cost of 2 for instruction: %shift
100 %shift = ashr <8 x i16> %a, %b
; Arithmetic shift right of <16 x i16> where each lane takes its own run-time
; shift amount from %b. The directives pin the cost reported for %shift under
; each tested feature set.
104 define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
105 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
106 ; SSE2: Found an estimated cost of 64 for instruction: %shift
107 ; SSE41: Found an estimated cost of 28 for instruction: %shift
108 ; AVX: Found an estimated cost of 28 for instruction: %shift
109 ; AVX2: Found an estimated cost of 10 for instruction: %shift
110 ; AVX512: Found an estimated cost of 10 for instruction: %shift
111 ; XOP: Found an estimated cost of 4 for instruction: %shift
112 %shift = ashr <16 x i16> %a, %b
113 ret <16 x i16> %shift
; Arithmetic shift right of <32 x i16> where each lane takes its own run-time
; shift amount from %b. The AVX-512 configurations are checked with separate
; F and BW prefixes here because their expected costs differ.
116 define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
117 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i16':
118 ; SSE2: Found an estimated cost of 128 for instruction: %shift
119 ; SSE41: Found an estimated cost of 56 for instruction: %shift
120 ; AVX: Found an estimated cost of 56 for instruction: %shift
121 ; AVX2: Found an estimated cost of 20 for instruction: %shift
122 ; AVX512F: Found an estimated cost of 20 for instruction: %shift
123 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
124 ; XOP: Found an estimated cost of 8 for instruction: %shift
125 %shift = ashr <32 x i16> %a, %b
126 ret <32 x i16> %shift
; Arithmetic shift right of <16 x i8> where each lane takes its own run-time
; shift amount from %b. The directives pin the cost reported for %shift under
; each tested feature set.
129 define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
130 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
131 ; SSE2: Found an estimated cost of 54 for instruction: %shift
132 ; SSE41: Found an estimated cost of 24 for instruction: %shift
133 ; AVX: Found an estimated cost of 24 for instruction: %shift
134 ; AVX2: Found an estimated cost of 24 for instruction: %shift
135 ; AVX512: Found an estimated cost of 24 for instruction: %shift
136 ; XOP: Found an estimated cost of 2 for instruction: %shift
137 %shift = ashr <16 x i8> %a, %b
; Arithmetic shift right of <32 x i8> where each lane takes its own run-time
; shift amount from %b. The AVX-512 F and BW prefixes are listed separately
; here although both expect the same cost.
141 define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
142 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
143 ; SSE2: Found an estimated cost of 108 for instruction: %shift
144 ; SSE41: Found an estimated cost of 48 for instruction: %shift
145 ; AVX: Found an estimated cost of 48 for instruction: %shift
146 ; AVX2: Found an estimated cost of 24 for instruction: %shift
147 ; AVX512F: Found an estimated cost of 24 for instruction: %shift
148 ; AVX512BW: Found an estimated cost of 24 for instruction: %shift
149 ; XOP: Found an estimated cost of 4 for instruction: %shift
150 %shift = ashr <32 x i8> %a, %b
; Arithmetic shift right of <64 x i8> where each lane takes its own run-time
; shift amount from %b. The AVX-512 configurations are checked with separate
; F and BW prefixes here because their expected costs differ.
154 define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
155 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v64i8':
156 ; SSE2: Found an estimated cost of 216 for instruction: %shift
157 ; SSE41: Found an estimated cost of 96 for instruction: %shift
158 ; AVX: Found an estimated cost of 96 for instruction: %shift
159 ; AVX2: Found an estimated cost of 48 for instruction: %shift
160 ; AVX512F: Found an estimated cost of 48 for instruction: %shift
161 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
162 ; XOP: Found an estimated cost of 8 for instruction: %shift
163 %shift = ashr <64 x i8> %a, %b
168 ; Uniform Variable Shifts
; Arithmetic shift right of <2 x i64> by a uniform run-time amount: lane 0 of
; %b is broadcast to every lane (all-zero shuffle mask) and used as the shift
; vector. The directives pin the cost reported for %shift only.
171 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
172 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v2i64':
173 ; SSE2: Found an estimated cost of 12 for instruction: %shift
174 ; SSE41: Found an estimated cost of 12 for instruction: %shift
175 ; AVX: Found an estimated cost of 12 for instruction: %shift
176 ; AVX2: Found an estimated cost of 4 for instruction: %shift
177 ; AVX512: Found an estimated cost of 4 for instruction: %shift
178 ; XOP: Found an estimated cost of 2 for instruction: %shift
179 %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
180 %shift = ashr <2 x i64> %a, %splat
; Arithmetic shift right of <4 x i64> by a uniform run-time amount: lane 0 of
; %b is broadcast to every lane (all-zero shuffle mask) and used as the shift
; vector. The directives pin the cost reported for %shift only.
184 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
185 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
186 ; SSE2: Found an estimated cost of 24 for instruction: %shift
187 ; SSE41: Found an estimated cost of 24 for instruction: %shift
188 ; AVX: Found an estimated cost of 24 for instruction: %shift
189 ; AVX2: Found an estimated cost of 4 for instruction: %shift
190 ; AVX512: Found an estimated cost of 4 for instruction: %shift
191 ; XOP: Found an estimated cost of 4 for instruction: %shift
192 %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
193 %shift = ashr <4 x i64> %a, %splat
; Arithmetic shift right of <8 x i64> by a uniform run-time amount: lane 0 of
; %b is broadcast to every lane (all-zero shuffle mask) and used as the shift
; vector. The directives pin the cost reported for %shift only.
197 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
198 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i64':
199 ; SSE2: Found an estimated cost of 48 for instruction: %shift
200 ; SSE41: Found an estimated cost of 48 for instruction: %shift
201 ; AVX: Found an estimated cost of 48 for instruction: %shift
202 ; AVX2: Found an estimated cost of 8 for instruction: %shift
203 ; AVX512: Found an estimated cost of 1 for instruction: %shift
204 ; XOP: Found an estimated cost of 8 for instruction: %shift
205 %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
206 %shift = ashr <8 x i64> %a, %splat
; Arithmetic shift right of <4 x i32> by a uniform run-time amount: lane 0 of
; %b is broadcast to every lane (all-zero shuffle mask) and used as the shift
; vector. The directives pin the cost reported for %shift only.
210 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
211 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
212 ; SSE2: Found an estimated cost of 16 for instruction: %shift
213 ; SSE41: Found an estimated cost of 12 for instruction: %shift
214 ; AVX: Found an estimated cost of 12 for instruction: %shift
215 ; AVX2: Found an estimated cost of 1 for instruction: %shift
216 ; AVX512: Found an estimated cost of 1 for instruction: %shift
217 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
218 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
219 %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
220 %shift = ashr <4 x i32> %a, %splat
; Arithmetic shift right of <8 x i32> by a uniform run-time amount: lane 0 of
; %b is broadcast to every lane (all-zero shuffle mask) and used as the shift
; vector. The directives pin the cost reported for %shift only.
224 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
225 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
226 ; SSE2: Found an estimated cost of 32 for instruction: %shift
227 ; SSE41: Found an estimated cost of 24 for instruction: %shift
228 ; AVX: Found an estimated cost of 24 for instruction: %shift
229 ; AVX2: Found an estimated cost of 1 for instruction: %shift
230 ; AVX512: Found an estimated cost of 1 for instruction: %shift
231 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
232 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
233 %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
234 %shift = ashr <8 x i32> %a, %splat
; Arithmetic shift right of <16 x i32> by a uniform run-time amount: lane 0 of
; %b is broadcast to every lane (all-zero shuffle mask) and used as the shift
; vector. The directives pin the cost reported for %shift only.
238 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
239 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i32':
240 ; SSE2: Found an estimated cost of 64 for instruction: %shift
241 ; SSE41: Found an estimated cost of 48 for instruction: %shift
242 ; AVX: Found an estimated cost of 48 for instruction: %shift
243 ; AVX2: Found an estimated cost of 2 for instruction: %shift
244 ; AVX512: Found an estimated cost of 1 for instruction: %shift
245 ; XOPAVX: Found an estimated cost of 8 for instruction: %shift
246 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
247 %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
248 %shift = ashr <16 x i32> %a, %splat
249 ret <16 x i32> %shift
; Arithmetic shift right of <8 x i16> by a uniform run-time amount: lane 0 of
; %b is broadcast to every lane (all-zero shuffle mask) and used as the shift
; vector. The directives pin the cost reported for %shift only.
252 define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
253 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
254 ; SSE2: Found an estimated cost of 32 for instruction: %shift
255 ; SSE41: Found an estimated cost of 14 for instruction: %shift
256 ; AVX: Found an estimated cost of 14 for instruction: %shift
257 ; AVX2: Found an estimated cost of 14 for instruction: %shift
258 ; AVX512: Found an estimated cost of 14 for instruction: %shift
259 ; XOP: Found an estimated cost of 2 for instruction: %shift
260 %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
261 %shift = ashr <8 x i16> %a, %splat
; Arithmetic shift right of <16 x i16> by a uniform run-time amount: lane 0 of
; %b is broadcast to every lane (all-zero shuffle mask) and used as the shift
; vector. The directives pin the cost reported for %shift only.
265 define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
266 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
267 ; SSE2: Found an estimated cost of 64 for instruction: %shift
268 ; SSE41: Found an estimated cost of 28 for instruction: %shift
269 ; AVX: Found an estimated cost of 28 for instruction: %shift
270 ; AVX2: Found an estimated cost of 10 for instruction: %shift
271 ; AVX512: Found an estimated cost of 10 for instruction: %shift
272 ; XOP: Found an estimated cost of 4 for instruction: %shift
273 %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
274 %shift = ashr <16 x i16> %a, %splat
275 ret <16 x i16> %shift
; Arithmetic shift right of <32 x i16> by a uniform run-time amount: lane 0 of
; %b is broadcast to every lane (all-zero shuffle mask) and used as the shift
; vector. The directives pin the cost reported for %shift only.
278 define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
279 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i16':
280 ; SSE2: Found an estimated cost of 128 for instruction: %shift
281 ; SSE41: Found an estimated cost of 56 for instruction: %shift
282 ; AVX: Found an estimated cost of 56 for instruction: %shift
283 ; AVX2: Found an estimated cost of 20 for instruction: %shift
284 ; AVX512F: Found an estimated cost of 20 for instruction: %shift
285 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
286 ; XOP: Found an estimated cost of 8 for instruction: %shift
287 %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
288 %shift = ashr <32 x i16> %a, %splat
289 ret <32 x i16> %shift
; Arithmetic shift right of <16 x i8> by a uniform run-time amount: lane 0 of
; %b is broadcast to every lane (all-zero shuffle mask) and used as the shift
; vector. The directives pin the cost reported for %shift only.
292 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
293 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
294 ; SSE2: Found an estimated cost of 54 for instruction: %shift
295 ; SSE41: Found an estimated cost of 24 for instruction: %shift
296 ; AVX: Found an estimated cost of 24 for instruction: %shift
297 ; AVX2: Found an estimated cost of 24 for instruction: %shift
298 ; AVX512: Found an estimated cost of 24 for instruction: %shift
299 ; XOP: Found an estimated cost of 2 for instruction: %shift
300 %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
301 %shift = ashr <16 x i8> %a, %splat
; Arithmetic shift right of <32 x i8> by a uniform run-time amount: lane 0 of
; %b is broadcast to every lane (all-zero shuffle mask) and used as the shift
; vector. The directives pin the cost reported for %shift only.
305 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
306 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
307 ; SSE2: Found an estimated cost of 108 for instruction: %shift
308 ; SSE41: Found an estimated cost of 48 for instruction: %shift
309 ; AVX: Found an estimated cost of 48 for instruction: %shift
310 ; AVX2: Found an estimated cost of 24 for instruction: %shift
311 ; AVX512: Found an estimated cost of 24 for instruction: %shift
312 ; XOP: Found an estimated cost of 4 for instruction: %shift
313 %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
314 %shift = ashr <32 x i8> %a, %splat
; Arithmetic shift right of <64 x i8> by a uniform run-time amount: lane 0 of
; %b is broadcast to every lane (all-zero shuffle mask) and used as the shift
; vector. The directives pin the cost reported for %shift only.
318 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
319 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v64i8':
320 ; SSE2: Found an estimated cost of 216 for instruction: %shift
321 ; SSE41: Found an estimated cost of 96 for instruction: %shift
322 ; AVX: Found an estimated cost of 96 for instruction: %shift
323 ; AVX2: Found an estimated cost of 48 for instruction: %shift
324 ; AVX512F: Found an estimated cost of 48 for instruction: %shift
325 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
326 ; XOP: Found an estimated cost of 8 for instruction: %shift
327 %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
328 %shift = ashr <64 x i8> %a, %splat
; Arithmetic shift right of <2 x i64> by a constant vector whose lanes hold
; different shift amounts. The directives pin the cost reported for %shift
; under each tested feature set.
336 define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
337 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v2i64':
338 ; SSE2: Found an estimated cost of 12 for instruction: %shift
339 ; SSE41: Found an estimated cost of 12 for instruction: %shift
340 ; AVX: Found an estimated cost of 12 for instruction: %shift
341 ; AVX2: Found an estimated cost of 4 for instruction: %shift
342 ; AVX512: Found an estimated cost of 4 for instruction: %shift
343 ; XOP: Found an estimated cost of 2 for instruction: %shift
344 %shift = ashr <2 x i64> %a, <i64 1, i64 7>
; Arithmetic shift right of <4 x i64> by a constant vector whose lanes hold
; different shift amounts. The directives pin the cost reported for %shift
; under each tested feature set.
348 define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
349 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
350 ; SSE2: Found an estimated cost of 24 for instruction: %shift
351 ; SSE41: Found an estimated cost of 24 for instruction: %shift
352 ; AVX: Found an estimated cost of 24 for instruction: %shift
353 ; AVX2: Found an estimated cost of 4 for instruction: %shift
354 ; AVX512: Found an estimated cost of 4 for instruction: %shift
355 ; XOP: Found an estimated cost of 4 for instruction: %shift
356 %shift = ashr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
; Arithmetic shift right of <8 x i64> by a constant vector whose lanes are
; not all equal (the amounts 1, 7, 15, 31 repeated twice). The directives pin
; the cost reported for %shift under each tested feature set.
360 define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) {
361 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i64':
362 ; SSE2: Found an estimated cost of 48 for instruction: %shift
363 ; SSE41: Found an estimated cost of 48 for instruction: %shift
364 ; AVX: Found an estimated cost of 48 for instruction: %shift
365 ; AVX2: Found an estimated cost of 8 for instruction: %shift
366 ; AVX512: Found an estimated cost of 1 for instruction: %shift
367 ; XOP: Found an estimated cost of 8 for instruction: %shift
368 %shift = ashr <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
; Arithmetic shift right of <4 x i32> by a constant vector whose lanes hold
; different shift amounts. The directives pin the cost reported for %shift
; under each tested feature set.
372 define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
373 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
374 ; SSE2: Found an estimated cost of 16 for instruction: %shift
375 ; SSE41: Found an estimated cost of 12 for instruction: %shift
376 ; AVX: Found an estimated cost of 12 for instruction: %shift
377 ; AVX2: Found an estimated cost of 1 for instruction: %shift
378 ; AVX512: Found an estimated cost of 1 for instruction: %shift
379 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
380 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
381 %shift = ashr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
; Arithmetic shift right of <8 x i32> by a constant vector whose lanes hold
; different shift amounts. The directives pin the cost reported for %shift
; under each tested feature set.
385 define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
386 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
387 ; SSE2: Found an estimated cost of 32 for instruction: %shift
388 ; SSE41: Found an estimated cost of 24 for instruction: %shift
389 ; AVX: Found an estimated cost of 24 for instruction: %shift
390 ; AVX2: Found an estimated cost of 1 for instruction: %shift
391 ; AVX512: Found an estimated cost of 1 for instruction: %shift
392 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
393 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
394 %shift = ashr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
; Arithmetic shift right of <16 x i32> by a constant vector whose lanes are
; not all equal (an eight-element pattern repeated twice). The directives pin
; the cost reported for %shift under each tested feature set.
398 define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
399 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i32':
400 ; SSE2: Found an estimated cost of 64 for instruction: %shift
401 ; SSE41: Found an estimated cost of 48 for instruction: %shift
402 ; AVX: Found an estimated cost of 48 for instruction: %shift
403 ; AVX2: Found an estimated cost of 2 for instruction: %shift
404 ; AVX512: Found an estimated cost of 1 for instruction: %shift
405 ; XOPAVX: Found an estimated cost of 8 for instruction: %shift
406 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
407 %shift = ashr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
408 ret <16 x i32> %shift
; Arithmetic shift right of <8 x i16> by a constant vector whose lanes hold
; the different shift amounts 0 through 7. The directives pin the cost
; reported for %shift under each tested feature set.
411 define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
412 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
413 ; SSE2: Found an estimated cost of 32 for instruction: %shift
414 ; SSE41: Found an estimated cost of 14 for instruction: %shift
415 ; AVX: Found an estimated cost of 14 for instruction: %shift
416 ; AVX2: Found an estimated cost of 14 for instruction: %shift
417 ; AVX512: Found an estimated cost of 14 for instruction: %shift
418 ; XOP: Found an estimated cost of 2 for instruction: %shift
419 %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
; Arithmetic shift right of <16 x i16> by a constant vector whose lanes are
; not all equal (the amounts 0 through 7 repeated twice). The directives pin
; the cost reported for %shift under each tested feature set.
423 define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
424 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
425 ; SSE2: Found an estimated cost of 64 for instruction: %shift
426 ; SSE41: Found an estimated cost of 28 for instruction: %shift
427 ; AVX: Found an estimated cost of 28 for instruction: %shift
428 ; AVX2: Found an estimated cost of 10 for instruction: %shift
429 ; AVX512: Found an estimated cost of 10 for instruction: %shift
430 ; XOP: Found an estimated cost of 4 for instruction: %shift
431 %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
432 ret <16 x i16> %shift
; Arithmetic shift right of <32 x i16> by a constant vector whose lanes are
; not all equal (the amounts 0 through 7 repeated four times). The directives
; pin the cost reported for %shift under each tested feature set.
435 define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
436 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i16':
437 ; SSE2: Found an estimated cost of 128 for instruction: %shift
438 ; SSE41: Found an estimated cost of 56 for instruction: %shift
439 ; AVX: Found an estimated cost of 56 for instruction: %shift
440 ; AVX2: Found an estimated cost of 20 for instruction: %shift
441 ; AVX512F: Found an estimated cost of 20 for instruction: %shift
442 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
443 ; XOP: Found an estimated cost of 8 for instruction: %shift
444 %shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
445 ret <32 x i16> %shift
; Arithmetic shift right of <16 x i8> by a constant vector whose lanes are
; not all equal (the amounts 0 up to 7, then 7 down to 0). The directives pin
; the cost reported for %shift under each tested feature set.
448 define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
449 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
450 ; SSE2: Found an estimated cost of 54 for instruction: %shift
451 ; SSE41: Found an estimated cost of 24 for instruction: %shift
452 ; AVX: Found an estimated cost of 24 for instruction: %shift
453 ; AVX2: Found an estimated cost of 24 for instruction: %shift
454 ; AVX512: Found an estimated cost of 24 for instruction: %shift
455 ; XOP: Found an estimated cost of 2 for instruction: %shift
456 %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
; Arithmetic shift right of <32 x i8> by a constant vector whose lanes are
; not all equal (a sixteen-element up-then-down pattern repeated twice). The
; directives pin the cost reported for %shift under each tested feature set.
460 define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
461 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
462 ; SSE2: Found an estimated cost of 108 for instruction: %shift
463 ; SSE41: Found an estimated cost of 48 for instruction: %shift
464 ; AVX: Found an estimated cost of 48 for instruction: %shift
465 ; AVX2: Found an estimated cost of 24 for instruction: %shift
466 ; AVX512: Found an estimated cost of 24 for instruction: %shift
467 ; XOP: Found an estimated cost of 4 for instruction: %shift
468 %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
; Arithmetic shift right of <64 x i8> by a constant vector whose lanes are
; not all equal (a sixteen-element up-then-down pattern repeated four times).
; The directives pin the cost reported for %shift under each feature set.
472 define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
473 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v64i8':
474 ; SSE2: Found an estimated cost of 216 for instruction: %shift
475 ; SSE41: Found an estimated cost of 96 for instruction: %shift
476 ; AVX: Found an estimated cost of 96 for instruction: %shift
477 ; AVX2: Found an estimated cost of 48 for instruction: %shift
478 ; AVX512F: Found an estimated cost of 48 for instruction: %shift
479 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
480 ; XOP: Found an estimated cost of 8 for instruction: %shift
481 %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
486 ; Uniform Constant Shifts
; Arithmetic shift right of <2 x i64> where every lane is shifted by the same
; constant amount (7). The directives pin the cost reported for %shift under
; each tested feature set.
489 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
490 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v2i64':
491 ; SSE2: Found an estimated cost of 4 for instruction: %shift
492 ; SSE41: Found an estimated cost of 4 for instruction: %shift
493 ; AVX: Found an estimated cost of 4 for instruction: %shift
494 ; AVX2: Found an estimated cost of 4 for instruction: %shift
495 ; AVX512: Found an estimated cost of 4 for instruction: %shift
496 ; XOP: Found an estimated cost of 2 for instruction: %shift
497 %shift = ashr <2 x i64> %a, <i64 7, i64 7>
; Arithmetic shift right of <4 x i64> where every lane is shifted by the same
; constant amount (7). The directives pin the cost reported for %shift under
; each tested feature set.
501 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
502 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
503 ; SSE2: Found an estimated cost of 8 for instruction: %shift
504 ; SSE41: Found an estimated cost of 8 for instruction: %shift
505 ; AVX: Found an estimated cost of 8 for instruction: %shift
506 ; AVX2: Found an estimated cost of 4 for instruction: %shift
507 ; AVX512: Found an estimated cost of 4 for instruction: %shift
508 ; XOP: Found an estimated cost of 4 for instruction: %shift
509 %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
; Arithmetic shift right of <8 x i64> where every lane is shifted by the same
; constant amount (7). The directives pin the cost reported for %shift under
; each tested feature set.
513 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
514 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i64':
515 ; SSE2: Found an estimated cost of 16 for instruction: %shift
516 ; SSE41: Found an estimated cost of 16 for instruction: %shift
517 ; AVX: Found an estimated cost of 16 for instruction: %shift
518 ; AVX2: Found an estimated cost of 8 for instruction: %shift
519 ; AVX512: Found an estimated cost of 1 for instruction: %shift
520 ; XOP: Found an estimated cost of 8 for instruction: %shift
521 %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
; Arithmetic shift right of <4 x i32> where every lane is shifted by the same
; constant amount (5). The directives pin the cost reported for %shift under
; each tested feature set.
525 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
526 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
527 ; SSE2: Found an estimated cost of 1 for instruction: %shift
528 ; SSE41: Found an estimated cost of 1 for instruction: %shift
529 ; AVX: Found an estimated cost of 1 for instruction: %shift
530 ; AVX2: Found an estimated cost of 1 for instruction: %shift
531 ; AVX512: Found an estimated cost of 1 for instruction: %shift
532 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
533 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
534 %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
; Arithmetic shift right of <8 x i32> where every lane is shifted by the same
; constant amount (5). The directives pin the cost reported for %shift under
; each tested feature set.
538 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
539 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i32':
540 ; SSE2: Found an estimated cost of 2 for instruction: %shift
541 ; SSE41: Found an estimated cost of 2 for instruction: %shift
542 ; AVX: Found an estimated cost of 2 for instruction: %shift
543 ; AVX2: Found an estimated cost of 1 for instruction: %shift
544 ; AVX512: Found an estimated cost of 1 for instruction: %shift
545 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
546 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
547 %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
; Arithmetic shift right of <16 x i32> where every lane is shifted by the
; same constant amount (5). The directives pin the cost reported for %shift
; under each tested feature set.
551 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
552 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i32':
553 ; SSE2: Found an estimated cost of 4 for instruction: %shift
554 ; SSE41: Found an estimated cost of 4 for instruction: %shift
555 ; AVX: Found an estimated cost of 4 for instruction: %shift
556 ; AVX2: Found an estimated cost of 2 for instruction: %shift
557 ; AVX512: Found an estimated cost of 1 for instruction: %shift
558 ; XOPAVX: Found an estimated cost of 8 for instruction: %shift
559 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
560 %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
561 ret <16 x i32> %shift
; Arithmetic shift right of <8 x i16> where every lane is shifted by the same
; constant amount (3). The directives pin the cost reported for %shift under
; each tested feature set.
564 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
565 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
566 ; SSE2: Found an estimated cost of 1 for instruction: %shift
567 ; SSE41: Found an estimated cost of 1 for instruction: %shift
568 ; AVX: Found an estimated cost of 1 for instruction: %shift
569 ; AVX2: Found an estimated cost of 1 for instruction: %shift
570 ; AVX512: Found an estimated cost of 1 for instruction: %shift
571 ; XOP: Found an estimated cost of 2 for instruction: %shift
572 %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
; Arithmetic shift right of <16 x i16> where every lane is shifted by the
; same constant amount (3). The directives pin the cost reported for %shift
; under each tested feature set.
; NOTE(review): the AVX2 / AVX-512 expectation (10) is higher than the
; SSE/AVX one (2) for this uniform-constant case; presumably this records
; the cost model's output at the time, confirm before treating it as intended.
576 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
577 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i16':
578 ; SSE2: Found an estimated cost of 2 for instruction: %shift
579 ; SSE41: Found an estimated cost of 2 for instruction: %shift
580 ; AVX: Found an estimated cost of 2 for instruction: %shift
581 ; AVX2: Found an estimated cost of 10 for instruction: %shift
582 ; AVX512: Found an estimated cost of 10 for instruction: %shift
583 ; XOP: Found an estimated cost of 4 for instruction: %shift
584 %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
585 ret <16 x i16> %shift
; Arithmetic shift right of <32 x i16> where every lane is shifted by the
; same constant amount (3). The directives pin the cost reported for %shift
; under each tested feature set.
; NOTE(review): the AVX2 and AVX-512F expectation (20) is higher than the
; SSE/AVX one (4) for this uniform-constant case; presumably this records
; the cost model's output at the time, confirm before treating it as intended.
588 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
589 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i16':
590 ; SSE2: Found an estimated cost of 4 for instruction: %shift
591 ; SSE41: Found an estimated cost of 4 for instruction: %shift
592 ; AVX: Found an estimated cost of 4 for instruction: %shift
593 ; AVX2: Found an estimated cost of 20 for instruction: %shift
594 ; AVX512F: Found an estimated cost of 20 for instruction: %shift
595 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
596 ; XOP: Found an estimated cost of 8 for instruction: %shift
597 %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
598 ret <32 x i16> %shift
; Arithmetic shift right of <16 x i8> where every lane is shifted by the same
; constant amount (3). The directives pin the cost reported for %shift under
; each tested feature set.
601 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
602 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
603 ; SSE2: Found an estimated cost of 4 for instruction: %shift
604 ; SSE41: Found an estimated cost of 4 for instruction: %shift
605 ; AVX: Found an estimated cost of 4 for instruction: %shift
606 ; AVX2: Found an estimated cost of 4 for instruction: %shift
607 ; AVX512: Found an estimated cost of 4 for instruction: %shift
608 ; XOP: Found an estimated cost of 2 for instruction: %shift
609 %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
; Arithmetic shift right of <32 x i8> where every lane is shifted by the same
; constant amount (3). The directives pin the cost reported for %shift under
; each tested feature set.
; NOTE(review): the AVX2 / AVX-512 expectation (24) is higher than the
; SSE/AVX one (8) for this uniform-constant case; presumably this records
; the cost model's output at the time, confirm before treating it as intended.
613 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
614 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
615 ; SSE2: Found an estimated cost of 8 for instruction: %shift
616 ; SSE41: Found an estimated cost of 8 for instruction: %shift
617 ; AVX: Found an estimated cost of 8 for instruction: %shift
618 ; AVX2: Found an estimated cost of 24 for instruction: %shift
619 ; AVX512: Found an estimated cost of 24 for instruction: %shift
620 ; XOP: Found an estimated cost of 4 for instruction: %shift
621 %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
625 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
626 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8':
627 ; SSE2: Found an estimated cost of 16 for instruction: %shift
628 ; SSE41: Found an estimated cost of 16 for instruction: %shift
629 ; AVX: Found an estimated cost of 16 for instruction: %shift
630 ; AVX2: Found an estimated cost of 48 for instruction: %shift
631 ; AVX512F: Found an estimated cost of 48 for instruction: %shift
632 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
633 ; XOP: Found an estimated cost of 8 for instruction: %shift
634 %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>