1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41
3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
4 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
5 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX
6 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX2
7 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
8 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512dq -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
9 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
10 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VL
11 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512dq,+avx512vl -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VL
12 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw,+avx512vl -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BWVL
14 ; Verify the cost of vector shift left instructions.
; Non-uniform variable shift: each i64 lane of %a is shifted left by the matching lane of %b.
; Expected cost of %shift is 4 on the SSE2, SSE4.1 and AVX runs, and 1 on the AVX2, AVX-512 and XOP runs.
21 define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
22 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v2i64':
23 ; SSE2: Found an estimated cost of 4 for instruction: %shift
24 ; SSE41: Found an estimated cost of 4 for instruction: %shift
25 ; AVX: Found an estimated cost of 4 for instruction: %shift
26 ; AVX2: Found an estimated cost of 1 for instruction: %shift
27 ; AVX512: Found an estimated cost of 1 for instruction: %shift
28 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
29 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
30 %shift = shl <2 x i64> %a, %b
; Non-uniform variable shift of <4 x i64> by the per-lane amounts in %b.
; Expected cost of %shift is 8 on the SSE2, SSE4.1 and AVX runs and 1 on the AVX2 and AVX-512 runs;
; the XOP runs expect 2 when paired with +avx and 1 when paired with +avx2.
34 define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
35 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
36 ; SSE2: Found an estimated cost of 8 for instruction: %shift
37 ; SSE41: Found an estimated cost of 8 for instruction: %shift
38 ; AVX: Found an estimated cost of 8 for instruction: %shift
39 ; AVX2: Found an estimated cost of 1 for instruction: %shift
40 ; AVX512: Found an estimated cost of 1 for instruction: %shift
41 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
42 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
43 %shift = shl <4 x i64> %a, %b
; Non-uniform variable shift of <8 x i64> by the per-lane amounts in %b.
; Expected cost of %shift is 16 on the SSE2, SSE4.1 and AVX runs, 2 on AVX2 and 1 on AVX-512;
; the XOP runs expect 4 when paired with +avx and 2 when paired with +avx2.
47 define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
48 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i64':
49 ; SSE2: Found an estimated cost of 16 for instruction: %shift
50 ; SSE41: Found an estimated cost of 16 for instruction: %shift
51 ; AVX: Found an estimated cost of 16 for instruction: %shift
52 ; AVX2: Found an estimated cost of 2 for instruction: %shift
53 ; AVX512: Found an estimated cost of 1 for instruction: %shift
54 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
55 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
56 %shift = shl <8 x i64> %a, %b
; Non-uniform variable shift of <4 x i32> by the per-lane amounts in %b.
; Expected cost of %shift is 10 on SSE2, 4 on the SSE4.1 and AVX runs, and 1 on the AVX2, AVX-512 and XOP runs.
60 define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
61 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
62 ; SSE2: Found an estimated cost of 10 for instruction: %shift
63 ; SSE41: Found an estimated cost of 4 for instruction: %shift
64 ; AVX: Found an estimated cost of 4 for instruction: %shift
65 ; AVX2: Found an estimated cost of 1 for instruction: %shift
66 ; AVX512: Found an estimated cost of 1 for instruction: %shift
67 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
68 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
69 %shift = shl <4 x i32> %a, %b
; Non-uniform variable shift of <8 x i32> by the per-lane amounts in %b.
; Expected cost of %shift is 20 on SSE2, 8 on the SSE4.1 and AVX runs, and 1 on the AVX2 and AVX-512 runs;
; the XOP runs expect 2 when paired with +avx and 1 when paired with +avx2.
73 define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
74 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
75 ; SSE2: Found an estimated cost of 20 for instruction: %shift
76 ; SSE41: Found an estimated cost of 8 for instruction: %shift
77 ; AVX: Found an estimated cost of 8 for instruction: %shift
78 ; AVX2: Found an estimated cost of 1 for instruction: %shift
79 ; AVX512: Found an estimated cost of 1 for instruction: %shift
80 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
81 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
82 %shift = shl <8 x i32> %a, %b
; Non-uniform variable shift of <16 x i32> by the per-lane amounts in %b.
; Expected cost of %shift is 40 on SSE2, 16 on the SSE4.1 and AVX runs, 2 on AVX2 and 1 on AVX-512;
; the XOP runs expect 4 when paired with +avx and 2 when paired with +avx2.
86 define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
87 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32':
88 ; SSE2: Found an estimated cost of 40 for instruction: %shift
89 ; SSE41: Found an estimated cost of 16 for instruction: %shift
90 ; AVX: Found an estimated cost of 16 for instruction: %shift
91 ; AVX2: Found an estimated cost of 2 for instruction: %shift
92 ; AVX512: Found an estimated cost of 1 for instruction: %shift
93 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
94 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
95 %shift = shl <16 x i32> %a, %b
; Non-uniform variable shift of <8 x i16> by the per-lane amounts in %b.
; Expected cost of %shift is 32 on SSE2, 14 on the SSE4.1, AVX, AVX2 and AVX-512 runs, and 1 on both XOP runs.
99 define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
100 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
101 ; SSE2: Found an estimated cost of 32 for instruction: %shift
102 ; SSE41: Found an estimated cost of 14 for instruction: %shift
103 ; AVX: Found an estimated cost of 14 for instruction: %shift
104 ; AVX2: Found an estimated cost of 14 for instruction: %shift
105 ; AVX512: Found an estimated cost of 14 for instruction: %shift
106 ; XOP: Found an estimated cost of 1 for instruction: %shift
107 %shift = shl <8 x i16> %a, %b
; Non-uniform variable shift of <16 x i16> by the per-lane amounts in %b.
; Expected cost of %shift is 64 on SSE2, 28 on the SSE4.1 and AVX runs, 10 on the AVX2 and AVX-512 runs,
; and 2 on both XOP runs.
111 define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
112 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
113 ; SSE2: Found an estimated cost of 64 for instruction: %shift
114 ; SSE41: Found an estimated cost of 28 for instruction: %shift
115 ; AVX: Found an estimated cost of 28 for instruction: %shift
116 ; AVX2: Found an estimated cost of 10 for instruction: %shift
117 ; AVX512: Found an estimated cost of 10 for instruction: %shift
118 ; XOP: Found an estimated cost of 2 for instruction: %shift
119 %shift = shl <16 x i16> %a, %b
120 ret <16 x i16> %shift
; Non-uniform variable shift of <32 x i16> by the per-lane amounts in %b.
; Expected cost of %shift is 128 on SSE2, 56 on the SSE4.1 and AVX runs, 20 on AVX2 and on AVX-512
; without BW, 1 on AVX-512 with BW, and 4 on both XOP runs.
; The AVX512VL / AVX512BWVL directives give the +avx512vl runs an explicit expectation for this
; function, as the other v32i16 and v64i8 tests in this file already do.
; NOTE(review): their values mirror the splat variant of this test - confirm against opt output.
123 define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
124 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i16':
125 ; SSE2: Found an estimated cost of 128 for instruction: %shift
126 ; SSE41: Found an estimated cost of 56 for instruction: %shift
127 ; AVX: Found an estimated cost of 56 for instruction: %shift
128 ; AVX2: Found an estimated cost of 20 for instruction: %shift
129 ; AVX512F: Found an estimated cost of 20 for instruction: %shift
130 ; AVX512BW: Found an estimated cost of 1 for instruction: %shift
; AVX512VL: Found an estimated cost of 20 for instruction: %shift
; AVX512BWVL: Found an estimated cost of 1 for instruction: %shift
131 ; XOP: Found an estimated cost of 4 for instruction: %shift
132 %shift = shl <32 x i16> %a, %b
133 ret <32 x i16> %shift
; Non-uniform variable shift of <16 x i8> by the per-lane amounts in %b.
; Expected cost of %shift is 26 on SSE2, 11 on the SSE4.1, AVX, AVX2 and AVX-512 runs, and 1 on both XOP runs.
136 define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
137 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
138 ; SSE2: Found an estimated cost of 26 for instruction: %shift
139 ; SSE41: Found an estimated cost of 11 for instruction: %shift
140 ; AVX: Found an estimated cost of 11 for instruction: %shift
141 ; AVX2: Found an estimated cost of 11 for instruction: %shift
142 ; AVX512: Found an estimated cost of 11 for instruction: %shift
143 ; XOP: Found an estimated cost of 1 for instruction: %shift
144 %shift = shl <16 x i8> %a, %b
; Non-uniform variable shift of <32 x i8> by the per-lane amounts in %b.
; Expected cost of %shift is 52 on SSE2, 22 on the SSE4.1 and AVX runs, 11 on the AVX2 and AVX-512 runs,
; and 2 on both XOP runs.
148 define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
149 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
150 ; SSE2: Found an estimated cost of 52 for instruction: %shift
151 ; SSE41: Found an estimated cost of 22 for instruction: %shift
152 ; AVX: Found an estimated cost of 22 for instruction: %shift
153 ; AVX2: Found an estimated cost of 11 for instruction: %shift
154 ; AVX512: Found an estimated cost of 11 for instruction: %shift
155 ; XOP: Found an estimated cost of 2 for instruction: %shift
156 %shift = shl <32 x i8> %a, %b
; Non-uniform variable shift of <64 x i8> by the per-lane amounts in %b.
; Expected cost of %shift is 104 on SSE2, 44 on the SSE4.1 and AVX runs, 22 on AVX2 and on AVX-512
; without BW (the F and VL prefixes), 11 on AVX-512 with BW (the BW and BWVL prefixes), and 4 on both XOP runs.
160 define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
161 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v64i8':
162 ; SSE2: Found an estimated cost of 104 for instruction: %shift
163 ; SSE41: Found an estimated cost of 44 for instruction: %shift
164 ; AVX: Found an estimated cost of 44 for instruction: %shift
165 ; AVX2: Found an estimated cost of 22 for instruction: %shift
166 ; AVX512F: Found an estimated cost of 22 for instruction: %shift
167 ; AVX512BW: Found an estimated cost of 11 for instruction: %shift
168 ; AVX512VL: Found an estimated cost of 22 for instruction: %shift
169 ; AVX512BWVL: Found an estimated cost of 11 for instruction: %shift
170 ; XOP: Found an estimated cost of 4 for instruction: %shift
171 %shift = shl <64 x i8> %a, %b
176 ; Uniform Variable Shifts
; Uniform variable shift: %splat broadcasts lane 0 of %b (all-zero shuffle mask), so every i64 lane
; of %a is shifted left by the same runtime amount.
; Expected cost of %shift is 4 on the SSE2, SSE4.1 and AVX runs, and 1 on the AVX2, AVX-512 and XOP runs.
179 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
180 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v2i64':
181 ; SSE2: Found an estimated cost of 4 for instruction: %shift
182 ; SSE41: Found an estimated cost of 4 for instruction: %shift
183 ; AVX: Found an estimated cost of 4 for instruction: %shift
184 ; AVX2: Found an estimated cost of 1 for instruction: %shift
185 ; AVX512: Found an estimated cost of 1 for instruction: %shift
186 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
187 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
188 %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
189 %shift = shl <2 x i64> %a, %splat
; Uniform variable shift of <4 x i64>: %splat broadcasts lane 0 of %b to all lanes before the shl.
; Expected cost of %shift is 8 on the SSE2, SSE4.1 and AVX runs and 1 on the AVX2 and AVX-512 runs;
; the XOP runs expect 2 when paired with +avx and 1 when paired with +avx2.
193 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
194 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
195 ; SSE2: Found an estimated cost of 8 for instruction: %shift
196 ; SSE41: Found an estimated cost of 8 for instruction: %shift
197 ; AVX: Found an estimated cost of 8 for instruction: %shift
198 ; AVX2: Found an estimated cost of 1 for instruction: %shift
199 ; AVX512: Found an estimated cost of 1 for instruction: %shift
200 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
201 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
202 %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
203 %shift = shl <4 x i64> %a, %splat
; Uniform variable shift of <8 x i64>: %splat broadcasts lane 0 of %b to all lanes before the shl.
; Expected cost of %shift is 16 on the SSE2, SSE4.1 and AVX runs, 2 on AVX2 and 1 on AVX-512;
; the XOP runs expect 4 when paired with +avx and 2 when paired with +avx2.
207 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
208 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i64':
209 ; SSE2: Found an estimated cost of 16 for instruction: %shift
210 ; SSE41: Found an estimated cost of 16 for instruction: %shift
211 ; AVX: Found an estimated cost of 16 for instruction: %shift
212 ; AVX2: Found an estimated cost of 2 for instruction: %shift
213 ; AVX512: Found an estimated cost of 1 for instruction: %shift
214 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
215 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
216 %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
217 %shift = shl <8 x i64> %a, %splat
; Uniform variable shift of <4 x i32>: %splat broadcasts lane 0 of %b to all lanes before the shl.
; Expected cost of %shift is 10 on SSE2, 4 on the SSE4.1 and AVX runs, and 1 on the AVX2, AVX-512 and XOP runs.
221 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
222 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
223 ; SSE2: Found an estimated cost of 10 for instruction: %shift
224 ; SSE41: Found an estimated cost of 4 for instruction: %shift
225 ; AVX: Found an estimated cost of 4 for instruction: %shift
226 ; AVX2: Found an estimated cost of 1 for instruction: %shift
227 ; AVX512: Found an estimated cost of 1 for instruction: %shift
228 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
229 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
230 %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
231 %shift = shl <4 x i32> %a, %splat
; Uniform variable shift of <8 x i32>: %splat broadcasts lane 0 of %b to all lanes before the shl.
; Expected cost of %shift is 20 on SSE2, 8 on the SSE4.1 and AVX runs, and 1 on the AVX2 and AVX-512 runs;
; the XOP runs expect 2 when paired with +avx and 1 when paired with +avx2.
235 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
236 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
237 ; SSE2: Found an estimated cost of 20 for instruction: %shift
238 ; SSE41: Found an estimated cost of 8 for instruction: %shift
239 ; AVX: Found an estimated cost of 8 for instruction: %shift
240 ; AVX2: Found an estimated cost of 1 for instruction: %shift
241 ; AVX512: Found an estimated cost of 1 for instruction: %shift
242 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
243 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
244 %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
245 %shift = shl <8 x i32> %a, %splat
; Uniform variable shift of <16 x i32>: %splat broadcasts lane 0 of %b to all lanes before the shl.
; Expected cost of %shift is 40 on SSE2, 16 on the SSE4.1 and AVX runs, 2 on AVX2 and 1 on AVX-512;
; the XOP runs expect 4 when paired with +avx and 2 when paired with +avx2.
249 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
250 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i32':
251 ; SSE2: Found an estimated cost of 40 for instruction: %shift
252 ; SSE41: Found an estimated cost of 16 for instruction: %shift
253 ; AVX: Found an estimated cost of 16 for instruction: %shift
254 ; AVX2: Found an estimated cost of 2 for instruction: %shift
255 ; AVX512: Found an estimated cost of 1 for instruction: %shift
256 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
257 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
258 %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
259 %shift = shl <16 x i32> %a, %splat
260 ret <16 x i32> %shift
; Uniform variable shift of <8 x i16>: %splat broadcasts lane 0 of %b to all lanes before the shl.
; Expected cost of %shift is 32 on SSE2, 14 on the SSE4.1, AVX, AVX2 and AVX-512 runs, and 1 on both XOP runs.
263 define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
264 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
265 ; SSE2: Found an estimated cost of 32 for instruction: %shift
266 ; SSE41: Found an estimated cost of 14 for instruction: %shift
267 ; AVX: Found an estimated cost of 14 for instruction: %shift
268 ; AVX2: Found an estimated cost of 14 for instruction: %shift
269 ; AVX512: Found an estimated cost of 14 for instruction: %shift
270 ; XOP: Found an estimated cost of 1 for instruction: %shift
271 %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
272 %shift = shl <8 x i16> %a, %splat
; Uniform variable shift of <16 x i16>: %splat broadcasts lane 0 of %b to all lanes before the shl.
; Expected cost of %shift is 64 on SSE2, 28 on the SSE4.1 and AVX runs, 10 on the AVX2 and AVX-512 runs,
; and 2 on both XOP runs.
276 define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
277 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
278 ; SSE2: Found an estimated cost of 64 for instruction: %shift
279 ; SSE41: Found an estimated cost of 28 for instruction: %shift
280 ; AVX: Found an estimated cost of 28 for instruction: %shift
281 ; AVX2: Found an estimated cost of 10 for instruction: %shift
282 ; AVX512: Found an estimated cost of 10 for instruction: %shift
283 ; XOP: Found an estimated cost of 2 for instruction: %shift
284 %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
285 %shift = shl <16 x i16> %a, %splat
286 ret <16 x i16> %shift
; Uniform variable shift of <32 x i16>: %splat broadcasts lane 0 of %b to all lanes before the shl.
; Expected cost of %shift is 128 on SSE2, 56 on the SSE4.1 and AVX runs, 20 on AVX2 and on AVX-512
; without BW (the F and VL prefixes), 1 on AVX-512 with BW (the BW and BWVL prefixes), and 4 on both XOP runs.
289 define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
290 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i16':
291 ; SSE2: Found an estimated cost of 128 for instruction: %shift
292 ; SSE41: Found an estimated cost of 56 for instruction: %shift
293 ; AVX: Found an estimated cost of 56 for instruction: %shift
294 ; AVX2: Found an estimated cost of 20 for instruction: %shift
295 ; AVX512F: Found an estimated cost of 20 for instruction: %shift
296 ; AVX512BW: Found an estimated cost of 1 for instruction: %shift
297 ; AVX512VL: Found an estimated cost of 20 for instruction: %shift
298 ; AVX512BWVL: Found an estimated cost of 1 for instruction: %shift
299 ; XOP: Found an estimated cost of 4 for instruction: %shift
300 %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
301 %shift = shl <32 x i16> %a, %splat
302 ret <32 x i16> %shift
; Uniform variable shift of <16 x i8>: %splat broadcasts lane 0 of %b to all lanes before the shl.
; Expected cost of %shift is 26 on SSE2, 11 on the SSE4.1, AVX, AVX2 and AVX-512 runs, and 1 on both XOP runs.
305 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
306 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
307 ; SSE2: Found an estimated cost of 26 for instruction: %shift
308 ; SSE41: Found an estimated cost of 11 for instruction: %shift
309 ; AVX: Found an estimated cost of 11 for instruction: %shift
310 ; AVX2: Found an estimated cost of 11 for instruction: %shift
311 ; AVX512: Found an estimated cost of 11 for instruction: %shift
312 ; XOP: Found an estimated cost of 1 for instruction: %shift
313 %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
314 %shift = shl <16 x i8> %a, %splat
; Uniform variable shift of <32 x i8>: %splat broadcasts lane 0 of %b to all lanes before the shl.
; Expected cost of %shift is 52 on SSE2, 22 on the SSE4.1 and AVX runs, 11 on the AVX2 and AVX-512 runs,
; and 2 on both XOP runs.
318 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
319 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
320 ; SSE2: Found an estimated cost of 52 for instruction: %shift
321 ; SSE41: Found an estimated cost of 22 for instruction: %shift
322 ; AVX: Found an estimated cost of 22 for instruction: %shift
323 ; AVX2: Found an estimated cost of 11 for instruction: %shift
324 ; AVX512: Found an estimated cost of 11 for instruction: %shift
325 ; XOP: Found an estimated cost of 2 for instruction: %shift
326 %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
327 %shift = shl <32 x i8> %a, %splat
; Uniform variable shift of <64 x i8>: %splat broadcasts lane 0 of %b to all lanes before the shl.
; Expected cost of %shift is 104 on SSE2, 44 on the SSE4.1 and AVX runs, 22 on AVX2 and on AVX-512
; without BW (the F and VL prefixes), 11 on AVX-512 with BW (the BW and BWVL prefixes), and 4 on both XOP runs.
331 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
332 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v64i8':
333 ; SSE2: Found an estimated cost of 104 for instruction: %shift
334 ; SSE41: Found an estimated cost of 44 for instruction: %shift
335 ; AVX: Found an estimated cost of 44 for instruction: %shift
336 ; AVX2: Found an estimated cost of 22 for instruction: %shift
337 ; AVX512F: Found an estimated cost of 22 for instruction: %shift
338 ; AVX512BW: Found an estimated cost of 11 for instruction: %shift
339 ; AVX512VL: Found an estimated cost of 22 for instruction: %shift
340 ; AVX512BWVL: Found an estimated cost of 11 for instruction: %shift
341 ; XOP: Found an estimated cost of 4 for instruction: %shift
342 %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
343 %shift = shl <64 x i8> %a, %splat
; Non-uniform constant shift: the two i64 lanes of %a are shifted left by the literal amounts 1 and 7.
; Expected cost of %shift is 4 on the SSE2, SSE4.1 and AVX runs, and 1 on the AVX2, AVX-512 and XOP runs.
351 define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
352 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v2i64':
353 ; SSE2: Found an estimated cost of 4 for instruction: %shift
354 ; SSE41: Found an estimated cost of 4 for instruction: %shift
355 ; AVX: Found an estimated cost of 4 for instruction: %shift
356 ; AVX2: Found an estimated cost of 1 for instruction: %shift
357 ; AVX512: Found an estimated cost of 1 for instruction: %shift
358 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
359 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
360 %shift = shl <2 x i64> %a, <i64 1, i64 7>
; Non-uniform constant shift of <4 x i64> by the literal per-lane amounts 1, 7, 15, 31.
; Expected cost of %shift is 8 on the SSE2, SSE4.1 and AVX runs and 1 on the AVX2 and AVX-512 runs;
; the XOP runs expect 2 when paired with +avx and 1 when paired with +avx2.
364 define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
365 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
366 ; SSE2: Found an estimated cost of 8 for instruction: %shift
367 ; SSE41: Found an estimated cost of 8 for instruction: %shift
368 ; AVX: Found an estimated cost of 8 for instruction: %shift
369 ; AVX2: Found an estimated cost of 1 for instruction: %shift
370 ; AVX512: Found an estimated cost of 1 for instruction: %shift
371 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
372 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
373 %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
; Non-uniform constant shift of <8 x i64>; the literal amounts 1, 7, 15, 31 repeat across both halves.
; Expected cost of %shift is 16 on the SSE2, SSE4.1 and AVX runs, 2 on AVX2 and 1 on AVX-512;
; the XOP runs expect 4 when paired with +avx and 2 when paired with +avx2.
377 define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) {
378 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i64':
379 ; SSE2: Found an estimated cost of 16 for instruction: %shift
380 ; SSE41: Found an estimated cost of 16 for instruction: %shift
381 ; AVX: Found an estimated cost of 16 for instruction: %shift
382 ; AVX2: Found an estimated cost of 2 for instruction: %shift
383 ; AVX512: Found an estimated cost of 1 for instruction: %shift
384 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
385 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
386 %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
; Non-uniform constant shift of <4 x i32> by the literal per-lane amounts 4, 5, 6, 7.
; Expected cost of %shift is 6 on SSE2 and 1 on every other run (SSE4.1, AVX, AVX2, AVX-512, XOP).
390 define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
391 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
392 ; SSE2: Found an estimated cost of 6 for instruction: %shift
393 ; SSE41: Found an estimated cost of 1 for instruction: %shift
394 ; AVX: Found an estimated cost of 1 for instruction: %shift
395 ; AVX2: Found an estimated cost of 1 for instruction: %shift
396 ; AVX512: Found an estimated cost of 1 for instruction: %shift
397 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
398 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
399 %shift = shl <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
; Non-uniform constant shift of <8 x i32> by the literal per-lane amounts 4..7 followed by 0..3.
; Expected cost of %shift is 12 on SSE2, 2 on SSE4.1, 4 on AVX, and 1 on the AVX2 and AVX-512 runs;
; the XOP runs expect 2 when paired with +avx and 1 when paired with +avx2.
403 define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
404 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
405 ; SSE2: Found an estimated cost of 12 for instruction: %shift
406 ; SSE41: Found an estimated cost of 2 for instruction: %shift
407 ; AVX: Found an estimated cost of 4 for instruction: %shift
408 ; AVX2: Found an estimated cost of 1 for instruction: %shift
409 ; AVX512: Found an estimated cost of 1 for instruction: %shift
410 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
411 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
412 %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
; Non-uniform constant shift of <16 x i32>; the literal amounts 4..7, 0..3 repeat across both halves.
; Expected cost of %shift is 24 on SSE2, 4 on SSE4.1, 8 on AVX, 2 on AVX2 and 1 on AVX-512;
; the XOP runs expect 4 when paired with +avx and 2 when paired with +avx2.
416 define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
417 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i32':
418 ; SSE2: Found an estimated cost of 24 for instruction: %shift
419 ; SSE41: Found an estimated cost of 4 for instruction: %shift
420 ; AVX: Found an estimated cost of 8 for instruction: %shift
421 ; AVX2: Found an estimated cost of 2 for instruction: %shift
422 ; AVX512: Found an estimated cost of 1 for instruction: %shift
423 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
424 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
425 %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
426 ret <16 x i32> %shift
; Non-uniform constant shift of <8 x i16> by the literal per-lane amounts 0..7.
; Expected cost of %shift is 1 on every run in this file.
429 define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
430 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
431 ; SSE2: Found an estimated cost of 1 for instruction: %shift
432 ; SSE41: Found an estimated cost of 1 for instruction: %shift
433 ; AVX: Found an estimated cost of 1 for instruction: %shift
434 ; AVX2: Found an estimated cost of 1 for instruction: %shift
435 ; AVX512: Found an estimated cost of 1 for instruction: %shift
436 ; XOP: Found an estimated cost of 1 for instruction: %shift
437 %shift = shl <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
; Non-uniform constant shift of <16 x i16>; the literal amounts 0..7 repeat across both halves.
; Expected cost of %shift is 2 on the SSE2 and SSE4.1 runs, 4 on AVX, and 1 on the AVX2 and AVX-512 runs;
; the XOP runs expect 2 when paired with +avx and 1 when paired with +avx2.
441 define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
442 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
443 ; SSE2: Found an estimated cost of 2 for instruction: %shift
444 ; SSE41: Found an estimated cost of 2 for instruction: %shift
445 ; AVX: Found an estimated cost of 4 for instruction: %shift
446 ; AVX2: Found an estimated cost of 1 for instruction: %shift
447 ; AVX512: Found an estimated cost of 1 for instruction: %shift
448 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
449 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
450 %shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
451 ret <16 x i16> %shift
; Non-uniform constant shift of <32 x i16>; the literal amounts 0..7 repeat four times.
; Expected cost of %shift is 4 on the SSE2 and SSE4.1 runs, 8 on AVX, 2 on AVX2 and on AVX-512 without BW
; (the F and VL prefixes), and 1 on AVX-512 with BW (the BW and BWVL prefixes);
; the XOP runs expect 4 when paired with +avx and 2 when paired with +avx2.
454 define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
455 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i16':
456 ; SSE2: Found an estimated cost of 4 for instruction: %shift
457 ; SSE41: Found an estimated cost of 4 for instruction: %shift
458 ; AVX: Found an estimated cost of 8 for instruction: %shift
459 ; AVX2: Found an estimated cost of 2 for instruction: %shift
460 ; AVX512F: Found an estimated cost of 2 for instruction: %shift
461 ; AVX512BW: Found an estimated cost of 1 for instruction: %shift
462 ; AVX512VL: Found an estimated cost of 2 for instruction: %shift
463 ; AVX512BWVL: Found an estimated cost of 1 for instruction: %shift
464 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
465 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
466 %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
467 ret <32 x i16> %shift
; Non-uniform constant shift of <16 x i8> by the literal amounts 0..7 followed by 7..0.
; Expected cost of %shift is 26 on SSE2, 11 on the SSE4.1, AVX, AVX2 and AVX-512 runs, and 1 on both XOP runs.
470 define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
471 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
472 ; SSE2: Found an estimated cost of 26 for instruction: %shift
473 ; SSE41: Found an estimated cost of 11 for instruction: %shift
474 ; AVX: Found an estimated cost of 11 for instruction: %shift
475 ; AVX2: Found an estimated cost of 11 for instruction: %shift
476 ; AVX512: Found an estimated cost of 11 for instruction: %shift
477 ; XOP: Found an estimated cost of 1 for instruction: %shift
478 %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
; Non-uniform constant shift of <32 x i8>; the literal amounts 0..7, 7..0 repeat across both halves.
; Expected cost of %shift is 52 on SSE2, 22 on the SSE4.1 and AVX runs, 11 on the AVX2 and AVX-512 runs,
; and 2 on both XOP runs.
482 define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
483 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
484 ; SSE2: Found an estimated cost of 52 for instruction: %shift
485 ; SSE41: Found an estimated cost of 22 for instruction: %shift
486 ; AVX: Found an estimated cost of 22 for instruction: %shift
487 ; AVX2: Found an estimated cost of 11 for instruction: %shift
488 ; AVX512: Found an estimated cost of 11 for instruction: %shift
489 ; XOP: Found an estimated cost of 2 for instruction: %shift
490 %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
; Non-uniform constant shift of <64 x i8>; the literal amounts 0..7, 7..0 repeat four times.
; Expected cost of %shift is 104 on SSE2, 44 on the SSE4.1 and AVX runs, 22 on AVX2 and on AVX-512
; without BW (the F and VL prefixes), 11 on AVX-512 with BW (the BW and BWVL prefixes), and 4 on both XOP runs.
494 define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
495 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v64i8':
496 ; SSE2: Found an estimated cost of 104 for instruction: %shift
497 ; SSE41: Found an estimated cost of 44 for instruction: %shift
498 ; AVX: Found an estimated cost of 44 for instruction: %shift
499 ; AVX2: Found an estimated cost of 22 for instruction: %shift
500 ; AVX512F: Found an estimated cost of 22 for instruction: %shift
501 ; AVX512BW: Found an estimated cost of 11 for instruction: %shift
502 ; AVX512VL: Found an estimated cost of 22 for instruction: %shift
503 ; AVX512BWVL: Found an estimated cost of 11 for instruction: %shift
504 ; XOP: Found an estimated cost of 4 for instruction: %shift
505 %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
510 ; Uniform Constant Shifts
; Uniform constant shift: both i64 lanes of %a are shifted left by the same literal amount, 7.
; Expected cost of %shift is 1 on every run in this file.
513 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
514 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v2i64':
515 ; SSE2: Found an estimated cost of 1 for instruction: %shift
516 ; SSE41: Found an estimated cost of 1 for instruction: %shift
517 ; AVX: Found an estimated cost of 1 for instruction: %shift
518 ; AVX2: Found an estimated cost of 1 for instruction: %shift
519 ; AVX512: Found an estimated cost of 1 for instruction: %shift
520 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
521 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
522 %shift = shl <2 x i64> %a, <i64 7, i64 7>
; Uniform constant shift of <4 x i64>: every lane is shifted left by the literal amount 7.
; Expected cost of %shift is 2 on the SSE2, SSE4.1 and AVX runs and 1 on the AVX2 and AVX-512 runs;
; the XOP runs expect 2 when paired with +avx and 1 when paired with +avx2.
526 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
527 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
528 ; SSE2: Found an estimated cost of 2 for instruction: %shift
529 ; SSE41: Found an estimated cost of 2 for instruction: %shift
530 ; AVX: Found an estimated cost of 2 for instruction: %shift
531 ; AVX2: Found an estimated cost of 1 for instruction: %shift
532 ; AVX512: Found an estimated cost of 1 for instruction: %shift
533 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
534 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
535 %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
; Uniform constant shift of <8 x i64>: every lane is shifted left by the literal amount 7.
; Expected cost of %shift is 4 on the SSE2, SSE4.1 and AVX runs, 2 on AVX2 and 1 on AVX-512;
; the XOP runs expect 4 when paired with +avx and 2 when paired with +avx2.
539 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
540 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i64':
541 ; SSE2: Found an estimated cost of 4 for instruction: %shift
542 ; SSE41: Found an estimated cost of 4 for instruction: %shift
543 ; AVX: Found an estimated cost of 4 for instruction: %shift
544 ; AVX2: Found an estimated cost of 2 for instruction: %shift
545 ; AVX512: Found an estimated cost of 1 for instruction: %shift
546 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
547 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
548 %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
; Uniform constant shift of <4 x i32>: every lane is shifted left by the literal amount 5.
; Expected cost of %shift is 1 on every run in this file.
552 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
553 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
554 ; SSE2: Found an estimated cost of 1 for instruction: %shift
555 ; SSE41: Found an estimated cost of 1 for instruction: %shift
556 ; AVX: Found an estimated cost of 1 for instruction: %shift
557 ; AVX2: Found an estimated cost of 1 for instruction: %shift
558 ; AVX512: Found an estimated cost of 1 for instruction: %shift
559 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
560 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
561 %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
; Uniform constant shift of <8 x i32>: every lane is shifted left by the literal amount 5.
; Expected cost of %shift is 2 on the SSE2, SSE4.1 and AVX runs and 1 on the AVX2 and AVX-512 runs;
; the XOP runs expect 2 when paired with +avx and 1 when paired with +avx2.
565 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
566 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i32':
567 ; SSE2: Found an estimated cost of 2 for instruction: %shift
568 ; SSE41: Found an estimated cost of 2 for instruction: %shift
569 ; AVX: Found an estimated cost of 2 for instruction: %shift
570 ; AVX2: Found an estimated cost of 1 for instruction: %shift
571 ; AVX512: Found an estimated cost of 1 for instruction: %shift
572 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
573 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
574 %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
; Uniform constant shift of <16 x i32>: every lane is shifted left by the literal amount 5.
; Expected cost of %shift is 4 on the SSE2, SSE4.1 and AVX runs, 2 on AVX2 and 1 on AVX-512;
; the XOP runs expect 4 when paired with +avx and 2 when paired with +avx2.
578 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
579 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i32':
580 ; SSE2: Found an estimated cost of 4 for instruction: %shift
581 ; SSE41: Found an estimated cost of 4 for instruction: %shift
582 ; AVX: Found an estimated cost of 4 for instruction: %shift
583 ; AVX2: Found an estimated cost of 2 for instruction: %shift
584 ; AVX512: Found an estimated cost of 1 for instruction: %shift
585 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
586 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
587 %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
588 ret <16 x i32> %shift
; Uniform constant shift of <8 x i16>: every lane is shifted left by the literal amount 3.
; Expected cost of %shift is 1 on every run in this file.
591 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
592 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
593 ; SSE2: Found an estimated cost of 1 for instruction: %shift
594 ; SSE41: Found an estimated cost of 1 for instruction: %shift
595 ; AVX: Found an estimated cost of 1 for instruction: %shift
596 ; AVX2: Found an estimated cost of 1 for instruction: %shift
597 ; AVX512: Found an estimated cost of 1 for instruction: %shift
598 ; XOP: Found an estimated cost of 1 for instruction: %shift
599 %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
; Cost of a <16 x i16> shl where every lane is shifted by the same constant (3).
; Expected cost is 2 for the SSE2, SSE4.1, AVX and XOP+AVX runs and 1 for the
; AVX2, AVX512 and XOP+AVX2 runs.
603 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
604 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i16':
605 ; SSE2: Found an estimated cost of 2 for instruction: %shift
606 ; SSE41: Found an estimated cost of 2 for instruction: %shift
607 ; AVX: Found an estimated cost of 2 for instruction: %shift
608 ; AVX2: Found an estimated cost of 1 for instruction: %shift
609 ; AVX512: Found an estimated cost of 1 for instruction: %shift
610 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
611 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
612 %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
613 ret <16 x i16> %shift
; Cost of a <32 x i16> shl where every lane is shifted by the same constant (3).
; Expected cost is 4 for the SSE2, SSE4.1, AVX and XOP+AVX runs and 2 for the
; AVX2 and XOP+AVX2 runs.
; The AVX512 expectations are listed per feature set here because they differ:
; 2 under the AVX512F and AVX512VL prefixes, 1 under the AVX512BW and
; AVX512BWVL prefixes (the runs that enable +avx512bw).
616 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
617 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i16':
618 ; SSE2: Found an estimated cost of 4 for instruction: %shift
619 ; SSE41: Found an estimated cost of 4 for instruction: %shift
620 ; AVX: Found an estimated cost of 4 for instruction: %shift
621 ; AVX2: Found an estimated cost of 2 for instruction: %shift
622 ; AVX512F: Found an estimated cost of 2 for instruction: %shift
623 ; AVX512BW: Found an estimated cost of 1 for instruction: %shift
624 ; AVX512VL: Found an estimated cost of 2 for instruction: %shift
625 ; AVX512BWVL: Found an estimated cost of 1 for instruction: %shift
626 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
627 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
628 %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
629 ret <32 x i16> %shift
; Cost of a <16 x i8> shl where every lane is shifted by the same constant (3).
; Every checked subtarget (SSE2, SSE4.1, AVX, AVX2, AVX512, XOP) expects a
; cost of 2.
; NOTE(review) presumably 2 because there is no native byte shift and the
; lowering needs a wider shift plus a mask -- confirm against the backend.
632 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
633 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
634 ; SSE2: Found an estimated cost of 2 for instruction: %shift
635 ; SSE41: Found an estimated cost of 2 for instruction: %shift
636 ; AVX: Found an estimated cost of 2 for instruction: %shift
637 ; AVX2: Found an estimated cost of 2 for instruction: %shift
638 ; AVX512: Found an estimated cost of 2 for instruction: %shift
639 ; XOP: Found an estimated cost of 2 for instruction: %shift
640 %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
; Cost of a <32 x i8> shl where every lane is shifted by the same constant (3).
; Expected cost is 4 for the SSE2, SSE4.1, AVX and XOP+AVX runs and 2 for the
; AVX2, AVX512 and XOP+AVX2 runs.
644 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
645 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
646 ; SSE2: Found an estimated cost of 4 for instruction: %shift
647 ; SSE41: Found an estimated cost of 4 for instruction: %shift
648 ; AVX: Found an estimated cost of 4 for instruction: %shift
649 ; AVX2: Found an estimated cost of 2 for instruction: %shift
650 ; AVX512: Found an estimated cost of 2 for instruction: %shift
651 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
652 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
653 %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
657 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
658 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8':
659 ; SSE2: Found an estimated cost of 8 for instruction: %shift
660 ; SSE41: Found an estimated cost of 8 for instruction: %shift
661 ; AVX: Found an estimated cost of 8 for instruction: %shift
662 ; AVX2: Found an estimated cost of 4 for instruction: %shift
663 ; AVX512F: Found an estimated cost of 4 for instruction: %shift
664 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
665 ; AVX512VL: Found an estimated cost of 4 for instruction: %shift
666 ; AVX512BWVL: Found an estimated cost of 2 for instruction: %shift
667 ; XOPAVX: Found an estimated cost of 8 for instruction: %shift
668 ; XOPAVX2: Found an estimated cost of 4 for instruction: %shift
669 %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
677 ; We always emit a single pmullw in the case of v8i16 vector shifts by
678 ; a non-uniform constant.
; <8 x i16> shl by a non-uniform constant vector (a different amount per lane).
; The expectation uses the shared CHECK prefix, so every run line must report
; a cost of 1 for this shift.
680 define <8 x i16> @test1(<8 x i16> %a) {
681 %shl = shl <8 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
684 ; CHECK: 'Cost Model Analysis' for function 'test1':
685 ; CHECK: Found an estimated cost of 1 for instruction: %shl
; <8 x i16> shl by a non-uniform constant vector that also contains undef
; lanes and a -1 lane. The expectation uses the shared CHECK prefix, so every
; run line must still report a cost of 1.
688 define <8 x i16> @test2(<8 x i16> %a) {
689 %shl = shl <8 x i16> %a, <i16 0, i16 undef, i16 0, i16 0, i16 1, i16 undef, i16 -1, i16 1>
692 ; CHECK: 'Cost Model Analysis' for function 'test2':
693 ; CHECK: Found an estimated cost of 1 for instruction: %shl
696 ; With SSE4.1, v4i32 shifts can be lowered into a single pmulld instruction.
697 ; Make sure that the estimated cost is always 1 except for the case where
698 ; we only have SSE2 support. With SSE2, we are forced to custom-lower the
699 ; v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle (an estimated cost of 6).
; <4 x i32> shl by a non-uniform constant vector that includes negative
; amounts (-1 and -3). Expected cost is 6 for the SSE2 run and 1 for the
; SSE4.1, AVX, AVX2 and XOP runs. No AVX512-prefixed expectation is listed.
701 define <4 x i32> @test3(<4 x i32> %a) {
702 %shl = shl <4 x i32> %a, <i32 1, i32 -1, i32 2, i32 -3>
705 ; CHECK: 'Cost Model Analysis' for function 'test3':
706 ; SSE2: Found an estimated cost of 6 for instruction: %shl
707 ; SSE41: Found an estimated cost of 1 for instruction: %shl
708 ; AVX: Found an estimated cost of 1 for instruction: %shl
709 ; AVX2: Found an estimated cost of 1 for instruction: %shl
710 ; XOP: Found an estimated cost of 1 for instruction: %shl
; <4 x i32> shl by the non-uniform constant vector <0, 0, 1, 1>.
; Expected cost is 6 for the SSE2 run and 1 for the SSE4.1, AVX, AVX2 and XOP
; runs. No AVX512-prefixed expectation is listed.
713 define <4 x i32> @test4(<4 x i32> %a) {
714 %shl = shl <4 x i32> %a, <i32 0, i32 0, i32 1, i32 1>
717 ; CHECK: 'Cost Model Analysis' for function 'test4':
718 ; SSE2: Found an estimated cost of 6 for instruction: %shl
719 ; SSE41: Found an estimated cost of 1 for instruction: %shl
720 ; AVX: Found an estimated cost of 1 for instruction: %shl
721 ; AVX2: Found an estimated cost of 1 for instruction: %shl
722 ; XOP: Found an estimated cost of 1 for instruction: %shl
725 ; On AVX2 we are able to lower the following shift into a single
726 ; vpsllvq. Therefore, the expected cost is only 1. The XOP runs are also
727 ; expected to report a cost of 1. With SSE2, SSE4.1 and AVX, this shift is
728 ; scalarized as the target does not support vpsllv instructions.
; <2 x i64> shl by the non-uniform constant vector <2, 3>.
; Expected cost is 4 for the SSE2, SSE4.1 and AVX runs and 1 for the AVX2 and
; XOP runs. No AVX512-prefixed expectation is listed.
730 define <2 x i64> @test5(<2 x i64> %a) {
731 %shl = shl <2 x i64> %a, <i64 2, i64 3>
734 ; CHECK: 'Cost Model Analysis' for function 'test5':
735 ; SSE2: Found an estimated cost of 4 for instruction: %shl
736 ; SSE41: Found an estimated cost of 4 for instruction: %shl
737 ; AVX: Found an estimated cost of 4 for instruction: %shl
738 ; AVX2: Found an estimated cost of 1 for instruction: %shl
739 ; XOP: Found an estimated cost of 1 for instruction: %shl
742 ; v16i16 and v8i32 shift left by non-uniform constant are lowered into
743 ; vector multiply instructions. With AVX (but not AVX2), the vector multiply
744 ; is lowered into a sequence of: 1 extract + 2 vpmullw + 1 insert.
746 ; With AVX2, the vpmullw instruction works on 256-bit quantities and
747 ; therefore there is no need to split the resulting vector multiply into
748 ; a sequence of two multiplies.
750 ; With SSE2 and SSE4.1, the vector shift cost for 'test6' is twice
751 ; the cost computed in the case of 'test1'. That is because the backend
752 ; simply emits 2 pmullw with no extract/insert.
; <16 x i16> shl by a non-uniform constant vector (the 8-lane pattern from
; 'test1' repeated twice). Expected cost is 2 for the SSE2, SSE4.1 and XOP+AVX
; runs, 4 for the AVX run, and 1 for the AVX2 and XOP+AVX2 runs.
; No AVX512-prefixed expectation is listed.
755 define <16 x i16> @test6(<16 x i16> %a) {
756 %shl = shl <16 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
759 ; CHECK: 'Cost Model Analysis' for function 'test6':
760 ; SSE2: Found an estimated cost of 2 for instruction: %shl
761 ; SSE41: Found an estimated cost of 2 for instruction: %shl
762 ; AVX: Found an estimated cost of 4 for instruction: %shl
763 ; AVX2: Found an estimated cost of 1 for instruction: %shl
764 ; XOPAVX: Found an estimated cost of 2 for instruction: %shl
765 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shl
768 ; With SSE2 and SSE4.1, the vector shift cost for 'test7' is twice
769 ; the cost computed in the case of 'test3'. That is because the multiply
770 ; is type-legalized into two v4i32 vector multiplies.
; <8 x i32> shl by a non-uniform constant vector (the pattern <1, 1, 2, 3>
; repeated twice). Expected cost is 12 for the SSE2 run, 2 for the SSE4.1 and
; XOP+AVX runs, 4 for the AVX run, and 1 for the AVX2 and XOP+AVX2 runs.
; No AVX512-prefixed expectation is listed.
772 define <8 x i32> @test7(<8 x i32> %a) {
773 %shl = shl <8 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
776 ; CHECK: 'Cost Model Analysis' for function 'test7':
777 ; SSE2: Found an estimated cost of 12 for instruction: %shl
778 ; SSE41: Found an estimated cost of 2 for instruction: %shl
779 ; AVX: Found an estimated cost of 4 for instruction: %shl
780 ; AVX2: Found an estimated cost of 1 for instruction: %shl
781 ; XOPAVX: Found an estimated cost of 2 for instruction: %shl
782 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shl
785 ; On AVX2 we are able to lower the following shift into a single
786 ; vpsllvq. Therefore, the expected cost is only 1. The XOP runs expect a
787 ; cost of 2 (with AVX) or 1 (with AVX2). With SSE2, SSE4.1 and AVX, this shift
788 ; is scalarized as the target does not support vpsllv instructions.
; <4 x i64> shl by the non-uniform constant vector <1, 2, 3, 4>.
; Expected cost is 8 for the SSE2, SSE4.1 and AVX runs, 2 for the XOP+AVX run,
; and 1 for the AVX2 and XOP+AVX2 runs.
; No AVX512-prefixed expectation is listed.
790 define <4 x i64> @test8(<4 x i64> %a) {
791 %shl = shl <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
794 ; CHECK: 'Cost Model Analysis' for function 'test8':
795 ; SSE2: Found an estimated cost of 8 for instruction: %shl
796 ; SSE41: Found an estimated cost of 8 for instruction: %shl
797 ; AVX: Found an estimated cost of 8 for instruction: %shl
798 ; AVX2: Found an estimated cost of 1 for instruction: %shl
799 ; XOPAVX: Found an estimated cost of 2 for instruction: %shl
800 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shl
803 ; Same as 'test6', with the difference that the cost is double.
; <32 x i16> shl by a non-uniform constant vector (the 8-lane pattern from
; 'test1' repeated four times). Expected cost is 4 for the SSE2, SSE4.1 and
; XOP+AVX runs, 8 for the AVX run, and 2 for the AVX2 and XOP+AVX2 runs.
; No AVX512-prefixed expectation is listed.
805 define <32 x i16> @test9(<32 x i16> %a) {
806 %shl = shl <32 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
809 ; CHECK: 'Cost Model Analysis' for function 'test9':
810 ; SSE2: Found an estimated cost of 4 for instruction: %shl
811 ; SSE41: Found an estimated cost of 4 for instruction: %shl
812 ; AVX: Found an estimated cost of 8 for instruction: %shl
813 ; AVX2: Found an estimated cost of 2 for instruction: %shl
814 ; XOPAVX: Found an estimated cost of 4 for instruction: %shl
815 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shl
818 ; Same as 'test7', except that now the cost is double.
; <16 x i32> shl by a non-uniform constant vector (the pattern <1, 1, 2, 3>
; repeated four times). Expected cost is 24 for the SSE2 run, 4 for the SSE4.1
; and XOP+AVX runs, 8 for the AVX run, and 2 for the AVX2 and XOP+AVX2 runs.
; No AVX512-prefixed expectation is listed.
820 define <16 x i32> @test10(<16 x i32> %a) {
821 %shl = shl <16 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
824 ; CHECK: 'Cost Model Analysis' for function 'test10':
825 ; SSE2: Found an estimated cost of 24 for instruction: %shl
826 ; SSE41: Found an estimated cost of 4 for instruction: %shl
827 ; AVX: Found an estimated cost of 8 for instruction: %shl
828 ; AVX2: Found an estimated cost of 2 for instruction: %shl
829 ; XOPAVX: Found an estimated cost of 4 for instruction: %shl
830 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shl
833 ; On AVX2 we are able to lower the following shift into a sequence of
834 ; two vpsllvq instructions. Therefore, the expected cost is only 2.
835 ; With SSE2, SSE4.1 and AVX, this shift is scalarized as we don't have vpsllv instructions.
; <8 x i64> shl by a non-uniform constant vector (the pattern <1, 1, 2, 3>
; repeated twice). Expected cost is 16 for the SSE2, SSE4.1 and AVX runs, 4 for
; the XOP+AVX run, and 2 for the AVX2 and XOP+AVX2 runs.
; No AVX512-prefixed expectation is listed.
838 define <8 x i64> @test11(<8 x i64> %a) {
839 %shl = shl <8 x i64> %a, <i64 1, i64 1, i64 2, i64 3, i64 1, i64 1, i64 2, i64 3>
842 ; CHECK: 'Cost Model Analysis' for function 'test11':
843 ; SSE2: Found an estimated cost of 16 for instruction: %shl
844 ; SSE41: Found an estimated cost of 16 for instruction: %shl
845 ; AVX: Found an estimated cost of 16 for instruction: %shl
846 ; AVX2: Found an estimated cost of 2 for instruction: %shl
847 ; XOPAVX: Found an estimated cost of 4 for instruction: %shl
848 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shl