1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41
3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
4 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
5 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX
6 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx2 -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=XOP --check-prefix=XOPAVX2
7 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
8 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512dq -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
9 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512bw -cost-model -analyze | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
11 ; Verify the cost model's estimates for vector shift-left (shl) instructions.
; Variable shift: every lane of %a is shifted left by the corresponding lane of %b.
; The check lines below pin the cost reported for %shift under each feature set.
18 define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
19 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v2i64':
20 ; SSE2: Found an estimated cost of 4 for instruction: %shift
21 ; SSE41: Found an estimated cost of 4 for instruction: %shift
22 ; AVX: Found an estimated cost of 4 for instruction: %shift
23 ; AVX2: Found an estimated cost of 1 for instruction: %shift
24 ; AVX512: Found an estimated cost of 1 for instruction: %shift
25 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
26 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
27 %shift = shl <2 x i64> %a, %b
; Variable shift of <4 x i64>: per-lane shift amounts are taken from %b.
; NOTE(review): the SSE2, SSE41, AVX and XOPAVX expectations are exactly twice the
; <2 x i64> ones, presumably because the type is split into two 128-bit halves - confirm.
31 define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
32 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
33 ; SSE2: Found an estimated cost of 8 for instruction: %shift
34 ; SSE41: Found an estimated cost of 8 for instruction: %shift
35 ; AVX: Found an estimated cost of 8 for instruction: %shift
36 ; AVX2: Found an estimated cost of 1 for instruction: %shift
37 ; AVX512: Found an estimated cost of 1 for instruction: %shift
38 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
39 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
40 %shift = shl <4 x i64> %a, %b
; Variable shift of <8 x i64>: per-lane shift amounts are taken from %b.
; Only the AVX512 prefix expects a cost of 1 here; every other expectation is
; double the corresponding <4 x i64> value.
44 define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
45 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i64':
46 ; SSE2: Found an estimated cost of 16 for instruction: %shift
47 ; SSE41: Found an estimated cost of 16 for instruction: %shift
48 ; AVX: Found an estimated cost of 16 for instruction: %shift
49 ; AVX2: Found an estimated cost of 2 for instruction: %shift
50 ; AVX512: Found an estimated cost of 1 for instruction: %shift
51 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
52 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
53 %shift = shl <8 x i64> %a, %b
; Variable shift of <4 x i32>: each lane of %a is shifted left by the matching lane of %b.
; SSE2, SSE41 and AVX expect 10; AVX2, AVX512 and both XOP configurations expect 1.
57 define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
58 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
59 ; SSE2: Found an estimated cost of 10 for instruction: %shift
60 ; SSE41: Found an estimated cost of 10 for instruction: %shift
61 ; AVX: Found an estimated cost of 10 for instruction: %shift
62 ; AVX2: Found an estimated cost of 1 for instruction: %shift
63 ; AVX512: Found an estimated cost of 1 for instruction: %shift
64 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
65 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
66 %shift = shl <4 x i32> %a, %b
; Variable shift of <8 x i32>: per-lane shift amounts are taken from %b.
; The SSE2, SSE41, AVX and XOPAVX expectations are double the <4 x i32> values.
70 define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
71 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
72 ; SSE2: Found an estimated cost of 20 for instruction: %shift
73 ; SSE41: Found an estimated cost of 20 for instruction: %shift
74 ; AVX: Found an estimated cost of 20 for instruction: %shift
75 ; AVX2: Found an estimated cost of 1 for instruction: %shift
76 ; AVX512: Found an estimated cost of 1 for instruction: %shift
77 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
78 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
79 %shift = shl <8 x i32> %a, %b
; Variable shift of <16 x i32>: per-lane shift amounts are taken from %b.
; Only the AVX512 prefix expects a cost of 1 for this vector width.
83 define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
84 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32':
85 ; SSE2: Found an estimated cost of 40 for instruction: %shift
86 ; SSE41: Found an estimated cost of 40 for instruction: %shift
87 ; AVX: Found an estimated cost of 40 for instruction: %shift
88 ; AVX2: Found an estimated cost of 2 for instruction: %shift
89 ; AVX512: Found an estimated cost of 1 for instruction: %shift
90 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
91 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
92 %shift = shl <16 x i32> %a, %b
; Variable shift of <8 x i16>: per-lane shift amounts are taken from %b.
; The shared XOP prefix is passed to both XOP invocations at the top of the file,
; so a single expectation covers the +xop,+avx and +xop,+avx2 configurations.
96 define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
97 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
98 ; SSE2: Found an estimated cost of 32 for instruction: %shift
99 ; SSE41: Found an estimated cost of 14 for instruction: %shift
100 ; AVX: Found an estimated cost of 14 for instruction: %shift
101 ; AVX2: Found an estimated cost of 14 for instruction: %shift
102 ; AVX512: Found an estimated cost of 14 for instruction: %shift
103 ; XOP: Found an estimated cost of 1 for instruction: %shift
104 %shift = shl <8 x i16> %a, %b
; Variable shift of <16 x i16>: per-lane shift amounts are taken from %b.
; SSE2, SSE41, AVX and XOP expectations are double the <8 x i16> values, while
; AVX2 and AVX512 expect 10 (lower than their <8 x i16> expectation of 14).
108 define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
109 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
110 ; SSE2: Found an estimated cost of 64 for instruction: %shift
111 ; SSE41: Found an estimated cost of 28 for instruction: %shift
112 ; AVX: Found an estimated cost of 28 for instruction: %shift
113 ; AVX2: Found an estimated cost of 10 for instruction: %shift
114 ; AVX512: Found an estimated cost of 10 for instruction: %shift
115 ; XOP: Found an estimated cost of 2 for instruction: %shift
116 %shift = shl <16 x i16> %a, %b
117 ret <16 x i16> %shift
; Variable shift of <32 x i16>: per-lane shift amounts are taken from %b.
; The AVX512F and AVX512BW prefixes are checked separately here because their
; expected costs differ (20 versus 2).
120 define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
121 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i16':
122 ; SSE2: Found an estimated cost of 128 for instruction: %shift
123 ; SSE41: Found an estimated cost of 56 for instruction: %shift
124 ; AVX: Found an estimated cost of 56 for instruction: %shift
125 ; AVX2: Found an estimated cost of 20 for instruction: %shift
126 ; AVX512F: Found an estimated cost of 20 for instruction: %shift
127 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
128 ; XOP: Found an estimated cost of 4 for instruction: %shift
129 %shift = shl <32 x i16> %a, %b
130 ret <32 x i16> %shift
; Variable shift of <16 x i8>: each byte lane of %a is shifted left by the matching lane of %b.
; Only the XOP configurations expect a cost of 1 for this type.
133 define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
134 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
135 ; SSE2: Found an estimated cost of 26 for instruction: %shift
136 ; SSE41: Found an estimated cost of 11 for instruction: %shift
137 ; AVX: Found an estimated cost of 11 for instruction: %shift
138 ; AVX2: Found an estimated cost of 11 for instruction: %shift
139 ; AVX512: Found an estimated cost of 11 for instruction: %shift
140 ; XOP: Found an estimated cost of 1 for instruction: %shift
141 %shift = shl <16 x i8> %a, %b
; Variable shift of <32 x i8>: per-lane shift amounts are taken from %b.
; AVX2 and AVX512 keep the <16 x i8> expectation of 11; the other prefixes double.
145 define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
146 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
147 ; SSE2: Found an estimated cost of 52 for instruction: %shift
148 ; SSE41: Found an estimated cost of 22 for instruction: %shift
149 ; AVX: Found an estimated cost of 22 for instruction: %shift
150 ; AVX2: Found an estimated cost of 11 for instruction: %shift
151 ; AVX512: Found an estimated cost of 11 for instruction: %shift
152 ; XOP: Found an estimated cost of 2 for instruction: %shift
153 %shift = shl <32 x i8> %a, %b
; Variable shift of <64 x i8>: per-lane shift amounts are taken from %b.
; The AVX512F and AVX512BW prefixes are checked separately because their
; expected costs differ (22 versus 2).
157 define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
158 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v64i8':
159 ; SSE2: Found an estimated cost of 104 for instruction: %shift
160 ; SSE41: Found an estimated cost of 44 for instruction: %shift
161 ; AVX: Found an estimated cost of 44 for instruction: %shift
162 ; AVX2: Found an estimated cost of 22 for instruction: %shift
163 ; AVX512F: Found an estimated cost of 22 for instruction: %shift
164 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
165 ; XOP: Found an estimated cost of 4 for instruction: %shift
166 %shift = shl <64 x i8> %a, %b
171 ; Uniform Variable Shifts
; Uniform variable shift: %splat broadcasts lane 0 of %b (shufflevector with an
; all-zero mask) and every lane of %a is shifted left by that amount.
; NOTE(review): the expectations equal those of @var_shift_v2i64, i.e. the splat does
; not appear to lower the modelled cost - confirm this is intended.
174 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
175 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v2i64':
176 ; SSE2: Found an estimated cost of 4 for instruction: %shift
177 ; SSE41: Found an estimated cost of 4 for instruction: %shift
178 ; AVX: Found an estimated cost of 4 for instruction: %shift
179 ; AVX2: Found an estimated cost of 1 for instruction: %shift
180 ; AVX512: Found an estimated cost of 1 for instruction: %shift
181 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
182 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
183 %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
184 %shift = shl <2 x i64> %a, %splat
; Uniform variable shift of <4 x i64>: lane 0 of %b is broadcast into %splat and
; used as the shift amount for every lane of %a.
; The expectations are identical to those of @var_shift_v4i64.
188 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
189 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
190 ; SSE2: Found an estimated cost of 8 for instruction: %shift
191 ; SSE41: Found an estimated cost of 8 for instruction: %shift
192 ; AVX: Found an estimated cost of 8 for instruction: %shift
193 ; AVX2: Found an estimated cost of 1 for instruction: %shift
194 ; AVX512: Found an estimated cost of 1 for instruction: %shift
195 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
196 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
197 %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
198 %shift = shl <4 x i64> %a, %splat
; Uniform variable shift of <8 x i64>: lane 0 of %b is broadcast into %splat and
; used as the shift amount for every lane of %a.
; The expectations are identical to those of @var_shift_v8i64.
202 define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) {
203 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i64':
204 ; SSE2: Found an estimated cost of 16 for instruction: %shift
205 ; SSE41: Found an estimated cost of 16 for instruction: %shift
206 ; AVX: Found an estimated cost of 16 for instruction: %shift
207 ; AVX2: Found an estimated cost of 2 for instruction: %shift
208 ; AVX512: Found an estimated cost of 1 for instruction: %shift
209 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
210 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
211 %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
212 %shift = shl <8 x i64> %a, %splat
; Uniform variable shift of <4 x i32>: lane 0 of %b is broadcast into %splat and
; used as the shift amount for every lane of %a.
; NOTE(review): the expectations match @var_shift_v4i32 (10 for SSE2, SSE41 and AVX),
; so the uniform amount does not appear to be exploited by the model - confirm.
216 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
217 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
218 ; SSE2: Found an estimated cost of 10 for instruction: %shift
219 ; SSE41: Found an estimated cost of 10 for instruction: %shift
220 ; AVX: Found an estimated cost of 10 for instruction: %shift
221 ; AVX2: Found an estimated cost of 1 for instruction: %shift
222 ; AVX512: Found an estimated cost of 1 for instruction: %shift
223 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
224 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
225 %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
226 %shift = shl <4 x i32> %a, %splat
; Uniform variable shift of <8 x i32>: lane 0 of %b is broadcast into %splat and
; used as the shift amount for every lane of %a.
; The expectations are identical to those of @var_shift_v8i32.
230 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
231 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
232 ; SSE2: Found an estimated cost of 20 for instruction: %shift
233 ; SSE41: Found an estimated cost of 20 for instruction: %shift
234 ; AVX: Found an estimated cost of 20 for instruction: %shift
235 ; AVX2: Found an estimated cost of 1 for instruction: %shift
236 ; AVX512: Found an estimated cost of 1 for instruction: %shift
237 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
238 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
239 %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
240 %shift = shl <8 x i32> %a, %splat
; Uniform variable shift of <16 x i32>: lane 0 of %b is broadcast into %splat and
; used as the shift amount for every lane of %a.
; The expectations are identical to those of @var_shift_v16i32.
244 define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) {
245 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i32':
246 ; SSE2: Found an estimated cost of 40 for instruction: %shift
247 ; SSE41: Found an estimated cost of 40 for instruction: %shift
248 ; AVX: Found an estimated cost of 40 for instruction: %shift
249 ; AVX2: Found an estimated cost of 2 for instruction: %shift
250 ; AVX512: Found an estimated cost of 1 for instruction: %shift
251 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
252 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
253 %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
254 %shift = shl <16 x i32> %a, %splat
255 ret <16 x i32> %shift
; Uniform variable shift of <8 x i16>: lane 0 of %b is broadcast into %splat and
; used as the shift amount for every lane of %a.
; The expectations are identical to those of @var_shift_v8i16.
258 define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
259 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
260 ; SSE2: Found an estimated cost of 32 for instruction: %shift
261 ; SSE41: Found an estimated cost of 14 for instruction: %shift
262 ; AVX: Found an estimated cost of 14 for instruction: %shift
263 ; AVX2: Found an estimated cost of 14 for instruction: %shift
264 ; AVX512: Found an estimated cost of 14 for instruction: %shift
265 ; XOP: Found an estimated cost of 1 for instruction: %shift
266 %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
267 %shift = shl <8 x i16> %a, %splat
; Uniform variable shift of <16 x i16>: lane 0 of %b is broadcast into %splat and
; used as the shift amount for every lane of %a.
; The expectations are identical to those of @var_shift_v16i16.
271 define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
272 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
273 ; SSE2: Found an estimated cost of 64 for instruction: %shift
274 ; SSE41: Found an estimated cost of 28 for instruction: %shift
275 ; AVX: Found an estimated cost of 28 for instruction: %shift
276 ; AVX2: Found an estimated cost of 10 for instruction: %shift
277 ; AVX512: Found an estimated cost of 10 for instruction: %shift
278 ; XOP: Found an estimated cost of 2 for instruction: %shift
279 %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
280 %shift = shl <16 x i16> %a, %splat
281 ret <16 x i16> %shift
; Uniform variable shift of <32 x i16>: lane 0 of %b is broadcast into %splat and
; used as the shift amount for every lane of %a.
; AVX512F and AVX512BW are checked separately because their expected costs differ (20 versus 2).
284 define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) {
285 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i16':
286 ; SSE2: Found an estimated cost of 128 for instruction: %shift
287 ; SSE41: Found an estimated cost of 56 for instruction: %shift
288 ; AVX: Found an estimated cost of 56 for instruction: %shift
289 ; AVX2: Found an estimated cost of 20 for instruction: %shift
290 ; AVX512F: Found an estimated cost of 20 for instruction: %shift
291 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
292 ; XOP: Found an estimated cost of 4 for instruction: %shift
293 %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
294 %shift = shl <32 x i16> %a, %splat
295 ret <32 x i16> %shift
; Uniform variable shift of <16 x i8>: lane 0 of %b is broadcast into %splat and
; used as the shift amount for every lane of %a.
; The expectations are identical to those of @var_shift_v16i8.
298 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
299 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
300 ; SSE2: Found an estimated cost of 26 for instruction: %shift
301 ; SSE41: Found an estimated cost of 11 for instruction: %shift
302 ; AVX: Found an estimated cost of 11 for instruction: %shift
303 ; AVX2: Found an estimated cost of 11 for instruction: %shift
304 ; AVX512: Found an estimated cost of 11 for instruction: %shift
305 ; XOP: Found an estimated cost of 1 for instruction: %shift
306 %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
307 %shift = shl <16 x i8> %a, %splat
; Uniform variable shift of <32 x i8>: lane 0 of %b is broadcast into %splat and
; used as the shift amount for every lane of %a.
; The expectations are identical to those of @var_shift_v32i8.
311 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
312 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
313 ; SSE2: Found an estimated cost of 52 for instruction: %shift
314 ; SSE41: Found an estimated cost of 22 for instruction: %shift
315 ; AVX: Found an estimated cost of 22 for instruction: %shift
316 ; AVX2: Found an estimated cost of 11 for instruction: %shift
317 ; AVX512: Found an estimated cost of 11 for instruction: %shift
318 ; XOP: Found an estimated cost of 2 for instruction: %shift
319 %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
320 %shift = shl <32 x i8> %a, %splat
; Uniform variable shift of <64 x i8>: lane 0 of %b is broadcast into %splat and
; used as the shift amount for every lane of %a.
; AVX512F and AVX512BW are checked separately because their expected costs differ (22 versus 2).
324 define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
325 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v64i8':
326 ; SSE2: Found an estimated cost of 104 for instruction: %shift
327 ; SSE41: Found an estimated cost of 44 for instruction: %shift
328 ; AVX: Found an estimated cost of 44 for instruction: %shift
329 ; AVX2: Found an estimated cost of 22 for instruction: %shift
330 ; AVX512F: Found an estimated cost of 22 for instruction: %shift
331 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
332 ; XOP: Found an estimated cost of 4 for instruction: %shift
333 %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
334 %shift = shl <64 x i8> %a, %splat
; Non-uniform constant shift: the two lanes of %a are shifted left by 1 and 7 respectively.
; The expectations are identical to those of the fully variable @var_shift_v2i64.
342 define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
343 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v2i64':
344 ; SSE2: Found an estimated cost of 4 for instruction: %shift
345 ; SSE41: Found an estimated cost of 4 for instruction: %shift
346 ; AVX: Found an estimated cost of 4 for instruction: %shift
347 ; AVX2: Found an estimated cost of 1 for instruction: %shift
348 ; AVX512: Found an estimated cost of 1 for instruction: %shift
349 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
350 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
351 %shift = shl <2 x i64> %a, <i64 1, i64 7>
; Non-uniform constant shift of <4 x i64>: lanes are shifted left by 1, 7, 15 and 31.
; The expectations are identical to those of @var_shift_v4i64.
355 define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
356 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
357 ; SSE2: Found an estimated cost of 8 for instruction: %shift
358 ; SSE41: Found an estimated cost of 8 for instruction: %shift
359 ; AVX: Found an estimated cost of 8 for instruction: %shift
360 ; AVX2: Found an estimated cost of 1 for instruction: %shift
361 ; AVX512: Found an estimated cost of 1 for instruction: %shift
362 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
363 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
364 %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
; Non-uniform constant shift of <8 x i64>: the amounts 1, 7, 15, 31 are repeated twice.
; The expectations are identical to those of @var_shift_v8i64.
368 define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) {
369 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i64':
370 ; SSE2: Found an estimated cost of 16 for instruction: %shift
371 ; SSE41: Found an estimated cost of 16 for instruction: %shift
372 ; AVX: Found an estimated cost of 16 for instruction: %shift
373 ; AVX2: Found an estimated cost of 2 for instruction: %shift
374 ; AVX512: Found an estimated cost of 1 for instruction: %shift
375 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
376 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
377 %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 15, i64 31, i64 1, i64 7, i64 15, i64 31>
; Non-uniform constant shift of <4 x i32>: lanes are shifted left by 4, 5, 6 and 7.
; NOTE(review): the cost of 1 from SSE41 onwards (versus 10 for the variable case)
; presumably reflects lowering the constant shift to a vector multiply - confirm
; against the X86 cost tables.
381 define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
382 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
383 ; SSE2: Found an estimated cost of 6 for instruction: %shift
384 ; SSE41: Found an estimated cost of 1 for instruction: %shift
385 ; AVX: Found an estimated cost of 1 for instruction: %shift
386 ; AVX2: Found an estimated cost of 1 for instruction: %shift
387 ; AVX512: Found an estimated cost of 1 for instruction: %shift
388 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
389 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
390 %shift = shl <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
; Non-uniform constant shift of <8 x i32>: lanes are shifted left by 4, 5, 6, 7, 0, 1, 2, 3.
; NOTE(review): the AVX expectation (4) is higher than the SSE41 one (2); the reason
; is not visible in this file - verify it still matches the cost model.
394 define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
395 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
396 ; SSE2: Found an estimated cost of 12 for instruction: %shift
397 ; SSE41: Found an estimated cost of 2 for instruction: %shift
398 ; AVX: Found an estimated cost of 4 for instruction: %shift
399 ; AVX2: Found an estimated cost of 1 for instruction: %shift
400 ; AVX512: Found an estimated cost of 1 for instruction: %shift
401 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
402 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
403 %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
; Non-uniform constant shift of <16 x i32>: the amounts 4, 5, 6, 7, 0, 1, 2, 3 are repeated twice.
; As in the <8 x i32> case, the AVX expectation (8) is double the SSE41 one (4).
407 define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) {
408 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i32':
409 ; SSE2: Found an estimated cost of 24 for instruction: %shift
410 ; SSE41: Found an estimated cost of 4 for instruction: %shift
411 ; AVX: Found an estimated cost of 8 for instruction: %shift
412 ; AVX2: Found an estimated cost of 2 for instruction: %shift
413 ; AVX512: Found an estimated cost of 1 for instruction: %shift
414 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
415 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
416 %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
417 ret <16 x i32> %shift
; Non-uniform constant shift of <8 x i16>: lane i is shifted left by i (0 through 7).
; Every configuration expects a cost of 1.
; NOTE(review): presumably modelled as a single vector multiply - confirm.
420 define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
421 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
422 ; SSE2: Found an estimated cost of 1 for instruction: %shift
423 ; SSE41: Found an estimated cost of 1 for instruction: %shift
424 ; AVX: Found an estimated cost of 1 for instruction: %shift
425 ; AVX2: Found an estimated cost of 1 for instruction: %shift
426 ; AVX512: Found an estimated cost of 1 for instruction: %shift
427 ; XOP: Found an estimated cost of 1 for instruction: %shift
428 %shift = shl <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
; Non-uniform constant shift of <16 x i16>: the amounts 0 through 7 are repeated twice.
; NOTE(review): the AVX expectation (4) is higher than the SSE2 and SSE41 ones (2);
; the reason is not visible in this file - verify it still matches the cost model.
432 define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
433 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
434 ; SSE2: Found an estimated cost of 2 for instruction: %shift
435 ; SSE41: Found an estimated cost of 2 for instruction: %shift
436 ; AVX: Found an estimated cost of 4 for instruction: %shift
437 ; AVX2: Found an estimated cost of 1 for instruction: %shift
438 ; AVX512: Found an estimated cost of 1 for instruction: %shift
439 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
440 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
441 %shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
442 ret <16 x i16> %shift
; Non-uniform constant shift of <32 x i16>: the amounts 0 through 7 are repeated four times.
; AVX512F and AVX512BW are listed separately although both expect a cost of 2 here.
445 define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) {
446 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i16':
447 ; SSE2: Found an estimated cost of 4 for instruction: %shift
448 ; SSE41: Found an estimated cost of 4 for instruction: %shift
449 ; AVX: Found an estimated cost of 8 for instruction: %shift
450 ; AVX2: Found an estimated cost of 2 for instruction: %shift
451 ; AVX512F: Found an estimated cost of 2 for instruction: %shift
452 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
453 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
454 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
455 %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
456 ret <32 x i16> %shift
; Non-uniform constant shift of <16 x i8>: amounts run 0 up to 7, then 7 back down to 0.
; The expectations are identical to those of the fully variable @var_shift_v16i8.
459 define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
460 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
461 ; SSE2: Found an estimated cost of 26 for instruction: %shift
462 ; SSE41: Found an estimated cost of 11 for instruction: %shift
463 ; AVX: Found an estimated cost of 11 for instruction: %shift
464 ; AVX2: Found an estimated cost of 11 for instruction: %shift
465 ; AVX512: Found an estimated cost of 11 for instruction: %shift
466 ; XOP: Found an estimated cost of 1 for instruction: %shift
467 %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
; Non-uniform constant shift of <32 x i8>: the 16-lane pattern 0..7, 7..0 is repeated twice.
; The expectations are identical to those of @var_shift_v32i8.
471 define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
472 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
473 ; SSE2: Found an estimated cost of 52 for instruction: %shift
474 ; SSE41: Found an estimated cost of 22 for instruction: %shift
475 ; AVX: Found an estimated cost of 22 for instruction: %shift
476 ; AVX2: Found an estimated cost of 11 for instruction: %shift
477 ; AVX512: Found an estimated cost of 11 for instruction: %shift
478 ; XOP: Found an estimated cost of 2 for instruction: %shift
479 %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
; Non-uniform constant shift of <64 x i8>: the 16-lane pattern 0..7, 7..0 is repeated four times.
; AVX512F and AVX512BW are checked separately because their expected costs differ (22 versus 2).
483 define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
484 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v64i8':
485 ; SSE2: Found an estimated cost of 104 for instruction: %shift
486 ; SSE41: Found an estimated cost of 44 for instruction: %shift
487 ; AVX: Found an estimated cost of 44 for instruction: %shift
488 ; AVX2: Found an estimated cost of 22 for instruction: %shift
489 ; AVX512F: Found an estimated cost of 22 for instruction: %shift
490 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
491 ; XOP: Found an estimated cost of 4 for instruction: %shift
492 %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
497 ; Uniform Constant Shifts
; Uniform constant shift: both lanes of %a are shifted left by the same immediate (7).
; Every configuration expects a cost of 1.
500 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
501 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v2i64':
502 ; SSE2: Found an estimated cost of 1 for instruction: %shift
503 ; SSE41: Found an estimated cost of 1 for instruction: %shift
504 ; AVX: Found an estimated cost of 1 for instruction: %shift
505 ; AVX2: Found an estimated cost of 1 for instruction: %shift
506 ; AVX512: Found an estimated cost of 1 for instruction: %shift
507 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
508 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
509 %shift = shl <2 x i64> %a, <i64 7, i64 7>
; Uniform constant shift of <4 x i64>: every lane is shifted left by 7.
; SSE2, SSE41, AVX and XOPAVX expect 2; AVX2, AVX512 and XOPAVX2 expect 1.
513 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
514 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
515 ; SSE2: Found an estimated cost of 2 for instruction: %shift
516 ; SSE41: Found an estimated cost of 2 for instruction: %shift
517 ; AVX: Found an estimated cost of 2 for instruction: %shift
518 ; AVX2: Found an estimated cost of 1 for instruction: %shift
519 ; AVX512: Found an estimated cost of 1 for instruction: %shift
520 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
521 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
522 %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
; Uniform constant shift of <8 x i64>: every lane is shifted left by 7.
; Only the AVX512 prefix expects a cost of 1 for this vector width.
526 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
527 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i64':
528 ; SSE2: Found an estimated cost of 4 for instruction: %shift
529 ; SSE41: Found an estimated cost of 4 for instruction: %shift
530 ; AVX: Found an estimated cost of 4 for instruction: %shift
531 ; AVX2: Found an estimated cost of 2 for instruction: %shift
532 ; AVX512: Found an estimated cost of 1 for instruction: %shift
533 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
534 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
535 %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
; Uniform constant shift of <4 x i32>: every lane is shifted left by 5.
; Every configuration expects a cost of 1.
539 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
540 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
541 ; SSE2: Found an estimated cost of 1 for instruction: %shift
542 ; SSE41: Found an estimated cost of 1 for instruction: %shift
543 ; AVX: Found an estimated cost of 1 for instruction: %shift
544 ; AVX2: Found an estimated cost of 1 for instruction: %shift
545 ; AVX512: Found an estimated cost of 1 for instruction: %shift
546 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
547 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
548 %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
; Uniform constant shift of <8 x i32>: every lane is shifted left by 5.
; SSE2, SSE41, AVX and XOPAVX expect 2; AVX2, AVX512 and XOPAVX2 expect 1.
552 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
553 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i32':
554 ; SSE2: Found an estimated cost of 2 for instruction: %shift
555 ; SSE41: Found an estimated cost of 2 for instruction: %shift
556 ; AVX: Found an estimated cost of 2 for instruction: %shift
557 ; AVX2: Found an estimated cost of 1 for instruction: %shift
558 ; AVX512: Found an estimated cost of 1 for instruction: %shift
559 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
560 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
561 %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
; Uniform constant shift of <16 x i32>: every lane is shifted left by 5.
; Only the AVX512 prefix expects a cost of 1 for this vector width.
565 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
566 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i32':
567 ; SSE2: Found an estimated cost of 4 for instruction: %shift
568 ; SSE41: Found an estimated cost of 4 for instruction: %shift
569 ; AVX: Found an estimated cost of 4 for instruction: %shift
570 ; AVX2: Found an estimated cost of 2 for instruction: %shift
571 ; AVX512: Found an estimated cost of 1 for instruction: %shift
572 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
573 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
574 %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
575 ret <16 x i32> %shift
; Uniform constant shift of <8 x i16>: every lane is shifted left by 3.
; Every configuration expects a cost of 1; the shared XOP prefix covers both XOP invocations.
578 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
579 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
580 ; SSE2: Found an estimated cost of 1 for instruction: %shift
581 ; SSE41: Found an estimated cost of 1 for instruction: %shift
582 ; AVX: Found an estimated cost of 1 for instruction: %shift
583 ; AVX2: Found an estimated cost of 1 for instruction: %shift
584 ; AVX512: Found an estimated cost of 1 for instruction: %shift
585 ; XOP: Found an estimated cost of 1 for instruction: %shift
586 %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
; Uniform constant shift of <16 x i16>: every lane is shifted left by 3.
; SSE2, SSE41, AVX and XOPAVX expect 2; AVX2, AVX512 and XOPAVX2 expect 1.
590 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
591 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i16':
592 ; SSE2: Found an estimated cost of 2 for instruction: %shift
593 ; SSE41: Found an estimated cost of 2 for instruction: %shift
594 ; AVX: Found an estimated cost of 2 for instruction: %shift
595 ; AVX2: Found an estimated cost of 1 for instruction: %shift
596 ; AVX512: Found an estimated cost of 1 for instruction: %shift
597 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
598 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
599 %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
600 ret <16 x i16> %shift
; Uniform constant shift of <32 x i16>: every lane is shifted left by 3.
; AVX512F and AVX512BW are listed separately although both expect a cost of 2 here.
603 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
604 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i16':
605 ; SSE2: Found an estimated cost of 4 for instruction: %shift
606 ; SSE41: Found an estimated cost of 4 for instruction: %shift
607 ; AVX: Found an estimated cost of 4 for instruction: %shift
608 ; AVX2: Found an estimated cost of 2 for instruction: %shift
609 ; AVX512F: Found an estimated cost of 2 for instruction: %shift
610 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
611 ; XOPAVX: Found an estimated cost of 4 for instruction: %shift
612 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
613 %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
614 ret <32 x i16> %shift
617 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
618 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
619 ; SSE2: Found an estimated cost of 1 for instruction: %shift
620 ; SSE41: Found an estimated cost of 1 for instruction: %shift
621 ; AVX: Found an estimated cost of 1 for instruction: %shift
622 ; AVX2: Found an estimated cost of 1 for instruction: %shift
623 ; AVX512: Found an estimated cost of 1 for instruction: %shift
624 ; XOP: Found an estimated cost of 1 for instruction: %shift
625 %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
629 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
630 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
631 ; SSE2: Found an estimated cost of 2 for instruction: %shift
632 ; SSE41: Found an estimated cost of 2 for instruction: %shift
633 ; AVX: Found an estimated cost of 2 for instruction: %shift
634 ; AVX2: Found an estimated cost of 11 for instruction: %shift
635 ; AVX512: Found an estimated cost of 11 for instruction: %shift
636 ; XOP: Found an estimated cost of 2 for instruction: %shift
637 %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
641 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
642 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8':
643 ; SSE2: Found an estimated cost of 4 for instruction: %shift
644 ; SSE41: Found an estimated cost of 4 for instruction: %shift
645 ; AVX: Found an estimated cost of 4 for instruction: %shift
646 ; AVX2: Found an estimated cost of 22 for instruction: %shift
647 ; AVX512F: Found an estimated cost of 22 for instruction: %shift
648 ; AVX512BW: Found an estimated cost of 2 for instruction: %shift
649 ; XOP: Found an estimated cost of 4 for instruction: %shift
650 %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
658 ; We always emit a single pmullw in the case of v8i16 vector shifts by
659 ; non-uniform constant.
661 define <8 x i16> @test1(<8 x i16> %a) {
662 %shl = shl <8 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
665 ; CHECK: 'Cost Model Analysis' for function 'test1':
666 ; CHECK: Found an estimated cost of 1 for instruction: %shl
669 define <8 x i16> @test2(<8 x i16> %a) {
670 %shl = shl <8 x i16> %a, <i16 0, i16 undef, i16 0, i16 0, i16 1, i16 undef, i16 -1, i16 1>
673 ; CHECK: 'Cost Model Analysis' for function 'test2':
674 ; CHECK: Found an estimated cost of 1 for instruction: %shl
677 ; With SSE4.1, v4i32 shifts can be lowered into a single pmulld instruction.
678 ; Make sure that the estimated cost is always 1 except for the case where
679 ; we only have SSE2 support. With SSE2, we are forced to custom-lower the
680 ; v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle (hence a cost of 6).
682 define <4 x i32> @test3(<4 x i32> %a) {
683 %shl = shl <4 x i32> %a, <i32 1, i32 -1, i32 2, i32 -3>
686 ; CHECK: 'Cost Model Analysis' for function 'test3':
687 ; SSE2: Found an estimated cost of 6 for instruction: %shl
688 ; SSE41: Found an estimated cost of 1 for instruction: %shl
689 ; AVX: Found an estimated cost of 1 for instruction: %shl
690 ; AVX2: Found an estimated cost of 1 for instruction: %shl
691 ; XOP: Found an estimated cost of 1 for instruction: %shl
694 define <4 x i32> @test4(<4 x i32> %a) {
695 %shl = shl <4 x i32> %a, <i32 0, i32 0, i32 1, i32 1>
698 ; CHECK: 'Cost Model Analysis' for function 'test4':
699 ; SSE2: Found an estimated cost of 6 for instruction: %shl
700 ; SSE41: Found an estimated cost of 1 for instruction: %shl
701 ; AVX: Found an estimated cost of 1 for instruction: %shl
702 ; AVX2: Found an estimated cost of 1 for instruction: %shl
703 ; XOP: Found an estimated cost of 1 for instruction: %shl
706 ; On AVX2 we are able to lower the following shift into a single
707 ; vpsllvq. Therefore, the expected cost is only 1. XOP targets are also
708 ; costed at 1. In all other cases, this shift is scalarized as the target
709 ; does not support vpsllv instructions.
711 define <2 x i64> @test5(<2 x i64> %a) {
712 %shl = shl <2 x i64> %a, <i64 2, i64 3>
715 ; CHECK: 'Cost Model Analysis' for function 'test5':
716 ; SSE2: Found an estimated cost of 4 for instruction: %shl
717 ; SSE41: Found an estimated cost of 4 for instruction: %shl
718 ; AVX: Found an estimated cost of 4 for instruction: %shl
719 ; AVX2: Found an estimated cost of 1 for instruction: %shl
720 ; XOP: Found an estimated cost of 1 for instruction: %shl
723 ; v16i16 and v8i32 shift left by non-uniform constant are lowered into
724 ; vector multiply instructions. With AVX (but not AVX2), the vector multiply
725 ; is lowered into a sequence of: 1 extract + 2 vpmullw + 1 insert.
727 ; With AVX2, instruction vpmullw works with 256bit quantities and
728 ; therefore there is no need to split the resulting vector multiply into
729 ; a sequence of two multiplies.
731 ; With SSE2 and SSE4.1, the vector shift cost for 'test6' is twice
732 ; the cost computed in the case of 'test1'. That is because the backend
733 ; simply emits 2 pmullw with no extract/insert.
736 define <16 x i16> @test6(<16 x i16> %a) {
737 %shl = shl <16 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
740 ; CHECK: 'Cost Model Analysis' for function 'test6':
741 ; SSE2: Found an estimated cost of 2 for instruction: %shl
742 ; SSE41: Found an estimated cost of 2 for instruction: %shl
743 ; AVX: Found an estimated cost of 4 for instruction: %shl
744 ; AVX2: Found an estimated cost of 1 for instruction: %shl
745 ; XOPAVX: Found an estimated cost of 2 for instruction: %shl
746 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shl
749 ; With SSE2 and SSE4.1, the vector shift cost for 'test7' is twice
750 ; the cost computed in the case of 'test3'. That is because the multiply
751 ; is type-legalized into two v4i32 vector multiplies.
753 define <8 x i32> @test7(<8 x i32> %a) {
754 %shl = shl <8 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
757 ; CHECK: 'Cost Model Analysis' for function 'test7':
758 ; SSE2: Found an estimated cost of 12 for instruction: %shl
759 ; SSE41: Found an estimated cost of 2 for instruction: %shl
760 ; AVX: Found an estimated cost of 4 for instruction: %shl
761 ; AVX2: Found an estimated cost of 1 for instruction: %shl
762 ; XOPAVX: Found an estimated cost of 2 for instruction: %shl
763 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shl
766 ; On AVX2 we are able to lower the following shift into a single
767 ; vpsllvq. Therefore, the expected cost is only 1. With XOP but no AVX2
768 ; the cost is 2. In all other cases, this shift is scalarized as the
769 ; target does not support vpsllv instructions.
771 define <4 x i64> @test8(<4 x i64> %a) {
772 %shl = shl <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
775 ; CHECK: 'Cost Model Analysis' for function 'test8':
776 ; SSE2: Found an estimated cost of 8 for instruction: %shl
777 ; SSE41: Found an estimated cost of 8 for instruction: %shl
778 ; AVX: Found an estimated cost of 8 for instruction: %shl
779 ; AVX2: Found an estimated cost of 1 for instruction: %shl
780 ; XOPAVX: Found an estimated cost of 2 for instruction: %shl
781 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shl
784 ; Same as 'test6', with the difference that the cost is double.
786 define <32 x i16> @test9(<32 x i16> %a) {
787 %shl = shl <32 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
790 ; CHECK: 'Cost Model Analysis' for function 'test9':
791 ; SSE2: Found an estimated cost of 4 for instruction: %shl
792 ; SSE41: Found an estimated cost of 4 for instruction: %shl
793 ; AVX: Found an estimated cost of 8 for instruction: %shl
794 ; AVX2: Found an estimated cost of 2 for instruction: %shl
795 ; XOPAVX: Found an estimated cost of 4 for instruction: %shl
796 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shl
799 ; Same as 'test7', except that now the cost is double.
801 define <16 x i32> @test10(<16 x i32> %a) {
802 %shl = shl <16 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
805 ; CHECK: 'Cost Model Analysis' for function 'test10':
806 ; SSE2: Found an estimated cost of 24 for instruction: %shl
807 ; SSE41: Found an estimated cost of 4 for instruction: %shl
808 ; AVX: Found an estimated cost of 8 for instruction: %shl
809 ; AVX2: Found an estimated cost of 2 for instruction: %shl
810 ; XOPAVX: Found an estimated cost of 4 for instruction: %shl
811 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shl
814 ; On AVX2 we are able to lower the following shift into a sequence of
815 ; two vpsllvq instructions. Therefore, the expected cost is only 2.
816 ; Without AVX2 or XOP, this shift is scalarized as we don't have vpsllv
819 define <8 x i64> @test11(<8 x i64> %a) {
820 %shl = shl <8 x i64> %a, <i64 1, i64 1, i64 2, i64 3, i64 1, i64 1, i64 2, i64 3>
823 ; CHECK: 'Cost Model Analysis' for function 'test11':
824 ; SSE2: Found an estimated cost of 16 for instruction: %shl
825 ; SSE41: Found an estimated cost of 16 for instruction: %shl
826 ; AVX: Found an estimated cost of 16 for instruction: %shl
827 ; AVX2: Found an estimated cost of 2 for instruction: %shl
828 ; XOPAVX: Found an estimated cost of 4 for instruction: %shl
829 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shl