1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
4 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
5 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
6 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2
8 ; Verify the cost of vector shift left instructions.
; Variable shift: each lane of %a is shifted left by the matching lane of %b.
; The SSE2, SSE4.1 and AVX runs expect a cost of 4; the AVX2 and both XOP runs
; expect 1.
15 define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
16 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v2i64':
17 ; SSE2: Found an estimated cost of 4 for instruction: %shift
18 ; SSE41: Found an estimated cost of 4 for instruction: %shift
19 ; AVX: Found an estimated cost of 4 for instruction: %shift
20 ; AVX2: Found an estimated cost of 1 for instruction: %shift
21 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
22 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
23 %shift = shl <2 x i64> %a, %b
; 256-bit variant of var_shift_v2i64. The pre-AVX2 runs expect 8 (twice the
; v2i64 figure), the bdver2 XOP run expects 2, and the AVX2-capable runs
; expect 1.
27 define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
28 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
29 ; SSE2: Found an estimated cost of 8 for instruction: %shift
30 ; SSE41: Found an estimated cost of 8 for instruction: %shift
31 ; AVX: Found an estimated cost of 8 for instruction: %shift
32 ; AVX2: Found an estimated cost of 1 for instruction: %shift
33 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
34 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
35 %shift = shl <4 x i64> %a, %b
; Variable shift of <4 x i32> by the per-lane amounts in %b. The SSE2, SSE4.1
; and AVX runs expect 10; the AVX2 and both XOP runs expect 1.
39 define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
40 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
41 ; SSE2: Found an estimated cost of 10 for instruction: %shift
42 ; SSE41: Found an estimated cost of 10 for instruction: %shift
43 ; AVX: Found an estimated cost of 10 for instruction: %shift
44 ; AVX2: Found an estimated cost of 1 for instruction: %shift
45 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
46 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
47 %shift = shl <4 x i32> %a, %b
; 256-bit variant of var_shift_v4i32. The pre-AVX2 runs expect 20 (twice the
; v4i32 figure), the bdver2 XOP run expects 2, and the AVX2-capable runs
; expect 1.
51 define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
52 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
53 ; SSE2: Found an estimated cost of 20 for instruction: %shift
54 ; SSE41: Found an estimated cost of 20 for instruction: %shift
55 ; AVX: Found an estimated cost of 20 for instruction: %shift
56 ; AVX2: Found an estimated cost of 1 for instruction: %shift
57 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
58 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
59 %shift = shl <8 x i32> %a, %b
; Variable shift of <8 x i16> by the per-lane amounts in %b. SSE2 expects 32;
; SSE4.1, AVX and AVX2 all expect 14; both XOP runs share one check line and
; expect 1.
63 define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
64 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
65 ; SSE2: Found an estimated cost of 32 for instruction: %shift
66 ; SSE41: Found an estimated cost of 14 for instruction: %shift
67 ; AVX: Found an estimated cost of 14 for instruction: %shift
68 ; AVX2: Found an estimated cost of 14 for instruction: %shift
69 ; XOP: Found an estimated cost of 1 for instruction: %shift
70 %shift = shl <8 x i16> %a, %b
; 256-bit variant of var_shift_v8i16. SSE2, SSE4.1 and AVX expect twice the
; v8i16 figures (64, 28, 28); AVX2 expects 10; both XOP runs expect 2.
74 define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
75 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
76 ; SSE2: Found an estimated cost of 64 for instruction: %shift
77 ; SSE41: Found an estimated cost of 28 for instruction: %shift
78 ; AVX: Found an estimated cost of 28 for instruction: %shift
79 ; AVX2: Found an estimated cost of 10 for instruction: %shift
80 ; XOP: Found an estimated cost of 2 for instruction: %shift
81 %shift = shl <16 x i16> %a, %b
; Variable shift of <16 x i8> by the per-lane amounts in %b. SSE2 expects 26;
; SSE4.1, AVX and AVX2 all expect 11; both XOP runs expect 1.
85 define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
86 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
87 ; SSE2: Found an estimated cost of 26 for instruction: %shift
88 ; SSE41: Found an estimated cost of 11 for instruction: %shift
89 ; AVX: Found an estimated cost of 11 for instruction: %shift
90 ; AVX2: Found an estimated cost of 11 for instruction: %shift
91 ; XOP: Found an estimated cost of 1 for instruction: %shift
92 %shift = shl <16 x i8> %a, %b
; 256-bit variant of var_shift_v16i8. SSE2, SSE4.1 and AVX expect twice the
; v16i8 figures (52, 22, 22); AVX2 expects 11; both XOP runs expect 2.
96 define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
97 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
98 ; SSE2: Found an estimated cost of 52 for instruction: %shift
99 ; SSE41: Found an estimated cost of 22 for instruction: %shift
100 ; AVX: Found an estimated cost of 22 for instruction: %shift
101 ; AVX2: Found an estimated cost of 11 for instruction: %shift
102 ; XOP: Found an estimated cost of 2 for instruction: %shift
103 %shift = shl <32 x i8> %a, %b
108 ; Uniform Variable Shifts
; Uniform variable shift: %splat broadcasts lane 0 of %b (all-zero shuffle
; mask), so every lane of %a is shifted by the same run-time amount. Only
; %shift is checked, and the expected costs equal those of var_shift_v2i64.
111 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
112 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v2i64':
113 ; SSE2: Found an estimated cost of 4 for instruction: %shift
114 ; SSE41: Found an estimated cost of 4 for instruction: %shift
115 ; AVX: Found an estimated cost of 4 for instruction: %shift
116 ; AVX2: Found an estimated cost of 1 for instruction: %shift
117 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
118 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
119 %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
120 %shift = shl <2 x i64> %a, %splat
; Uniform variable shift of <4 x i64>: %splat broadcasts lane 0 of %b to all
; four lanes. Only %shift is checked, and the expected costs equal those of
; var_shift_v4i64.
124 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
125 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
126 ; SSE2: Found an estimated cost of 8 for instruction: %shift
127 ; SSE41: Found an estimated cost of 8 for instruction: %shift
128 ; AVX: Found an estimated cost of 8 for instruction: %shift
129 ; AVX2: Found an estimated cost of 1 for instruction: %shift
130 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
131 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
132 %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
133 %shift = shl <4 x i64> %a, %splat
; Uniform variable shift of <4 x i32>: %splat broadcasts lane 0 of %b to all
; four lanes. Only %shift is checked, and the expected costs equal those of
; var_shift_v4i32.
137 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
138 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
139 ; SSE2: Found an estimated cost of 10 for instruction: %shift
140 ; SSE41: Found an estimated cost of 10 for instruction: %shift
141 ; AVX: Found an estimated cost of 10 for instruction: %shift
142 ; AVX2: Found an estimated cost of 1 for instruction: %shift
143 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
144 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
145 %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
146 %shift = shl <4 x i32> %a, %splat
; Uniform variable shift of <8 x i32>: %splat broadcasts lane 0 of %b to all
; eight lanes. Only %shift is checked, and the expected costs equal those of
; var_shift_v8i32.
150 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
151 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
152 ; SSE2: Found an estimated cost of 20 for instruction: %shift
153 ; SSE41: Found an estimated cost of 20 for instruction: %shift
154 ; AVX: Found an estimated cost of 20 for instruction: %shift
155 ; AVX2: Found an estimated cost of 1 for instruction: %shift
156 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
157 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
158 %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
159 %shift = shl <8 x i32> %a, %splat
; Uniform variable shift of <8 x i16>: %splat broadcasts lane 0 of %b to all
; eight lanes. Only %shift is checked, and the expected costs equal those of
; var_shift_v8i16.
163 define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
164 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
165 ; SSE2: Found an estimated cost of 32 for instruction: %shift
166 ; SSE41: Found an estimated cost of 14 for instruction: %shift
167 ; AVX: Found an estimated cost of 14 for instruction: %shift
168 ; AVX2: Found an estimated cost of 14 for instruction: %shift
169 ; XOP: Found an estimated cost of 1 for instruction: %shift
170 %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
171 %shift = shl <8 x i16> %a, %splat
; Uniform variable shift of <16 x i16>: %splat broadcasts lane 0 of %b to all
; sixteen lanes. Only %shift is checked, and the expected costs equal those of
; var_shift_v16i16.
175 define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
176 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
177 ; SSE2: Found an estimated cost of 64 for instruction: %shift
178 ; SSE41: Found an estimated cost of 28 for instruction: %shift
179 ; AVX: Found an estimated cost of 28 for instruction: %shift
180 ; AVX2: Found an estimated cost of 10 for instruction: %shift
181 ; XOP: Found an estimated cost of 2 for instruction: %shift
182 %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
183 %shift = shl <16 x i16> %a, %splat
184 ret <16 x i16> %shift
; Uniform variable shift of <16 x i8>: %splat broadcasts lane 0 of %b to all
; sixteen lanes. Only %shift is checked, and the expected costs equal those of
; var_shift_v16i8.
187 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
188 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
189 ; SSE2: Found an estimated cost of 26 for instruction: %shift
190 ; SSE41: Found an estimated cost of 11 for instruction: %shift
191 ; AVX: Found an estimated cost of 11 for instruction: %shift
192 ; AVX2: Found an estimated cost of 11 for instruction: %shift
193 ; XOP: Found an estimated cost of 1 for instruction: %shift
194 %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
195 %shift = shl <16 x i8> %a, %splat
; Uniform variable shift of <32 x i8>: %splat broadcasts lane 0 of %b to all
; thirty-two lanes. Only %shift is checked, and the expected costs equal those
; of var_shift_v32i8.
199 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
200 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
201 ; SSE2: Found an estimated cost of 52 for instruction: %shift
202 ; SSE41: Found an estimated cost of 22 for instruction: %shift
203 ; AVX: Found an estimated cost of 22 for instruction: %shift
204 ; AVX2: Found an estimated cost of 11 for instruction: %shift
205 ; XOP: Found an estimated cost of 2 for instruction: %shift
206 %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
207 %shift = shl <32 x i8> %a, %splat
; Non-uniform constant shift: the two i64 lanes are shifted by 1 and 7. The
; expected costs equal those of the variable-amount var_shift_v2i64 case.
215 define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
216 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v2i64':
217 ; SSE2: Found an estimated cost of 4 for instruction: %shift
218 ; SSE41: Found an estimated cost of 4 for instruction: %shift
219 ; AVX: Found an estimated cost of 4 for instruction: %shift
220 ; AVX2: Found an estimated cost of 1 for instruction: %shift
221 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
222 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
223 %shift = shl <2 x i64> %a, <i64 1, i64 7>
; Non-uniform constant shift of <4 x i64> by 1, 7, 15 and 31. The expected
; costs equal those of the variable-amount var_shift_v4i64 case.
227 define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
228 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
229 ; SSE2: Found an estimated cost of 8 for instruction: %shift
230 ; SSE41: Found an estimated cost of 8 for instruction: %shift
231 ; AVX: Found an estimated cost of 8 for instruction: %shift
232 ; AVX2: Found an estimated cost of 1 for instruction: %shift
233 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
234 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
235 %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
; Non-uniform constant shift of <4 x i32> by 4, 5, 6 and 7. Only the SSE2-only
; run expects more than 1 (it expects 6); this matches the expectations of
; 'test3' and 'test4' further down in this file.
239 define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
240 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
241 ; SSE2: Found an estimated cost of 6 for instruction: %shift
242 ; SSE41: Found an estimated cost of 1 for instruction: %shift
243 ; AVX: Found an estimated cost of 1 for instruction: %shift
244 ; AVX2: Found an estimated cost of 1 for instruction: %shift
245 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
246 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
247 %shift = shl <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
; Non-uniform constant shift of <8 x i32>. SSE2 and SSE4.1 expect twice the
; v4i32 figures (12 and 2). Plain AVX expects 4, which is more than SSE4.1;
; the bdver2 XOP run expects 2 and the AVX2-capable runs expect 1. These are
; the same expectations as 'test7' further down in this file.
251 define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
252 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
253 ; SSE2: Found an estimated cost of 12 for instruction: %shift
254 ; SSE41: Found an estimated cost of 2 for instruction: %shift
255 ; AVX: Found an estimated cost of 4 for instruction: %shift
256 ; AVX2: Found an estimated cost of 1 for instruction: %shift
257 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
258 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
259 %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
; Non-uniform constant shift of <8 x i16> by 0 through 7. Every run expects a
; cost of 1, the same expectation as 'test1' further down in this file.
263 define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
264 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
265 ; SSE2: Found an estimated cost of 1 for instruction: %shift
266 ; SSE41: Found an estimated cost of 1 for instruction: %shift
267 ; AVX: Found an estimated cost of 1 for instruction: %shift
268 ; AVX2: Found an estimated cost of 1 for instruction: %shift
269 ; XOP: Found an estimated cost of 1 for instruction: %shift
270 %shift = shl <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
; Non-uniform constant shift of <16 x i16>; the 0..7 amount pattern is repeated
; for both 128-bit halves. SSE2 and SSE4.1 expect 2, plain AVX expects 4, the
; bdver2 XOP run expects 2 and the AVX2-capable runs expect 1. These are the
; same expectations as 'test6' further down in this file.
274 define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
275 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
276 ; SSE2: Found an estimated cost of 2 for instruction: %shift
277 ; SSE41: Found an estimated cost of 2 for instruction: %shift
278 ; AVX: Found an estimated cost of 4 for instruction: %shift
279 ; AVX2: Found an estimated cost of 1 for instruction: %shift
280 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
281 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
282 %shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
283 ret <16 x i16> %shift
; Non-uniform constant shift of <16 x i8> (amounts 0..7 followed by 7..0).
; The expected costs equal those of the variable-amount var_shift_v16i8 case,
; i.e. no run expects a discount for the amounts being constant.
286 define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
287 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
288 ; SSE2: Found an estimated cost of 26 for instruction: %shift
289 ; SSE41: Found an estimated cost of 11 for instruction: %shift
290 ; AVX: Found an estimated cost of 11 for instruction: %shift
291 ; AVX2: Found an estimated cost of 11 for instruction: %shift
292 ; XOP: Found an estimated cost of 1 for instruction: %shift
293 %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
; Non-uniform constant shift of <32 x i8>; the v16i8 amount pattern is repeated
; for both 128-bit halves. The expected costs equal those of the
; variable-amount var_shift_v32i8 case.
297 define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
298 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
299 ; SSE2: Found an estimated cost of 52 for instruction: %shift
300 ; SSE41: Found an estimated cost of 22 for instruction: %shift
301 ; AVX: Found an estimated cost of 22 for instruction: %shift
302 ; AVX2: Found an estimated cost of 11 for instruction: %shift
303 ; XOP: Found an estimated cost of 2 for instruction: %shift
304 %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
309 ; Uniform Constant Shifts
; Uniform constant shift: both i64 lanes are shifted by the same constant 7.
; Every run expects a cost of 1.
312 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
313 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v2i64':
314 ; SSE2: Found an estimated cost of 1 for instruction: %shift
315 ; SSE41: Found an estimated cost of 1 for instruction: %shift
316 ; AVX: Found an estimated cost of 1 for instruction: %shift
317 ; AVX2: Found an estimated cost of 1 for instruction: %shift
318 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
319 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
320 %shift = shl <2 x i64> %a, <i64 7, i64 7>
; Uniform constant shift of <4 x i64> by 7. Runs without AVX2 (including the
; bdver2 XOP run) expect 2; the AVX2-capable runs expect 1.
324 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
325 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
326 ; SSE2: Found an estimated cost of 2 for instruction: %shift
327 ; SSE41: Found an estimated cost of 2 for instruction: %shift
328 ; AVX: Found an estimated cost of 2 for instruction: %shift
329 ; AVX2: Found an estimated cost of 1 for instruction: %shift
330 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
331 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
332 %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
; Uniform constant shift of <4 x i32> by 5. Every run expects a cost of 1.
336 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
337 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
338 ; SSE2: Found an estimated cost of 1 for instruction: %shift
339 ; SSE41: Found an estimated cost of 1 for instruction: %shift
340 ; AVX: Found an estimated cost of 1 for instruction: %shift
341 ; AVX2: Found an estimated cost of 1 for instruction: %shift
342 ; XOPAVX: Found an estimated cost of 1 for instruction: %shift
343 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
344 %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
; Uniform constant shift of <8 x i32> by 5. Runs without AVX2 (including the
; bdver2 XOP run) expect 2; the AVX2-capable runs expect 1.
348 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
349 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i32':
350 ; SSE2: Found an estimated cost of 2 for instruction: %shift
351 ; SSE41: Found an estimated cost of 2 for instruction: %shift
352 ; AVX: Found an estimated cost of 2 for instruction: %shift
353 ; AVX2: Found an estimated cost of 1 for instruction: %shift
354 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
355 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
356 %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
; Uniform constant shift of <8 x i16> by 3. Every run expects a cost of 1.
360 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
361 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
362 ; SSE2: Found an estimated cost of 1 for instruction: %shift
363 ; SSE41: Found an estimated cost of 1 for instruction: %shift
364 ; AVX: Found an estimated cost of 1 for instruction: %shift
365 ; AVX2: Found an estimated cost of 1 for instruction: %shift
366 ; XOP: Found an estimated cost of 1 for instruction: %shift
367 %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
; Uniform constant shift of <16 x i16> by 3. Runs without AVX2 (including the
; bdver2 XOP run) expect 2; the AVX2-capable runs expect 1.
371 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
372 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i16':
373 ; SSE2: Found an estimated cost of 2 for instruction: %shift
374 ; SSE41: Found an estimated cost of 2 for instruction: %shift
375 ; AVX: Found an estimated cost of 2 for instruction: %shift
376 ; AVX2: Found an estimated cost of 1 for instruction: %shift
377 ; XOPAVX: Found an estimated cost of 2 for instruction: %shift
378 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
379 %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
380 ret <16 x i16> %shift
; Uniform constant shift of <16 x i8> by 3. Every run expects a cost of 1,
; unlike the non-uniform constant_shift_v16i8 case.
383 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
384 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
385 ; SSE2: Found an estimated cost of 1 for instruction: %shift
386 ; SSE41: Found an estimated cost of 1 for instruction: %shift
387 ; AVX: Found an estimated cost of 1 for instruction: %shift
388 ; AVX2: Found an estimated cost of 1 for instruction: %shift
389 ; XOP: Found an estimated cost of 1 for instruction: %shift
390 %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
; Uniform constant shift of <32 x i8> by 3. SSE2, SSE4.1, AVX and both XOP runs
; expect 2.
; NOTE(review): the AVX2 run expects 11, which is the same figure as the
; non-uniform constant_shift_v32i8 case and higher than the 2 expected for
; plain AVX. Confirm this reflects the intended cost-model behaviour rather
; than a uniform-constant case the AVX2 path fails to recognise.
394 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
395 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
396 ; SSE2: Found an estimated cost of 2 for instruction: %shift
397 ; SSE41: Found an estimated cost of 2 for instruction: %shift
398 ; AVX: Found an estimated cost of 2 for instruction: %shift
399 ; AVX2: Found an estimated cost of 11 for instruction: %shift
400 ; XOP: Found an estimated cost of 2 for instruction: %shift
401 %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
409 ; We always emit a single pmullw in the case of v8i16 vector shifts by
410 ; a non-uniform constant.
; <8 x i16> shift by a non-uniform constant vector. The cost line uses the
; prefix shared by all six RUN lines, so every configuration must report 1.
412 define <8 x i16> @test1(<8 x i16> %a) {
413 %shl = shl <8 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
416 ; CHECK: 'Cost Model Analysis' for function 'test1':
417 ; CHECK: Found an estimated cost of 1 for instruction: %shl
; Like test1, but the constant vector also contains undef lanes and an
; i16 -1 amount. Every configuration must still report a cost of 1.
; NOTE(review): -1 read as an unsigned amount exceeds the 16-bit lane width;
; presumably included on purpose to show the cost ignores the amount values.
420 define <8 x i16> @test2(<8 x i16> %a) {
421 %shl = shl <8 x i16> %a, <i16 0, i16 undef, i16 0, i16 0, i16 1, i16 undef, i16 -1, i16 1>
424 ; CHECK: 'Cost Model Analysis' for function 'test2':
425 ; CHECK: Found an estimated cost of 1 for instruction: %shl
428 ; With SSE4.1, v4i32 shifts can be lowered into a single pmulld instruction.
429 ; Make sure that the estimated cost is always 1 except for the case where
430 ; we only have SSE2 support. With SSE2, the v4i32 mul has to be custom
431 ; lowered as 2x shuffle, 2x pmuludq, 2x shuffle.
; <4 x i32> shift by a non-uniform constant vector that includes negative
; amounts. The SSE2-only run expects 6; every other run, including both XOP
; runs, expects 1.
433 define <4 x i32> @test3(<4 x i32> %a) {
434 %shl = shl <4 x i32> %a, <i32 1, i32 -1, i32 2, i32 -3>
437 ; CHECK: 'Cost Model Analysis' for function 'test3':
438 ; SSE2: Found an estimated cost of 6 for instruction: %shl
439 ; SSE41: Found an estimated cost of 1 for instruction: %shl
440 ; AVX: Found an estimated cost of 1 for instruction: %shl
441 ; AVX2: Found an estimated cost of 1 for instruction: %shl
442 ; XOP: Found an estimated cost of 1 for instruction: %shl
; <4 x i32> shift by the non-uniform constant amounts 0, 0, 1, 1. The expected
; costs are the same as for test3: 6 for the SSE2-only run and 1 elsewhere.
445 define <4 x i32> @test4(<4 x i32> %a) {
446 %shl = shl <4 x i32> %a, <i32 0, i32 0, i32 1, i32 1>
449 ; CHECK: 'Cost Model Analysis' for function 'test4':
450 ; SSE2: Found an estimated cost of 6 for instruction: %shl
451 ; SSE41: Found an estimated cost of 1 for instruction: %shl
452 ; AVX: Found an estimated cost of 1 for instruction: %shl
453 ; AVX2: Found an estimated cost of 1 for instruction: %shl
454 ; XOP: Found an estimated cost of 1 for instruction: %shl
457 ; On AVX2 we are able to lower the following shift into a single
458 ; vpsllvq. Therefore, the expected cost is only 1; the XOP runs also expect
459 ; a cost of 1. In all other cases, this shift is scalarized as the target
460 ; does not support vpsllv instructions.
; <2 x i64> shift by the non-uniform constant amounts 2 and 3. The SSE2,
; SSE4.1 and AVX runs expect 4; the AVX2 run and both XOP runs expect 1.
462 define <2 x i64> @test5(<2 x i64> %a) {
463 %shl = shl <2 x i64> %a, <i64 2, i64 3>
466 ; CHECK: 'Cost Model Analysis' for function 'test5':
467 ; SSE2: Found an estimated cost of 4 for instruction: %shl
468 ; SSE41: Found an estimated cost of 4 for instruction: %shl
469 ; AVX: Found an estimated cost of 4 for instruction: %shl
470 ; AVX2: Found an estimated cost of 1 for instruction: %shl
471 ; XOP: Found an estimated cost of 1 for instruction: %shl
474 ; v16i16 and v8i32 shift left by non-uniform constant are lowered into
475 ; vector multiply instructions. With AVX (but not AVX2), the vector multiply
476 ; is lowered into a sequence of: 1 extract + 2 vpmullw + 1 insert.
478 ; With AVX2, instruction vpmullw works with 256-bit quantities and
479 ; therefore there is no need to split the resulting vector multiply into
480 ; a sequence of two multiplies.
482 ; With SSE2 and SSE4.1, the vector shift cost for 'test6' is twice
483 ; the cost computed in the case of 'test1'. That is because the backend
484 ; simply emits 2 pmullw with no extract/insert.
; <16 x i16> shift by a non-uniform constant; the test1 amount pattern is
; repeated for both 128-bit halves. SSE2 and SSE4.1 expect 2, plain AVX
; expects 4, the bdver2 XOP run expects 2 and the AVX2-capable runs expect 1.
487 define <16 x i16> @test6(<16 x i16> %a) {
488 %shl = shl <16 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
491 ; CHECK: 'Cost Model Analysis' for function 'test6':
492 ; SSE2: Found an estimated cost of 2 for instruction: %shl
493 ; SSE41: Found an estimated cost of 2 for instruction: %shl
494 ; AVX: Found an estimated cost of 4 for instruction: %shl
495 ; AVX2: Found an estimated cost of 1 for instruction: %shl
496 ; XOPAVX: Found an estimated cost of 2 for instruction: %shl
497 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shl
500 ; With SSE2 and SSE4.1, the vector shift cost for 'test7' is twice
501 ; the cost computed in the case of 'test3'. That is because the multiply
502 ; is type-legalized into two v4i32 vector multiplies.
; <8 x i32> shift by a non-uniform constant; the amounts 1, 1, 2, 3 are
; repeated for both 128-bit halves. SSE2 expects 12, SSE4.1 expects 2, plain
; AVX expects 4, the bdver2 XOP run expects 2 and the AVX2-capable runs
; expect 1.
504 define <8 x i32> @test7(<8 x i32> %a) {
505 %shl = shl <8 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
508 ; CHECK: 'Cost Model Analysis' for function 'test7':
509 ; SSE2: Found an estimated cost of 12 for instruction: %shl
510 ; SSE41: Found an estimated cost of 2 for instruction: %shl
511 ; AVX: Found an estimated cost of 4 for instruction: %shl
512 ; AVX2: Found an estimated cost of 1 for instruction: %shl
513 ; XOPAVX: Found an estimated cost of 2 for instruction: %shl
514 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shl
517 ; On AVX2 we are able to lower the following shift into a single
518 ; vpsllvq. Therefore, the expected cost is only 1. XOP without AVX2 expects
519 ; a cost of 2. In all other cases, this shift is scalarized as the target
520 ; does not support vpsllv instructions.
; <4 x i64> shift by the non-uniform constant amounts 1, 2, 3, 4. The SSE2,
; SSE4.1 and AVX runs expect 8 (twice the test5 figure), the bdver2 XOP run
; expects 2 and the AVX2-capable runs expect 1.
522 define <4 x i64> @test8(<4 x i64> %a) {
523 %shl = shl <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
526 ; CHECK: 'Cost Model Analysis' for function 'test8':
527 ; SSE2: Found an estimated cost of 8 for instruction: %shl
528 ; SSE41: Found an estimated cost of 8 for instruction: %shl
529 ; AVX: Found an estimated cost of 8 for instruction: %shl
530 ; AVX2: Found an estimated cost of 1 for instruction: %shl
531 ; XOPAVX: Found an estimated cost of 2 for instruction: %shl
532 ; XOPAVX2: Found an estimated cost of 1 for instruction: %shl
535 ; Same as 'test6', with the difference that the cost is double.
; <32 x i16> shift by a non-uniform constant; the test1 amount pattern is
; repeated four times. Each run expects exactly twice its test6 figure.
537 define <32 x i16> @test9(<32 x i16> %a) {
538 %shl = shl <32 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
541 ; CHECK: 'Cost Model Analysis' for function 'test9':
542 ; SSE2: Found an estimated cost of 4 for instruction: %shl
543 ; SSE41: Found an estimated cost of 4 for instruction: %shl
544 ; AVX: Found an estimated cost of 8 for instruction: %shl
545 ; AVX2: Found an estimated cost of 2 for instruction: %shl
546 ; XOPAVX: Found an estimated cost of 4 for instruction: %shl
547 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shl
550 ; Same as 'test7', except that now the cost is double.
; <16 x i32> shift by a non-uniform constant; the amounts 1, 1, 2, 3 are
; repeated four times. Each run expects exactly twice its test7 figure.
552 define <16 x i32> @test10(<16 x i32> %a) {
553 %shl = shl <16 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
556 ; CHECK: 'Cost Model Analysis' for function 'test10':
557 ; SSE2: Found an estimated cost of 24 for instruction: %shl
558 ; SSE41: Found an estimated cost of 4 for instruction: %shl
559 ; AVX: Found an estimated cost of 8 for instruction: %shl
560 ; AVX2: Found an estimated cost of 2 for instruction: %shl
561 ; XOPAVX: Found an estimated cost of 4 for instruction: %shl
562 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shl
565 ; On AVX2 we are able to lower the following shift into a sequence of
566 ; two vpsllvq instructions. Therefore, the expected cost is only 2.
567 ; In all other cases, this shift is scalarized as we don't have vpsllv instructions.
; <8 x i64> shift by a non-uniform constant; the amounts 1, 1, 2, 3 are
; repeated twice. The SSE2, SSE4.1 and AVX runs expect 16, the bdver2 XOP run
; expects 4 and the AVX2-capable runs expect 2, i.e. twice the test8 figures.
570 define <8 x i64> @test11(<8 x i64> %a) {
571 %shl = shl <8 x i64> %a, <i64 1, i64 1, i64 2, i64 3, i64 1, i64 1, i64 2, i64 3>
574 ; CHECK: 'Cost Model Analysis' for function 'test11':
575 ; SSE2: Found an estimated cost of 16 for instruction: %shl
576 ; SSE41: Found an estimated cost of 16 for instruction: %shl
577 ; AVX: Found an estimated cost of 16 for instruction: %shl
578 ; AVX2: Found an estimated cost of 2 for instruction: %shl
579 ; XOPAVX: Found an estimated cost of 4 for instruction: %shl
580 ; XOPAVX2: Found an estimated cost of 2 for instruction: %shl