1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-SLOW
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-FAST
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-SLOW
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST
; Single-input shuffle of %a with word mask <0,1,0,1,2,3,2,3> (each of the two
; low dwords duplicated). The checks expect one dword shuffle with order
; [0,0,1,1] - pshufd on the SSE runs, vpermilps on the AVX runs.
11 define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
12 ; SSE-LABEL: shuffle_v8i16_01012323:
14 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
17 ; AVX-LABEL: shuffle_v8i16_01012323:
19 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
21 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
22 ret <8 x i16> %shuffle
; Single-input shuffle of %a with word mask <6,7,4,5,2,3,0,1>, i.e. the four
; dwords in reverse order. The checks expect one dword shuffle with order
; [3,2,1,0] - pshufd on the SSE runs, vpermilps on the AVX runs.
24 define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
25 ; SSE-LABEL: shuffle_v8i16_67452301:
27 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
30 ; AVX-LABEL: shuffle_v8i16_67452301:
32 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
34 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
35 ret <8 x i16> %shuffle
; Two-input shuffle with mask <4..11>: words 4-7 of %a followed by words 0-3 of
; %b. The SSE2 checks expect shufpd; the SSSE3 and SSE4.1 checks expect palignr
; into xmm1 followed by a movdqa back to xmm0; the AVX checks expect vpalignr.
37 define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
38 ; SSE2-LABEL: shuffle_v8i16_456789AB:
40 ; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
43 ; SSSE3-LABEL: shuffle_v8i16_456789AB:
45 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
46 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
49 ; SSE41-LABEL: shuffle_v8i16_456789AB:
51 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
52 ; SSE41-NEXT: movdqa %xmm1, %xmm0
55 ; AVX-LABEL: shuffle_v8i16_456789AB:
57 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
59 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
60 ret <8 x i16> %shuffle
; Splat of word 0 of %a into all eight lanes. The SSE and AVX1 checks expect a
; pshuflw followed by a pshufd; the checks shared by the AVX2 and AVX512VL runs
; expect a single vpbroadcastw.
63 define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
64 ; SSE-LABEL: shuffle_v8i16_00000000:
66 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
67 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
70 ; AVX1-LABEL: shuffle_v8i16_00000000:
72 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
73 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
76 ; AVX2OR512VL-LABEL: shuffle_v8i16_00000000:
77 ; AVX2OR512VL: # %bb.0:
78 ; AVX2OR512VL-NEXT: vpbroadcastw %xmm0, %xmm0
79 ; AVX2OR512VL-NEXT: retq
80 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
81 ret <8 x i16> %shuffle
; Single-input shuffle of %a: word 0 splatted over the low half and word 4 over
; the high half. The SSE, AVX1 and "slow" AVX2/AVX512VL checks expect a pshuflw
; plus pshufhw pair; the runs with +fast-variable-shuffle expect one vpshufb.
83 define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
84 ; SSE-LABEL: shuffle_v8i16_00004444:
86 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
87 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
90 ; AVX1-LABEL: shuffle_v8i16_00004444:
92 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
93 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
96 ; AVX2-SLOW-LABEL: shuffle_v8i16_00004444:
98 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
99 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
100 ; AVX2-SLOW-NEXT: retq
102 ; AVX2-FAST-LABEL: shuffle_v8i16_00004444:
103 ; AVX2-FAST: # %bb.0:
104 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9]
105 ; AVX2-FAST-NEXT: retq
107 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_00004444:
108 ; AVX512VL-SLOW: # %bb.0:
109 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
110 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
111 ; AVX512VL-SLOW-NEXT: retq
113 ; AVX512VL-FAST-LABEL: shuffle_v8i16_00004444:
114 ; AVX512VL-FAST: # %bb.0:
115 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9]
116 ; AVX512VL-FAST-NEXT: retq
117 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
118 ret <8 x i16> %shuffle
; Words 0-3 of %a placed in the odd lanes; the even lanes are undef. The checks
; expect the undef lanes to be filled so that a single self-unpack of the low
; words (punpcklwd / vpunpcklwd of xmm0 with itself) implements the shuffle.
120 define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) {
121 ; SSE-LABEL: shuffle_v8i16_u0u1u2u3:
123 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
126 ; AVX-LABEL: shuffle_v8i16_u0u1u2u3:
128 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
130 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
131 ret <8 x i16> %shuffle
; Words 4-7 of %a placed in the odd lanes; the even lanes are undef. The checks
; expect a single self-unpack of the high words (punpckhwd / vpunpckhwd of xmm0
; with itself).
133 define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) {
134 ; SSE-LABEL: shuffle_v8i16_u4u5u6u7:
136 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
139 ; AVX-LABEL: shuffle_v8i16_u4u5u6u7:
141 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
143 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
144 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <3,1,2,0,6,7,4,5>: the low four words are
; permuted and the two high dwords are swapped. The SSE, AVX1 and "slow" checks
; expect pshuflw followed by pshufd; the +fast-variable-shuffle runs expect one
; vpshufb.
146 define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
147 ; SSE-LABEL: shuffle_v8i16_31206745:
149 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
150 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
153 ; AVX1-LABEL: shuffle_v8i16_31206745:
155 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
156 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
159 ; AVX2-SLOW-LABEL: shuffle_v8i16_31206745:
160 ; AVX2-SLOW: # %bb.0:
161 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
162 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
163 ; AVX2-SLOW-NEXT: retq
165 ; AVX2-FAST-LABEL: shuffle_v8i16_31206745:
166 ; AVX2-FAST: # %bb.0:
167 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11]
168 ; AVX2-FAST-NEXT: retq
170 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_31206745:
171 ; AVX512VL-SLOW: # %bb.0:
172 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
173 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
174 ; AVX512VL-SLOW-NEXT: retq
176 ; AVX512VL-FAST-LABEL: shuffle_v8i16_31206745:
177 ; AVX512VL-FAST: # %bb.0:
178 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11]
179 ; AVX512VL-FAST-NEXT: retq
180 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
181 ret <8 x i16> %shuffle
; Single-input shuffle of %a: word 4 splatted over the low half and word 0 over
; the high half. The SSE2 checks expect a pshufd, pshuflw, pshufhw sequence;
; every run with byte shuffles available (SSSE3 and later) expects one pshufb.
183 define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) {
184 ; SSE2-LABEL: shuffle_v8i16_44440000:
186 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
187 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
188 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
191 ; SSSE3-LABEL: shuffle_v8i16_44440000:
193 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
196 ; SSE41-LABEL: shuffle_v8i16_44440000:
198 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
201 ; AVX-LABEL: shuffle_v8i16_44440000:
203 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
205 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0>
206 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <2,3,0,1,6,7,4,5>: adjacent dwords are
; swapped within each 64-bit half. The checks expect one dword shuffle with
; order [1,0,3,2] - pshufd on the SSE runs, vpermilps on the AVX runs.
208 define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) {
209 ; SSE-LABEL: shuffle_v8i16_23016745:
211 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
214 ; AVX-LABEL: shuffle_v8i16_23016745:
216 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,3,2]
218 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
219 ret <8 x i16> %shuffle
; Variant of the 23016745 pattern where lane 3 reads word 2 instead of word 1,
; so the low half is no longer a pure dword permutation. The SSE, AVX1 and
; "slow" checks expect pshuflw followed by pshufd; the +fast-variable-shuffle
; runs expect one vpshufb.
221 define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) {
222 ; SSE-LABEL: shuffle_v8i16_23026745:
224 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
225 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
228 ; AVX1-LABEL: shuffle_v8i16_23026745:
230 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
231 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
234 ; AVX2-SLOW-LABEL: shuffle_v8i16_23026745:
235 ; AVX2-SLOW: # %bb.0:
236 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
237 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
238 ; AVX2-SLOW-NEXT: retq
240 ; AVX2-FAST-LABEL: shuffle_v8i16_23026745:
241 ; AVX2-FAST: # %bb.0:
242 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11]
243 ; AVX2-FAST-NEXT: retq
245 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_23026745:
246 ; AVX512VL-SLOW: # %bb.0:
247 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
248 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
249 ; AVX512VL-SLOW-NEXT: retq
251 ; AVX512VL-FAST-LABEL: shuffle_v8i16_23026745:
252 ; AVX512VL-FAST: # %bb.0:
253 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11]
254 ; AVX512VL-FAST-NEXT: retq
255 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
256 ret <8 x i16> %shuffle
; Variant of the 23016745 pattern where lane 7 reads word 7 instead of word 5,
; so the high half is no longer a pure dword permutation. The SSE, AVX1 and
; "slow" checks expect pshufd followed by pshufhw; the +fast-variable-shuffle
; runs expect one vpshufb.
258 define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) {
259 ; SSE-LABEL: shuffle_v8i16_23016747:
261 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
262 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
265 ; AVX1-LABEL: shuffle_v8i16_23016747:
267 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
268 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
271 ; AVX2-SLOW-LABEL: shuffle_v8i16_23016747:
272 ; AVX2-SLOW: # %bb.0:
273 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
274 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
275 ; AVX2-SLOW-NEXT: retq
277 ; AVX2-FAST-LABEL: shuffle_v8i16_23016747:
278 ; AVX2-FAST: # %bb.0:
279 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15]
280 ; AVX2-FAST-NEXT: retq
282 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_23016747:
283 ; AVX512VL-SLOW: # %bb.0:
284 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
285 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
286 ; AVX512VL-SLOW-NEXT: retq
288 ; AVX512VL-FAST-LABEL: shuffle_v8i16_23016747:
289 ; AVX512VL-FAST: # %bb.0:
290 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3,12,13,14,15,8,9,14,15]
291 ; AVX512VL-FAST-NEXT: retq
292 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7>
293 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <7,5,6,4,3,1,2,0>: the two halves trade
; places and each half is permuted. The SSE2 checks expect pshufd, pshuflw,
; pshufhw; the SSSE3, SSE4.1 and AVX checks expect a single byte shuffle.
295 define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) {
296 ; SSE2-LABEL: shuffle_v8i16_75643120:
298 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
299 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
300 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
303 ; SSSE3-LABEL: shuffle_v8i16_75643120:
305 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
308 ; SSE41-LABEL: shuffle_v8i16_75643120:
310 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
313 ; AVX-LABEL: shuffle_v8i16_75643120:
315 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
317 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0>
318 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <1,0,5,4,5,4,1,0>; only words 0, 1, 4 and
; 5 are read. The SSE2 checks expect pshufd [0,2,2,0] to gather those dwords,
; then pshuflw and pshufhw; SSSE3 and later expect a single byte shuffle.
321 define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) {
322 ; SSE2-LABEL: shuffle_v8i16_10545410:
324 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
325 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
326 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
329 ; SSSE3-LABEL: shuffle_v8i16_10545410:
331 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
334 ; SSE41-LABEL: shuffle_v8i16_10545410:
336 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
339 ; AVX-LABEL: shuffle_v8i16_10545410:
341 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
343 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0>
344 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <5,4,1,0,5,4,1,0>: the same four-word
; pattern repeated in both halves. The SSE2 checks expect pshufd [0,2,2,0],
; pshuflw, pshufhw; SSSE3 and later expect a single byte shuffle.
346 define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) {
347 ; SSE2-LABEL: shuffle_v8i16_54105410:
349 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
350 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
351 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
354 ; SSSE3-LABEL: shuffle_v8i16_54105410:
356 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
359 ; SSE41-LABEL: shuffle_v8i16_54105410:
361 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
364 ; AVX-LABEL: shuffle_v8i16_54105410:
366 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
368 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0>
369 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <5,4,1,0,1,0,5,4>; only words 0, 1, 4 and
; 5 are read. The SSE2 checks expect pshufd [0,2,2,0], pshuflw, pshufhw; SSSE3
; and later expect a single byte shuffle.
371 define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) {
372 ; SSE2-LABEL: shuffle_v8i16_54101054:
374 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
375 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
376 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
379 ; SSSE3-LABEL: shuffle_v8i16_54101054:
381 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
384 ; SSE41-LABEL: shuffle_v8i16_54101054:
386 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
389 ; AVX-LABEL: shuffle_v8i16_54101054:
391 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
393 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4>
394 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <0,4,4,0,0,4,4,0>; only words 0 and 4 are
; read. The SSE2 checks expect pshufd [0,2,2,0], pshuflw, pshufhw; SSSE3 and
; later expect a single byte shuffle.
396 define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) {
397 ; SSE2-LABEL: shuffle_v8i16_04400440:
399 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
400 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
401 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6]
404 ; SSSE3-LABEL: shuffle_v8i16_04400440:
406 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
409 ; SSE41-LABEL: shuffle_v8i16_04400440:
411 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
414 ; AVX-LABEL: shuffle_v8i16_04400440:
416 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
418 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0>
419 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <4,0,0,4,4,0,0,4>; only words 0 and 4 are
; read. The SSE2 checks expect pshufd [0,2,2,0], pshuflw, pshufhw; SSSE3 and
; later expect a single byte shuffle.
421 define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) {
422 ; SSE2-LABEL: shuffle_v8i16_40044004:
424 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
425 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7]
426 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
429 ; SSSE3-LABEL: shuffle_v8i16_40044004:
431 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
434 ; SSE41-LABEL: shuffle_v8i16_40044004:
436 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
439 ; AVX-LABEL: shuffle_v8i16_40044004:
441 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
443 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4>
444 ret <8 x i16> %shuffle
; Full single-input permutation of %a with mask <2,6,4,0,5,1,7,3> that moves
; words across both halves. The SSE2 checks expect a five-instruction
; pshuflw/pshufhw/pshufd/pshuflw/pshufhw chain; SSSE3 and later expect a single
; byte shuffle.
447 define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) {
448 ; SSE2-LABEL: shuffle_v8i16_26405173:
450 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
451 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
452 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
453 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
454 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
457 ; SSSE3-LABEL: shuffle_v8i16_26405173:
459 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
462 ; SSE41-LABEL: shuffle_v8i16_26405173:
464 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
467 ; AVX-LABEL: shuffle_v8i16_26405173:
469 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
471 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3>
472 ret <8 x i16> %shuffle
; Full single-input permutation of %a with mask <2,0,6,4,5,1,7,3>. The SSE2
; checks expect a five-instruction pshuflw/pshufhw/pshufd/pshuflw/pshufhw
; chain; SSSE3 and later expect a single byte shuffle.
474 define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) {
475 ; SSE2-LABEL: shuffle_v8i16_20645173:
477 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
478 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
479 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
480 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
481 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
484 ; SSSE3-LABEL: shuffle_v8i16_20645173:
486 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
489 ; SSE41-LABEL: shuffle_v8i16_20645173:
491 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
494 ; AVX-LABEL: shuffle_v8i16_20645173:
496 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
498 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3>
499 ret <8 x i16> %shuffle
; Full single-input permutation of %a with mask <2,6,4,0,1,3,7,5>. The SSE2
; checks expect a four-instruction pshuflw/pshufhw/pshufd/pshuflw chain; SSSE3
; and later expect a single byte shuffle.
501 define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
502 ; SSE2-LABEL: shuffle_v8i16_26401375:
504 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
505 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
506 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
507 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
510 ; SSSE3-LABEL: shuffle_v8i16_26401375:
512 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
515 ; SSE41-LABEL: shuffle_v8i16_26401375:
517 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
520 ; AVX-LABEL: shuffle_v8i16_26401375:
522 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
524 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
525 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <6,6,7,5,1,6,4,3>; word 6 is read three
; times and words 0 and 2 are not read. The SSE2 checks expect a
; five-instruction pshuflw/pshufhw/pshufd/pshuflw/pshufhw chain; SSSE3 and later
; expect a single byte shuffle.
528 define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) {
529 ; SSE2-LABEL: shuffle_v8i16_66751643:
531 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
532 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
533 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0]
534 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7]
535 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6]
538 ; SSSE3-LABEL: shuffle_v8i16_66751643:
540 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
543 ; SSE41-LABEL: shuffle_v8i16_66751643:
545 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
548 ; AVX-LABEL: shuffle_v8i16_66751643:
550 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
552 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 3>
553 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <6,0,5,1,4,7,5,4>. The SSE2 checks expect
; a four-instruction pshufhw/pshufd/pshuflw/pshufhw chain; SSSE3 and later
; expect a single byte shuffle.
; The second shufflevector operand is %b, matching the other tests in this
; file; every mask index is below 8, so only %a is ever read.
556 define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) {
557 ; SSE2-LABEL: shuffle_v8i16_60514754:
559 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
560 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
561 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
562 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6]
565 ; SSSE3-LABEL: shuffle_v8i16_60514754:
567 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
570 ; SSE41-LABEL: shuffle_v8i16_60514754:
572 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
575 ; AVX-LABEL: shuffle_v8i16_60514754:
577 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
579 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 0, i32 5, i32 1, i32 4, i32 7, i32 5, i32 4>
580 ret <8 x i16> %shuffle
; Single-input shuffle of %a: word 0 in lanes 0-1 and word 4 in lanes 2-7. The
; SSE2 checks expect pshufd [0,2,2,3], pshuflw, pshufhw; SSSE3 and later expect
; a single byte shuffle.
583 define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) {
584 ; SSE2-LABEL: shuffle_v8i16_00444444:
586 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
587 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
588 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
591 ; SSSE3-LABEL: shuffle_v8i16_00444444:
593 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
596 ; SSE41-LABEL: shuffle_v8i16_00444444:
598 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
601 ; AVX-LABEL: shuffle_v8i16_00444444:
603 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
605 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
606 ret <8 x i16> %shuffle
; Single-input shuffle of %a: word 4 everywhere except lanes 2-3, which take
; word 0. The SSE2 checks expect pshufd [0,2,2,3], pshuflw, pshufhw; SSSE3 and
; later expect a single byte shuffle.
608 define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) {
609 ; SSE2-LABEL: shuffle_v8i16_44004444:
611 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
612 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7]
613 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
616 ; SSSE3-LABEL: shuffle_v8i16_44004444:
618 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
621 ; SSE41-LABEL: shuffle_v8i16_44004444:
623 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
626 ; AVX-LABEL: shuffle_v8i16_44004444:
628 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
630 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
631 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <0,4,4,0,4,4,4,4>. The SSE2 checks expect
; pshufd [0,2,2,3], pshuflw, pshufhw; SSSE3 and later expect a single byte
; shuffle.
633 define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) {
634 ; SSE2-LABEL: shuffle_v8i16_04404444:
636 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
637 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
638 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
641 ; SSSE3-LABEL: shuffle_v8i16_04404444:
643 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
646 ; SSE41-LABEL: shuffle_v8i16_04404444:
648 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
651 ; AVX-LABEL: shuffle_v8i16_04404444:
653 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
655 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
656 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <0,4,4,0,0,0,0,0>. The SSE2 checks expect
; pshufd [0,2,0,3] (placing word 0 at the start of the high half), then pshuflw
; and pshufhw; SSSE3 and later expect a single byte shuffle.
658 define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
659 ; SSE2-LABEL: shuffle_v8i16_04400000:
661 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
662 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
663 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
666 ; SSSE3-LABEL: shuffle_v8i16_04400000:
668 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
671 ; SSE41-LABEL: shuffle_v8i16_04400000:
673 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
676 ; AVX-LABEL: shuffle_v8i16_04400000:
678 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
680 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
681 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <0,4,4,0,4,5,6,7>: the high half is kept
; in place and the low half mixes words 0 and 4. The SSE, AVX1 and "slow" checks
; expect pshufd followed by pshuflw; the +fast-variable-shuffle runs expect one
; vpshufb.
683 define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
684 ; SSE-LABEL: shuffle_v8i16_04404567:
686 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
687 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
690 ; AVX1-LABEL: shuffle_v8i16_04404567:
692 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
693 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
696 ; AVX2-SLOW-LABEL: shuffle_v8i16_04404567:
697 ; AVX2-SLOW: # %bb.0:
698 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
699 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
700 ; AVX2-SLOW-NEXT: retq
702 ; AVX2-FAST-LABEL: shuffle_v8i16_04404567:
703 ; AVX2-FAST: # %bb.0:
704 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15]
705 ; AVX2-FAST-NEXT: retq
707 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_04404567:
708 ; AVX512VL-SLOW: # %bb.0:
709 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
710 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
711 ; AVX512VL-SLOW-NEXT: retq
713 ; AVX512VL-FAST-LABEL: shuffle_v8i16_04404567:
714 ; AVX512VL-FAST: # %bb.0:
715 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,14,15]
716 ; AVX512VL-FAST-NEXT: retq
717 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
718 ret <8 x i16> %shuffle
; Like the 00444444 test but with lane 1 undef (the X in the name). The expected
; shuffles leave word 1 in place for the undef lane. The SSE2 checks expect
; pshufd, pshuflw, pshufhw; SSSE3 and later expect a single byte shuffle.
721 define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
722 ; SSE2-LABEL: shuffle_v8i16_0X444444:
724 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
725 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
726 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
729 ; SSSE3-LABEL: shuffle_v8i16_0X444444:
731 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
734 ; SSE41-LABEL: shuffle_v8i16_0X444444:
736 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
739 ; AVX-LABEL: shuffle_v8i16_0X444444:
741 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
743 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
744 ret <8 x i16> %shuffle
; Like the 44004444 test but with lane 2 undef (the X in the name). The expected
; shuffles fill the undef lane with word 4. The SSE2 checks expect pshufd,
; pshuflw, pshufhw; SSSE3 and later expect a single byte shuffle.
746 define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
747 ; SSE2-LABEL: shuffle_v8i16_44X04444:
749 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
750 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7]
751 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
754 ; SSSE3-LABEL: shuffle_v8i16_44X04444:
756 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
759 ; SSE41-LABEL: shuffle_v8i16_44X04444:
761 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
764 ; AVX-LABEL: shuffle_v8i16_44X04444:
766 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
768 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
769 ret <8 x i16> %shuffle
; Like the 04404444 test but with lane 0 undef (the X in the name). The expected
; shuffles fill the undef lane with word 0, giving the same instruction
; sequences as the fully-defined variant.
771 define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
772 ; SSE2-LABEL: shuffle_v8i16_X4404444:
774 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
775 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
776 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
779 ; SSSE3-LABEL: shuffle_v8i16_X4404444:
781 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
784 ; SSE41-LABEL: shuffle_v8i16_X4404444:
786 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
789 ; AVX-LABEL: shuffle_v8i16_X4404444:
791 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
793 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
794 ret <8 x i16> %shuffle
; Low half takes words <0,1,2,7> of %a (one word crossing from the high half);
; the high half is undef. The SSE2 checks expect pshufd, pshufhw, pshufd; SSSE3
; and later expect a single byte shuffle.
797 define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
798 ; SSE2-LABEL: shuffle_v8i16_0127XXXX:
800 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
801 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
802 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
805 ; SSSE3-LABEL: shuffle_v8i16_0127XXXX:
807 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
810 ; SSE41-LABEL: shuffle_v8i16_0127XXXX:
812 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
815 ; AVX-LABEL: shuffle_v8i16_0127XXXX:
817 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
819 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
820 ret <8 x i16> %shuffle
; High half takes words <4,5,6,3> of %a (one word crossing from the low half);
; the low half is undef. The SSE2 checks expect pshufd, pshuflw, pshufd; SSSE3
; and later expect a single byte shuffle.
823 define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
824 ; SSE2-LABEL: shuffle_v8i16_XXXX4563:
826 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
827 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
828 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
831 ; SSSE3-LABEL: shuffle_v8i16_XXXX4563:
833 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
836 ; SSE41-LABEL: shuffle_v8i16_XXXX4563:
838 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
841 ; AVX-LABEL: shuffle_v8i16_XXXX4563:
843 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
845 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
846 ret <8 x i16> %shuffle
; Low half takes words <4,5,6,3> of %a; the high half is undef. The SSE2 checks
; expect pshufd, pshuflw, pshufd; SSSE3 and later expect a single byte shuffle.
849 define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
850 ; SSE2-LABEL: shuffle_v8i16_4563XXXX:
852 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
853 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
854 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
857 ; SSSE3-LABEL: shuffle_v8i16_4563XXXX:
859 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
862 ; SSE41-LABEL: shuffle_v8i16_4563XXXX:
864 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
867 ; AVX-LABEL: shuffle_v8i16_4563XXXX:
869 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
871 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
872 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <0,1,2,7,4,5,6,3>: identity except that
; words 3 and 7 are exchanged across the halves. The SSE2 checks expect pshufd,
; pshufhw, pshufd; SSSE3 and later expect a single byte shuffle.
875 define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
876 ; SSE2-LABEL: shuffle_v8i16_01274563:
878 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
879 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
880 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
883 ; SSSE3-LABEL: shuffle_v8i16_01274563:
885 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
888 ; SSE41-LABEL: shuffle_v8i16_01274563:
890 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
893 ; AVX-LABEL: shuffle_v8i16_01274563:
895 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
897 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
898 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask <4,5,6,3,0,1,2,7>: the halves are swapped
; except that words 3 and 7 keep their lane positions. The SSE2 checks expect
; pshufd, pshuflw, pshufd; SSSE3 and later expect a single byte shuffle.
901 define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
902 ; SSE2-LABEL: shuffle_v8i16_45630127:
904 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
905 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
906 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
909 ; SSSE3-LABEL: shuffle_v8i16_45630127:
911 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
914 ; SSE41-LABEL: shuffle_v8i16_45630127:
916 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
919 ; AVX-LABEL: shuffle_v8i16_45630127:
921 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
923 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
924 ret <8 x i16> %shuffle
; Single-input shuffle of %a with mask 3,7,1,0,2,7,3,5; elements 3 and 7 each
; appear twice and %b is unused.
; The SSE2 checks expect a six-instruction pshufhw/pshufd/pshuflw chain; the
; SSSE3, SSE4.1 and AVX checks expect one byte shuffle.
927 define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) {
928 ; SSE2-LABEL: shuffle_v8i16_37102735:
930 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
931 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
932 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
933 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
934 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
935 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
938 ; SSSE3-LABEL: shuffle_v8i16_37102735:
940 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
943 ; SSE41-LABEL: shuffle_v8i16_37102735:
945 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
948 ; AVX-LABEL: shuffle_v8i16_37102735:
950 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
952 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 5>
953 ret <8 x i16> %shuffle
; Interleaves elements 0-3 of %a with elements 0-3 of %b (mask indices 8-11),
; %a first in each pair. The checks expect a single (v)punpcklwd.
956 define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
957 ; SSE-LABEL: shuffle_v8i16_08192a3b:
959 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
962 ; AVX-LABEL: shuffle_v8i16_08192a3b:
964 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
966 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
967 ret <8 x i16> %shuffle
; Interleaves elements 0-3 of %a with elements 4-7 of %b (mask indices 12-15).
; The checks expect a (v)pshufd that moves the upper half of xmm1 down,
; followed by (v)punpcklwd.
970 define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
971 ; SSE-LABEL: shuffle_v8i16_0c1d2e3f:
973 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
974 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
977 ; AVX-LABEL: shuffle_v8i16_0c1d2e3f:
979 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
980 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
982 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
983 ret <8 x i16> %shuffle
; Interleaves elements 4-7 of %a with elements 4-7 of %b (mask indices 12-15),
; %a first in each pair. The checks expect a single (v)punpckhwd.
986 define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
987 ; SSE-LABEL: shuffle_v8i16_4c5d6e7f:
989 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
992 ; AVX-LABEL: shuffle_v8i16_4c5d6e7f:
994 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
996 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
997 ret <8 x i16> %shuffle
; Interleaves elements 4-7 of %a with elements 0-3 of %b (mask indices 8-11).
; The checks expect a (v)pshufd that moves the upper half of xmm0 down,
; followed by (v)punpcklwd.
1000 define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
1001 ; SSE-LABEL: shuffle_v8i16_48596a7b:
1003 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1004 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1007 ; AVX-LABEL: shuffle_v8i16_48596a7b:
1009 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1010 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1012 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
1013 ret <8 x i16> %shuffle
; Interleaves elements 0, 1, 6, 7 of %a with the same-numbered elements of %b
; (mask 0,8,1,9,6,14,7,15).
; The checks expect one (v)pshufd per input to gather those elements into the
; low half, followed by (v)punpcklwd.
1016 define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
1017 ; SSE-LABEL: shuffle_v8i16_08196e7f:
1019 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
1020 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1021 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1024 ; AVX-LABEL: shuffle_v8i16_08196e7f:
1026 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
1027 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1028 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1030 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15>
1031 ret <8 x i16> %shuffle
; Interleaves elements 0, 1, 6, 7 of %a with elements 4, 5, 0, 1 of %b
; (mask 0,12,1,13,6,8,7,9).
; The checks expect one (v)pshufd per input followed by (v)punpcklwd.
1034 define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) {
1035 ; SSE-LABEL: shuffle_v8i16_0c1d6879:
1037 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
1038 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1039 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1042 ; AVX-LABEL: shuffle_v8i16_0c1d6879:
1044 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
1045 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
1046 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1048 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9>
1049 ret <8 x i16> %shuffle
; Two-input shuffle with mask 1,0,9,8,3,2,11,10: result is a1,a0,b1,b0,a3,a2,b3,b2
; (aN = element N of %a, bN = element N of %b).
; All configurations are checked for an initial (v)punpcklwd. The SSE, AVX1
; and "slow" AVX2/AVX512VL checks then expect pshuflw + pshufhw, whereas the
; runs with +fast-variable-shuffle expect a single vpshufb instead.
1052 define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
1053 ; SSE-LABEL: shuffle_v8i16_109832ba:
1055 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1056 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
1057 ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
1060 ; AVX1-LABEL: shuffle_v8i16_109832ba:
1062 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1063 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
1064 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
1067 ; AVX2-SLOW-LABEL: shuffle_v8i16_109832ba:
1068 ; AVX2-SLOW: # %bb.0:
1069 ; AVX2-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1070 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
1071 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
1072 ; AVX2-SLOW-NEXT: retq
1074 ; AVX2-FAST-LABEL: shuffle_v8i16_109832ba:
1075 ; AVX2-FAST: # %bb.0:
1076 ; AVX2-FAST-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1077 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,6,7,2,3,12,13,8,9,14,15,10,11]
1078 ; AVX2-FAST-NEXT: retq
1080 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_109832ba:
1081 ; AVX512VL-SLOW: # %bb.0:
1082 ; AVX512VL-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1083 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
1084 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
1085 ; AVX512VL-SLOW-NEXT: retq
1087 ; AVX512VL-FAST-LABEL: shuffle_v8i16_109832ba:
1088 ; AVX512VL-FAST: # %bb.0:
1089 ; AVX512VL-FAST-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1090 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,6,7,2,3,12,13,8,9,14,15,10,11]
1091 ; AVX512VL-FAST-NEXT: retq
1092 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10>
1093 ret <8 x i16> %shuffle
; Interleaves elements 0-3 of %b with elements 0-3 of %a, %b first in each
; pair (mask 8,0,9,1,10,2,11,3).
; The SSE checks expect punpcklwd into xmm1 plus a movdqa back to xmm0; the
; AVX checks expect a single three-operand vpunpcklwd.
1096 define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) {
1097 ; SSE-LABEL: shuffle_v8i16_8091a2b3:
1099 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1100 ; SSE-NEXT: movdqa %xmm1, %xmm0
1103 ; AVX-LABEL: shuffle_v8i16_8091a2b3:
1105 ; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1107 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3>
1108 ret <8 x i16> %shuffle
; Interleaves elements 4-7 of %b with elements 4-7 of %a, %b first in each
; pair (mask 12,4,13,5,14,6,15,7).
; The SSE checks expect punpckhwd into xmm1 plus a movdqa back to xmm0; the
; AVX checks expect a single three-operand vpunpckhwd.
1110 define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) {
1111 ; SSE-LABEL: shuffle_v8i16_c4d5e6f7:
1113 ; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1114 ; SSE-NEXT: movdqa %xmm1, %xmm0
1117 ; AVX-LABEL: shuffle_v8i16_c4d5e6f7:
1119 ; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1121 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
1122 ret <8 x i16> %shuffle
; Two-input shuffle with mask 0,2,1,3,12,14,13,15: lanes 0-3 are elements 0-3
; of %a with the middle two swapped, lanes 4-7 are elements 4-7 of %b with the
; middle two swapped.
; The SSE, AVX1 and "slow" checks expect pshuflw on each input (after a pshufd
; of xmm1) joined by punpcklqdq; the runs with +fast-variable-shuffle expect
; the xmm1 pshufd/pshuflw pair to be a single vpshufb.
1125 define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
1126 ; SSE-LABEL: shuffle_v8i16_0213cedf:
1128 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1129 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1130 ; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
1131 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1134 ; AVX1-LABEL: shuffle_v8i16_0213cedf:
1136 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1137 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1138 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
1139 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1142 ; AVX2-SLOW-LABEL: shuffle_v8i16_0213cedf:
1143 ; AVX2-SLOW: # %bb.0:
1144 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1145 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1146 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
1147 ; AVX2-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1148 ; AVX2-SLOW-NEXT: retq
1150 ; AVX2-FAST-LABEL: shuffle_v8i16_0213cedf:
1151 ; AVX2-FAST: # %bb.0:
1152 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,10,11,12,13,14,15]
1153 ; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1154 ; AVX2-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1155 ; AVX2-FAST-NEXT: retq
1157 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_0213cedf:
1158 ; AVX512VL-SLOW: # %bb.0:
1159 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1160 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
1161 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
1162 ; AVX512VL-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1163 ; AVX512VL-SLOW-NEXT: retq
1165 ; AVX512VL-FAST-LABEL: shuffle_v8i16_0213cedf:
1166 ; AVX512VL-FAST: # %bb.0:
1167 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,12,13,10,11,14,15,8,9,10,11,12,13,14,15]
1168 ; AVX512VL-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
1169 ; AVX512VL-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1170 ; AVX512VL-FAST-NEXT: retq
1171 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15>
1172 ret <8 x i16> %shuffle
; Two-input shuffle whose low four lanes are a4, a4, a3, b2 (mask 4,4,3,10);
; lanes 4-7 are undef.
; Checked lowerings differ per feature level: a constant-mask and/andn/or blend
; plus pshufd/pshuflw on SSE2, two pshufb and a por on SSSE3, and pblendw
; followed by pshufd/pshuflw (or by one vpshufb with +fast-variable-shuffle)
; from SSE4.1 upward.
1175 define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
1176 ; SSE2-LABEL: shuffle_v8i16_443aXXXX:
1178 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,0,65535,65535,65535,65535,65535]
1179 ; SSE2-NEXT: pand %xmm2, %xmm0
1180 ; SSE2-NEXT: pandn %xmm1, %xmm2
1181 ; SSE2-NEXT: por %xmm0, %xmm2
1182 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,1,2,3]
1183 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1186 ; SSSE3-LABEL: shuffle_v8i16_443aXXXX:
1188 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[4,5,u,u,u,u,u,u,u,u]
1189 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1190 ; SSSE3-NEXT: por %xmm1, %xmm0
1193 ; SSE41-LABEL: shuffle_v8i16_443aXXXX:
1195 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1196 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1197 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1200 ; AVX1-LABEL: shuffle_v8i16_443aXXXX:
1202 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1203 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1204 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1207 ; AVX2-SLOW-LABEL: shuffle_v8i16_443aXXXX:
1208 ; AVX2-SLOW: # %bb.0:
1209 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1210 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1211 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1212 ; AVX2-SLOW-NEXT: retq
1214 ; AVX2-FAST-LABEL: shuffle_v8i16_443aXXXX:
1215 ; AVX2-FAST: # %bb.0:
1216 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1217 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15]
1218 ; AVX2-FAST-NEXT: retq
1220 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_443aXXXX:
1221 ; AVX512VL-SLOW: # %bb.0:
1222 ; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1223 ; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1224 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
1225 ; AVX512VL-SLOW-NEXT: retq
1227 ; AVX512VL-FAST-LABEL: shuffle_v8i16_443aXXXX:
1228 ; AVX512VL-FAST: # %bb.0:
1229 ; AVX512VL-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
1230 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7,4,5,8,9,10,11,12,13,14,15]
1231 ; AVX512VL-FAST-NEXT: retq
1232 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
1233 ret <8 x i16> %shuffle
; Two-input shuffle whose low four lanes are a0, a3, a2, b5 (mask 0,3,2,13);
; lanes 4-7 are undef.
; Checked lowerings: movsd plus a pshufd/pshufhw/pshuflw chain on SSE2, two
; pshufb and a por on SSSE3, and a blend (pblendw, or vpblendd under the
; AVX2OR512VL prefix) followed by one byte shuffle from SSE4.1 upward.
1236 define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
1237 ; SSE2-LABEL: shuffle_v8i16_032dXXXX:
1239 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1240 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,0]
1241 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7]
1242 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
1243 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1246 ; SSSE3-LABEL: shuffle_v8i16_032dXXXX:
1248 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1249 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1250 ; SSSE3-NEXT: por %xmm1, %xmm0
1253 ; SSE41-LABEL: shuffle_v8i16_032dXXXX:
1255 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1256 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1259 ; AVX1-LABEL: shuffle_v8i16_032dXXXX:
1261 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1262 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1265 ; AVX2OR512VL-LABEL: shuffle_v8i16_032dXXXX:
1266 ; AVX2OR512VL: # %bb.0:
1267 ; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1268 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
1269 ; AVX2OR512VL-NEXT: retq
1270 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1271 ret <8 x i16> %shuffle
; Only result lane 3 is defined: it takes element 5 of %b (mask index 13);
; every other lane is undef.
; The checks expect a single dword shuffle of xmm1 (pshufd on SSE, vpermilps
; on AVX).
1273 define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) {
1274 ; SSE-LABEL: shuffle_v8i16_XXXdXXXX:
1276 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
1279 ; AVX-LABEL: shuffle_v8i16_XXXdXXXX:
1281 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[2,2,3,3]
1283 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1284 ret <8 x i16> %shuffle
; Two-input shuffle whose low four lanes are a0, a1, a2, b5 (mask 0,1,2,13);
; lanes 4-7 are undef.
; Checked lowerings: pshufd of xmm1 plus a constant-mask and/andn/or blend on
; SSE2, two pshufb and a por on SSSE3, and pshufd of xmm1 plus pblendw from
; SSE4.1 upward.
1287 define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
1288 ; SSE2-LABEL: shuffle_v8i16_012dXXXX:
1290 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1291 ; SSE2-NEXT: pand %xmm2, %xmm0
1292 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1293 ; SSE2-NEXT: pandn %xmm1, %xmm2
1294 ; SSE2-NEXT: por %xmm2, %xmm0
1297 ; SSSE3-LABEL: shuffle_v8i16_012dXXXX:
1299 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
1300 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
1301 ; SSSE3-NEXT: por %xmm1, %xmm0
1304 ; SSE41-LABEL: shuffle_v8i16_012dXXXX:
1306 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1307 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1310 ; AVX-LABEL: shuffle_v8i16_012dXXXX:
1312 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1313 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
1315 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
1316 ret <8 x i16> %shuffle
; Two-input shuffle whose lanes 4-7 are b4, b5, b6, a3 (mask 12,13,14,3);
; lanes 0-3 are undef.
; Checked lowerings: pshufd of xmm0 plus a constant-mask and/andn/or blend on
; SSE2, two pshufb and a por on SSSE3, pshufd of xmm0 plus pblendw on SSE4.1
; and AVX1, and vpbroadcastq of xmm0 plus vpblendw under the AVX2OR512VL prefix.
1319 define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
1320 ; SSE2-LABEL: shuffle_v8i16_XXXXcde3:
1322 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,65535,0]
1323 ; SSE2-NEXT: pand %xmm2, %xmm1
1324 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1325 ; SSE2-NEXT: pandn %xmm0, %xmm2
1326 ; SSE2-NEXT: por %xmm1, %xmm2
1327 ; SSE2-NEXT: movdqa %xmm2, %xmm0
1330 ; SSSE3-LABEL: shuffle_v8i16_XXXXcde3:
1332 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm0[6,7]
1333 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,8,9,10,11,12,13],zero,zero
1334 ; SSSE3-NEXT: por %xmm1, %xmm0
1337 ; SSE41-LABEL: shuffle_v8i16_XXXXcde3:
1339 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1340 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1343 ; AVX1-LABEL: shuffle_v8i16_XXXXcde3:
1345 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1346 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1349 ; AVX2OR512VL-LABEL: shuffle_v8i16_XXXXcde3:
1350 ; AVX2OR512VL: # %bb.0:
1351 ; AVX2OR512VL-NEXT: vpbroadcastq %xmm0, %xmm0
1352 ; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
1353 ; AVX2OR512VL-NEXT: retq
1354 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
1355 ret <8 x i16> %shuffle
; Two-input shuffle whose low four lanes are b4, b5, b6, a3 (mask 12,13,14,3);
; lanes 4-7 are undef.
; Checked lowerings: pshufd of xmm1 plus a constant-mask and/andn/or blend on
; SSE2, two pshufb and a por on SSSE3, and pshufd of xmm1 plus pblendw from
; SSE4.1 upward.
1358 define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
1359 ; SSE2-LABEL: shuffle_v8i16_cde3XXXX:
1361 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
1362 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1363 ; SSE2-NEXT: pand %xmm2, %xmm1
1364 ; SSE2-NEXT: pandn %xmm0, %xmm2
1365 ; SSE2-NEXT: por %xmm1, %xmm2
1366 ; SSE2-NEXT: movdqa %xmm2, %xmm0
1369 ; SSSE3-LABEL: shuffle_v8i16_cde3XXXX:
1371 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[6,7,u,u,u,u,u,u,u,u]
1372 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],zero,zero,xmm1[u,u,u,u,u,u,u,u]
1373 ; SSSE3-NEXT: por %xmm1, %xmm0
1376 ; SSE41-LABEL: shuffle_v8i16_cde3XXXX:
1378 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1379 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1382 ; AVX-LABEL: shuffle_v8i16_cde3XXXX:
1384 ; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1385 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
1387 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1388 ret <8 x i16> %shuffle
; Two-input shuffle with mask 0,1,2,13,12,13,14,3: result is
; a0,a1,a2,b5,b4,b5,b6,a3 (aN = element N of %a, bN = element N of %b).
; Checked lowerings: movsd plus a seven-instruction pshufd/pshuflw/pshufhw
; chain on SSE2, two pshufb and a por on SSSE3, and a blend (pblendw, or
; vpblendd under the AVX2OR512VL prefix) followed by one byte shuffle from
; SSE4.1 upward.
1391 define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
1392 ; SSE2-LABEL: shuffle_v8i16_012dcde3:
1394 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
1395 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,2,1]
1396 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
1397 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
1398 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
1399 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]
1400 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
1401 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7]
1404 ; SSSE3-LABEL: shuffle_v8i16_012dcde3:
1406 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,8,9,10,11,12,13],zero,zero
1407 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
1408 ; SSSE3-NEXT: por %xmm1, %xmm0
1411 ; SSE41-LABEL: shuffle_v8i16_012dcde3:
1413 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1414 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1417 ; AVX1-LABEL: shuffle_v8i16_012dcde3:
1419 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1420 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1423 ; AVX2OR512VL-LABEL: shuffle_v8i16_012dcde3:
1424 ; AVX2OR512VL: # %bb.0:
1425 ; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1426 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
1427 ; AVX2OR512VL-NEXT: retq
1428 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3>
1429 ret <8 x i16> %shuffle
; Per-lane blend: every result lane N holds element N of either input, with
; lanes 1, 4, 5 and 6 coming from %b and the rest from %a (mask 0,9,2,3,12,13,14,7).
; The SSE2 and SSSE3 checks expect a constant-mask andps/andnps/orps sequence;
; the SSE4.1 and AVX checks expect a single (v)pblendw.
1432 define <8 x i16> @shuffle_v8i16_0923cde7(<8 x i16> %a, <8 x i16> %b) {
1433 ; SSE2-LABEL: shuffle_v8i16_0923cde7:
1435 ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1436 ; SSE2-NEXT: andps %xmm2, %xmm0
1437 ; SSE2-NEXT: andnps %xmm1, %xmm2
1438 ; SSE2-NEXT: orps %xmm2, %xmm0
1441 ; SSSE3-LABEL: shuffle_v8i16_0923cde7:
1443 ; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
1444 ; SSSE3-NEXT: andps %xmm2, %xmm0
1445 ; SSSE3-NEXT: andnps %xmm1, %xmm2
1446 ; SSSE3-NEXT: orps %xmm2, %xmm0
1449 ; SSE41-LABEL: shuffle_v8i16_0923cde7:
1451 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1454 ; AVX-LABEL: shuffle_v8i16_0923cde7:
1456 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
1458 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7>
1459 ret <8 x i16> %shuffle
; Partially-undef two-input shuffle: lane 3 = a1, lane 5 = a5, lane 6 = a7,
; lane 7 = b1 (mask index 9); lanes 0, 1, 2 and 4 are undef.
; Checked lowerings: a constant-mask and/andn/or blend on SSE2, two pshufb and
; a por on SSSE3, and a pblendw of lane 7 from SSE4.1 upward. From AVX2 on the
; xmm1 operand is prepared with vpbroadcastd, and the runs with
; +fast-variable-shuffle use one vpshufb on xmm0 in place of vpshuflw + vpshufhw.
1462 define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
1463 ; SSE2-LABEL: shuffle_v8i16_XXX1X579:
1465 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,2,0]
1466 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0]
1467 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1468 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1469 ; SSE2-NEXT: pand %xmm1, %xmm0
1470 ; SSE2-NEXT: pandn %xmm2, %xmm1
1471 ; SSE2-NEXT: por %xmm0, %xmm1
1472 ; SSE2-NEXT: movdqa %xmm1, %xmm0
1475 ; SSSE3-LABEL: shuffle_v8i16_XXX1X579:
1477 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u],zero,zero,xmm1[u,u],zero,zero,zero,zero,xmm1[2,3]
1478 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,2,3,u,u,10,11,14,15],zero,zero
1479 ; SSSE3-NEXT: por %xmm1, %xmm0
1482 ; SSE41-LABEL: shuffle_v8i16_XXX1X579:
1484 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1485 ; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1486 ; SSE41-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1487 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1490 ; AVX1-LABEL: shuffle_v8i16_XXX1X579:
1492 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
1493 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1494 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1495 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1498 ; AVX2-SLOW-LABEL: shuffle_v8i16_XXX1X579:
1499 ; AVX2-SLOW: # %bb.0:
1500 ; AVX2-SLOW-NEXT: vpbroadcastd %xmm1, %xmm1
1501 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1502 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1503 ; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1504 ; AVX2-SLOW-NEXT: retq
1506 ; AVX2-FAST-LABEL: shuffle_v8i16_XXX1X579:
1507 ; AVX2-FAST: # %bb.0:
1508 ; AVX2-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
1509 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,2,3,8,9,10,11,14,15,14,15]
1510 ; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1511 ; AVX2-FAST-NEXT: retq
1513 ; AVX512VL-SLOW-LABEL: shuffle_v8i16_XXX1X579:
1514 ; AVX512VL-SLOW: # %bb.0:
1515 ; AVX512VL-SLOW-NEXT: vpbroadcastd %xmm1, %xmm1
1516 ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
1517 ; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
1518 ; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1519 ; AVX512VL-SLOW-NEXT: retq
1521 ; AVX512VL-FAST-LABEL: shuffle_v8i16_XXX1X579:
1522 ; AVX512VL-FAST: # %bb.0:
1523 ; AVX512VL-FAST-NEXT: vpbroadcastd %xmm1, %xmm1
1524 ; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,2,3,8,9,10,11,14,15,14,15]
1525 ; AVX512VL-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
1526 ; AVX512VL-FAST-NEXT: retq
1527 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9>
1528 ret <8 x i16> %shuffle
; Partially-undef two-input shuffle: lane 2 = a4 and lanes 4, 5, 6 = b0, b2, b4
; (mask indices 8, 10, 12); lanes 0, 1, 3 and 7 are undef.
; Checked lowerings: a pshufd/pshuflw/pshufhw chain joined by movsd on SSE2,
; two pshufb and a por on SSSE3, and pshufb of xmm1 + pshufd of xmm0 combined
; by a blend (pblendw, or vpblendd under the AVX2OR512VL prefix) from SSE4.1 upward.
1531 define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) {
1532 ; SSE2-LABEL: shuffle_v8i16_XX4X8acX:
1534 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
1535 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
1536 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
1537 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
1538 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
1541 ; SSSE3-LABEL: shuffle_v8i16_XX4X8acX:
1543 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u]
1544 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u]
1545 ; SSSE3-NEXT: por %xmm1, %xmm0
1548 ; SSE41-LABEL: shuffle_v8i16_XX4X8acX:
1550 ; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1551 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1552 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1555 ; AVX1-LABEL: shuffle_v8i16_XX4X8acX:
1557 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1558 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1559 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1562 ; AVX2OR512VL-LABEL: shuffle_v8i16_XX4X8acX:
1563 ; AVX2OR512VL: # %bb.0:
1564 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
1565 ; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1566 ; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
1567 ; AVX2OR512VL-NEXT: retq
1568 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef>
1569 ret <8 x i16> %shuffle
; Places the scalar %i in lane 0 of an otherwise all-zero vector: %i is
; inserted at element 0 of an undef vector, and the shuffle takes that element
; (mask index 8) for lane 0 and zeroinitializer elements for lanes 1-7.
; The checks expect a zero-extending move of %di followed by (v)movd.
1572 define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
1573 ; SSE-LABEL: shuffle_v8i16_8zzzzzzz:
1575 ; SSE-NEXT: movzwl %di, %eax
1576 ; SSE-NEXT: movd %eax, %xmm0
1579 ; AVX-LABEL: shuffle_v8i16_8zzzzzzz:
1581 ; AVX-NEXT: movzwl %di, %eax
1582 ; AVX-NEXT: vmovd %eax, %xmm0
1584 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1585 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1586 ret <8 x i16> %shuffle
; Places the scalar %i in lane 1 of an otherwise all-zero vector. Only mask
; index 8 refers to the vector holding %i; the remaining indices (2,3,7,6,5,4,3)
; pick assorted elements of zeroinitializer, all of which are zero.
; The checks expect (v)pxor to zero xmm0 followed by (v)pinsrw $1.
1589 define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
1590 ; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
1592 ; SSE-NEXT: pxor %xmm0, %xmm0
1593 ; SSE-NEXT: pinsrw $1, %edi, %xmm0
1596 ; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
1598 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1599 ; AVX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0
1601 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1602 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
1603 ret <8 x i16> %shuffle
; Places the scalar %i in lane 5 of an otherwise all-zero vector: mask index 8
; selects the element holding %i, and every other lane reads element 0 of
; zeroinitializer.
; The checks expect (v)pxor to zero xmm0 followed by (v)pinsrw $5.
1606 define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
1607 ; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
1609 ; SSE-NEXT: pxor %xmm0, %xmm0
1610 ; SSE-NEXT: pinsrw $5, %edi, %xmm0
1613 ; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
1615 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1616 ; AVX-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0
1618 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1619 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
1620 ret <8 x i16> %shuffle
; Places the scalar %i in lane 7 (mask index 8); lanes 0, 3 and 6 read
; zeroinitializer and lanes 1, 2, 4 and 5 are undef.
; The checks expect (v)pxor to zero xmm0 followed by (v)pinsrw $7.
1623 define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
1624 ; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
1626 ; SSE-NEXT: pxor %xmm0, %xmm0
1627 ; SSE-NEXT: pinsrw $7, %edi, %xmm0
1630 ; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
1632 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1633 ; AVX-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0
1635 %a = insertelement <8 x i16> undef, i16 %i, i32 0
1636 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
1637 ret <8 x i16> %shuffle
; Places the scalar %i in lane 2 of an otherwise all-zero vector. Unlike its
; neighbours, %i is inserted at element 3 of the undef vector, so the mask
; uses index 11 (element 3 of the second operand) to fetch it.
; The checks expect (v)pxor to zero xmm0 followed by (v)pinsrw $2.
1640 define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
1641 ; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
1643 ; SSE-NEXT: pxor %xmm0, %xmm0
1644 ; SSE-NEXT: pinsrw $2, %edi, %xmm0
1647 ; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
1649 ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
1650 ; AVX-NEXT: vpinsrw $2, %edi, %xmm0, %xmm0
1652 %a = insertelement <8 x i16> undef, i16 %i, i32 3
1653 %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
1654 ret <8 x i16> %shuffle
; Two-input shuffle with consecutive indices 13,14,15,0,1,2,3,4: elements 5-7
; of %b followed by elements 0-4 of %a.
; The SSE2 checks expect psrldq + pslldq + por; the SSSE3, SSE4.1 and AVX
; checks expect a single (v)palignr.
1657 define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) {
1658 ; SSE2-LABEL: shuffle_v8i16_def01234:
1660 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1661 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1662 ; SSE2-NEXT: por %xmm1, %xmm0
1665 ; SSSE3-LABEL: shuffle_v8i16_def01234:
1667 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1670 ; SSE41-LABEL: shuffle_v8i16_def01234:
1672 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1675 ; AVX-LABEL: shuffle_v8i16_def01234:
1677 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1679 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
1680 ret <8 x i16> %shuffle
; Partially-undef two-input shuffle: lane 1 = element 6 of %b (mask index 14)
; and lanes 4, 5, 6 = elements 1, 2, 3 of %a; the other lanes are undef.
; The SSE2 checks expect psrldq + pslldq + por; the SSSE3, SSE4.1 and AVX
; checks expect a single (v)palignr.
1683 define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) {
1684 ; SSE2-LABEL: shuffle_v8i16_ueuu123u:
1686 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1687 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1688 ; SSE2-NEXT: por %xmm1, %xmm0
1691 ; SSSE3-LABEL: shuffle_v8i16_ueuu123u:
1693 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1696 ; SSE41-LABEL: shuffle_v8i16_ueuu123u:
1698 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1701 ; AVX-LABEL: shuffle_v8i16_ueuu123u:
1703 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1705 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1706 ret <8 x i16> %shuffle
; Single-input rotation of %a: elements 5, 6, 7 followed by elements 0-4
; (mask 5,6,7,0,1,2,3,4); %b is unused.
; The SSE2 checks expect a register copy then psrldq + pslldq + por; the
; SSSE3, SSE4.1 and AVX checks expect a single (v)palignr of xmm0 with itself.
1709 define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) {
1710 ; SSE2-LABEL: shuffle_v8i16_56701234:
1712 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1713 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1714 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1715 ; SSE2-NEXT: por %xmm1, %xmm0
1718 ; SSSE3-LABEL: shuffle_v8i16_56701234:
1720 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1723 ; SSE41-LABEL: shuffle_v8i16_56701234:
1725 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1728 ; AVX-LABEL: shuffle_v8i16_56701234:
1730 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1732 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
1733 ret <8 x i16> %shuffle
; Partially-undef single-input shuffle of %a: lane 1 = element 6 and lanes
; 4, 5, 6 = elements 1, 2, 3; the other lanes are undef and %b is unused.
; The SSE2 checks expect a register copy then psrldq + pslldq + por; the
; SSSE3, SSE4.1 and AVX checks expect a single (v)palignr of xmm0 with itself.
1736 define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) {
1737 ; SSE2-LABEL: shuffle_v8i16_u6uu123u:
1739 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1740 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1741 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1742 ; SSE2-NEXT: por %xmm1, %xmm0
1745 ; SSSE3-LABEL: shuffle_v8i16_u6uu123u:
1747 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1750 ; SSE41-LABEL: shuffle_v8i16_u6uu123u:
1752 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1755 ; AVX-LABEL: shuffle_v8i16_u6uu123u:
1757 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
1759 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1760 ret <8 x i16> %shuffle
; Only lanes 4-6 are defined (elements 1-3 of %a); everything else is undef.
; The checks expect this to collapse to a single 6-byte left byte shift
; (PSLLDQ / VPSLLDQ) on every run line.
1764 define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) {
1765 ; SSE-LABEL: shuffle_v8i16_uuuu123u:
1766 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1769 ; AVX-LABEL: shuffle_v8i16_uuuu123u:
1771 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
1773 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
1774 ret <8 x i16> %shuffle
; Two-input rotation: the last five elements of %b (indices 11-15) followed by
; the first three elements of %a. The checks expect one PALIGNR joining xmm1
; bytes 6-15 with xmm0 bytes 0-5 on SSSE3 and later, and
; PSRLDQ/PSLLDQ/POR on plain SSE2.
1778 define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) {
1779 ; SSE2-LABEL: shuffle_v8i16_bcdef012:
1780 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1781 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1782 ; SSE2-NEXT: por %xmm1, %xmm0
1785 ; SSSE3-LABEL: shuffle_v8i16_bcdef012:
1787 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1790 ; SSE41-LABEL: shuffle_v8i16_bcdef012:
1792 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1795 ; AVX-LABEL: shuffle_v8i16_bcdef012:
1797 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1799 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2>
1800 ret <8 x i16> %shuffle
; Two-input rotation with undef lanes: lanes 1-3 take elements 12-14 (from %b)
; and lane 6 takes element 1 (from %a); the other lanes are undef. The checks
; expect a PALIGNR of xmm1 bytes 6-15 with xmm0 bytes 0-5 on SSSE3 and later,
; and PSRLDQ/PSLLDQ/POR on plain SSE2.
1804 define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) {
1805 ; SSE2-LABEL: shuffle_v8i16_ucdeuu1u:
1806 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1807 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1808 ; SSE2-NEXT: por %xmm1, %xmm0
1811 ; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u:
1813 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1816 ; SSE41-LABEL: shuffle_v8i16_ucdeuu1u:
1818 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1821 ; AVX-LABEL: shuffle_v8i16_ucdeuu1u:
1823 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
1825 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef>
1826 ret <8 x i16> %shuffle
; Single-input rotation: %a rotated so that element 3 lands in lane 0; %b is
; never referenced by the mask. The checks expect a self-PALIGNR of xmm0 on
; SSSE3 and later, and a copy plus PSRLDQ/PSLLDQ/POR on plain SSE2.
1830 define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) {
1831 ; SSE2-LABEL: shuffle_v8i16_34567012:
1832 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1833 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1834 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1835 ; SSE2-NEXT: por %xmm1, %xmm0
1838 ; SSSE3-LABEL: shuffle_v8i16_34567012:
1840 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1843 ; SSE41-LABEL: shuffle_v8i16_34567012:
1845 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1848 ; AVX-LABEL: shuffle_v8i16_34567012:
1850 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1852 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2>
1853 ret <8 x i16> %shuffle
; Single-input rotation with undef lanes: lanes 1-3 take elements 4-6 and
; lane 6 takes element 1, all from %a. The checks expect a self-PALIGNR of
; xmm0 on SSSE3 and later, and a copy plus PSRLDQ/PSLLDQ/POR on plain SSE2.
1857 define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) {
1858 ; SSE2-LABEL: shuffle_v8i16_u456uu1u:
1859 ; SSE2-NEXT: movdqa %xmm0, %xmm1
1860 ; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1861 ; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1862 ; SSE2-NEXT: por %xmm1, %xmm0
1865 ; SSSE3-LABEL: shuffle_v8i16_u456uu1u:
1867 ; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1870 ; SSE41-LABEL: shuffle_v8i16_u456uu1u:
1872 ; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1875 ; AVX-LABEL: shuffle_v8i16_u456uu1u:
1877 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
1879 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef>
1880 ret <8 x i16> %shuffle
; Only lanes 1-3 are defined (elements 4-6 of %a); everything else is undef.
; The checks expect a single 6-byte right byte shift (PSRLDQ / VPSRLDQ) on
; every run line.
1884 define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) {
1885 ; SSE-LABEL: shuffle_v8i16_u456uuuu:
1886 ; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1889 ; AVX-LABEL: shuffle_v8i16_u456uuuu:
1891 ; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1893 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
1894 ret <8 x i16> %shuffle
; Two-input rotation in the other direction: the last five elements of %a
; (3-7) followed by the first three elements of %b (indices 8-10). With the
; two-operand SSSE3/SSE4.1 PALIGNR the checks expect the result in xmm1 and
; then a MOVDQA back to xmm0; the AVX form writes xmm0 directly. Plain SSE2
; uses PSRLDQ/PSLLDQ/POR.
1898 define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) {
1899 ; SSE2-LABEL: shuffle_v8i16_3456789a:
1900 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1901 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
1902 ; SSE2-NEXT: por %xmm1, %xmm0
1905 ; SSSE3-LABEL: shuffle_v8i16_3456789a:
1907 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1908 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1911 ; SSE41-LABEL: shuffle_v8i16_3456789a:
1913 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1914 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1917 ; AVX-LABEL: shuffle_v8i16_3456789a:
1919 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1921 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
1922 ret <8 x i16> %shuffle
; Two-input rotation with undef lanes: lanes 1-3 take elements 4-6 of %a and
; lane 6 takes index 9 (element 1 of %b). The checks expect PALIGNR of xmm0
; bytes 6-15 with xmm1 bytes 0-5 (plus a MOVDQA for the two-operand SSE
; encodings), and PSRLDQ/PSLLDQ/POR on plain SSE2.
1926 define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) {
1927 ; SSE2-LABEL: shuffle_v8i16_u456uu9u:
1928 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
1929 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
1930 ; SSE2-NEXT: por %xmm1, %xmm0
1933 ; SSSE3-LABEL: shuffle_v8i16_u456uu9u:
1935 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1936 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1939 ; SSE41-LABEL: shuffle_v8i16_u456uu9u:
1941 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1942 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1945 ; AVX-LABEL: shuffle_v8i16_u456uu9u:
1947 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
1949 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef>
1950 ret <8 x i16> %shuffle
; Two-input rotation: the last three elements of %a (5-7) followed by the
; first five elements of %b (indices 8-12). The checks expect PALIGNR of xmm0
; bytes 10-15 with xmm1 bytes 0-9 (plus a MOVDQA for the two-operand SSE
; encodings), and PSRLDQ/PSLLDQ/POR on plain SSE2.
1954 define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) {
1955 ; SSE2-LABEL: shuffle_v8i16_56789abc:
1956 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1957 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
1958 ; SSE2-NEXT: por %xmm1, %xmm0
1961 ; SSSE3-LABEL: shuffle_v8i16_56789abc:
1963 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1964 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1967 ; SSE41-LABEL: shuffle_v8i16_56789abc:
1969 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1970 ; SSE41-NEXT: movdqa %xmm1, %xmm0
1973 ; AVX-LABEL: shuffle_v8i16_56789abc:
1975 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1977 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
1978 ret <8 x i16> %shuffle
; Two-input rotation with undef lanes: lane 1 takes element 6 of %a and lanes
; 4-6 take indices 9-11 (elements 1-3 of %b). The checks expect PALIGNR of
; xmm0 bytes 10-15 with xmm1 bytes 0-9 (plus a MOVDQA for the two-operand SSE
; encodings), and PSRLDQ/PSLLDQ/POR on plain SSE2.
1981 define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) {
1982 ; SSE2-LABEL: shuffle_v8i16_u6uu9abu:
1984 ; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1985 ; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
1986 ; SSE2-NEXT: por %xmm1, %xmm0
1989 ; SSSE3-LABEL: shuffle_v8i16_u6uu9abu:
1991 ; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1992 ; SSSE3-NEXT: movdqa %xmm1, %xmm0
1995 ; SSE41-LABEL: shuffle_v8i16_u6uu9abu:
1997 ; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
1998 ; SSE41-NEXT: movdqa %xmm1, %xmm0
2001 ; AVX-LABEL: shuffle_v8i16_u6uu9abu:
2003 ; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
2005 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
2006 ret <8 x i16> %shuffle
; Element 0 goes to lane 0 and element 1 to lane 4; the remaining lanes are
; undef. With SSE4.1 and AVX the checks expect the undef lanes to be filled
; with zeros so the whole shuffle becomes a word-to-qword zero extension
; (PMOVZXWQ). SSE2 and SSSE3 use PSHUFD followed by PSHUFHW instead.
2009 define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) {
2010 ; SSE2-LABEL: shuffle_v8i16_0uuu1uuu:
2012 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
2013 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
2016 ; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu:
2018 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
2019 ; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
2022 ; SSE41-LABEL: shuffle_v8i16_0uuu1uuu:
2024 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2027 ; AVX-LABEL: shuffle_v8i16_0uuu1uuu:
2029 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2031 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
2032 ret <8 x i16> %shuffle
; Word-to-qword zero extension written as a shuffle: lanes 0 and 4 take
; elements 0 and 1 of %a, every other lane reads the zeroinitializer operand
; (indices 9-11 and 13-15). The checks expect PMOVZXWQ with SSE4.1 and AVX,
; and a PXOR followed by PUNPCKLWD and PUNPCKLDQ against the zero register on
; SSE2 and SSSE3.
2035 define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) {
2036 ; SSE2-LABEL: shuffle_v8i16_0zzz1zzz:
2038 ; SSE2-NEXT: pxor %xmm1, %xmm1
2039 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2040 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2043 ; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz:
2045 ; SSSE3-NEXT: pxor %xmm1, %xmm1
2046 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2047 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2050 ; SSE41-LABEL: shuffle_v8i16_0zzz1zzz:
2052 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2055 ; AVX-LABEL: shuffle_v8i16_0zzz1zzz:
2057 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
2059 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
2060 ret <8 x i16> %shuffle
; Elements 0-3 of %a are spread to the even lanes; the odd lanes are undef.
; With SSE4.1 and AVX the checks expect a word-to-dword zero extension
; (PMOVZXWD); SSE2 and SSSE3 instead unpack xmm0 with itself via PUNPCKLWD.
2063 define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) {
2064 ; SSE2-LABEL: shuffle_v8i16_0u1u2u3u:
2066 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2069 ; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u:
2071 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
2074 ; SSE41-LABEL: shuffle_v8i16_0u1u2u3u:
2076 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2079 ; AVX-LABEL: shuffle_v8i16_0u1u2u3u:
2081 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2083 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
2084 ret <8 x i16> %shuffle
; Word-to-dword zero extension written as a shuffle: even lanes take elements
; 0-3 of %a and odd lanes read the zeroinitializer operand. The checks expect
; PMOVZXWD with SSE4.1 and AVX, and PXOR plus PUNPCKLWD against the zero
; register on SSE2 and SSSE3.
2087 define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
2088 ; SSE2-LABEL: shuffle_v8i16_0z1z2z3z:
2090 ; SSE2-NEXT: pxor %xmm1, %xmm1
2091 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2094 ; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z:
2096 ; SSSE3-NEXT: pxor %xmm1, %xmm1
2097 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2100 ; SSE41-LABEL: shuffle_v8i16_0z1z2z3z:
2102 ; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2105 ; AVX-LABEL: shuffle_v8i16_0z1z2z3z:
2107 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2109 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
2110 ret <8 x i16> %shuffle
; Single-input shuffle that builds the pattern 0,1,1,0 in both halves using
; only elements 0 and 1 of %a (the zeroinitializer operand is never selected).
; The checks expect one PSHUFB when SSSE3 or later is available, and a
; PSHUFD/PSHUFLW/PSHUFHW sequence on plain SSE2.
2113 define <8 x i16> @shuffle_v8i16_01100110(<8 x i16> %a) {
2114 ; SSE2-LABEL: shuffle_v8i16_01100110:
2116 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
2117 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,0,4,5,6,7]
2118 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,4]
2121 ; SSSE3-LABEL: shuffle_v8i16_01100110:
2123 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2126 ; SSE41-LABEL: shuffle_v8i16_01100110:
2128 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2131 ; AVX-LABEL: shuffle_v8i16_01100110:
2133 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,2,3,2,3,0,1]
2135 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 1, i32 0>
2136 ret <8 x i16> %shuffle
; The 0,1,1,0 / 0,1,1,0 pattern with lanes 2 and 4 left undef. The checks
; expect the same instruction shapes as for a fully defined mask (one PSHUFB
; with SSSE3 or later, PSHUFD/PSHUFLW/PSHUFHW on plain SSE2), with the undef
; lanes showing up as pass-through indices in the printed shuffle masks.
2139 define <8 x i16> @shuffle_v8i16_01u0u110(<8 x i16> %a) {
2140 ; SSE2-LABEL: shuffle_v8i16_01u0u110:
2142 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
2143 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,0,4,5,6,7]
2144 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,4]
2147 ; SSSE3-LABEL: shuffle_v8i16_01u0u110:
2149 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,0,1,0,1,2,3,2,3,0,1]
2152 ; SSE41-LABEL: shuffle_v8i16_01u0u110:
2154 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,0,1,0,1,2,3,2,3,0,1]
2157 ; AVX-LABEL: shuffle_v8i16_01u0u110:
2159 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,0,1,0,1,2,3,2,3,0,1]
2161 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 0, i32 undef, i32 1, i32 1, i32 0>
2162 ret <8 x i16> %shuffle
; Single-input shuffle whose defined lanes all read the high half of %a
; (elements 4-7); lanes 3 and 4 are undef. The checks expect one PSHUFB with
; SSSE3 or later. Plain SSE2 first duplicates the high qword with PSHUFD and
; then fixes up each half with PSHUFLW and PSHUFHW.
2165 define <8 x i16> @shuffle_v8i16_467uu675(<8 x i16> %a) {
2166 ; SSE2-LABEL: shuffle_v8i16_467uu675:
2168 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
2169 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,3,3,4,5,6,7]
2170 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,7,5]
2173 ; SSSE3-LABEL: shuffle_v8i16_467uu675:
2175 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,14,15,8,9,12,13,14,15,10,11]
2178 ; SSE41-LABEL: shuffle_v8i16_467uu675:
2180 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,14,15,8,9,12,13,14,15,10,11]
2183 ; AVX-LABEL: shuffle_v8i16_467uu675:
2185 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,12,13,14,15,14,15,8,9,12,13,14,15,10,11]
2187 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 4, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7, i32 5>
2188 ret <8 x i16> %shuffle
2192 ; Shuffle to logical bit shifts
; Each 32-bit group becomes (zero, low word): index 8 selects element 0 of the
; zeroinitializer operand. The checks expect this to be matched as a 16-bit
; left shift of every dword (PSLLD $16).
2194 define <8 x i16> @shuffle_v8i16_z0z2z4z6(<8 x i16> %a) {
2195 ; SSE-LABEL: shuffle_v8i16_z0z2z4z6:
2197 ; SSE-NEXT: pslld $16, %xmm0
2200 ; AVX-LABEL: shuffle_v8i16_z0z2z4z6:
2202 ; AVX-NEXT: vpslld $16, %xmm0, %xmm0
2204 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6>
2205 ret <8 x i16> %shuffle
; Each 64-bit group becomes three zero words followed by its original lowest
; word (elements 0 and 4). The checks expect a 48-bit left shift of every
; qword (PSLLQ $48).
2208 define <8 x i16> @shuffle_v8i16_zzz0zzz4(<8 x i16> %a) {
2209 ; SSE-LABEL: shuffle_v8i16_zzz0zzz4:
2211 ; SSE-NEXT: psllq $48, %xmm0
2214 ; AVX-LABEL: shuffle_v8i16_zzz0zzz4:
2216 ; AVX-NEXT: vpsllq $48, %xmm0, %xmm0
2218 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4>
2219 ret <8 x i16> %shuffle
; Qword left shift by two words with some lanes undef (spelled X in the name):
; lanes 0, 1 and 4 are zero, lanes 2, 3 and 6 take elements 0, 1 and 4, and
; lanes 5 and 7 are unconstrained. The checks expect PSLLQ $32.
2222 define <8 x i16> @shuffle_v8i16_zz01zX4X(<8 x i16> %a) {
2223 ; SSE-LABEL: shuffle_v8i16_zz01zX4X:
2225 ; SSE-NEXT: psllq $32, %xmm0
2228 ; AVX-LABEL: shuffle_v8i16_zz01zX4X:
2230 ; AVX-NEXT: vpsllq $32, %xmm0, %xmm0
2232 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 0, i32 1, i32 8, i32 undef, i32 4, i32 undef>
2233 ret <8 x i16> %shuffle
; Qword left shift by one word with lane 2 undef: lanes 0 and 4 are zero and
; the defined lanes of each qword hold that qword's elements moved up by one
; position. The checks expect PSLLQ $16.
2236 define <8 x i16> @shuffle_v8i16_z0X2z456(<8 x i16> %a) {
2237 ; SSE-LABEL: shuffle_v8i16_z0X2z456:
2239 ; SSE-NEXT: psllq $16, %xmm0
2242 ; AVX-LABEL: shuffle_v8i16_z0X2z456:
2244 ; AVX-NEXT: vpsllq $16, %xmm0, %xmm0
2246 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 undef, i32 2, i32 8, i32 4, i32 5, i32 6>
2247 ret <8 x i16> %shuffle
; Each 32-bit group becomes (high word, zero), with lane 4 left undef. The
; checks expect a 16-bit logical right shift of every dword (PSRLD $16).
2250 define <8 x i16> @shuffle_v8i16_1z3zXz7z(<8 x i16> %a) {
2251 ; SSE-LABEL: shuffle_v8i16_1z3zXz7z:
2253 ; SSE-NEXT: psrld $16, %xmm0
2256 ; AVX-LABEL: shuffle_v8i16_1z3zXz7z:
2258 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
2260 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 undef, i32 8, i32 7, i32 8>
2261 ret <8 x i16> %shuffle
; Qword right shift by one word with lane 1 undef: the defined lanes of each
; qword hold that qword's elements moved down by one position, and the top
; lane of each qword (lanes 3 and 7) is zero. The checks expect PSRLQ $16.
2264 define <8 x i16> @shuffle_v8i16_1X3z567z(<8 x i16> %a) {
2265 ; SSE-LABEL: shuffle_v8i16_1X3z567z:
2267 ; SSE-NEXT: psrlq $16, %xmm0
2270 ; AVX-LABEL: shuffle_v8i16_1X3z567z:
2272 ; AVX-NEXT: vpsrlq $16, %xmm0, %xmm0
2274 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8>
2275 ret <8 x i16> %shuffle
; The upper two words of each qword move to its lower two lanes and the upper
; two lanes become zero. The checks expect a 32-bit logical right shift of
; every qword (PSRLQ $32).
2278 define <8 x i16> @shuffle_v8i16_23zz67zz(<8 x i16> %a) {
2279 ; SSE-LABEL: shuffle_v8i16_23zz67zz:
2281 ; SSE-NEXT: psrlq $32, %xmm0
2284 ; AVX-LABEL: shuffle_v8i16_23zz67zz:
2286 ; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
2288 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8>
2289 ret <8 x i16> %shuffle
; Lane 0 takes element 3, lanes 1 and 5-7 are zero, and lanes 2-4 are undef.
; The checks expect the undef lanes to be chosen so that the whole shuffle is
; a 48-bit logical right shift of every qword (PSRLQ $48).
2292 define <8 x i16> @shuffle_v8i16_3zXXXzzz(<8 x i16> %a) {
2293 ; SSE-LABEL: shuffle_v8i16_3zXXXzzz:
2295 ; SSE-NEXT: psrlq $48, %xmm0
2298 ; AVX-LABEL: shuffle_v8i16_3zXXXzzz:
2300 ; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0
2302 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8>
2303 ret <8 x i16> %shuffle
; The defined lanes keep the low qword of %a (elements 0, 1, 3) and zero the
; high qword; lanes 2 and 6 are undef. The checks expect a single MOVQ, which
; keeps the low 64 bits and clears the upper 64 bits.
2306 define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) {
2307 ; SSE-LABEL: shuffle_v8i16_01u3zzuz:
2309 ; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
2312 ; AVX-LABEL: shuffle_v8i16_01u3zzuz:
2314 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
2316 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8>
2317 ret <8 x i16> %shuffle
; Blend with zero that clears a single lane: every lane keeps its own element
; of %a except lane 1, which reads the zeroinitializer operand (index 9). The
; checks expect an ANDPS with a RIP-relative memory operand on SSE2 and SSSE3,
; and a zeroed register plus PBLENDW with SSE4.1 and AVX.
2320 define <8 x i16> @shuffle_v8i16_0z234567(<8 x i16> %a) {
2321 ; SSE2-LABEL: shuffle_v8i16_0z234567:
2323 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
2326 ; SSSE3-LABEL: shuffle_v8i16_0z234567:
2328 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
2331 ; SSE41-LABEL: shuffle_v8i16_0z234567:
2333 ; SSE41-NEXT: pxor %xmm1, %xmm1
2334 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2337 ; AVX-LABEL: shuffle_v8i16_0z234567:
2339 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2340 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
2342 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2343 ret <8 x i16> %shuffle
; Blend with zero that keeps only lanes 0, 5 and 7 of %a in place; lanes 1-4
; and 6 read element 0 of the zeroinitializer operand (index 8). The checks
; expect an ANDPS with a RIP-relative memory operand on SSE2 and SSSE3, and a
; zeroed register plus PBLENDW with SSE4.1 and AVX.
2346 define <8 x i16> @shuffle_v8i16_0zzzz5z7(<8 x i16> %a) {
2347 ; SSE2-LABEL: shuffle_v8i16_0zzzz5z7:
2349 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
2352 ; SSSE3-LABEL: shuffle_v8i16_0zzzz5z7:
2354 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
2357 ; SSE41-LABEL: shuffle_v8i16_0zzzz5z7:
2359 ; SSE41-NEXT: pxor %xmm1, %xmm1
2360 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2363 ; AVX-LABEL: shuffle_v8i16_0zzzz5z7:
2365 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2366 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
2368 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 5, i32 8, i32 7>
2369 ret <8 x i16> %shuffle
; Blend with zero that clears only the top lane: lanes 0-6 keep their own
; element of %a and lane 7 reads the zeroinitializer operand (index 15), as
; the test name 0123456z spells out.
; NOTE(review): the mask used to carry index 9 in lane 1, which zeroed lane 1
; as well and contradicted the name. The mask and the two PBLENDW assertions
; below were corrected by hand; rerun utils/update_llc_test_checks.py to
; confirm the expected output.
2372 define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) {
2373 ; SSE2-LABEL: shuffle_v8i16_0123456z:
2375 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
2378 ; SSSE3-LABEL: shuffle_v8i16_0123456z:
2380 ; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
2383 ; SSE41-LABEL: shuffle_v8i16_0123456z:
2385 ; SSE41-NEXT: pxor %xmm1, %xmm1
2386 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2389 ; AVX-LABEL: shuffle_v8i16_0123456z:
2391 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
2392 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
2394 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
2395 ret <8 x i16> %shuffle
; Two-input shuffle with undef lanes: lane 0 takes index 15 and lanes 4-5 take
; index 12 (both from %b), while lanes 2 and 6 take elements 3 and 5 of %a.
; The checks expect the same three-step sequence on every run line: shift %a
; up by one word (PSLLDQ), reorder the high words of %b (PSHUFHW), then
; interleave the high dwords of both (PUNPCKHDQ). The two-operand SSE form
; needs a final MOVDQA to return the result in xmm0.
2398 define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
2399 ; SSE-LABEL: shuffle_v8i16_fu3ucc5u:
2401 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2402 ; SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2403 ; SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2404 ; SSE-NEXT: movdqa %xmm1, %xmm0
2407 ; AVX-LABEL: shuffle_v8i16_fu3ucc5u:
2409 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2410 ; AVX-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
2411 ; AVX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
2413 %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
2414 ret <8 x i16> %shuffle
; Lane 0 reads the zeroinitializer operand (index 8), lanes 1-6 take elements
; 0-5 of %a, and lane 7 is undef. The checks expect a whole-register left
; shift by two bytes (PSLLDQ / VPSLLDQ), which shifts zeros into lane 0.
2417 define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) {
2418 ; SSE-LABEL: shuffle_v8i16_8012345u:
2420 ; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2423 ; AVX-LABEL: shuffle_v8i16_8012345u:
2425 ; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
2427 %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef>
2429 ret <8 x i16> %shuffle
; A blend expressed as bitwise AND/OR on <2 x i64> rather than as a
; shufflevector. %3 keeps only the top 32 bits of the high qword of %a
; (i16 lanes 6-7) and %4 keeps the low 96 bits of %b (i16 lanes 0-5); the OR
; merges them. The checks expect this to be recognized as a blend: PBLENDW
; with SSE4.1 and AVX1, VBLENDPS with AVX2 and AVX512VL, and two SHUFPS plus a
; MOVAPS on SSE2 and SSSE3.
; NOTE(review): the name reads as lanes 0-5 from the first operand and e,f
; from the second, but the constants select lanes 0-5 from %b and 6-7 from %a.
2432 define <8 x i16> @mask_v8i16_012345ef(<8 x i16> %a, <8 x i16> %b) {
2433 ; SSE2-LABEL: mask_v8i16_012345ef:
2435 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
2436 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
2437 ; SSE2-NEXT: movaps %xmm1, %xmm0
2440 ; SSSE3-LABEL: mask_v8i16_012345ef:
2442 ; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
2443 ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
2444 ; SSSE3-NEXT: movaps %xmm1, %xmm0
2447 ; SSE41-LABEL: mask_v8i16_012345ef:
2449 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
2452 ; AVX1-LABEL: mask_v8i16_012345ef:
2454 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
2457 ; AVX2OR512VL-LABEL: mask_v8i16_012345ef:
2458 ; AVX2OR512VL: # %bb.0:
2459 ; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
2460 ; AVX2OR512VL-NEXT: retq
2461 %1 = bitcast <8 x i16> %a to <2 x i64>
2462 %2 = bitcast <8 x i16> %b to <2 x i64>
2463 %3 = and <2 x i64> %1, <i64 0, i64 -4294967296>
2464 %4 = and <2 x i64> %2, <i64 -1, i64 4294967295>
2465 %5 = or <2 x i64> %4, %3
2466 %6 = bitcast <2 x i64> %5 to <8 x i16>
; Splat of i16 lane 0 of a vector built by loading an i32 and inserting it
; into lane 0 of a zero <4 x i32>. With AVX2 and AVX512VL the checks expect
; the load to be folded into a single VPBROADCASTW from (%rdi); SSE and AVX1
; load with MOVD and then splat with PSHUFLW followed by PSHUFD.
2470 define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
2471 ; SSE-LABEL: insert_dup_mem_v8i16_i32:
2473 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2474 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2475 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2478 ; AVX1-LABEL: insert_dup_mem_v8i16_i32:
2480 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2481 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2482 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2485 ; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_i32:
2486 ; AVX2OR512VL: # %bb.0:
2487 ; AVX2OR512VL-NEXT: vpbroadcastw (%rdi), %xmm0
2488 ; AVX2OR512VL-NEXT: retq
2489 %tmp = load i32, i32* %ptr, align 4
2490 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2491 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
2492 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> zeroinitializer
; Splat of i16 lane 0 of a vector built from an i16 load that is sign-extended
; to i32 and inserted into lane 0 of a zero <4 x i32>. Every run line is
; expected to keep the sign-extending scalar load (MOVSWL). The splat is then
; PSHUFLW plus PSHUFD on plain SSE2, PSHUFB on SSSE3, SSE4.1 and AVX1, a
; register VPBROADCASTW after VMOVD on AVX2, and a VPBROADCASTW directly from
; %eax on AVX512VL.
2496 define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
2497 ; SSE2-LABEL: insert_dup_mem_v8i16_sext_i16:
2499 ; SSE2-NEXT: movswl (%rdi), %eax
2500 ; SSE2-NEXT: movd %eax, %xmm0
2501 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2502 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2505 ; SSSE3-LABEL: insert_dup_mem_v8i16_sext_i16:
2507 ; SSSE3-NEXT: movswl (%rdi), %eax
2508 ; SSSE3-NEXT: movd %eax, %xmm0
2509 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2512 ; SSE41-LABEL: insert_dup_mem_v8i16_sext_i16:
2514 ; SSE41-NEXT: movswl (%rdi), %eax
2515 ; SSE41-NEXT: movd %eax, %xmm0
2516 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2519 ; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
2521 ; AVX1-NEXT: movswl (%rdi), %eax
2522 ; AVX1-NEXT: vmovd %eax, %xmm0
2523 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
2526 ; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16:
2528 ; AVX2-NEXT: movswl (%rdi), %eax
2529 ; AVX2-NEXT: vmovd %eax, %xmm0
2530 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
2533 ; AVX512VL-LABEL: insert_dup_mem_v8i16_sext_i16:
2534 ; AVX512VL: # %bb.0:
2535 ; AVX512VL-NEXT: movswl (%rdi), %eax
2536 ; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
2537 ; AVX512VL-NEXT: retq
2538 %tmp = load i16, i16* %ptr, align 2
2539 %tmp1 = sext i16 %tmp to i32
2540 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
2541 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
2542 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer
; Splat of i16 lane 1 (the upper half of the loaded i32 on this little-endian
; target) of a vector built by inserting a loaded i32 into lane 0 of a zero
; <4 x i32>. With AVX2 and AVX512VL the checks expect a VPBROADCASTW straight
; from memory at byte offset 2; SSE and AVX1 load with MOVD and then splat
; word 1 with PSHUFLW followed by PSHUFD.
2546 define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
2547 ; SSE-LABEL: insert_dup_elt1_mem_v8i16_i32:
2549 ; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2550 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
2551 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2554 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i32:
2556 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2557 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
2558 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2561 ; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v8i16_i32:
2562 ; AVX2OR512VL: # %bb.0:
2563 ; AVX2OR512VL-NEXT: vpbroadcastw 2(%rdi), %xmm0
2564 ; AVX2OR512VL-NEXT: retq
2565 %tmp = load i32, i32* %ptr, align 4
2566 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
2567 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
2568 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Splat of i16 lane 3 of a vector built by inserting a loaded i32 into lane 1
; of a zero <4 x i32>; i16 lane 3 is the upper half of that i32 on this
; little-endian target. With AVX2 and AVX512VL the checks expect a
; VPBROADCASTW straight from memory at byte offset 2. SSSE3, SSE4.1 and AVX1
; load with MOVD and splat bytes 2-3 with PSHUFB; plain SSE2 needs
; PSHUFD/PSHUFLW/PSHUFD after the MOVD.
2572 define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
2573 ; SSE2-LABEL: insert_dup_elt3_mem_v8i16_i32:
2575 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2576 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
2577 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
2578 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2581 ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32:
2583 ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2584 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2587 ; SSE41-LABEL: insert_dup_elt3_mem_v8i16_i32:
2589 ; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2590 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2593 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i32:
2595 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2596 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2599 ; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v8i16_i32:
2600 ; AVX2OR512VL: # %bb.0:
2601 ; AVX2OR512VL-NEXT: vpbroadcastw 2(%rdi), %xmm0
2602 ; AVX2OR512VL-NEXT: retq
2603 %tmp = load i32, i32* %ptr, align 4
2604 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
2605 %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
2606 %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Splat of i16 lane 1 of a vector built from an i16 load that is sign-extended
; to i32 and inserted into lane 0 of a zero <4 x i32>; lane 1 is therefore the
; upper half of the sign-extended value. Every run line is expected to keep
; the MOVSWL load. Plain SSE2 then uses PSHUFLW plus PSHUFD; SSSE3, SSE4.1 and
; AVX1 use PSHUFB on bytes 2-3; AVX2 and AVX512VL first shift the scalar right
; by 16 (SHRL) and then broadcast its low word.
2610 define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
2611 ; SSE2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2613 ; SSE2-NEXT: movswl (%rdi), %eax
2614 ; SSE2-NEXT: movd %eax, %xmm0
2615 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
2616 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2619 ; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2621 ; SSSE3-NEXT: movswl (%rdi), %eax
2622 ; SSSE3-NEXT: movd %eax, %xmm0
2623 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2626 ; SSE41-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2628 ; SSE41-NEXT: movswl (%rdi), %eax
2629 ; SSE41-NEXT: movd %eax, %xmm0
2630 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2633 ; AVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2635 ; AVX1-NEXT: movswl (%rdi), %eax
2636 ; AVX1-NEXT: vmovd %eax, %xmm0
2637 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2640 ; AVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2642 ; AVX2-NEXT: movswl (%rdi), %eax
2643 ; AVX2-NEXT: shrl $16, %eax
2644 ; AVX2-NEXT: vmovd %eax, %xmm0
2645 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
2648 ; AVX512VL-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
2649 ; AVX512VL: # %bb.0:
2650 ; AVX512VL-NEXT: movswl (%rdi), %eax
2651 ; AVX512VL-NEXT: shrl $16, %eax
2652 ; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
2653 ; AVX512VL-NEXT: retq
2654 %tmp = load i16, i16* %ptr, align 2
2655 %tmp1 = sext i16 %tmp to i32
2656 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
2657 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
2658 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
2662 define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
2663 ; SSE2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2665 ; SSE2-NEXT: movswl (%rdi), %eax
2666 ; SSE2-NEXT: movd %eax, %xmm0
2667 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
2668 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
2669 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
2672 ; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2674 ; SSSE3-NEXT: movswl (%rdi), %eax
2675 ; SSSE3-NEXT: movd %eax, %xmm0
2676 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2679 ; SSE41-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2681 ; SSE41-NEXT: movswl (%rdi), %eax
2682 ; SSE41-NEXT: movd %eax, %xmm0
2683 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2686 ; AVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2688 ; AVX1-NEXT: movswl (%rdi), %eax
2689 ; AVX1-NEXT: vmovd %eax, %xmm0
2690 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
2693 ; AVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2695 ; AVX2-NEXT: movswl (%rdi), %eax
2696 ; AVX2-NEXT: shrl $16, %eax
2697 ; AVX2-NEXT: vmovd %eax, %xmm0
2698 ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
2701 ; AVX512VL-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
2702 ; AVX512VL: # %bb.0:
2703 ; AVX512VL-NEXT: movswl (%rdi), %eax
2704 ; AVX512VL-NEXT: shrl $16, %eax
2705 ; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
2706 ; AVX512VL-NEXT: retq
2707 %tmp = load i16, i16* %ptr, align 2
2708 %tmp1 = sext i16 %tmp to i32
2709 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 1
2710 %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
2711 %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>