1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
4 target triple = "x86_64-unknown-unknown"
6 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) {
7 ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
9 ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
10 ; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
11 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
12 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
13 ; CHECK-SSE2-NEXT: retq
14 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
15 ret <16 x i8> %shuffle
18 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(<16 x i8> %a, <16 x i8> %b) {
19 ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01
21 ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
22 ; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
23 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
24 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,5,5]
25 ; CHECK-SSE2-NEXT: retq
26 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
27 ret <16 x i8> %shuffle
30 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) {
31 ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08
33 ; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
34 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,2,4,5,6,7]
35 ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
36 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
37 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,6,6,6]
38 ; CHECK-SSE2-NEXT: retq
39 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
40 ret <16 x i8> %shuffle
43 define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) {
44 ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03
46 ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
47 ; CHECK-SSE2-NEXT: punpcklwd %xmm0, %xmm0
48 ; CHECK-SSE2-NEXT: retq
49 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
50 ret <16 x i8> %shuffle
53 define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(<16 x i8> %a, <16 x i8> %b) {
54 ; CHECK-SSE2-LABEL: @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07
56 ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
57 ; CHECK-SSE2-NEXT: punpckhwd %xmm0, %xmm0
58 ; CHECK-SSE2-NEXT: retq
59 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
60 ret <16 x i8> %shuffle
63 define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) {
64 ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12
66 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7]
67 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
68 ; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
69 ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
70 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7]
71 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,6,6]
72 ; CHECK-SSE2-NEXT: retq
73 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
74 ret <16 x i8> %shuffle
77 define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) {
78 ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07
80 ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm0
81 ; CHECK-SSE2-NEXT: retq
82 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
83 ret <16 x i8> %shuffle
86 define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
87 ; CHECK-SSE2-LABEL: @shuffle_v16i8_0101010101010101
88 ; CHECK-SSE2: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
89 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
90 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
91 ; CHECK-SSE2-NEXT: retq
92 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
93 ret <16 x i8> %shuffle
96 define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i8> %a, <16 x i8> %b) {
97 ; CHECK-SSE2-LABEL: @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23
98 ; CHECK-SSE2: punpcklbw %xmm1, %xmm0
99 ; CHECK-SSE2-NEXT: retq
100 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
101 ret <16 x i8> %shuffle
104 define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) {
105 ; CHECK-SSE2-LABEL: @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07
106 ; CHECK-SSE2: # BB#0:
107 ; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm1
108 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
109 ; CHECK-SSE2-NEXT: punpcklbw %xmm0, %xmm1
110 ; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0
111 ; CHECK-SSE2-NEXT: retq
112 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7>
113 ret <16 x i8> %shuffle
116 define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12(<16 x i8> %a, <16 x i8> %b) {
117 ; CHECK-SSE2-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12
118 ; CHECK-SSE2: pxor %xmm1, %xmm1
119 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
120 ; CHECK-SSE2-NEXT: punpckhbw %xmm1, %xmm2
121 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm2 = xmm2[3,2,1,0,4,5,6,7]
122 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm2 = xmm2[0,1,2,3,7,6,5,4]
123 ; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0
124 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
125 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
126 ; CHECK-SSE2-NEXT: packuswb %xmm2, %xmm0
127 ; CHECK-SSE2-NEXT: retq
128 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
129 ret <16 x i8> %shuffle
132 define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
133 ; CHECK-SSE2-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20
134 ; CHECK-SSE2: pxor %xmm2, %xmm2
135 ; CHECK-SSE2-NEXT: punpcklbw %xmm2, %xmm1
136 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[3,2,1,0,4,5,6,7]
137 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,7,6,5,4]
138 ; CHECK-SSE2-NEXT: punpcklbw %xmm2, %xmm0
139 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
140 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
141 ; CHECK-SSE2-NEXT: packuswb %xmm1, %xmm0
142 ; CHECK-SSE2-NEXT: retq
143 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20>
144 ret <16 x i8> %shuffle
147 define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
148 ; CHECK-SSE2-LABEL: @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20
149 ; CHECK-SSE2: pxor %xmm2, %xmm2
150 ; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm3
151 ; CHECK-SSE2-NEXT: punpcklbw %xmm2, %xmm3
152 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm3 = xmm3[0,1,2,3,7,6,5,4]
153 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm4
154 ; CHECK-SSE2-NEXT: punpckhbw %xmm2, %xmm4
155 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm4 = xmm4[3,2,1,0,4,5,6,7]
156 ; CHECK-SSE2-NEXT: shufpd {{.*}} # xmm4 = xmm4[0],xmm3[1]
157 ; CHECK-SSE2-NEXT: punpckhbw %xmm2, %xmm1
158 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,7,6,5,4]
159 ; CHECK-SSE2-NEXT: punpcklbw %xmm2, %xmm0
160 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
161 ; CHECK-SSE2-NEXT: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
162 ; CHECK-SSE2-NEXT: packuswb %xmm4, %xmm0
163 ; CHECK-SSE2-NEXT: retq
164 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20>
165 ret <16 x i8> %shuffle
168 define <16 x i8> @zext_to_v8i16_shuffle(<16 x i8> %a) {
169 ; CHECK-SSE2-LABEL: @zext_to_v8i16_shuffle
170 ; CHECK-SSE2: pxor %xmm1, %xmm1
171 ; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0
172 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31>
173 ret <16 x i8> %shuffle
176 define <16 x i8> @zext_to_v4i32_shuffle(<16 x i8> %a) {
177 ; CHECK-SSE2-LABEL: @zext_to_v4i32_shuffle
178 ; CHECK-SSE2: pxor %xmm1, %xmm1
179 ; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0
180 ; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0
181 %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31>
182 ret <16 x i8> %shuffle
185 define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) {
186 ; CHECK-SSE2-LABEL: @trunc_v4i32_shuffle
187 ; CHECK-SSE2: # BB#0:
188 ; CHECK-SSE2-NEXT: pand
189 ; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7]
190 ; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
191 ; CHECK-SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
192 ; CHECK-SSE2-NEXT: packuswb %xmm0, %xmm0
193 ; CHECK-SSE2-NEXT: retq
194 %shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
195 ret <16 x i8> %shuffle