1 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
4 define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
5 ; CHECK-LABEL: ins16bw:
6 ; CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
7 %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
11 define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
12 ; CHECK-LABEL: ins8hw:
13 ; CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
14 %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
18 define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
19 ; CHECK-LABEL: ins4sw:
20 ; CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
21 %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
25 define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
26 ; CHECK-LABEL: ins2dw:
27 ; CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
28 %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
32 define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
33 ; CHECK-LABEL: ins8bw:
34 ; CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
35 %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
39 define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
40 ; CHECK-LABEL: ins4hw:
41 ; CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
42 %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
46 define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
47 ; CHECK-LABEL: ins2sw:
48 ; CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
49 %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
53 define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
54 ; CHECK-LABEL: ins16b16:
55 ; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
56 %tmp3 = extractelement <16 x i8> %tmp1, i32 2
57 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
61 define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
62 ; CHECK-LABEL: ins8h8:
63 ; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
64 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
65 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
69 define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
70 ; CHECK-LABEL: ins4s4:
71 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
72 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
73 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
77 define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
78 ; CHECK-LABEL: ins2d2:
79 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
80 %tmp3 = extractelement <2 x i64> %tmp1, i32 0
81 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
85 define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
86 ; CHECK-LABEL: ins4f4:
87 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
88 %tmp3 = extractelement <4 x float> %tmp1, i32 2
89 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
93 define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
94 ; CHECK-LABEL: ins2df2:
95 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
96 %tmp3 = extractelement <2 x double> %tmp1, i32 0
97 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
98 ret <2 x double> %tmp4
101 define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
102 ; CHECK-LABEL: ins8b16:
103 ; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
104 %tmp3 = extractelement <8 x i8> %tmp1, i32 2
105 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
109 define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
110 ; CHECK-LABEL: ins4h8:
111 ; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
112 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
113 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
117 define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
118 ; CHECK-LABEL: ins2s4:
119 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
120 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
121 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
125 define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
126 ; CHECK-LABEL: ins1d2:
127 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
128 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
129 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
133 define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
134 ; CHECK-LABEL: ins2f4:
135 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
136 %tmp3 = extractelement <2 x float> %tmp1, i32 1
137 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
138 ret <4 x float> %tmp4
141 define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
142 ; CHECK-LABEL: ins1f2:
143 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
144 %tmp3 = extractelement <1 x double> %tmp1, i32 0
145 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
146 ret <2 x double> %tmp4
149 define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
150 ; CHECK-LABEL: ins16b8:
151 ; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
152 %tmp3 = extractelement <16 x i8> %tmp1, i32 2
153 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
157 define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
158 ; CHECK-LABEL: ins8h4:
159 ; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
160 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
161 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
165 define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
166 ; CHECK-LABEL: ins4s2:
167 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
168 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
169 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
173 define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
174 ; CHECK-LABEL: ins2d1:
175 ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
176 %tmp3 = extractelement <2 x i64> %tmp1, i32 0
177 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
181 define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
182 ; CHECK-LABEL: ins4f2:
183 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
184 %tmp3 = extractelement <4 x float> %tmp1, i32 2
185 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
186 ret <2 x float> %tmp4
; Moves lane 1 of a <2 x double> into the only lane (lane 0) of a <1 x double>.
; The expected lowering is a single element-to-scalar `mov` into a D register
; rather than a vector `ins`.
189 define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
190 ; CHECK-LABEL: ins2f1:
191 ; CHECK: mov {{d[0-9]+}}, {{v[0-9]+}}.d[1]
192 %tmp3 = extractelement <2 x double> %tmp1, i32 1
193 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
194 ret <1 x double> %tmp4
197 define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
198 ; CHECK-LABEL: ins8b8:
199 ; CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
200 %tmp3 = extractelement <8 x i8> %tmp1, i32 2
201 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
205 define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
206 ; CHECK-LABEL: ins4h4:
207 ; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
208 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
209 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
213 define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
214 ; CHECK-LABEL: ins2s2:
215 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
216 %tmp3 = extractelement <2 x i32> %tmp1, i32 0
217 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
221 define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
222 ; CHECK-LABEL: ins1d1:
223 ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
224 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
225 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
229 define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
230 ; CHECK-LABEL: ins2f2:
231 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
232 %tmp3 = extractelement <2 x float> %tmp1, i32 0
233 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
234 ret <2 x float> %tmp4
; Extracts the single lane of one <1 x double> and inserts it as the single lane
; of another, which fully overwrites %tmp2. This is a negative test: the
; CHECK-NOT asserts that no vector `ins` is emitted for this function.
237 define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
238 ; CHECK-LABEL: ins1df1:
239 ; CHECK-NOT: ins {{v[0-9]+}}
240 %tmp3 = extractelement <1 x double> %tmp1, i32 0
241 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
242 ret <1 x double> %tmp4
245 define i32 @umovw16b(<16 x i8> %tmp1) {
246 ; CHECK-LABEL: umovw16b:
247 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
248 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
249 %tmp4 = zext i8 %tmp3 to i32
253 define i32 @umovw8h(<8 x i16> %tmp1) {
254 ; CHECK-LABEL: umovw8h:
255 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
256 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
257 %tmp4 = zext i16 %tmp3 to i32
261 define i32 @umovw4s(<4 x i32> %tmp1) {
262 ; CHECK-LABEL: umovw4s:
263 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
264 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
268 define i64 @umovx2d(<2 x i64> %tmp1) {
269 ; CHECK-LABEL: umovx2d:
270 ; CHECK: mov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
271 %tmp3 = extractelement <2 x i64> %tmp1, i32 1
; Zero-extending extract of byte lane 7 from a 64-bit <8 x i8> vector.
; The expected mnemonic is spelled out as `umov`, matching umovw16b. A bare
; `mov` pattern only matches as a substring of `umov` and would equally accept
; an `smov`, so it cannot detect a wrong extension kind.
275 define i32 @umovw8b(<8 x i8> %tmp1) {
276 ; CHECK-LABEL: umovw8b:
277 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
278 %tmp3 = extractelement <8 x i8> %tmp1, i32 7
279 %tmp4 = zext i8 %tmp3 to i32
; Zero-extending extract of halfword lane 2 from a 64-bit <4 x i16> vector.
; The expected mnemonic is spelled out as `umov`, matching umovw8h. A bare
; `mov` pattern only matches as a substring of `umov` and would equally accept
; an `smov`, so it cannot detect a wrong extension kind.
283 define i32 @umovw4h(<4 x i16> %tmp1) {
284 ; CHECK-LABEL: umovw4h:
285 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
286 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
287 %tmp4 = zext i16 %tmp3 to i32
291 define i32 @umovw2s(<2 x i32> %tmp1) {
292 ; CHECK-LABEL: umovw2s:
293 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
294 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
298 define i64 @umovx1d(<1 x i64> %tmp1) {
299 ; CHECK-LABEL: umovx1d:
300 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
301 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
305 define i32 @smovw16b(<16 x i8> %tmp1) {
306 ; CHECK-LABEL: smovw16b:
307 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
308 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
309 %tmp4 = sext i8 %tmp3 to i32
310 %tmp5 = add i32 %tmp4, %tmp4
314 define i32 @smovw8h(<8 x i16> %tmp1) {
315 ; CHECK-LABEL: smovw8h:
316 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
317 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
318 %tmp4 = sext i16 %tmp3 to i32
319 %tmp5 = add i32 %tmp4, %tmp4
323 define i64 @smovx16b(<16 x i8> %tmp1) {
324 ; CHECK-LABEL: smovx16b:
325 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
326 %tmp3 = extractelement <16 x i8> %tmp1, i32 8
327 %tmp4 = sext i8 %tmp3 to i64
331 define i64 @smovx8h(<8 x i16> %tmp1) {
332 ; CHECK-LABEL: smovx8h:
333 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
334 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
335 %tmp4 = sext i16 %tmp3 to i64
339 define i64 @smovx4s(<4 x i32> %tmp1) {
340 ; CHECK-LABEL: smovx4s:
341 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
342 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
343 %tmp4 = sext i32 %tmp3 to i64
347 define i32 @smovw8b(<8 x i8> %tmp1) {
348 ; CHECK-LABEL: smovw8b:
349 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
350 %tmp3 = extractelement <8 x i8> %tmp1, i32 4
351 %tmp4 = sext i8 %tmp3 to i32
352 %tmp5 = add i32 %tmp4, %tmp4
356 define i32 @smovw4h(<4 x i16> %tmp1) {
357 ; CHECK-LABEL: smovw4h:
358 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
359 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
360 %tmp4 = sext i16 %tmp3 to i32
361 %tmp5 = add i32 %tmp4, %tmp4
365 define i32 @smovx8b(<8 x i8> %tmp1) {
366 ; CHECK-LABEL: smovx8b:
367 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[6]
368 %tmp3 = extractelement <8 x i8> %tmp1, i32 6
369 %tmp4 = sext i8 %tmp3 to i32
373 define i32 @smovx4h(<4 x i16> %tmp1) {
374 ; CHECK-LABEL: smovx4h:
375 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
376 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
377 %tmp4 = sext i16 %tmp3 to i32
381 define i64 @smovx2s(<2 x i32> %tmp1) {
382 ; CHECK-LABEL: smovx2s:
383 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
384 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
385 %tmp4 = sext i32 %tmp3 to i64
389 define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
390 ; CHECK-LABEL: test_vcopy_lane_s8:
391 ; CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
392 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
393 ret <8 x i8> %vset_lane
396 define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
397 ; CHECK-LABEL: test_vcopyq_laneq_s8:
398 ; CHECK: ins {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
399 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
400 ret <16 x i8> %vset_lane
; Lane copy expressed as a shuffle with the operands "swapped": mask indices
; 8..14 take lanes 0..6 from %v2, and the final index 0 takes lane 0 of %v1.
; The expected lowering is one `ins` that writes byte lane 7 from byte lane 0.
403 define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
404 ; CHECK-LABEL: test_vcopy_lane_swap_s8:
405 ; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
406 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
407 ret <8 x i8> %vset_lane
410 define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
411 ; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
412 ; CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
413 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
414 ret <16 x i8> %vset_lane
417 define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
418 ; CHECK-LABEL: test_vdup_n_u8:
419 ; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
420 %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
421 %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
422 %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
423 %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
424 %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
425 %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
426 %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
427 %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
428 ret <8 x i8> %vecinit7.i
431 define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
432 ; CHECK-LABEL: test_vdup_n_u16:
433 ; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
434 %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
435 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
436 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
437 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
438 ret <4 x i16> %vecinit3.i
441 define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
442 ; CHECK-LABEL: test_vdup_n_u32:
443 ; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
444 %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
445 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
446 ret <2 x i32> %vecinit1.i
; "Duplicate" of an i64 into a one-element vector: a single insertelement into
; lane 0 of an undef <1 x i64>. With only one lane there is nothing to
; replicate, so the expected code is a plain GPR-to-D-register `fmov`.
449 define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
450 ; CHECK-LABEL: test_vdup_n_u64:
451 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
452 %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
453 ret <1 x i64> %vecinit.i
456 define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
457 ; CHECK-LABEL: test_vdupq_n_u8:
458 ; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
459 %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
460 %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
461 %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
462 %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
463 %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
464 %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
465 %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
466 %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
467 %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
468 %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
469 %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
470 %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
471 %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
472 %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
473 %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
474 %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
475 ret <16 x i8> %vecinit15.i
478 define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
479 ; CHECK-LABEL: test_vdupq_n_u16:
480 ; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
481 %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
482 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
483 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
484 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
485 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
486 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
487 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
488 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
489 ret <8 x i16> %vecinit7.i
492 define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
493 ; CHECK-LABEL: test_vdupq_n_u32:
494 ; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
495 %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
496 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
497 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
498 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
499 ret <4 x i32> %vecinit3.i
502 define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
503 ; CHECK-LABEL: test_vdupq_n_u64:
504 ; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
505 %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
506 %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
507 ret <2 x i64> %vecinit1.i
510 define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
511 ; CHECK-LABEL: test_vdup_lane_s8:
512 ; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
513 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
514 ret <8 x i8> %shuffle
517 define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
518 ; CHECK-LABEL: test_vdup_lane_s16:
519 ; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
520 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
521 ret <4 x i16> %shuffle
524 define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
525 ; CHECK-LABEL: test_vdup_lane_s32:
526 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
527 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
528 ret <2 x i32> %shuffle
; Splat of byte lane 5 of a 64-bit <8 x i8> source across a 128-bit <16 x i8>
; result. The CHECK line names the `dup` mnemonic explicitly; an operand-only
; pattern would match any instruction that happens to use these operands.
531 define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
532 ; CHECK-LABEL: test_vdupq_lane_s8:
533 ; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
534 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
535 ret <16 x i8> %shuffle
; Splat of halfword lane 2 of a 64-bit <4 x i16> source across a 128-bit
; <8 x i16> result. The CHECK line names the `dup` mnemonic explicitly; an
; operand-only pattern would match any instruction using these operands.
538 define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
539 ; CHECK-LABEL: test_vdupq_lane_s16:
540 ; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
541 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
542 ret <8 x i16> %shuffle
; Splat of word lane 1 of a 64-bit <2 x i32> source across a 128-bit <4 x i32>
; result. The CHECK line names the `dup` mnemonic explicitly; an operand-only
; pattern would match any instruction that happens to use these operands.
545 define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
546 ; CHECK-LABEL: test_vdupq_lane_s32:
547 ; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
548 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
549 ret <4 x i32> %shuffle
; Splat of the single lane of a <1 x i64> source across a <2 x i64> result
; (zeroinitializer mask selects lane 0 for both outputs). The CHECK line names
; the `dup` mnemonic explicitly; an operand-only pattern would match any
; instruction that happens to use these operands.
552 define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
553 ; CHECK-LABEL: test_vdupq_lane_s64:
554 ; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
555 %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
556 ret <2 x i64> %shuffle
559 define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
560 ; CHECK-LABEL: test_vdup_laneq_s8:
561 ; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
562 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
563 ret <8 x i8> %shuffle
566 define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
567 ; CHECK-LABEL: test_vdup_laneq_s16:
568 ; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
569 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
570 ret <4 x i16> %shuffle
573 define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
574 ; CHECK-LABEL: test_vdup_laneq_s32:
575 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
576 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
577 ret <2 x i32> %shuffle
580 define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
581 ; CHECK-LABEL: test_vdupq_laneq_s8:
582 ; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
583 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
584 ret <16 x i8> %shuffle
; Splat of halfword lane 2 of a 128-bit <8 x i16> vector across all eight
; lanes. The CHECK line names the `dup` mnemonic explicitly, as the neighbouring
; test_vdupq_laneq_s8 does; an operand-only pattern would match any instruction
; that happens to use these operands.
587 define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
588 ; CHECK-LABEL: test_vdupq_laneq_s16:
589 ; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
590 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
591 ret <8 x i16> %shuffle
594 define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
595 ; CHECK-LABEL: test_vdupq_laneq_s32:
596 ; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
597 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
598 ret <4 x i32> %shuffle
601 define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
602 ; CHECK-LABEL: test_vdupq_laneq_s64:
603 ; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
604 %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
605 ret <2 x i64> %shuffle
608 define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
609 ; CHECK-LABEL: test_bitcastv8i8toi64:
610 %res = bitcast <8 x i8> %in to i64
611 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
615 define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
616 ; CHECK-LABEL: test_bitcastv4i16toi64:
617 %res = bitcast <4 x i16> %in to i64
618 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
622 define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
623 ; CHECK-LABEL: test_bitcastv2i32toi64:
624 %res = bitcast <2 x i32> %in to i64
625 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
629 define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
630 ; CHECK-LABEL: test_bitcastv2f32toi64:
631 %res = bitcast <2 x float> %in to i64
632 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
636 define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
637 ; CHECK-LABEL: test_bitcastv1i64toi64:
638 %res = bitcast <1 x i64> %in to i64
639 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
643 define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
644 ; CHECK-LABEL: test_bitcastv1f64toi64:
645 %res = bitcast <1 x double> %in to i64
646 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
650 define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
651 ; CHECK-LABEL: test_bitcasti64tov8i8:
652 %res = bitcast i64 %in to <8 x i8>
653 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
657 define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
658 ; CHECK-LABEL: test_bitcasti64tov4i16:
659 %res = bitcast i64 %in to <4 x i16>
660 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
664 define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
665 ; CHECK-LABEL: test_bitcasti64tov2i32:
666 %res = bitcast i64 %in to <2 x i32>
667 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
671 define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
672 ; CHECK-LABEL: test_bitcasti64tov2f32:
673 %res = bitcast i64 %in to <2 x float>
674 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
678 define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
679 ; CHECK-LABEL: test_bitcasti64tov1i64:
680 %res = bitcast i64 %in to <1 x i64>
681 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
685 define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
686 ; CHECK-LABEL: test_bitcasti64tov1f64:
687 %res = bitcast i64 %in to <1 x double>
688 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
689 ret <1 x double> %res
; Bitcast from <8 x i8> to <1 x double>, sandwiched between an integer negate
; (0 - %a) and an fptosi so that both sides of the cast are real instructions.
; CHECK-NEXT requires `fcvtzs` on the line directly after `neg`, i.e. the
; bitcast itself must not produce any instruction between them.
692 define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
693 ; CHECK-LABEL: test_bitcastv8i8tov1f64:
694 ; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
695 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
696 %sub.i = sub <8 x i8> zeroinitializer, %a
697 %1 = bitcast <8 x i8> %sub.i to <1 x double>
698 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
699 ret <1 x i64> %vcvt.i
702 define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
703 ; CHECK-LABEL: test_bitcastv4i16tov1f64:
704 ; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
705 ; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
706 %sub.i = sub <4 x i16> zeroinitializer, %a
707 %1 = bitcast <4 x i16> %sub.i to <1 x double>
708 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
709 ret <1 x i64> %vcvt.i
712 define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
713 ; CHECK-LABEL: test_bitcastv2i32tov1f64:
714 ; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
715 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
716 %sub.i = sub <2 x i32> zeroinitializer, %a
717 %1 = bitcast <2 x i32> %sub.i to <1 x double>
718 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
719 ret <1 x i64> %vcvt.i
722 define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
723 ; CHECK-LABEL: test_bitcastv1i64tov1f64:
724 ; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
725 ; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
726 %sub.i = sub <1 x i64> zeroinitializer, %a
727 %1 = bitcast <1 x i64> %sub.i to <1 x double>
728 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
729 ret <1 x i64> %vcvt.i
732 define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
733 ; CHECK-LABEL: test_bitcastv2f32tov1f64:
734 ; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
735 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
736 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
737 %1 = bitcast <2 x float> %sub.i to <1 x double>
738 %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
739 ret <1 x i64> %vcvt.i
742 define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
743 ; CHECK-LABEL: test_bitcastv1f64tov8i8:
744 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
745 ; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
746 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
747 %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
748 %sub.i = sub <8 x i8> zeroinitializer, %1
752 define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
753 ; CHECK-LABEL: test_bitcastv1f64tov4i16:
754 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
755 ; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
756 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
757 %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
758 %sub.i = sub <4 x i16> zeroinitializer, %1
762 define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
763 ; CHECK-LABEL: test_bitcastv1f64tov2i32:
764 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
765 ; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
766 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
767 %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
768 %sub.i = sub <2 x i32> zeroinitializer, %1
772 define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
773 ; CHECK-LABEL: test_bitcastv1f64tov1i64:
774 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
775 ; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
776 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
777 %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
778 %sub.i = sub <1 x i64> zeroinitializer, %1
782 define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
783 ; CHECK-LABEL: test_bitcastv1f64tov2f32:
784 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
785 ; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
786 %vcvt.i = sitofp <1 x i64> %a to <1 x double>
787 %1 = bitcast <1 x double> %vcvt.i to <2 x float>
788 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
789 ret <2 x float> %sub.i
792 ; Test inserting an element into an undef vector.
793 define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
794 ; CHECK-LABEL: scalar_to_vector.v8i8:
795 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
796 %b = insertelement <8 x i8> undef, i8 %a, i32 0
800 define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
801 ; CHECK-LABEL: scalar_to_vector.v16i8:
802 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
803 %b = insertelement <16 x i8> undef, i8 %a, i32 0
807 define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
808 ; CHECK-LABEL: scalar_to_vector.v4i16:
809 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
810 %b = insertelement <4 x i16> undef, i16 %a, i32 0
814 define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
815 ; CHECK-LABEL: scalar_to_vector.v8i16:
816 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
817 %b = insertelement <8 x i16> undef, i16 %a, i32 0
821 define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
822 ; CHECK-LABEL: scalar_to_vector.v2i32:
823 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
824 %b = insertelement <2 x i32> undef, i32 %a, i32 0
828 define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
829 ; CHECK-LABEL: scalar_to_vector.v4i32:
830 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
831 %b = insertelement <4 x i32> undef, i32 %a, i32 0
835 define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
836 ; CHECK-LABEL: scalar_to_vector.v2i64:
837 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
838 %b = insertelement <2 x i64> undef, i64 %a, i32 0
842 define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
843 ; CHECK-LABEL: testDUP.v1i8:
844 ; CHECK: dup v0.8b, v0.b[0]
845 %b = extractelement <1 x i8> %a, i32 0
846 %c = insertelement <8 x i8> undef, i8 %b, i32 0
847 %d = insertelement <8 x i8> %c, i8 %b, i32 1
848 %e = insertelement <8 x i8> %d, i8 %b, i32 2
849 %f = insertelement <8 x i8> %e, i8 %b, i32 3
850 %g = insertelement <8 x i8> %f, i8 %b, i32 4
851 %h = insertelement <8 x i8> %g, i8 %b, i32 5
852 %i = insertelement <8 x i8> %h, i8 %b, i32 6
853 %j = insertelement <8 x i8> %i, i8 %b, i32 7
857 define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
858 ; CHECK-LABEL: testDUP.v1i16:
859 ; CHECK: dup v0.8h, v0.h[0]
860 %b = extractelement <1 x i16> %a, i32 0
861 %c = insertelement <8 x i16> undef, i16 %b, i32 0
862 %d = insertelement <8 x i16> %c, i16 %b, i32 1
863 %e = insertelement <8 x i16> %d, i16 %b, i32 2
864 %f = insertelement <8 x i16> %e, i16 %b, i32 3
865 %g = insertelement <8 x i16> %f, i16 %b, i32 4
866 %h = insertelement <8 x i16> %g, i16 %b, i32 5
867 %i = insertelement <8 x i16> %h, i16 %b, i32 6
868 %j = insertelement <8 x i16> %i, i16 %b, i32 7
872 define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
873 ; CHECK-LABEL: testDUP.v1i32:
874 ; CHECK: dup v0.4s, v0.s[0]
875 %b = extractelement <1 x i32> %a, i32 0
876 %c = insertelement <4 x i32> undef, i32 %b, i32 0
877 %d = insertelement <4 x i32> %c, i32 %b, i32 1
878 %e = insertelement <4 x i32> %d, i32 %b, i32 2
879 %f = insertelement <4 x i32> %e, i32 %b, i32 3
883 define <8 x i8> @getl(<16 x i8> %x) #0 {
886 %vecext = extractelement <16 x i8> %x, i32 0
887 %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
888 %vecext1 = extractelement <16 x i8> %x, i32 1
889 %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
890 %vecext3 = extractelement <16 x i8> %x, i32 2
891 %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
892 %vecext5 = extractelement <16 x i8> %x, i32 3
893 %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
894 %vecext7 = extractelement <16 x i8> %x, i32 4
895 %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
896 %vecext9 = extractelement <16 x i8> %x, i32 5
897 %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
898 %vecext11 = extractelement <16 x i8> %x, i32 6
899 %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
900 %vecext13 = extractelement <16 x i8> %x, i32 7
901 %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
902 ret <8 x i8> %vecinit14
; Extract/insert chain where the first extract uses the runtime index %idx and
; lanes 1-3 use constant indices. Expected output: the variable-index element is
; loaded into halfword lane 0 from an address built from w0 (sign-extended,
; scaled by 2), and lanes 1-3 are lane-to-lane copies from v0. The three lane
; copies and the load may appear in any order.
905 ; CHECK-LABEL: test_extracts_inserts_varidx_extract:
907 ; CHECK: add x[[PTR:[0-9]+]], {{.*}}, w0, sxtw #1
908 ; CHECK-DAG: ld1 { v[[R:[0-9]+]].h }[0], [x[[PTR]]]
909 ; CHECK-DAG: ins v[[R]].h[1], v0.h[1]
910 ; CHECK-DAG: ins v[[R]].h[2], v0.h[2]
911 ; CHECK-DAG: ins v[[R]].h[3], v0.h[3]
912 define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
913 %tmp = extractelement <8 x i16> %x, i32 %idx
914 %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0
915 %tmp3 = extractelement <8 x i16> %x, i32 1
916 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
917 %tmp5 = extractelement <8 x i16> %x, i32 2
918 %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
919 %tmp7 = extractelement <8 x i16> %x, i32 3
920 %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
; Counterpart of the variable-index extract test: here lane 0 of %x is inserted
; at the runtime index %idx, followed by constant-index inserts into lanes 1-3.
; Expected output: h0 is stored to a w0-indexed address (sign-extended, scaled
; by 2), a d register is reloaded, and lanes 1-3 are copied from v0. The reload
; and the lane copies may appear in any order.
924 ; CHECK-LABEL: test_extracts_inserts_varidx_insert:
925 ; CHECK: str h0, [{{.*}}, w0, sxtw #1]
926 ; CHECK-DAG: ldr d[[R:[0-9]+]]
927 ; CHECK-DAG: ins v[[R]].h[1], v0.h[1]
928 ; CHECK-DAG: ins v[[R]].h[2], v0.h[2]
929 ; CHECK-DAG: ins v[[R]].h[3], v0.h[3]
930 define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
931 %tmp = extractelement <8 x i16> %x, i32 0
932 %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 %idx
933 %tmp3 = extractelement <8 x i16> %x, i32 1
934 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
935 %tmp5 = extractelement <8 x i16> %x, i32 2
936 %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
937 %tmp7 = extractelement <8 x i16> %x, i32 3
938 %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
; Truncating splat: takes word lane 1 of a <2 x i32>, truncates it to i16 and
; writes it to all four result lanes. Expected to fold into one dup reading
; halfword lane 2 of v0 (the low 16 bits of word lane 1).
942 define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
943 ; CHECK-LABEL: test_dup_v2i32_v4i16:
944 ; CHECK: dup v0.4h, v0.h[2]
946 %x = extractelement <2 x i32> %a, i32 1
947 %vget_lane = trunc i32 %x to i16
948 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
949 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
950 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
951 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
952 ret <4 x i16> %vecinit3.i
; Truncating splat: takes word lane 3 of a <4 x i32>, truncates it to i16 and
; writes it to all eight result lanes. Expected to fold into one dup reading
; halfword lane 6 of v0 (the low 16 bits of word lane 3).
955 define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
956 ; CHECK-LABEL: test_dup_v4i32_v8i16:
957 ; CHECK: dup v0.8h, v0.h[6]
959 %x = extractelement <4 x i32> %a, i32 3
960 %vget_lane = trunc i32 %x to i16
961 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
962 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
963 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
964 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
965 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
966 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
967 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
968 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
969 ret <8 x i16> %vecinit7.i
; Truncating splat: takes the single i64 element of a <1 x i64>, truncates it to
; i16 and writes it to all four result lanes. Expected to fold into one dup
; reading halfword lane 0 of v0.
972 define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
973 ; CHECK-LABEL: test_dup_v1i64_v4i16:
974 ; CHECK: dup v0.4h, v0.h[0]
976 %x = extractelement <1 x i64> %a, i32 0
977 %vget_lane = trunc i64 %x to i16
978 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
979 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
980 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
981 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
982 ret <4 x i16> %vecinit3.i
; Truncating splat: takes the single i64 element of a <1 x i64>, truncates it to
; i32 and writes it to both result lanes. Expected to fold into one dup reading
; word lane 0 of v0.
985 define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
986 ; CHECK-LABEL: test_dup_v1i64_v2i32:
987 ; CHECK: dup v0.2s, v0.s[0]
989 %x = extractelement <1 x i64> %a, i32 0
990 %vget_lane = trunc i64 %x to i32
991 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
992 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
993 ret <2 x i32> %vecinit1.i
; Truncating splat: takes doubleword lane 1 of a <2 x i64>, truncates it to i16
; and writes it to all eight result lanes. Expected to fold into one dup reading
; halfword lane 4 of v0 (the low 16 bits of doubleword lane 1).
996 define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
997 ; CHECK-LABEL: test_dup_v2i64_v8i16:
998 ; CHECK: dup v0.8h, v0.h[4]
1000 %x = extractelement <2 x i64> %a, i32 1
1001 %vget_lane = trunc i64 %x to i16
1002 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
1003 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
1004 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1005 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1006 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
1007 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
1008 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
1009 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
1010 ret <8 x i16> %vecinit7.i
; Truncating splat: takes doubleword lane 1 of a <2 x i64>, truncates it to i32
; and writes it to all four result lanes. Expected to fold into one dup reading
; word lane 2 of v0 (the low 32 bits of doubleword lane 1).
1013 define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
1014 ; CHECK-LABEL: test_dup_v2i64_v4i32:
1015 ; CHECK: dup v0.4s, v0.s[2]
1017 %x = extractelement <2 x i64> %a, i32 1
1018 %vget_lane = trunc i64 %x to i32
1019 %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
1020 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
1021 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
1022 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
1023 ret <4 x i32> %vecinit3.i
; Truncating splat from a 128-bit source into a 64-bit result: takes word lane 1
; of a <4 x i32>, truncates it to i16 and writes it to all four result lanes.
; Expected to fold into one dup reading halfword lane 2 of v0.
1026 define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
1027 ; CHECK-LABEL: test_dup_v4i32_v4i16:
1028 ; CHECK: dup v0.4h, v0.h[2]
1030 %x = extractelement <4 x i32> %a, i32 1
1031 %vget_lane = trunc i32 %x to i16
1032 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1033 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1034 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1035 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1036 ret <4 x i16> %vecinit3.i
; Truncating splat from a 128-bit source into a 64-bit result: takes doubleword
; lane 0 of a <2 x i64>, truncates it to i16 and writes it to all four result
; lanes. Expected to fold into one dup reading halfword lane 0 of v0.
1039 define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
1040 ; CHECK-LABEL: test_dup_v2i64_v4i16:
1041 ; CHECK: dup v0.4h, v0.h[0]
1043 %x = extractelement <2 x i64> %a, i32 0
1044 %vget_lane = trunc i64 %x to i16
1045 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
1046 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
1047 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
1048 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
1049 ret <4 x i16> %vecinit3.i
; Truncating splat from a 128-bit source into a 64-bit result: takes doubleword
; lane 0 of a <2 x i64>, truncates it to i32 and writes it to both result lanes.
; Expected to fold into one dup reading word lane 0 of v0.
1052 define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) {
1053 ; CHECK-LABEL: test_dup_v2i64_v2i32:
1054 ; CHECK: dup v0.2s, v0.s[0]
1056 %x = extractelement <2 x i64> %a, i32 0
1057 %vget_lane = trunc i64 %x to i32
1058 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
1059 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
1060 ret <2 x i32> %vecinit1.i
; Places the scalar result of the fmaxv intrinsic in lane 0 of an otherwise
; undef <2 x float>, after a round trip through <1 x float>. Only the pairwise
; max instruction (s-register destination, .2s source) is matched.
1064 define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) {
1065 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
1066 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
1069 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
1070 %1 = insertelement <1 x float> undef, float %0, i32 0
1071 %2 = extractelement <1 x float> %1, i32 0
1072 %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0
1073 ret <2 x float> %vecinit1.i
; Same as the v2f32 variant but the fmaxv result lands in lane 0 of an otherwise
; undef <4 x float>. Only the pairwise max instruction (s-register destination,
; .2s source) is matched.
1076 define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) {
1077 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32:
1078 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
1081 %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
1082 %1 = insertelement <1 x float> undef, float %0, i32 0
1083 %2 = extractelement <1 x float> %1, i32 0
1084 %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0
1085 ret <4 x float> %vecinit1.i
1088 declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
; Inserts lane 0 of %a into lane 1 of an otherwise undef <2 x i32> (lane 0 of
; the result stays undef). Expected to lower to a .2s dup from word lane 0.
1090 define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) {
1091 ; CHECK-LABEL: test_concat_undef_v1i32:
1092 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
1094 %0 = extractelement <2 x i32> %a, i32 0
1095 %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
1096 ret <2 x i32> %vecinit1.i
1099 declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4
; Puts the scalar sqabs intrinsic result into lane 0 of an otherwise undef
; <2 x i32>. The expectation only matches an sqabs operating on s registers.
1101 define <2 x i32> @test_concat_v1i32_undef(i32 %a) {
1102 ; CHECK-LABEL: test_concat_v1i32_undef:
1103 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
1106 %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
1107 %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0
1108 ret <2 x i32> %vecinit.i432
; Writes lane 0 of %a into both lanes of the result, i.e. a splat of lane 0.
; Expected to lower to a .2s dup from word lane 0.
1111 define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) {
1112 ; CHECK-LABEL: test_concat_same_v1i32_v1i32:
1113 ; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
1115 %0 = extractelement <2 x i32> %a, i32 0
1116 %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
1117 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
1118 ret <2 x i32> %vecinit1.i
; Computes sqabs of two independent scalars, places each in lane 0 of its own
; vector, then combines them with shuffle mask <0, 2> (lane 0 of %d, lane 0 of
; %f). Expected output: two scalar sqabs instructions followed by an insert of a
; w register into word lane 1.
1121 define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
1122 ; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
1123 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
1124 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
1125 ; CHECK: ins {{v[0-9]+}}.s[1], w{{[0-9]+}}
1127 %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
1128 %d = insertelement <2 x i32> undef, i32 %c, i32 0
1129 %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b)
1130 %f = insertelement <2 x i32> undef, i32 %e, i32 0
1131 %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2>
; Concatenates the low 8 bytes of %x with the low 8 bytes of %y using a single
; shuffle (mask 0-7, 16-23). Expected to lower to one 64-bit lane insert,
; d[0] of one register into d[1] of another.
1135 define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
1136 ; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
1137 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1139 %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1140 ret <16 x i8> %vecinit30
; Concatenates an 8-byte vector %x with the low 8 bytes of %y. %x is first
; widened into lanes 0-7 of a <16 x i8> element by element, then a shuffle
; (mask 0-7, 16-23) appends the low half of %y. Expected to lower to one 64-bit
; lane insert, d[0] into d[1].
1143 define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
1144 ; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
1145 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1147 %vecext = extractelement <8 x i8> %x, i32 0
1148 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1149 %vecext1 = extractelement <8 x i8> %x, i32 1
1150 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1151 %vecext3 = extractelement <8 x i8> %x, i32 2
1152 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1153 %vecext5 = extractelement <8 x i8> %x, i32 3
1154 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1155 %vecext7 = extractelement <8 x i8> %x, i32 4
1156 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1157 %vecext9 = extractelement <8 x i8> %x, i32 5
1158 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1159 %vecext11 = extractelement <8 x i8> %x, i32 6
1160 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1161 %vecext13 = extractelement <8 x i8> %x, i32 7
1162 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1163 %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
1164 ret <16 x i8> %vecinit30
; Concatenates the low 8 bytes of %x with the 8-byte vector %y, built entirely
; from scalar extract/insert pairs: lanes 0-7 come from %x lanes 0-7, lanes
; 8-15 come from %y lanes 0-7. Expected to lower to one 64-bit lane insert,
; d[0] into d[1].
1167 define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
1168 ; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
1169 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1171 %vecext = extractelement <16 x i8> %x, i32 0
1172 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1173 %vecext1 = extractelement <16 x i8> %x, i32 1
1174 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1175 %vecext3 = extractelement <16 x i8> %x, i32 2
1176 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1177 %vecext5 = extractelement <16 x i8> %x, i32 3
1178 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1179 %vecext7 = extractelement <16 x i8> %x, i32 4
1180 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1181 %vecext9 = extractelement <16 x i8> %x, i32 5
1182 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1183 %vecext11 = extractelement <16 x i8> %x, i32 6
1184 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1185 %vecext13 = extractelement <16 x i8> %x, i32 7
1186 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1187 %vecext15 = extractelement <8 x i8> %y, i32 0
1188 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1189 %vecext17 = extractelement <8 x i8> %y, i32 1
1190 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1191 %vecext19 = extractelement <8 x i8> %y, i32 2
1192 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1193 %vecext21 = extractelement <8 x i8> %y, i32 3
1194 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1195 %vecext23 = extractelement <8 x i8> %y, i32 4
1196 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1197 %vecext25 = extractelement <8 x i8> %y, i32 5
1198 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1199 %vecext27 = extractelement <8 x i8> %y, i32 6
1200 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1201 %vecext29 = extractelement <8 x i8> %y, i32 7
1202 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1203 ret <16 x i8> %vecinit30
; Concatenates two 8-byte vectors into a <16 x i8> using only scalar
; extract/insert pairs: lanes 0-7 come from %x, lanes 8-15 from %y. Expected to
; lower to one 64-bit lane insert, d[0] into d[1].
1206 define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
1207 ; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
1208 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1210 %vecext = extractelement <8 x i8> %x, i32 0
1211 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
1212 %vecext1 = extractelement <8 x i8> %x, i32 1
1213 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
1214 %vecext3 = extractelement <8 x i8> %x, i32 2
1215 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
1216 %vecext5 = extractelement <8 x i8> %x, i32 3
1217 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
1218 %vecext7 = extractelement <8 x i8> %x, i32 4
1219 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
1220 %vecext9 = extractelement <8 x i8> %x, i32 5
1221 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
1222 %vecext11 = extractelement <8 x i8> %x, i32 6
1223 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
1224 %vecext13 = extractelement <8 x i8> %x, i32 7
1225 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
1226 %vecext15 = extractelement <8 x i8> %y, i32 0
1227 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
1228 %vecext17 = extractelement <8 x i8> %y, i32 1
1229 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
1230 %vecext19 = extractelement <8 x i8> %y, i32 2
1231 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
1232 %vecext21 = extractelement <8 x i8> %y, i32 3
1233 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
1234 %vecext23 = extractelement <8 x i8> %y, i32 4
1235 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
1236 %vecext25 = extractelement <8 x i8> %y, i32 5
1237 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
1238 %vecext27 = extractelement <8 x i8> %y, i32 6
1239 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
1240 %vecext29 = extractelement <8 x i8> %y, i32 7
1241 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
1242 ret <16 x i8> %vecinit30
; Concatenates the low four halfwords of %x with the low four halfwords of %y
; using a single shuffle (mask 0-3, 8-11). Expected to lower to one 64-bit lane
; insert, d[0] into d[1].
1245 define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
1246 ; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
1247 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1249 %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1250 ret <8 x i16> %vecinit14
; Concatenates a 4-halfword vector %x with the low four halfwords of %y. %x is
; widened into lanes 0-3 of a <8 x i16> element by element, then a shuffle
; (mask 0-3, 8-11) appends the low half of %y. Expected to lower to one 64-bit
; lane insert, d[0] into d[1].
1253 define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
1254 ; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
1255 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1257 %vecext = extractelement <4 x i16> %x, i32 0
1258 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1259 %vecext1 = extractelement <4 x i16> %x, i32 1
1260 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1261 %vecext3 = extractelement <4 x i16> %x, i32 2
1262 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1263 %vecext5 = extractelement <4 x i16> %x, i32 3
1264 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1265 %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1266 ret <8 x i16> %vecinit14
; Concatenates the low four halfwords of %x with the 4-halfword vector %y, built
; entirely from scalar extract/insert pairs: lanes 0-3 come from %x lanes 0-3,
; lanes 4-7 from %y lanes 0-3. Expected to lower to one 64-bit lane insert,
; d[0] into d[1].
1269 define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
1270 ; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
1271 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1273 %vecext = extractelement <8 x i16> %x, i32 0
1274 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1275 %vecext1 = extractelement <8 x i16> %x, i32 1
1276 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1277 %vecext3 = extractelement <8 x i16> %x, i32 2
1278 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1279 %vecext5 = extractelement <8 x i16> %x, i32 3
1280 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1281 %vecext7 = extractelement <4 x i16> %y, i32 0
1282 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1283 %vecext9 = extractelement <4 x i16> %y, i32 1
1284 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1285 %vecext11 = extractelement <4 x i16> %y, i32 2
1286 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1287 %vecext13 = extractelement <4 x i16> %y, i32 3
1288 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1289 ret <8 x i16> %vecinit14
; Concatenates two 4-halfword vectors into a <8 x i16> using only scalar
; extract/insert pairs: lanes 0-3 come from %x, lanes 4-7 from %y. Expected to
; lower to one 64-bit lane insert, d[0] into d[1].
1292 define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
1293 ; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
1294 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1296 %vecext = extractelement <4 x i16> %x, i32 0
1297 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
1298 %vecext1 = extractelement <4 x i16> %x, i32 1
1299 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
1300 %vecext3 = extractelement <4 x i16> %x, i32 2
1301 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
1302 %vecext5 = extractelement <4 x i16> %x, i32 3
1303 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
1304 %vecext7 = extractelement <4 x i16> %y, i32 0
1305 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
1306 %vecext9 = extractelement <4 x i16> %y, i32 1
1307 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
1308 %vecext11 = extractelement <4 x i16> %y, i32 2
1309 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
1310 %vecext13 = extractelement <4 x i16> %y, i32 3
1311 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
1312 ret <8 x i16> %vecinit14
; Concatenates the low two words of %x with the low two words of %y using a
; single shuffle (mask 0, 1, 4, 5). Expected to lower to one 64-bit lane insert,
; d[0] into d[1].
1315 define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
1316 ; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
1317 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1319 %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1320 ret <4 x i32> %vecinit6
; Concatenates a 2-word vector %x with the low two words of %y. %x is widened
; into lanes 0-1 of a <4 x i32> element by element, then a shuffle (mask 0, 1,
; 4, 5) appends the low half of %y. Expected to lower to one 64-bit lane insert,
; d[0] into d[1].
1323 define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
1324 ; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
1325 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1327 %vecext = extractelement <2 x i32> %x, i32 0
1328 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1329 %vecext1 = extractelement <2 x i32> %x, i32 1
1330 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1331 %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1332 ret <4 x i32> %vecinit6
; Concatenates the low two words of %x with the 2-word vector %y using only
; scalar extract/insert pairs: lanes 0-1 come from %x lanes 0-1, lanes 2-3 from
; %y lanes 0-1. Expected to lower to one 64-bit lane insert, d[0] into d[1].
1335 define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
1336 ; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
1337 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1339 %vecext = extractelement <4 x i32> %x, i32 0
1340 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
1341 %vecext1 = extractelement <4 x i32> %x, i32 1
1342 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
1343 %vecext3 = extractelement <2 x i32> %y, i32 0
1344 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
1345 %vecext5 = extractelement <2 x i32> %y, i32 1
1346 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
1347 ret <4 x i32> %vecinit6
; Concatenates two 2-word vectors into a <4 x i32> with a single widening
; shuffle (mask 0, 1, 2, 3). Expected to lower to one 64-bit lane insert,
; d[0] into d[1].
1350 define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
1351 ; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
1352 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1354 %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1355 ret <4 x i32> %vecinit6
; Combines doubleword lane 0 of %x with doubleword lane 0 of %y using a single
; shuffle (mask 0, 2). Unlike the narrower-element concat tests, the expected
; instruction here is a zip1 on .2d operands.
1358 define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
1359 ; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
1360 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
1362 %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1363 ret <2 x i64> %vecinit2
; Places the single element of %x in lane 0 of a <2 x i64>, then combines it
; with doubleword lane 0 of %y via a shuffle (mask 0, 2). The expected
; instruction is a zip1 on .2d operands.
1366 define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
1367 ; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
1368 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
1370 %vecext = extractelement <1 x i64> %x, i32 0
1371 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1372 %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
1373 ret <2 x i64> %vecinit2
; Builds a <2 x i64> from doubleword lane 0 of %x (result lane 0) and the single
; element of %y (result lane 1) using scalar extract/insert pairs. Expected to
; lower to one 64-bit lane insert, d[0] into d[1].
1376 define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
1377 ; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
1378 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1380 %vecext = extractelement <2 x i64> %x, i32 0
1381 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1382 %vecext1 = extractelement <1 x i64> %y, i32 0
1383 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1384 ret <2 x i64> %vecinit2
; Builds a <2 x i64> from two single-element vectors using scalar extract/insert
; pairs: %x supplies lane 0, %y supplies lane 1. Expected to lower to one 64-bit
; lane insert, d[0] into d[1].
1387 define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
1388 ; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
1389 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
1391 %vecext = extractelement <1 x i64> %x, i32 0
1392 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
1393 %vecext1 = extractelement <1 x i64> %y, i32 0
1394 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
1395 ret <2 x i64> %vecinit2
; Splats the single element of a constant-zero <1 x i16> into four lanes via an
; all-zero shuffle mask. Expected to materialize as a zeroing movi of a d
; register.
1399 define <4 x i16> @concat_vector_v4i16_const() {
1400 ; CHECK-LABEL: concat_vector_v4i16_const:
1401 ; CHECK: movi {{d[0-9]+}}, #0
1402 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
; Splats the constant <1 x i16> <1> into four lanes via an all-zero shuffle
; mask. Expected to materialize as a .4h movi with immediate 1.
1406 define <4 x i16> @concat_vector_v4i16_const_one() {
1407 ; CHECK-LABEL: concat_vector_v4i16_const_one:
1408 ; CHECK: movi {{v[0-9]+}}.4h, #1
1409 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
; Splats the single element of a constant-zero <1 x i32> into four lanes via an
; all-zero shuffle mask. Expected to materialize as a .2d movi with immediate 0.
1413 define <4 x i32> @concat_vector_v4i32_const() {
1414 ; CHECK-LABEL: concat_vector_v4i32_const:
1415 ; CHECK: movi {{v[0-9]+}}.2d, #0
1416 %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
; Splats the single element of a constant-zero <1 x i8> into eight lanes via an
; all-zero shuffle mask. Expected to materialize as a zeroing movi of a d
; register.
1420 define <8 x i8> @concat_vector_v8i8_const() {
1421 ; CHECK-LABEL: concat_vector_v8i8_const:
1422 ; CHECK: movi {{d[0-9]+}}, #0
1423 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
; Splats the single element of a constant-zero <1 x i16> into eight lanes via an
; all-zero shuffle mask. Expected to materialize as a .2d movi with immediate 0.
1427 define <8 x i16> @concat_vector_v8i16_const() {
1428 ; CHECK-LABEL: concat_vector_v8i16_const:
1429 ; CHECK: movi {{v[0-9]+}}.2d, #0
1430 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
; Splats the constant <1 x i16> <1> into eight lanes via an all-zero shuffle
; mask. Expected to materialize as a .8h movi with immediate 1.
1434 define <8 x i16> @concat_vector_v8i16_const_one() {
1435 ; CHECK-LABEL: concat_vector_v8i16_const_one:
1436 ; CHECK: movi {{v[0-9]+}}.8h, #1
1437 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
; Splats the single element of a constant-zero <1 x i8> into sixteen lanes via
; an all-zero shuffle mask. Expected to materialize as a .2d movi with
; immediate 0.
1441 define <16 x i8> @concat_vector_v16i8_const() {
1442 ; CHECK-LABEL: concat_vector_v16i8_const:
1443 ; CHECK: movi {{v[0-9]+}}.2d, #0
1444 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
; Splats the single element of the <1 x i16> argument into four lanes via an
; all-zero shuffle mask. Expected to lower to an in-place .4h dup from halfword
; lane 0 of v0.
1448 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
1449 ; CHECK-LABEL: concat_vector_v4i16:
1450 ; CHECK: dup v0.4h, v0.h[0]
1451 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
; Splats the single element of the <1 x i32> argument into four lanes via an
; all-zero shuffle mask. Expected to lower to an in-place .4s dup from word
; lane 0 of v0.
1455 define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
1456 ; CHECK-LABEL: concat_vector_v4i32:
1457 ; CHECK: dup v0.4s, v0.s[0]
1458 %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
; Splats the single element of the <1 x i8> argument into eight lanes via an
; all-zero shuffle mask. Expected to lower to an in-place .8b dup from byte
; lane 0 of v0.
1462 define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
1463 ; CHECK-LABEL: concat_vector_v8i8:
1464 ; CHECK: dup v0.8b, v0.b[0]
1465 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
; Splats the single element of the <1 x i16> argument into eight lanes via an
; all-zero shuffle mask. Expected to lower to an in-place .8h dup from halfword
; lane 0 of v0.
1469 define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
1470 ; CHECK-LABEL: concat_vector_v8i16:
1471 ; CHECK: dup v0.8h, v0.h[0]
1472 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
1476 define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
1477 ; CHECK-LABEL: concat_vector_v16i8:
1478 ; CHECK: dup v0.16b, v0.b[0]
1479 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer