test/CodeGen/aarch64-neon-perm.c

   1 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
   2 // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
   3
   4 // Test that the AArch64 NEON permute intrinsics (vuzp, vzip, vtrn) lower to a shufflevector with the expected lane mask.
   5 #include <arm_neon.h>
   6
   7 // CHECK-LABEL: @test_vuzp1_s8(
   8 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   9 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
  // Wraps vuzp1_s8; the checks above expect the even-numbered lanes of the concatenation a:b.
  10 int8x8_t test_vuzp1_s8(int8x8_t a, int8x8_t b) {
  11   return vuzp1_s8(a, b);
  12 }
  13
  14 // CHECK-LABEL: @test_vuzp1q_s8(
  15 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  16 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
  // Wraps vuzp1q_s8; the checks above expect the even-numbered lanes of the concatenation a:b.
  17 int8x16_t test_vuzp1q_s8(int8x16_t a, int8x16_t b) {
  18   return vuzp1q_s8(a, b);
  19 }
  20
  21 // CHECK-LABEL: @test_vuzp1_s16(
  22 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  23 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
  // Wraps vuzp1_s16; the checks above expect the even-numbered lanes of the concatenation a:b.
  24 int16x4_t test_vuzp1_s16(int16x4_t a, int16x4_t b) {
  25   return vuzp1_s16(a, b);
  26 }
  27
  28 // CHECK-LABEL: @test_vuzp1q_s16(
  29 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  30 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
  // Wraps vuzp1q_s16; the checks above expect the even-numbered lanes of the concatenation a:b.
  31 int16x8_t test_vuzp1q_s16(int16x8_t a, int16x8_t b) {
  32   return vuzp1q_s16(a, b);
  33 }
  34
  35 // CHECK-LABEL: @test_vuzp1_s32(
  36 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
  37 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
  // Wraps vuzp1_s32; the checks above expect the even-numbered lanes of the concatenation a:b.
  38 int32x2_t test_vuzp1_s32(int32x2_t a, int32x2_t b) {
  39   return vuzp1_s32(a, b);
  40 }
  41
  42 // CHECK-LABEL: @test_vuzp1q_s32(
  43 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  44 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
  // Wraps vuzp1q_s32; the checks above expect the even-numbered lanes of the concatenation a:b.
  45 int32x4_t test_vuzp1q_s32(int32x4_t a, int32x4_t b) {
  46   return vuzp1q_s32(a, b);
  47 }
  48
  49 // CHECK-LABEL: @test_vuzp1q_s64(
  50 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
  51 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
  // Wraps vuzp1q_s64; the checks above expect the even-numbered lanes of the concatenation a:b.
  52 int64x2_t test_vuzp1q_s64(int64x2_t a, int64x2_t b) {
  53   return vuzp1q_s64(a, b);
  54 }
  55
  56 // CHECK-LABEL: @test_vuzp1_u8(
  57 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  58 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
  // Wraps vuzp1_u8; the checks above expect the even-numbered lanes of the concatenation a:b.
  59 uint8x8_t test_vuzp1_u8(uint8x8_t a, uint8x8_t b) {
  60   return vuzp1_u8(a, b);
  61 }
  62
  63 // CHECK-LABEL: @test_vuzp1q_u8(
  64 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  65 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
  // Wraps vuzp1q_u8; the checks above expect the even-numbered lanes of the concatenation a:b.
  66 uint8x16_t test_vuzp1q_u8(uint8x16_t a, uint8x16_t b) {
  67   return vuzp1q_u8(a, b);
  68 }
  69
  70 // CHECK-LABEL: @test_vuzp1_u16(
  71 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  72 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
  // Wraps vuzp1_u16; the checks above expect the even-numbered lanes of the concatenation a:b.
  73 uint16x4_t test_vuzp1_u16(uint16x4_t a, uint16x4_t b) {
  74   return vuzp1_u16(a, b);
  75 }
  76
  77 // CHECK-LABEL: @test_vuzp1q_u16(
  78 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  79 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
  // Wraps vuzp1q_u16; the checks above expect the even-numbered lanes of the concatenation a:b.
  80 uint16x8_t test_vuzp1q_u16(uint16x8_t a, uint16x8_t b) {
  81   return vuzp1q_u16(a, b);
  82 }
  83
  84 // CHECK-LABEL: @test_vuzp1_u32(
  85 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
  86 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
  // Wraps vuzp1_u32; the checks above expect the even-numbered lanes of the concatenation a:b.
  87 uint32x2_t test_vuzp1_u32(uint32x2_t a, uint32x2_t b) {
  88   return vuzp1_u32(a, b);
  89 }
  90
  91 // CHECK-LABEL: @test_vuzp1q_u32(
  92 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  93 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
  // Wraps vuzp1q_u32; the checks above expect the even-numbered lanes of the concatenation a:b.
  94 uint32x4_t test_vuzp1q_u32(uint32x4_t a, uint32x4_t b) {
  95   return vuzp1q_u32(a, b);
  96 }
  97
  98 // CHECK-LABEL: @test_vuzp1q_u64(
  99 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 100 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 // Wraps vuzp1q_u64; the checks above expect the even-numbered lanes of the concatenation a:b.
 101 uint64x2_t test_vuzp1q_u64(uint64x2_t a, uint64x2_t b) {
 102   return vuzp1q_u64(a, b);
 103 }
 104
 105 // CHECK-LABEL: @test_vuzp1_f32(
 106 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
 107 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
 // Wraps vuzp1_f32; the checks above expect the even-numbered lanes of the concatenation a:b.
 108 float32x2_t test_vuzp1_f32(float32x2_t a, float32x2_t b) {
 109   return vuzp1_f32(a, b);
 110 }
 111
 112 // CHECK-LABEL: @test_vuzp1q_f32(
 113 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 114 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
 // Wraps vuzp1q_f32; the checks above expect the even-numbered lanes of the concatenation a:b.
 115 float32x4_t test_vuzp1q_f32(float32x4_t a, float32x4_t b) {
 116   return vuzp1q_f32(a, b);
 117 }
 118
 119 // CHECK-LABEL: @test_vuzp1q_f64(
 120 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
 121 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
 // Wraps vuzp1q_f64; the checks above expect the even-numbered lanes of the concatenation a:b.
 122 float64x2_t test_vuzp1q_f64(float64x2_t a, float64x2_t b) {
 123   return vuzp1q_f64(a, b);
 124 }
 125
 126 // CHECK-LABEL: @test_vuzp1_p8(
 127 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 128 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 // Wraps vuzp1_p8; the checks above expect the even-numbered lanes of the concatenation a:b.
 129 poly8x8_t test_vuzp1_p8(poly8x8_t a, poly8x8_t b) {
 130   return vuzp1_p8(a, b);
 131 }
 132
 133 // CHECK-LABEL: @test_vuzp1q_p8(
 134 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
 135 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 // Wraps vuzp1q_p8; the checks above expect the even-numbered lanes of the concatenation a:b.
 136 poly8x16_t test_vuzp1q_p8(poly8x16_t a, poly8x16_t b) {
 137   return vuzp1q_p8(a, b);
 138 }
 139
 140 // CHECK-LABEL: @test_vuzp1_p16(
 141 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 142 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 // Wraps vuzp1_p16; the checks above expect the even-numbered lanes of the concatenation a:b.
 143 poly16x4_t test_vuzp1_p16(poly16x4_t a, poly16x4_t b) {
 144   return vuzp1_p16(a, b);
 145 }
 146
 147 // CHECK-LABEL: @test_vuzp1q_p16(
 148 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 149 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 // Wraps vuzp1q_p16; the checks above expect the even-numbered lanes of the concatenation a:b.
 150 poly16x8_t test_vuzp1q_p16(poly16x8_t a, poly16x8_t b) {
 151   return vuzp1q_p16(a, b);
 152 }
 153
 154 // CHECK-LABEL: @test_vuzp2_s8(
 155 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 156 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 // Wraps vuzp2_s8; the checks above expect the odd-numbered lanes of the concatenation a:b.
 157 int8x8_t test_vuzp2_s8(int8x8_t a, int8x8_t b) {
 158   return vuzp2_s8(a, b);
 159 }
 160
 161 // CHECK-LABEL: @test_vuzp2q_s8(
 162 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
 163 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 // Wraps vuzp2q_s8; the checks above expect the odd-numbered lanes of the concatenation a:b.
 164 int8x16_t test_vuzp2q_s8(int8x16_t a, int8x16_t b) {
 165   return vuzp2q_s8(a, b);
 166 }
 167
 168 // CHECK-LABEL: @test_vuzp2_s16(
 169 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 170 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 // Wraps vuzp2_s16; the checks above expect the odd-numbered lanes of the concatenation a:b.
 171 int16x4_t test_vuzp2_s16(int16x4_t a, int16x4_t b) {
 172   return vuzp2_s16(a, b);
 173 }
 174
 175 // CHECK-LABEL: @test_vuzp2q_s16(
 176 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 177 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 // Wraps vuzp2q_s16; the checks above expect the odd-numbered lanes of the concatenation a:b.
 178 int16x8_t test_vuzp2q_s16(int16x8_t a, int16x8_t b) {
 179   return vuzp2q_s16(a, b);
 180 }
 181
 182 // CHECK-LABEL: @test_vuzp2_s32(
 183 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 184 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 // Wraps vuzp2_s32; the checks above expect the odd-numbered lanes of the concatenation a:b.
 185 int32x2_t test_vuzp2_s32(int32x2_t a, int32x2_t b) {
 186   return vuzp2_s32(a, b);
 187 }
 188
 189 // CHECK-LABEL: @test_vuzp2q_s32(
 190 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 191 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 // Wraps vuzp2q_s32; the checks above expect the odd-numbered lanes of the concatenation a:b.
 192 int32x4_t test_vuzp2q_s32(int32x4_t a, int32x4_t b) {
 193   return vuzp2q_s32(a, b);
 194 }
 195
 196 // CHECK-LABEL: @test_vuzp2q_s64(
 197 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 198 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 // Wraps vuzp2q_s64; the checks above expect the odd-numbered lanes of the concatenation a:b.
 199 int64x2_t test_vuzp2q_s64(int64x2_t a, int64x2_t b) {
 200   return vuzp2q_s64(a, b);
 201 }
 202
 203 // CHECK-LABEL: @test_vuzp2_u8(
 204 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 205 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 // Wraps vuzp2_u8; the checks above expect the odd-numbered lanes of the concatenation a:b.
 206 uint8x8_t test_vuzp2_u8(uint8x8_t a, uint8x8_t b) {
 207   return vuzp2_u8(a, b);
 208 }
 209
 210 // CHECK-LABEL: @test_vuzp2q_u8(
 211 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
 212 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 // Wraps vuzp2q_u8; the checks above expect the odd-numbered lanes of the concatenation a:b.
 213 uint8x16_t test_vuzp2q_u8(uint8x16_t a, uint8x16_t b) {
 214   return vuzp2q_u8(a, b);
 215 }
 216
 217 // CHECK-LABEL: @test_vuzp2_u16(
 218 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 219 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 // Wraps vuzp2_u16; the checks above expect the odd-numbered lanes of the concatenation a:b.
 220 uint16x4_t test_vuzp2_u16(uint16x4_t a, uint16x4_t b) {
 221   return vuzp2_u16(a, b);
 222 }
 223
 224 // CHECK-LABEL: @test_vuzp2q_u16(
 225 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 226 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 // Wraps vuzp2q_u16; the checks above expect the odd-numbered lanes of the concatenation a:b.
 227 uint16x8_t test_vuzp2q_u16(uint16x8_t a, uint16x8_t b) {
 228   return vuzp2q_u16(a, b);
 229 }
 230
 231 // CHECK-LABEL: @test_vuzp2_u32(
 232 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 233 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 // Wraps vuzp2_u32; the checks above expect the odd-numbered lanes of the concatenation a:b.
 234 uint32x2_t test_vuzp2_u32(uint32x2_t a, uint32x2_t b) {
 235   return vuzp2_u32(a, b);
 236 }
 237
 238 // CHECK-LABEL: @test_vuzp2q_u32(
 239 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 240 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 // Wraps vuzp2q_u32; the checks above expect the odd-numbered lanes of the concatenation a:b.
 241 uint32x4_t test_vuzp2q_u32(uint32x4_t a, uint32x4_t b) {
 242   return vuzp2q_u32(a, b);
 243 }
 244
 245 // CHECK-LABEL: @test_vuzp2q_u64(
 246 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 247 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 // Wraps vuzp2q_u64; the checks above expect the odd-numbered lanes of the concatenation a:b.
 248 uint64x2_t test_vuzp2q_u64(uint64x2_t a, uint64x2_t b) {
 249   return vuzp2q_u64(a, b);
 250 }
 251
 252 // CHECK-LABEL: @test_vuzp2_f32(
 253 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
 254 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
 // Wraps vuzp2_f32; the checks above expect the odd-numbered lanes of the concatenation a:b.
 255 float32x2_t test_vuzp2_f32(float32x2_t a, float32x2_t b) {
 256   return vuzp2_f32(a, b);
 257 }
 258
 259 // CHECK-LABEL: @test_vuzp2q_f32(
 260 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 261 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
 // Wraps vuzp2q_f32; the checks above expect the odd-numbered lanes of the concatenation a:b.
 262 float32x4_t test_vuzp2q_f32(float32x4_t a, float32x4_t b) {
 263   return vuzp2q_f32(a, b);
 264 }
 265
 266 // CHECK-LABEL: @test_vuzp2q_f64(
 267 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
 268 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
 // Wraps vuzp2q_f64; the checks above expect the odd-numbered lanes of the concatenation a:b.
 269 float64x2_t test_vuzp2q_f64(float64x2_t a, float64x2_t b) {
 270   return vuzp2q_f64(a, b);
 271 }
 272
 273 // CHECK-LABEL: @test_vuzp2_p8(
 274 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 275 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 // Wraps vuzp2_p8; the checks above expect the odd-numbered lanes of the concatenation a:b.
 276 poly8x8_t test_vuzp2_p8(poly8x8_t a, poly8x8_t b) {
 277   return vuzp2_p8(a, b);
 278 }
 279
 280 // CHECK-LABEL: @test_vuzp2q_p8(
 281 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
 282 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 // Wraps vuzp2q_p8; the checks above expect the odd-numbered lanes of the concatenation a:b.
 283 poly8x16_t test_vuzp2q_p8(poly8x16_t a, poly8x16_t b) {
 284   return vuzp2q_p8(a, b);
 285 }
 286
 287 // CHECK-LABEL: @test_vuzp2_p16(
 288 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 289 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 // Wraps vuzp2_p16; the checks above expect the odd-numbered lanes of the concatenation a:b.
 290 poly16x4_t test_vuzp2_p16(poly16x4_t a, poly16x4_t b) {
 291   return vuzp2_p16(a, b);
 292 }
 293
 294 // CHECK-LABEL: @test_vuzp2q_p16(
 295 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 296 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 // Wraps vuzp2q_p16; the checks above expect the odd-numbered lanes of the concatenation a:b.
 297 poly16x8_t test_vuzp2q_p16(poly16x8_t a, poly16x8_t b) {
 298   return vuzp2q_p16(a, b);
 299 }
 300
 301 // CHECK-LABEL: @test_vzip1_s8(
 302 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 303 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 // Wraps vzip1_s8; the checks above expect the low halves of a and b interleaved lane by lane.
 304 int8x8_t test_vzip1_s8(int8x8_t a, int8x8_t b) {
 305   return vzip1_s8(a, b);
 306 }
 307
 308 // CHECK-LABEL: @test_vzip1q_s8(
 309 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
 310 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 // Wraps vzip1q_s8; the checks above expect the low halves of a and b interleaved lane by lane.
 311 int8x16_t test_vzip1q_s8(int8x16_t a, int8x16_t b) {
 312   return vzip1q_s8(a, b);
 313 }
 314
 315 // CHECK-LABEL: @test_vzip1_s16(
 316 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 317 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 // Wraps vzip1_s16; the checks above expect the low halves of a and b interleaved lane by lane.
 318 int16x4_t test_vzip1_s16(int16x4_t a, int16x4_t b) {
 319   return vzip1_s16(a, b);
 320 }
 321
 322 // CHECK-LABEL: @test_vzip1q_s16(
 323 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 324 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 // Wraps vzip1q_s16; the checks above expect the low halves of a and b interleaved lane by lane.
 325 int16x8_t test_vzip1q_s16(int16x8_t a, int16x8_t b) {
 326   return vzip1q_s16(a, b);
 327 }
 328
 329 // CHECK-LABEL: @test_vzip1_s32(
 330 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 331 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 // Wraps vzip1_s32; the checks above expect the low halves of a and b interleaved lane by lane.
 332 int32x2_t test_vzip1_s32(int32x2_t a, int32x2_t b) {
 333   return vzip1_s32(a, b);
 334 }
 335
 336 // CHECK-LABEL: @test_vzip1q_s32(
 337 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 338 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 // Wraps vzip1q_s32; the checks above expect the low halves of a and b interleaved lane by lane.
 339 int32x4_t test_vzip1q_s32(int32x4_t a, int32x4_t b) {
 340   return vzip1q_s32(a, b);
 341 }
 342
 343 // CHECK-LABEL: @test_vzip1q_s64(
 344 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 345 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 // Wraps vzip1q_s64; the checks above expect the low halves of a and b interleaved lane by lane.
 346 int64x2_t test_vzip1q_s64(int64x2_t a, int64x2_t b) {
 347   return vzip1q_s64(a, b);
 348 }
 349
 350 // CHECK-LABEL: @test_vzip1_u8(
 351 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 352 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 // Wraps vzip1_u8; the checks above expect the low halves of a and b interleaved lane by lane.
 353 uint8x8_t test_vzip1_u8(uint8x8_t a, uint8x8_t b) {
 354   return vzip1_u8(a, b);
 355 }
 356
 357 // CHECK-LABEL: @test_vzip1q_u8(
 358 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
 359 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 // Wraps vzip1q_u8; the checks above expect the low halves of a and b interleaved lane by lane.
 360 uint8x16_t test_vzip1q_u8(uint8x16_t a, uint8x16_t b) {
 361   return vzip1q_u8(a, b);
 362 }
 363
 364 // CHECK-LABEL: @test_vzip1_u16(
 365 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 366 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 // Wraps vzip1_u16; the checks above expect the low halves of a and b interleaved lane by lane.
 367 uint16x4_t test_vzip1_u16(uint16x4_t a, uint16x4_t b) {
 368   return vzip1_u16(a, b);
 369 }
 370
 371 // CHECK-LABEL: @test_vzip1q_u16(
 372 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 373 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 // Wraps vzip1q_u16; the checks above expect the low halves of a and b interleaved lane by lane.
 374 uint16x8_t test_vzip1q_u16(uint16x8_t a, uint16x8_t b) {
 375   return vzip1q_u16(a, b);
 376 }
 377
 378 // CHECK-LABEL: @test_vzip1_u32(
 379 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 380 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 // Wraps vzip1_u32; the checks above expect the low halves of a and b interleaved lane by lane.
 381 uint32x2_t test_vzip1_u32(uint32x2_t a, uint32x2_t b) {
 382   return vzip1_u32(a, b);
 383 }
 384
 385 // CHECK-LABEL: @test_vzip1q_u32(
 386 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 387 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 // Wraps vzip1q_u32; the checks above expect the low halves of a and b interleaved lane by lane.
 388 uint32x4_t test_vzip1q_u32(uint32x4_t a, uint32x4_t b) {
 389   return vzip1q_u32(a, b);
 390 }
 391
 392 // CHECK-LABEL: @test_vzip1q_u64(
 393 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 394 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 // Wraps vzip1q_u64; the checks above expect the low halves of a and b interleaved lane by lane.
 395 uint64x2_t test_vzip1q_u64(uint64x2_t a, uint64x2_t b) {
 396   return vzip1q_u64(a, b);
 397 }
 398
 399 // CHECK-LABEL: @test_vzip1_f32(
 400 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
 401 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
 // Wraps vzip1_f32; the checks above expect the low halves of a and b interleaved lane by lane.
 402 float32x2_t test_vzip1_f32(float32x2_t a, float32x2_t b) {
 403   return vzip1_f32(a, b);
 404 }
 405
 406 // CHECK-LABEL: @test_vzip1q_f32(
 407 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 408 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
 // Wraps vzip1q_f32; the checks above expect the low halves of a and b interleaved lane by lane.
 409 float32x4_t test_vzip1q_f32(float32x4_t a, float32x4_t b) {
 410   return vzip1q_f32(a, b);
 411 }
 412
 413 // CHECK-LABEL: @test_vzip1q_f64(
 414 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
 415 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
 // Wraps vzip1q_f64; the checks above expect the low halves of a and b interleaved lane by lane.
 416 float64x2_t test_vzip1q_f64(float64x2_t a, float64x2_t b) {
 417   return vzip1q_f64(a, b);
 418 }
 419
 420 // CHECK-LABEL: @test_vzip1_p8(
 421 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 422 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 // Wraps vzip1_p8; the checks above expect the low halves of a and b interleaved lane by lane.
 423 poly8x8_t test_vzip1_p8(poly8x8_t a, poly8x8_t b) {
 424   return vzip1_p8(a, b);
 425 }
 426
 427 // CHECK-LABEL: @test_vzip1q_p8(
 428 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
 429 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 // Wraps vzip1q_p8; the checks above expect the low halves of a and b interleaved lane by lane.
 430 poly8x16_t test_vzip1q_p8(poly8x16_t a, poly8x16_t b) {
 431   return vzip1q_p8(a, b);
 432 }
 433
 434 // CHECK-LABEL: @test_vzip1_p16(
 435 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 436 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 // Wraps vzip1_p16; the checks above expect the low halves of a and b interleaved lane by lane.
 437 poly16x4_t test_vzip1_p16(poly16x4_t a, poly16x4_t b) {
 438   return vzip1_p16(a, b);
 439 }
 440
 441 // CHECK-LABEL: @test_vzip1q_p16(
 442 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 443 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 // Wraps vzip1q_p16; the checks above expect the low halves of a and b interleaved lane by lane.
 444 poly16x8_t test_vzip1q_p16(poly16x8_t a, poly16x8_t b) {
 445   return vzip1q_p16(a, b);
 446 }
 447
 448 // CHECK-LABEL: @test_vzip2_s8(
 449 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 450 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 // Wraps vzip2_s8; the checks above expect the high halves of a and b interleaved lane by lane.
 451 int8x8_t test_vzip2_s8(int8x8_t a, int8x8_t b) {
 452   return vzip2_s8(a, b);
 453 }
 454
 455 // CHECK-LABEL: @test_vzip2q_s8(
 456 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
 457 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 // Wraps vzip2q_s8; the checks above expect the high halves of a and b interleaved lane by lane.
 458 int8x16_t test_vzip2q_s8(int8x16_t a, int8x16_t b) {
 459   return vzip2q_s8(a, b);
 460 }
 461
 462 // CHECK-LABEL: @test_vzip2_s16(
 463 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 464 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 // Wraps vzip2_s16; the checks above expect the high halves of a and b interleaved lane by lane.
 465 int16x4_t test_vzip2_s16(int16x4_t a, int16x4_t b) {
 466   return vzip2_s16(a, b);
 467 }
 468
 469 // CHECK-LABEL: @test_vzip2q_s16(
 470 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 471 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 // Wraps vzip2q_s16; the checks above expect the high halves of a and b interleaved lane by lane.
 472 int16x8_t test_vzip2q_s16(int16x8_t a, int16x8_t b) {
 473   return vzip2q_s16(a, b);
 474 }
 475
 476 // CHECK-LABEL: @test_vzip2_s32(
 477 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 478 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 // Wraps vzip2_s32; the checks above expect the high halves of a and b interleaved lane by lane.
 479 int32x2_t test_vzip2_s32(int32x2_t a, int32x2_t b) {
 480   return vzip2_s32(a, b);
 481 }
 482
 483 // CHECK-LABEL: @test_vzip2q_s32(
 484 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 485 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 // Wraps vzip2q_s32; the checks above expect the high halves of a and b interleaved lane by lane.
 486 int32x4_t test_vzip2q_s32(int32x4_t a, int32x4_t b) {
 487   return vzip2q_s32(a, b);
 488 }
 489
 490 // CHECK-LABEL: @test_vzip2q_s64(
 491 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 492 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 // Wraps vzip2q_s64; the checks above expect the high halves of a and b interleaved lane by lane.
 493 int64x2_t test_vzip2q_s64(int64x2_t a, int64x2_t b) {
 494   return vzip2q_s64(a, b);
 495 }
 496
 497 // CHECK-LABEL: @test_vzip2_u8(
 498 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 499 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 // Wraps vzip2_u8; the checks above expect the high halves of a and b interleaved lane by lane.
 500 uint8x8_t test_vzip2_u8(uint8x8_t a, uint8x8_t b) {
 501   return vzip2_u8(a, b);
 502 }
 503
 504 // CHECK-LABEL: @test_vzip2q_u8(
 505 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
 506 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 // Wraps vzip2q_u8; the checks above expect the high halves of a and b interleaved lane by lane.
 507 uint8x16_t test_vzip2q_u8(uint8x16_t a, uint8x16_t b) {
 508   return vzip2q_u8(a, b);
 509 }
 510
 511 // CHECK-LABEL: @test_vzip2_u16(
 512 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 513 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 // Wraps vzip2_u16; the checks above expect the high halves of a and b interleaved lane by lane.
 514 uint16x4_t test_vzip2_u16(uint16x4_t a, uint16x4_t b) {
 515   return vzip2_u16(a, b);
 516 }
 517
 518 // CHECK-LABEL: @test_vzip2q_u16(
 519 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 520 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 // Wraps vzip2q_u16; the checks above expect the high halves of a and b interleaved lane by lane.
 521 uint16x8_t test_vzip2q_u16(uint16x8_t a, uint16x8_t b) {
 522   return vzip2q_u16(a, b);
 523 }
 524
 525 // CHECK-LABEL: @test_vzip2_u32(
 526 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 527 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 // Wraps vzip2_u32; the checks above expect the high halves of a and b interleaved lane by lane.
 528 uint32x2_t test_vzip2_u32(uint32x2_t a, uint32x2_t b) {
 529   return vzip2_u32(a, b);
 530 }
 531
 532 // CHECK-LABEL: @test_vzip2q_u32(
 533 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 534 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 // Wraps vzip2q_u32; the checks above expect the high halves of a and b interleaved lane by lane.
 535 uint32x4_t test_vzip2q_u32(uint32x4_t a, uint32x4_t b) {
 536   return vzip2q_u32(a, b);
 537 }
 538
 539 // CHECK-LABEL: @test_vzip2q_u64(
 540 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 541 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 // Wraps vzip2q_u64; the checks above expect the high halves of a and b interleaved lane by lane.
 542 uint64x2_t test_vzip2q_u64(uint64x2_t a, uint64x2_t b) {
 543   return vzip2q_u64(a, b);
 544 }
 545
 546 // CHECK-LABEL: @test_vzip2_f32(
 547 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
 548 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
 // Wraps vzip2_f32; the checks above expect the high halves of a and b interleaved lane by lane.
 549 float32x2_t test_vzip2_f32(float32x2_t a, float32x2_t b) {
 550   return vzip2_f32(a, b);
 551 }
 552
 553 // CHECK-LABEL: @test_vzip2q_f32(
 554 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 555 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
 // Wraps vzip2q_f32; the checks above expect the high halves of a and b interleaved lane by lane.
 556 float32x4_t test_vzip2q_f32(float32x4_t a, float32x4_t b) {
 557   return vzip2q_f32(a, b);
 558 }
 559
 560 // CHECK-LABEL: @test_vzip2q_f64(
 561 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
 562 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
 // Wraps vzip2q_f64; the checks above expect the high halves of a and b interleaved lane by lane.
 563 float64x2_t test_vzip2q_f64(float64x2_t a, float64x2_t b) {
 564   return vzip2q_f64(a, b);
 565 }
 566
 567 // CHECK-LABEL: @test_vzip2_p8(
 568 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 569 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 // Wraps vzip2_p8; the checks above expect the high halves of a and b interleaved lane by lane.
 570 poly8x8_t test_vzip2_p8(poly8x8_t a, poly8x8_t b) {
 571   return vzip2_p8(a, b);
 572 }
 573
 574 // CHECK-LABEL: @test_vzip2q_p8(
 575 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
 576 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 // Wraps vzip2q_p8; the checks above expect the high halves of a and b interleaved lane by lane.
 577 poly8x16_t test_vzip2q_p8(poly8x16_t a, poly8x16_t b) {
 578   return vzip2q_p8(a, b);
 579 }
 580
 581 // CHECK-LABEL: @test_vzip2_p16(
 582 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 583 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 // Wraps vzip2_p16; the checks above expect the high halves of a and b interleaved lane by lane.
 584 poly16x4_t test_vzip2_p16(poly16x4_t a, poly16x4_t b) {
 585   return vzip2_p16(a, b);
 586 }
 587
 588 // CHECK-LABEL: @test_vzip2q_p16(
 589 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 590 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 // Wraps vzip2q_p16; the checks above expect the high halves of a and b interleaved lane by lane.
 591 poly16x8_t test_vzip2q_p16(poly16x8_t a, poly16x8_t b) {
 592   return vzip2q_p16(a, b);
 593 }
 594
 595 // CHECK-LABEL: @test_vtrn1_s8(
 596 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 597 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 // Wraps vtrn1_s8; the checks above expect the even-numbered lanes of a and of b, interleaved.
 598 int8x8_t test_vtrn1_s8(int8x8_t a, int8x8_t b) {
 599   return vtrn1_s8(a, b);
 600 }
 601
 602 // CHECK-LABEL: @test_vtrn1q_s8(
 603 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
 604 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 // Wraps vtrn1q_s8; the checks above expect the even-numbered lanes of a and of b, interleaved.
 605 int8x16_t test_vtrn1q_s8(int8x16_t a, int8x16_t b) {
 606   return vtrn1q_s8(a, b);
 607 }
 608
 609 // CHECK-LABEL: @test_vtrn1_s16(
 610 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 611 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 // Wraps vtrn1_s16; the checks above expect the even-numbered lanes of a and of b, interleaved.
 612 int16x4_t test_vtrn1_s16(int16x4_t a, int16x4_t b) {
 613   return vtrn1_s16(a, b);
 614 }
 615
 616 // CHECK-LABEL: @test_vtrn1q_s16(
 617 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 618 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 // Wraps vtrn1q_s16; the checks above expect the even-numbered lanes of a and of b, interleaved.
 619 int16x8_t test_vtrn1q_s16(int16x8_t a, int16x8_t b) {
 620   return vtrn1q_s16(a, b);
 621 }
 622
 623 // CHECK-LABEL: @test_vtrn1_s32(
 624 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 625 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 // Wraps vtrn1_s32; the checks above expect the even-numbered lanes of a and of b, interleaved.
 626 int32x2_t test_vtrn1_s32(int32x2_t a, int32x2_t b) {
 627   return vtrn1_s32(a, b);
 628 }
 629
 630 // CHECK-LABEL: @test_vtrn1q_s32(
 631 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 632 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 // Wraps vtrn1q_s32; the checks above expect the even-numbered lanes of a and of b, interleaved.
 633 int32x4_t test_vtrn1q_s32(int32x4_t a, int32x4_t b) {
 634   return vtrn1q_s32(a, b);
 635 }
 636
 637 // CHECK-LABEL: @test_vtrn1q_s64(
 638 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 639 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 // Wraps vtrn1q_s64; the checks above expect the even-numbered lanes of a and of b, interleaved.
 640 int64x2_t test_vtrn1q_s64(int64x2_t a, int64x2_t b) {
 641   return vtrn1q_s64(a, b);
 642 }
 643
 644 // CHECK-LABEL: @test_vtrn1_u8(
 645 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 646 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 // Wraps vtrn1_u8; the checks above expect the even-numbered lanes of a and of b, interleaved.
 647 uint8x8_t test_vtrn1_u8(uint8x8_t a, uint8x8_t b) {
 648   return vtrn1_u8(a, b);
 649 }
 650
 651 // CHECK-LABEL: @test_vtrn1q_u8(
 652 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
 653 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 // Wraps vtrn1q_u8; the checks above expect the even-numbered lanes of a and of b, interleaved.
 654 uint8x16_t test_vtrn1q_u8(uint8x16_t a, uint8x16_t b) {
 655   return vtrn1q_u8(a, b);
 656 }
 657
 658 // CHECK-LABEL: @test_vtrn1_u16(
 659 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 660 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 // Wraps vtrn1_u16; the checks above expect the even-numbered lanes of a and of b, interleaved.
 661 uint16x4_t test_vtrn1_u16(uint16x4_t a, uint16x4_t b) {
 662   return vtrn1_u16(a, b);
 663 }
 664
 665 // CHECK-LABEL: @test_vtrn1q_u16(
 666 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 667 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 // Wraps vtrn1q_u16; the checks above expect the even-numbered lanes of a and of b, interleaved.
 668 uint16x8_t test_vtrn1q_u16(uint16x8_t a, uint16x8_t b) {
 669   return vtrn1q_u16(a, b);
 670 }
 671
 672 // CHECK-LABEL: @test_vtrn1_u32(
 673 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 674 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 // Wraps vtrn1_u32; the checks above expect the even-numbered lanes of a and of b, interleaved.
 675 uint32x2_t test_vtrn1_u32(uint32x2_t a, uint32x2_t b) {
 676   return vtrn1_u32(a, b);
 677 }
 678
 679 // CHECK-LABEL: @test_vtrn1q_u32(
 680 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 681 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 // Wraps vtrn1q_u32; the checks above expect the even-numbered lanes of a and of b, interleaved.
 682 uint32x4_t test_vtrn1q_u32(uint32x4_t a, uint32x4_t b) {
 683   return vtrn1q_u32(a, b);
 684 }
 685
 686 // CHECK-LABEL: @test_vtrn1q_u64(
 687 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 688 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 // Wraps vtrn1q_u64; the checks above expect the even-numbered lanes of a and of b, interleaved.
 689 uint64x2_t test_vtrn1q_u64(uint64x2_t a, uint64x2_t b) {
 690   return vtrn1q_u64(a, b);
 691 }
 692
 693 // CHECK-LABEL: @test_vtrn1_f32(
 694 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
 695 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
 696 float32x2_t test_vtrn1_f32(float32x2_t a, float32x2_t b) {  // Codegen test: wraps vtrn1_f32 (IR type <2 x float>) for the directives above.
 697   return vtrn1_f32(a, b);  // expected shuffle mask <0, 2>: lane 0 of a, then lane 0 of b
 698 }
 699
 700 // CHECK-LABEL: @test_vtrn1q_f32(
 701 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 702 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
 703 float32x4_t test_vtrn1q_f32(float32x4_t a, float32x4_t b) {  // Codegen test: wraps vtrn1q_f32 (IR type <4 x float>) for the directives above.
 704   return vtrn1q_f32(a, b);  // expected shuffle mask <0, 4, 2, 6>: even lanes of a and b, interleaved
 705 }
 706
 707 // CHECK-LABEL: @test_vtrn1q_f64(
 708 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
 709 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
 710 float64x2_t test_vtrn1q_f64(float64x2_t a, float64x2_t b) {  // Codegen test: wraps vtrn1q_f64 (IR type <2 x double>) for the directives above.
 711   return vtrn1q_f64(a, b);  // expected shuffle mask <0, 2>: lane 0 of a, then lane 0 of b
 712 }
 713
 714 // CHECK-LABEL: @test_vtrn1_p8(
 715 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 716 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 717 poly8x8_t test_vtrn1_p8(poly8x8_t a, poly8x8_t b) {  // Codegen test: wraps vtrn1_p8 (IR type <8 x i8>) for the directives above.
 718   return vtrn1_p8(a, b);  // expected shuffle mask <0, 8, 2, 10, 4, 12, 6, 14>: even lanes of a and b, interleaved
 719 }
 720
 721 // CHECK-LABEL: @test_vtrn1q_p8(
 722 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
 723 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 724 poly8x16_t test_vtrn1q_p8(poly8x16_t a, poly8x16_t b) {  // Codegen test: wraps vtrn1q_p8 (IR type <16 x i8>) for the directives above.
 725   return vtrn1q_p8(a, b);  // expected shuffle mask <0, 16, 2, 18, ..., 14, 30>: even lanes of a and b, interleaved
 726 }
 727
 728 // CHECK-LABEL: @test_vtrn1_p16(
 729 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 730 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 731 poly16x4_t test_vtrn1_p16(poly16x4_t a, poly16x4_t b) {  // Codegen test: wraps vtrn1_p16 (IR type <4 x i16>) for the directives above.
 732   return vtrn1_p16(a, b);  // expected shuffle mask <0, 4, 2, 6>: even lanes of a and b, interleaved
 733 }
 734
 735 // CHECK-LABEL: @test_vtrn1q_p16(
 736 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 737 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 738 poly16x8_t test_vtrn1q_p16(poly16x8_t a, poly16x8_t b) {  // Codegen test: wraps vtrn1q_p16 (IR type <8 x i16>) for the directives above.
 739   return vtrn1q_p16(a, b);  // expected shuffle mask <0, 8, 2, 10, 4, 12, 6, 14>: even lanes of a and b, interleaved
 740 }
 741
 742 // CHECK-LABEL: @test_vtrn2_s8(
 743 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 744 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 745 int8x8_t test_vtrn2_s8(int8x8_t a, int8x8_t b) {  // Codegen test: wraps vtrn2_s8 (IR type <8 x i8>) for the directives above.
 746   return vtrn2_s8(a, b);  // expected shuffle mask <1, 9, 3, 11, 5, 13, 7, 15>: odd lanes of a and b, interleaved
 747 }
 748
 749 // CHECK-LABEL: @test_vtrn2q_s8(
 750 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
 751 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 752 int8x16_t test_vtrn2q_s8(int8x16_t a, int8x16_t b) {  // Codegen test: wraps vtrn2q_s8 (IR type <16 x i8>) for the directives above.
 753   return vtrn2q_s8(a, b);  // expected shuffle mask <1, 17, 3, 19, ..., 15, 31>: odd lanes of a and b, interleaved
 754 }
 755
 756 // CHECK-LABEL: @test_vtrn2_s16(
 757 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 758 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 759 int16x4_t test_vtrn2_s16(int16x4_t a, int16x4_t b) {  // Codegen test: wraps vtrn2_s16 (IR type <4 x i16>) for the directives above.
 760   return vtrn2_s16(a, b);  // expected shuffle mask <1, 5, 3, 7>: odd lanes of a and b, interleaved
 761 }
 762
 763 // CHECK-LABEL: @test_vtrn2q_s16(
 764 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 765 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 766 int16x8_t test_vtrn2q_s16(int16x8_t a, int16x8_t b) {  // Codegen test: wraps vtrn2q_s16 (IR type <8 x i16>) for the directives above.
 767   return vtrn2q_s16(a, b);  // expected shuffle mask <1, 9, 3, 11, 5, 13, 7, 15>: odd lanes of a and b, interleaved
 768 }
 769
 770 // CHECK-LABEL: @test_vtrn2_s32(
 771 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 772 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 773 int32x2_t test_vtrn2_s32(int32x2_t a, int32x2_t b) {  // Codegen test: wraps vtrn2_s32 (IR type <2 x i32>) for the directives above.
 774   return vtrn2_s32(a, b);  // expected shuffle mask <1, 3>: lane 1 of a, then lane 1 of b
 775 }
 776
 777 // CHECK-LABEL: @test_vtrn2q_s32(
 778 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 779 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 780 int32x4_t test_vtrn2q_s32(int32x4_t a, int32x4_t b) {  // Codegen test: wraps vtrn2q_s32 (IR type <4 x i32>) for the directives above.
 781   return vtrn2q_s32(a, b);  // expected shuffle mask <1, 5, 3, 7>: odd lanes of a and b, interleaved
 782 }
 783
 784 // CHECK-LABEL: @test_vtrn2q_s64(
 785 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 786 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 787 int64x2_t test_vtrn2q_s64(int64x2_t a, int64x2_t b) {  // Codegen test: wraps vtrn2q_s64 (IR type <2 x i64>) for the directives above.
 788   return vtrn2q_s64(a, b);  // expected shuffle mask <1, 3>: lane 1 of a, then lane 1 of b
 789 }
 790
 791 // CHECK-LABEL: @test_vtrn2_u8(
 792 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 793 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 794 uint8x8_t test_vtrn2_u8(uint8x8_t a, uint8x8_t b) {  // Codegen test: wraps vtrn2_u8 (IR type <8 x i8>) for the directives above.
 795   return vtrn2_u8(a, b);  // expected shuffle mask <1, 9, 3, 11, 5, 13, 7, 15>: odd lanes of a and b, interleaved
 796 }
 797
 798 // CHECK-LABEL: @test_vtrn2q_u8(
 799 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
 800 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 801 uint8x16_t test_vtrn2q_u8(uint8x16_t a, uint8x16_t b) {  // Codegen test: wraps vtrn2q_u8 (IR type <16 x i8>) for the directives above.
 802   return vtrn2q_u8(a, b);  // expected shuffle mask <1, 17, 3, 19, ..., 15, 31>: odd lanes of a and b, interleaved
 803 }
 804
 805 // CHECK-LABEL: @test_vtrn2_u16(
 806 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 807 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 808 uint16x4_t test_vtrn2_u16(uint16x4_t a, uint16x4_t b) {  // Codegen test: wraps vtrn2_u16 (IR type <4 x i16>) for the directives above.
 809   return vtrn2_u16(a, b);  // expected shuffle mask <1, 5, 3, 7>: odd lanes of a and b, interleaved
 810 }
 811
 812 // CHECK-LABEL: @test_vtrn2q_u16(
 813 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 814 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 815 uint16x8_t test_vtrn2q_u16(uint16x8_t a, uint16x8_t b) {  // Codegen test: wraps vtrn2q_u16 (IR type <8 x i16>) for the directives above.
 816   return vtrn2q_u16(a, b);  // expected shuffle mask <1, 9, 3, 11, 5, 13, 7, 15>: odd lanes of a and b, interleaved
 817 }
 818
 819 // CHECK-LABEL: @test_vtrn2_u32(
 820 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 821 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 822 uint32x2_t test_vtrn2_u32(uint32x2_t a, uint32x2_t b) {  // Codegen test: wraps vtrn2_u32 (IR type <2 x i32>) for the directives above.
 823   return vtrn2_u32(a, b);  // expected shuffle mask <1, 3>: lane 1 of a, then lane 1 of b
 824 }
 825
 826 // CHECK-LABEL: @test_vtrn2q_u32(
 827 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 828 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 829 uint32x4_t test_vtrn2q_u32(uint32x4_t a, uint32x4_t b) {  // Codegen test: wraps vtrn2q_u32 (IR type <4 x i32>) for the directives above.
 830   return vtrn2q_u32(a, b);  // expected shuffle mask <1, 5, 3, 7>: odd lanes of a and b, interleaved
 831 }
 832
 833 // CHECK-LABEL: @test_vtrn2q_u64(
 834 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 835 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 836 uint64x2_t test_vtrn2q_u64(uint64x2_t a, uint64x2_t b) {  // Codegen test: wraps vtrn2q_u64 (IR type <2 x i64>) for the directives above.
 837   return vtrn2q_u64(a, b);  // expected shuffle mask <1, 3>: lane 1 of a, then lane 1 of b
 838 }
 839
 840 // CHECK-LABEL: @test_vtrn2_f32(
 841 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
 842 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
 843 float32x2_t test_vtrn2_f32(float32x2_t a, float32x2_t b) {  // Codegen test: wraps vtrn2_f32 (IR type <2 x float>) for the directives above.
 844   return vtrn2_f32(a, b);  // expected shuffle mask <1, 3>: lane 1 of a, then lane 1 of b
 845 }
 846
 847 // CHECK-LABEL: @test_vtrn2q_f32(
 848 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 849 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
 850 float32x4_t test_vtrn2q_f32(float32x4_t a, float32x4_t b) {  // Codegen test: wraps vtrn2q_f32 (IR type <4 x float>) for the directives above.
 851   return vtrn2q_f32(a, b);  // expected shuffle mask <1, 5, 3, 7>: odd lanes of a and b, interleaved
 852 }
 853
 854 // CHECK-LABEL: @test_vtrn2q_f64(
 855 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
 856 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
 857 float64x2_t test_vtrn2q_f64(float64x2_t a, float64x2_t b) {  // Codegen test: wraps vtrn2q_f64 (IR type <2 x double>) for the directives above.
 858   return vtrn2q_f64(a, b);  // expected shuffle mask <1, 3>: lane 1 of a, then lane 1 of b
 859 }
 860
 861 // CHECK-LABEL: @test_vtrn2_p8(
 862 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 863 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 864 poly8x8_t test_vtrn2_p8(poly8x8_t a, poly8x8_t b) {  // Codegen test: wraps vtrn2_p8 (IR type <8 x i8>) for the directives above.
 865   return vtrn2_p8(a, b);  // expected shuffle mask <1, 9, 3, 11, 5, 13, 7, 15>: odd lanes of a and b, interleaved
 866 }
 867
 868 // CHECK-LABEL: @test_vtrn2q_p8(
 869 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
 870 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 871 poly8x16_t test_vtrn2q_p8(poly8x16_t a, poly8x16_t b) {  // Codegen test: wraps vtrn2q_p8 (IR type <16 x i8>) for the directives above.
 872   return vtrn2q_p8(a, b);  // expected shuffle mask <1, 17, 3, 19, ..., 15, 31>: odd lanes of a and b, interleaved
 873 }
 874
 875 // CHECK-LABEL: @test_vtrn2_p16(
 876 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 877 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 878 poly16x4_t test_vtrn2_p16(poly16x4_t a, poly16x4_t b) {  // Codegen test: wraps vtrn2_p16 (IR type <4 x i16>) for the directives above.
 879   return vtrn2_p16(a, b);  // expected shuffle mask <1, 5, 3, 7>: odd lanes of a and b, interleaved
 880 }
 881
 882 // CHECK-LABEL: @test_vtrn2q_p16(
 883 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 884 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 885 poly16x8_t test_vtrn2q_p16(poly16x8_t a, poly16x8_t b) {  // Codegen test: wraps vtrn2q_p16 (IR type <8 x i16>) for the directives above.
 886   return vtrn2q_p16(a, b);  // expected shuffle mask <1, 9, 3, 11, 5, 13, 7, 15>: odd lanes of a and b, interleaved
 887 }
 888
 889 // CHECK-LABEL: @test_vuzp_s8(
 890 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
 891 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
 892 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
 893 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
 894 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
 895 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 896 // CHECK:   store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
 897 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
 898 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 899 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
 900 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8*
 901 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
 902 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
 903 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8
 904 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0
 905 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
 906 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
 907 // CHECK:   [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
 908 // CHECK:   ret %struct.int8x8x2_t [[TMP8]]
 909 int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {  // Codegen test: wraps vuzp_s8, whose result is a struct of two <8 x i8> vectors.
 910   return vuzp_s8(a, b);  // expected: two shufflevectors of a/b (even-index mask, then odd-index mask) stored to consecutive result slots
 911 }
 912
 913 // CHECK-LABEL: @test_vuzp_s16(
 914 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
 915 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
 916 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
 917 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
 918 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 919 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 920 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
 921 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 922 // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
 923 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
 924 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 925 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]]
 926 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
 927 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
 928 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
 929 // CHECK:   [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
 930 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
 931 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
 932 // CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
 933 // CHECK:   [[TMP10:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
 934 // CHECK:   ret %struct.int16x4x2_t [[TMP10]]
 935 int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {  // Codegen test: wraps vuzp_s16, whose result is a struct of two <4 x i16> vectors.
 936   return vuzp_s16(a, b);  // expected: shuffle masks <0, 2, 4, 6> then <1, 3, 5, 7>, stored to consecutive result slots
 937 }
 938
 939 // CHECK-LABEL: @test_vuzp_s32(
 940 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
 941 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
 942 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
 943 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
 944 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 945 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 946 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
 947 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 948 // CHECK:   store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]]
 949 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
 950 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 951 // CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]]
 952 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
 953 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
 954 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
 955 // CHECK:   [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
 956 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
 957 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
 958 // CHECK:   store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
 959 // CHECK:   [[TMP10:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
 960 // CHECK:   ret %struct.int32x2x2_t [[TMP10]]
 961 int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {  // Codegen test: wraps vuzp_s32, whose result is a struct of two <2 x i32> vectors.
 962   return vuzp_s32(a, b);  // expected: shuffle masks <0, 2> then <1, 3>, stored to consecutive result slots
 963 }
 964
 965 // CHECK-LABEL: @test_vuzp_u8(
 966 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
 967 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
 968 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
 969 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
 970 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
 971 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 972 // CHECK:   store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
 973 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
 974 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 975 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
 976 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8*
 977 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
 978 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
 979 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8
 980 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0
 981 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
 982 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
 983 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
 984 // CHECK:   ret %struct.uint8x8x2_t [[TMP8]]
 985 uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {  // Codegen test: wraps vuzp_u8, whose result is a struct of two <8 x i8> vectors.
 986   return vuzp_u8(a, b);  // expected: two shufflevectors of a/b (even-index mask, then odd-index mask) stored to consecutive result slots
 987 }
 988
 989 // CHECK-LABEL: @test_vuzp_u16(
 990 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
 991 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
 992 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
 993 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
 994 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 995 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 996 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
 997 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 998 // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
 999 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1000 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1001 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]]
1002 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
1003 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
1004 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
1005 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
1006 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
1007 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
1008 // CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
1009 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
1010 // CHECK:   ret %struct.uint16x4x2_t [[TMP10]]
1011 uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {  // Codegen test: wraps vuzp_u16, whose result is a struct of two <4 x i16> vectors.
1012   return vuzp_u16(a, b);  // expected: shuffle masks <0, 2, 4, 6> then <1, 3, 5, 7>, stored to consecutive result slots
1013 }
1014
1015 // CHECK-LABEL: @test_vuzp_u32(
1016 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1017 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1018 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
1019 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
1020 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1021 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1022 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1023 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
1024 // CHECK:   store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]]
1025 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1026 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
1027 // CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]]
1028 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
1029 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
1030 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
1031 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
1032 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
1033 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
1034 // CHECK:   store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
1035 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
1036 // CHECK:   ret %struct.uint32x2x2_t [[TMP10]]
1037 uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {  // Codegen test: wraps vuzp_u32, whose result is a struct of two <2 x i32> vectors.
1038   return vuzp_u32(a, b);  // expected: shuffle masks <0, 2> then <1, 3>, stored to consecutive result slots
1039 }
1040
1041 // CHECK-LABEL: @test_vuzp_f32(
1042 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1043 // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1044 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
1045 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
1046 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
1047 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
1048 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
1049 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
1050 // CHECK:   store <2 x float> [[VUZP_I]], <2 x float>* [[TMP3]]
1051 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
1052 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
1053 // CHECK:   store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP4]]
1054 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
1055 // CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
1056 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
1057 // CHECK:   [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
1058 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
1059 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
1060 // CHECK:   store [2 x <2 x float>] [[TMP9]], [2 x <2 x float>]* [[TMP8]], align 8
1061 // CHECK:   [[TMP10:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
1062 // CHECK:   ret %struct.float32x2x2_t [[TMP10]]
1063 float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {  // Codegen test: wraps vuzp_f32, whose result is a struct of two <2 x float> vectors.
1064   return vuzp_f32(a, b);  // expected: shuffle masks <0, 2> then <1, 3>, stored to consecutive result slots
1065 }
1066
1067 // CHECK-LABEL: @test_vuzp_p8(
1068 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1069 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1070 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
1071 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
1072 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1073 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1074 // CHECK:   store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
1075 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1076 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1077 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
1078 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8*
1079 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
1080 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
1081 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8
1082 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0
1083 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
1084 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1085 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
1086 // CHECK:   ret %struct.poly8x8x2_t [[TMP8]]
1087 poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {  // Codegen test: wraps vuzp_p8, whose result is a struct of two <8 x i8> vectors.
1088   return vuzp_p8(a, b);  // expected: two shufflevectors of a/b (even-index mask, then odd-index mask) stored to consecutive result slots
1089 }
1090
1091 // CHECK-LABEL: @test_vuzp_p16(
1092 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1093 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1094 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
1095 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
1096 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1097 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1098 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1099 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1100 // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
1101 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1102 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1103 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]]
1104 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
1105 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
1106 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
1107 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
1108 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
1109 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
1110 // CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
1111 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
1112 // CHECK:   ret %struct.poly16x4x2_t [[TMP10]]
1113 poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {  // Codegen test: wraps vuzp_p16, whose result is a struct of two <4 x i16> vectors.
1114   return vuzp_p16(a, b);  // expected: shuffle masks <0, 2, 4, 6> then <1, 3, 5, 7>, stored to consecutive result slots
1115 }
1116
1117 // CHECK-LABEL: @test_vuzpq_s8(
1118 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1119 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1120 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
1121 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
1122 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1123 // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1124 // CHECK:   store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
1125 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1126 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1127 // CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
1128 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8*
1129 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
1130 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
1131 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16
1132 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0
1133 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
1134 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1135 // CHECK:   [[TMP8:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
1136 // CHECK:   ret %struct.int8x16x2_t [[TMP8]]
1137 int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {  // Codegen test: wraps vuzpq_s8, whose result is a struct of two <16 x i8> vectors.
1138   return vuzpq_s8(a, b);  // expected: two shufflevectors of a/b (even-index mask, then odd-index mask) stored to consecutive result slots
1139 }
1140
1141 // CHECK-LABEL: @test_vuzpq_s16(
1142 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1143 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1144 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
1145 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
1146 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1147 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1148 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1149 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1150 // CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
1151 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1152 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1153 // CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]]
1154 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
1155 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
1156 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
1157 // CHECK:   [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
1158 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
1159 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
1160 // CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
1161 // CHECK:   [[TMP10:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
1162 // CHECK:   ret %struct.int16x8x2_t [[TMP10]]
1163 int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {  // Codegen test: wraps vuzpq_s16, whose result is a struct of two <8 x i16> vectors.
1164   return vuzpq_s16(a, b);  // expected: two shufflevectors of a/b (even-index mask, then odd-index mask) stored to consecutive result slots
1165 }
1166
1167 // CHECK-LABEL: @test_vuzpq_s32(
1168 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1169 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1170 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
1171 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
1172 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1173 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1174 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
1175 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1176 // CHECK:   store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]]
1177 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
1178 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1179 // CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]]
1180 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
1181 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
1182 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
1183 // CHECK:   [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
1184 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
1185 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
1186 // CHECK:   store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
1187 // CHECK:   [[TMP10:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
1188 // CHECK:   ret %struct.int32x4x2_t [[TMP10]]
1189 int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {  // Codegen test: wraps vuzpq_s32, whose result is a struct of two <4 x i32> vectors.
1190   return vuzpq_s32(a, b);  // expected: shuffle masks <0, 2, 4, 6> then <1, 3, 5, 7>, stored to consecutive result slots
1191 }
1192
1193 // CHECK-LABEL: @test_vuzpq_u8(
1194 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1195 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1196 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
1197 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
1198 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1199 // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1200 // CHECK:   store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
1201 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1202 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1203 // CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
1204 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8*
1205 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
1206 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
1207 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16
1208 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0
1209 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
1210 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1211 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
1212 // CHECK:   ret %struct.uint8x16x2_t [[TMP8]]
// Fixture for vuzpq_u8: returns vuzpq_u8(a, b) unchanged. The expected IR
// above has no operand bitcasts for this 8-bit variant and pins two
// <16 x i8> shuffles: indices 0, 2, ..., 30 first, then 1, 3, ..., 31.
// Keep the body a single direct call so the emitted sequence does not move.
1213 uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
1214   return vuzpq_u8(a, b);
1215 }
1216
1217 // CHECK-LABEL: @test_vuzpq_u16(
1218 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1219 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1220 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
1221 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
1222 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1223 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1224 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1225 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1226 // CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
1227 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1228 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1229 // CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]]
1230 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
1231 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
1232 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
1233 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
1234 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
1235 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
1236 // CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
1237 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
1238 // CHECK:   ret %struct.uint16x8x2_t [[TMP10]]
// Fixture for vuzpq_u16: returns vuzpq_u16(a, b) unchanged. The expected IR
// above pins bitcasts of both operands to <16 x i8>, followed by two
// <8 x i16> shuffles with masks <0, 2, ..., 14> and <1, 3, ..., 15>.
// Keep the body a single direct call so the emitted sequence does not move.
1239 uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
1240   return vuzpq_u16(a, b);
1241 }
1242
1243 // CHECK-LABEL: @test_vuzpq_u32(
1244 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1245 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1246 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
1247 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
1248 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1249 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1250 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
1251 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1252 // CHECK:   store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]]
1253 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
1254 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1255 // CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]]
1256 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
1257 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
1258 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
1259 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
1260 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
1261 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
1262 // CHECK:   store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
1263 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
1264 // CHECK:   ret %struct.uint32x4x2_t [[TMP10]]
// Fixture for vuzpq_u32: returns vuzpq_u32(a, b) unchanged. The expected IR
// above pins two <4 x i32> shuffles with masks <0, 2, 4, 6> and
// <1, 3, 5, 7>, then a 32-byte, 16-aligned memcpy of the result struct.
// Keep the body a single direct call so the emitted sequence does not move.
1265 uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
1266   return vuzpq_u32(a, b);
1267 }
1268
1269 // CHECK-LABEL: @test_vuzpq_f32(
1270 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1271 // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1272 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
1273 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
1274 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
1275 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
1276 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
1277 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1278 // CHECK:   store <4 x float> [[VUZP_I]], <4 x float>* [[TMP3]]
1279 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
1280 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1281 // CHECK:   store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP4]]
1282 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
1283 // CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
1284 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
1285 // CHECK:   [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
1286 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
1287 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
1288 // CHECK:   store [2 x <4 x float>] [[TMP9]], [2 x <4 x float>]* [[TMP8]], align 16
1289 // CHECK:   [[TMP10:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
1290 // CHECK:   ret %struct.float32x4x2_t [[TMP10]]
// Fixture for vuzpq_f32: returns vuzpq_f32(a, b) unchanged. The expected IR
// above shuffles the operands as <4 x float> (no conversion to an integer
// vector for the shuffle itself) with masks <0, 2, 4, 6> and <1, 3, 5, 7>.
// Keep the body a single direct call so the emitted sequence does not move.
1291 float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
1292   return vuzpq_f32(a, b);
1293 }
1294
1295 // CHECK-LABEL: @test_vuzpq_p8(
1296 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1297 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1298 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
1299 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
1300 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1301 // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1302 // CHECK:   store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
1303 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1304 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1305 // CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
1306 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8*
1307 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
1308 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
1309 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16
1310 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0
1311 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
1312 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1313 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
1314 // CHECK:   ret %struct.poly8x16x2_t [[TMP8]]
// Fixture for vuzpq_p8: returns vuzpq_p8(a, b) unchanged. In the expected IR
// above the poly8x16_t operands appear as plain <16 x i8> and are shuffled
// with indices 0, 2, ..., 30 and then 1, 3, ..., 31.
// Keep the body a single direct call so the emitted sequence does not move.
1315 poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
1316   return vuzpq_p8(a, b);
1317 }
1318
1319 // CHECK-LABEL: @test_vuzpq_p16(
1320 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1321 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1322 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
1323 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
1324 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1325 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1326 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1327 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1328 // CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
1329 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1330 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1331 // CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]]
1332 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
1333 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
1334 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
1335 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
1336 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
1337 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
1338 // CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
1339 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
1340 // CHECK:   ret %struct.poly16x8x2_t [[TMP10]]
// Fixture for vuzpq_p16: returns vuzpq_p16(a, b) unchanged. In the expected
// IR above the poly16x8_t operands appear as <8 x i16> and are shuffled with
// masks <0, 2, ..., 14> and <1, 3, ..., 15>.
// Keep the body a single direct call so the emitted sequence does not move.
1341 poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
1342   return vuzpq_p16(a, b);
1343 }
1344
1345 // CHECK-LABEL: @test_vzip_s8(
1346 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
1347 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
1348 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
1349 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
1350 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1351 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1352 // CHECK:   store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
1353 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1354 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1355 // CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
1356 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8*
1357 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
1358 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
1359 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8
1360 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0
1361 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
1362 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1363 // CHECK:   [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
1364 // CHECK:   ret %struct.int8x8x2_t [[TMP8]]
// Fixture for vzip_s8: returns vzip_s8(a, b) unchanged. The expected IR above
// pins two <8 x i8> shuffles, <0, 8, 1, 9, 2, 10, 3, 11> and
// <4, 12, 5, 13, 6, 14, 7, 15>, and a 16-byte, 8-aligned memcpy of the
// result struct. Keep the body a single direct call.
1365 int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {
1366   return vzip_s8(a, b);
1367 }
1368
1369 // CHECK-LABEL: @test_vzip_s16(
1370 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
1371 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
1372 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
1373 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
1374 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1375 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1376 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1377 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1378 // CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
1379 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1380 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1381 // CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]]
1382 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
1383 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
1384 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
1385 // CHECK:   [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
1386 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
1387 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
1388 // CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
1389 // CHECK:   [[TMP10:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
1390 // CHECK:   ret %struct.int16x4x2_t [[TMP10]]
// Fixture for vzip_s16: returns vzip_s16(a, b) unchanged. The expected IR
// above pins bitcasts of both operands to <8 x i8>, then two <4 x i16>
// shuffles with masks <0, 4, 1, 5> and <2, 6, 3, 7>.
// Keep the body a single direct call so the emitted sequence does not move.
1391 int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
1392   return vzip_s16(a, b);
1393 }
1394
1395 // CHECK-LABEL: @test_vzip_s32(
1396 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
1397 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
1398 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
1399 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
1400 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1401 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1402 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1403 // CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
1404 // CHECK:   store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]]
1405 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1406 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
1407 // CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]]
1408 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
1409 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
1410 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
1411 // CHECK:   [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
1412 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
1413 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
1414 // CHECK:   store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
1415 // CHECK:   [[TMP10:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
1416 // CHECK:   ret %struct.int32x2x2_t [[TMP10]]
// Fixture for vzip_s32: returns vzip_s32(a, b) unchanged. With two lanes per
// operand, the expected IR above uses masks <0, 2> and <1, 3> on
// <2 x i32> values.
// Keep the body a single direct call so the emitted sequence does not move.
1417 int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
1418   return vzip_s32(a, b);
1419 }
1420
1421 // CHECK-LABEL: @test_vzip_u8(
1422 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
1423 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
1424 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
1425 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
1426 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1427 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1428 // CHECK:   store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
1429 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1430 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1431 // CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
1432 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8*
1433 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
1434 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
1435 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8
1436 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0
1437 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
1438 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1439 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
1440 // CHECK:   ret %struct.uint8x8x2_t [[TMP8]]
// Fixture for vzip_u8: returns vzip_u8(a, b) unchanged. The expected IR above
// has no operand bitcasts for this 8-bit variant and pins the <8 x i8> masks
// <0, 8, 1, 9, 2, 10, 3, 11> and <4, 12, 5, 13, 6, 14, 7, 15>.
// Keep the body a single direct call so the emitted sequence does not move.
1441 uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
1442   return vzip_u8(a, b);
1443 }
1444
1445 // CHECK-LABEL: @test_vzip_u16(
1446 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
1447 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
1448 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
1449 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
1450 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1451 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1452 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1453 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1454 // CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
1455 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1456 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1457 // CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]]
1458 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
1459 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
1460 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
1461 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
1462 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
1463 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
1464 // CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
1465 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
1466 // CHECK:   ret %struct.uint16x4x2_t [[TMP10]]
// Fixture for vzip_u16: returns vzip_u16(a, b) unchanged. The expected IR
// above pins two <4 x i16> shuffles with masks <0, 4, 1, 5> and
// <2, 6, 3, 7>, written to consecutive <4 x i16> slots of the result.
// Keep the body a single direct call so the emitted sequence does not move.
1467 uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
1468   return vzip_u16(a, b);
1469 }
1470
1471 // CHECK-LABEL: @test_vzip_u32(
1472 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1473 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1474 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
1475 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
1476 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1477 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1478 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1479 // CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
1480 // CHECK:   store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]]
1481 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1482 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
1483 // CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]]
1484 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
1485 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
1486 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
1487 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
1488 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
1489 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
1490 // CHECK:   store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
1491 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
1492 // CHECK:   ret %struct.uint32x2x2_t [[TMP10]]
// Fixture for vzip_u32: returns vzip_u32(a, b) unchanged. The expected IR
// above pins two <2 x i32> shuffles with masks <0, 2> and <1, 3>, then a
// 16-byte, 8-aligned memcpy of the result struct.
// Keep the body a single direct call so the emitted sequence does not move.
1493 uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
1494   return vzip_u32(a, b);
1495 }
1496
1497 // CHECK-LABEL: @test_vzip_f32(
1498 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1499 // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1500 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
1501 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
1502 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
1503 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
1504 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
1505 // CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
1506 // CHECK:   store <2 x float> [[VZIP_I]], <2 x float>* [[TMP3]]
1507 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
1508 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
1509 // CHECK:   store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP4]]
1510 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
1511 // CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
1512 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
1513 // CHECK:   [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
1514 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
1515 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
1516 // CHECK:   store [2 x <2 x float>] [[TMP9]], [2 x <2 x float>]* [[TMP8]], align 8
1517 // CHECK:   [[TMP10:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
1518 // CHECK:   ret %struct.float32x2x2_t [[TMP10]]
// Fixture for vzip_f32: returns vzip_f32(a, b) unchanged. The expected IR
// above shuffles the operands as <2 x float> with masks <0, 2> and <1, 3>
// and stores the results through <2 x float> pointers.
// Keep the body a single direct call so the emitted sequence does not move.
1519 float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
1520   return vzip_f32(a, b);
1521 }
1522
1523 // CHECK-LABEL: @test_vzip_p8(
1524 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1525 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1526 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
1527 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
1528 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1529 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1530 // CHECK:   store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
1531 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1532 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1533 // CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
1534 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8*
1535 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
1536 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
1537 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8
1538 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0
1539 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
1540 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1541 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
1542 // CHECK:   ret %struct.poly8x8x2_t [[TMP8]]
// Fixture for vzip_p8: returns vzip_p8(a, b) unchanged. In the expected IR
// above the poly8x8_t operands appear as <8 x i8> and are shuffled with
// <0, 8, 1, 9, 2, 10, 3, 11> and <4, 12, 5, 13, 6, 14, 7, 15>.
// Keep the body a single direct call so the emitted sequence does not move.
1543 poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
1544   return vzip_p8(a, b);
1545 }
1546
1547 // CHECK-LABEL: @test_vzip_p16(
1548 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1549 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1550 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
1551 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
1552 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1553 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1554 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1555 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1556 // CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
1557 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1558 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1559 // CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]]
1560 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
1561 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
1562 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
1563 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
1564 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
1565 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
1566 // CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
1567 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
1568 // CHECK:   ret %struct.poly16x4x2_t [[TMP10]]
// Fixture for vzip_p16: returns vzip_p16(a, b) unchanged. In the expected IR
// above the poly16x4_t operands appear as <4 x i16> and are shuffled with
// masks <0, 4, 1, 5> and <2, 6, 3, 7>.
// Keep the body a single direct call so the emitted sequence does not move.
1569 poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
1570   return vzip_p16(a, b);
1571 }
1572
1573 // CHECK-LABEL: @test_vzipq_s8(
1574 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1575 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1576 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
1577 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
1578 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1579 // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1580 // CHECK:   store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
1581 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1582 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1583 // CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
1584 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8*
1585 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
1586 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
1587 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16
1588 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0
1589 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
1590 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1591 // CHECK:   [[TMP8:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
1592 // CHECK:   ret %struct.int8x16x2_t [[TMP8]]
// Fixture for vzipq_s8: returns vzipq_s8(a, b) unchanged. The expected IR
// above pins two <16 x i8> shuffles: <0, 16, 1, 17, ..., 7, 23> first, then
// <8, 24, 9, 25, ..., 15, 31>, plus a 32-byte, 16-aligned memcpy.
// Keep the body a single direct call so the emitted sequence does not move.
1593 int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
1594   return vzipq_s8(a, b);
1595 }
1596
1597 // CHECK-LABEL: @test_vzipq_s16(
1598 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1599 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1600 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
1601 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
1602 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1603 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1604 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1605 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1606 // CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
1607 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1608 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1609 // CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]]
1610 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
1611 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
1612 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
1613 // CHECK:   [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
1614 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
1615 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
1616 // CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
1617 // CHECK:   [[TMP10:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
1618 // CHECK:   ret %struct.int16x8x2_t [[TMP10]]
// Wrapper under test: forwards a and b to vzipq_s16 and returns its
// int16x8x2_t result. The FileCheck directives above expect two
// shufflevectors over %a/%b with masks <0,8,1,9,2,10,3,11> and
// <4,12,5,13,6,14,7,15>, stored back to back, then a 32-byte memcpy.
// NOTE(review): re-verify the directives if this body is ever changed.
1619 int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
1620   return vzipq_s16(a, b);
1621 }
1622
1623 // CHECK-LABEL: @test_vzipq_s32(
1624 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1625 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1626 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
1627 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
1628 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1629 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1630 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
1631 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1632 // CHECK:   store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]]
1633 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
1634 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1635 // CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]]
1636 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
1637 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
1638 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
1639 // CHECK:   [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
1640 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
1641 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
1642 // CHECK:   store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
1643 // CHECK:   [[TMP10:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
1644 // CHECK:   ret %struct.int32x4x2_t [[TMP10]]
// Wrapper under test: forwards a and b to vzipq_s32 and returns its
// int32x4x2_t result. The FileCheck directives above expect two
// shufflevectors over %a/%b with masks <0,4,1,5> and <2,6,3,7>, stored
// back to back, then a 32-byte memcpy.
// NOTE(review): re-verify the directives if this body is ever changed.
1645 int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
1646   return vzipq_s32(a, b);
1647 }
1648
1649 // CHECK-LABEL: @test_vzipq_u8(
1650 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1651 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1652 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
1653 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
1654 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1655 // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1656 // CHECK:   store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
1657 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1658 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1659 // CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
1660 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8*
1661 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
1662 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
1663 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16
1664 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0
1665 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
1666 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1667 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
1668 // CHECK:   ret %struct.uint8x16x2_t [[TMP8]]
// Wrapper under test: forwards a and b to vzipq_u8 and returns its
// uint8x16x2_t result. The FileCheck directives above expect two
// shufflevectors over %a/%b: the first interleaves lanes 0..7 of %a with
// lanes 16..23, the second lanes 8..15 with lanes 24..31. No operand
// bitcasts to <16 x i8> are expected for this 8-bit variant.
// NOTE(review): re-verify the directives if this body is ever changed.
1669 uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
1670   return vzipq_u8(a, b);
1671 }
1672
1673 // CHECK-LABEL: @test_vzipq_u16(
1674 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1675 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1676 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
1677 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
1678 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1679 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1680 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1681 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1682 // CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
1683 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1684 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1685 // CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]]
1686 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
1687 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
1688 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
1689 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
1690 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
1691 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
1692 // CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
1693 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
1694 // CHECK:   ret %struct.uint16x8x2_t [[TMP10]]
// Wrapper under test: forwards a and b to vzipq_u16 and returns its
// uint16x8x2_t result. The FileCheck directives above expect two
// shufflevectors over %a/%b with masks <0,8,1,9,2,10,3,11> and
// <4,12,5,13,6,14,7,15>, stored back to back, then a 32-byte memcpy.
// NOTE(review): re-verify the directives if this body is ever changed.
1695 uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
1696   return vzipq_u16(a, b);
1697 }
1698
1699 // CHECK-LABEL: @test_vzipq_u32(
1700 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1701 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1702 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
1703 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
1704 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1705 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1706 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
1707 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1708 // CHECK:   store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]]
1709 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
1710 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1711 // CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]]
1712 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
1713 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
1714 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
1715 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
1716 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
1717 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
1718 // CHECK:   store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
1719 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
1720 // CHECK:   ret %struct.uint32x4x2_t [[TMP10]]
// Wrapper under test: forwards a and b to vzipq_u32 and returns its
// uint32x4x2_t result. The FileCheck directives above expect two
// shufflevectors over %a/%b with masks <0,4,1,5> and <2,6,3,7>, stored
// back to back, then a 32-byte memcpy.
// NOTE(review): re-verify the directives if this body is ever changed.
1721 uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
1722   return vzipq_u32(a, b);
1723 }
1724
1725 // CHECK-LABEL: @test_vzipq_f32(
1726 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1727 // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1728 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
1729 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
1730 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
1731 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
1732 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
1733 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1734 // CHECK:   store <4 x float> [[VZIP_I]], <4 x float>* [[TMP3]]
1735 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
1736 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1737 // CHECK:   store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP4]]
1738 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
1739 // CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
1740 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
1741 // CHECK:   [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
1742 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
1743 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
1744 // CHECK:   store [2 x <4 x float>] [[TMP9]], [2 x <4 x float>]* [[TMP8]], align 16
1745 // CHECK:   [[TMP10:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
1746 // CHECK:   ret %struct.float32x4x2_t [[TMP10]]
// Wrapper under test: forwards a and b to vzipq_f32 and returns its
// float32x4x2_t result. The FileCheck directives above expect two
// <4 x float> shufflevectors over %a/%b with masks <0,4,1,5> and
// <2,6,3,7>, stored back to back, then a 32-byte memcpy.
// NOTE(review): re-verify the directives if this body is ever changed.
1747 float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
1748   return vzipq_f32(a, b);
1749 }
1750
1751 // CHECK-LABEL: @test_vzipq_p8(
1752 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1753 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1754 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
1755 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
1756 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1757 // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1758 // CHECK:   store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
1759 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1760 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1761 // CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
1762 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8*
1763 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
1764 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
1765 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16
1766 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0
1767 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
1768 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1769 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
1770 // CHECK:   ret %struct.poly8x16x2_t [[TMP8]]
// Wrapper under test: forwards a and b to vzipq_p8 and returns its
// poly8x16x2_t result. The FileCheck directives above expect two
// shufflevectors over %a/%b: the first interleaves lanes 0..7 of %a with
// lanes 16..23, the second lanes 8..15 with lanes 24..31. No operand
// bitcasts to <16 x i8> are expected for this 8-bit variant.
// NOTE(review): re-verify the directives if this body is ever changed.
1771 poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
1772   return vzipq_p8(a, b);
1773 }
1774
1775 // CHECK-LABEL: @test_vzipq_p16(
1776 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1777 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1778 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
1779 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
1780 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1781 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1782 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1783 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1784 // CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
1785 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1786 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1787 // CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]]
1788 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
1789 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
1790 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
1791 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
1792 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
1793 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
1794 // CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
1795 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
1796 // CHECK:   ret %struct.poly16x8x2_t [[TMP10]]
// Wrapper under test: forwards a and b to vzipq_p16 and returns its
// poly16x8x2_t result. The FileCheck directives above expect two
// shufflevectors over %a/%b with masks <0,8,1,9,2,10,3,11> and
// <4,12,5,13,6,14,7,15>, stored back to back, then a 32-byte memcpy.
// NOTE(review): re-verify the directives if this body is ever changed.
1797 poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
1798   return vzipq_p16(a, b);
1799 }
1800
1801 // CHECK-LABEL: @test_vtrn_s8(
1802 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
1803 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
1804 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
1805 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
1806 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1807 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1808 // CHECK:   store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]]
1809 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1810 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1811 // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
1812 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8*
1813 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
1814 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
1815 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8
1816 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0
1817 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
1818 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1819 // CHECK:   [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
1820 // CHECK:   ret %struct.int8x8x2_t [[TMP8]]
// Wrapper under test: forwards a and b to vtrn_s8 and returns its
// int8x8x2_t result. The FileCheck directives above expect two
// shufflevectors over %a/%b with masks <0,8,2,10,4,12,6,14> (even lanes)
// and <1,9,3,11,5,13,7,15> (odd lanes), then a 16-byte, 8-aligned memcpy.
// NOTE(review): re-verify the directives if this body is ever changed.
1821 int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
1822   return vtrn_s8(a, b);
1823 }
1824
1825 // CHECK-LABEL: @test_vtrn_s16(
1826 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
1827 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
1828 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
1829 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
1830 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1831 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1832 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1833 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1834 // CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
1835 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1836 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1837 // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]]
1838 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
1839 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
1840 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
1841 // CHECK:   [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
1842 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
1843 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
1844 // CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
1845 // CHECK:   [[TMP10:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
1846 // CHECK:   ret %struct.int16x4x2_t [[TMP10]]
// Wrapper under test: forwards a and b to vtrn_s16 and returns its
// int16x4x2_t result. The FileCheck directives above expect two
// shufflevectors over %a/%b with masks <0,4,2,6> and <1,5,3,7>, stored
// back to back, then a 16-byte, 8-aligned memcpy.
// NOTE(review): re-verify the directives if this body is ever changed.
1847 int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
1848   return vtrn_s16(a, b);
1849 }
1850
1851 // CHECK-LABEL: @test_vtrn_s32(
1852 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
1853 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
1854 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
1855 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
1856 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1857 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1858 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1859 // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
1860 // CHECK:   store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]]
1861 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1862 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
1863 // CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]]
1864 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
1865 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
1866 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
1867 // CHECK:   [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
1868 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
1869 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
1870 // CHECK:   store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
1871 // CHECK:   [[TMP10:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
1872 // CHECK:   ret %struct.int32x2x2_t [[TMP10]]
// Wrapper under test: forwards a and b to vtrn_s32 and returns its
// int32x2x2_t result. The FileCheck directives above expect two
// shufflevectors over %a/%b with masks <0,2> and <1,3>, stored back to
// back, then a 16-byte, 8-aligned memcpy.
// NOTE(review): re-verify the directives if this body is ever changed.
1873 int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
1874   return vtrn_s32(a, b);
1875 }
1876
1877 // CHECK-LABEL: @test_vtrn_u8(
1878 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
1879 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
1880 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
1881 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
1882 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1883 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1884 // CHECK:   store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]]
1885 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1886 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1887 // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
1888 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8*
1889 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
1890 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
1891 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8
1892 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0
1893 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
1894 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1895 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
1896 // CHECK:   ret %struct.uint8x8x2_t [[TMP8]]
// Wrapper under test: forwards a and b to vtrn_u8 and returns its
// uint8x8x2_t result. The FileCheck directives above expect two
// shufflevectors over %a/%b with masks <0,8,2,10,4,12,6,14> (even lanes)
// and <1,9,3,11,5,13,7,15> (odd lanes), then a 16-byte, 8-aligned memcpy.
// NOTE(review): re-verify the directives if this body is ever changed.
1897 uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
1898   return vtrn_u8(a, b);
1899 }
1900
1901 // CHECK-LABEL: @test_vtrn_u16(
1902 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
1903 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
1904 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
1905 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
1906 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1907 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1908 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1909 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1910 // CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
1911 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1912 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1913 // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]]
1914 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
1915 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
1916 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
1917 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
1918 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
1919 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
1920 // CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
1921 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
1922 // CHECK:   ret %struct.uint16x4x2_t [[TMP10]]
// Wrapper under test: forwards a and b to vtrn_u16 and returns its
// uint16x4x2_t result. The FileCheck directives above expect two
// shufflevectors over %a/%b with masks <0,4,2,6> and <1,5,3,7>, stored
// back to back, then a 16-byte, 8-aligned memcpy.
// NOTE(review): re-verify the directives if this body is ever changed.
1923 uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
1924   return vtrn_u16(a, b);
1925 }
1926
1927 // CHECK-LABEL: @test_vtrn_u32(
1928 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1929 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1930 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
1931 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
1932 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1933 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1934 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1935 // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
1936 // CHECK:   store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]]
1937 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1938 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
1939 // CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]]
1940 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
1941 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
1942 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
1943 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
1944 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
1945 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
1946 // CHECK:   store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
1947 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
1948 // CHECK:   ret %struct.uint32x2x2_t [[TMP10]]
// Wrapper under test: forwards a and b to vtrn_u32 and returns its
// uint32x2x2_t result. The FileCheck directives above expect two
// shufflevectors over %a/%b with masks <0,2> and <1,3>, stored back to
// back, then a 16-byte, 8-aligned memcpy.
// NOTE(review): re-verify the directives if this body is ever changed.
1949 uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
1950   return vtrn_u32(a, b);
1951 }
1952
1953 // CHECK-LABEL: @test_vtrn_f32(
1954 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1955 // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1956 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
1957 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
1958 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
1959 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
1960 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
1961 // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
1962 // CHECK:   store <2 x float> [[VTRN_I]], <2 x float>* [[TMP3]]
1963 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
1964 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
1965 // CHECK:   store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP4]]
1966 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
1967 // CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
1968 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
1969 // CHECK:   [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
1970 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
1971 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
1972 // CHECK:   store [2 x <2 x float>] [[TMP9]], [2 x <2 x float>]* [[TMP8]], align 8
1973 // CHECK:   [[TMP10:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
1974 // CHECK:   ret %struct.float32x2x2_t [[TMP10]]
// Wrapper under test: forwards a and b to vtrn_f32 and returns its
// float32x2x2_t result. The FileCheck directives above expect two
// <2 x float> shufflevectors over %a/%b with masks <0,2> and <1,3>,
// stored back to back, then a 16-byte, 8-aligned memcpy.
// NOTE(review): re-verify the directives if this body is ever changed.
1975 float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
1976   return vtrn_f32(a, b);
1977 }
1978
1979 // CHECK-LABEL: @test_vtrn_p8(
1980 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1981 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1982 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
1983 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
1984 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1985 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1986 // CHECK:   store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]]
1987 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1988 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1989 // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
1990 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8*
1991 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
1992 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
1993 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8
1994 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0
1995 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
1996 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1997 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
1998 // CHECK:   ret %struct.poly8x8x2_t [[TMP8]]
// Wrapper under test: forwards a and b to vtrn_p8 and returns its
// poly8x8x2_t result. The FileCheck directives above expect two
// shufflevectors over %a/%b with masks <0,8,2,10,4,12,6,14> (even lanes)
// and <1,9,3,11,5,13,7,15> (odd lanes), then a 16-byte, 8-aligned memcpy.
// NOTE(review): re-verify the directives if this body is ever changed.
1999 poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
2000   return vtrn_p8(a, b);
2001 }
2002
2003 // CHECK-LABEL: @test_vtrn_p16(
2004 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
2005 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
2006 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
2007 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
2008 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
2009 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
2010 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
2011 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
2012 // CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
2013 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
2014 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2015 // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]]
2016 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
2017 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
2018 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 16, i32 8, i1 false) #2
2019 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
2020 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
2021 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
2022 // CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
2023 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
2024 // CHECK:   ret %struct.poly16x4x2_t [[TMP10]]
// Wrapper under test: forwards a and b to vtrn_p16 and returns its
// poly16x4x2_t result. The FileCheck directives above expect two
// shufflevectors over %a/%b with masks <0,4,2,6> and <1,5,3,7>, stored
// back to back, then a 16-byte, 8-aligned memcpy.
// NOTE(review): re-verify the directives if this body is ever changed.
2025 poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
2026   return vtrn_p16(a, b);
2027 }
2028
2029 // CHECK-LABEL: @test_vtrnq_s8(
2030 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
2031 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
2032 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
2033 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
2034 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
2035 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
2036 // CHECK:   store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]]
2037 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
2038 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
2039 // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
2040 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8*
2041 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
2042 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
2043 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16
2044 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0
2045 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
2046 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
2047 // CHECK:   [[TMP8:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
2048 // CHECK:   ret %struct.int8x16x2_t [[TMP8]]
2049 int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) { // Test wrapper: passes a and b straight to vtrnq_s8 and returns the int8x16x2_t result by value.
2050   return vtrnq_s8(a, b); // The FileCheck patterns above expect two <16 x i8> shuffles of %a and %b: mask 0,16,2,18,...,14,30 stored first, then mask 1,17,3,19,...,15,31 stored one vector later.
2051 }
2052
2053 // CHECK-LABEL: @test_vtrnq_s16(
2054 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
2055 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
2056 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
2057 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
2058 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
2059 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
2060 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
2061 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
2062 // CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
2063 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
2064 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2065 // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]]
2066 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
2067 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
2068 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
2069 // CHECK:   [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
2070 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
2071 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
2072 // CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
2073 // CHECK:   [[TMP10:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
2074 // CHECK:   ret %struct.int16x8x2_t [[TMP10]]
2075 int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) { // Test wrapper: passes a and b straight to vtrnq_s16 and returns the int16x8x2_t result by value.
2076   return vtrnq_s16(a, b); // The FileCheck patterns above expect two <8 x i16> shuffles of %a and %b: mask 0,8,2,10,4,12,6,14 stored first, then mask 1,9,3,11,5,13,7,15 stored one vector later.
2077 }
2078
2079 // CHECK-LABEL: @test_vtrnq_s32(
2080 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
2081 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
2082 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
2083 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
2084 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
2085 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
2086 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
2087 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
2088 // CHECK:   store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]]
2089 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
2090 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2091 // CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]]
2092 // CHECK:   [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
2093 // CHECK:   [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
2094 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
2095 // CHECK:   [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
2096 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
2097 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
2098 // CHECK:   store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
2099 // CHECK:   [[TMP10:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
2100 // CHECK:   ret %struct.int32x4x2_t [[TMP10]]
2101 int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) { // Test wrapper: passes a and b straight to vtrnq_s32 and returns the int32x4x2_t result by value.
2102   return vtrnq_s32(a, b); // The FileCheck patterns above expect two <4 x i32> shuffles of %a and %b: mask 0,4,2,6 stored first, then mask 1,5,3,7 stored one vector later.
2103 }
2104
2105 // CHECK-LABEL: @test_vtrnq_u8(
2106 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
2107 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
2108 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
2109 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
2110 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
2111 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
2112 // CHECK:   store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]]
2113 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
2114 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
2115 // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
2116 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8*
2117 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
2118 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
2119 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16
2120 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0
2121 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
2122 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
2123 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
2124 // CHECK:   ret %struct.uint8x16x2_t [[TMP8]]
2125 uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) { // Test wrapper: passes a and b straight to vtrnq_u8 and returns the uint8x16x2_t result by value.
2126   return vtrnq_u8(a, b); // The FileCheck patterns above expect two <16 x i8> shuffles of %a and %b: mask 0,16,2,18,...,14,30 stored first, then mask 1,17,3,19,...,15,31 stored one vector later.
2127 }
2128
2129 // CHECK-LABEL: @test_vtrnq_u16(
2130 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
2131 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
2132 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
2133 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
2134 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
2135 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
2136 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
2137 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
2138 // CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
2139 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
2140 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2141 // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]]
2142 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
2143 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
2144 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
2145 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
2146 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
2147 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
2148 // CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
2149 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
2150 // CHECK:   ret %struct.uint16x8x2_t [[TMP10]]
2151 uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) { // Test wrapper: passes a and b straight to vtrnq_u16 and returns the uint16x8x2_t result by value.
2152   return vtrnq_u16(a, b); // The FileCheck patterns above expect two <8 x i16> shuffles of %a and %b: mask 0,8,2,10,4,12,6,14 stored first, then mask 1,9,3,11,5,13,7,15 stored one vector later.
2153 }
2154
2155 // CHECK-LABEL: @test_vtrnq_u32(
2156 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
2157 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
2158 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
2159 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
2160 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
2161 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
2162 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
2163 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
2164 // CHECK:   store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]]
2165 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
2166 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2167 // CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]]
2168 // CHECK:   [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
2169 // CHECK:   [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
2170 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
2171 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
2172 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
2173 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
2174 // CHECK:   store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
2175 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
2176 // CHECK:   ret %struct.uint32x4x2_t [[TMP10]]
2177 uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) { // Test wrapper: passes a and b straight to vtrnq_u32 and returns the uint32x4x2_t result by value.
2178   return vtrnq_u32(a, b); // The FileCheck patterns above expect two <4 x i32> shuffles of %a and %b: mask 0,4,2,6 stored first, then mask 1,5,3,7 stored one vector later.
2179 }
2180
2181 // CHECK-LABEL: @test_vtrnq_f32(
2182 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
2183 // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
2184 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
2185 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
2186 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
2187 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
2188 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
2189 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
2190 // CHECK:   store <4 x float> [[VTRN_I]], <4 x float>* [[TMP3]]
2191 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
2192 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2193 // CHECK:   store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP4]]
2194 // CHECK:   [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
2195 // CHECK:   [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
2196 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
2197 // CHECK:   [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
2198 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
2199 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
2200 // CHECK:   store [2 x <4 x float>] [[TMP9]], [2 x <4 x float>]* [[TMP8]], align 16
2201 // CHECK:   [[TMP10:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
2202 // CHECK:   ret %struct.float32x4x2_t [[TMP10]]
2203 float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) { // Test wrapper: passes a and b straight to vtrnq_f32 and returns the float32x4x2_t result by value.
2204   return vtrnq_f32(a, b); // The FileCheck patterns above expect two <4 x float> shuffles of %a and %b: mask 0,4,2,6 stored first, then mask 1,5,3,7 stored one vector later.
2205 }
2206
2207 // CHECK-LABEL: @test_vtrnq_p8(
2208 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
2209 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
2210 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
2211 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
2212 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
2213 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
2214 // CHECK:   store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]]
2215 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
2216 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
2217 // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
2218 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8*
2219 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
2220 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
2221 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16
2222 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0
2223 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
2224 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
2225 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
2226 // CHECK:   ret %struct.poly8x16x2_t [[TMP8]]
2227 poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) { // Test wrapper: passes a and b straight to vtrnq_p8 and returns the poly8x16x2_t result by value.
2228   return vtrnq_p8(a, b); // The FileCheck patterns above expect two <16 x i8> shuffles of %a and %b: mask 0,16,2,18,...,14,30 stored first, then mask 1,17,3,19,...,15,31 stored one vector later.
2229 }
2230
2231 // CHECK-LABEL: @test_vtrnq_p16(
2232 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
2233 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
2234 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
2235 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
2236 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
2237 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
2238 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
2239 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
2240 // CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
2241 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
2242 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2243 // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]]
2244 // CHECK:   [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
2245 // CHECK:   [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
2246 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP5]], i8* [[TMP6]], i64 32, i32 16, i1 false) #2
2247 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
2248 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
2249 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
2250 // CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
2251 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
2252 // CHECK:   ret %struct.poly16x8x2_t [[TMP10]]
2253 poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) { // Test wrapper: passes a and b straight to vtrnq_p16 and returns the poly16x8x2_t result by value.
2254   return vtrnq_p16(a, b); // The FileCheck patterns above expect two <8 x i16> shuffles of %a and %b: mask 0,8,2,10,4,12,6,14 stored first, then mask 1,9,3,11,5,13,7,15 stored one vector later.
2255 }