Home | History | Annotate | Download | only in CodeGen
      1 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
      2 // RUN:  -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
      3 
      4 // Test AArch64 NEON permute intrinsics (vuzp, vzip, vtrn families) and their vector types
      5 #include <arm_neon.h>
      6 
      7 // CHECK-LABEL: define <8 x i8> @test_vuzp1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
      8 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
      9 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
     10 int8x8_t test_vuzp1_s8(int8x8_t a, int8x8_t b) {
          // The checks above expect a shufflevector yielding a[0], a[2], a[4], a[6], b[0], b[2], b[4], b[6].
     11   return vuzp1_s8(a, b);
     12 }
     13 
     14 // CHECK-LABEL: define <16 x i8> @test_vuzp1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
     15 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
     16 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
     17 int8x16_t test_vuzp1q_s8(int8x16_t a, int8x16_t b) {
          // The checks above expect a shufflevector yielding a[0], a[2], ..., a[14], b[0], b[2], ..., b[14].
     18   return vuzp1q_s8(a, b);
     19 }
     20 
     21 // CHECK-LABEL: define <4 x i16> @test_vuzp1_s16(<4 x i16> %a, <4 x i16> %b) #0 {
     22 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
     23 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
     24 int16x4_t test_vuzp1_s16(int16x4_t a, int16x4_t b) {
          // The checks above expect a shufflevector yielding a[0], a[2], b[0], b[2].
     25   return vuzp1_s16(a, b);
     26 }
     27 
     28 // CHECK-LABEL: define <8 x i16> @test_vuzp1q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
     29 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
     30 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
     31 int16x8_t test_vuzp1q_s16(int16x8_t a, int16x8_t b) {
          // The checks above expect a shufflevector yielding a[0], a[2], a[4], a[6], b[0], b[2], b[4], b[6].
     32   return vuzp1q_s16(a, b);
     33 }
     34 
     35 // CHECK-LABEL: define <2 x i32> @test_vuzp1_s32(<2 x i32> %a, <2 x i32> %b) #0 {
     36 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
     37 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
     38 int32x2_t test_vuzp1_s32(int32x2_t a, int32x2_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0].
     39   return vuzp1_s32(a, b);
     40 }
     41 
     42 // CHECK-LABEL: define <4 x i32> @test_vuzp1q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
     43 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
     44 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
     45 int32x4_t test_vuzp1q_s32(int32x4_t a, int32x4_t b) {
          // The checks above expect a shufflevector yielding a[0], a[2], b[0], b[2].
     46   return vuzp1q_s32(a, b);
     47 }
     48 
     49 // CHECK-LABEL: define <2 x i64> @test_vuzp1q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
     50 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
     51 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
     52 int64x2_t test_vuzp1q_s64(int64x2_t a, int64x2_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0].
     53   return vuzp1q_s64(a, b);
     54 }
     55 
     56 // CHECK-LABEL: define <8 x i8> @test_vuzp1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
     57 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
     58 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
     59 uint8x8_t test_vuzp1_u8(uint8x8_t a, uint8x8_t b) {
          // The checks above expect a shufflevector yielding a[0], a[2], a[4], a[6], b[0], b[2], b[4], b[6].
     60   return vuzp1_u8(a, b);
     61 }
     62 
     63 // CHECK-LABEL: define <16 x i8> @test_vuzp1q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
     64 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
     65 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
     66 uint8x16_t test_vuzp1q_u8(uint8x16_t a, uint8x16_t b) {
          // The checks above expect a shufflevector yielding a[0], a[2], ..., a[14], b[0], b[2], ..., b[14].
     67   return vuzp1q_u8(a, b);
     68 }
     69 
     70 // CHECK-LABEL: define <4 x i16> @test_vuzp1_u16(<4 x i16> %a, <4 x i16> %b) #0 {
     71 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
     72 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
     73 uint16x4_t test_vuzp1_u16(uint16x4_t a, uint16x4_t b) {
          // The checks above expect a shufflevector yielding a[0], a[2], b[0], b[2].
     74   return vuzp1_u16(a, b);
     75 }
     76 
     77 // CHECK-LABEL: define <8 x i16> @test_vuzp1q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
     78 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
     79 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
     80 uint16x8_t test_vuzp1q_u16(uint16x8_t a, uint16x8_t b) {
          // The checks above expect a shufflevector yielding a[0], a[2], a[4], a[6], b[0], b[2], b[4], b[6].
     81   return vuzp1q_u16(a, b);
     82 }
     83 
     84 // CHECK-LABEL: define <2 x i32> @test_vuzp1_u32(<2 x i32> %a, <2 x i32> %b) #0 {
     85 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
     86 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
     87 uint32x2_t test_vuzp1_u32(uint32x2_t a, uint32x2_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0].
     88   return vuzp1_u32(a, b);
     89 }
     90 
     91 // CHECK-LABEL: define <4 x i32> @test_vuzp1q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
     92 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
     93 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
     94 uint32x4_t test_vuzp1q_u32(uint32x4_t a, uint32x4_t b) {
          // The checks above expect a shufflevector yielding a[0], a[2], b[0], b[2].
     95   return vuzp1q_u32(a, b);
     96 }
     97 
     98 // CHECK-LABEL: define <2 x i64> @test_vuzp1q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
     99 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
    100 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
    101 uint64x2_t test_vuzp1q_u64(uint64x2_t a, uint64x2_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0].
    102   return vuzp1q_u64(a, b);
    103 }
    104 
    105 // CHECK-LABEL: define <2 x float> @test_vuzp1_f32(<2 x float> %a, <2 x float> %b) #0 {
    106 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
    107 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
    108 float32x2_t test_vuzp1_f32(float32x2_t a, float32x2_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0].
    109   return vuzp1_f32(a, b);
    110 }
    111 
    112 // CHECK-LABEL: define <4 x float> @test_vuzp1q_f32(<4 x float> %a, <4 x float> %b) #0 {
    113 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    114 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
    115 float32x4_t test_vuzp1q_f32(float32x4_t a, float32x4_t b) {
          // The checks above expect a shufflevector yielding a[0], a[2], b[0], b[2].
    116   return vuzp1q_f32(a, b);
    117 }
    118 
    119 // CHECK-LABEL: define <2 x double> @test_vuzp1q_f64(<2 x double> %a, <2 x double> %b) #0 {
    120 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
    121 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
    122 float64x2_t test_vuzp1q_f64(float64x2_t a, float64x2_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0].
    123   return vuzp1q_f64(a, b);
    124 }
    125 
    126 // CHECK-LABEL: define <8 x i8> @test_vuzp1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
    127 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
    128 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
    129 poly8x8_t test_vuzp1_p8(poly8x8_t a, poly8x8_t b) {
          // The checks above expect a shufflevector yielding a[0], a[2], a[4], a[6], b[0], b[2], b[4], b[6].
    130   return vuzp1_p8(a, b);
    131 }
    132 
    133 // CHECK-LABEL: define <16 x i8> @test_vuzp1q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
    134 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
    135 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
    136 poly8x16_t test_vuzp1q_p8(poly8x16_t a, poly8x16_t b) {
          // The checks above expect a shufflevector yielding a[0], a[2], ..., a[14], b[0], b[2], ..., b[14].
    137   return vuzp1q_p8(a, b);
    138 }
    139 
    140 // CHECK-LABEL: define <4 x i16> @test_vuzp1_p16(<4 x i16> %a, <4 x i16> %b) #0 {
    141 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    142 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
    143 poly16x4_t test_vuzp1_p16(poly16x4_t a, poly16x4_t b) {
          // The checks above expect a shufflevector yielding a[0], a[2], b[0], b[2].
    144   return vuzp1_p16(a, b);
    145 }
    146 
    147 // CHECK-LABEL: define <8 x i16> @test_vuzp1q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
    148 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
    149 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
    150 poly16x8_t test_vuzp1q_p16(poly16x8_t a, poly16x8_t b) {
          // The checks above expect a shufflevector yielding a[0], a[2], a[4], a[6], b[0], b[2], b[4], b[6].
    151   return vuzp1q_p16(a, b);
    152 }
    153 
    154 // CHECK-LABEL: define <8 x i8> @test_vuzp2_s8(<8 x i8> %a, <8 x i8> %b) #0 {
    155 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
    156 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
    157 int8x8_t test_vuzp2_s8(int8x8_t a, int8x8_t b) {
          // The checks above expect a shufflevector yielding a[1], a[3], a[5], a[7], b[1], b[3], b[5], b[7].
    158   return vuzp2_s8(a, b);
    159 }
    160 
    161 // CHECK-LABEL: define <16 x i8> @test_vuzp2q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
    162 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
    163 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
    164 int8x16_t test_vuzp2q_s8(int8x16_t a, int8x16_t b) {
          // The checks above expect a shufflevector yielding a[1], a[3], ..., a[15], b[1], b[3], ..., b[15].
    165   return vuzp2q_s8(a, b);
    166 }
    167 
    168 // CHECK-LABEL: define <4 x i16> @test_vuzp2_s16(<4 x i16> %a, <4 x i16> %b) #0 {
    169 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    170 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
    171 int16x4_t test_vuzp2_s16(int16x4_t a, int16x4_t b) {
          // The checks above expect a shufflevector yielding a[1], a[3], b[1], b[3].
    172   return vuzp2_s16(a, b);
    173 }
    174 
    175 // CHECK-LABEL: define <8 x i16> @test_vuzp2q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
    176 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
    177 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
    178 int16x8_t test_vuzp2q_s16(int16x8_t a, int16x8_t b) {
          // The checks above expect a shufflevector yielding a[1], a[3], a[5], a[7], b[1], b[3], b[5], b[7].
    179   return vuzp2q_s16(a, b);
    180 }
    181 
    182 // CHECK-LABEL: define <2 x i32> @test_vuzp2_s32(<2 x i32> %a, <2 x i32> %b) #0 {
    183 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
    184 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
    185 int32x2_t test_vuzp2_s32(int32x2_t a, int32x2_t b) {
          // The checks above expect a shufflevector yielding a[1], b[1].
    186   return vuzp2_s32(a, b);
    187 }
    188 
    189 // CHECK-LABEL: define <4 x i32> @test_vuzp2q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
    190 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    191 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
    192 int32x4_t test_vuzp2q_s32(int32x4_t a, int32x4_t b) {
          // The checks above expect a shufflevector yielding a[1], a[3], b[1], b[3].
    193   return vuzp2q_s32(a, b);
    194 }
    195 
    196 // CHECK-LABEL: define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
    197 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
    198 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
    199 int64x2_t test_vuzp2q_s64(int64x2_t a, int64x2_t b) {
          // The checks above expect a shufflevector yielding a[1], b[1].
    200   return vuzp2q_s64(a, b);
    201 }
    202 
    203 // CHECK-LABEL: define <8 x i8> @test_vuzp2_u8(<8 x i8> %a, <8 x i8> %b) #0 {
    204 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
    205 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
    206 uint8x8_t test_vuzp2_u8(uint8x8_t a, uint8x8_t b) {
          // The checks above expect a shufflevector yielding a[1], a[3], a[5], a[7], b[1], b[3], b[5], b[7].
    207   return vuzp2_u8(a, b);
    208 }
    209 
    210 // CHECK-LABEL: define <16 x i8> @test_vuzp2q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
    211 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
    212 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
    213 uint8x16_t test_vuzp2q_u8(uint8x16_t a, uint8x16_t b) {
          // The checks above expect a shufflevector yielding a[1], a[3], ..., a[15], b[1], b[3], ..., b[15].
    214   return vuzp2q_u8(a, b);
    215 }
    216 
    217 // CHECK-LABEL: define <4 x i16> @test_vuzp2_u16(<4 x i16> %a, <4 x i16> %b) #0 {
    218 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    219 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
    220 uint16x4_t test_vuzp2_u16(uint16x4_t a, uint16x4_t b) {
          // The checks above expect a shufflevector yielding a[1], a[3], b[1], b[3].
    221   return vuzp2_u16(a, b);
    222 }
    223 
    224 // CHECK-LABEL: define <8 x i16> @test_vuzp2q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
    225 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
    226 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
    227 uint16x8_t test_vuzp2q_u16(uint16x8_t a, uint16x8_t b) {
          // The checks above expect a shufflevector yielding a[1], a[3], a[5], a[7], b[1], b[3], b[5], b[7].
    228   return vuzp2q_u16(a, b);
    229 }
    230 
    231 // CHECK-LABEL: define <2 x i32> @test_vuzp2_u32(<2 x i32> %a, <2 x i32> %b) #0 {
    232 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
    233 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
    234 uint32x2_t test_vuzp2_u32(uint32x2_t a, uint32x2_t b) {
          // The checks above expect a shufflevector yielding a[1], b[1].
    235   return vuzp2_u32(a, b);
    236 }
    237 
    238 // CHECK-LABEL: define <4 x i32> @test_vuzp2q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
    239 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    240 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
    241 uint32x4_t test_vuzp2q_u32(uint32x4_t a, uint32x4_t b) {
          // The checks above expect a shufflevector yielding a[1], a[3], b[1], b[3].
    242   return vuzp2q_u32(a, b);
    243 }
    244 
    245 // CHECK-LABEL: define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
    246 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
    247 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
    248 uint64x2_t test_vuzp2q_u64(uint64x2_t a, uint64x2_t b) {
          // The checks above expect a shufflevector yielding a[1], b[1].
    249   return vuzp2q_u64(a, b);
    250 }
    251 
    252 // CHECK-LABEL: define <2 x float> @test_vuzp2_f32(<2 x float> %a, <2 x float> %b) #0 {
    253 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
    254 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
    255 float32x2_t test_vuzp2_f32(float32x2_t a, float32x2_t b) {
          // The checks above expect a shufflevector yielding a[1], b[1].
    256   return vuzp2_f32(a, b);
    257 }
    258 
    259 // CHECK-LABEL: define <4 x float> @test_vuzp2q_f32(<4 x float> %a, <4 x float> %b) #0 {
    260 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    261 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
    262 float32x4_t test_vuzp2q_f32(float32x4_t a, float32x4_t b) {
          // The checks above expect a shufflevector yielding a[1], a[3], b[1], b[3].
    263   return vuzp2q_f32(a, b);
    264 }
    265 
    266 // CHECK-LABEL: define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) #0 {
    267 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
    268 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
    269 float64x2_t test_vuzp2q_f64(float64x2_t a, float64x2_t b) {
          // The checks above expect a shufflevector yielding a[1], b[1].
    270   return vuzp2q_f64(a, b);
    271 }
    272 
    273 // CHECK-LABEL: define <8 x i8> @test_vuzp2_p8(<8 x i8> %a, <8 x i8> %b) #0 {
    274 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
    275 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
    276 poly8x8_t test_vuzp2_p8(poly8x8_t a, poly8x8_t b) {
          // The checks above expect a shufflevector yielding a[1], a[3], a[5], a[7], b[1], b[3], b[5], b[7].
    277   return vuzp2_p8(a, b);
    278 }
    279 
    280 // CHECK-LABEL: define <16 x i8> @test_vuzp2q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
    281 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
    282 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
    283 poly8x16_t test_vuzp2q_p8(poly8x16_t a, poly8x16_t b) {
          // The checks above expect a shufflevector yielding a[1], a[3], ..., a[15], b[1], b[3], ..., b[15].
    284   return vuzp2q_p8(a, b);
    285 }
    286 
    287 // CHECK-LABEL: define <4 x i16> @test_vuzp2_p16(<4 x i16> %a, <4 x i16> %b) #0 {
    288 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    289 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
    290 poly16x4_t test_vuzp2_p16(poly16x4_t a, poly16x4_t b) {
          // The checks above expect a shufflevector yielding a[1], a[3], b[1], b[3].
    291   return vuzp2_p16(a, b);
    292 }
    293 
    294 // CHECK-LABEL: define <8 x i16> @test_vuzp2q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
    295 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
    296 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
    297 poly16x8_t test_vuzp2q_p16(poly16x8_t a, poly16x8_t b) {
          // The checks above expect a shufflevector yielding a[1], a[3], a[5], a[7], b[1], b[3], b[5], b[7].
    298   return vuzp2q_p16(a, b);
    299 }
    300 
    301 // CHECK-LABEL: define <8 x i8> @test_vzip1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
    302 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
    303 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
    304 int8x8_t test_vzip1_s8(int8x8_t a, int8x8_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0], a[1], b[1], a[2], b[2], a[3], b[3].
    305   return vzip1_s8(a, b);
    306 }
    307 
    308 // CHECK-LABEL: define <16 x i8> @test_vzip1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
    309 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
    310 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
    311 int8x16_t test_vzip1q_s8(int8x16_t a, int8x16_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0], a[1], b[1], ..., a[7], b[7].
    312   return vzip1q_s8(a, b);
    313 }
    314 
    315 // CHECK-LABEL: define <4 x i16> @test_vzip1_s16(<4 x i16> %a, <4 x i16> %b) #0 {
    316 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
    317 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
    318 int16x4_t test_vzip1_s16(int16x4_t a, int16x4_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0], a[1], b[1].
    319   return vzip1_s16(a, b);
    320 }
    321 
    322 // CHECK-LABEL: define <8 x i16> @test_vzip1q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
    323 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
    324 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
    325 int16x8_t test_vzip1q_s16(int16x8_t a, int16x8_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0], a[1], b[1], a[2], b[2], a[3], b[3].
    326   return vzip1q_s16(a, b);
    327 }
    328 
    329 // CHECK-LABEL: define <2 x i32> @test_vzip1_s32(<2 x i32> %a, <2 x i32> %b) #0 {
    330 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
    331 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
    332 int32x2_t test_vzip1_s32(int32x2_t a, int32x2_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0].
    333   return vzip1_s32(a, b);
    334 }
    335 
    336 // CHECK-LABEL: define <4 x i32> @test_vzip1q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
    337 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
    338 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
    339 int32x4_t test_vzip1q_s32(int32x4_t a, int32x4_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0], a[1], b[1].
    340   return vzip1q_s32(a, b);
    341 }
    342 
    343 // CHECK-LABEL: define <2 x i64> @test_vzip1q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
    344 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
    345 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
    346 int64x2_t test_vzip1q_s64(int64x2_t a, int64x2_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0].
    347   return vzip1q_s64(a, b);
    348 }
    349 
    350 // CHECK-LABEL: define <8 x i8> @test_vzip1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
    351 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
    352 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
    353 uint8x8_t test_vzip1_u8(uint8x8_t a, uint8x8_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0], a[1], b[1], a[2], b[2], a[3], b[3].
    354   return vzip1_u8(a, b);
    355 }
    356 
    357 // CHECK-LABEL: define <16 x i8> @test_vzip1q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
    358 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
    359 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
    360 uint8x16_t test_vzip1q_u8(uint8x16_t a, uint8x16_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0], a[1], b[1], ..., a[7], b[7].
    361   return vzip1q_u8(a, b);
    362 }
    363 
    364 // CHECK-LABEL: define <4 x i16> @test_vzip1_u16(<4 x i16> %a, <4 x i16> %b) #0 {
    365 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
    366 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
    367 uint16x4_t test_vzip1_u16(uint16x4_t a, uint16x4_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0], a[1], b[1].
    368   return vzip1_u16(a, b);
    369 }
    370 
    371 // CHECK-LABEL: define <8 x i16> @test_vzip1q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
    372 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
    373 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
    374 uint16x8_t test_vzip1q_u16(uint16x8_t a, uint16x8_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0], a[1], b[1], a[2], b[2], a[3], b[3].
    375   return vzip1q_u16(a, b);
    376 }
    377 
    378 // CHECK-LABEL: define <2 x i32> @test_vzip1_u32(<2 x i32> %a, <2 x i32> %b) #0 {
    379 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
    380 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
    381 uint32x2_t test_vzip1_u32(uint32x2_t a, uint32x2_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0].
    382   return vzip1_u32(a, b);
    383 }
    384 
    385 // CHECK-LABEL: define <4 x i32> @test_vzip1q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
    386 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
    387 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
    388 uint32x4_t test_vzip1q_u32(uint32x4_t a, uint32x4_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0], a[1], b[1].
    389   return vzip1q_u32(a, b);
    390 }
    391 
    392 // CHECK-LABEL: define <2 x i64> @test_vzip1q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
    393 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
    394 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
    395 uint64x2_t test_vzip1q_u64(uint64x2_t a, uint64x2_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0].
    396   return vzip1q_u64(a, b);
    397 }
    398 
    399 // CHECK-LABEL: define <2 x float> @test_vzip1_f32(<2 x float> %a, <2 x float> %b) #0 {
    400 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
    401 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
    402 float32x2_t test_vzip1_f32(float32x2_t a, float32x2_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0].
    403   return vzip1_f32(a, b);
    404 }
    405 
    406 // CHECK-LABEL: define <4 x float> @test_vzip1q_f32(<4 x float> %a, <4 x float> %b) #0 {
    407 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
    408 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
    409 float32x4_t test_vzip1q_f32(float32x4_t a, float32x4_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0], a[1], b[1].
    410   return vzip1q_f32(a, b);
    411 }
    412 
    413 // CHECK-LABEL: define <2 x double> @test_vzip1q_f64(<2 x double> %a, <2 x double> %b) #0 {
    414 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
    415 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
    416 float64x2_t test_vzip1q_f64(float64x2_t a, float64x2_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0].
    417   return vzip1q_f64(a, b);
    418 }
    419 
    420 // CHECK-LABEL: define <8 x i8> @test_vzip1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
    421 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
    422 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
    423 poly8x8_t test_vzip1_p8(poly8x8_t a, poly8x8_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0], a[1], b[1], a[2], b[2], a[3], b[3].
    424   return vzip1_p8(a, b);
    425 }
    426 
    427 // CHECK-LABEL: define <16 x i8> @test_vzip1q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
    428 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
    429 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
    430 poly8x16_t test_vzip1q_p8(poly8x16_t a, poly8x16_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0], a[1], b[1], ..., a[7], b[7].
    431   return vzip1q_p8(a, b);
    432 }
    433 
    434 // CHECK-LABEL: define <4 x i16> @test_vzip1_p16(<4 x i16> %a, <4 x i16> %b) #0 {
    435 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
    436 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
    437 poly16x4_t test_vzip1_p16(poly16x4_t a, poly16x4_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0], a[1], b[1].
    438   return vzip1_p16(a, b);
    439 }
    440 
    441 // CHECK-LABEL: define <8 x i16> @test_vzip1q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
    442 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
    443 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
    444 poly16x8_t test_vzip1q_p16(poly16x8_t a, poly16x8_t b) {
          // The checks above expect a shufflevector yielding a[0], b[0], a[1], b[1], a[2], b[2], a[3], b[3].
    445   return vzip1q_p16(a, b);
    446 }
    447 
    448 // CHECK-LABEL: define <8 x i8> @test_vzip2_s8(<8 x i8> %a, <8 x i8> %b) #0 {
    449 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
    450 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
    451 int8x8_t test_vzip2_s8(int8x8_t a, int8x8_t b) {
          // The checks above expect a shufflevector yielding a[4], b[4], a[5], b[5], a[6], b[6], a[7], b[7].
    452   return vzip2_s8(a, b);
    453 }
    454 
    455 // CHECK-LABEL: define <16 x i8> @test_vzip2q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
    456 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
    457 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
    458 int8x16_t test_vzip2q_s8(int8x16_t a, int8x16_t b) {
          // The checks above expect a shufflevector yielding a[8], b[8], a[9], b[9], ..., a[15], b[15].
    459   return vzip2q_s8(a, b);
    460 }
    461 
    462 // CHECK-LABEL: define <4 x i16> @test_vzip2_s16(<4 x i16> %a, <4 x i16> %b) #0 {
    463 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
    464 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
    465 int16x4_t test_vzip2_s16(int16x4_t a, int16x4_t b) {
          // The checks above expect a shufflevector yielding a[2], b[2], a[3], b[3].
    466   return vzip2_s16(a, b);
    467 }
    468 
    469 // CHECK-LABEL: define <8 x i16> @test_vzip2q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
    470 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
    471 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
    472 int16x8_t test_vzip2q_s16(int16x8_t a, int16x8_t b) {
          // The checks above expect a shufflevector yielding a[4], b[4], a[5], b[5], a[6], b[6], a[7], b[7].
    473   return vzip2q_s16(a, b);
    474 }
    475 
    476 // CHECK-LABEL: define <2 x i32> @test_vzip2_s32(<2 x i32> %a, <2 x i32> %b) #0 {
    477 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
    478 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
    479 int32x2_t test_vzip2_s32(int32x2_t a, int32x2_t b) {
          // The checks above expect a shufflevector yielding a[1], b[1].
    480   return vzip2_s32(a, b);
    481 }
    482 
    483 // CHECK-LABEL: define <4 x i32> @test_vzip2q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
    484 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
    485 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
    486 int32x4_t test_vzip2q_s32(int32x4_t a, int32x4_t b) {
          // The checks above expect a shufflevector yielding a[2], b[2], a[3], b[3].
    487   return vzip2q_s32(a, b);
    488 }
    489 
    490 // CHECK-LABEL: define <2 x i64> @test_vzip2q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
    491 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
    492 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
    493 int64x2_t test_vzip2q_s64(int64x2_t a, int64x2_t b) {
          // The checks above expect a shufflevector yielding a[1], b[1].
    494   return vzip2q_s64(a, b);
    495 }
    496 
    497 // CHECK-LABEL: define <8 x i8> @test_vzip2_u8(<8 x i8> %a, <8 x i8> %b) #0 {
    498 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
    499 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
    500 uint8x8_t test_vzip2_u8(uint8x8_t a, uint8x8_t b) {
          // The checks above expect a shufflevector yielding a[4], b[4], a[5], b[5], a[6], b[6], a[7], b[7].
    501   return vzip2_u8(a, b);
    502 }
    503 
    504 // CHECK-LABEL: define <16 x i8> @test_vzip2q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
    505 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
    506 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
    507 uint8x16_t test_vzip2q_u8(uint8x16_t a, uint8x16_t b) {
          // The checks above expect a shufflevector yielding a[8], b[8], a[9], b[9], ..., a[15], b[15].
    508   return vzip2q_u8(a, b);
    509 }
    510 
    511 // CHECK-LABEL: define <4 x i16> @test_vzip2_u16(<4 x i16> %a, <4 x i16> %b) #0 {
    512 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
    513 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
    514 uint16x4_t test_vzip2_u16(uint16x4_t a, uint16x4_t b) {
          // The checks above expect a shufflevector yielding a[2], b[2], a[3], b[3].
    515   return vzip2_u16(a, b);
    516 }
    517 
    518 // CHECK-LABEL: define <8 x i16> @test_vzip2q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
    519 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
    520 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
    521 uint16x8_t test_vzip2q_u16(uint16x8_t a, uint16x8_t b) {
          // The checks above expect a shufflevector yielding a[4], b[4], a[5], b[5], a[6], b[6], a[7], b[7].
    522   return vzip2q_u16(a, b);
    523 }
    524 
    525 // CHECK-LABEL: define <2 x i32> @test_vzip2_u32(<2 x i32> %a, <2 x i32> %b) #0 {
    526 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
    527 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
    528 uint32x2_t test_vzip2_u32(uint32x2_t a, uint32x2_t b) {
          // The checks above expect a shufflevector yielding a[1], b[1].
    529   return vzip2_u32(a, b);
    530 }
    531 
    532 // CHECK-LABEL: define <4 x i32> @test_vzip2q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
    533 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
    534 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
    535 uint32x4_t test_vzip2q_u32(uint32x4_t a, uint32x4_t b) {
          // The checks above expect a shufflevector yielding a[2], b[2], a[3], b[3].
    536   return vzip2q_u32(a, b);
    537 }
    538 
    539 // CHECK-LABEL: define <2 x i64> @test_vzip2q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
    540 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
    541 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
    542 uint64x2_t test_vzip2q_u64(uint64x2_t a, uint64x2_t b) {
          // The checks above expect a shufflevector yielding a[1], b[1].
    543   return vzip2q_u64(a, b);
    544 }
    545 
    546 // CHECK-LABEL: define <2 x float> @test_vzip2_f32(<2 x float> %a, <2 x float> %b) #0 {
    547 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
    548 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
    549 float32x2_t test_vzip2_f32(float32x2_t a, float32x2_t b) {
          // The checks above expect a shufflevector yielding a[1], b[1].
    550   return vzip2_f32(a, b);
    551 }
    552 
    553 // CHECK-LABEL: define <4 x float> @test_vzip2q_f32(<4 x float> %a, <4 x float> %b) #0 {
    554 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
    555 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
    556 float32x4_t test_vzip2q_f32(float32x4_t a, float32x4_t b) {
          // The checks above expect a shufflevector yielding a[2], b[2], a[3], b[3].
    557   return vzip2q_f32(a, b);
    558 }
    559 
    560 // CHECK-LABEL: define <2 x double> @test_vzip2q_f64(<2 x double> %a, <2 x double> %b) #0 {
    561 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
    562 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
    563 float64x2_t test_vzip2q_f64(float64x2_t a, float64x2_t b) {
          // The checks above expect a shufflevector yielding a[1], b[1].
    564   return vzip2q_f64(a, b);
    565 }
    566 
    567 // CHECK-LABEL: define <8 x i8> @test_vzip2_p8(<8 x i8> %a, <8 x i8> %b) #0 {
    568 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
    569 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
     570 poly8x8_t test_vzip2_p8(poly8x8_t a, poly8x8_t b) {
     571   return vzip2_p8(a, b);  // expected IR: one shufflevector, mask <4, 12, 5, 13, 6, 14, 7, 15> (upper-half lanes of a and b, interleaved)
     572 }
    573 
    574 // CHECK-LABEL: define <16 x i8> @test_vzip2q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
    575 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
    576 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
     577 poly8x16_t test_vzip2q_p8(poly8x16_t a, poly8x16_t b) {
     578   return vzip2q_p8(a, b);  // expected IR: one shufflevector, mask <8, 24, 9, 25, ..., 15, 31> (upper-half lanes of a and b, interleaved)
     579 }
    580 
    581 // CHECK-LABEL: define <4 x i16> @test_vzip2_p16(<4 x i16> %a, <4 x i16> %b) #0 {
    582 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
    583 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
     584 poly16x4_t test_vzip2_p16(poly16x4_t a, poly16x4_t b) {
     585   return vzip2_p16(a, b);  // expected IR: one shufflevector, mask <2, 6, 3, 7> (upper-half lanes of a and b, interleaved)
     586 }
    587 
    588 // CHECK-LABEL: define <8 x i16> @test_vzip2q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
    589 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
    590 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
     591 poly16x8_t test_vzip2q_p16(poly16x8_t a, poly16x8_t b) {
     592   return vzip2q_p16(a, b);  // expected IR: one shufflevector, mask <4, 12, 5, 13, 6, 14, 7, 15> (upper-half lanes of a and b, interleaved)
     593 }
    594 
    595 // CHECK-LABEL: define <8 x i8> @test_vtrn1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
    596 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
    597 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
     598 int8x8_t test_vtrn1_s8(int8x8_t a, int8x8_t b) {
     599   return vtrn1_s8(a, b);  // expected IR: one shufflevector, mask <0, 8, 2, 10, 4, 12, 6, 14> (each even lane of a followed by the same lane of b)
     600 }
    601 
    602 // CHECK-LABEL: define <16 x i8> @test_vtrn1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
    603 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
    604 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
     605 int8x16_t test_vtrn1q_s8(int8x16_t a, int8x16_t b) {
     606   return vtrn1q_s8(a, b);  // expected IR: one shufflevector, mask <0, 16, 2, 18, ..., 14, 30> (each even lane of a followed by the same lane of b)
     607 }
    608 
    609 // CHECK-LABEL: define <4 x i16> @test_vtrn1_s16(<4 x i16> %a, <4 x i16> %b) #0 {
    610 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
    611 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
     612 int16x4_t test_vtrn1_s16(int16x4_t a, int16x4_t b) {
     613   return vtrn1_s16(a, b);  // expected IR: one shufflevector, mask <0, 4, 2, 6> (each even lane of a followed by the same lane of b)
     614 }
    615 
    616 // CHECK-LABEL: define <8 x i16> @test_vtrn1q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
    617 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
    618 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
     619 int16x8_t test_vtrn1q_s16(int16x8_t a, int16x8_t b) {
     620   return vtrn1q_s16(a, b);  // expected IR: one shufflevector, mask <0, 8, 2, 10, 4, 12, 6, 14> (each even lane of a followed by the same lane of b)
     621 }
    622 
    623 // CHECK-LABEL: define <2 x i32> @test_vtrn1_s32(<2 x i32> %a, <2 x i32> %b) #0 {
    624 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
    625 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
     626 int32x2_t test_vtrn1_s32(int32x2_t a, int32x2_t b) {
     627   return vtrn1_s32(a, b);  // expected IR: one shufflevector, mask <0, 2> (lane 0 of a, then lane 0 of b)
     628 }
    629 
    630 // CHECK-LABEL: define <4 x i32> @test_vtrn1q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
    631 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
    632 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
     633 int32x4_t test_vtrn1q_s32(int32x4_t a, int32x4_t b) {
     634   return vtrn1q_s32(a, b);  // expected IR: one shufflevector, mask <0, 4, 2, 6> (each even lane of a followed by the same lane of b)
     635 }
    636 
    637 // CHECK-LABEL: define <2 x i64> @test_vtrn1q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
    638 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
    639 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
     640 int64x2_t test_vtrn1q_s64(int64x2_t a, int64x2_t b) {
     641   return vtrn1q_s64(a, b);  // expected IR: one shufflevector, mask <0, 2> (lane 0 of a, then lane 0 of b)
     642 }
    643 
    644 // CHECK-LABEL: define <8 x i8> @test_vtrn1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
    645 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
    646 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
     647 uint8x8_t test_vtrn1_u8(uint8x8_t a, uint8x8_t b) {
     648   return vtrn1_u8(a, b);  // expected IR: one shufflevector, mask <0, 8, 2, 10, 4, 12, 6, 14> (each even lane of a followed by the same lane of b)
     649 }
    650 
    651 // CHECK-LABEL: define <16 x i8> @test_vtrn1q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
    652 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
    653 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
     654 uint8x16_t test_vtrn1q_u8(uint8x16_t a, uint8x16_t b) {
     655   return vtrn1q_u8(a, b);  // expected IR: one shufflevector, mask <0, 16, 2, 18, ..., 14, 30> (each even lane of a followed by the same lane of b)
     656 }
    657 
    658 // CHECK-LABEL: define <4 x i16> @test_vtrn1_u16(<4 x i16> %a, <4 x i16> %b) #0 {
    659 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
    660 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
     661 uint16x4_t test_vtrn1_u16(uint16x4_t a, uint16x4_t b) {
     662   return vtrn1_u16(a, b);  // expected IR: one shufflevector, mask <0, 4, 2, 6> (each even lane of a followed by the same lane of b)
     663 }
    664 
    665 // CHECK-LABEL: define <8 x i16> @test_vtrn1q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
    666 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
    667 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
     668 uint16x8_t test_vtrn1q_u16(uint16x8_t a, uint16x8_t b) {
     669   return vtrn1q_u16(a, b);  // expected IR: one shufflevector, mask <0, 8, 2, 10, 4, 12, 6, 14> (each even lane of a followed by the same lane of b)
     670 }
    671 
    672 // CHECK-LABEL: define <2 x i32> @test_vtrn1_u32(<2 x i32> %a, <2 x i32> %b) #0 {
    673 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
    674 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
     675 uint32x2_t test_vtrn1_u32(uint32x2_t a, uint32x2_t b) {
     676   return vtrn1_u32(a, b);  // expected IR: one shufflevector, mask <0, 2> (lane 0 of a, then lane 0 of b)
     677 }
    678 
    679 // CHECK-LABEL: define <4 x i32> @test_vtrn1q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
    680 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
    681 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
     682 uint32x4_t test_vtrn1q_u32(uint32x4_t a, uint32x4_t b) {
     683   return vtrn1q_u32(a, b);  // expected IR: one shufflevector, mask <0, 4, 2, 6> (each even lane of a followed by the same lane of b)
     684 }
    685 
    686 // CHECK-LABEL: define <2 x i64> @test_vtrn1q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
    687 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
    688 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
     689 uint64x2_t test_vtrn1q_u64(uint64x2_t a, uint64x2_t b) {
     690   return vtrn1q_u64(a, b);  // expected IR: one shufflevector, mask <0, 2> (lane 0 of a, then lane 0 of b)
     691 }
    692 
    693 // CHECK-LABEL: define <2 x float> @test_vtrn1_f32(<2 x float> %a, <2 x float> %b) #0 {
    694 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
    695 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
     696 float32x2_t test_vtrn1_f32(float32x2_t a, float32x2_t b) {
     697   return vtrn1_f32(a, b);  // expected IR: one shufflevector, mask <0, 2> (lane 0 of a, then lane 0 of b)
     698 }
    699 
    700 // CHECK-LABEL: define <4 x float> @test_vtrn1q_f32(<4 x float> %a, <4 x float> %b) #0 {
    701 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
    702 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
     703 float32x4_t test_vtrn1q_f32(float32x4_t a, float32x4_t b) {
     704   return vtrn1q_f32(a, b);  // expected IR: one shufflevector, mask <0, 4, 2, 6> (each even lane of a followed by the same lane of b)
     705 }
    706 
    707 // CHECK-LABEL: define <2 x double> @test_vtrn1q_f64(<2 x double> %a, <2 x double> %b) #0 {
    708 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
    709 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
     710 float64x2_t test_vtrn1q_f64(float64x2_t a, float64x2_t b) {
     711   return vtrn1q_f64(a, b);  // expected IR: one shufflevector, mask <0, 2> (lane 0 of a, then lane 0 of b)
     712 }
    713 
    714 // CHECK-LABEL: define <8 x i8> @test_vtrn1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
    715 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
    716 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
     717 poly8x8_t test_vtrn1_p8(poly8x8_t a, poly8x8_t b) {
     718   return vtrn1_p8(a, b);  // expected IR: one shufflevector, mask <0, 8, 2, 10, 4, 12, 6, 14> (each even lane of a followed by the same lane of b)
     719 }
    720 
    721 // CHECK-LABEL: define <16 x i8> @test_vtrn1q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
    722 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
    723 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
     724 poly8x16_t test_vtrn1q_p8(poly8x16_t a, poly8x16_t b) {
     725   return vtrn1q_p8(a, b);  // expected IR: one shufflevector, mask <0, 16, 2, 18, ..., 14, 30> (each even lane of a followed by the same lane of b)
     726 }
    727 
    728 // CHECK-LABEL: define <4 x i16> @test_vtrn1_p16(<4 x i16> %a, <4 x i16> %b) #0 {
    729 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
    730 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
     731 poly16x4_t test_vtrn1_p16(poly16x4_t a, poly16x4_t b) {
     732   return vtrn1_p16(a, b);  // expected IR: one shufflevector, mask <0, 4, 2, 6> (each even lane of a followed by the same lane of b)
     733 }
    734 
    735 // CHECK-LABEL: define <8 x i16> @test_vtrn1q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
    736 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
    737 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
     738 poly16x8_t test_vtrn1q_p16(poly16x8_t a, poly16x8_t b) {
     739   return vtrn1q_p16(a, b);  // expected IR: one shufflevector, mask <0, 8, 2, 10, 4, 12, 6, 14> (each even lane of a followed by the same lane of b)
     740 }
    741 
    742 // CHECK-LABEL: define <8 x i8> @test_vtrn2_s8(<8 x i8> %a, <8 x i8> %b) #0 {
    743 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
    744 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
     745 int8x8_t test_vtrn2_s8(int8x8_t a, int8x8_t b) {
     746   return vtrn2_s8(a, b);  // expected IR: one shufflevector, mask <1, 9, 3, 11, 5, 13, 7, 15> (each odd lane of a followed by the same lane of b)
     747 }
    748 
    749 // CHECK-LABEL: define <16 x i8> @test_vtrn2q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
    750 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
    751 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
     752 int8x16_t test_vtrn2q_s8(int8x16_t a, int8x16_t b) {
     753   return vtrn2q_s8(a, b);  // expected IR: one shufflevector, mask <1, 17, 3, 19, ..., 15, 31> (each odd lane of a followed by the same lane of b)
     754 }
    755 
    756 // CHECK-LABEL: define <4 x i16> @test_vtrn2_s16(<4 x i16> %a, <4 x i16> %b) #0 {
    757 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
    758 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
     759 int16x4_t test_vtrn2_s16(int16x4_t a, int16x4_t b) {
     760   return vtrn2_s16(a, b);  // expected IR: one shufflevector, mask <1, 5, 3, 7> (each odd lane of a followed by the same lane of b)
     761 }
    762 
    763 // CHECK-LABEL: define <8 x i16> @test_vtrn2q_s16(<8 x i16> %a, <8 x i16> %b) #0 {
    764 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
    765 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
     766 int16x8_t test_vtrn2q_s16(int16x8_t a, int16x8_t b) {
     767   return vtrn2q_s16(a, b);  // expected IR: one shufflevector, mask <1, 9, 3, 11, 5, 13, 7, 15> (each odd lane of a followed by the same lane of b)
     768 }
    769 
    770 // CHECK-LABEL: define <2 x i32> @test_vtrn2_s32(<2 x i32> %a, <2 x i32> %b) #0 {
    771 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
    772 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
     773 int32x2_t test_vtrn2_s32(int32x2_t a, int32x2_t b) {
     774   return vtrn2_s32(a, b);  // expected IR: one shufflevector, mask <1, 3> (lane 1 of a, then lane 1 of b)
     775 }
    776 
    777 // CHECK-LABEL: define <4 x i32> @test_vtrn2q_s32(<4 x i32> %a, <4 x i32> %b) #0 {
    778 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
    779 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
     780 int32x4_t test_vtrn2q_s32(int32x4_t a, int32x4_t b) {
     781   return vtrn2q_s32(a, b);  // expected IR: one shufflevector, mask <1, 5, 3, 7> (each odd lane of a followed by the same lane of b)
     782 }
    783 
    784 // CHECK-LABEL: define <2 x i64> @test_vtrn2q_s64(<2 x i64> %a, <2 x i64> %b) #0 {
    785 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
    786 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
     787 int64x2_t test_vtrn2q_s64(int64x2_t a, int64x2_t b) {
     788   return vtrn2q_s64(a, b);  // expected IR: one shufflevector, mask <1, 3> (lane 1 of a, then lane 1 of b)
     789 }
    790 
    791 // CHECK-LABEL: define <8 x i8> @test_vtrn2_u8(<8 x i8> %a, <8 x i8> %b) #0 {
    792 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
    793 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
     794 uint8x8_t test_vtrn2_u8(uint8x8_t a, uint8x8_t b) {
     795   return vtrn2_u8(a, b);  // expected IR: one shufflevector, mask <1, 9, 3, 11, 5, 13, 7, 15> (each odd lane of a followed by the same lane of b)
     796 }
    797 
    798 // CHECK-LABEL: define <16 x i8> @test_vtrn2q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
    799 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
    800 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
     801 uint8x16_t test_vtrn2q_u8(uint8x16_t a, uint8x16_t b) {
     802   return vtrn2q_u8(a, b);  // expected IR: one shufflevector, mask <1, 17, 3, 19, ..., 15, 31> (each odd lane of a followed by the same lane of b)
     803 }
    804 
    805 // CHECK-LABEL: define <4 x i16> @test_vtrn2_u16(<4 x i16> %a, <4 x i16> %b) #0 {
    806 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
    807 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
     808 uint16x4_t test_vtrn2_u16(uint16x4_t a, uint16x4_t b) {
     809   return vtrn2_u16(a, b);  // expected IR: one shufflevector, mask <1, 5, 3, 7> (each odd lane of a followed by the same lane of b)
     810 }
    811 
    812 // CHECK-LABEL: define <8 x i16> @test_vtrn2q_u16(<8 x i16> %a, <8 x i16> %b) #0 {
    813 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
    814 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
     815 uint16x8_t test_vtrn2q_u16(uint16x8_t a, uint16x8_t b) {
     816   return vtrn2q_u16(a, b);  // expected IR: one shufflevector, mask <1, 9, 3, 11, 5, 13, 7, 15> (each odd lane of a followed by the same lane of b)
     817 }
    818 
    819 // CHECK-LABEL: define <2 x i32> @test_vtrn2_u32(<2 x i32> %a, <2 x i32> %b) #0 {
    820 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
    821 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
     822 uint32x2_t test_vtrn2_u32(uint32x2_t a, uint32x2_t b) {
     823   return vtrn2_u32(a, b);  // expected IR: one shufflevector, mask <1, 3> (lane 1 of a, then lane 1 of b)
     824 }
    825 
    826 // CHECK-LABEL: define <4 x i32> @test_vtrn2q_u32(<4 x i32> %a, <4 x i32> %b) #0 {
    827 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
    828 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
     829 uint32x4_t test_vtrn2q_u32(uint32x4_t a, uint32x4_t b) {
     830   return vtrn2q_u32(a, b);  // expected IR: one shufflevector, mask <1, 5, 3, 7> (each odd lane of a followed by the same lane of b)
     831 }
    832 
    833 // CHECK-LABEL: define <2 x i64> @test_vtrn2q_u64(<2 x i64> %a, <2 x i64> %b) #0 {
    834 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
    835 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
     836 uint64x2_t test_vtrn2q_u64(uint64x2_t a, uint64x2_t b) {
     837   return vtrn2q_u64(a, b);  // expected IR: one shufflevector, mask <1, 3> (lane 1 of a, then lane 1 of b)
     838 }
    839 
    840 // CHECK-LABEL: define <2 x float> @test_vtrn2_f32(<2 x float> %a, <2 x float> %b) #0 {
    841 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
    842 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
     843 float32x2_t test_vtrn2_f32(float32x2_t a, float32x2_t b) {
     844   return vtrn2_f32(a, b);  // expected IR: one shufflevector, mask <1, 3> (lane 1 of a, then lane 1 of b)
     845 }
    846 
    847 // CHECK-LABEL: define <4 x float> @test_vtrn2q_f32(<4 x float> %a, <4 x float> %b) #0 {
    848 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
    849 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
     850 float32x4_t test_vtrn2q_f32(float32x4_t a, float32x4_t b) {
     851   return vtrn2q_f32(a, b);  // expected IR: one shufflevector, mask <1, 5, 3, 7> (each odd lane of a followed by the same lane of b)
     852 }
    853 
    854 // CHECK-LABEL: define <2 x double> @test_vtrn2q_f64(<2 x double> %a, <2 x double> %b) #0 {
    855 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
    856 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
     857 float64x2_t test_vtrn2q_f64(float64x2_t a, float64x2_t b) {
     858   return vtrn2q_f64(a, b);  // expected IR: one shufflevector, mask <1, 3> (lane 1 of a, then lane 1 of b)
     859 }
    860 
    861 // CHECK-LABEL: define <8 x i8> @test_vtrn2_p8(<8 x i8> %a, <8 x i8> %b) #0 {
    862 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
    863 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
     864 poly8x8_t test_vtrn2_p8(poly8x8_t a, poly8x8_t b) {
     865   return vtrn2_p8(a, b);  // expected IR: one shufflevector, mask <1, 9, 3, 11, 5, 13, 7, 15> (each odd lane of a followed by the same lane of b)
     866 }
    867 
    868 // CHECK-LABEL: define <16 x i8> @test_vtrn2q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
    869 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
    870 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
     871 poly8x16_t test_vtrn2q_p8(poly8x16_t a, poly8x16_t b) {
     872   return vtrn2q_p8(a, b);  // expected IR: one shufflevector, mask <1, 17, 3, 19, ..., 15, 31> (each odd lane of a followed by the same lane of b)
     873 }
    874 
    875 // CHECK-LABEL: define <4 x i16> @test_vtrn2_p16(<4 x i16> %a, <4 x i16> %b) #0 {
    876 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
    877 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
     878 poly16x4_t test_vtrn2_p16(poly16x4_t a, poly16x4_t b) {
     879   return vtrn2_p16(a, b);  // expected IR: one shufflevector, mask <1, 5, 3, 7> (each odd lane of a followed by the same lane of b)
     880 }
    881 
    882 // CHECK-LABEL: define <8 x i16> @test_vtrn2q_p16(<8 x i16> %a, <8 x i16> %b) #0 {
    883 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
    884 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
     885 poly16x8_t test_vtrn2q_p16(poly16x8_t a, poly16x8_t b) {
     886   return vtrn2q_p16(a, b);  // expected IR: one shufflevector, mask <1, 9, 3, 11, 5, 13, 7, 15> (each odd lane of a followed by the same lane of b)
     887 }
    888 
    889 // CHECK-LABEL: define %struct.int8x8x2_t @test_vuzp_s8(<8 x i8> %a, <8 x i8> %b) #0 {
    890 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
    891 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
    892 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
    893 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
    894 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
    895 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
    896 // CHECK:   store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
    897 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
    898 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
    899 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
    900 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8*
    901 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
    902 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
    903 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8
    904 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0
    905 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
    906 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
    907 // CHECK:   [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
    908 // CHECK:   ret %struct.int8x8x2_t [[TMP8]]
     909 int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {
     910   return vuzp_s8(a, b);  // expected IR: two shuffles stored into the returned pair, masks <0, 2, ..., 14> (even elements of a:b) then <1, 3, ..., 15> (odd elements)
     911 }
    912 
    913 // CHECK-LABEL: define %struct.int16x4x2_t @test_vuzp_s16(<4 x i16> %a, <4 x i16> %b) #0 {
    914 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
    915 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
    916 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
    917 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
    918 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
    919 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
    920 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
    921 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
    922 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
    923 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    924 // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
    925 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
    926 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    927 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
    928 // CHECK:   [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
    929 // CHECK:   [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
    930 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
    931 // CHECK:   [[TMP9:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
    932 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
    933 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int16x4x2_t [[TMP9]], 0
    934 // CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
    935 // CHECK:   [[TMP12:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
    936 // CHECK:   ret %struct.int16x4x2_t [[TMP12]]
     937 int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
     938   return vuzp_s16(a, b);  // expected IR: two shuffles stored into the returned pair, masks <0, 2, 4, 6> (even elements of a:b) then <1, 3, 5, 7> (odd elements)
     939 }
    940 // CHECK-LABEL: define %struct.int32x2x2_t @test_vuzp_s32(<2 x i32> %a, <2 x i32> %b) #0 {
    941 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
    942 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
    943 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
    944 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
    945 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
    946 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
    947 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
    948 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
    949 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
    950 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
    951 // CHECK:   store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]]
    952 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
    953 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
    954 // CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP6]]
    955 // CHECK:   [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
    956 // CHECK:   [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
    957 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
    958 // CHECK:   [[TMP9:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
    959 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
    960 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int32x2x2_t [[TMP9]], 0
    961 // CHECK:   store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
    962 // CHECK:   [[TMP12:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
    963 // CHECK:   ret %struct.int32x2x2_t [[TMP12]]
     964 int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
     965   return vuzp_s32(a, b);  // expected IR: two shuffles stored into the returned pair, masks <0, 2> (even elements of a:b) then <1, 3> (odd elements)
     966 }
    967 // CHECK-LABEL: define %struct.uint8x8x2_t @test_vuzp_u8(<8 x i8> %a, <8 x i8> %b) #0 {
    968 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
    969 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
    970 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
    971 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
    972 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
    973 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
    974 // CHECK:   store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
    975 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
    976 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
    977 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
    978 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8*
    979 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
    980 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
    981 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8
    982 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0
    983 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
    984 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
    985 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
    986 // CHECK:   ret %struct.uint8x8x2_t [[TMP8]]
     987 uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
     988   return vuzp_u8(a, b);  // expected IR: two shuffles stored into the returned pair, masks <0, 2, ..., 14> (even elements of a:b) then <1, 3, ..., 15> (odd elements)
     989 }
    990 // CHECK-LABEL: define %struct.uint16x4x2_t @test_vuzp_u16(<4 x i16> %a, <4 x i16> %b) #0 {
    991 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
    992 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
    993 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
    994 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
    995 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
    996 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
    997 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
    998 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
    999 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
   1000 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   1001 // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
   1002 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
   1003 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
   1004 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
   1005 // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
   1006 // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
   1007 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
   1008 // CHECK:   [[TMP9:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
   1009 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
   1010 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP9]], 0
   1011 // CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
   1012 // CHECK:   [[TMP12:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
   1013 // CHECK:   ret %struct.uint16x4x2_t [[TMP12]]
    1014 uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
    1015   return vuzp_u16(a, b);  // expected IR: two shuffles stored into the returned pair, masks <0, 2, 4, 6> (even elements of a:b) then <1, 3, 5, 7> (odd elements)
    1016 }
   1017 // CHECK-LABEL: define %struct.uint32x2x2_t @test_vuzp_u32(<2 x i32> %a, <2 x i32> %b) #0 {
   1018 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
   1019 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
   1020 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
   1021 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
   1022 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
   1023 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
   1024 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
   1025 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
   1026 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
   1027 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
   1028 // CHECK:   store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]]
   1029 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
   1030 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
   1031 // CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP6]]
   1032 // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
   1033 // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
   1034 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
   1035 // CHECK:   [[TMP9:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
   1036 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
   1037 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP9]], 0
   1038 // CHECK:   store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
   1039 // CHECK:   [[TMP12:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
   1040 // CHECK:   ret %struct.uint32x2x2_t [[TMP12]]
    1041 uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
    1042   return vuzp_u32(a, b);  // expected IR: two shuffles stored into the returned pair, masks <0, 2> (even elements of a:b) then <1, 3> (odd elements)
    1043 }
   1044 // CHECK-LABEL: define %struct.float32x2x2_t @test_vuzp_f32(<2 x float> %a, <2 x float> %b) #0 {
   1045 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
   1046 // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
   1047 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
   1048 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
   1049 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
   1050 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
   1051 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
   1052 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
   1053 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
   1054 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
   1055 // CHECK:   store <2 x float> [[VUZP_I]], <2 x float>* [[TMP3]]
   1056 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
   1057 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
   1058 // CHECK:   store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP6]]
   1059 // CHECK:   [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
   1060 // CHECK:   [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
   1061 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
   1062 // CHECK:   [[TMP9:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
   1063 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
   1064 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.float32x2x2_t [[TMP9]], 0
   1065 // CHECK:   store [2 x <2 x float>] [[TMP11]], [2 x <2 x float>]* [[TMP10]], align 8
   1066 // CHECK:   [[TMP12:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
   1067 // CHECK:   ret %struct.float32x2x2_t [[TMP12]]
   1068 float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {  // Codegen test: wraps vuzp_f32 so its lowering can be matched under this symbol.
   1069   return vuzp_f32(a, b);  // Expected lowering (directives above): shuffle masks <0, 2> then <1, 3>, stored to consecutive <2 x float> slots of the result.
   1070 }
   1071 // CHECK-LABEL: define %struct.poly8x8x2_t @test_vuzp_p8(<8 x i8> %a, <8 x i8> %b) #0 {
   1072 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
   1073 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
   1074 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
   1075 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
   1076 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
   1077 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   1078 // CHECK:   store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
   1079 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
   1080 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
   1081 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
   1082 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8*
   1083 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
   1084 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
   1085 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8
   1086 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0
   1087 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
   1088 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
   1089 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
   1090 // CHECK:   ret %struct.poly8x8x2_t [[TMP8]]
   1091 poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {  // Codegen test: wraps vuzp_p8 so its lowering can be matched under this symbol.
   1092   return vuzp_p8(a, b);  // Expected lowering (directives above): shuffle masks <0, 2, ..., 14> then <1, 3, ..., 15>, stored to consecutive <8 x i8> slots of the result.
   1093 }
   1094 // CHECK-LABEL: define %struct.poly16x4x2_t @test_vuzp_p16(<4 x i16> %a, <4 x i16> %b) #0 {
   1095 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
   1096 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
   1097 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
   1098 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
   1099 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
   1100 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
   1101 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
   1102 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
   1103 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
   1104 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   1105 // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
   1106 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
   1107 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
   1108 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]]
   1109 // CHECK:   [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
   1110 // CHECK:   [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
   1111 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
   1112 // CHECK:   [[TMP9:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
   1113 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
   1114 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP9]], 0
   1115 // CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
   1116 // CHECK:   [[TMP12:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
   1117 // CHECK:   ret %struct.poly16x4x2_t [[TMP12]]
   1118 poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {  // Codegen test: wraps vuzp_p16 so its lowering can be matched under this symbol.
   1119   return vuzp_p16(a, b);  // Expected lowering (directives above): shuffle masks <0, 2, 4, 6> then <1, 3, 5, 7>, stored to consecutive <4 x i16> slots of the result.
   1120 }
   1121 // CHECK-LABEL: define %struct.int8x16x2_t @test_vuzpq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
   1122 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
   1123 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
   1124 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
   1125 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
   1126 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
   1127 // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
   1128 // CHECK:   store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
   1129 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
   1130 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
   1131 // CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
   1132 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8*
   1133 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
   1134 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
   1135 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16
   1136 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0
   1137 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
   1138 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
   1139 // CHECK:   [[TMP8:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
   1140 // CHECK:   ret %struct.int8x16x2_t [[TMP8]]
   1141 int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {  // Codegen test: wraps vuzpq_s8 so its lowering can be matched under this symbol.
   1142   return vuzpq_s8(a, b);  // Expected lowering (directives above): shuffle masks <0, 2, ..., 30> then <1, 3, ..., 31>, stored to consecutive <16 x i8> slots of the result.
   1143 }
   1144 // CHECK-LABEL: define %struct.int16x8x2_t @test_vuzpq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
   1145 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
   1146 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
   1147 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
   1148 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
   1149 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
   1150 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
   1151 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
   1152 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
   1153 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
   1154 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   1155 // CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
   1156 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
   1157 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
   1158 // CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
   1159 // CHECK:   [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
   1160 // CHECK:   [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
   1161 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
   1162 // CHECK:   [[TMP9:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
   1163 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
   1164 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int16x8x2_t [[TMP9]], 0
   1165 // CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
   1166 // CHECK:   [[TMP12:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
   1167 // CHECK:   ret %struct.int16x8x2_t [[TMP12]]
   1168 int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {  // Codegen test: wraps vuzpq_s16 so its lowering can be matched under this symbol.
   1169   return vuzpq_s16(a, b);  // Expected lowering (directives above): shuffle masks <0, 2, ..., 14> then <1, 3, ..., 15>, stored to consecutive <8 x i16> slots of the result.
   1170 }
   1171 // CHECK-LABEL: define %struct.int32x4x2_t @test_vuzpq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
   1172 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
   1173 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
   1174 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
   1175 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
   1176 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
   1177 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
   1178 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
   1179 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
   1180 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
   1181 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   1182 // CHECK:   store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]]
   1183 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
   1184 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
   1185 // CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP6]]
   1186 // CHECK:   [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
   1187 // CHECK:   [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
   1188 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
   1189 // CHECK:   [[TMP9:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
   1190 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
   1191 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int32x4x2_t [[TMP9]], 0
   1192 // CHECK:   store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
   1193 // CHECK:   [[TMP12:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
   1194 // CHECK:   ret %struct.int32x4x2_t [[TMP12]]
   1195 int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {  // Codegen test: wraps vuzpq_s32 so its lowering can be matched under this symbol.
   1196   return vuzpq_s32(a, b);  // Expected lowering (directives above): shuffle masks <0, 2, 4, 6> then <1, 3, 5, 7>, stored to consecutive <4 x i32> slots of the result.
   1197 }
   1198 // CHECK-LABEL: define %struct.uint8x16x2_t @test_vuzpq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
   1199 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
   1200 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
   1201 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
   1202 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
   1203 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
   1204 // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
   1205 // CHECK:   store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
   1206 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
   1207 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
   1208 // CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
   1209 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8*
   1210 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
   1211 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
   1212 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16
   1213 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0
   1214 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
   1215 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
   1216 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
   1217 // CHECK:   ret %struct.uint8x16x2_t [[TMP8]]
   1218 uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {  // Codegen test: wraps vuzpq_u8 so its lowering can be matched under this symbol.
   1219   return vuzpq_u8(a, b);  // Expected lowering (directives above): shuffle masks <0, 2, ..., 30> then <1, 3, ..., 31>, stored to consecutive <16 x i8> slots of the result.
   1220 }
   1221 // CHECK-LABEL: define %struct.uint16x8x2_t @test_vuzpq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
   1222 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
   1223 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
   1224 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
   1225 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
   1226 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
   1227 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
   1228 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
   1229 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
   1230 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
   1231 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   1232 // CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
   1233 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
   1234 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
   1235 // CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
   1236 // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
   1237 // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
   1238 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
   1239 // CHECK:   [[TMP9:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
   1240 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
   1241 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP9]], 0
   1242 // CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
   1243 // CHECK:   [[TMP12:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
   1244 // CHECK:   ret %struct.uint16x8x2_t [[TMP12]]
   1245 uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {  // Codegen test: wraps vuzpq_u16 so its lowering can be matched under this symbol.
   1246   return vuzpq_u16(a, b);  // Expected lowering (directives above): shuffle masks <0, 2, ..., 14> then <1, 3, ..., 15>, stored to consecutive <8 x i16> slots of the result.
   1247 }
   1248 // CHECK-LABEL: define %struct.uint32x4x2_t @test_vuzpq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
   1249 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
   1250 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
   1251 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
   1252 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
   1253 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
   1254 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
   1255 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
   1256 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
   1257 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
   1258 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   1259 // CHECK:   store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]]
   1260 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
   1261 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
   1262 // CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP6]]
   1263 // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
   1264 // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
   1265 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
   1266 // CHECK:   [[TMP9:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
   1267 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
   1268 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP9]], 0
   1269 // CHECK:   store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
   1270 // CHECK:   [[TMP12:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
   1271 // CHECK:   ret %struct.uint32x4x2_t [[TMP12]]
   1272 uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {  // Codegen test: wraps vuzpq_u32 so its lowering can be matched under this symbol.
   1273   return vuzpq_u32(a, b);  // Expected lowering (directives above): shuffle masks <0, 2, 4, 6> then <1, 3, 5, 7>, stored to consecutive <4 x i32> slots of the result.
   1274 }
   1275 // CHECK-LABEL: define %struct.float32x4x2_t @test_vuzpq_f32(<4 x float> %a, <4 x float> %b) #0 {
   1276 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
   1277 // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
   1278 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
   1279 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
   1280 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
   1281 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
   1282 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
   1283 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
   1284 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
   1285 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   1286 // CHECK:   store <4 x float> [[VUZP_I]], <4 x float>* [[TMP3]]
   1287 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
   1288 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
   1289 // CHECK:   store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP6]]
   1290 // CHECK:   [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
   1291 // CHECK:   [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
   1292 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
   1293 // CHECK:   [[TMP9:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
   1294 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
   1295 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.float32x4x2_t [[TMP9]], 0
   1296 // CHECK:   store [2 x <4 x float>] [[TMP11]], [2 x <4 x float>]* [[TMP10]], align 16
   1297 // CHECK:   [[TMP12:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
   1298 // CHECK:   ret %struct.float32x4x2_t [[TMP12]]
   1299 float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {  // Codegen test: wraps vuzpq_f32 so its lowering can be matched under this symbol.
   1300   return vuzpq_f32(a, b);  // Expected lowering (directives above): shuffle masks <0, 2, 4, 6> then <1, 3, 5, 7>, stored to consecutive <4 x float> slots of the result.
   1301 }
   1302 // CHECK-LABEL: define %struct.poly8x16x2_t @test_vuzpq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
   1303 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
   1304 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
   1305 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
   1306 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
   1307 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
   1308 // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
   1309 // CHECK:   store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
   1310 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
   1311 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
   1312 // CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
   1313 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8*
   1314 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
   1315 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
   1316 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16
   1317 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0
   1318 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
   1319 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
   1320 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
   1321 // CHECK:   ret %struct.poly8x16x2_t [[TMP8]]
   1322 poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {  // Codegen test: wraps vuzpq_p8 so its lowering can be matched under this symbol.
   1323   return vuzpq_p8(a, b);  // Expected lowering (directives above): shuffle masks <0, 2, ..., 30> then <1, 3, ..., 31>, stored to consecutive <16 x i8> slots of the result.
   1324 }
   1325 // CHECK-LABEL: define %struct.poly16x8x2_t @test_vuzpq_p16(<8 x i16> %a, <8 x i16> %b) #0 {
   1326 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
   1327 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
   1328 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
   1329 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
   1330 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
   1331 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
   1332 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
   1333 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
   1334 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
   1335 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   1336 // CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
   1337 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
   1338 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
   1339 // CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]]
   1340 // CHECK:   [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
   1341 // CHECK:   [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
   1342 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
   1343 // CHECK:   [[TMP9:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
   1344 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
   1345 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP9]], 0
   1346 // CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
   1347 // CHECK:   [[TMP12:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
   1348 // CHECK:   ret %struct.poly16x8x2_t [[TMP12]]
   1349 poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {  // Codegen test: wraps vuzpq_p16 so its lowering can be matched under this symbol.
   1350   return vuzpq_p16(a, b);  // Expected lowering (directives above): shuffle masks <0, 2, ..., 14> then <1, 3, ..., 15>, stored to consecutive <8 x i16> slots of the result.
   1351 }
   1352 
   1353 // CHECK-LABEL: define %struct.int8x8x2_t @test_vzip_s8(<8 x i8> %a, <8 x i8> %b) #0 {
   1354 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
   1355 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
   1356 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
   1357 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
   1358 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
   1359 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   1360 // CHECK:   store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
   1361 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
   1362 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
   1363 // CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
   1364 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8*
   1365 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
   1366 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
   1367 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8
   1368 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0
   1369 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
   1370 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
   1371 // CHECK:   [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
   1372 // CHECK:   ret %struct.int8x8x2_t [[TMP8]]
   1373 int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {  // Codegen test: wraps vzip_s8 so its lowering can be matched under this symbol.
   1374   return vzip_s8(a, b);  // Expected lowering (directives above): shuffle masks <0, 8, 1, 9, 2, 10, 3, 11> then <4, 12, 5, 13, 6, 14, 7, 15>, stored to consecutive <8 x i8> slots of the result.
   1375 }
   1376 
   1377 // CHECK-LABEL: define %struct.int16x4x2_t @test_vzip_s16(<4 x i16> %a, <4 x i16> %b) #0 {
   1378 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
   1379 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
   1380 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
   1381 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
   1382 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
   1383 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
   1384 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
   1385 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
   1386 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
   1387 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
   1388 // CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
   1389 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
   1390 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
   1391 // CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
   1392 // CHECK:   [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
   1393 // CHECK:   [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
   1394 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
   1395 // CHECK:   [[TMP9:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
   1396 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
   1397 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int16x4x2_t [[TMP9]], 0
   1398 // CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
   1399 // CHECK:   [[TMP12:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
   1400 // CHECK:   ret %struct.int16x4x2_t [[TMP12]]
   1401 int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {  // Codegen test: wraps vzip_s16 so its lowering can be matched under this symbol.
   1402   return vzip_s16(a, b);  // Expected lowering (directives above): shuffle masks <0, 4, 1, 5> then <2, 6, 3, 7>, stored to consecutive <4 x i16> slots of the result.
   1403 }
   1404 // CHECK-LABEL: define %struct.int32x2x2_t @test_vzip_s32(<2 x i32> %a, <2 x i32> %b) #0 {
   1405 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
   1406 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
   1407 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
   1408 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
   1409 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
   1410 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
   1411 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
   1412 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
   1413 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
   1414 // CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
   1415 // CHECK:   store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]]
   1416 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
   1417 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
   1418 // CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP6]]
   1419 // CHECK:   [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
   1420 // CHECK:   [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
   1421 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
   1422 // CHECK:   [[TMP9:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
   1423 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
   1424 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int32x2x2_t [[TMP9]], 0
   1425 // CHECK:   store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
   1426 // CHECK:   [[TMP12:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
   1427 // CHECK:   ret %struct.int32x2x2_t [[TMP12]]
   1428 int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {  // Codegen test: wraps vzip_s32 so its lowering can be matched under this symbol.
   1429   return vzip_s32(a, b);  // Expected lowering (directives above): shuffle masks <0, 2> then <1, 3>, stored to consecutive <2 x i32> slots of the result.
   1430 }
   1431 // CHECK-LABEL: define %struct.uint8x8x2_t @test_vzip_u8(<8 x i8> %a, <8 x i8> %b) #0 {
   1432 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
   1433 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
   1434 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
   1435 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
   1436 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
   1437 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   1438 // CHECK:   store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
   1439 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
   1440 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
   1441 // CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
   1442 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8*
   1443 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
   1444 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
   1445 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8
   1446 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0
   1447 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
   1448 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
   1449 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
   1450 // CHECK:   ret %struct.uint8x8x2_t [[TMP8]]
   // Test wrapper: returns vzip_u8(a, b) directly. The FileCheck directives
   // above expect two <8 x i8> shufflevectors of %a and %b with masks
   // <0, 8, 1, 9, 2, 10, 3, 11> and <4, 12, 5, 13, 6, 14, 7, 15>, stored
   // back to back and returned as %struct.uint8x8x2_t.
   1451 uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
   1452   return vzip_u8(a, b);
   1453 }
   1454 // CHECK-LABEL: define %struct.uint16x4x2_t @test_vzip_u16(<4 x i16> %a, <4 x i16> %b) #0 {
   1455 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
   1456 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
   1457 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
   1458 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
   1459 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
   1460 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
   1461 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
   1462 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
   1463 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
   1464 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
   1465 // CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
   1466 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
   1467 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
   1468 // CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
   1469 // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
   1470 // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
   1471 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
   1472 // CHECK:   [[TMP9:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
   1473 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
   1474 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP9]], 0
   1475 // CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
   1476 // CHECK:   [[TMP12:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
   1477 // CHECK:   ret %struct.uint16x4x2_t [[TMP12]]
   // Test wrapper: returns vzip_u16(a, b) directly. The FileCheck directives
   // above expect two <4 x i16> shufflevectors of the operands with masks
   // <0, 4, 1, 5> and <2, 6, 3, 7>, stored back to back and returned as
   // %struct.uint16x4x2_t.
   1478 uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
   1479   return vzip_u16(a, b);
   1480 }
   1481 // CHECK-LABEL: define %struct.uint32x2x2_t @test_vzip_u32(<2 x i32> %a, <2 x i32> %b) #0 {
   1482 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
   1483 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
   1484 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
   1485 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
   1486 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
   1487 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
   1488 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
   1489 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
   1490 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
   1491 // CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
   1492 // CHECK:   store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]]
   1493 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
   1494 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
   1495 // CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP6]]
   1496 // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
   1497 // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
   1498 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
   1499 // CHECK:   [[TMP9:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
   1500 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
   1501 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP9]], 0
   1502 // CHECK:   store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
   1503 // CHECK:   [[TMP12:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
   1504 // CHECK:   ret %struct.uint32x2x2_t [[TMP12]]
   // Test wrapper: returns vzip_u32(a, b) directly. The FileCheck directives
   // above expect two <2 x i32> shufflevectors of the operands with masks
   // <0, 2> and <1, 3>, stored back to back and returned as
   // %struct.uint32x2x2_t.
   1505 uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
   1506   return vzip_u32(a, b);
   1507 }
   1508 // CHECK-LABEL: define %struct.float32x2x2_t @test_vzip_f32(<2 x float> %a, <2 x float> %b) #0 {
   1509 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
   1510 // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
   1511 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
   1512 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
   1513 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
   1514 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
   1515 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
   1516 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
   1517 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
   1518 // CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
   1519 // CHECK:   store <2 x float> [[VZIP_I]], <2 x float>* [[TMP3]]
   1520 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
   1521 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
   1522 // CHECK:   store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP6]]
   1523 // CHECK:   [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
   1524 // CHECK:   [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
   1525 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
   1526 // CHECK:   [[TMP9:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
   1527 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
   1528 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.float32x2x2_t [[TMP9]], 0
   1529 // CHECK:   store [2 x <2 x float>] [[TMP11]], [2 x <2 x float>]* [[TMP10]], align 8
   1530 // CHECK:   [[TMP12:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
   1531 // CHECK:   ret %struct.float32x2x2_t [[TMP12]]
   // Test wrapper: returns vzip_f32(a, b) directly. The FileCheck directives
   // above expect two <2 x float> shufflevectors of the operands with masks
   // <0, 2> and <1, 3>, stored back to back and returned as
   // %struct.float32x2x2_t.
   1532 float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
   1533   return vzip_f32(a, b);
   1534 }
   1535 // CHECK-LABEL: define %struct.poly8x8x2_t @test_vzip_p8(<8 x i8> %a, <8 x i8> %b) #0 {
   1536 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
   1537 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
   1538 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
   1539 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
   1540 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
   1541 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   1542 // CHECK:   store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
   1543 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
   1544 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
   1545 // CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
   1546 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8*
   1547 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
   1548 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
   1549 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8
   1550 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0
   1551 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
   1552 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
   1553 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
   1554 // CHECK:   ret %struct.poly8x8x2_t [[TMP8]]
   // Test wrapper: returns vzip_p8(a, b) directly. The FileCheck directives
   // above expect two <8 x i8> shufflevectors of %a and %b with masks
   // <0, 8, 1, 9, 2, 10, 3, 11> and <4, 12, 5, 13, 6, 14, 7, 15>, stored
   // back to back and returned as %struct.poly8x8x2_t.
   1555 poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
   1556   return vzip_p8(a, b);
   1557 }
   1558 // CHECK-LABEL: define %struct.poly16x4x2_t @test_vzip_p16(<4 x i16> %a, <4 x i16> %b) #0 {
   1559 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
   1560 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
   1561 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
   1562 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
   1563 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
   1564 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
   1565 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
   1566 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
   1567 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
   1568 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
   1569 // CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
   1570 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
   1571 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
   1572 // CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]]
   1573 // CHECK:   [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
   1574 // CHECK:   [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
   1575 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
   1576 // CHECK:   [[TMP9:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
   1577 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
   1578 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP9]], 0
   1579 // CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
   1580 // CHECK:   [[TMP12:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
   1581 // CHECK:   ret %struct.poly16x4x2_t [[TMP12]]
   // Test wrapper: returns vzip_p16(a, b) directly. The FileCheck directives
   // above expect two <4 x i16> shufflevectors of the operands with masks
   // <0, 4, 1, 5> and <2, 6, 3, 7>, stored back to back and returned as
   // %struct.poly16x4x2_t.
   1582 poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
   1583   return vzip_p16(a, b);
   1584 }
   1585 // CHECK-LABEL: define %struct.int8x16x2_t @test_vzipq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
   1586 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
   1587 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
   1588 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
   1589 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
   1590 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
   1591 // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
   1592 // CHECK:   store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
   1593 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
   1594 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
   1595 // CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
   1596 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8*
   1597 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
   1598 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
   1599 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16
   1600 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0
   1601 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
   1602 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
   1603 // CHECK:   [[TMP8:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
   1604 // CHECK:   ret %struct.int8x16x2_t [[TMP8]]
   // Test wrapper: returns vzipq_s8(a, b) directly. The FileCheck directives
   // above expect two <16 x i8> shufflevectors of %a and %b: the first mask
   // pairs indices 0..7 with 16..23 (<0, 16, 1, 17, ...>), the second pairs
   // 8..15 with 24..31 (<8, 24, 9, 25, ...>). The results are stored back to
   // back and returned as %struct.int8x16x2_t.
   1605 int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
   1606   return vzipq_s8(a, b);
   1607 }
   1608 // CHECK-LABEL: define %struct.int16x8x2_t @test_vzipq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
   1609 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
   1610 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
   1611 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
   1612 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
   1613 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
   1614 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
   1615 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
   1616 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
   1617 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
   1618 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   1619 // CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
   1620 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
   1621 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
   1622 // CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
   1623 // CHECK:   [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
   1624 // CHECK:   [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
   1625 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
   1626 // CHECK:   [[TMP9:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
   1627 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
   1628 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int16x8x2_t [[TMP9]], 0
   1629 // CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
   1630 // CHECK:   [[TMP12:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
   1631 // CHECK:   ret %struct.int16x8x2_t [[TMP12]]
   // Test wrapper: returns vzipq_s16(a, b) directly. The FileCheck directives
   // above expect two <8 x i16> shufflevectors of the operands with masks
   // <0, 8, 1, 9, 2, 10, 3, 11> and <4, 12, 5, 13, 6, 14, 7, 15>, stored
   // back to back and returned as %struct.int16x8x2_t.
   1632 int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
   1633   return vzipq_s16(a, b);
   1634 }
   1635 // CHECK-LABEL: define %struct.int32x4x2_t @test_vzipq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
   1636 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
   1637 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
   1638 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
   1639 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
   1640 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
   1641 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
   1642 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
   1643 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
   1644 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
   1645 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
   1646 // CHECK:   store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]]
   1647 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
   1648 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
   1649 // CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP6]]
   1650 // CHECK:   [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
   1651 // CHECK:   [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
   1652 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
   1653 // CHECK:   [[TMP9:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
   1654 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
   1655 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int32x4x2_t [[TMP9]], 0
   1656 // CHECK:   store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
   1657 // CHECK:   [[TMP12:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
   1658 // CHECK:   ret %struct.int32x4x2_t [[TMP12]]
   // Test wrapper: returns vzipq_s32(a, b) directly. The FileCheck directives
   // above expect two <4 x i32> shufflevectors of the operands with masks
   // <0, 4, 1, 5> and <2, 6, 3, 7>, stored back to back and returned as
   // %struct.int32x4x2_t.
   1659 int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
   1660   return vzipq_s32(a, b);
   1661 }
   1662 // CHECK-LABEL: define %struct.uint8x16x2_t @test_vzipq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
   1663 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
   1664 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
   1665 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
   1666 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
   1667 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
   1668 // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
   1669 // CHECK:   store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
   1670 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
   1671 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
   1672 // CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
   1673 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8*
   1674 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
   1675 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
   1676 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16
   1677 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0
   1678 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
   1679 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
   1680 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
   1681 // CHECK:   ret %struct.uint8x16x2_t [[TMP8]]
   // Test wrapper: returns vzipq_u8(a, b) directly. The FileCheck directives
   // above expect two <16 x i8> shufflevectors of %a and %b: the first mask
   // pairs indices 0..7 with 16..23 (<0, 16, 1, 17, ...>), the second pairs
   // 8..15 with 24..31 (<8, 24, 9, 25, ...>). The results are stored back to
   // back and returned as %struct.uint8x16x2_t.
   1682 uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
   1683   return vzipq_u8(a, b);
   1684 }
   1685 // CHECK-LABEL: define %struct.uint16x8x2_t @test_vzipq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
   1686 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
   1687 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
   1688 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
   1689 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
   1690 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
   1691 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
   1692 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
   1693 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
   1694 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
   1695 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   1696 // CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
   1697 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
   1698 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
   1699 // CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
   1700 // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
   1701 // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
   1702 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
   1703 // CHECK:   [[TMP9:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
   1704 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
   1705 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP9]], 0
   1706 // CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
   1707 // CHECK:   [[TMP12:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
   1708 // CHECK:   ret %struct.uint16x8x2_t [[TMP12]]
   // Test wrapper: returns vzipq_u16(a, b) directly. The FileCheck directives
   // above expect two <8 x i16> shufflevectors of the operands with masks
   // <0, 8, 1, 9, 2, 10, 3, 11> and <4, 12, 5, 13, 6, 14, 7, 15>, stored
   // back to back and returned as %struct.uint16x8x2_t.
   1709 uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
   1710   return vzipq_u16(a, b);
   1711 }
   1712 // CHECK-LABEL: define %struct.uint32x4x2_t @test_vzipq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
   1713 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
   1714 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
   1715 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
   1716 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
   1717 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
   1718 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
   1719 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
   1720 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
   1721 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
   1722 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
   1723 // CHECK:   store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]]
   1724 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
   1725 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
   1726 // CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP6]]
   1727 // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
   1728 // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
   1729 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
   1730 // CHECK:   [[TMP9:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
   1731 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
   1732 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP9]], 0
   1733 // CHECK:   store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
   1734 // CHECK:   [[TMP12:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
   1735 // CHECK:   ret %struct.uint32x4x2_t [[TMP12]]
   // Test wrapper: returns vzipq_u32(a, b) directly. The FileCheck directives
   // above expect two <4 x i32> shufflevectors of the operands with masks
   // <0, 4, 1, 5> and <2, 6, 3, 7>, stored back to back and returned as
   // %struct.uint32x4x2_t.
   1736 uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
   1737   return vzipq_u32(a, b);
   1738 }
   1739 // CHECK-LABEL: define %struct.float32x4x2_t @test_vzipq_f32(<4 x float> %a, <4 x float> %b) #0 {
   1740 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
   1741 // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
   1742 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
   1743 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
   1744 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
   1745 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
   1746 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
   1747 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
   1748 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
   1749 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
   1750 // CHECK:   store <4 x float> [[VZIP_I]], <4 x float>* [[TMP3]]
   1751 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
   1752 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
   1753 // CHECK:   store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP6]]
   1754 // CHECK:   [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
   1755 // CHECK:   [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
   1756 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
   1757 // CHECK:   [[TMP9:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
   1758 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
   1759 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.float32x4x2_t [[TMP9]], 0
   1760 // CHECK:   store [2 x <4 x float>] [[TMP11]], [2 x <4 x float>]* [[TMP10]], align 16
   1761 // CHECK:   [[TMP12:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
   1762 // CHECK:   ret %struct.float32x4x2_t [[TMP12]]
   // Test wrapper: returns vzipq_f32(a, b) directly. The FileCheck directives
   // above expect two <4 x float> shufflevectors of the operands with masks
   // <0, 4, 1, 5> and <2, 6, 3, 7>, stored back to back and returned as
   // %struct.float32x4x2_t.
   1763 float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
   1764   return vzipq_f32(a, b);
   1765 }
   1766 // CHECK-LABEL: define %struct.poly8x16x2_t @test_vzipq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
   1767 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
   1768 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
   1769 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
   1770 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
   1771 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
   1772 // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
   1773 // CHECK:   store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
   1774 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
   1775 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
   1776 // CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
   1777 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8*
   1778 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
   1779 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
   1780 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16
   1781 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0
   1782 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
   1783 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
   1784 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
   1785 // CHECK:   ret %struct.poly8x16x2_t [[TMP8]]
   // Test wrapper: returns vzipq_p8(a, b) directly. The FileCheck directives
   // above expect two <16 x i8> shufflevectors of %a and %b: the first mask
   // pairs indices 0..7 with 16..23 (<0, 16, 1, 17, ...>), the second pairs
   // 8..15 with 24..31 (<8, 24, 9, 25, ...>). The results are stored back to
   // back and returned as %struct.poly8x16x2_t.
   1786 poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
   1787   return vzipq_p8(a, b);
   1788 }
   1789 // CHECK-LABEL: define %struct.poly16x8x2_t @test_vzipq_p16(<8 x i16> %a, <8 x i16> %b) #0 {
   1790 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
   1791 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
   1792 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
   1793 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
   1794 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
   1795 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
   1796 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
   1797 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
   1798 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
   1799 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   1800 // CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
   1801 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
   1802 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
   1803 // CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]]
   1804 // CHECK:   [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
   1805 // CHECK:   [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
   1806 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
   1807 // CHECK:   [[TMP9:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
   1808 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
   1809 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP9]], 0
   1810 // CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
   1811 // CHECK:   [[TMP12:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
   1812 // CHECK:   ret %struct.poly16x8x2_t [[TMP12]]
   // Test wrapper: returns vzipq_p16(a, b) directly. The FileCheck directives
   // above expect two <8 x i16> shufflevectors of the operands with masks
   // <0, 8, 1, 9, 2, 10, 3, 11> and <4, 12, 5, 13, 6, 14, 7, 15>, stored
   // back to back and returned as %struct.poly16x8x2_t.
   1813 poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
   1814   return vzipq_p16(a, b);
   1815 }
   1816 
   1817 // CHECK-LABEL: define %struct.int8x8x2_t @test_vtrn_s8(<8 x i8> %a, <8 x i8> %b) #0 {
   1818 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
   1819 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8
   1820 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
   1821 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
   1822 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
   1823 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
   1824 // CHECK:   store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]]
   1825 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
   1826 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   1827 // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
   1828 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8*
   1829 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8*
   1830 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
   1831 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8
   1832 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0
   1833 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
   1834 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
   1835 // CHECK:   [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
   1836 // CHECK:   ret %struct.int8x8x2_t [[TMP8]]
   // Returns vtrn_s8(a, b) directly. The FileCheck directives above expect two
   // <8 x i8> shufflevectors on %a and %b: mask <0,8,2,10,4,12,6,14> (even lanes
   // of a and b paired) and mask <1,9,3,11,5,13,7,15> (odd lanes paired).
   1837 int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
   1838   return vtrn_s8(a, b);
   1839 }
   1840 
   1841 // CHECK-LABEL: define %struct.int16x4x2_t @test_vtrn_s16(<4 x i16> %a, <4 x i16> %b) #0 {
   1842 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
   1843 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8
   1844 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
   1845 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
   1846 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
   1847 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
   1848 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
   1849 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
   1850 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
   1851 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   1852 // CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
   1853 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
   1854 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   1855 // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]]
   1856 // CHECK:   [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
   1857 // CHECK:   [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8*
   1858 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
   1859 // CHECK:   [[TMP9:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
   1860 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
   1861 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int16x4x2_t [[TMP9]], 0
   1862 // CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
   1863 // CHECK:   [[TMP12:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
   1864 // CHECK:   ret %struct.int16x4x2_t [[TMP12]]
   // Returns vtrn_s16(a, b) directly. The FileCheck directives above expect two
   // <4 x i16> shufflevectors: mask <0,4,2,6> (even lanes of a and b paired) and
   // mask <1,5,3,7> (odd lanes paired).
   1865 int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
   1866   return vtrn_s16(a, b);
   1867 }
   1868 // CHECK-LABEL: define %struct.int32x2x2_t @test_vtrn_s32(<2 x i32> %a, <2 x i32> %b) #0 {
   1869 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
   1870 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8
   1871 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
   1872 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
   1873 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
   1874 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
   1875 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
   1876 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
   1877 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
   1878 // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
   1879 // CHECK:   store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]]
   1880 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
   1881 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
   1882 // CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP6]]
   1883 // CHECK:   [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
   1884 // CHECK:   [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8*
   1885 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
   1886 // CHECK:   [[TMP9:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
   1887 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
   1888 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int32x2x2_t [[TMP9]], 0
   1889 // CHECK:   store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
   1890 // CHECK:   [[TMP12:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
   1891 // CHECK:   ret %struct.int32x2x2_t [[TMP12]]
   // Returns vtrn_s32(a, b) directly. The FileCheck directives above expect two
   // <2 x i32> shufflevectors: mask <0,2> (lane 0 of a and of b) and mask <1,3>
   // (lane 1 of a and of b).
   1892 int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
   1893   return vtrn_s32(a, b);
   1894 }
   1895 // CHECK-LABEL: define %struct.uint8x8x2_t @test_vtrn_u8(<8 x i8> %a, <8 x i8> %b) #0 {
   1896 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
   1897 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
   1898 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
   1899 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
   1900 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
   1901 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
   1902 // CHECK:   store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]]
   1903 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
   1904 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   1905 // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
   1906 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8*
   1907 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8*
   1908 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
   1909 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8
   1910 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0
   1911 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
   1912 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
   1913 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
   1914 // CHECK:   ret %struct.uint8x8x2_t [[TMP8]]
   // Returns vtrn_u8(a, b) directly. The FileCheck directives above expect two
   // <8 x i8> shufflevectors on %a and %b: mask <0,8,2,10,4,12,6,14> (even lanes
   // of a and b paired) and mask <1,9,3,11,5,13,7,15> (odd lanes paired).
   1915 uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
   1916   return vtrn_u8(a, b);
   1917 }
   1918 // CHECK-LABEL: define %struct.uint16x4x2_t @test_vtrn_u16(<4 x i16> %a, <4 x i16> %b) #0 {
   1919 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
   1920 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
   1921 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
   1922 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
   1923 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
   1924 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
   1925 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
   1926 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
   1927 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
   1928 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   1929 // CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
   1930 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
   1931 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   1932 // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]]
   1933 // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
   1934 // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8*
   1935 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
   1936 // CHECK:   [[TMP9:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
   1937 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
   1938 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP9]], 0
   1939 // CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
   1940 // CHECK:   [[TMP12:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
   1941 // CHECK:   ret %struct.uint16x4x2_t [[TMP12]]
   // Returns vtrn_u16(a, b) directly. The FileCheck directives above expect two
   // <4 x i16> shufflevectors: mask <0,4,2,6> (even lanes of a and b paired) and
   // mask <1,5,3,7> (odd lanes paired).
   1942 uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
   1943   return vtrn_u16(a, b);
   1944 }
   1945 // CHECK-LABEL: define %struct.uint32x2x2_t @test_vtrn_u32(<2 x i32> %a, <2 x i32> %b) #0 {
   1946 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
   1947 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
   1948 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
   1949 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
   1950 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
   1951 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
   1952 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
   1953 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
   1954 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
   1955 // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 0, i32 2>
   1956 // CHECK:   store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]]
   1957 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
   1958 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 3>
   1959 // CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP6]]
   1960 // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
   1961 // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8*
   1962 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
   1963 // CHECK:   [[TMP9:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
   1964 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
   1965 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP9]], 0
   1966 // CHECK:   store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8
   1967 // CHECK:   [[TMP12:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
   1968 // CHECK:   ret %struct.uint32x2x2_t [[TMP12]]
   // Returns vtrn_u32(a, b) directly. The FileCheck directives above expect two
   // <2 x i32> shufflevectors: mask <0,2> (lane 0 of a and of b) and mask <1,3>
   // (lane 1 of a and of b).
   1969 uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
   1970   return vtrn_u32(a, b);
   1971 }
   1972 // CHECK-LABEL: define %struct.float32x2x2_t @test_vtrn_f32(<2 x float> %a, <2 x float> %b) #0 {
   1973 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
   1974 // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8
   1975 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
   1976 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
   1977 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
   1978 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
   1979 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
   1980 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
   1981 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
   1982 // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
   1983 // CHECK:   store <2 x float> [[VTRN_I]], <2 x float>* [[TMP3]]
   1984 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
   1985 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3>
   1986 // CHECK:   store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP6]]
   1987 // CHECK:   [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
   1988 // CHECK:   [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8*
   1989 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
   1990 // CHECK:   [[TMP9:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
   1991 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
   1992 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.float32x2x2_t [[TMP9]], 0
   1993 // CHECK:   store [2 x <2 x float>] [[TMP11]], [2 x <2 x float>]* [[TMP10]], align 8
   1994 // CHECK:   [[TMP12:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
   1995 // CHECK:   ret %struct.float32x2x2_t [[TMP12]]
   // Returns vtrn_f32(a, b) directly. The FileCheck directives above expect two
   // <2 x float> shufflevectors: mask <0,2> (lane 0 of a and of b) and mask
   // <1,3> (lane 1 of a and of b).
   1996 float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
   1997   return vtrn_f32(a, b);
   1998 }
   1999 // CHECK-LABEL: define %struct.poly8x8x2_t @test_vtrn_p8(<8 x i8> %a, <8 x i8> %b) #0 {
   2000 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
   2001 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
   2002 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
   2003 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
   2004 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
   2005 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
   2006 // CHECK:   store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]]
   2007 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
   2008 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   2009 // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
   2010 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8*
   2011 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8*
   2012 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 16, i32 8, i1 false) #2
   2013 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8
   2014 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0
   2015 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
   2016 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
   2017 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
   2018 // CHECK:   ret %struct.poly8x8x2_t [[TMP8]]
   // Returns vtrn_p8(a, b) directly. The FileCheck directives above expect two
   // <8 x i8> shufflevectors on %a and %b: mask <0,8,2,10,4,12,6,14> (even lanes
   // of a and b paired) and mask <1,9,3,11,5,13,7,15> (odd lanes paired).
   2019 poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
   2020   return vtrn_p8(a, b);
   2021 }
   2022 // CHECK-LABEL: define %struct.poly16x4x2_t @test_vtrn_p16(<4 x i16> %a, <4 x i16> %b) #0 {
   2023 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
   2024 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
   2025 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
   2026 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
   2027 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
   2028 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
   2029 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
   2030 // CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
   2031 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
   2032 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   2033 // CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
   2034 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
   2035 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   2036 // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]]
   2037 // CHECK:   [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
   2038 // CHECK:   [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8*
   2039 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 16, i32 8, i1 false) #2
   2040 // CHECK:   [[TMP9:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
   2041 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
   2042 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP9]], 0
   2043 // CHECK:   store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8
   2044 // CHECK:   [[TMP12:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
   2045 // CHECK:   ret %struct.poly16x4x2_t [[TMP12]]
   // Returns vtrn_p16(a, b) directly. The FileCheck directives above expect two
   // <4 x i16> shufflevectors: mask <0,4,2,6> (even lanes of a and b paired) and
   // mask <1,5,3,7> (odd lanes paired).
   2046 poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
   2047   return vtrn_p16(a, b);
   2048 }
   2049 // CHECK-LABEL: define %struct.int8x16x2_t @test_vtrnq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
   2050 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
   2051 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16
   2052 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
   2053 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
   2054 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
   2055 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
   2056 // CHECK:   store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]]
   2057 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
   2058 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
   2059 // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
   2060 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8*
   2061 // CHECK:   [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8*
   2062 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
   2063 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16
   2064 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0
   2065 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
   2066 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
   2067 // CHECK:   [[TMP8:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
   2068 // CHECK:   ret %struct.int8x16x2_t [[TMP8]]
   // Returns vtrnq_s8(a, b) directly. The FileCheck directives above expect two
   // <16 x i8> shufflevectors on %a and %b: mask <0,16,2,18,...,14,30> (even
   // lanes of a and b paired) and mask <1,17,3,19,...,15,31> (odd lanes paired).
   2069 int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
   2070   return vtrnq_s8(a, b);
   2071 }
   2072 // CHECK-LABEL: define %struct.int16x8x2_t @test_vtrnq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
   2073 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
   2074 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16
   2075 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
   2076 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
   2077 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
   2078 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
   2079 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
   2080 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
   2081 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
   2082 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
   2083 // CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
   2084 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
   2085 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   2086 // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]]
   2087 // CHECK:   [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
   2088 // CHECK:   [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8*
   2089 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
   2090 // CHECK:   [[TMP9:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
   2091 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
   2092 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int16x8x2_t [[TMP9]], 0
   2093 // CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
   2094 // CHECK:   [[TMP12:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
   2095 // CHECK:   ret %struct.int16x8x2_t [[TMP12]]
   // Returns vtrnq_s16(a, b) directly. The FileCheck directives above expect two
   // <8 x i16> shufflevectors: mask <0,8,2,10,4,12,6,14> (even lanes of a and b
   // paired) and mask <1,9,3,11,5,13,7,15> (odd lanes paired).
   2096 int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
   2097   return vtrnq_s16(a, b);
   2098 }
   2099 // CHECK-LABEL: define %struct.int32x4x2_t @test_vtrnq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
   2100 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
   2101 // CHECK:   [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16
   2102 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
   2103 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
   2104 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
   2105 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
   2106 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
   2107 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
   2108 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
   2109 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   2110 // CHECK:   store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]]
   2111 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
   2112 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   2113 // CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP6]]
   2114 // CHECK:   [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
   2115 // CHECK:   [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8*
   2116 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
   2117 // CHECK:   [[TMP9:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
   2118 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
   2119 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.int32x4x2_t [[TMP9]], 0
   2120 // CHECK:   store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
   2121 // CHECK:   [[TMP12:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
   2122 // CHECK:   ret %struct.int32x4x2_t [[TMP12]]
   // Returns vtrnq_s32(a, b) directly. The FileCheck directives above expect two
   // <4 x i32> shufflevectors: mask <0,4,2,6> (even lanes of a and b paired) and
   // mask <1,5,3,7> (odd lanes paired).
   2123 int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
   2124   return vtrnq_s32(a, b);
   2125 }
   2126 // CHECK-LABEL: define %struct.uint8x16x2_t @test_vtrnq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
   2127 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
   2128 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
   2129 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
   2130 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
   2131 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
   2132 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
   2133 // CHECK:   store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]]
   2134 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
   2135 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
   2136 // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
   2137 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8*
   2138 // CHECK:   [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8*
   2139 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
   2140 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16
   2141 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0
   2142 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
   2143 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
   2144 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
   2145 // CHECK:   ret %struct.uint8x16x2_t [[TMP8]]
   // Returns vtrnq_u8(a, b) directly. The FileCheck directives above expect two
   // <16 x i8> shufflevectors on %a and %b: mask <0,16,2,18,...,14,30> (even
   // lanes of a and b paired) and mask <1,17,3,19,...,15,31> (odd lanes paired).
   2146 uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
   2147   return vtrnq_u8(a, b);
   2148 }
   2149 // CHECK-LABEL: define %struct.uint16x8x2_t @test_vtrnq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
   2150 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
   2151 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
   2152 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
   2153 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
   2154 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
   2155 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
   2156 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
   2157 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
   2158 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
   2159 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
   2160 // CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
   2161 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
   2162 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   2163 // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]]
   2164 // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
   2165 // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8*
   2166 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
   2167 // CHECK:   [[TMP9:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
   2168 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
   2169 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP9]], 0
   2170 // CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
   2171 // CHECK:   [[TMP12:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
   2172 // CHECK:   ret %struct.uint16x8x2_t [[TMP12]]
   // Returns vtrnq_u16(a, b) directly. The FileCheck directives above expect two
   // <8 x i16> shufflevectors: mask <0,8,2,10,4,12,6,14> (even lanes of a and b
   // paired) and mask <1,9,3,11,5,13,7,15> (odd lanes paired).
   2173 uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
   2174   return vtrnq_u16(a, b);
   2175 }
   2176 // CHECK-LABEL: define %struct.uint32x4x2_t @test_vtrnq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
   2177 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
   2178 // CHECK:   [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
   2179 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
   2180 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
   2181 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
   2182 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
   2183 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
   2184 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
   2185 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
   2186 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   2187 // CHECK:   store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]]
   2188 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
   2189 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   2190 // CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP6]]
   2191 // CHECK:   [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
   2192 // CHECK:   [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8*
   2193 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
   2194 // CHECK:   [[TMP9:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
   2195 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
   2196 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP9]], 0
   2197 // CHECK:   store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16
   2198 // CHECK:   [[TMP12:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
   2199 // CHECK:   ret %struct.uint32x4x2_t [[TMP12]]
   // Returns vtrnq_u32(a, b) directly. The FileCheck directives above expect two
   // <4 x i32> shufflevectors: mask <0,4,2,6> (even lanes of a and b paired) and
   // mask <1,5,3,7> (odd lanes paired).
   2200 uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
   2201   return vtrnq_u32(a, b);
   2202 }
   2203 // CHECK-LABEL: define %struct.float32x4x2_t @test_vtrnq_f32(<4 x float> %a, <4 x float> %b) #0 {
   2204 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
   2205 // CHECK:   [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16
   2206 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
   2207 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
   2208 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
   2209 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
   2210 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
   2211 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
   2212 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
   2213 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   2214 // CHECK:   store <4 x float> [[VTRN_I]], <4 x float>* [[TMP3]]
   2215 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
   2216 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   2217 // CHECK:   store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP6]]
   2218 // CHECK:   [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
   2219 // CHECK:   [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8*
   2220 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
   2221 // CHECK:   [[TMP9:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
   2222 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
   2223 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.float32x4x2_t [[TMP9]], 0
   2224 // CHECK:   store [2 x <4 x float>] [[TMP11]], [2 x <4 x float>]* [[TMP10]], align 16
   2225 // CHECK:   [[TMP12:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
   2226 // CHECK:   ret %struct.float32x4x2_t [[TMP12]]
   // Test driver for vtrnq_f32: forwards both operands to the intrinsic and
   // returns its two-vector result unchanged. The FileCheck directives before
   // this function expect two <4 x float> shufflevector ops with lane masks
   // <0, 4, 2, 6> and <1, 5, 3, 7>, stored to consecutive slots of the result.
   // Keep the body a single return statement; changing it may alter the IR
   // that those directives match.
   2227 float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
   2228   return vtrnq_f32(a, b);
   2229 }
   2230 // CHECK-LABEL: define %struct.poly8x16x2_t @test_vtrnq_p8(<16 x i8> %a, <16 x i8> %b) #0 {
   2231 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
   2232 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
   2233 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
   2234 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
   2235 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
   2236 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
   2237 // CHECK:   store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]]
   2238 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
   2239 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
   2240 // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
   2241 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8*
   2242 // CHECK:   [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8*
   2243 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP3]], i8* [[TMP4]], i64 32, i32 16, i1 false) #2
   2244 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16
   2245 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0
   2246 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
   2247 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
   2248 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
   2249 // CHECK:   ret %struct.poly8x16x2_t [[TMP8]]
   // Test driver for vtrnq_p8: forwards both operands to the intrinsic and
   // returns its two-vector result unchanged. The FileCheck directives before
   // this function expect two <16 x i8> shufflevector ops applied directly to
   // %a and %b (no operand bitcasts), with lane masks <0, 16, 2, 18, ...> and
   // <1, 17, 3, 19, ...>, stored to consecutive slots of the result.
   // Keep the body a single return statement; changing it may alter the IR
   // that those directives match.
   2250 poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
   2251   return vtrnq_p8(a, b);
   2252 }
   2253 // CHECK-LABEL: define %struct.poly16x8x2_t @test_vtrnq_p16(<8 x i16> %a, <8 x i16> %b) #0 {
   2254 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
   2255 // CHECK:   [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
   2256 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
   2257 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
   2258 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
   2259 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
   2260 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
   2261 // CHECK:   [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
   2262 // CHECK:   [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
   2263 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
   2264 // CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
   2265 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
   2266 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   2267 // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]]
   2268 // CHECK:   [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
   2269 // CHECK:   [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8*
   2270 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP7]], i8* [[TMP8]], i64 32, i32 16, i1 false) #2
   2271 // CHECK:   [[TMP9:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
   2272 // CHECK:   [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
   2273 // CHECK:   [[TMP11:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP9]], 0
   2274 // CHECK:   store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16
   2275 // CHECK:   [[TMP12:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
   2276 // CHECK:   ret %struct.poly16x8x2_t [[TMP12]]
   // Test driver for vtrnq_p16: forwards both operands to the intrinsic and
   // returns its two-vector result unchanged. The FileCheck directives before
   // this function expect two <8 x i16> shufflevector ops with lane masks
   // <0, 8, 2, 10, 4, 12, 6, 14> and <1, 9, 3, 11, 5, 13, 7, 15>, stored to
   // consecutive slots of the result.
   // Keep the body a single return statement; changing it may alter the IR
   // that those directives match.
   2277 poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
   2278   return vtrnq_p16(a, b);
   2279 }
   2280