Home | History | Annotate | Download | only in CodeGen
      1 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
      2 // RUN:  -fallow-half-arguments-and-returns -emit-llvm -o - %s \
      3 // RUN: | opt -S -mem2reg | FileCheck %s
      4 
      5 // Test new aarch64 intrinsics and types
      6 
      7 #include <arm_neon.h>
      8 
// ---------------------------------------------------------------------------
// vceqz / vceqzq: lane-wise "compare (bitwise) equal to zero" intrinsics.
// Each wrapper simply returns the intrinsic applied to its argument; the
// expected IR -- an icmp eq (fcmp oeq for float/double lanes) against
// zeroinitializer, then a sext of the <N x i1> mask back to the lane width --
// is pinned by the machine-generated FileCheck lines preceding each
// definition.  NOTE(review): those assertion comments are auto-generated;
// regenerate them with the update script rather than editing by hand.
// ---------------------------------------------------------------------------
      9 // CHECK-LABEL: define <8 x i8> @test_vceqz_s8(<8 x i8> %a) #0 {
     10 // CHECK:   [[TMP0:%.*]] = icmp eq <8 x i8> %a, zeroinitializer
     11 // CHECK:   [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
     12 // CHECK:   ret <8 x i8> [[VCEQZ_I]]
     13 uint8x8_t test_vceqz_s8(int8x8_t a) {
     14   return vceqz_s8(a);
     15 }
     16 
     17 // CHECK-LABEL: define <4 x i16> @test_vceqz_s16(<4 x i16> %a) #0 {
     18 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
     19 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
     20 // CHECK:   [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
     21 // CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
     22 // CHECK:   ret <4 x i16> [[VCEQZ_I]]
     23 uint16x4_t test_vceqz_s16(int16x4_t a) {
     24   return vceqz_s16(a);
     25 }
     26 
     27 // CHECK-LABEL: define <2 x i32> @test_vceqz_s32(<2 x i32> %a) #0 {
     28 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
     29 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
     30 // CHECK:   [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
     31 // CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
     32 // CHECK:   ret <2 x i32> [[VCEQZ_I]]
     33 uint32x2_t test_vceqz_s32(int32x2_t a) {
     34   return vceqz_s32(a);
     35 }
     36 
     37 // CHECK-LABEL: define <1 x i64> @test_vceqz_s64(<1 x i64> %a) #0 {
     38 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
     39 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
     40 // CHECK:   [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
     41 // CHECK:   [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
     42 // CHECK:   ret <1 x i64> [[VCEQZ_I]]
     43 uint64x1_t test_vceqz_s64(int64x1_t a) {
     44   return vceqz_s64(a);
     45 }
     46 
     47 // CHECK-LABEL: define <1 x i64> @test_vceqz_u64(<1 x i64> %a) #0 {
     48 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
     49 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
     50 // CHECK:   [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
     51 // CHECK:   [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
     52 // CHECK:   ret <1 x i64> [[VCEQZ_I]]
     53 uint64x1_t test_vceqz_u64(uint64x1_t a) {
     54   return vceqz_u64(a);
     55 }
     56 
     57 // CHECK-LABEL: define <1 x i64> @test_vceqz_p64(<1 x i64> %a) #0 {
     58 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
     59 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
     60 // CHECK:   [[TMP2:%.*]] = icmp eq <1 x i64> [[TMP1]], zeroinitializer
     61 // CHECK:   [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
     62 // CHECK:   ret <1 x i64> [[VCEQZ_I]]
     63 uint64x1_t test_vceqz_p64(poly64x1_t a) {
     64   return vceqz_p64(a);
     65 }
     66 
     67 // CHECK-LABEL: define <16 x i8> @test_vceqzq_s8(<16 x i8> %a) #0 {
     68 // CHECK:   [[TMP0:%.*]] = icmp eq <16 x i8> %a, zeroinitializer
     69 // CHECK:   [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
     70 // CHECK:   ret <16 x i8> [[VCEQZ_I]]
     71 uint8x16_t test_vceqzq_s8(int8x16_t a) {
     72   return vceqzq_s8(a);
     73 }
     74 
     75 // CHECK-LABEL: define <8 x i16> @test_vceqzq_s16(<8 x i16> %a) #0 {
     76 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
     77 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
     78 // CHECK:   [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer
     79 // CHECK:   [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
     80 // CHECK:   ret <8 x i16> [[VCEQZ_I]]
     81 uint16x8_t test_vceqzq_s16(int16x8_t a) {
     82   return vceqzq_s16(a);
     83 }
     84 
     85 // CHECK-LABEL: define <4 x i32> @test_vceqzq_s32(<4 x i32> %a) #0 {
     86 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
     87 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
     88 // CHECK:   [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
     89 // CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
     90 // CHECK:   ret <4 x i32> [[VCEQZ_I]]
     91 uint32x4_t test_vceqzq_s32(int32x4_t a) {
     92   return vceqzq_s32(a);
     93 }
     94 
     95 // CHECK-LABEL: define <2 x i64> @test_vceqzq_s64(<2 x i64> %a) #0 {
     96 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
     97 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
     98 // CHECK:   [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
     99 // CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
    100 // CHECK:   ret <2 x i64> [[VCEQZ_I]]
    101 uint64x2_t test_vceqzq_s64(int64x2_t a) {
    102   return vceqzq_s64(a);
    103 }
    104 
    105 // CHECK-LABEL: define <8 x i8> @test_vceqz_u8(<8 x i8> %a) #0 {
    106 // CHECK:   [[TMP0:%.*]] = icmp eq <8 x i8> %a, zeroinitializer
    107 // CHECK:   [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
    108 // CHECK:   ret <8 x i8> [[VCEQZ_I]]
    109 uint8x8_t test_vceqz_u8(uint8x8_t a) {
    110   return vceqz_u8(a);
    111 }
    112 
    113 // CHECK-LABEL: define <4 x i16> @test_vceqz_u16(<4 x i16> %a) #0 {
    114 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
    115 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
    116 // CHECK:   [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
    117 // CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
    118 // CHECK:   ret <4 x i16> [[VCEQZ_I]]
    119 uint16x4_t test_vceqz_u16(uint16x4_t a) {
    120   return vceqz_u16(a);
    121 }
    122 
    123 // CHECK-LABEL: define <2 x i32> @test_vceqz_u32(<2 x i32> %a) #0 {
    124 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
    125 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
    126 // CHECK:   [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
    127 // CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
    128 // CHECK:   ret <2 x i32> [[VCEQZ_I]]
    129 uint32x2_t test_vceqz_u32(uint32x2_t a) {
    130   return vceqz_u32(a);
    131 }
    132 
    133 // CHECK-LABEL: define <16 x i8> @test_vceqzq_u8(<16 x i8> %a) #0 {
    134 // CHECK:   [[TMP0:%.*]] = icmp eq <16 x i8> %a, zeroinitializer
    135 // CHECK:   [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
    136 // CHECK:   ret <16 x i8> [[VCEQZ_I]]
    137 uint8x16_t test_vceqzq_u8(uint8x16_t a) {
    138   return vceqzq_u8(a);
    139 }
    140 
    141 // CHECK-LABEL: define <8 x i16> @test_vceqzq_u16(<8 x i16> %a) #0 {
    142 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
    143 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
    144 // CHECK:   [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer
    145 // CHECK:   [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
    146 // CHECK:   ret <8 x i16> [[VCEQZ_I]]
    147 uint16x8_t test_vceqzq_u16(uint16x8_t a) {
    148   return vceqzq_u16(a);
    149 }
    150 
    151 // CHECK-LABEL: define <4 x i32> @test_vceqzq_u32(<4 x i32> %a) #0 {
    152 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
    153 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
    154 // CHECK:   [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
    155 // CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
    156 // CHECK:   ret <4 x i32> [[VCEQZ_I]]
    157 uint32x4_t test_vceqzq_u32(uint32x4_t a) {
    158   return vceqzq_u32(a);
    159 }
    160 
    161 // CHECK-LABEL: define <2 x i64> @test_vceqzq_u64(<2 x i64> %a) #0 {
    162 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
    163 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
    164 // CHECK:   [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
    165 // CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
    166 // CHECK:   ret <2 x i64> [[VCEQZ_I]]
    167 uint64x2_t test_vceqzq_u64(uint64x2_t a) {
    168   return vceqzq_u64(a);
    169 }
    170 
    171 // CHECK-LABEL: define <2 x i32> @test_vceqz_f32(<2 x float> %a) #0 {
    172 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
    173 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
    174 // CHECK:   [[TMP2:%.*]] = fcmp oeq <2 x float> [[TMP1]], zeroinitializer
    175 // CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
    176 // CHECK:   ret <2 x i32> [[VCEQZ_I]]
    177 uint32x2_t test_vceqz_f32(float32x2_t a) {
    178   return vceqz_f32(a);
    179 }
    180 
    181 // CHECK-LABEL: define <1 x i64> @test_vceqz_f64(<1 x double> %a) #0 {
    182 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
    183 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
    184 // CHECK:   [[TMP2:%.*]] = fcmp oeq <1 x double> [[TMP1]], zeroinitializer
    185 // CHECK:   [[VCEQZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
    186 // CHECK:   ret <1 x i64> [[VCEQZ_I]]
    187 uint64x1_t test_vceqz_f64(float64x1_t a) {
    188   return vceqz_f64(a);
    189 }
    190 
    191 // CHECK-LABEL: define <4 x i32> @test_vceqzq_f32(<4 x float> %a) #0 {
    192 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
    193 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
    194 // CHECK:   [[TMP2:%.*]] = fcmp oeq <4 x float> [[TMP1]], zeroinitializer
    195 // CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
    196 // CHECK:   ret <4 x i32> [[VCEQZ_I]]
    197 uint32x4_t test_vceqzq_f32(float32x4_t a) {
    198   return vceqzq_f32(a);
    199 }
    200 
    201 // CHECK-LABEL: define <8 x i8> @test_vceqz_p8(<8 x i8> %a) #0 {
    202 // CHECK:   [[TMP0:%.*]] = icmp eq <8 x i8> %a, zeroinitializer
    203 // CHECK:   [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
    204 // CHECK:   ret <8 x i8> [[VCEQZ_I]]
    205 uint8x8_t test_vceqz_p8(poly8x8_t a) {
    206   return vceqz_p8(a);
    207 }
    208 
    209 // CHECK-LABEL: define <16 x i8> @test_vceqzq_p8(<16 x i8> %a) #0 {
    210 // CHECK:   [[TMP0:%.*]] = icmp eq <16 x i8> %a, zeroinitializer
    211 // CHECK:   [[VCEQZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
    212 // CHECK:   ret <16 x i8> [[VCEQZ_I]]
    213 uint8x16_t test_vceqzq_p8(poly8x16_t a) {
    214   return vceqzq_p8(a);
    215 }
    216 
    217 // CHECK-LABEL: define <4 x i16> @test_vceqz_p16(<4 x i16> %a) #0 {
    218 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
    219 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
    220 // CHECK:   [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
    221 // CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
    222 // CHECK:   ret <4 x i16> [[VCEQZ_I]]
    223 uint16x4_t test_vceqz_p16(poly16x4_t a) {
    224   return vceqz_p16(a);
    225 }
    226 
    227 // CHECK-LABEL: define <8 x i16> @test_vceqzq_p16(<8 x i16> %a) #0 {
    228 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
    229 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
    230 // CHECK:   [[TMP2:%.*]] = icmp eq <8 x i16> [[TMP1]], zeroinitializer
    231 // CHECK:   [[VCEQZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
    232 // CHECK:   ret <8 x i16> [[VCEQZ_I]]
    233 uint16x8_t test_vceqzq_p16(poly16x8_t a) {
    234   return vceqzq_p16(a);
    235 }
    236 
    237 // CHECK-LABEL: define <2 x i64> @test_vceqzq_f64(<2 x double> %a) #0 {
    238 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
    239 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
    240 // CHECK:   [[TMP2:%.*]] = fcmp oeq <2 x double> [[TMP1]], zeroinitializer
    241 // CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
    242 // CHECK:   ret <2 x i64> [[VCEQZ_I]]
    243 uint64x2_t test_vceqzq_f64(float64x2_t a) {
    244   return vceqzq_f64(a);
    245 }
    246 
    247 // CHECK-LABEL: define <2 x i64> @test_vceqzq_p64(<2 x i64> %a) #0 {
    248 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
    249 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
    250 // CHECK:   [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
    251 // CHECK:   [[VCEQZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
    252 // CHECK:   ret <2 x i64> [[VCEQZ_I]]
    253 uint64x2_t test_vceqzq_p64(poly64x2_t a) {
    254   return vceqzq_p64(a);
    255 }
    256 
// ---------------------------------------------------------------------------
// vcgez / vcgezq: lane-wise "compare greater-than-or-equal to zero".
// Signed integer lanes lower to icmp sge, floating-point lanes to fcmp oge,
// each followed by a sext of the mask to the lane width.  The FileCheck lines
// above each wrapper are machine-generated assertions on that IR; regenerate
// them with the update script rather than editing by hand.
// ---------------------------------------------------------------------------
    257 // CHECK-LABEL: define <8 x i8> @test_vcgez_s8(<8 x i8> %a) #0 {
    258 // CHECK:   [[TMP0:%.*]] = icmp sge <8 x i8> %a, zeroinitializer
    259 // CHECK:   [[VCGEZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
    260 // CHECK:   ret <8 x i8> [[VCGEZ_I]]
    261 uint8x8_t test_vcgez_s8(int8x8_t a) {
    262   return vcgez_s8(a);
    263 }
    264 
    265 // CHECK-LABEL: define <4 x i16> @test_vcgez_s16(<4 x i16> %a) #0 {
    266 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
    267 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
    268 // CHECK:   [[TMP2:%.*]] = icmp sge <4 x i16> [[TMP1]], zeroinitializer
    269 // CHECK:   [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
    270 // CHECK:   ret <4 x i16> [[VCGEZ_I]]
    271 uint16x4_t test_vcgez_s16(int16x4_t a) {
    272   return vcgez_s16(a);
    273 }
    274 
    275 // CHECK-LABEL: define <2 x i32> @test_vcgez_s32(<2 x i32> %a) #0 {
    276 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
    277 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
    278 // CHECK:   [[TMP2:%.*]] = icmp sge <2 x i32> [[TMP1]], zeroinitializer
    279 // CHECK:   [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
    280 // CHECK:   ret <2 x i32> [[VCGEZ_I]]
    281 uint32x2_t test_vcgez_s32(int32x2_t a) {
    282   return vcgez_s32(a);
    283 }
    284 
    285 // CHECK-LABEL: define <1 x i64> @test_vcgez_s64(<1 x i64> %a) #0 {
    286 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
    287 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
    288 // CHECK:   [[TMP2:%.*]] = icmp sge <1 x i64> [[TMP1]], zeroinitializer
    289 // CHECK:   [[VCGEZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
    290 // CHECK:   ret <1 x i64> [[VCGEZ_I]]
    291 uint64x1_t test_vcgez_s64(int64x1_t a) {
    292   return vcgez_s64(a);
    293 }
    294 
    295 // CHECK-LABEL: define <16 x i8> @test_vcgezq_s8(<16 x i8> %a) #0 {
    296 // CHECK:   [[TMP0:%.*]] = icmp sge <16 x i8> %a, zeroinitializer
    297 // CHECK:   [[VCGEZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
    298 // CHECK:   ret <16 x i8> [[VCGEZ_I]]
    299 uint8x16_t test_vcgezq_s8(int8x16_t a) {
    300   return vcgezq_s8(a);
    301 }
    302 
    303 // CHECK-LABEL: define <8 x i16> @test_vcgezq_s16(<8 x i16> %a) #0 {
    304 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
    305 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
    306 // CHECK:   [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], zeroinitializer
    307 // CHECK:   [[VCGEZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
    308 // CHECK:   ret <8 x i16> [[VCGEZ_I]]
    309 uint16x8_t test_vcgezq_s16(int16x8_t a) {
    310   return vcgezq_s16(a);
    311 }
    312 
    313 // CHECK-LABEL: define <4 x i32> @test_vcgezq_s32(<4 x i32> %a) #0 {
    314 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
    315 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
    316 // CHECK:   [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], zeroinitializer
    317 // CHECK:   [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
    318 // CHECK:   ret <4 x i32> [[VCGEZ_I]]
    319 uint32x4_t test_vcgezq_s32(int32x4_t a) {
    320   return vcgezq_s32(a);
    321 }
    322 
    323 // CHECK-LABEL: define <2 x i64> @test_vcgezq_s64(<2 x i64> %a) #0 {
    324 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
    325 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
    326 // CHECK:   [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], zeroinitializer
    327 // CHECK:   [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
    328 // CHECK:   ret <2 x i64> [[VCGEZ_I]]
    329 uint64x2_t test_vcgezq_s64(int64x2_t a) {
    330   return vcgezq_s64(a);
    331 }
    332 
    333 // CHECK-LABEL: define <2 x i32> @test_vcgez_f32(<2 x float> %a) #0 {
    334 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
    335 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
    336 // CHECK:   [[TMP2:%.*]] = fcmp oge <2 x float> [[TMP1]], zeroinitializer
    337 // CHECK:   [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
    338 // CHECK:   ret <2 x i32> [[VCGEZ_I]]
    339 uint32x2_t test_vcgez_f32(float32x2_t a) {
    340   return vcgez_f32(a);
    341 }
    342 
    343 // CHECK-LABEL: define <1 x i64> @test_vcgez_f64(<1 x double> %a) #0 {
    344 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
    345 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
    346 // CHECK:   [[TMP2:%.*]] = fcmp oge <1 x double> [[TMP1]], zeroinitializer
    347 // CHECK:   [[VCGEZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
    348 // CHECK:   ret <1 x i64> [[VCGEZ_I]]
    349 uint64x1_t test_vcgez_f64(float64x1_t a) {
    350   return vcgez_f64(a);
    351 }
    352 
    353 // CHECK-LABEL: define <4 x i32> @test_vcgezq_f32(<4 x float> %a) #0 {
    354 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
    355 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
    356 // CHECK:   [[TMP2:%.*]] = fcmp oge <4 x float> [[TMP1]], zeroinitializer
    357 // CHECK:   [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
    358 // CHECK:   ret <4 x i32> [[VCGEZ_I]]
    359 uint32x4_t test_vcgezq_f32(float32x4_t a) {
    360   return vcgezq_f32(a);
    361 }
    362 
    363 // CHECK-LABEL: define <2 x i64> @test_vcgezq_f64(<2 x double> %a) #0 {
    364 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
    365 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
    366 // CHECK:   [[TMP2:%.*]] = fcmp oge <2 x double> [[TMP1]], zeroinitializer
    367 // CHECK:   [[VCGEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
    368 // CHECK:   ret <2 x i64> [[VCGEZ_I]]
    369 uint64x2_t test_vcgezq_f64(float64x2_t a) {
    370   return vcgezq_f64(a);
    371 }
    372 
// ---------------------------------------------------------------------------
// vclez / vclezq: lane-wise "compare less-than-or-equal to zero".
// Signed integer lanes lower to icmp sle, floating-point lanes to fcmp ole,
// each followed by a sext of the mask to the lane width.  The FileCheck lines
// above each wrapper are machine-generated assertions on that IR; regenerate
// them with the update script rather than editing by hand.
// ---------------------------------------------------------------------------
    373 // CHECK-LABEL: define <8 x i8> @test_vclez_s8(<8 x i8> %a) #0 {
    374 // CHECK:   [[TMP0:%.*]] = icmp sle <8 x i8> %a, zeroinitializer
    375 // CHECK:   [[VCLEZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
    376 // CHECK:   ret <8 x i8> [[VCLEZ_I]]
    377 uint8x8_t test_vclez_s8(int8x8_t a) {
    378   return vclez_s8(a);
    379 }
    380 
    381 // CHECK-LABEL: define <4 x i16> @test_vclez_s16(<4 x i16> %a) #0 {
    382 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
    383 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
    384 // CHECK:   [[TMP2:%.*]] = icmp sle <4 x i16> [[TMP1]], zeroinitializer
    385 // CHECK:   [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
    386 // CHECK:   ret <4 x i16> [[VCLEZ_I]]
    387 uint16x4_t test_vclez_s16(int16x4_t a) {
    388   return vclez_s16(a);
    389 }
    390 
    391 // CHECK-LABEL: define <2 x i32> @test_vclez_s32(<2 x i32> %a) #0 {
    392 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
    393 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
    394 // CHECK:   [[TMP2:%.*]] = icmp sle <2 x i32> [[TMP1]], zeroinitializer
    395 // CHECK:   [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
    396 // CHECK:   ret <2 x i32> [[VCLEZ_I]]
    397 uint32x2_t test_vclez_s32(int32x2_t a) {
    398   return vclez_s32(a);
    399 }
    400 
    401 // CHECK-LABEL: define <1 x i64> @test_vclez_s64(<1 x i64> %a) #0 {
    402 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
    403 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
    404 // CHECK:   [[TMP2:%.*]] = icmp sle <1 x i64> [[TMP1]], zeroinitializer
    405 // CHECK:   [[VCLEZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
    406 // CHECK:   ret <1 x i64> [[VCLEZ_I]]
    407 uint64x1_t test_vclez_s64(int64x1_t a) {
    408   return vclez_s64(a);
    409 }
    410 
    411 // CHECK-LABEL: define <16 x i8> @test_vclezq_s8(<16 x i8> %a) #0 {
    412 // CHECK:   [[TMP0:%.*]] = icmp sle <16 x i8> %a, zeroinitializer
    413 // CHECK:   [[VCLEZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
    414 // CHECK:   ret <16 x i8> [[VCLEZ_I]]
    415 uint8x16_t test_vclezq_s8(int8x16_t a) {
    416   return vclezq_s8(a);
    417 }
    418 
    419 // CHECK-LABEL: define <8 x i16> @test_vclezq_s16(<8 x i16> %a) #0 {
    420 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
    421 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
    422 // CHECK:   [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], zeroinitializer
    423 // CHECK:   [[VCLEZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
    424 // CHECK:   ret <8 x i16> [[VCLEZ_I]]
    425 uint16x8_t test_vclezq_s16(int16x8_t a) {
    426   return vclezq_s16(a);
    427 }
    428 
    429 // CHECK-LABEL: define <4 x i32> @test_vclezq_s32(<4 x i32> %a) #0 {
    430 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
    431 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
    432 // CHECK:   [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], zeroinitializer
    433 // CHECK:   [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
    434 // CHECK:   ret <4 x i32> [[VCLEZ_I]]
    435 uint32x4_t test_vclezq_s32(int32x4_t a) {
    436   return vclezq_s32(a);
    437 }
    438 
    439 // CHECK-LABEL: define <2 x i64> @test_vclezq_s64(<2 x i64> %a) #0 {
    440 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
    441 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
    442 // CHECK:   [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], zeroinitializer
    443 // CHECK:   [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
    444 // CHECK:   ret <2 x i64> [[VCLEZ_I]]
    445 uint64x2_t test_vclezq_s64(int64x2_t a) {
    446   return vclezq_s64(a);
    447 }
    448 
    449 // CHECK-LABEL: define <2 x i32> @test_vclez_f32(<2 x float> %a) #0 {
    450 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
    451 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
    452 // CHECK:   [[TMP2:%.*]] = fcmp ole <2 x float> [[TMP1]], zeroinitializer
    453 // CHECK:   [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
    454 // CHECK:   ret <2 x i32> [[VCLEZ_I]]
    455 uint32x2_t test_vclez_f32(float32x2_t a) {
    456   return vclez_f32(a);
    457 }
    458 
    459 // CHECK-LABEL: define <1 x i64> @test_vclez_f64(<1 x double> %a) #0 {
    460 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
    461 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
    462 // CHECK:   [[TMP2:%.*]] = fcmp ole <1 x double> [[TMP1]], zeroinitializer
    463 // CHECK:   [[VCLEZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
    464 // CHECK:   ret <1 x i64> [[VCLEZ_I]]
    465 uint64x1_t test_vclez_f64(float64x1_t a) {
    466   return vclez_f64(a);
    467 }
    468 
    469 // CHECK-LABEL: define <4 x i32> @test_vclezq_f32(<4 x float> %a) #0 {
    470 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
    471 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
    472 // CHECK:   [[TMP2:%.*]] = fcmp ole <4 x float> [[TMP1]], zeroinitializer
    473 // CHECK:   [[VCLEZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
    474 // CHECK:   ret <4 x i32> [[VCLEZ_I]]
    475 uint32x4_t test_vclezq_f32(float32x4_t a) {
    476   return vclezq_f32(a);
    477 }
    478 
    479 // CHECK-LABEL: define <2 x i64> @test_vclezq_f64(<2 x double> %a) #0 {
    480 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
    481 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
    482 // CHECK:   [[TMP2:%.*]] = fcmp ole <2 x double> [[TMP1]], zeroinitializer
    483 // CHECK:   [[VCLEZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
    484 // CHECK:   ret <2 x i64> [[VCLEZ_I]]
    485 uint64x2_t test_vclezq_f64(float64x2_t a) {
    486   return vclezq_f64(a);
    487 }
    488 
// ---------------------------------------------------------------------------
// vcgtz / vcgtzq: lane-wise "compare greater-than zero".
// Signed integer lanes lower to icmp sgt, floating-point lanes to fcmp ogt,
// each followed by a sext of the mask to the lane width.  The FileCheck lines
// above each wrapper are machine-generated assertions on that IR; regenerate
// them with the update script rather than editing by hand.
// ---------------------------------------------------------------------------
    489 // CHECK-LABEL: define <8 x i8> @test_vcgtz_s8(<8 x i8> %a) #0 {
    490 // CHECK:   [[TMP0:%.*]] = icmp sgt <8 x i8> %a, zeroinitializer
    491 // CHECK:   [[VCGTZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
    492 // CHECK:   ret <8 x i8> [[VCGTZ_I]]
    493 uint8x8_t test_vcgtz_s8(int8x8_t a) {
    494   return vcgtz_s8(a);
    495 }
    496 
    497 // CHECK-LABEL: define <4 x i16> @test_vcgtz_s16(<4 x i16> %a) #0 {
    498 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
    499 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
    500 // CHECK:   [[TMP2:%.*]] = icmp sgt <4 x i16> [[TMP1]], zeroinitializer
    501 // CHECK:   [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
    502 // CHECK:   ret <4 x i16> [[VCGTZ_I]]
    503 uint16x4_t test_vcgtz_s16(int16x4_t a) {
    504   return vcgtz_s16(a);
    505 }
    506 
    507 // CHECK-LABEL: define <2 x i32> @test_vcgtz_s32(<2 x i32> %a) #0 {
    508 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
    509 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
    510 // CHECK:   [[TMP2:%.*]] = icmp sgt <2 x i32> [[TMP1]], zeroinitializer
    511 // CHECK:   [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
    512 // CHECK:   ret <2 x i32> [[VCGTZ_I]]
    513 uint32x2_t test_vcgtz_s32(int32x2_t a) {
    514   return vcgtz_s32(a);
    515 }
    516 
    517 // CHECK-LABEL: define <1 x i64> @test_vcgtz_s64(<1 x i64> %a) #0 {
    518 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
    519 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
    520 // CHECK:   [[TMP2:%.*]] = icmp sgt <1 x i64> [[TMP1]], zeroinitializer
    521 // CHECK:   [[VCGTZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
    522 // CHECK:   ret <1 x i64> [[VCGTZ_I]]
    523 uint64x1_t test_vcgtz_s64(int64x1_t a) {
    524   return vcgtz_s64(a);
    525 }
    526 
    527 // CHECK-LABEL: define <16 x i8> @test_vcgtzq_s8(<16 x i8> %a) #0 {
    528 // CHECK:   [[TMP0:%.*]] = icmp sgt <16 x i8> %a, zeroinitializer
    529 // CHECK:   [[VCGTZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
    530 // CHECK:   ret <16 x i8> [[VCGTZ_I]]
    531 uint8x16_t test_vcgtzq_s8(int8x16_t a) {
    532   return vcgtzq_s8(a);
    533 }
    534 
    535 // CHECK-LABEL: define <8 x i16> @test_vcgtzq_s16(<8 x i16> %a) #0 {
    536 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
    537 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
    538 // CHECK:   [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], zeroinitializer
    539 // CHECK:   [[VCGTZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
    540 // CHECK:   ret <8 x i16> [[VCGTZ_I]]
    541 uint16x8_t test_vcgtzq_s16(int16x8_t a) {
    542   return vcgtzq_s16(a);
    543 }
    544 
    545 // CHECK-LABEL: define <4 x i32> @test_vcgtzq_s32(<4 x i32> %a) #0 {
    546 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
    547 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
    548 // CHECK:   [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], zeroinitializer
    549 // CHECK:   [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
    550 // CHECK:   ret <4 x i32> [[VCGTZ_I]]
    551 uint32x4_t test_vcgtzq_s32(int32x4_t a) {
    552   return vcgtzq_s32(a);
    553 }
    554 
    555 // CHECK-LABEL: define <2 x i64> @test_vcgtzq_s64(<2 x i64> %a) #0 {
    556 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
    557 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
    558 // CHECK:   [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], zeroinitializer
    559 // CHECK:   [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
    560 // CHECK:   ret <2 x i64> [[VCGTZ_I]]
    561 uint64x2_t test_vcgtzq_s64(int64x2_t a) {
    562   return vcgtzq_s64(a);
    563 }
    564 
    565 // CHECK-LABEL: define <2 x i32> @test_vcgtz_f32(<2 x float> %a) #0 {
    566 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
    567 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
    568 // CHECK:   [[TMP2:%.*]] = fcmp ogt <2 x float> [[TMP1]], zeroinitializer
    569 // CHECK:   [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
    570 // CHECK:   ret <2 x i32> [[VCGTZ_I]]
    571 uint32x2_t test_vcgtz_f32(float32x2_t a) {
    572   return vcgtz_f32(a);
    573 }
    574 
    575 // CHECK-LABEL: define <1 x i64> @test_vcgtz_f64(<1 x double> %a) #0 {
    576 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
    577 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
    578 // CHECK:   [[TMP2:%.*]] = fcmp ogt <1 x double> [[TMP1]], zeroinitializer
    579 // CHECK:   [[VCGTZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
    580 // CHECK:   ret <1 x i64> [[VCGTZ_I]]
    581 uint64x1_t test_vcgtz_f64(float64x1_t a) {
    582   return vcgtz_f64(a);
    583 }
    584 
    585 // CHECK-LABEL: define <4 x i32> @test_vcgtzq_f32(<4 x float> %a) #0 {
    586 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
    587 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
    588 // CHECK:   [[TMP2:%.*]] = fcmp ogt <4 x float> [[TMP1]], zeroinitializer
    589 // CHECK:   [[VCGTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
    590 // CHECK:   ret <4 x i32> [[VCGTZ_I]]
    591 uint32x4_t test_vcgtzq_f32(float32x4_t a) {
    592   return vcgtzq_f32(a);
    593 }
    594 
    595 // CHECK-LABEL: define <2 x i64> @test_vcgtzq_f64(<2 x double> %a) #0 {
    596 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
    597 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
    598 // CHECK:   [[TMP2:%.*]] = fcmp ogt <2 x double> [[TMP1]], zeroinitializer
    599 // CHECK:   [[VCGTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
    600 // CHECK:   ret <2 x i64> [[VCGTZ_I]]
    601 uint64x2_t test_vcgtzq_f64(float64x2_t a) {
    602   return vcgtzq_f64(a);
    603 }
    604 
// ---------------------------------------------------------------------------
// vcltz / vcltzq: lane-wise "compare less-than zero".
// Signed integer lanes lower to icmp slt, followed by a sext of the mask to
// the lane width.  The FileCheck lines above each wrapper are
// machine-generated assertions on that IR; regenerate them with the update
// script rather than editing by hand.
// ---------------------------------------------------------------------------
    605 // CHECK-LABEL: define <8 x i8> @test_vcltz_s8(<8 x i8> %a) #0 {
    606 // CHECK:   [[TMP0:%.*]] = icmp slt <8 x i8> %a, zeroinitializer
    607 // CHECK:   [[VCLTZ_I:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
    608 // CHECK:   ret <8 x i8> [[VCLTZ_I]]
    609 uint8x8_t test_vcltz_s8(int8x8_t a) {
    610   return vcltz_s8(a);
    611 }
    612 
    613 // CHECK-LABEL: define <4 x i16> @test_vcltz_s16(<4 x i16> %a) #0 {
    614 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
    615 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
    616 // CHECK:   [[TMP2:%.*]] = icmp slt <4 x i16> [[TMP1]], zeroinitializer
    617 // CHECK:   [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i16>
    618 // CHECK:   ret <4 x i16> [[VCLTZ_I]]
    619 uint16x4_t test_vcltz_s16(int16x4_t a) {
    620   return vcltz_s16(a);
    621 }
    622 
    623 // CHECK-LABEL: define <2 x i32> @test_vcltz_s32(<2 x i32> %a) #0 {
    624 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
    625 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
    626 // CHECK:   [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], zeroinitializer
    627 // CHECK:   [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
    628 // CHECK:   ret <2 x i32> [[VCLTZ_I]]
    629 uint32x2_t test_vcltz_s32(int32x2_t a) {
    630   return vcltz_s32(a);
    631 }
    632 
    633 // CHECK-LABEL: define <1 x i64> @test_vcltz_s64(<1 x i64> %a) #0 {
    634 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
    635 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
    636 // CHECK:   [[TMP2:%.*]] = icmp slt <1 x i64> [[TMP1]], zeroinitializer
    637 // CHECK:   [[VCLTZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
    638 // CHECK:   ret <1 x i64> [[VCLTZ_I]]
    639 uint64x1_t test_vcltz_s64(int64x1_t a) {
    640   return vcltz_s64(a);
    641 }
    642 
    643 // CHECK-LABEL: define <16 x i8> @test_vcltzq_s8(<16 x i8> %a) #0 {
    644 // CHECK:   [[TMP0:%.*]] = icmp slt <16 x i8> %a, zeroinitializer
    645 // CHECK:   [[VCLTZ_I:%.*]] = sext <16 x i1> [[TMP0]] to <16 x i8>
    646 // CHECK:   ret <16 x i8> [[VCLTZ_I]]
    647 uint8x16_t test_vcltzq_s8(int8x16_t a) {
    648   return vcltzq_s8(a);
    649 }
    650 
    651 // CHECK-LABEL: define <8 x i16> @test_vcltzq_s16(<8 x i16> %a) #0 {
    652 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
    653 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
    654 // CHECK:   [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], zeroinitializer
    655 // CHECK:   [[VCLTZ_I:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
    656 // CHECK:   ret <8 x i16> [[VCLTZ_I]]
    657 uint16x8_t test_vcltzq_s16(int16x8_t a) {
    658   return vcltzq_s16(a);
    659 }
    660 
    661 // CHECK-LABEL: define <4 x i32> @test_vcltzq_s32(<4 x i32> %a) #0 {
    662 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
    663 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
    664 // CHECK:   [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer
    665 // CHECK:   [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
    666 // CHECK:   ret <4 x i32> [[VCLTZ_I]]
    667 uint32x4_t test_vcltzq_s32(int32x4_t a) {
    668   return vcltzq_s32(a);
    669 }
    670 
    671 // CHECK-LABEL: define <2 x i64> @test_vcltzq_s64(<2 x i64> %a) #0 {
    672 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
    673 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
    674 // CHECK:   [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], zeroinitializer
    675 // CHECK:   [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
    676 // CHECK:   ret <2 x i64> [[VCLTZ_I]]
    677 uint64x2_t test_vcltzq_s64(int64x2_t a) {
    678   return vcltzq_s64(a);
    679 }
    680 
    681 // CHECK-LABEL: define <2 x i32> @test_vcltz_f32(<2 x float> %a) #0 {
    682 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
    683 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
    684 // CHECK:   [[TMP2:%.*]] = fcmp olt <2 x float> [[TMP1]], zeroinitializer
    685 // CHECK:   [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
    686 // CHECK:   ret <2 x i32> [[VCLTZ_I]]
    687 uint32x2_t test_vcltz_f32(float32x2_t a) {
    688   return vcltz_f32(a);
    689 }
    690 
    691 // CHECK-LABEL: define <1 x i64> @test_vcltz_f64(<1 x double> %a) #0 {
    692 // CHECK:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
    693 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
    694 // CHECK:   [[TMP2:%.*]] = fcmp olt <1 x double> [[TMP1]], zeroinitializer
    695 // CHECK:   [[VCLTZ_I:%.*]] = sext <1 x i1> [[TMP2]] to <1 x i64>
    696 // CHECK:   ret <1 x i64> [[VCLTZ_I]]
    697 uint64x1_t test_vcltz_f64(float64x1_t a) {
    698   return vcltz_f64(a);
    699 }
    700 
    701 // CHECK-LABEL: define <4 x i32> @test_vcltzq_f32(<4 x float> %a) #0 {
    702 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
    703 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
    704 // CHECK:   [[TMP2:%.*]] = fcmp olt <4 x float> [[TMP1]], zeroinitializer
    705 // CHECK:   [[VCLTZ_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
    706 // CHECK:   ret <4 x i32> [[VCLTZ_I]]
    707 uint32x4_t test_vcltzq_f32(float32x4_t a) {
    708   return vcltzq_f32(a);
    709 }
    710 
    711 // CHECK-LABEL: define <2 x i64> @test_vcltzq_f64(<2 x double> %a) #0 {
    712 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
    713 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
    714 // CHECK:   [[TMP2:%.*]] = fcmp olt <2 x double> [[TMP1]], zeroinitializer
    715 // CHECK:   [[VCLTZ_I:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i64>
    716 // CHECK:   ret <2 x i64> [[VCLTZ_I]]
    717 uint64x2_t test_vcltzq_f64(float64x2_t a) {
    718   return vcltzq_f64(a);
    719 }
    720 
// vrev16 family: reverse the order of 8-bit elements within each 16-bit
// half-word. Expected lowering is a plain shufflevector with the byte-swap
// mask <1,0, 3,2, ...> — no intrinsic call should remain in the IR.
// CHECK-LABEL: define <8 x i8> @test_vrev16_s8(<8 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev16_s8(int8x8_t a) {
  return vrev16_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev16_u8(<8 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev16_u8(uint8x8_t a) {
  return vrev16_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev16_p8(<8 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev16_p8(poly8x8_t a) {
  return vrev16_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev16q_s8(<16 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev16q_s8(int8x16_t a) {
  return vrev16q_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev16q_u8(<16 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev16q_u8(uint8x16_t a) {
  return vrev16q_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev16q_p8(<16 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev16q_p8(poly8x16_t a) {
  return vrev16q_p8(a);
}
    762 
// vrev32 family: reverse the order of 8- or 16-bit elements within each
// 32-bit word. Lowered to a shufflevector — mask <3,2,1,0, 7,6,5,4, ...>
// for bytes, <1,0, 3,2, ...> for half-words.
// CHECK-LABEL: define <8 x i8> @test_vrev32_s8(<8 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev32_s8(int8x8_t a) {
  return vrev32_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev32_s16(<4 x i16> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vrev32_s16(int16x4_t a) {
  return vrev32_s16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev32_u8(<8 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev32_u8(uint8x8_t a) {
  return vrev32_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev32_u16(<4 x i16> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vrev32_u16(uint16x4_t a) {
  return vrev32_u16(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev32_p8(<8 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev32_p8(poly8x8_t a) {
  return vrev32_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev32_p16(<4 x i16> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vrev32_p16(poly16x4_t a) {
  return vrev32_p16(a);
}

// Quadword (128-bit) variants.
// CHECK-LABEL: define <16 x i8> @test_vrev32q_s8(<16 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev32q_s8(int8x16_t a) {
  return vrev32q_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev32q_s16(<8 x i16> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrev32q_s16(int16x8_t a) {
  return vrev32q_s16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev32q_u8(<16 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev32q_u8(uint8x16_t a) {
  return vrev32q_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev32q_u16(<8 x i16> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrev32q_u16(uint16x8_t a) {
  return vrev32q_u16(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev32q_p8(<16 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev32q_p8(poly8x16_t a) {
  return vrev32q_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev32q_p16(<8 x i16> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vrev32q_p16(poly16x8_t a) {
  return vrev32q_p16(a);
}
    846 
// vrev64 family: reverse the order of 8-, 16-, or 32-bit elements within
// each 64-bit double-word. Lowered to a shufflevector whose mask reverses
// element indices inside each 64-bit group.
// CHECK-LABEL: define <8 x i8> @test_vrev64_s8(<8 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev64_s8(int8x8_t a) {
  return vrev64_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev64_s16(<4 x i16> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vrev64_s16(int16x4_t a) {
  return vrev64_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vrev64_s32(<2 x i32> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
// CHECK:   ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vrev64_s32(int32x2_t a) {
  return vrev64_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev64_u8(<8 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev64_u8(uint8x8_t a) {
  return vrev64_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev64_u16(<4 x i16> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vrev64_u16(uint16x4_t a) {
  return vrev64_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vrev64_u32(<2 x i32> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
// CHECK:   ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vrev64_u32(uint32x2_t a) {
  return vrev64_u32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vrev64_p8(<8 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev64_p8(poly8x8_t a) {
  return vrev64_p8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vrev64_p16(<4 x i16> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vrev64_p16(poly16x4_t a) {
  return vrev64_p16(a);
}

// CHECK-LABEL: define <2 x float> @test_vrev64_f32(<2 x float> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> <i32 1, i32 0>
// CHECK:   ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vrev64_f32(float32x2_t a) {
  return vrev64_f32(a);
}

// Quadword (128-bit) variants: the mask reverses within each of the two
// 64-bit halves independently.
// CHECK-LABEL: define <16 x i8> @test_vrev64q_s8(<16 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev64q_s8(int8x16_t a) {
  return vrev64q_s8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev64q_s16(<8 x i16> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrev64q_s16(int16x8_t a) {
  return vrev64q_s16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vrev64q_s32(<4 x i32> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vrev64q_s32(int32x4_t a) {
  return vrev64q_s32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev64q_u8(<16 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev64q_u8(uint8x16_t a) {
  return vrev64q_u8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev64q_u16(<8 x i16> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrev64q_u16(uint16x8_t a) {
  return vrev64q_u16(a);
}

// CHECK-LABEL: define <4 x i32> @test_vrev64q_u32(<4 x i32> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vrev64q_u32(uint32x4_t a) {
  return vrev64q_u32(a);
}

// CHECK-LABEL: define <16 x i8> @test_vrev64q_p8(<16 x i8> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev64q_p8(poly8x16_t a) {
  return vrev64q_p8(a);
}

// CHECK-LABEL: define <8 x i16> @test_vrev64q_p16(<8 x i16> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vrev64q_p16(poly16x8_t a) {
  return vrev64q_p16(a);
}

// CHECK-LABEL: define <4 x float> @test_vrev64q_f32(<4 x float> %a) #0 {
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK:   ret <4 x float> [[SHUFFLE_I]]
float32x4_t test_vrev64q_f32(float32x4_t a) {
  return vrev64q_f32(a);
}
    972 
// vpaddl family: pairwise add-long. Adjacent element pairs are summed into
// elements of twice the width (halving the lane count). Signed variants
// lower to llvm.aarch64.neon.saddlp, unsigned to llvm.aarch64.neon.uaddlp.
// CHECK-LABEL: define <4 x i16> @test_vpaddl_s8(<8 x i8> %a) #0 {
// CHECK:   [[VPADDL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %a) #2
// CHECK:   ret <4 x i16> [[VPADDL_I]]
int16x4_t test_vpaddl_s8(int8x8_t a) {
  return vpaddl_s8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vpaddl_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> [[VPADDL_I]]) #2
// CHECK:   ret <2 x i32> [[VPADDL1_I]]
int32x2_t test_vpaddl_s16(int16x4_t a) {
  return vpaddl_s16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vpaddl_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> [[VPADDL_I]]) #2
// CHECK:   ret <1 x i64> [[VPADDL1_I]]
int64x1_t test_vpaddl_s32(int32x2_t a) {
  return vpaddl_s32(a);
}

// CHECK-LABEL: define <4 x i16> @test_vpaddl_u8(<8 x i8> %a) #0 {
// CHECK:   [[VPADDL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %a) #2
// CHECK:   ret <4 x i16> [[VPADDL_I]]
uint16x4_t test_vpaddl_u8(uint8x8_t a) {
  return vpaddl_u8(a);
}

// CHECK-LABEL: define <2 x i32> @test_vpaddl_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> [[VPADDL_I]]) #2
// CHECK:   ret <2 x i32> [[VPADDL1_I]]
uint32x2_t test_vpaddl_u16(uint16x4_t a) {
  return vpaddl_u16(a);
}

// CHECK-LABEL: define <1 x i64> @test_vpaddl_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> [[VPADDL_I]]) #2
// CHECK:   ret <1 x i64> [[VPADDL1_I]]
uint64x1_t test_vpaddl_u32(uint32x2_t a) {
  return vpaddl_u32(a);
}

// Quadword (128-bit) variants.
// CHECK-LABEL: define <8 x i16> @test_vpaddlq_s8(<16 x i8> %a) #0 {
// CHECK:   [[VPADDL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %a) #2
// CHECK:   ret <8 x i16> [[VPADDL_I]]
int16x8_t test_vpaddlq_s8(int8x16_t a) {
  return vpaddlq_s8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vpaddlq_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> [[VPADDL_I]]) #2
// CHECK:   ret <4 x i32> [[VPADDL1_I]]
int32x4_t test_vpaddlq_s16(int16x8_t a) {
  return vpaddlq_s16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vpaddlq_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> [[VPADDL_I]]) #2
// CHECK:   ret <2 x i64> [[VPADDL1_I]]
int64x2_t test_vpaddlq_s32(int32x4_t a) {
  return vpaddlq_s32(a);
}

// CHECK-LABEL: define <8 x i16> @test_vpaddlq_u8(<16 x i8> %a) #0 {
// CHECK:   [[VPADDL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %a) #2
// CHECK:   ret <8 x i16> [[VPADDL_I]]
uint16x8_t test_vpaddlq_u8(uint8x16_t a) {
  return vpaddlq_u8(a);
}

// CHECK-LABEL: define <4 x i32> @test_vpaddlq_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[VPADDL_I]]) #2
// CHECK:   ret <4 x i32> [[VPADDL1_I]]
uint32x4_t test_vpaddlq_u16(uint16x8_t a) {
  return vpaddlq_u16(a);
}

// CHECK-LABEL: define <2 x i64> @test_vpaddlq_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[VPADDL_I]]) #2
// CHECK:   ret <2 x i64> [[VPADDL1_I]]
uint64x2_t test_vpaddlq_u32(uint32x4_t a) {
  return vpaddlq_u32(a);
}
   1072 
// vpadal family: pairwise add-long and accumulate. Lowered as the same
// saddlp/uaddlp intrinsic on the second operand followed by a plain vector
// `add` into the accumulator (first operand) — there is no dedicated
// accumulate intrinsic in the expected IR.
// CHECK-LABEL: define <4 x i16> @test_vpadal_s8(<4 x i16> %a, <8 x i8> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %b) #2
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = add <4 x i16> [[VPADAL_I]], [[TMP1]]
// CHECK:   ret <4 x i16> [[TMP2]]
int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) {
  return vpadal_s8(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpadal_s16(<2 x i32> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> [[VPADAL_I]]) #2
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = add <2 x i32> [[VPADAL1_I]], [[TMP2]]
// CHECK:   ret <2 x i32> [[TMP3]]
int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) {
  return vpadal_s16(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vpadal_s32(<1 x i64> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> [[VPADAL_I]]) #2
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = add <1 x i64> [[VPADAL1_I]], [[TMP2]]
// CHECK:   ret <1 x i64> [[TMP3]]
int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) {
  return vpadal_s32(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vpadal_u8(<4 x i16> %a, <8 x i8> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %b) #2
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = add <4 x i16> [[VPADAL_I]], [[TMP1]]
// CHECK:   ret <4 x i16> [[TMP2]]
uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) {
  return vpadal_u8(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vpadal_u16(<2 x i32> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> [[VPADAL_I]]) #2
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = add <2 x i32> [[VPADAL1_I]], [[TMP2]]
// CHECK:   ret <2 x i32> [[TMP3]]
uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) {
  return vpadal_u16(a, b);
}

// CHECK-LABEL: define <1 x i64> @test_vpadal_u32(<1 x i64> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPADAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> [[VPADAL_I]]) #2
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = add <1 x i64> [[VPADAL1_I]], [[TMP2]]
// CHECK:   ret <1 x i64> [[TMP3]]
uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) {
  return vpadal_u32(a, b);
}

// Quadword (128-bit) variants.
// CHECK-LABEL: define <8 x i16> @test_vpadalq_s8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %b) #2
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = add <8 x i16> [[VPADAL_I]], [[TMP1]]
// CHECK:   ret <8 x i16> [[TMP2]]
int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) {
  return vpadalq_s8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpadalq_s16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> [[VPADAL_I]]) #2
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = add <4 x i32> [[VPADAL1_I]], [[TMP2]]
// CHECK:   ret <4 x i32> [[TMP3]]
int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) {
  return vpadalq_s16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vpadalq_s32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> [[VPADAL_I]]) #2
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = add <2 x i64> [[VPADAL1_I]], [[TMP2]]
// CHECK:   ret <2 x i64> [[TMP3]]
int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) {
  return vpadalq_s32(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vpadalq_u8(<8 x i16> %a, <16 x i8> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %b) #2
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = add <8 x i16> [[VPADAL_I]], [[TMP1]]
// CHECK:   ret <8 x i16> [[TMP2]]
uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) {
  return vpadalq_u8(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vpadalq_u16(<4 x i32> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[VPADAL_I]]) #2
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = add <4 x i32> [[VPADAL1_I]], [[TMP2]]
// CHECK:   ret <4 x i32> [[TMP3]]
uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) {
  return vpadalq_u16(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vpadalq_u32(<2 x i64> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPADAL_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[VPADAL_I]]) #2
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = add <2 x i64> [[VPADAL1_I]], [[TMP2]]
// CHECK:   ret <2 x i64> [[TMP3]]
uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) {
  return vpadalq_u32(a, b);
}
   1208 
// vqabs family: saturating absolute value. Lowered to the
// llvm.aarch64.neon.sqabs intrinsic; non-i8 element types are round-tripped
// through <N x i8> bitcasts on the way in and out.
// CHECK-LABEL: define <8 x i8> @test_vqabs_s8(<8 x i8> %a) #0 {
// CHECK:   [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> %a) #2
// CHECK:   ret <8 x i8> [[VQABS_V_I]]
int8x8_t test_vqabs_s8(int8x8_t a) {
  return vqabs_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vqabsq_s8(<16 x i8> %a) #0 {
// CHECK:   [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8> %a) #2
// CHECK:   ret <16 x i8> [[VQABSQ_V_I]]
int8x16_t test_vqabsq_s8(int8x16_t a) {
  return vqabsq_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vqabs_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[VQABS_V_I]]) #2
// CHECK:   [[VQABS_V2_I:%.*]] = bitcast <4 x i16> [[VQABS_V1_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP1]]
int16x4_t test_vqabs_s16(int16x4_t a) {
  return vqabs_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vqabsq_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16> [[VQABSQ_V_I]]) #2
// CHECK:   [[VQABSQ_V2_I:%.*]] = bitcast <8 x i16> [[VQABSQ_V1_I]] to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP1]]
int16x8_t test_vqabsq_s16(int16x8_t a) {
  return vqabsq_s16(a);
}
   1244 
   1245 // CHECK-LABEL: define <2 x i32> @test_vqabs_s32(<2 x i32> %a) #0 {
   1246 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
   1247 // CHECK:   [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
   1248 // CHECK:   [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32> [[VQABS_V_I]]) #2
   1249 // CHECK:   [[VQABS_V2_I:%.*]] = bitcast <2 x i32> [[VQABS_V1_I]] to <8 x i8>
   1250 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <2 x i32>
   1251 // CHECK:   ret <2 x i32> [[TMP1]]
   1252 int32x2_t test_vqabs_s32(int32x2_t a) {
   1253   return vqabs_s32(a);
   1254 }
   1255 
   1256 // CHECK-LABEL: define <4 x i32> @test_vqabsq_s32(<4 x i32> %a) #0 {
   1257 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
   1258 // CHECK:   [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
   1259 // CHECK:   [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32> [[VQABSQ_V_I]]) #2
   1260 // CHECK:   [[VQABSQ_V2_I:%.*]] = bitcast <4 x i32> [[VQABSQ_V1_I]] to <16 x i8>
   1261 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <4 x i32>
   1262 // CHECK:   ret <4 x i32> [[TMP1]]
   1263 int32x4_t test_vqabsq_s32(int32x4_t a) {
   1264   return vqabsq_s32(a);
   1265 }
   1266 
   1267 // CHECK-LABEL: define <2 x i64> @test_vqabsq_s64(<2 x i64> %a) #0 {
   1268 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
   1269 // CHECK:   [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
   1270 // CHECK:   [[VQABSQ_V1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqabs.v2i64(<2 x i64> [[VQABSQ_V_I]]) #2
   1271 // CHECK:   [[VQABSQ_V2_I:%.*]] = bitcast <2 x i64> [[VQABSQ_V1_I]] to <16 x i8>
   1272 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <2 x i64>
   1273 // CHECK:   ret <2 x i64> [[TMP1]]
   1274 int64x2_t test_vqabsq_s64(int64x2_t a) {
   1275   return vqabsq_s64(a);
   1276 }
   1277 
// vqneg(q): saturating negation, lowered to llvm.aarch64.neon.sqneg. Same
// bitcast-wrapping pattern as vqabs for non-i8 element types.
// CHECK-LABEL: define <8 x i8> @test_vqneg_s8(<8 x i8> %a) #0 {
// CHECK:   [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> %a) #2
// CHECK:   ret <8 x i8> [[VQNEG_V_I]]
int8x8_t test_vqneg_s8(int8x8_t a) {
  return vqneg_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vqnegq_s8(<16 x i8> %a) #0 {
// CHECK:   [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8> %a) #2
// CHECK:   ret <16 x i8> [[VQNEGQ_V_I]]
int8x16_t test_vqnegq_s8(int8x16_t a) {
  return vqnegq_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vqneg_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[VQNEG_V_I]]) #2
// CHECK:   [[VQNEG_V2_I:%.*]] = bitcast <4 x i16> [[VQNEG_V1_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP1]]
int16x4_t test_vqneg_s16(int16x4_t a) {
  return vqneg_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vqnegq_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> [[VQNEGQ_V_I]]) #2
// CHECK:   [[VQNEGQ_V2_I:%.*]] = bitcast <8 x i16> [[VQNEGQ_V1_I]] to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP1]]
int16x8_t test_vqnegq_s16(int16x8_t a) {
  return vqnegq_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vqneg_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32> [[VQNEG_V_I]]) #2
// CHECK:   [[VQNEG_V2_I:%.*]] = bitcast <2 x i32> [[VQNEG_V1_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP1]]
int32x2_t test_vqneg_s32(int32x2_t a) {
  return vqneg_s32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vqnegq_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32> [[VQNEGQ_V_I]]) #2
// CHECK:   [[VQNEGQ_V2_I:%.*]] = bitcast <4 x i32> [[VQNEGQ_V1_I]] to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP1]]
int32x4_t test_vqnegq_s32(int32x4_t a) {
  return vqnegq_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vqnegq_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQNEGQ_V1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqneg.v2i64(<2 x i64> [[VQNEGQ_V_I]]) #2
// CHECK:   [[VQNEGQ_V2_I:%.*]] = bitcast <2 x i64> [[VQNEGQ_V1_I]] to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP1]]
int64x2_t test_vqnegq_s64(int64x2_t a) {
  return vqnegq_s64(a);
}
   1346 
// vneg(q): plain (non-saturating) negation. Integer variants lower to a
// `sub 0, %a`; float variants lower to `fsub -0.0, %a` (IEEE negation,
// preserving signed zeros and NaN payload behavior) — no intrinsic needed.
// CHECK-LABEL: define <8 x i8> @test_vneg_s8(<8 x i8> %a) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, %a
// CHECK:   ret <8 x i8> [[SUB_I]]
int8x8_t test_vneg_s8(int8x8_t a) {
  return vneg_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vnegq_s8(<16 x i8> %a) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, %a
// CHECK:   ret <16 x i8> [[SUB_I]]
int8x16_t test_vnegq_s8(int8x16_t a) {
  return vnegq_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vneg_s16(<4 x i16> %a) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, %a
// CHECK:   ret <4 x i16> [[SUB_I]]
int16x4_t test_vneg_s16(int16x4_t a) {
  return vneg_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vnegq_s16(<8 x i16> %a) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, %a
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vnegq_s16(int16x8_t a) {
  return vnegq_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vneg_s32(<2 x i32> %a) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, %a
// CHECK:   ret <2 x i32> [[SUB_I]]
int32x2_t test_vneg_s32(int32x2_t a) {
  return vneg_s32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vnegq_s32(<4 x i32> %a) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, %a
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vnegq_s32(int32x4_t a) {
  return vnegq_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vnegq_s64(<2 x i64> %a) #0 {
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> zeroinitializer, %a
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vnegq_s64(int64x2_t a) {
  return vnegq_s64(a);
}

// CHECK-LABEL: define <2 x float> @test_vneg_f32(<2 x float> %a) #0 {
// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
// CHECK:   ret <2 x float> [[SUB_I]]
float32x2_t test_vneg_f32(float32x2_t a) {
  return vneg_f32(a);
}

// CHECK-LABEL: define <4 x float> @test_vnegq_f32(<4 x float> %a) #0 {
// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
// CHECK:   ret <4 x float> [[SUB_I]]
float32x4_t test_vnegq_f32(float32x4_t a) {
  return vnegq_f32(a);
}

// CHECK-LABEL: define <2 x double> @test_vnegq_f64(<2 x double> %a) #0 {
// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
// CHECK:   ret <2 x double> [[SUB_I]]
float64x2_t test_vnegq_f64(float64x2_t a) {
  return vnegq_f64(a);
}
   1416 
// vabs(q): absolute value. Integer variants lower to llvm.aarch64.neon.abs;
// float variants to the target-independent llvm.fabs intrinsic. Non-i8
// element types are round-tripped through <N x i8> bitcasts.
// CHECK-LABEL: define <8 x i8> @test_vabs_s8(<8 x i8> %a) #0 {
// CHECK:   [[VABS_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8> %a) #2
// CHECK:   ret <8 x i8> [[VABS_I]]
int8x8_t test_vabs_s8(int8x8_t a) {
  return vabs_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vabsq_s8(<16 x i8> %a) #0 {
// CHECK:   [[VABS_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8> %a) #2
// CHECK:   ret <16 x i8> [[VABS_I]]
int8x16_t test_vabsq_s8(int8x16_t a) {
  return vabsq_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vabs_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VABS1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16> [[VABS_I]]) #2
// CHECK:   ret <4 x i16> [[VABS1_I]]
int16x4_t test_vabs_s16(int16x4_t a) {
  return vabs_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vabsq_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VABS1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16> [[VABS_I]]) #2
// CHECK:   ret <8 x i16> [[VABS1_I]]
int16x8_t test_vabsq_s16(int16x8_t a) {
  return vabsq_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vabs_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VABS1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32> [[VABS_I]]) #2
// CHECK:   ret <2 x i32> [[VABS1_I]]
int32x2_t test_vabs_s32(int32x2_t a) {
  return vabs_s32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vabsq_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VABS1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32> [[VABS_I]]) #2
// CHECK:   ret <4 x i32> [[VABS1_I]]
int32x4_t test_vabsq_s32(int32x4_t a) {
  return vabsq_s32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vabsq_s64(<2 x i64> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VABS1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.abs.v2i64(<2 x i64> [[VABS_I]]) #2
// CHECK:   ret <2 x i64> [[VABS1_I]]
int64x2_t test_vabsq_s64(int64x2_t a) {
  return vabsq_s64(a);
}

// CHECK-LABEL: define <2 x float> @test_vabs_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[VABS_I]]) #2
// CHECK:   ret <2 x float> [[VABS1_I]]
float32x2_t test_vabs_f32(float32x2_t a) {
  return vabs_f32(a);
}

// CHECK-LABEL: define <4 x float> @test_vabsq_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[VABS_I]]) #2
// CHECK:   ret <4 x float> [[VABS1_I]]
float32x4_t test_vabsq_f32(float32x4_t a) {
  return vabsq_f32(a);
}

// CHECK-LABEL: define <2 x double> @test_vabsq_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VABS1_I:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[VABS_I]]) #2
// CHECK:   ret <2 x double> [[VABS1_I]]
float64x2_t test_vabsq_f64(float64x2_t a) {
  return vabsq_f64(a);
}
   1502 
// vuqadd(q): signed saturating accumulate of unsigned value (SUQADD),
// lowered to llvm.aarch64.neon.suqadd with both operands bitcast-wrapped
// for non-i8 element types.
// CHECK-LABEL: define <8 x i8> @test_vuqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK:   [[VUQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #2
// CHECK:   ret <8 x i8> [[VUQADD_I]]
int8x8_t test_vuqadd_s8(int8x8_t a, int8x8_t b) {
  return vuqadd_s8(a, b);
}

// CHECK-LABEL: define <16 x i8> @test_vuqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK:   [[VUQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #2
// CHECK:   ret <16 x i8> [[VUQADD_I]]
int8x16_t test_vuqaddq_s8(int8x16_t a, int8x16_t b) {
  return vuqaddq_s8(a, b);
}

// CHECK-LABEL: define <4 x i16> @test_vuqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VUQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VUQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VUQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[VUQADD_I]], <4 x i16> [[VUQADD1_I]]) #2
// CHECK:   ret <4 x i16> [[VUQADD2_I]]
int16x4_t test_vuqadd_s16(int16x4_t a, int16x4_t b) {
  return vuqadd_s16(a, b);
}

// CHECK-LABEL: define <8 x i16> @test_vuqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VUQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VUQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VUQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> [[VUQADD_I]], <8 x i16> [[VUQADD1_I]]) #2
// CHECK:   ret <8 x i16> [[VUQADD2_I]]
int16x8_t test_vuqaddq_s16(int16x8_t a, int16x8_t b) {
  return vuqaddq_s16(a, b);
}

// CHECK-LABEL: define <2 x i32> @test_vuqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VUQADD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VUQADD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VUQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> [[VUQADD_I]], <2 x i32> [[VUQADD1_I]]) #2
// CHECK:   ret <2 x i32> [[VUQADD2_I]]
int32x2_t test_vuqadd_s32(int32x2_t a, int32x2_t b) {
  return vuqadd_s32(a, b);
}

// CHECK-LABEL: define <4 x i32> @test_vuqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VUQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VUQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VUQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> [[VUQADD_I]], <4 x i32> [[VUQADD1_I]]) #2
// CHECK:   ret <4 x i32> [[VUQADD2_I]]
int32x4_t test_vuqaddq_s32(int32x4_t a, int32x4_t b) {
  return vuqaddq_s32(a, b);
}

// CHECK-LABEL: define <2 x i64> @test_vuqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VUQADD_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VUQADD1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VUQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> [[VUQADD_I]], <2 x i64> [[VUQADD1_I]]) #2
// CHECK:   ret <2 x i64> [[VUQADD2_I]]
int64x2_t test_vuqaddq_s64(int64x2_t a, int64x2_t b) {
  return vuqaddq_s64(a, b);
}
   1571 
// vcls(q): count leading sign bits, lowered to llvm.aarch64.neon.cls.
// CHECK-LABEL: define <8 x i8> @test_vcls_s8(<8 x i8> %a) #0 {
// CHECK:   [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.cls.v8i8(<8 x i8> %a) #2
// CHECK:   ret <8 x i8> [[VCLS_V_I]]
int8x8_t test_vcls_s8(int8x8_t a) {
  return vcls_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vclsq_s8(<16 x i8> %a) #0 {
// CHECK:   [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.cls.v16i8(<16 x i8> %a) #2
// CHECK:   ret <16 x i8> [[VCLSQ_V_I]]
int8x16_t test_vclsq_s8(int8x16_t a) {
  return vclsq_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vcls_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16> [[VCLS_V_I]]) #2
// CHECK:   [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP1]]
int16x4_t test_vcls_s16(int16x4_t a) {
  return vcls_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vclsq_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16> [[VCLSQ_V_I]]) #2
// CHECK:   [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP1]]
int16x8_t test_vclsq_s16(int16x8_t a) {
  return vclsq_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vcls_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32> [[VCLS_V_I]]) #2
// CHECK:   [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP1]]
int32x2_t test_vcls_s32(int32x2_t a) {
  return vcls_s32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vclsq_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32> [[VCLSQ_V_I]]) #2
// CHECK:   [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP1]]
int32x4_t test_vclsq_s32(int32x4_t a) {
  return vclsq_s32(a);
}
   1629 
// vclz(q): count leading zeros, lowered to the target-independent llvm.ctlz
// intrinsic with is_zero_undef=false (second argument `i1 false`). Signed and
// unsigned variants generate identical IR since CLZ is sign-agnostic.
// CHECK-LABEL: define <8 x i8> @test_vclz_s8(<8 x i8> %a) #0 {
// CHECK:   [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #2
// CHECK:   ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vclz_s8(int8x8_t a) {
  return vclz_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vclzq_s8(<16 x i8> %a) #0 {
// CHECK:   [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #2
// CHECK:   ret <16 x i8> [[VCLZQ_V_I]]
int8x16_t test_vclzq_s8(int8x16_t a) {
  return vclzq_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vclz_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #2
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP1]]
int16x4_t test_vclz_s16(int16x4_t a) {
  return vclz_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vclzq_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false) #2
// CHECK:   [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP1]]
int16x8_t test_vclzq_s16(int16x8_t a) {
  return vclzq_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vclz_s32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #2
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP1]]
int32x2_t test_vclz_s32(int32x2_t a) {
  return vclz_s32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vclzq_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false) #2
// CHECK:   [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP1]]
int32x4_t test_vclzq_s32(int32x4_t a) {
  return vclzq_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vclz_u8(<8 x i8> %a) #0 {
// CHECK:   [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #2
// CHECK:   ret <8 x i8> [[VCLZ_V_I]]
uint8x8_t test_vclz_u8(uint8x8_t a) {
  return vclz_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vclzq_u8(<16 x i8> %a) #0 {
// CHECK:   [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #2
// CHECK:   ret <16 x i8> [[VCLZQ_V_I]]
uint8x16_t test_vclzq_u8(uint8x16_t a) {
  return vclzq_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vclz_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #2
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP1]]
uint16x4_t test_vclz_u16(uint16x4_t a) {
  return vclz_u16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vclzq_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false) #2
// CHECK:   [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP1]]
uint16x8_t test_vclzq_u16(uint16x8_t a) {
  return vclzq_u16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vclz_u32(<2 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #2
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP1]]
uint32x2_t test_vclz_u32(uint32x2_t a) {
  return vclz_u32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vclzq_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false) #2
// CHECK:   [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP1]]
uint32x4_t test_vclzq_u32(uint32x4_t a) {
  return vclzq_u32(a);
}
   1745 
// vcnt(q): per-byte population count, lowered to the target-independent
// llvm.ctpop intrinsic. Signed, unsigned, and poly variants all produce the
// same IR (popcount is type-agnostic at the bit level).
// CHECK-LABEL: define <8 x i8> @test_vcnt_s8(<8 x i8> %a) #0 {
// CHECK:   [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #2
// CHECK:   ret <8 x i8> [[VCNT_V_I]]
int8x8_t test_vcnt_s8(int8x8_t a) {
  return vcnt_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vcntq_s8(<16 x i8> %a) #0 {
// CHECK:   [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #2
// CHECK:   ret <16 x i8> [[VCNTQ_V_I]]
int8x16_t test_vcntq_s8(int8x16_t a) {
  return vcntq_s8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vcnt_u8(<8 x i8> %a) #0 {
// CHECK:   [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #2
// CHECK:   ret <8 x i8> [[VCNT_V_I]]
uint8x8_t test_vcnt_u8(uint8x8_t a) {
  return vcnt_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vcntq_u8(<16 x i8> %a) #0 {
// CHECK:   [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #2
// CHECK:   ret <16 x i8> [[VCNTQ_V_I]]
uint8x16_t test_vcntq_u8(uint8x16_t a) {
  return vcntq_u8(a);
}

// CHECK-LABEL: define <8 x i8> @test_vcnt_p8(<8 x i8> %a) #0 {
// CHECK:   [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #2
// CHECK:   ret <8 x i8> [[VCNT_V_I]]
poly8x8_t test_vcnt_p8(poly8x8_t a) {
  return vcnt_p8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vcntq_p8(<16 x i8> %a) #0 {
// CHECK:   [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #2
// CHECK:   ret <16 x i8> [[VCNTQ_V_I]]
poly8x16_t test_vcntq_p8(poly8x16_t a) {
  return vcntq_p8(a);
}
   1787 
// vmvn(q): bitwise NOT, lowered to an xor with an all-ones splat.
// CHECK-LABEL: define <8 x i8> @test_vmvn_s8(<8 x i8> %a) #0 {
// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   ret <8 x i8> [[NEG_I]]
int8x8_t test_vmvn_s8(int8x8_t a) {
  return vmvn_s8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vmvnq_s8(<16 x i8> %a) #0 {
// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   ret <16 x i8> [[NEG_I]]
int8x16_t test_vmvnq_s8(int8x16_t a) {
  return vmvnq_s8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vmvn_s16(<4 x i16> %a) #0 {
// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   ret <4 x i16> [[NEG_I]]
int16x4_t test_vmvn_s16(int16x4_t a) {
  return vmvn_s16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vmvnq_s16(<8 x i16> %a) #0 {
// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   ret <8 x i16> [[NEG_I]]
int16x8_t test_vmvnq_s16(int16x8_t a) {
  return vmvnq_s16(a);
}

// CHECK-LABEL: define <2 x i32> @test_vmvn_s32(<2 x i32> %a) #0 {
// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
// CHECK:   ret <2 x i32> [[NEG_I]]
int32x2_t test_vmvn_s32(int32x2_t a) {
  return vmvn_s32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vmvnq_s32(<4 x i32> %a) #0 {
// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK:   ret <4 x i32> [[NEG_I]]
int32x4_t test_vmvnq_s32(int32x4_t a) {
  return vmvnq_s32(a);
}

// CHECK-LABEL: define <8 x i8> @test_vmvn_u8(<8 x i8> %a) #0 {
// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   ret <8 x i8> [[NEG_I]]
uint8x8_t test_vmvn_u8(uint8x8_t a) {
  return vmvn_u8(a);
}

// CHECK-LABEL: define <16 x i8> @test_vmvnq_u8(<16 x i8> %a) #0 {
// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   ret <16 x i8> [[NEG_I]]
uint8x16_t test_vmvnq_u8(uint8x16_t a) {
  return vmvnq_u8(a);
}

// CHECK-LABEL: define <4 x i16> @test_vmvn_u16(<4 x i16> %a) #0 {
// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   ret <4 x i16> [[NEG_I]]
uint16x4_t test_vmvn_u16(uint16x4_t a) {
  return vmvn_u16(a);
}

// CHECK-LABEL: define <8 x i16> @test_vmvnq_u16(<8 x i16> %a) #0 {
// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   ret <8 x i16> [[NEG_I]]
uint16x8_t test_vmvnq_u16(uint16x8_t a) {
  return vmvnq_u16(a);
}
   1857 
   1858 // CHECK-LABEL: define <2 x i32> @test_vmvn_u32(<2 x i32> %a) #0 {
   1859 // CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
   1860 // CHECK:   ret <2 x i32> [[NEG_I]]
   1861 uint32x2_t test_vmvn_u32(uint32x2_t a) {
   1862   return vmvn_u32(a);
   1863 }
   1864 
   1865 // CHECK-LABEL: define <4 x i32> @test_vmvnq_u32(<4 x i32> %a) #0 {
   1866 // CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
   1867 // CHECK:   ret <4 x i32> [[NEG_I]]
   1868 uint32x4_t test_vmvnq_u32(uint32x4_t a) {
   1869   return vmvnq_u32(a);
   1870 }
   1871 
   1872 // CHECK-LABEL: define <8 x i8> @test_vmvn_p8(<8 x i8> %a) #0 {
   1873 // CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   1874 // CHECK:   ret <8 x i8> [[NEG_I]]
   1875 poly8x8_t test_vmvn_p8(poly8x8_t a) {
   1876   return vmvn_p8(a);
   1877 }
   1878 
   1879 // CHECK-LABEL: define <16 x i8> @test_vmvnq_p8(<16 x i8> %a) #0 {
   1880 // CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   1881 // CHECK:   ret <16 x i8> [[NEG_I]]
   1882 poly8x16_t test_vmvnq_p8(poly8x16_t a) {
   1883   return vmvnq_p8(a);
   1884 }
   1885 
        // Bit reversal within each byte (VRBIT): maps to the target-specific
        // llvm.aarch64.neon.rbit intrinsic; only i8 element types exist.
   1886 // CHECK-LABEL: define <8 x i8> @test_vrbit_s8(<8 x i8> %a) #0 {
   1887 // CHECK:   [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) #2
   1888 // CHECK:   ret <8 x i8> [[VRBIT_I]]
   1889 int8x8_t test_vrbit_s8(int8x8_t a) {
   1890   return vrbit_s8(a);
   1891 }
   1892 
   1893 // CHECK-LABEL: define <16 x i8> @test_vrbitq_s8(<16 x i8> %a) #0 {
   1894 // CHECK:   [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) #2
   1895 // CHECK:   ret <16 x i8> [[VRBIT_I]]
   1896 int8x16_t test_vrbitq_s8(int8x16_t a) {
   1897   return vrbitq_s8(a);
   1898 }
   1899 
   1900 // CHECK-LABEL: define <8 x i8> @test_vrbit_u8(<8 x i8> %a) #0 {
   1901 // CHECK:   [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) #2
   1902 // CHECK:   ret <8 x i8> [[VRBIT_I]]
   1903 uint8x8_t test_vrbit_u8(uint8x8_t a) {
   1904   return vrbit_u8(a);
   1905 }
   1906 
   1907 // CHECK-LABEL: define <16 x i8> @test_vrbitq_u8(<16 x i8> %a) #0 {
   1908 // CHECK:   [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) #2
   1909 // CHECK:   ret <16 x i8> [[VRBIT_I]]
   1910 uint8x16_t test_vrbitq_u8(uint8x16_t a) {
   1911   return vrbitq_u8(a);
   1912 }
   1913 
   1914 // CHECK-LABEL: define <8 x i8> @test_vrbit_p8(<8 x i8> %a) #0 {
   1915 // CHECK:   [[VRBIT_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %a) #2
   1916 // CHECK:   ret <8 x i8> [[VRBIT_I]]
   1917 poly8x8_t test_vrbit_p8(poly8x8_t a) {
   1918   return vrbit_p8(a);
   1919 }
   1920 
   1921 // CHECK-LABEL: define <16 x i8> @test_vrbitq_p8(<16 x i8> %a) #0 {
   1922 // CHECK:   [[VRBIT_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %a) #2
   1923 // CHECK:   ret <16 x i8> [[VRBIT_I]]
   1924 poly8x16_t test_vrbitq_p8(poly8x16_t a) {
   1925   return vrbitq_p8(a);
   1926 }
   1927 
        // Narrowing move (XTN / vmovn): truncates each element to half width;
        // lowered as a plain trunc after the round-trip bitcast Clang emits for
        // NEON argument marshalling.
   1928 // CHECK-LABEL: define <8 x i8> @test_vmovn_s16(<8 x i16> %a) #0 {
   1929 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
   1930 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
   1931 // CHECK:   [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
   1932 // CHECK:   ret <8 x i8> [[VMOVN_I]]
   1933 int8x8_t test_vmovn_s16(int16x8_t a) {
   1934   return vmovn_s16(a);
   1935 }
   1936 
   1937 // CHECK-LABEL: define <4 x i16> @test_vmovn_s32(<4 x i32> %a) #0 {
   1938 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
   1939 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
   1940 // CHECK:   [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
   1941 // CHECK:   ret <4 x i16> [[VMOVN_I]]
   1942 int16x4_t test_vmovn_s32(int32x4_t a) {
   1943   return vmovn_s32(a);
   1944 }
   1945 
   1946 // CHECK-LABEL: define <2 x i32> @test_vmovn_s64(<2 x i64> %a) #0 {
   1947 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
   1948 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
   1949 // CHECK:   [[VMOVN_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
   1950 // CHECK:   ret <2 x i32> [[VMOVN_I]]
   1951 int32x2_t test_vmovn_s64(int64x2_t a) {
   1952   return vmovn_s64(a);
   1953 }
   1954 
   1955 // CHECK-LABEL: define <8 x i8> @test_vmovn_u16(<8 x i16> %a) #0 {
   1956 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
   1957 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
   1958 // CHECK:   [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
   1959 // CHECK:   ret <8 x i8> [[VMOVN_I]]
   1960 uint8x8_t test_vmovn_u16(uint16x8_t a) {
   1961   return vmovn_u16(a);
   1962 }
   1963 
   1964 // CHECK-LABEL: define <4 x i16> @test_vmovn_u32(<4 x i32> %a) #0 {
   1965 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
   1966 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
   1967 // CHECK:   [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
   1968 // CHECK:   ret <4 x i16> [[VMOVN_I]]
   1969 uint16x4_t test_vmovn_u32(uint32x4_t a) {
   1970   return vmovn_u32(a);
   1971 }
   1972 
   1973 // CHECK-LABEL: define <2 x i32> @test_vmovn_u64(<2 x i64> %a) #0 {
   1974 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
   1975 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
   1976 // CHECK:   [[VMOVN_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
   1977 // CHECK:   ret <2 x i32> [[VMOVN_I]]
   1978 uint32x2_t test_vmovn_u64(uint64x2_t a) {
   1979   return vmovn_u64(a);
   1980 }
   1981 
        // Narrowing move to high half (XTN2 / vmovn_high): trunc of the second
        // operand, then a shufflevector concatenating it after the low half %a.
   1982 // CHECK-LABEL: define <16 x i8> @test_vmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
   1983 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
   1984 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
   1985 // CHECK:   [[VMOVN_I_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
   1986 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VMOVN_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   1987 // CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
   1988 int8x16_t test_vmovn_high_s16(int8x8_t a, int16x8_t b) {
   1989   return vmovn_high_s16(a, b);
   1990 }
   1991 
   1992 // CHECK-LABEL: define <8 x i16> @test_vmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
   1993 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
   1994 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
   1995 // CHECK:   [[VMOVN_I_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
   1996 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VMOVN_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   1997 // CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
   1998 int16x8_t test_vmovn_high_s32(int16x4_t a, int32x4_t b) {
   1999   return vmovn_high_s32(a, b);
   2000 }
   2001 
   2002 // CHECK-LABEL: define <4 x i32> @test_vmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
   2003 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
   2004 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
   2005 // CHECK:   [[VMOVN_I_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
   2006 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VMOVN_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2007 // CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
   2008 int32x4_t test_vmovn_high_s64(int32x2_t a, int64x2_t b) {
   2009   return vmovn_high_s64(a, b);
   2010 }
   2011 
   2012 // CHECK-LABEL: define <16 x i8> @test_vmovn_high_u16(<8 x i8> %a, <8 x i16> %b) #0 {
   2013 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
   2014 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
   2015 // CHECK:   [[VMOVN_I_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8>
   2016 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VMOVN_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   2017 // CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
   2018 int8x16_t test_vmovn_high_u16(int8x8_t a, int16x8_t b) {
   2019   return vmovn_high_u16(a, b);
   2020 }
   2021 
   2022 // CHECK-LABEL: define <8 x i16> @test_vmovn_high_u32(<4 x i16> %a, <4 x i32> %b) #0 {
   2023 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
   2024 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
   2025 // CHECK:   [[VMOVN_I_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16>
   2026 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VMOVN_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   2027 // CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
   2028 int16x8_t test_vmovn_high_u32(int16x4_t a, int32x4_t b) {
   2029   return vmovn_high_u32(a, b);
   2030 }
   2031 
   2032 // CHECK-LABEL: define <4 x i32> @test_vmovn_high_u64(<2 x i32> %a, <2 x i64> %b) #0 {
   2033 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
   2034 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
   2035 // CHECK:   [[VMOVN_I_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
   2036 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VMOVN_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2037 // CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
   2038 int32x4_t test_vmovn_high_u64(int32x2_t a, int64x2_t b) {
   2039   return vmovn_high_u64(a, b);
   2040 }
   2041 
        // Saturating narrow, signed-to-unsigned (SQXTUN / vqmovun): maps to
        // llvm.aarch64.neon.sqxtun.
        // NOTE(review): the ACLE vqmovun_s* intrinsics return unsigned vectors,
        // but these wrappers declare signed return types (compiles via lax
        // vector conversion) -- consider switching to uint8x8_t etc.
   2042 // CHECK-LABEL: define <8 x i8> @test_vqmovun_s16(<8 x i16> %a) #0 {
   2043 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
   2044 // CHECK:   [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
   2045 // CHECK:   [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[VQMOVUN_V_I]]) #2
   2046 // CHECK:   ret <8 x i8> [[VQMOVUN_V1_I]]
   2047 int8x8_t test_vqmovun_s16(int16x8_t a) {
   2048   return vqmovun_s16(a);
   2049 }
   2050 
   2051 // CHECK-LABEL: define <4 x i16> @test_vqmovun_s32(<4 x i32> %a) #0 {
   2052 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
   2053 // CHECK:   [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
   2054 // CHECK:   [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[VQMOVUN_V_I]]) #2
   2055 // CHECK:   [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8>
   2056 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <4 x i16>
   2057 // CHECK:   ret <4 x i16> [[TMP1]]
   2058 int16x4_t test_vqmovun_s32(int32x4_t a) {
   2059   return vqmovun_s32(a);
   2060 }
   2061 
   2062 // CHECK-LABEL: define <2 x i32> @test_vqmovun_s64(<2 x i64> %a) #0 {
   2063 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
   2064 // CHECK:   [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
   2065 // CHECK:   [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[VQMOVUN_V_I]]) #2
   2066 // CHECK:   [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8>
   2067 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <2 x i32>
   2068 // CHECK:   ret <2 x i32> [[TMP1]]
   2069 int32x2_t test_vqmovun_s64(int64x2_t a) {
   2070   return vqmovun_s64(a);
   2071 }
   2072 
        // Saturating narrow to high half, signed-to-unsigned (SQXTUN2):
        // sqxtun on %b, then concatenation with %a via shufflevector.
        // NOTE(review): as with vqmovun_s* above, signed wrapper types are used
        // where the ACLE intrinsics are unsigned -- verify intentional.
   2073 // CHECK-LABEL: define <16 x i8> @test_vqmovun_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
   2074 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
   2075 // CHECK:   [[VQMOVUN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
   2076 // CHECK:   [[VQMOVUN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[VQMOVUN_V_I_I]]) #2
   2077 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQMOVUN_V1_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   2078 // CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
   2079 int8x16_t test_vqmovun_high_s16(int8x8_t a, int16x8_t b) {
   2080   return vqmovun_high_s16(a, b);
   2081 }
   2082 
   2083 // CHECK-LABEL: define <8 x i16> @test_vqmovun_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
   2084 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
   2085 // CHECK:   [[VQMOVUN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
   2086 // CHECK:   [[VQMOVUN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[VQMOVUN_V_I_I]]) #2
   2087 // CHECK:   [[VQMOVUN_V2_I_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I_I]] to <8 x i8>
   2088 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I_I]] to <4 x i16>
   2089 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   2090 // CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
   2091 int16x8_t test_vqmovun_high_s32(int16x4_t a, int32x4_t b) {
   2092   return vqmovun_high_s32(a, b);
   2093 }
   2094 
   2095 // CHECK-LABEL: define <4 x i32> @test_vqmovun_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
   2096 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
   2097 // CHECK:   [[VQMOVUN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
   2098 // CHECK:   [[VQMOVUN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> [[VQMOVUN_V_I_I]]) #2
   2099 // CHECK:   [[VQMOVUN_V2_I_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I_I]] to <8 x i8>
   2100 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I_I]] to <2 x i32>
   2101 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2102 // CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
   2103 int32x4_t test_vqmovun_high_s64(int32x2_t a, int64x2_t b) {
   2104   return vqmovun_high_s64(a, b);
   2105 }
   2106 
        // Signed saturating narrow (SQXTN / vqmovn_s*): maps to
        // llvm.aarch64.neon.sqxtn.
   2107 // CHECK-LABEL: define <8 x i8> @test_vqmovn_s16(<8 x i16> %a) #0 {
   2108 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
   2109 // CHECK:   [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
   2110 // CHECK:   [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[VQMOVN_V_I]]) #2
   2111 // CHECK:   ret <8 x i8> [[VQMOVN_V1_I]]
   2112 int8x8_t test_vqmovn_s16(int16x8_t a) {
   2113   return vqmovn_s16(a);
   2114 }
   2115 
   2116 // CHECK-LABEL: define <4 x i16> @test_vqmovn_s32(<4 x i32> %a) #0 {
   2117 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
   2118 // CHECK:   [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
   2119 // CHECK:   [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[VQMOVN_V_I]]) #2
   2120 // CHECK:   [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
   2121 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <4 x i16>
   2122 // CHECK:   ret <4 x i16> [[TMP1]]
   2123 int16x4_t test_vqmovn_s32(int32x4_t a) {
   2124   return vqmovn_s32(a);
   2125 }
   2126 
   2127 // CHECK-LABEL: define <2 x i32> @test_vqmovn_s64(<2 x i64> %a) #0 {
   2128 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
   2129 // CHECK:   [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
   2130 // CHECK:   [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> [[VQMOVN_V_I]]) #2
   2131 // CHECK:   [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
   2132 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32>
   2133 // CHECK:   ret <2 x i32> [[TMP1]]
   2134 int32x2_t test_vqmovn_s64(int64x2_t a) {
   2135   return vqmovn_s64(a);
   2136 }
   2137 
        // Signed saturating narrow to high half (SQXTN2): sqxtn on %b, then
        // concatenation with %a via shufflevector.
   2138 // CHECK-LABEL: define <16 x i8> @test_vqmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
   2139 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
   2140 // CHECK:   [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
   2141 // CHECK:   [[VQMOVN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[VQMOVN_V_I_I]]) #2
   2142 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQMOVN_V1_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   2143 // CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
   2144 int8x16_t test_vqmovn_high_s16(int8x8_t a, int16x8_t b) {
   2145   return vqmovn_high_s16(a, b);
   2146 }
   2147 
   2148 // CHECK-LABEL: define <8 x i16> @test_vqmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
   2149 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
   2150 // CHECK:   [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
   2151 // CHECK:   [[VQMOVN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[VQMOVN_V_I_I]]) #2
   2152 // CHECK:   [[VQMOVN_V2_I_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I_I]] to <8 x i8>
   2153 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I_I]] to <4 x i16>
   2154 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   2155 // CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
   2156 int16x8_t test_vqmovn_high_s32(int16x4_t a, int32x4_t b) {
   2157   return vqmovn_high_s32(a, b);
   2158 }
   2159 
   2160 // CHECK-LABEL: define <4 x i32> @test_vqmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
   2161 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
   2162 // CHECK:   [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
   2163 // CHECK:   [[VQMOVN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> [[VQMOVN_V_I_I]]) #2
   2164 // CHECK:   [[VQMOVN_V2_I_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I_I]] to <8 x i8>
   2165 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I_I]] to <2 x i32>
   2166 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2167 // CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
   2168 int32x4_t test_vqmovn_high_s64(int32x2_t a, int64x2_t b) {
   2169   return vqmovn_high_s64(a, b);
   2170 }
   2171 
        // Unsigned saturating narrow (UQXTN / vqmovn_u*): maps to
        // llvm.aarch64.neon.uqxtn.
   2172 // CHECK-LABEL: define <8 x i8> @test_vqmovn_u16(<8 x i16> %a) #0 {
   2173 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
   2174 // CHECK:   [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
   2175 // CHECK:   [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[VQMOVN_V_I]]) #2
   2176 // CHECK:   ret <8 x i8> [[VQMOVN_V1_I]]
   2177 uint8x8_t test_vqmovn_u16(uint16x8_t a) {
   2178   return vqmovn_u16(a);
   2179 }
   2180 
   2181 // CHECK-LABEL: define <4 x i16> @test_vqmovn_u32(<4 x i32> %a) #0 {
   2182 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
   2183 // CHECK:   [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
   2184 // CHECK:   [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[VQMOVN_V_I]]) #2
   2185 // CHECK:   [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
   2186 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <4 x i16>
   2187 // CHECK:   ret <4 x i16> [[TMP1]]
   2188 uint16x4_t test_vqmovn_u32(uint32x4_t a) {
   2189   return vqmovn_u32(a);
   2190 }
   2191 
   2192 // CHECK-LABEL: define <2 x i32> @test_vqmovn_u64(<2 x i64> %a) #0 {
   2193 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
   2194 // CHECK:   [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
   2195 // CHECK:   [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[VQMOVN_V_I]]) #2
   2196 // CHECK:   [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
   2197 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32>
   2198 // CHECK:   ret <2 x i32> [[TMP1]]
   2199 uint32x2_t test_vqmovn_u64(uint64x2_t a) {
   2200   return vqmovn_u64(a);
   2201 }
   2202 
        // Unsigned saturating narrow to high half (UQXTN2): uqxtn on %b, then
        // concatenation with %a via shufflevector.
   2203 // CHECK-LABEL: define <16 x i8> @test_vqmovn_high_u16(<8 x i8> %a, <8 x i16> %b) #0 {
   2204 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
   2205 // CHECK:   [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
   2206 // CHECK:   [[VQMOVN_V1_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[VQMOVN_V_I_I]]) #2
   2207 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQMOVN_V1_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   2208 // CHECK:   ret <16 x i8> [[SHUFFLE_I_I]]
   2209 uint8x16_t test_vqmovn_high_u16(uint8x8_t a, uint16x8_t b) {
   2210   return vqmovn_high_u16(a, b);
   2211 }
   2212 
   2213 // CHECK-LABEL: define <8 x i16> @test_vqmovn_high_u32(<4 x i16> %a, <4 x i32> %b) #0 {
   2214 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
   2215 // CHECK:   [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
   2216 // CHECK:   [[VQMOVN_V1_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[VQMOVN_V_I_I]]) #2
   2217 // CHECK:   [[VQMOVN_V2_I_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I_I]] to <8 x i8>
   2218 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I_I]] to <4 x i16>
   2219 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   2220 // CHECK:   ret <8 x i16> [[SHUFFLE_I_I]]
   2221 uint16x8_t test_vqmovn_high_u32(uint16x4_t a, uint32x4_t b) {
   2222   return vqmovn_high_u32(a, b);
   2223 }
   2224 
   2225 // CHECK-LABEL: define <4 x i32> @test_vqmovn_high_u64(<2 x i32> %a, <2 x i64> %b) #0 {
   2226 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
   2227 // CHECK:   [[VQMOVN_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
   2228 // CHECK:   [[VQMOVN_V1_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> [[VQMOVN_V_I_I]]) #2
   2229 // CHECK:   [[VQMOVN_V2_I_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I_I]] to <8 x i8>
   2230 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I_I]] to <2 x i32>
   2231 // CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2232 // CHECK:   ret <4 x i32> [[SHUFFLE_I_I]]
   2233 uint32x4_t test_vqmovn_high_u64(uint32x2_t a, uint64x2_t b) {
   2234   return vqmovn_high_u64(a, b);
   2235 }
   2236 
        // Widening shift left by immediate (SHLL / vshll_n): sext/zext to
        // double width followed by shl. These tests use the maximum shift
        // (== source element width), the SHLL special case.
   2237 // CHECK-LABEL: define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 {
   2238 // CHECK:   [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
   2239 // CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
   2240 // CHECK:   ret <8 x i16> [[VSHLL_N]]
   2241 int16x8_t test_vshll_n_s8(int8x8_t a) {
   2242   return vshll_n_s8(a, 8);
   2243 }
   2244 
   2245 // CHECK-LABEL: define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 {
   2246 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
   2247 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
   2248 // CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
   2249 // CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16>
   2250 // CHECK:   ret <4 x i32> [[VSHLL_N]]
   2251 int32x4_t test_vshll_n_s16(int16x4_t a) {
   2252   return vshll_n_s16(a, 16);
   2253 }
   2254 
   2255 // CHECK-LABEL: define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 {
   2256 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
   2257 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
   2258 // CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
   2259 // CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 32, i64 32>
   2260 // CHECK:   ret <2 x i64> [[VSHLL_N]]
   2261 int64x2_t test_vshll_n_s32(int32x2_t a) {
   2262   return vshll_n_s32(a, 32);
   2263 }
   2264 
   2265 // CHECK-LABEL: define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 {
   2266 // CHECK:   [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
   2267 // CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
   2268 // CHECK:   ret <8 x i16> [[VSHLL_N]]
   2269 uint16x8_t test_vshll_n_u8(uint8x8_t a) {
   2270   return vshll_n_u8(a, 8);
   2271 }
   2272 
   2273 // CHECK-LABEL: define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 {
   2274 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
   2275 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
   2276 // CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
   2277 // CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16>
   2278 // CHECK:   ret <4 x i32> [[VSHLL_N]]
   2279 uint32x4_t test_vshll_n_u16(uint16x4_t a) {
   2280   return vshll_n_u16(a, 16);
   2281 }
   2282 
   2283 // CHECK-LABEL: define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 {
   2284 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
   2285 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
   2286 // CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
   2287 // CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 32, i64 32>
   2288 // CHECK:   ret <2 x i64> [[VSHLL_N]]
   2289 uint64x2_t test_vshll_n_u32(uint32x2_t a) {
   2290   return vshll_n_u32(a, 32);
   2291 }
   2292 
        // Widening shift left of the high half (SHLL2 / vshll_high_n):
        // shufflevector extracts the upper half, then sext/zext + shl as above.
   2293 // CHECK-LABEL: define <8 x i16> @test_vshll_high_n_s8(<16 x i8> %a) #0 {
   2294 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   2295 // CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
   2296 // CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
   2297 // CHECK:   ret <8 x i16> [[VSHLL_N]]
   2298 int16x8_t test_vshll_high_n_s8(int8x16_t a) {
   2299   return vshll_high_n_s8(a, 8);
   2300 }
   2301 
   2302 // CHECK-LABEL: define <4 x i32> @test_vshll_high_n_s16(<8 x i16> %a) #0 {
   2303 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   2304 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
   2305 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
   2306 // CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
   2307 // CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16>
   2308 // CHECK:   ret <4 x i32> [[VSHLL_N]]
   2309 int32x4_t test_vshll_high_n_s16(int16x8_t a) {
   2310   return vshll_high_n_s16(a, 16);
   2311 }
   2312 
   2313 // CHECK-LABEL: define <2 x i64> @test_vshll_high_n_s32(<4 x i32> %a) #0 {
   2314 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
   2315 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
   2316 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
   2317 // CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
   2318 // CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 32, i64 32>
   2319 // CHECK:   ret <2 x i64> [[VSHLL_N]]
   2320 int64x2_t test_vshll_high_n_s32(int32x4_t a) {
   2321   return vshll_high_n_s32(a, 32);
   2322 }
   2323 
   2324 // CHECK-LABEL: define <8 x i16> @test_vshll_high_n_u8(<16 x i8> %a) #0 {
   2325 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   2326 // CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
   2327 // CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
   2328 // CHECK:   ret <8 x i16> [[VSHLL_N]]
   2329 uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
   2330   return vshll_high_n_u8(a, 8);
   2331 }
   2332 
   2333 // CHECK-LABEL: define <4 x i32> @test_vshll_high_n_u16(<8 x i16> %a) #0 {
   2334 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   2335 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
   2336 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
   2337 // CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
   2338 // CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16>
   2339 // CHECK:   ret <4 x i32> [[VSHLL_N]]
   2340 uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
   2341   return vshll_high_n_u16(a, 16);
   2342 }
   2343 
   2344 // CHECK-LABEL: define <2 x i64> @test_vshll_high_n_u32(<4 x i32> %a) #0 {
   2345 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
   2346 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
   2347 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
   2348 // CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
   2349 // CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 32, i64 32>
   2350 // CHECK:   ret <2 x i64> [[VSHLL_N]]
   2351 uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
   2352   return vshll_high_n_u32(a, 32);
   2353 }
   2354 
// f32 -> f16 narrowing conversions: lowered to the
// llvm.aarch64.neon.vcvtfp2hf intrinsic. The "high" variant converts the
// f32 operand and concatenates the result onto the existing low half with
// a shufflevector.

// CHECK-LABEL: define <4 x half> @test_vcvt_f16_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVT_F16_F32_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I]]) #2
// CHECK:   [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x half>
// CHECK:   ret <4 x half> [[TMP1]]
float16x4_t test_vcvt_f16_f32(float32x4_t a) {
  return vcvt_f16_f32(a);
}

// CHECK-LABEL: define <8 x half> @test_vcvt_high_f16_f32(<4 x half> %a, <4 x float> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VCVT_F16_F32_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCVT_F16_F321_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I_I]]) #2
// CHECK:   [[VCVT_F16_F322_I_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I_I]] to <4 x half>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x half> %a, <4 x half> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x half> [[SHUFFLE_I_I]]
float16x8_t test_vcvt_high_f16_f32(float16x4_t a, float32x4_t b) {
  return vcvt_high_f16_f32(a, b);
}
   2377 
// f64 -> f32 narrowing: lowered to a plain IR fptrunc (no target
// intrinsic). The "high" variant concatenates the truncated result onto
// the existing low half with a shufflevector.

// CHECK-LABEL: define <2 x float> @test_vcvt_f32_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCVT_I:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float>
// CHECK:   ret <2 x float> [[VCVT_I]]
float32x2_t test_vcvt_f32_f64(float64x2_t a) {
  return vcvt_f32_f64(a);
}

// CHECK-LABEL: define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCVT_I_I:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float>
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x float> %a, <2 x float> [[VCVT_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x float> [[SHUFFLE_I_I]]
float32x4_t test_vcvt_high_f32_f64(float32x2_t a, float64x2_t b) {
  return vcvt_high_f32_f64(a, b);
}
   2396 
// vcvtx (f64 -> f32 narrowing using the FCVTXN instruction): lowered to
// the llvm.aarch64.neon.fcvtxn intrinsic rather than fptrunc. The "high"
// variant appends the result to the low half with a shufflevector.

// CHECK-LABEL: define <2 x float> @test_vcvtx_f32_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VCVTX_F32_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCVTX_F32_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> [[VCVTX_F32_V_I]]) #2
// CHECK:   ret <2 x float> [[VCVTX_F32_V1_I]]
float32x2_t test_vcvtx_f32_f64(float64x2_t a) {
  return vcvtx_f32_f64(a);
}

// CHECK-LABEL: define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK:   [[VCVTX_F32_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCVTX_F32_V1_I_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> [[VCVTX_F32_V_I_I]]) #2
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x float> %a, <2 x float> [[VCVTX_F32_V1_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x float> [[SHUFFLE_I_I]]
float32x4_t test_vcvtx_high_f32_f64(float32x2_t a, float64x2_t b) {
  return vcvtx_high_f32_f64(a, b);
}
   2415 
// f16 -> f32 widening conversions: lowered to the
// llvm.aarch64.neon.vcvthf2fp intrinsic. The "high" variant first
// extracts the upper four half-precision lanes with a shufflevector.

// CHECK-LABEL: define <4 x float> @test_vcvt_f32_f16(<4 x half> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK:   [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]]) #2
// CHECK:   [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I]] to <4 x float>
// CHECK:   ret <4 x float> [[TMP1]]
float32x4_t test_vcvt_f32_f16(float16x4_t a) {
  return vcvt_f32_f16(a);
}

// CHECK-LABEL: define <4 x float> @test_vcvt_high_f32_f16(<8 x half> %a) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[VCVT_F32_F16_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VCVT_F32_F161_I_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I_I]]) #2
// CHECK:   [[VCVT_F32_F162_I_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I_I]] to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I_I]] to <4 x float>
// CHECK:   ret <4 x float> [[TMP1]]
float32x4_t test_vcvt_high_f32_f16(float16x8_t a) {
  return vcvt_high_f32_f16(a);
}
   2438 
// f32 -> f64 widening: lowered to a plain IR fpext. The "high" variant
// first extracts lanes 2 and 3 with a shufflevector.

// CHECK-LABEL: define <2 x double> @test_vcvt_f64_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCVT_I:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
// CHECK:   ret <2 x double> [[VCVT_I]]
float64x2_t test_vcvt_f64_f32(float32x2_t a) {
  return vcvt_f64_f32(a);
}

// CHECK-LABEL: define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %a) #0 {
// CHECK:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCVT_I_I:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
// CHECK:   ret <2 x double> [[VCVT_I_I]]
float64x2_t test_vcvt_high_f64_f32(float32x4_t a) {
  return vcvt_high_f64_f32(a);
}
   2457 
// vrndn (round to integral, to-nearest-even): lowered to the
// llvm.aarch64.neon.frintn intrinsic for each vector width.

// CHECK-LABEL: define <2 x float> @test_vrndn_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VRNDN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VRNDN1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float> [[VRNDN_I]]) #2
// CHECK:   ret <2 x float> [[VRNDN1_I]]
float32x2_t test_vrndn_f32(float32x2_t a) {
  return vrndn_f32(a);
}

// CHECK-LABEL: define <4 x float> @test_vrndnq_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VRNDN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VRNDN1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float> [[VRNDN_I]]) #2
// CHECK:   ret <4 x float> [[VRNDN1_I]]
float32x4_t test_vrndnq_f32(float32x4_t a) {
  return vrndnq_f32(a);
}

// CHECK-LABEL: define <2 x double> @test_vrndnq_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VRNDN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VRNDN1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frintn.v2f64(<2 x double> [[VRNDN_I]]) #2
// CHECK:   ret <2 x double> [[VRNDN1_I]]
float64x2_t test_vrndnq_f64(float64x2_t a) {
  return vrndnq_f64(a);
}
   2484 
// vrnda (round to integral, ties away from zero): lowered to the generic
// llvm.round intrinsic rather than a target-specific one.

// CHECK-LABEL: define <2 x float> @test_vrnda_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VRNDA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VRNDA1_I:%.*]] = call <2 x float> @llvm.round.v2f32(<2 x float> [[VRNDA_I]]) #2
// CHECK:   ret <2 x float> [[VRNDA1_I]]
float32x2_t test_vrnda_f32(float32x2_t a) {
  return vrnda_f32(a);
}

// CHECK-LABEL: define <4 x float> @test_vrndaq_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VRNDA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VRNDA1_I:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[VRNDA_I]]) #2
// CHECK:   ret <4 x float> [[VRNDA1_I]]
float32x4_t test_vrndaq_f32(float32x4_t a) {
  return vrndaq_f32(a);
}

// CHECK-LABEL: define <2 x double> @test_vrndaq_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VRNDA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VRNDA1_I:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[VRNDA_I]]) #2
// CHECK:   ret <2 x double> [[VRNDA1_I]]
float64x2_t test_vrndaq_f64(float64x2_t a) {
  return vrndaq_f64(a);
}
   2511 
// vrndp (round to integral, toward +infinity): lowered to the generic
// llvm.ceil intrinsic.

// CHECK-LABEL: define <2 x float> @test_vrndp_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VRNDP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VRNDP1_I:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[VRNDP_I]]) #2
// CHECK:   ret <2 x float> [[VRNDP1_I]]
float32x2_t test_vrndp_f32(float32x2_t a) {
  return vrndp_f32(a);
}

// CHECK-LABEL: define <4 x float> @test_vrndpq_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VRNDP_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VRNDP1_I:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[VRNDP_I]]) #2
// CHECK:   ret <4 x float> [[VRNDP1_I]]
float32x4_t test_vrndpq_f32(float32x4_t a) {
  return vrndpq_f32(a);
}

// CHECK-LABEL: define <2 x double> @test_vrndpq_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VRNDP_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VRNDP1_I:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[VRNDP_I]]) #2
// CHECK:   ret <2 x double> [[VRNDP1_I]]
float64x2_t test_vrndpq_f64(float64x2_t a) {
  return vrndpq_f64(a);
}
   2538 
// vrndm (round to integral, toward -infinity): lowered to the generic
// llvm.floor intrinsic.

// CHECK-LABEL: define <2 x float> @test_vrndm_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VRNDM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VRNDM1_I:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[VRNDM_I]]) #2
// CHECK:   ret <2 x float> [[VRNDM1_I]]
float32x2_t test_vrndm_f32(float32x2_t a) {
  return vrndm_f32(a);
}

// CHECK-LABEL: define <4 x float> @test_vrndmq_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VRNDM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VRNDM1_I:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[VRNDM_I]]) #2
// CHECK:   ret <4 x float> [[VRNDM1_I]]
float32x4_t test_vrndmq_f32(float32x4_t a) {
  return vrndmq_f32(a);
}

// CHECK-LABEL: define <2 x double> @test_vrndmq_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VRNDM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VRNDM1_I:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[VRNDM_I]]) #2
// CHECK:   ret <2 x double> [[VRNDM1_I]]
float64x2_t test_vrndmq_f64(float64x2_t a) {
  return vrndmq_f64(a);
}
   2565 
// vrndx (round to integral using the current rounding mode, exact): lowered
// to the generic llvm.rint intrinsic.

// CHECK-LABEL: define <2 x float> @test_vrndx_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VRNDX_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VRNDX1_I:%.*]] = call <2 x float> @llvm.rint.v2f32(<2 x float> [[VRNDX_I]]) #2
// CHECK:   ret <2 x float> [[VRNDX1_I]]
float32x2_t test_vrndx_f32(float32x2_t a) {
  return vrndx_f32(a);
}

// CHECK-LABEL: define <4 x float> @test_vrndxq_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VRNDX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VRNDX1_I:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[VRNDX_I]]) #2
// CHECK:   ret <4 x float> [[VRNDX1_I]]
float32x4_t test_vrndxq_f32(float32x4_t a) {
  return vrndxq_f32(a);
}

// CHECK-LABEL: define <2 x double> @test_vrndxq_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VRNDX_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VRNDX1_I:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[VRNDX_I]]) #2
// CHECK:   ret <2 x double> [[VRNDX1_I]]
float64x2_t test_vrndxq_f64(float64x2_t a) {
  return vrndxq_f64(a);
}
   2592 
// vrnd (round to integral, toward zero): lowered to the generic
// llvm.trunc intrinsic.

// CHECK-LABEL: define <2 x float> @test_vrnd_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VRNDZ_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VRNDZ1_I:%.*]] = call <2 x float> @llvm.trunc.v2f32(<2 x float> [[VRNDZ_I]]) #2
// CHECK:   ret <2 x float> [[VRNDZ1_I]]
float32x2_t test_vrnd_f32(float32x2_t a) {
  return vrnd_f32(a);
}

// CHECK-LABEL: define <4 x float> @test_vrndq_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VRNDZ_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VRNDZ1_I:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[VRNDZ_I]]) #2
// CHECK:   ret <4 x float> [[VRNDZ1_I]]
float32x4_t test_vrndq_f32(float32x4_t a) {
  return vrndq_f32(a);
}

// CHECK-LABEL: define <2 x double> @test_vrndq_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VRNDZ_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VRNDZ1_I:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[VRNDZ_I]]) #2
// CHECK:   ret <2 x double> [[VRNDZ1_I]]
float64x2_t test_vrndq_f64(float64x2_t a) {
  return vrndq_f64(a);
}
   2619 
// vrndi (round to integral using the current rounding mode, inexact not
// raised): lowered to the generic llvm.nearbyint intrinsic.

// CHECK-LABEL: define <2 x float> @test_vrndi_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VRNDI_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VRNDI1_I:%.*]] = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> [[VRNDI_I]]) #2
// CHECK:   ret <2 x float> [[VRNDI1_I]]
float32x2_t test_vrndi_f32(float32x2_t a) {
  return vrndi_f32(a);
}

// CHECK-LABEL: define <4 x float> @test_vrndiq_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VRNDI_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VRNDI1_I:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[VRNDI_I]]) #2
// CHECK:   ret <4 x float> [[VRNDI1_I]]
float32x4_t test_vrndiq_f32(float32x4_t a) {
  return vrndiq_f32(a);
}

// CHECK-LABEL: define <2 x double> @test_vrndiq_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VRNDI_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VRNDI1_I:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[VRNDI_I]]) #2
// CHECK:   ret <2 x double> [[VRNDI1_I]]
float64x2_t test_vrndiq_f64(float64x2_t a) {
  return vrndiq_f64(a);
}
   2646 
// Default float-to-integer conversions (vcvt_s*/vcvt_u*): lowered to plain
// IR fptosi / fptoui instructions, not AArch64 intrinsics.

// CHECK-LABEL: define <2 x i32> @test_vcvt_s32_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[TMP2:%.*]] = fptosi <2 x float> [[TMP1]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
int32x2_t test_vcvt_s32_f32(float32x2_t a) {
  return vcvt_s32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtq_s32_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[TMP2:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
  return vcvtq_s32_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vcvtq_s64_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[TMP2:%.*]] = fptosi <2 x double> [[TMP1]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
int64x2_t test_vcvtq_s64_f64(float64x2_t a) {
  return vcvtq_s64_f64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vcvt_u32_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[TMP2:%.*]] = fptoui <2 x float> [[TMP1]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP2]]
uint32x2_t test_vcvt_u32_f32(float32x2_t a) {
  return vcvt_u32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtq_u32_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[TMP2:%.*]] = fptoui <4 x float> [[TMP1]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP2]]
uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
  return vcvtq_u32_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vcvtq_u64_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[TMP2:%.*]] = fptoui <2 x double> [[TMP1]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP2]]
uint64x2_t test_vcvtq_u64_f64(float64x2_t a) {
  return vcvtq_u64_f64(a);
}
   2700 
// vcvtn (float-to-int, round to nearest with ties to even): lowered to the
// llvm.aarch64.neon.fcvtns / fcvtnu intrinsics.

// CHECK-LABEL: define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCVTN1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float> [[VCVTN_I]]) #2
// CHECK:   ret <2 x i32> [[VCVTN1_I]]
int32x2_t test_vcvtn_s32_f32(float32x2_t a) {
  return vcvtn_s32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVTN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCVTN1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float> [[VCVTN_I]]) #2
// CHECK:   ret <4 x i32> [[VCVTN1_I]]
int32x4_t test_vcvtnq_s32_f32(float32x4_t a) {
  return vcvtnq_s32_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vcvtnq_s64_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VCVTN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCVTN1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double> [[VCVTN_I]]) #2
// CHECK:   ret <2 x i64> [[VCVTN1_I]]
int64x2_t test_vcvtnq_s64_f64(float64x2_t a) {
  return vcvtnq_s64_f64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VCVTN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCVTN1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float> [[VCVTN_I]]) #2
// CHECK:   ret <2 x i32> [[VCVTN1_I]]
uint32x2_t test_vcvtn_u32_f32(float32x2_t a) {
  return vcvtn_u32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVTN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCVTN1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float> [[VCVTN_I]]) #2
// CHECK:   ret <4 x i32> [[VCVTN1_I]]
uint32x4_t test_vcvtnq_u32_f32(float32x4_t a) {
  return vcvtnq_u32_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vcvtnq_u64_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VCVTN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCVTN1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double> [[VCVTN_I]]) #2
// CHECK:   ret <2 x i64> [[VCVTN1_I]]
uint64x2_t test_vcvtnq_u64_f64(float64x2_t a) {
  return vcvtnq_u64_f64(a);
}
   2754 
// vcvtp (float-to-int, round toward +infinity): lowered to the
// llvm.aarch64.neon.fcvtps / fcvtpu intrinsics.

// CHECK-LABEL: define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCVTP1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float> [[VCVTP_I]]) #2
// CHECK:   ret <2 x i32> [[VCVTP1_I]]
int32x2_t test_vcvtp_s32_f32(float32x2_t a) {
  return vcvtp_s32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVTP_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCVTP1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float> [[VCVTP_I]]) #2
// CHECK:   ret <4 x i32> [[VCVTP1_I]]
int32x4_t test_vcvtpq_s32_f32(float32x4_t a) {
  return vcvtpq_s32_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vcvtpq_s64_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VCVTP_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCVTP1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double> [[VCVTP_I]]) #2
// CHECK:   ret <2 x i64> [[VCVTP1_I]]
int64x2_t test_vcvtpq_s64_f64(float64x2_t a) {
  return vcvtpq_s64_f64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VCVTP_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCVTP1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float> [[VCVTP_I]]) #2
// CHECK:   ret <2 x i32> [[VCVTP1_I]]
uint32x2_t test_vcvtp_u32_f32(float32x2_t a) {
  return vcvtp_u32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVTP_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCVTP1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float> [[VCVTP_I]]) #2
// CHECK:   ret <4 x i32> [[VCVTP1_I]]
uint32x4_t test_vcvtpq_u32_f32(float32x4_t a) {
  return vcvtpq_u32_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vcvtpq_u64_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VCVTP_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCVTP1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double> [[VCVTP_I]]) #2
// CHECK:   ret <2 x i64> [[VCVTP1_I]]
uint64x2_t test_vcvtpq_u64_f64(float64x2_t a) {
  return vcvtpq_u64_f64(a);
}
   2808 
// vcvtm (float-to-int, round toward -infinity): lowered to the
// llvm.aarch64.neon.fcvtms / fcvtmu intrinsics.

// CHECK-LABEL: define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCVTM1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float> [[VCVTM_I]]) #2
// CHECK:   ret <2 x i32> [[VCVTM1_I]]
int32x2_t test_vcvtm_s32_f32(float32x2_t a) {
  return vcvtm_s32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVTM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCVTM1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float> [[VCVTM_I]]) #2
// CHECK:   ret <4 x i32> [[VCVTM1_I]]
int32x4_t test_vcvtmq_s32_f32(float32x4_t a) {
  return vcvtmq_s32_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vcvtmq_s64_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VCVTM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCVTM1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double> [[VCVTM_I]]) #2
// CHECK:   ret <2 x i64> [[VCVTM1_I]]
int64x2_t test_vcvtmq_s64_f64(float64x2_t a) {
  return vcvtmq_s64_f64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VCVTM_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCVTM1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float> [[VCVTM_I]]) #2
// CHECK:   ret <2 x i32> [[VCVTM1_I]]
uint32x2_t test_vcvtm_u32_f32(float32x2_t a) {
  return vcvtm_u32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVTM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCVTM1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float> [[VCVTM_I]]) #2
// CHECK:   ret <4 x i32> [[VCVTM1_I]]
uint32x4_t test_vcvtmq_u32_f32(float32x4_t a) {
  return vcvtmq_u32_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vcvtmq_u64_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VCVTM_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCVTM1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double> [[VCVTM_I]]) #2
// CHECK:   ret <2 x i64> [[VCVTM1_I]]
uint64x2_t test_vcvtmq_u64_f64(float64x2_t a) {
  return vcvtmq_u64_f64(a);
}
   2862 
// vcvta (float-to-int, round to nearest with ties away from zero): lowered
// to the llvm.aarch64.neon.fcvtas / fcvtau intrinsics.

// CHECK-LABEL: define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCVTA1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float> [[VCVTA_I]]) #2
// CHECK:   ret <2 x i32> [[VCVTA1_I]]
int32x2_t test_vcvta_s32_f32(float32x2_t a) {
  return vcvta_s32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVTA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCVTA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float> [[VCVTA_I]]) #2
// CHECK:   ret <4 x i32> [[VCVTA1_I]]
int32x4_t test_vcvtaq_s32_f32(float32x4_t a) {
  return vcvtaq_s32_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vcvtaq_s64_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VCVTA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCVTA1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double> [[VCVTA_I]]) #2
// CHECK:   ret <2 x i64> [[VCVTA1_I]]
int64x2_t test_vcvtaq_s64_f64(float64x2_t a) {
  return vcvtaq_s64_f64(a);
}

// CHECK-LABEL: define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VCVTA_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCVTA1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float> [[VCVTA_I]]) #2
// CHECK:   ret <2 x i32> [[VCVTA1_I]]
uint32x2_t test_vcvta_u32_f32(float32x2_t a) {
  return vcvta_u32_f32(a);
}

// CHECK-LABEL: define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVTA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCVTA1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float> [[VCVTA_I]]) #2
// CHECK:   ret <4 x i32> [[VCVTA1_I]]
uint32x4_t test_vcvtaq_u32_f32(float32x4_t a) {
  return vcvtaq_u32_f32(a);
}

// CHECK-LABEL: define <2 x i64> @test_vcvtaq_u64_f64(<2 x double> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK:   [[VCVTA_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK:   [[VCVTA1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double> [[VCVTA_I]]) #2
// CHECK:   ret <2 x i64> [[VCVTA1_I]]
uint64x2_t test_vcvtaq_u64_f64(float64x2_t a) {
  return vcvtaq_u64_f64(a);
}
   2916 
   2917 // CHECK-LABEL: define <2 x float> @test_vrsqrte_f32(<2 x float> %a) #0 {
   2918 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
   2919 // CHECK:   [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
   2920 // CHECK:   [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> [[VRSQRTE_V_I]]) #2
   2921 // CHECK:   ret <2 x float> [[VRSQRTE_V1_I]]
   2922 float32x2_t test_vrsqrte_f32(float32x2_t a) {
   2923   return vrsqrte_f32(a);
   2924 }
   2925 
    2926 // CHECK-LABEL: define <4 x float> @test_vrsqrteq_f32(<4 x float> %a) #0 {
    2927 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
    2928 // CHECK:   [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
    2929 // CHECK:   [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> [[VRSQRTEQ_V_I]]) #2
    2930 // CHECK:   ret <4 x float> [[VRSQRTEQ_V1_I]]
         // Checks that vrsqrteq_f32 lowers to a call of the @llvm.aarch64.neon.frsqrte.v4f32 intrinsic.
    2931 float32x4_t test_vrsqrteq_f32(float32x4_t a) {
    2932   return vrsqrteq_f32(a);
    2933 }
   2934 
    2935 // CHECK-LABEL: define <2 x double> @test_vrsqrteq_f64(<2 x double> %a) #0 {
    2936 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
    2937 // CHECK:   [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
    2938 // CHECK:   [[VRSQRTEQ_V1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double> [[VRSQRTEQ_V_I]]) #2
    2939 // CHECK:   ret <2 x double> [[VRSQRTEQ_V1_I]]
         // Checks that vrsqrteq_f64 lowers to a call of the @llvm.aarch64.neon.frsqrte.v2f64 intrinsic.
    2940 float64x2_t test_vrsqrteq_f64(float64x2_t a) {
    2941   return vrsqrteq_f64(a);
    2942 }
   2943 
    2944 // CHECK-LABEL: define <2 x float> @test_vrecpe_f32(<2 x float> %a) #0 {
    2945 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
    2946 // CHECK:   [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
    2947 // CHECK:   [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> [[VRECPE_V_I]]) #2
    2948 // CHECK:   ret <2 x float> [[VRECPE_V1_I]]
         // Checks that vrecpe_f32 lowers to a call of the @llvm.aarch64.neon.frecpe.v2f32 intrinsic.
    2949 float32x2_t test_vrecpe_f32(float32x2_t a) {
    2950   return vrecpe_f32(a);
    2951 }
   2952 
    2953 // CHECK-LABEL: define <4 x float> @test_vrecpeq_f32(<4 x float> %a) #0 {
    2954 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
    2955 // CHECK:   [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
    2956 // CHECK:   [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> [[VRECPEQ_V_I]]) #2
    2957 // CHECK:   ret <4 x float> [[VRECPEQ_V1_I]]
         // Checks that vrecpeq_f32 lowers to a call of the @llvm.aarch64.neon.frecpe.v4f32 intrinsic.
    2958 float32x4_t test_vrecpeq_f32(float32x4_t a) {
    2959   return vrecpeq_f32(a);
    2960 }
   2961 
    2962 // CHECK-LABEL: define <2 x double> @test_vrecpeq_f64(<2 x double> %a) #0 {
    2963 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
    2964 // CHECK:   [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
    2965 // CHECK:   [[VRECPEQ_V1_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double> [[VRECPEQ_V_I]]) #2
    2966 // CHECK:   ret <2 x double> [[VRECPEQ_V1_I]]
         // Checks that vrecpeq_f64 lowers to a call of the @llvm.aarch64.neon.frecpe.v2f64 intrinsic.
    2967 float64x2_t test_vrecpeq_f64(float64x2_t a) {
    2968   return vrecpeq_f64(a);
    2969 }
   2970 
    2971 // CHECK-LABEL: define <2 x i32> @test_vrecpe_u32(<2 x i32> %a) #0 {
    2972 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
    2973 // CHECK:   [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
    2974 // CHECK:   [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32> [[VRECPE_V_I]]) #2
    2975 // CHECK:   ret <2 x i32> [[VRECPE_V1_I]]
         // Checks that the unsigned variant vrecpe_u32 lowers to @llvm.aarch64.neon.urecpe.v2i32 (not frecpe).
    2976 uint32x2_t test_vrecpe_u32(uint32x2_t a) {
    2977   return vrecpe_u32(a);
    2978 }
   2979 
    2980 // CHECK-LABEL: define <4 x i32> @test_vrecpeq_u32(<4 x i32> %a) #0 {
    2981 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
    2982 // CHECK:   [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
    2983 // CHECK:   [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> [[VRECPEQ_V_I]]) #2
    2984 // CHECK:   ret <4 x i32> [[VRECPEQ_V1_I]]
         // Checks that the unsigned variant vrecpeq_u32 lowers to @llvm.aarch64.neon.urecpe.v4i32 (not frecpe).
    2985 uint32x4_t test_vrecpeq_u32(uint32x4_t a) {
    2986   return vrecpeq_u32(a);
    2987 }
   2988 
    2989 // CHECK-LABEL: define <2 x float> @test_vsqrt_f32(<2 x float> %a) #0 {
    2990 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
    2991 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
    2992 // CHECK:   [[VSQRT_I:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[TMP1]]) #2
    2993 // CHECK:   ret <2 x float> [[VSQRT_I]]
         // Checks that vsqrt_f32 lowers to the target-independent @llvm.sqrt.v2f32 intrinsic.
    2994 float32x2_t test_vsqrt_f32(float32x2_t a) {
    2995   return vsqrt_f32(a);
    2996 }
   2997 
    2998 // CHECK-LABEL: define <4 x float> @test_vsqrtq_f32(<4 x float> %a) #0 {
    2999 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
    3000 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
    3001 // CHECK:   [[VSQRT_I:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP1]]) #2
    3002 // CHECK:   ret <4 x float> [[VSQRT_I]]
         // Checks that vsqrtq_f32 lowers to the target-independent @llvm.sqrt.v4f32 intrinsic.
    3003 float32x4_t test_vsqrtq_f32(float32x4_t a) {
    3004   return vsqrtq_f32(a);
    3005 }
   3006 
    3007 // CHECK-LABEL: define <2 x double> @test_vsqrtq_f64(<2 x double> %a) #0 {
    3008 // CHECK:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
    3009 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
    3010 // CHECK:   [[VSQRT_I:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP1]]) #2
    3011 // CHECK:   ret <2 x double> [[VSQRT_I]]
         // Checks that vsqrtq_f64 lowers to the target-independent @llvm.sqrt.v2f64 intrinsic.
    3012 float64x2_t test_vsqrtq_f64(float64x2_t a) {
    3013   return vsqrtq_f64(a);
    3014 }
   3015 
    3016 // CHECK-LABEL: define <2 x float> @test_vcvt_f32_s32(<2 x i32> %a) #0 {
    3017 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
    3018 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
    3019 // CHECK:   [[VCVT_I:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
    3020 // CHECK:   ret <2 x float> [[VCVT_I]]
         // Checks that vcvt_f32_s32 lowers to a plain sitofp instruction, no target intrinsic.
    3021 float32x2_t test_vcvt_f32_s32(int32x2_t a) {
    3022   return vcvt_f32_s32(a);
    3023 }
   3024 
    3025 // CHECK-LABEL: define <2 x float> @test_vcvt_f32_u32(<2 x i32> %a) #0 {
    3026 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
    3027 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
    3028 // CHECK:   [[VCVT_I:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x float>
    3029 // CHECK:   ret <2 x float> [[VCVT_I]]
         // Checks that vcvt_f32_u32 lowers to a plain uitofp instruction, no target intrinsic.
    3030 float32x2_t test_vcvt_f32_u32(uint32x2_t a) {
    3031   return vcvt_f32_u32(a);
    3032 }
   3033 
    3034 // CHECK-LABEL: define <4 x float> @test_vcvtq_f32_s32(<4 x i32> %a) #0 {
    3035 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
    3036 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
    3037 // CHECK:   [[VCVT_I:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float>
    3038 // CHECK:   ret <4 x float> [[VCVT_I]]
         // Checks that vcvtq_f32_s32 lowers to a plain sitofp instruction, no target intrinsic.
    3039 float32x4_t test_vcvtq_f32_s32(int32x4_t a) {
    3040   return vcvtq_f32_s32(a);
    3041 }
   3042 
    3043 // CHECK-LABEL: define <4 x float> @test_vcvtq_f32_u32(<4 x i32> %a) #0 {
    3044 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
    3045 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
    3046 // CHECK:   [[VCVT_I:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float>
    3047 // CHECK:   ret <4 x float> [[VCVT_I]]
         // Checks that vcvtq_f32_u32 lowers to a plain uitofp instruction, no target intrinsic.
    3048 float32x4_t test_vcvtq_f32_u32(uint32x4_t a) {
    3049   return vcvtq_f32_u32(a);
    3050 }
   3051 
    3052 // CHECK-LABEL: define <2 x double> @test_vcvtq_f64_s64(<2 x i64> %a) #0 {
    3053 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
    3054 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
    3055 // CHECK:   [[VCVT_I:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x double>
    3056 // CHECK:   ret <2 x double> [[VCVT_I]]
         // Checks that vcvtq_f64_s64 lowers to a plain sitofp instruction, no target intrinsic.
    3057 float64x2_t test_vcvtq_f64_s64(int64x2_t a) {
    3058   return vcvtq_f64_s64(a);
    3059 }
   3060 
    3061 // CHECK-LABEL: define <2 x double> @test_vcvtq_f64_u64(<2 x i64> %a) #0 {
    3062 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
    3063 // CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
    3064 // CHECK:   [[VCVT_I:%.*]] = uitofp <2 x i64> [[TMP1]] to <2 x double>
    3065 // CHECK:   ret <2 x double> [[VCVT_I]]
         // Checks that vcvtq_f64_u64 lowers to a plain uitofp instruction, no target intrinsic.
    3066 float64x2_t test_vcvtq_f64_u64(uint64x2_t a) {
    3067   return vcvtq_f64_u64(a);
    3068 }
   3069