// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN:   -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s

// Test the AArch64 across-vector (reduction) intrinsics and their types.

#include <arm_neon.h>

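// For context, a brief usage sketch of the reductions exercised below (ours,
// not part of the original test; the helper name sum_block16 is hypothetical
// and carries no CHECK lines). vaddlvq_u8 widens each 8-bit lane before
// summing, so a full 16-byte block reduces to a scalar with no overflow.
static inline uint16_t sum_block16(const uint8_t *p) {
  uint8x16_t v = vld1q_u8(p); // load 16 bytes into one q register
  return vaddlvq_u8(v);       // UADDLV: widening add across all 16 lanes
}
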
// CHECK-LABEL: define i16 @test_vaddlv_s8(<8 x i8> %a) #0 {
// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a) #2
// CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
// CHECK:   ret i16 [[TMP0]]
int16_t test_vaddlv_s8(int8x8_t a) {
  return vaddlv_s8(a);
}

// CHECK-LABEL: define i32 @test_vaddlv_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> [[TMP1]]) #2
// CHECK:   ret i32 [[VADDLV_I]]
int32_t test_vaddlv_s16(int16x4_t a) {
  return vaddlv_s16(a);
}

// CHECK-LABEL: define i16 @test_vaddlv_u8(<8 x i8> %a) #0 {
// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a) #2
// CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
// CHECK:   ret i16 [[TMP0]]
uint16_t test_vaddlv_u8(uint8x8_t a) {
  return vaddlv_u8(a);
}

// CHECK-LABEL: define i32 @test_vaddlv_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> [[TMP1]]) #2
// CHECK:   ret i32 [[VADDLV_I]]
uint32_t test_vaddlv_u16(uint16x4_t a) {
  return vaddlv_u16(a);
}

// CHECK-LABEL: define i16 @test_vaddlvq_s8(<16 x i8> %a) #0 {
// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a) #2
// CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
// CHECK:   ret i16 [[TMP0]]
int16_t test_vaddlvq_s8(int8x16_t a) {
  return vaddlvq_s8(a);
}

// CHECK-LABEL: define i32 @test_vaddlvq_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> [[TMP1]]) #2
// CHECK:   ret i32 [[VADDLV_I]]
int32_t test_vaddlvq_s16(int16x8_t a) {
  return vaddlvq_s16(a);
}

// CHECK-LABEL: define i64 @test_vaddlvq_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VADDLVQ_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> [[TMP1]]) #2
// CHECK:   ret i64 [[VADDLVQ_S32_I]]
int64_t test_vaddlvq_s32(int32x4_t a) {
  return vaddlvq_s32(a);
}

// CHECK-LABEL: define i16 @test_vaddlvq_u8(<16 x i8> %a) #0 {
// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a) #2
// CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
// CHECK:   ret i16 [[TMP0]]
uint16_t test_vaddlvq_u8(uint8x16_t a) {
  return vaddlvq_u8(a);
}

// CHECK-LABEL: define i32 @test_vaddlvq_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> [[TMP1]]) #2
// CHECK:   ret i32 [[VADDLV_I]]
uint32_t test_vaddlvq_u16(uint16x8_t a) {
  return vaddlvq_u16(a);
}

// CHECK-LABEL: define i64 @test_vaddlvq_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> [[TMP1]]) #2
// CHECK:   ret i64 [[VADDLVQ_U32_I]]
uint64_t test_vaddlvq_u32(uint32x4_t a) {
  return vaddlvq_u32(a);
}

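// In the max/min tests below, the underlying intrinsic always yields an i32;
// for i8 and i16 elements the front end truncates that i32 back to the
// element width, which is the trunc instruction each CHECK expects.
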
// CHECK-LABEL: define i8 @test_vmaxv_s8(<8 x i8> %a) #0 {
// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a) #2
// CHECK:   [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
// CHECK:   ret i8 [[TMP0]]
int8_t test_vmaxv_s8(int8x8_t a) {
  return vmaxv_s8(a);
}

// CHECK-LABEL: define i16 @test_vmaxv_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[TMP1]]) #2
// CHECK:   [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
// CHECK:   ret i16 [[TMP2]]
int16_t test_vmaxv_s16(int16x4_t a) {
  return vmaxv_s16(a);
}

// CHECK-LABEL: define i8 @test_vmaxv_u8(<8 x i8> %a) #0 {
// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) #2
// CHECK:   [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
// CHECK:   ret i8 [[TMP0]]
uint8_t test_vmaxv_u8(uint8x8_t a) {
  return vmaxv_u8(a);
}

// CHECK-LABEL: define i16 @test_vmaxv_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> [[TMP1]]) #2
// CHECK:   [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vmaxv_u16(uint16x4_t a) {
  return vmaxv_u16(a);
}

// CHECK-LABEL: define i8 @test_vmaxvq_s8(<16 x i8> %a) #0 {
// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a) #2
// CHECK:   [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
// CHECK:   ret i8 [[TMP0]]
int8_t test_vmaxvq_s8(int8x16_t a) {
  return vmaxvq_s8(a);
}

// CHECK-LABEL: define i16 @test_vmaxvq_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[TMP1]]) #2
// CHECK:   [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
// CHECK:   ret i16 [[TMP2]]
int16_t test_vmaxvq_s16(int16x8_t a) {
  return vmaxvq_s16(a);
}

// CHECK-LABEL: define i32 @test_vmaxvq_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[TMP1]]) #2
// CHECK:   ret i32 [[VMAXVQ_S32_I]]
int32_t test_vmaxvq_s32(int32x4_t a) {
  return vmaxvq_s32(a);
}

// CHECK-LABEL: define i8 @test_vmaxvq_u8(<16 x i8> %a) #0 {
// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) #2
// CHECK:   [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
// CHECK:   ret i8 [[TMP0]]
uint8_t test_vmaxvq_u8(uint8x16_t a) {
  return vmaxvq_u8(a);
}

// CHECK-LABEL: define i16 @test_vmaxvq_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> [[TMP1]]) #2
// CHECK:   [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vmaxvq_u16(uint16x8_t a) {
  return vmaxvq_u16(a);
}

// CHECK-LABEL: define i32 @test_vmaxvq_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> [[TMP1]]) #2
// CHECK:   ret i32 [[VMAXVQ_U32_I]]
uint32_t test_vmaxvq_u32(uint32x4_t a) {
  return vmaxvq_u32(a);
}

// CHECK-LABEL: define i8 @test_vminv_s8(<8 x i8> %a) #0 {
// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a) #2
// CHECK:   [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
// CHECK:   ret i8 [[TMP0]]
int8_t test_vminv_s8(int8x8_t a) {
  return vminv_s8(a);
}

// CHECK-LABEL: define i16 @test_vminv_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> [[TMP1]]) #2
// CHECK:   [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
// CHECK:   ret i16 [[TMP2]]
int16_t test_vminv_s16(int16x4_t a) {
  return vminv_s16(a);
}

// CHECK-LABEL: define i8 @test_vminv_u8(<8 x i8> %a) #0 {
// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) #2
// CHECK:   [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
// CHECK:   ret i8 [[TMP0]]
uint8_t test_vminv_u8(uint8x8_t a) {
  return vminv_u8(a);
}

// CHECK-LABEL: define i16 @test_vminv_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> [[TMP1]]) #2
// CHECK:   [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vminv_u16(uint16x4_t a) {
  return vminv_u16(a);
}

// CHECK-LABEL: define i8 @test_vminvq_s8(<16 x i8> %a) #0 {
// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a) #2
// CHECK:   [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
// CHECK:   ret i8 [[TMP0]]
int8_t test_vminvq_s8(int8x16_t a) {
  return vminvq_s8(a);
}

// CHECK-LABEL: define i16 @test_vminvq_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> [[TMP1]]) #2
// CHECK:   [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
// CHECK:   ret i16 [[TMP2]]
int16_t test_vminvq_s16(int16x8_t a) {
  return vminvq_s16(a);
}

// CHECK-LABEL: define i32 @test_vminvq_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> [[TMP1]]) #2
// CHECK:   ret i32 [[VMINVQ_S32_I]]
int32_t test_vminvq_s32(int32x4_t a) {
  return vminvq_s32(a);
}

// CHECK-LABEL: define i8 @test_vminvq_u8(<16 x i8> %a) #0 {
// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) #2
// CHECK:   [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
// CHECK:   ret i8 [[TMP0]]
uint8_t test_vminvq_u8(uint8x16_t a) {
  return vminvq_u8(a);
}

// CHECK-LABEL: define i16 @test_vminvq_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> [[TMP1]]) #2
// CHECK:   [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vminvq_u16(uint16x8_t a) {
  return vminvq_u16(a);
}

// CHECK-LABEL: define i32 @test_vminvq_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> [[TMP1]]) #2
// CHECK:   ret i32 [[VMINVQ_U32_I]]
uint32_t test_vminvq_u32(uint32x4_t a) {
  return vminvq_u32(a);
}

// CHECK-LABEL: define i8 @test_vaddv_s8(<8 x i8> %a) #0 {
// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a) #2
// CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
// CHECK:   ret i8 [[TMP0]]
int8_t test_vaddv_s8(int8x8_t a) {
  return vaddv_s8(a);
}

// CHECK-LABEL: define i16 @test_vaddv_s16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> [[TMP1]]) #2
// CHECK:   [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
// CHECK:   ret i16 [[TMP2]]
int16_t test_vaddv_s16(int16x4_t a) {
  return vaddv_s16(a);
}

// CHECK-LABEL: define i8 @test_vaddv_u8(<8 x i8> %a) #0 {
// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a) #2
// CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
// CHECK:   ret i8 [[TMP0]]
uint8_t test_vaddv_u8(uint8x8_t a) {
  return vaddv_u8(a);
}

// CHECK-LABEL: define i16 @test_vaddv_u16(<4 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> [[TMP1]]) #2
// CHECK:   [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vaddv_u16(uint16x4_t a) {
  return vaddv_u16(a);
}

// CHECK-LABEL: define i8 @test_vaddvq_s8(<16 x i8> %a) #0 {
// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a) #2
// CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
// CHECK:   ret i8 [[TMP0]]
int8_t test_vaddvq_s8(int8x16_t a) {
  return vaddvq_s8(a);
}

// CHECK-LABEL: define i16 @test_vaddvq_s16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> [[TMP1]]) #2
// CHECK:   [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
// CHECK:   ret i16 [[TMP2]]
int16_t test_vaddvq_s16(int16x8_t a) {
  return vaddvq_s16(a);
}

// CHECK-LABEL: define i32 @test_vaddvq_s32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> [[TMP1]]) #2
// CHECK:   ret i32 [[VADDVQ_S32_I]]
int32_t test_vaddvq_s32(int32x4_t a) {
  return vaddvq_s32(a);
}

// CHECK-LABEL: define i8 @test_vaddvq_u8(<16 x i8> %a) #0 {
// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %a) #2
// CHECK:   [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
// CHECK:   ret i8 [[TMP0]]
uint8_t test_vaddvq_u8(uint8x16_t a) {
  return vaddvq_u8(a);
}

// CHECK-LABEL: define i16 @test_vaddvq_u16(<8 x i16> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> [[TMP1]]) #2
// CHECK:   [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
// CHECK:   ret i16 [[TMP2]]
uint16_t test_vaddvq_u16(uint16x8_t a) {
  return vaddvq_u16(a);
}

// CHECK-LABEL: define i32 @test_vaddvq_u32(<4 x i32> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> [[TMP1]]) #2
// CHECK:   ret i32 [[VADDVQ_U32_I]]
uint32_t test_vaddvq_u32(uint32x4_t a) {
  return vaddvq_u32(a);
}

// CHECK-LABEL: define float @test_vmaxvq_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VMAXVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> [[TMP1]]) #2
// CHECK:   ret float [[VMAXVQ_F32_I]]
float32_t test_vmaxvq_f32(float32x4_t a) {
  return vmaxvq_f32(a);
}

// CHECK-LABEL: define float @test_vminvq_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> [[TMP1]]) #2
// CHECK:   ret float [[VMINVQ_F32_I]]
float32_t test_vminvq_f32(float32x4_t a) {
  return vminvq_f32(a);
}

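// Unlike vmaxvq_f32/vminvq_f32 above, the *nm* forms lower to FMAXNMV/FMINNMV
// and follow IEEE 754-2008 maxNum/minNum semantics: a quiet NaN in one lane
// loses to a numeric value instead of propagating through the reduction.
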
// CHECK-LABEL: define float @test_vmaxnmvq_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VMAXNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> [[TMP1]]) #2
// CHECK:   ret float [[VMAXNMVQ_F32_I]]
float32_t test_vmaxnmvq_f32(float32x4_t a) {
  return vmaxnmvq_f32(a);
}

// CHECK-LABEL: define float @test_vminnmvq_f32(<4 x float> %a) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> [[TMP1]]) #2
// CHECK:   ret float [[VMINNMVQ_F32_I]]
float32_t test_vminnmvq_f32(float32x4_t a) {
  return vminnmvq_f32(a);
}
    399