; Big-endian ARM (armeb-eabi) NEON code-generation test, run once with the soft-float ABI and once with the hard-float ABI.
      1 ; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi soft %s -o - | FileCheck %s -check-prefix CHECK -check-prefix SOFT
      2 ; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi hard %s -o - | FileCheck %s -check-prefix CHECK -check-prefix HARD
      3 
      4 ; CHECK-LABEL: test_i64_f64:
        ; Helper under test takes a double and returns an i64.
        ; The double loaded from %p is added to itself (fadd) before the call; the
        ; returned i64 is added to itself (add) and the sum is stored through %q.
      5 declare i64 @test_i64_f64_helper(double %p)
      6 define void @test_i64_f64(double* %p, i64* %q) {
      7 ; SOFT: vadd.f64 [[REG:d[0-9]+]]
      8 ; SOFT: vmov r1, r0, [[REG]]
      9 ; HARD: vadd.f64 d0
     10     %1 = load double* %p
     11     %2 = fadd double %1, %1
     12     %3 = call i64 @test_i64_f64_helper(double %2)
     13     %4 = add i64 %3, %3
     14     store i64 %4, i64* %q
     15     ret void
     16 ; CHECK: adds r1
     17 ; CHECK: adc r0
     18 }
     19 
     20 ; CHECK-LABEL: test_i64_v1i64:
        ; Helper under test takes a <1 x i64> and returns an i64.
        ; The <1 x i64> loaded from %p is added to itself (add) before the call; the
        ; returned i64 is added to itself (add) and the sum is stored through %q.
     21 declare i64 @test_i64_v1i64_helper(<1 x i64> %p)
     22 define void @test_i64_v1i64(<1 x i64>* %p, i64* %q) {
     23 ; SOFT: vadd.i64 [[REG:d[0-9]+]]
     24 ; SOFT: vmov r1, r0, [[REG]]
     25 ; HARD: vadd.i64 d0
     26     %1 = load <1 x i64>* %p
     27     %2 = add <1 x i64> %1, %1
     28     %3 = call i64 @test_i64_v1i64_helper(<1 x i64> %2)
     29     %4 = add i64 %3, %3
     30     store i64 %4, i64* %q
     31     ret void
     32 ; CHECK: adds r1
     33 ; CHECK: adc r0
     34 }
     35 
     36 ; CHECK-LABEL: test_i64_v2f32:
        ; Helper under test takes a <2 x float> and returns an i64.
        ; The <2 x float> loaded from %p is added to itself (fadd) before the call; the
        ; returned i64 is added to itself (add) and the sum is stored through %q.
     37 declare i64 @test_i64_v2f32_helper(<2 x float> %p)
     38 define void @test_i64_v2f32(<2 x float>* %p, i64* %q) {
     39 ; SOFT: vrev64.32 [[REG:d[0-9]+]]
     40 ; SOFT: vmov r1, r0, [[REG]]
     41 ; HARD: vrev64.32 d0
     42     %1 = load <2 x float>* %p
     43     %2 = fadd <2 x float> %1, %1
     44     %3 = call i64 @test_i64_v2f32_helper(<2 x float> %2)
     45     %4 = add i64 %3, %3
     46     store i64 %4, i64* %q
     47     ret void
     48 ; CHECK: adds r1
     49 ; CHECK: adc r0
     50 }
     51 
     52 ; CHECK-LABEL: test_i64_v2i32:
        ; Helper under test takes a <2 x i32> and returns an i64.
        ; The <2 x i32> loaded from %p is added to itself (add) before the call; the
        ; returned i64 is added to itself (add) and the sum is stored through %q.
     53 declare i64 @test_i64_v2i32_helper(<2 x i32> %p)
     54 define void @test_i64_v2i32(<2 x i32>* %p, i64* %q) {
     55 ; SOFT: vrev64.32 [[REG:d[0-9]+]]
     56 ; SOFT: vmov r1, r0, [[REG]]
     57 ; HARD: vrev64.32 d0
     58     %1 = load <2 x i32>* %p
     59     %2 = add <2 x i32> %1, %1
     60     %3 = call i64 @test_i64_v2i32_helper(<2 x i32> %2)
     61     %4 = add i64 %3, %3
     62     store i64 %4, i64* %q
     63     ret void
     64 ; CHECK: adds r1
     65 ; CHECK: adc r0
     66 }
     67 
     68 ; CHECK-LABEL: test_i64_v4i16:
        ; Helper under test takes a <4 x i16> and returns an i64.
        ; The <4 x i16> loaded from %p is added to itself (add) before the call; the
        ; returned i64 is added to itself (add) and the sum is stored through %q.
     69 declare i64 @test_i64_v4i16_helper(<4 x i16> %p)
     70 define void @test_i64_v4i16(<4 x i16>* %p, i64* %q) {
     71 ; SOFT: vrev64.16 [[REG:d[0-9]+]]
     72 ; SOFT: vmov r1, r0, [[REG]]
     73 ; HARD: vrev64.16 d0
     74     %1 = load <4 x i16>* %p
     75     %2 = add <4 x i16> %1, %1
     76     %3 = call i64 @test_i64_v4i16_helper(<4 x i16> %2)
     77     %4 = add i64 %3, %3
     78     store i64 %4, i64* %q
     79     ret void
     80 ; CHECK: adds r1
     81 ; CHECK: adc r0
     82 }
     83 
     84 ; CHECK-LABEL: test_i64_v8i8:
        ; Helper under test takes a <8 x i8> and returns an i64.
        ; The <8 x i8> loaded from %p is added to itself (add) before the call; the
        ; returned i64 is added to itself (add) and the sum is stored through %q.
     85 declare i64 @test_i64_v8i8_helper(<8 x i8> %p)
     86 define void @test_i64_v8i8(<8 x i8>* %p, i64* %q) {
     87 ; SOFT: vrev64.8 [[REG:d[0-9]+]]
     88 ; SOFT: vmov r1, r0, [[REG]]
     89 ; HARD: vrev64.8 d0
     90     %1 = load <8 x i8>* %p
     91     %2 = add <8 x i8> %1, %1
     92     %3 = call i64 @test_i64_v8i8_helper(<8 x i8> %2)
     93     %4 = add i64 %3, %3
     94     store i64 %4, i64* %q
     95     ret void
     96 ; CHECK: adds r1
     97 ; CHECK: adc r0
     98 }
     99 
    100 ; CHECK-LABEL: test_f64_i64:
        ; Helper under test takes an i64 and returns a double.
        ; The i64 loaded from %p is added to itself (add) before the call; the
        ; returned double is added to itself (fadd) and the sum is stored through %q.
    101 declare double @test_f64_i64_helper(i64 %p)
    102 define void @test_f64_i64(i64* %p, double* %q) {
    103 ; CHECK: adds r1
    104 ; CHECK: adc r0
    105     %1 = load i64* %p
    106     %2 = add i64 %1, %1
    107     %3 = call double @test_f64_i64_helper(i64 %2)
    108     %4 = fadd double %3, %3
    109     store double %4, double* %q
    110     ret void
    111 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    112 ; SOFT: vadd.f64 [[REG]]
    113 ; HARD: vadd.f64 {{d[0-9]+}}, d0
    114 }
    115 
    116 ; CHECK-LABEL: test_f64_v1i64:
        ; Helper under test takes a <1 x i64> and returns a double.
        ; The <1 x i64> loaded from %p is added to itself (add) before the call; the
        ; returned double is added to itself (fadd) and the sum is stored through %q.
    117 declare double @test_f64_v1i64_helper(<1 x i64> %p)
    118 define void @test_f64_v1i64(<1 x i64>* %p, double* %q) {
    119 ; SOFT: vadd.i64 [[REG:d[0-9]+]]
    120 ; SOFT: vmov r1, r0, [[REG]]
    121 ; HARD: vadd.i64 d0
    122     %1 = load <1 x i64>* %p
    123     %2 = add <1 x i64> %1, %1
    124     %3 = call double @test_f64_v1i64_helper(<1 x i64> %2)
    125     %4 = fadd double %3, %3
    126     store double %4, double* %q
    127     ret void
    128 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    129 ; SOFT: vadd.f64 [[REG]]
    130 ; HARD: vadd.f64 {{d[0-9]+}}, d0
    131 }
    132 
    133 ; CHECK-LABEL: test_f64_v2f32:
        ; Helper under test takes a <2 x float> and returns a double.
        ; The <2 x float> loaded from %p is added to itself (fadd) before the call; the
        ; returned double is added to itself (fadd) and the sum is stored through %q.
    134 declare double @test_f64_v2f32_helper(<2 x float> %p)
    135 define void @test_f64_v2f32(<2 x float>* %p, double* %q) {
    136 ; SOFT: vrev64.32 [[REG:d[0-9]+]]
    137 ; SOFT: vmov r1, r0, [[REG]]
    138 ; HARD: vrev64.32 d0
    139     %1 = load <2 x float>* %p
    140     %2 = fadd <2 x float> %1, %1
    141     %3 = call double @test_f64_v2f32_helper(<2 x float> %2)
    142     %4 = fadd double %3, %3
    143     store double %4, double* %q
    144     ret void
    145 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    146 ; SOFT: vadd.f64 [[REG]]
    147 ; HARD: vadd.f64 {{d[0-9]+}}, d0
    148 }
    149 
    150 ; CHECK-LABEL: test_f64_v2i32:
        ; Helper under test takes a <2 x i32> and returns a double.
        ; The <2 x i32> loaded from %p is added to itself (add) before the call; the
        ; returned double is added to itself (fadd) and the sum is stored through %q.
    151 declare double @test_f64_v2i32_helper(<2 x i32> %p)
    152 define void @test_f64_v2i32(<2 x i32>* %p, double* %q) {
    153 ; SOFT: vrev64.32 [[REG:d[0-9]+]]
    154 ; SOFT: vmov r1, r0, [[REG]]
    155 ; HARD: vrev64.32 d0
    156     %1 = load <2 x i32>* %p
    157     %2 = add <2 x i32> %1, %1
    158     %3 = call double @test_f64_v2i32_helper(<2 x i32> %2)
    159     %4 = fadd double %3, %3
    160     store double %4, double* %q
    161     ret void
    162 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    163 ; SOFT: vadd.f64 [[REG]]
    164 ; HARD: vadd.f64 {{d[0-9]+}}, d0
    165 }
    166 
    167 ; CHECK-LABEL: test_f64_v4i16:
        ; Helper under test takes a <4 x i16> and returns a double.
        ; The <4 x i16> loaded from %p is added to itself (add) before the call; the
        ; returned double is added to itself (fadd) and the sum is stored through %q.
    168 declare double @test_f64_v4i16_helper(<4 x i16> %p)
    169 define void @test_f64_v4i16(<4 x i16>* %p, double* %q) {
    170 ; SOFT: vrev64.16 [[REG:d[0-9]+]]
    171 ; SOFT: vmov r1, r0, [[REG]]
    172 ; HARD: vrev64.16 d0
    173     %1 = load <4 x i16>* %p
    174     %2 = add <4 x i16> %1, %1
    175     %3 = call double @test_f64_v4i16_helper(<4 x i16> %2)
    176     %4 = fadd double %3, %3
    177     store double %4, double* %q
    178     ret void
    179 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    180 ; SOFT: vadd.f64 [[REG]]
    181 ; HARD: vadd.f64 {{d[0-9]+}}, d0
    182 }
    183 
    184 ; CHECK-LABEL: test_f64_v8i8:
        ; Helper under test takes a <8 x i8> and returns a double.
        ; The <8 x i8> loaded from %p is added to itself (add) before the call; the
        ; returned double is added to itself (fadd) and the sum is stored through %q.
    185 declare double @test_f64_v8i8_helper(<8 x i8> %p)
    186 define void @test_f64_v8i8(<8 x i8>* %p, double* %q) {
    187 ; SOFT: vrev64.8 [[REG:d[0-9]+]]
    188 ; SOFT: vmov r1, r0, [[REG]]
    189 ; HARD: vrev64.8 d0
    190     %1 = load <8 x i8>* %p
    191     %2 = add <8 x i8> %1, %1
    192     %3 = call double @test_f64_v8i8_helper(<8 x i8> %2)
    193     %4 = fadd double %3, %3
    194     store double %4, double* %q
    195     ret void
    196 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    197 ; SOFT: vadd.f64 [[REG]]
    198 ; HARD: vadd.f64 {{d[0-9]+}}, d0
    199 }
    200 
    201 ; CHECK-LABEL: test_v1i64_i64:
        ; Helper under test takes an i64 and returns a <1 x i64>.
        ; The i64 loaded from %p is added to itself (add) before the call; the
        ; returned <1 x i64> is added to itself (add) and the sum is stored through %q.
    202 declare <1 x i64> @test_v1i64_i64_helper(i64 %p)
    203 define void @test_v1i64_i64(i64* %p, <1 x i64>* %q) {
    204 ; CHECK: adds r1
    205 ; CHECK: adc r0
    206     %1 = load i64* %p
    207     %2 = add i64 %1, %1
    208     %3 = call <1 x i64> @test_v1i64_i64_helper(i64 %2)
    209     %4 = add <1 x i64> %3, %3
    210     store <1 x i64> %4, <1 x i64>* %q
    211     ret void
    212 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    213 ; SOFT: vadd.i64 [[REG]]
    214 ; HARD: vadd.i64 {{d[0-9]+}}, d0
    215 }
    216 
    217 ; CHECK-LABEL: test_v1i64_f64:
        ; Helper under test takes a double and returns a <1 x i64>.
        ; The double loaded from %p is added to itself (fadd) before the call; the
        ; returned <1 x i64> is added to itself (add) and the sum is stored through %q.
    218 declare <1 x i64> @test_v1i64_f64_helper(double %p)
    219 define void @test_v1i64_f64(double* %p, <1 x i64>* %q) {
    220 ; SOFT: vadd.f64 [[REG:d[0-9]+]]
    221 ; SOFT: vmov r1, r0, [[REG]]
    222 ; HARD: vadd.f64 d0
    223     %1 = load double* %p
    224     %2 = fadd double %1, %1
    225     %3 = call <1 x i64> @test_v1i64_f64_helper(double %2)
    226     %4 = add <1 x i64> %3, %3
    227     store <1 x i64> %4, <1 x i64>* %q
    228     ret void
    229 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    230 ; SOFT: vadd.i64 [[REG]]
    231 ; HARD: vadd.i64 {{d[0-9]+}}, d0
    232 }
    233 
    234 ; CHECK-LABEL: test_v1i64_v2f32:
        ; Helper under test takes a <2 x float> and returns a <1 x i64>.
        ; The <2 x float> loaded from %p is added to itself (fadd) before the call; the
        ; returned <1 x i64> is added to itself (add) and the sum is stored through %q.
    235 declare <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %p)
    236 define void @test_v1i64_v2f32(<2 x float>* %p, <1 x i64>* %q) {
    237 ; HARD: vrev64.32 d0
    238 ; SOFT: vadd.f32 [[REG:d[0-9]+]]
    239 ; SOFT: vmov r1, r0, [[REG]]
    240     %1 = load <2 x float>* %p
    241     %2 = fadd <2 x float> %1, %1
    242     %3 = call <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %2)
    243     %4 = add <1 x i64> %3, %3
    244     store <1 x i64> %4, <1 x i64>* %q
    245     ret void
    246 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    247 ; SOFT: vadd.i64 [[REG]]
    248 ; HARD: vadd.i64 {{d[0-9]+}}, d0
    249 }
    250 
    251 ; CHECK-LABEL: test_v1i64_v2i32:
        ; Helper under test takes a <2 x i32> and returns a <1 x i64>.
        ; The <2 x i32> loaded from %p is added to itself (add) before the call; the
        ; returned <1 x i64> is added to itself (add) and the sum is stored through %q.
    252 declare <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %p)
    253 define void @test_v1i64_v2i32(<2 x i32>* %p, <1 x i64>* %q) {
    254 ; HARD: vrev64.32 d0
    255 ; SOFT: vadd.i32 [[REG:d[0-9]+]]
    256 ; SOFT: vrev64.32 [[REG]]
    257 ; SOFT: vmov r1, r0, [[REG]]
    258     %1 = load <2 x i32>* %p
    259     %2 = add <2 x i32> %1, %1
    260     %3 = call <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %2)
    261     %4 = add <1 x i64> %3, %3
    262     store <1 x i64> %4, <1 x i64>* %q
    263     ret void
    264 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    265 ; SOFT: vadd.i64 [[REG]]
    266 ; HARD: vadd.i64 {{d[0-9]+}}, d0
    267 }
    268 
    269 ; CHECK-LABEL: test_v1i64_v4i16:
        ; Helper under test takes a <4 x i16> and returns a <1 x i64>.
        ; The <4 x i16> loaded from %p is added to itself (add) before the call; the
        ; returned <1 x i64> is added to itself (add) and the sum is stored through %q.
    270 declare <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %p)
    271 define void @test_v1i64_v4i16(<4 x i16>* %p, <1 x i64>* %q) {
    272 ; SOFT: vrev64.16 [[REG:d[0-9]+]]
    273 ; SOFT: vmov r1, r0, [[REG]]
    274 ; HARD: vrev64.16 d0
    275     %1 = load <4 x i16>* %p
    276     %2 = add <4 x i16> %1, %1
    277     %3 = call <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %2)
    278     %4 = add <1 x i64> %3, %3
    279     store <1 x i64> %4, <1 x i64>* %q
    280     ret void
    281 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    282 ; SOFT: vadd.i64 [[REG]]
    283 ; HARD: vadd.i64 {{d[0-9]+}}, d0
    284 }
    285 
    286 ; CHECK-LABEL: test_v1i64_v8i8:
        ; Helper under test takes a <8 x i8> and returns a <1 x i64>.
        ; The <8 x i8> loaded from %p is added to itself (add) before the call; the
        ; returned <1 x i64> is added to itself (add) and the sum is stored through %q.
    287 declare <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %p)
    288 define void @test_v1i64_v8i8(<8 x i8>* %p, <1 x i64>* %q) {
    289 ; SOFT: vrev64.8 [[REG:d[0-9]+]]
    290 ; SOFT: vmov r1, r0, [[REG]]
    291 ; HARD: vrev64.8 d0
    292     %1 = load <8 x i8>* %p
    293     %2 = add <8 x i8> %1, %1
    294     %3 = call <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %2)
    295     %4 = add <1 x i64> %3, %3
    296     store <1 x i64> %4, <1 x i64>* %q
    297     ret void
    298 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    299 ; SOFT: vadd.i64 [[REG]]
    300 ; HARD: vadd.i64 {{d[0-9]+}}, d0
    301 }
    302 
    303 ; CHECK-LABEL: test_v2f32_i64:
        ; Helper under test takes an i64 and returns a <2 x float>.
        ; The i64 loaded from %p is added to itself (add) before the call; the
        ; returned <2 x float> is added to itself (fadd) and the sum is stored through %q.
    304 declare <2 x float> @test_v2f32_i64_helper(i64 %p)
    305 define void @test_v2f32_i64(i64* %p, <2 x float>* %q) {
    306 ; CHECK: adds r1
    307 ; CHECK: adc r0
    308     %1 = load i64* %p
    309     %2 = add i64 %1, %1
    310     %3 = call <2 x float> @test_v2f32_i64_helper(i64 %2)
    311     %4 = fadd <2 x float> %3, %3
    312     store <2 x float> %4, <2 x float>* %q
    313     ret void
    314 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    315 ; SOFT: vrev64.32 [[REG]]
    316 ; HARD: vrev64.32 {{d[0-9]+}}, d0
    317 }
    318 
    319 ; CHECK-LABEL: test_v2f32_f64:
        ; Helper under test takes a double and returns a <2 x float>.
        ; The double loaded from %p is added to itself (fadd) before the call; the
        ; returned <2 x float> is added to itself (fadd) and the sum is stored through %q.
    320 declare <2 x float> @test_v2f32_f64_helper(double %p)
    321 define void @test_v2f32_f64(double* %p, <2 x float>* %q) {
    322 ; SOFT: vadd.f64 [[REG:d[0-9]+]]
    323 ; SOFT: vmov r1, r0, [[REG]]
    324 ; HARD: vadd.f64 d0
    325     %1 = load double* %p
    326     %2 = fadd double %1, %1
    327     %3 = call <2 x float> @test_v2f32_f64_helper(double %2)
    328     %4 = fadd <2 x float> %3, %3
    329     store <2 x float> %4, <2 x float>* %q
    330     ret void
    331 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    332 ; SOFT: vrev64.32 [[REG]]
    333 ; HARD: vrev64.32 {{d[0-9]+}}, d0
    334 }
    335 
    336 ; CHECK-LABEL: test_v2f32_v1i64:
        ; Helper under test takes a <1 x i64> and returns a <2 x float>.
        ; The <1 x i64> loaded from %p is added to itself (add) before the call; the
        ; returned <2 x float> is added to itself (fadd) and the sum is stored through %q.
    337 declare <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %p)
    338 define void @test_v2f32_v1i64(<1 x i64>* %p, <2 x float>* %q) {
    339 ; SOFT: vadd.i64 [[REG:d[0-9]+]]
    340 ; SOFT: vmov r1, r0, [[REG]]
    341 ; HARD: vadd.i64 d0
    342     %1 = load <1 x i64>* %p
    343     %2 = add <1 x i64> %1, %1
    344     %3 = call <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %2)
    345     %4 = fadd <2 x float> %3, %3
    346     store <2 x float> %4, <2 x float>* %q
    347     ret void
    348 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    349 ; SOFT: vrev64.32 [[REG]]
    350 ; HARD: vrev64.32 {{d[0-9]+}}, d0
    351 }
    352 
    353 ; CHECK-LABEL: test_v2f32_v2i32:
        ; Helper under test takes a <2 x i32> and returns a <2 x float>.
        ; The <2 x i32> loaded from %p is added to itself (add) before the call; the
        ; returned <2 x float> is added to itself (fadd) and the sum is stored through %q.
    354 declare <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %p)
    355 define void @test_v2f32_v2i32(<2 x i32>* %p, <2 x float>* %q) {
    356 ; HARD: vrev64.32 d0
    357 ; SOFT: vadd.i32 [[REG:d[0-9]+]]
    358 ; SOFT: vrev64.32 [[REG]]
    359 ; SOFT: vmov r1, r0, [[REG]]
    360     %1 = load <2 x i32>* %p
    361     %2 = add <2 x i32> %1, %1
    362     %3 = call <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %2)
    363     %4 = fadd <2 x float> %3, %3
    364     store <2 x float> %4, <2 x float>* %q
    365     ret void
    366 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    367 ; SOFT: vrev64.32 [[REG]]
    368 ; HARD: vrev64.32 {{d[0-9]+}}, d0
    369 }
    370 
    371 ; CHECK-LABEL: test_v2f32_v4i16:
        ; Helper under test takes a <4 x i16> and returns a <2 x float>.
        ; The <4 x i16> loaded from %p is added to itself (add) before the call; the
        ; returned <2 x float> is added to itself (fadd) and the sum is stored through %q.
    372 declare <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %p)
    373 define void @test_v2f32_v4i16(<4 x i16>* %p, <2 x float>* %q) {
    374 ; SOFT: vrev64.16 [[REG:d[0-9]+]]
    375 ; SOFT: vmov r1, r0, [[REG]]
    376 ; HARD: vrev64.16 d0
    377     %1 = load <4 x i16>* %p
    378     %2 = add <4 x i16> %1, %1
    379     %3 = call <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %2)
    380     %4 = fadd <2 x float> %3, %3
    381     store <2 x float> %4, <2 x float>* %q
    382     ret void
    383 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    384 ; SOFT: vrev64.32 [[REG]]
    385 ; HARD: vrev64.32 {{d[0-9]+}}, d0
    386 }
    387 
    388 ; CHECK-LABEL: test_v2f32_v8i8:
        ; Helper under test takes a <8 x i8> and returns a <2 x float>.
        ; The <8 x i8> loaded from %p is added to itself (add) before the call; the
        ; returned <2 x float> is added to itself (fadd) and the sum is stored through %q.
    389 declare <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %p)
    390 define void @test_v2f32_v8i8(<8 x i8>* %p, <2 x float>* %q) {
    391 ; SOFT: vrev64.8 [[REG:d[0-9]+]]
    392 ; SOFT: vmov r1, r0, [[REG]]
    393 ; HARD: vrev64.8 d0
    394     %1 = load <8 x i8>* %p
    395     %2 = add <8 x i8> %1, %1
    396     %3 = call <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %2)
    397     %4 = fadd <2 x float> %3, %3
    398     store <2 x float> %4, <2 x float>* %q
    399     ret void
    400 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    401 ; SOFT: vrev64.32 [[REG]]
    402 ; HARD: vrev64.32 {{d[0-9]+}}, d0
    403 }
    404 
    405 ; CHECK-LABEL: test_v2i32_i64:
        ; Helper under test takes an i64 and returns a <2 x i32>.
        ; The i64 loaded from %p is added to itself (add) before the call; the
        ; returned <2 x i32> is added to itself (add) and the sum is stored through %q.
    406 declare <2 x i32> @test_v2i32_i64_helper(i64 %p)
    407 define void @test_v2i32_i64(i64* %p, <2 x i32>* %q) {
    408 ; CHECK: adds r1
    409 ; CHECK: adc r0
    410     %1 = load i64* %p
    411     %2 = add i64 %1, %1
    412     %3 = call <2 x i32> @test_v2i32_i64_helper(i64 %2)
    413     %4 = add <2 x i32> %3, %3
    414     store <2 x i32> %4, <2 x i32>* %q
    415     ret void
    416 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    417 ; SOFT: vrev64.32 [[REG]]
    418 ; HARD: vrev64.32 {{d[0-9]+}}, d0
    419 }
    420 
    421 ; CHECK-LABEL: test_v2i32_f64:
        ; Helper under test takes a double and returns a <2 x i32>.
        ; The double loaded from %p is added to itself (fadd) before the call; the
        ; returned <2 x i32> is added to itself (add) and the sum is stored through %q.
    422 declare <2 x i32> @test_v2i32_f64_helper(double %p)
    423 define void @test_v2i32_f64(double* %p, <2 x i32>* %q) {
    424 ; SOFT: vadd.f64 [[REG:d[0-9]+]]
    425 ; SOFT: vmov r1, r0, [[REG]]
    426 ; HARD: vadd.f64 d0
    427     %1 = load double* %p
    428     %2 = fadd double %1, %1
    429     %3 = call <2 x i32> @test_v2i32_f64_helper(double %2)
    430     %4 = add <2 x i32> %3, %3
    431     store <2 x i32> %4, <2 x i32>* %q
    432     ret void
    433 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    434 ; SOFT: vrev64.32 [[REG]]
    435 ; HARD: vrev64.32 {{d[0-9]+}}, d0
    436 }
    437 
    438 ; CHECK-LABEL: test_v2i32_v1i64:
        ; Helper under test takes a <1 x i64> and returns a <2 x i32>.
        ; The <1 x i64> loaded from %p is added to itself (add) before the call; the
        ; returned <2 x i32> is added to itself (add) and the sum is stored through %q.
    439 declare <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %p)
    440 define void @test_v2i32_v1i64(<1 x i64>* %p, <2 x i32>* %q) {
    441 ; SOFT: vadd.i64 [[REG:d[0-9]+]]
    442 ; SOFT: vmov r1, r0, [[REG]]
    443 ; HARD: vadd.i64 d0
    444     %1 = load <1 x i64>* %p
    445     %2 = add <1 x i64> %1, %1
    446     %3 = call <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %2)
    447     %4 = add <2 x i32> %3, %3
    448     store <2 x i32> %4, <2 x i32>* %q
    449     ret void
    450 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    451 ; SOFT: vrev64.32 [[REG]]
    452 ; HARD: vrev64.32 {{d[0-9]+}}, d0
    453 }
    454 
    455 ; CHECK-LABEL: test_v2i32_v2f32:
        ; Helper under test takes a <2 x float> and returns a <2 x i32>.
        ; The <2 x float> loaded from %p is added to itself (fadd) before the call; the
        ; returned <2 x i32> is added to itself (add) and the sum is stored through %q.
    456 declare <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %p)
    457 define void @test_v2i32_v2f32(<2 x float>* %p, <2 x i32>* %q) {
    458 ; HARD: vadd.f32 [[REG:d[0-9]+]]
    459 ; HARD: vrev64.32 d0, [[REG]]
    460 ; SOFT: vadd.f32 [[REG:d[0-9]+]]
    461 ; SOFT: vrev64.32 [[REG]]
    462 ; SOFT: vmov r1, r0, [[REG]]
    463     %1 = load <2 x float>* %p
    464     %2 = fadd <2 x float> %1, %1
    465     %3 = call <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %2)
    466     %4 = add <2 x i32> %3, %3
    467     store <2 x i32> %4, <2 x i32>* %q
    468     ret void
    469 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    470 ; SOFT: vrev64.32 [[REG]]
    471 ; HARD: vrev64.32 {{d[0-9]+}}, d0
    472 }
    473 
    474 ; CHECK-LABEL: test_v2i32_v4i16:
        ; Helper under test takes a <4 x i16> and returns a <2 x i32>.
        ; The <4 x i16> loaded from %p is added to itself (add) before the call; the
        ; returned <2 x i32> is added to itself (add) and the sum is stored through %q.
    475 declare <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %p)
    476 define void @test_v2i32_v4i16(<4 x i16>* %p, <2 x i32>* %q) {
    477 ; SOFT: vrev64.16 [[REG:d[0-9]+]]
    478 ; SOFT: vmov r1, r0, [[REG]]
    479 ; HARD: vrev64.16 d0
    480     %1 = load <4 x i16>* %p
    481     %2 = add <4 x i16> %1, %1
    482     %3 = call <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %2)
    483     %4 = add <2 x i32> %3, %3
    484     store <2 x i32> %4, <2 x i32>* %q
    485     ret void
    486 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    487 ; SOFT: vrev64.32 [[REG]]
    488 ; HARD: vrev64.32 {{d[0-9]+}}, d0
    489 }
    490 
    491 ; CHECK-LABEL: test_v2i32_v8i8:
        ; Helper under test takes a <8 x i8> and returns a <2 x i32>.
        ; The <8 x i8> loaded from %p is added to itself (add) before the call; the
        ; returned <2 x i32> is added to itself (add) and the sum is stored through %q.
    492 declare <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %p)
    493 define void @test_v2i32_v8i8(<8 x i8>* %p, <2 x i32>* %q) {
    494 ; SOFT: vrev64.8 [[REG:d[0-9]+]]
    495 ; SOFT: vmov r1, r0, [[REG]]
    496 ; HARD: vrev64.8 d0
    497     %1 = load <8 x i8>* %p
    498     %2 = add <8 x i8> %1, %1
    499     %3 = call <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %2)
    500     %4 = add <2 x i32> %3, %3
    501     store <2 x i32> %4, <2 x i32>* %q
    502     ret void
    503 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    504 ; SOFT: vrev64.32 [[REG]]
    505 ; HARD: vrev64.32 {{d[0-9]+}}, d0
    506 }
    507 
    508 ; CHECK-LABEL: test_v4i16_i64:
        ; Helper under test takes an i64 and returns a <4 x i16>.
        ; The i64 loaded from %p is added to itself (add) before the call; the
        ; returned <4 x i16> is added to itself (add) and the sum is stored through %q.
    509 declare <4 x i16> @test_v4i16_i64_helper(i64 %p)
    510 define void @test_v4i16_i64(i64* %p, <4 x i16>* %q) {
    511 ; CHECK: adds r1
    512 ; CHECK: adc r0
    513     %1 = load i64* %p
    514     %2 = add i64 %1, %1
    515     %3 = call <4 x i16> @test_v4i16_i64_helper(i64 %2)
    516     %4 = add <4 x i16> %3, %3
    517     store <4 x i16> %4, <4 x i16>* %q
    518     ret void
    519 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    520 ; SOFT: vrev64.16 [[REG]]
    521 ; HARD: vrev64.16 {{d[0-9]+}}, d0
    522 }
    523 
    524 ; CHECK-LABEL: test_v4i16_f64:
        ; Helper under test takes a double and returns a <4 x i16>.
        ; The double loaded from %p is added to itself (fadd) before the call; the
        ; returned <4 x i16> is added to itself (add) and the sum is stored through %q.
    525 declare <4 x i16> @test_v4i16_f64_helper(double %p)
    526 define void @test_v4i16_f64(double* %p, <4 x i16>* %q) {
    527 ; SOFT: vadd.f64 [[REG:d[0-9]+]]
    528 ; SOFT: vmov r1, r0, [[REG]]
    529 ; HARD: vadd.f64 d0
    530     %1 = load double* %p
    531     %2 = fadd double %1, %1
    532     %3 = call <4 x i16> @test_v4i16_f64_helper(double %2)
    533     %4 = add <4 x i16> %3, %3
    534     store <4 x i16> %4, <4 x i16>* %q
    535     ret void
    536 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    537 ; SOFT: vrev64.16 [[REG]]
    538 ; HARD: vrev64.16 {{d[0-9]+}}, d0
    539 }
    540 
    541 ; CHECK-LABEL: test_v4i16_v1i64:
        ; Helper under test takes a <1 x i64> and returns a <4 x i16>.
        ; The <1 x i64> loaded from %p is added to itself (add) before the call; the
        ; returned <4 x i16> is added to itself (add) and the sum is stored through %q.
    542 declare <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %p)
    543 define void @test_v4i16_v1i64(<1 x i64>* %p, <4 x i16>* %q) {
    544 ; SOFT: vadd.i64 [[REG:d[0-9]+]]
    545 ; SOFT: vmov r1, r0, [[REG]]
    546 ; HARD: vadd.i64 d0
    547     %1 = load <1 x i64>* %p
    548     %2 = add <1 x i64> %1, %1
    549     %3 = call <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %2)
    550     %4 = add <4 x i16> %3, %3
    551     store <4 x i16> %4, <4 x i16>* %q
    552     ret void
    553 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    554 ; SOFT: vrev64.16 [[REG]]
    555 ; HARD: vrev64.16 {{d[0-9]+}}, d0
    556 }
    557 
    558 ; CHECK-LABEL: test_v4i16_v2f32:
        ; Helper under test takes a <2 x float> and returns a <4 x i16>.
        ; The <2 x float> loaded from %p is added to itself (fadd) before the call; the
        ; returned <4 x i16> is added to itself (add) and the sum is stored through %q.
    559 declare <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %p)
    560 define void @test_v4i16_v2f32(<2 x float>* %p, <4 x i16>* %q) {
    561 ; HARD: vadd.f32 [[REG:d[0-9]+]]
    562 ; HARD: vrev64.32 d0, [[REG]]
    563 ; SOFT: vadd.f32 [[REG:d[0-9]+]]
    564 ; SOFT: vrev64.32 [[REG]]
    565 ; SOFT: vmov r1, r0, [[REG]]
    566     %1 = load <2 x float>* %p
    567     %2 = fadd <2 x float> %1, %1
    568     %3 = call <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %2)
    569     %4 = add <4 x i16> %3, %3
    570     store <4 x i16> %4, <4 x i16>* %q
    571     ret void
    572 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    573 ; SOFT: vrev64.16 [[REG]]
    574 ; HARD: vrev64.16 {{d[0-9]+}}, d0
    575 }
    576 
    577 ; CHECK-LABEL: test_v4i16_v2i32:
        ; Helper under test takes a <2 x i32> and returns a <4 x i16>.
        ; The <2 x i32> loaded from %p is added to itself (add) before the call; the
        ; returned <4 x i16> is added to itself (add) and the sum is stored through %q.
    578 declare <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %p)
    579 define void @test_v4i16_v2i32(<2 x i32>* %p, <4 x i16>* %q) {
    580 ; HARD: vadd.i32 [[REG:d[0-9]+]]
    581 ; HARD: vrev64.32 d0, [[REG]]
    582 ; SOFT: vadd.i32 [[REG:d[0-9]+]]
    583 ; SOFT: vrev64.32 [[REG]]
    584 ; SOFT: vmov r1, r0, [[REG]]
    585     %1 = load <2 x i32>* %p
    586     %2 = add <2 x i32> %1, %1
    587     %3 = call <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %2)
    588     %4 = add <4 x i16> %3, %3
    589     store <4 x i16> %4, <4 x i16>* %q
    590     ret void
    591 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    592 ; SOFT: vrev64.16 [[REG]]
    593 ; HARD: vrev64.16 {{d[0-9]+}}, d0
    594 }
    595 
    596 ; CHECK-LABEL: test_v4i16_v8i8:
        ; Helper under test takes a <8 x i8> and returns a <4 x i16>.
        ; The <8 x i8> loaded from %p is added to itself (add) before the call; the
        ; returned <4 x i16> is added to itself (add) and the sum is stored through %q.
    597 declare <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %p)
    598 define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) {
    599 ; SOFT: vrev64.8 [[REG:d[0-9]+]]
    600 ; SOFT: vmov r1, r0, [[REG]]
    601 ; HARD: vrev64.8 d0
    602     %1 = load <8 x i8>* %p
    603     %2 = add <8 x i8> %1, %1
    604     %3 = call <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %2)
    605     %4 = add <4 x i16> %3, %3
    606     store <4 x i16> %4, <4 x i16>* %q
    607     ret void
    608 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    609 ; SOFT: vrev64.16 [[REG]]
    610 ; HARD: vrev64.16 {{d[0-9]+}}, d0
    611 }
    612 
    613 ; CHECK-LABEL: test_v8i8_i64:
        ; Helper under test takes an i64 and returns a <8 x i8>.
        ; The i64 loaded from %p is added to itself (add) before the call; the
        ; returned <8 x i8> is added to itself (add) and the sum is stored through %q.
    614 declare <8 x i8> @test_v8i8_i64_helper(i64 %p)
    615 define void @test_v8i8_i64(i64* %p, <8 x i8>* %q) {
    616 ; CHECK: adds r1
    617 ; CHECK: adc r0
    618     %1 = load i64* %p
    619     %2 = add i64 %1, %1
    620     %3 = call <8 x i8> @test_v8i8_i64_helper(i64 %2)
    621     %4 = add <8 x i8> %3, %3
    622     store <8 x i8> %4, <8 x i8>* %q
    623     ret void
    624 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    625 ; SOFT: vrev64.8 [[REG]]
    626 ; HARD: vrev64.8 {{d[0-9]+}}, d0
    627 }
    628 
    629 ; CHECK-LABEL: test_v8i8_f64:
        ; Helper under test takes a double and returns a <8 x i8>.
        ; The double loaded from %p is added to itself (fadd) before the call; the
        ; returned <8 x i8> is added to itself (add) and the sum is stored through %q.
    630 declare <8 x i8> @test_v8i8_f64_helper(double %p)
    631 define void @test_v8i8_f64(double* %p, <8 x i8>* %q) {
    632 ; SOFT: vadd.f64 [[REG:d[0-9]+]]
    633 ; SOFT: vmov r1, r0, [[REG]]
    634 ; HARD: vadd.f64 d0
    635     %1 = load double* %p
    636     %2 = fadd double %1, %1
    637     %3 = call <8 x i8> @test_v8i8_f64_helper(double %2)
    638     %4 = add <8 x i8> %3, %3
    639     store <8 x i8> %4, <8 x i8>* %q
    640     ret void
    641 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    642 ; SOFT: vrev64.8 [[REG]]
    643 ; HARD: vrev64.8 {{d[0-9]+}}, d0
    644 }
    645 
    646 ; CHECK-LABEL: test_v8i8_v1i64:
        ; Helper under test takes a <1 x i64> and returns a <8 x i8>.
        ; The <1 x i64> loaded from %p is added to itself (add) before the call; the
        ; returned <8 x i8> is added to itself (add) and the sum is stored through %q.
    647 declare <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %p)
    648 define void @test_v8i8_v1i64(<1 x i64>* %p, <8 x i8>* %q) {
    649 ; SOFT: vadd.i64 [[REG:d[0-9]+]]
    650 ; SOFT: vmov r1, r0, [[REG]]
    651 ; HARD: vadd.i64 d0
    652     %1 = load <1 x i64>* %p
    653     %2 = add <1 x i64> %1, %1
    654     %3 = call <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %2)
    655     %4 = add <8 x i8> %3, %3
    656     store <8 x i8> %4, <8 x i8>* %q
    657     ret void
    658 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    659 ; SOFT: vrev64.8 [[REG]]
    660 ; HARD: vrev64.8 {{d[0-9]+}}, d0
    661 }
    662 
    663 ; CHECK-LABEL: test_v8i8_v2f32:
        ; Helper under test takes a <2 x float> and returns a <8 x i8>.
        ; The <2 x float> loaded from %p is added to itself (fadd) before the call; the
        ; returned <8 x i8> is added to itself (add) and the sum is stored through %q.
    664 declare <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %p)
    665 define void @test_v8i8_v2f32(<2 x float>* %p, <8 x i8>* %q) {
    666 ; SOFT: vrev64.32 [[REG:d[0-9]+]]
    667 ; SOFT: vmov r1, r0, [[REG]]
    668 ; HARD: vrev64.32 d0
    669     %1 = load <2 x float>* %p
    670     %2 = fadd <2 x float> %1, %1
    671     %3 = call <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %2)
    672     %4 = add <8 x i8> %3, %3
    673     store <8 x i8> %4, <8 x i8>* %q
    674     ret void
    675 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    676 ; SOFT: vrev64.8 [[REG]]
    677 ; HARD: vrev64.8 {{d[0-9]+}}, d0
    678 }
    679 
    680 ; CHECK-LABEL: test_v8i8_v2i32:
        ; Helper under test takes a <2 x i32> and returns a <8 x i8>.
        ; The <2 x i32> loaded from %p is added to itself (add) before the call; the
        ; returned <8 x i8> is added to itself (add) and the sum is stored through %q.
    681 declare <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %p)
    682 define void @test_v8i8_v2i32(<2 x i32>* %p, <8 x i8>* %q) {
    683 ; SOFT: vrev64.32 [[REG:d[0-9]+]]
    684 ; SOFT: vmov r1, r0, [[REG]]
    685 ; HARD: vrev64.32 d0
    686     %1 = load <2 x i32>* %p
    687     %2 = add <2 x i32> %1, %1
    688     %3 = call <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %2)
    689     %4 = add <8 x i8> %3, %3
    690     store <8 x i8> %4, <8 x i8>* %q
    691     ret void
    692 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    693 ; SOFT: vrev64.8 [[REG]]
    694 ; HARD: vrev64.8 {{d[0-9]+}}, d0
    695 }
    696 
    697 ; CHECK-LABEL: test_v8i8_v4i16:
        ; Helper under test takes a <4 x i16> and returns a <8 x i8>.
        ; The <4 x i16> loaded from %p is added to itself (add) before the call; the
        ; returned <8 x i8> is added to itself (add) and the sum is stored through %q.
    698 declare <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %p)
    699 define void @test_v8i8_v4i16(<4 x i16>* %p, <8 x i8>* %q) {
    700 ; SOFT: vrev64.16 [[REG:d[0-9]+]]
    701 ; SOFT: vmov r1, r0, [[REG]]
    702 ; HARD: vrev64.16 d0
    703     %1 = load <4 x i16>* %p
    704     %2 = add <4 x i16> %1, %1
    705     %3 = call <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %2)
    706     %4 = add <8 x i8> %3, %3
    707     store <8 x i8> %4, <8 x i8>* %q
    708     ret void
    709 ; SOFT: vmov [[REG:d[0-9]+]], r1, r0
    710 ; SOFT: vrev64.8 [[REG]]
    711 ; HARD: vrev64.8 {{d[0-9]+}}, d0
    712 }
    713 
    714 ; CHECK-LABEL: test_f128_v2f64:
        ; Helper under test takes a <2 x double> and returns an fp128.
        ; The <2 x double> loaded from %p is added to itself (fadd) before the call; the
        ; returned fp128 is added to itself (fadd) and the sum is stored through %q.
    715 declare fp128 @test_f128_v2f64_helper(<2 x double> %p)
    716 define void @test_f128_v2f64(<2 x double>* %p, fp128* %q) {
    717 ; SOFT: vadd.f64 [[REG2:d[0-9]+]]
    718 ; SOFT: vadd.f64 [[REG1:d[0-9]+]]
    719 ; SOFT: vmov r1, r0, [[REG1]]
    720 ; SOFT: vmov r3, r2, [[REG2]]
    721 ; HARD: vadd.f64 d1
    722 ; HARD: vadd.f64 d0
    723     %1 = load <2 x double>* %p
    724     %2 = fadd <2 x double> %1, %1
    725     %3 = call fp128 @test_f128_v2f64_helper(<2 x double> %2)
    726     %4 = fadd fp128 %3, %3
    727     store fp128 %4, fp128* %q
    728     ret void
    729 ; CHECK: stm sp, {r0, r1, r2, r3}
    730 }
    731 
    732 ; CHECK-LABEL: test_f128_v2i64:
        ; Helper under test takes a <2 x i64> and returns an fp128.
        ; The <2 x i64> loaded from %p is added to itself (add) before the call; the
        ; returned fp128 is added to itself (fadd) and the sum is stored through %q.
    733 declare fp128 @test_f128_v2i64_helper(<2 x i64> %p)
    734 define void @test_f128_v2i64(<2 x i64>* %p, fp128* %q) {
    735 ; SOFT: vmov r1, r0
    736 ; SOFT: vmov r3, r2
    737 ; HARD: vadd.i64 q0
    738     %1 = load <2 x i64>* %p
    739     %2 = add <2 x i64> %1, %1
    740     %3 = call fp128 @test_f128_v2i64_helper(<2 x i64> %2)
    741     %4 = fadd fp128 %3, %3
    742     store fp128 %4, fp128* %q
    743     ret void
    744 ; CHECK: stm sp, {r0, r1, r2, r3}
    745 }
    746 
    747 ; CHECK-LABEL: test_f128_v4f32:
        ; Helper under test takes a <4 x float> and returns an fp128.
        ; The <4 x float> loaded from %p is added to itself (fadd) before the call; the
        ; returned fp128 is added to itself (fadd) and the sum is stored through %q.
    748 declare fp128 @test_f128_v4f32_helper(<4 x float> %p)
    749 define void @test_f128_v4f32(<4 x float>* %p, fp128* %q) {
    750 ; SOFT: vmov r1, r0
    751 ; SOFT: vmov r3, r2
    752 ; HARD: vrev64.32 q0
    753     %1 = load <4 x float>* %p
    754     %2 = fadd <4 x float> %1, %1
    755     %3 = call fp128 @test_f128_v4f32_helper(<4 x float> %2)
    756     %4 = fadd fp128 %3, %3
    757     store fp128 %4, fp128* %q
    758     ret void
    759 ; CHECK: stm sp, {r0, r1, r2, r3}
    760 }
    761 
    762 ; CHECK-LABEL: test_f128_v4i32:
        ; Helper under test takes a <4 x i32> and returns an fp128.
        ; The <4 x i32> loaded from %p is added to itself (add) before the call; the
        ; returned fp128 is added to itself (fadd) and the sum is stored through %q.
    763 declare fp128 @test_f128_v4i32_helper(<4 x i32> %p)
    764 define void @test_f128_v4i32(<4 x i32>* %p, fp128* %q) {
    765 ; SOFT: vmov r1, r0
    766 ; SOFT: vmov r3, r2
    767 ; HARD: vrev64.32 q0
    768     %1 = load <4 x i32>* %p
    769     %2 = add <4 x i32> %1, %1
    770     %3 = call fp128 @test_f128_v4i32_helper(<4 x i32> %2)
    771     %4 = fadd fp128 %3, %3
    772     store fp128 %4, fp128* %q
    773     ret void
    774 ; CHECK: stm sp, {r0, r1, r2, r3}
    775 }
    776 
    777 ; CHECK-LABEL: test_f128_v8i16:
        ; Helper under test takes a <8 x i16> and returns an fp128.
        ; The <8 x i16> loaded from %p is added to itself (add) before the call; the
        ; returned fp128 is added to itself (fadd) and the sum is stored through %q.
    778 declare fp128 @test_f128_v8i16_helper(<8 x i16> %p)
    779 define void @test_f128_v8i16(<8 x i16>* %p, fp128* %q) {
    780 ; SOFT: vmov r1, r0
    781 ; SOFT: vmov r3, r2
    782 ; HARD: vrev64.16 q0
    783     %1 = load <8 x i16>* %p
    784     %2 = add <8 x i16> %1, %1
    785     %3 = call fp128 @test_f128_v8i16_helper(<8 x i16> %2)
    786     %4 = fadd fp128 %3, %3
    787     store fp128 %4, fp128* %q
    788     ret void
    789 ; CHECK: stm sp, {r0, r1, r2, r3}
    790 }
    791 
    792 ; CHECK-LABEL: test_f128_v16i8:
        ; Helper under test takes a <16 x i8> and returns an fp128.
        ; The <16 x i8> loaded from %p is added to itself (add) before the call; the
        ; returned fp128 is added to itself (fadd) and the sum is stored through %q.
    793 declare fp128 @test_f128_v16i8_helper(<16 x i8> %p)
    794 define void @test_f128_v16i8(<16 x i8>* %p, fp128* %q) {
    795 ; SOFT: vmov r1, r0
    796 ; SOFT: vmov r3, r2
    797 ; HARD: vrev64.8 q0
    798     %1 = load <16 x i8>* %p
    799     %2 = add <16 x i8> %1, %1
    800     %3 = call fp128 @test_f128_v16i8_helper(<16 x i8> %2)
    801     %4 = fadd fp128 %3, %3
    802     store fp128 %4, fp128* %q
    803     ret void
    804 ; CHECK: stm sp, {r0, r1, r2, r3}
    805 }
    806 
    807 ; CHECK-LABEL: test_v2f64_f128:
        ; Helper under test takes an fp128 and returns a <2 x double>.
        ; The fp128 loaded from %p is added to itself (fadd) before the call; the
        ; returned <2 x double> is added to itself (fadd) and the sum is stored through %q.
        ; Only soft-float expectations are listed for this function.
    808 declare <2 x double> @test_v2f64_f128_helper(fp128 %p)
    809 define void @test_v2f64_f128(fp128* %p, <2 x double>* %q) {
    810     %1 = load fp128* %p
    811     %2 = fadd fp128 %1, %1
    812     %3 = call <2 x double> @test_v2f64_f128_helper(fp128 %2)
    813     %4 = fadd <2 x double> %3, %3
    814     store <2 x double> %4, <2 x double>* %q
    815     ret void
    816 ; SOFT: vmov {{d[0-9]+}}, r3, r2
    817 ; SOFT: vmov {{d[0-9]+}}, r1, r0
    818 
    819 }
    820 
    821 ; CHECK-LABEL: test_v2f64_v2i64:
        ; Helper under test takes a <2 x i64> and returns a <2 x double>.
        ; The <2 x i64> loaded from %p is added to itself (add) before the call; the
        ; returned <2 x double> is added to itself (fadd) and the sum is stored through %q.
    822 declare <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %p)
    823 define void @test_v2f64_v2i64(<2 x i64>* %p, <2 x double>* %q) {
    824 ; SOFT: vmov r1, r0
    825 ; SOFT: vmov r3, r2
    826 ; HARD: vadd.i64 q0
    827     %1 = load <2 x i64>* %p
    828     %2 = add <2 x i64> %1, %1
    829     %3 = call <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %2)
    830     %4 = fadd <2 x double> %3, %3
    831     store <2 x double> %4, <2 x double>* %q
    832     ret void
    833 ; SOFT: vmov {{d[0-9]+}}, r3, r2
    834 ; SOFT: vmov {{d[0-9]+}}, r1, r0
    835 }
    836 
    837 ; CHECK-LABEL: test_v2f64_v4f32:
        ; Helper under test takes a <4 x float> and returns a <2 x double>.
        ; The <4 x float> loaded from %p is added to itself (fadd) before the call; the
        ; returned <2 x double> is added to itself (fadd) and the sum is stored through %q.
    838 declare <2 x double> @test_v2f64_v4f32_helper(<4 x float> %p)
    839 define void @test_v2f64_v4f32(<4 x float>* %p, <2 x double>* %q) {
    840 ; SOFT: vmov r1, r0
    841 ; SOFT: vmov r3, r2
    842 ; HARD: vrev64.32 q0
    843     %1 = load <4 x float>* %p
    844     %2 = fadd <4 x float> %1, %1
    845     %3 = call <2 x double> @test_v2f64_v4f32_helper(<4 x float> %2)
    846     %4 = fadd <2 x double> %3, %3
    847     store <2 x double> %4, <2 x double>* %q
    848     ret void
    849 ; SOFT: vmov {{d[0-9]+}}, r3, r2
    850 ; SOFT: vmov {{d[0-9]+}}, r1, r0
    851 }
    852 
    853 ; CHECK-LABEL: test_v2f64_v4i32:
        ; Passes a <4 x i32> to @test_v2f64_v4i32_helper and consumes the
        ; <2 x double> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vrev64.32 naming q0.
    854 declare <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %p)
    855 define void @test_v2f64_v4i32(<4 x i32>* %p, <2 x double>* %q) {
    856 ; SOFT: vmov r1, r0
    857 ; SOFT: vmov r3, r2
    858 ; HARD: vrev64.32 q0
    859     %1 = load <4 x i32>* %p
    860     %2 = add <4 x i32> %1, %1  ; double the input before the call
    861     %3 = call <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %2)
    862     %4 = fadd <2 x double> %3, %3  ; double the helper's result
    863     store <2 x double> %4, <2 x double>* %q
    864     ret void
    865 ; SOFT: vmov {{d[0-9]+}}, r3, r2
    866 ; SOFT: vmov {{d[0-9]+}}, r1, r0
    867 }
    868 
    869 ; CHECK-LABEL: test_v2f64_v8i16:
        ; Passes an <8 x i16> to @test_v2f64_v8i16_helper and consumes the
        ; <2 x double> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vrev64.16 naming q0.
    870 declare <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %p)
    871 define void @test_v2f64_v8i16(<8 x i16>* %p, <2 x double>* %q) {
    872 ; SOFT: vmov r1, r0
    873 ; SOFT: vmov r3, r2
    874 ; HARD: vrev64.16 q0
    875     %1 = load <8 x i16>* %p
    876     %2 = add <8 x i16> %1, %1  ; double the input before the call
    877     %3 = call <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %2)
    878     %4 = fadd <2 x double> %3, %3  ; double the helper's result
    879     store <2 x double> %4, <2 x double>* %q
    880     ret void
    881 ; SOFT: vmov {{d[0-9]+}}, r3, r2
    882 ; SOFT: vmov {{d[0-9]+}}, r1, r0
    883 }
    884 
    885 ; CHECK-LABEL: test_v2f64_v16i8:
        ; Passes a <16 x i8> to @test_v2f64_v16i8_helper and consumes the
        ; <2 x double> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vrev64.8 naming q0.
    886 declare <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %p)
    887 define void @test_v2f64_v16i8(<16 x i8>* %p, <2 x double>* %q) {
    888 ; SOFT: vmov r1, r0
    889 ; SOFT: vmov r3, r2
    890 ; HARD: vrev64.8 q0
    891     %1 = load <16 x i8>* %p
    892     %2 = add <16 x i8> %1, %1  ; double the input before the call
    893     %3 = call <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %2)
    894     %4 = fadd <2 x double> %3, %3  ; double the helper's result
    895     store <2 x double> %4, <2 x double>* %q
    896     ret void
    897 ; SOFT: vmov {{d[0-9]+}}, r3, r2
    898 ; SOFT: vmov {{d[0-9]+}}, r1, r0
    899 }
    900 
    901 ; CHECK-LABEL: test_v2i64_f128:
        ; Passes an fp128 to @test_v2i64_f128_helper and consumes the <2 x i64> it
        ; returns. The loaded input is added to itself before the call, and the
        ; returned value is added to itself before being stored through %q.
        ; Soft-float run: expects a d-register vmov from r3, r2 followed by one
        ; from r1, r0. There are no hard-float-specific expectations.
    902 declare <2 x i64> @test_v2i64_f128_helper(fp128 %p)
    903 define void @test_v2i64_f128(fp128* %p, <2 x i64>* %q) {
    904     %1 = load fp128* %p
    905     %2 = fadd fp128 %1, %1  ; double the input before the call
    906     %3 = call <2 x i64> @test_v2i64_f128_helper(fp128 %2)
    907     %4 = add <2 x i64> %3, %3  ; double the helper's result
    908     store <2 x i64> %4, <2 x i64>* %q
    909     ret void
    910 ; SOFT: vmov {{d[0-9]+}}, r3, r2
    911 ; SOFT: vmov {{d[0-9]+}}, r1, r0
    912 }
    913 
    914 ; CHECK-LABEL: test_v2i64_v2f64:
        ; Passes a <2 x double> to @test_v2i64_v2f64_helper and consumes the
        ; <2 x i64> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vadd.f64 naming d1, then one naming d0.
        ; The first two soft-float moves are matched without a source d-register:
        ; no capture is defined inside this test, so naming one would silently
        ; reuse whatever an earlier test happened to bind.
    915 declare <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %p)
    916 define void @test_v2i64_v2f64(<2 x double>* %p, <2 x i64>* %q) {
    917 ; SOFT: vmov r1, r0
    918 ; SOFT: vmov r3, r2
    919 ; HARD: vadd.f64 d1
    920 ; HARD: vadd.f64 d0
    921     %1 = load <2 x double>* %p
    922     %2 = fadd <2 x double> %1, %1  ; double the input before the call
    923     %3 = call <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %2)
    924     %4 = add <2 x i64> %3, %3  ; double the helper's result
    925     store <2 x i64> %4, <2 x i64>* %q
    926     ret void
    927 ; SOFT: vmov {{d[0-9]+}}, r3, r2
    928 ; SOFT: vmov {{d[0-9]+}}, r1, r0
    929 }
    930 
    931 ; CHECK-LABEL: test_v2i64_v4f32:
        ; Passes a <4 x float> to @test_v2i64_v4f32_helper and consumes the
        ; <2 x i64> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vrev64.32 naming q0.
    932 declare <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %p) 
    933 define void @test_v2i64_v4f32(<4 x float>* %p, <2 x i64>* %q) {
    934 ; SOFT: vmov r1, r0
    935 ; SOFT: vmov r3, r2
    936 ; HARD: vrev64.32 q0
    937     %1 = load <4 x float>* %p
    938     %2 = fadd <4 x float> %1, %1  ; double the input before the call
    939     %3 = call <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %2)
    940     %4 = add <2 x i64> %3, %3  ; double the helper's result
    941     store <2 x i64> %4, <2 x i64>* %q
    942     ret void
    943 ; SOFT: vmov {{d[0-9]+}}, r3, r2
    944 ; SOFT: vmov {{d[0-9]+}}, r1, r0
    945 }
    946 
    947 ; CHECK-LABEL: test_v2i64_v4i32:
        ; Passes a <4 x i32> to @test_v2i64_v4i32_helper and consumes the
        ; <2 x i64> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vrev64.32 naming q0.
    948 declare <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %p)
    949 define void @test_v2i64_v4i32(<4 x i32>* %p, <2 x i64>* %q) {
    950 ; SOFT: vmov r1, r0
    951 ; SOFT: vmov r3, r2
    952 ; HARD: vrev64.32 q0
    953     %1 = load <4 x i32>* %p
    954     %2 = add <4 x i32> %1, %1  ; double the input before the call
    955     %3 = call <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %2)
    956     %4 = add <2 x i64> %3, %3  ; double the helper's result
    957     store <2 x i64> %4, <2 x i64>* %q
    958     ret void
    959 ; SOFT: vmov {{d[0-9]+}}, r3, r2
    960 ; SOFT: vmov {{d[0-9]+}}, r1, r0
    961 }
    962 
    963 ; CHECK-LABEL: test_v2i64_v8i16:
        ; Passes an <8 x i16> to @test_v2i64_v8i16_helper and consumes the
        ; <2 x i64> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vrev64.16 naming q0.
    964 declare <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %p)
    965 define void @test_v2i64_v8i16(<8 x i16>* %p, <2 x i64>* %q) {
    966 ; SOFT: vmov r1, r0
    967 ; SOFT: vmov r3, r2
    968 ; HARD: vrev64.16 q0
    969     %1 = load <8 x i16>* %p
    970     %2 = add <8 x i16> %1, %1  ; double the input before the call
    971     %3 = call <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %2)
    972     %4 = add <2 x i64> %3, %3  ; double the helper's result
    973     store <2 x i64> %4, <2 x i64>* %q
    974     ret void
    975 ; SOFT: vmov {{d[0-9]+}}, r3, r2
    976 ; SOFT: vmov {{d[0-9]+}}, r1, r0
    977 }
    978 
    979 ; CHECK-LABEL: test_v2i64_v16i8:
        ; Passes a <16 x i8> to @test_v2i64_v16i8_helper and consumes the
        ; <2 x i64> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vrev64.8 naming q0.
    980 declare <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %p)
    981 define void @test_v2i64_v16i8(<16 x i8>* %p, <2 x i64>* %q) {
    982 ; SOFT: vmov r1, r0
    983 ; SOFT: vmov r3, r2
    984 ; HARD: vrev64.8 q0
    985     %1 = load <16 x i8>* %p
    986     %2 = add <16 x i8> %1, %1  ; double the input before the call
    987     %3 = call <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %2)
    988     %4 = add <2 x i64> %3, %3  ; double the helper's result
    989     store <2 x i64> %4, <2 x i64>* %q
    990     ret void
    991 ; SOFT: vmov {{d[0-9]+}}, r3, r2
    992 ; SOFT: vmov {{d[0-9]+}}, r1, r0
    993 }
    994 
    995 ; CHECK-LABEL: test_v4f32_f128:
        ; Passes an fp128 to @test_v4f32_f128_helper and consumes the <4 x float>
        ; it returns. The loaded input is added to itself before the call, and the
        ; returned value is added to itself before being stored through %q.
        ; Soft-float run: expects a d-register vmov from r3, r2 followed by one
        ; from r1, r0. There are no hard-float-specific expectations.
    996 declare <4 x float> @test_v4f32_f128_helper(fp128 %p)
    997 define void @test_v4f32_f128(fp128* %p, <4 x float>* %q) {
    998     %1 = load fp128* %p
    999     %2 = fadd fp128 %1, %1  ; double the input before the call
   1000     %3 = call <4 x float> @test_v4f32_f128_helper(fp128 %2)
   1001     %4 = fadd <4 x float> %3, %3  ; double the helper's result
   1002     store <4 x float> %4, <4 x float>* %q
   1003     ret void
   1004 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1005 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1006 }
   1007 
   1008 ; CHECK-LABEL: test_v4f32_v2f64:
        ; Passes a <2 x double> to @test_v4f32_v2f64_helper and consumes the
        ; <4 x float> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vadd.f64 naming d1, then one naming d0.
   1009 declare <4 x float> @test_v4f32_v2f64_helper(<2 x double> %p)
   1010 define void @test_v4f32_v2f64(<2 x double>* %p, <4 x float>* %q) {
   1011 ; SOFT: vmov r1, r0
   1012 ; SOFT: vmov r3, r2
   1013 ; HARD: vadd.f64 d1
   1014 ; HARD: vadd.f64 d0
   1015     %1 = load <2 x double>* %p
   1016     %2 = fadd <2 x double> %1, %1  ; double the input before the call
   1017     %3 = call <4 x float> @test_v4f32_v2f64_helper(<2 x double> %2)
   1018     %4 = fadd <4 x float> %3, %3  ; double the helper's result
   1019     store <4 x float> %4, <4 x float>* %q
   1020     ret void
   1021 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1022 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1023 }
   1024 
   1025 ; CHECK-LABEL: test_v4f32_v2i64:
        ; Passes a <2 x i64> to @test_v4f32_v2i64_helper and consumes the
        ; <4 x float> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vadd.i64 naming q0.
   1026 declare <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %p)
   1027 define void @test_v4f32_v2i64(<2 x i64>* %p, <4 x float>* %q) {
   1028 ; SOFT: vmov r1, r0
   1029 ; SOFT: vmov r3, r2
   1030 ; HARD: vadd.i64 q0
   1031     %1 = load <2 x i64>* %p
   1032     %2 = add <2 x i64> %1, %1  ; double the input before the call
   1033     %3 = call <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %2)
   1034     %4 = fadd <4 x float> %3, %3  ; double the helper's result
   1035     store <4 x float> %4, <4 x float>* %q
   1036     ret void
   1037 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1038 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1039 }
   1040 
   1041 ; CHECK-LABEL: test_v4f32_v4i32:
        ; Passes a <4 x i32> to @test_v4f32_v4i32_helper and consumes the
        ; <4 x float> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vrev64.32 naming q0.
   1042 declare <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %p)
   1043 define void @test_v4f32_v4i32(<4 x i32>* %p, <4 x float>* %q) {
   1044 ; SOFT: vmov r1, r0
   1045 ; SOFT: vmov r3, r2
   1046 ; HARD: vrev64.32 q0
   1047     %1 = load <4 x i32>* %p
   1048     %2 = add <4 x i32> %1, %1  ; double the input before the call
   1049     %3 = call <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %2)
   1050     %4 = fadd <4 x float> %3, %3  ; double the helper's result
   1051     store <4 x float> %4, <4 x float>* %q
   1052     ret void
   1053 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1054 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1055 }
   1056 
   1057 ; CHECK-LABEL: test_v4f32_v8i16:
        ; Passes an <8 x i16> to @test_v4f32_v8i16_helper and consumes the
        ; <4 x float> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vrev64.16 naming q0.
   1058 declare <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %p)
   1059 define void @test_v4f32_v8i16(<8 x i16>* %p, <4 x float>* %q) {
   1060 ; SOFT: vmov r1, r0
   1061 ; SOFT: vmov r3, r2
   1062 ; HARD: vrev64.16 q0
   1063     %1 = load <8 x i16>* %p
   1064     %2 = add <8 x i16> %1, %1  ; double the input before the call
   1065     %3 = call <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %2)
   1066     %4 = fadd <4 x float> %3, %3  ; double the helper's result
   1067     store <4 x float> %4, <4 x float>* %q
   1068     ret void
   1069 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1070 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1071 }
   1072 
   1073 ; CHECK-LABEL: test_v4f32_v16i8:
        ; Passes a <16 x i8> to @test_v4f32_v16i8_helper and consumes the
        ; <4 x float> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vrev64.8 naming q0.
   1074 declare <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %p)
   1075 define void @test_v4f32_v16i8(<16 x i8>* %p, <4 x float>* %q) {
   1076 ; SOFT: vmov r1, r0
   1077 ; SOFT: vmov r3, r2
   1078 ; HARD: vrev64.8 q0
   1079     %1 = load <16 x i8>* %p
   1080     %2 = add <16 x i8> %1, %1  ; double the input before the call
   1081     %3 = call <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %2)
   1082     %4 = fadd <4 x float> %3, %3  ; double the helper's result
   1083     store <4 x float> %4, <4 x float>* %q
   1084     ret void
   1085 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1086 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1087 }
   1088 
   1089 ; CHECK-LABEL: test_v4i32_f128:
        ; Passes an fp128 to @test_v4i32_f128_helper and consumes the <4 x i32> it
        ; returns. The loaded input is added to itself before the call, and the
        ; returned value is added to itself before being stored through %q.
        ; Soft-float run: expects a d-register vmov from r3, r2 followed by one
        ; from r1, r0. There are no hard-float-specific expectations.
   1090 declare <4 x i32> @test_v4i32_f128_helper(fp128 %p)
   1091 define void @test_v4i32_f128(fp128* %p, <4 x i32>* %q) {
   1092     %1 = load fp128* %p
   1093     %2 = fadd fp128 %1, %1  ; double the input before the call
   1094     %3 = call <4 x i32> @test_v4i32_f128_helper(fp128 %2)
   1095     %4 = add <4 x i32> %3, %3  ; double the helper's result
   1096     store <4 x i32> %4, <4 x i32>* %q
   1097     ret void
   1098 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1099 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1100 }
   1101 
   1102 ; CHECK-LABEL: test_v4i32_v2f64:
        ; Passes a <2 x double> to @test_v4i32_v2f64_helper and consumes the
        ; <4 x i32> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vadd.f64 naming d1, then one naming d0.
   1103 declare <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %p)
   1104 define void @test_v4i32_v2f64(<2 x double>* %p, <4 x i32>* %q) {
   1105 ; SOFT: vmov r1, r0
   1106 ; SOFT: vmov r3, r2
   1107 ; HARD: vadd.f64 d1
   1108 ; HARD: vadd.f64 d0
   1109     %1 = load <2 x double>* %p
   1110     %2 = fadd <2 x double> %1, %1  ; double the input before the call
   1111     %3 = call <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %2)
   1112     %4 = add <4 x i32> %3, %3  ; double the helper's result
   1113     store <4 x i32> %4, <4 x i32>* %q
   1114     ret void
   1115 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1116 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1117 }
   1118 
   1119 ; CHECK-LABEL: test_v4i32_v2i64:
        ; Passes a <2 x i64> to @test_v4i32_v2i64_helper and consumes the
        ; <4 x i32> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vadd.i64 naming q0.
   1120 declare <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %p)
   1121 define void @test_v4i32_v2i64(<2 x i64>* %p, <4 x i32>* %q) {
   1122 ; SOFT: vmov r1, r0
   1123 ; SOFT: vmov r3, r2
   1124 ; HARD: vadd.i64 q0
   1125     %1 = load <2 x i64>* %p
   1126     %2 = add <2 x i64> %1, %1  ; double the input before the call
   1127     %3 = call <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %2)
   1128     %4 = add <4 x i32> %3, %3  ; double the helper's result
   1129     store <4 x i32> %4, <4 x i32>* %q
   1130     ret void
   1131 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1132 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1133 }
   1134 
   1135 ; CHECK-LABEL: test_v4i32_v4f32:
        ; Passes a <4 x float> to @test_v4i32_v4f32_helper and consumes the
        ; <4 x i32> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vrev64.32 naming q0.
   1136 declare <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %p)
   1137 define void @test_v4i32_v4f32(<4 x float>* %p, <4 x i32>* %q) {
   1138 ; SOFT: vmov r1, r0
   1139 ; SOFT: vmov r3, r2
   1140 ; HARD: vrev64.32 q0
   1141     %1 = load <4 x float>* %p
   1142     %2 = fadd <4 x float> %1, %1  ; double the input before the call
   1143     %3 = call <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %2)
   1144     %4 = add <4 x i32> %3, %3  ; double the helper's result
   1145     store <4 x i32> %4, <4 x i32>* %q
   1146     ret void
   1147 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1148 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1149 }
   1150 
   1151 ; CHECK-LABEL: test_v4i32_v8i16:
        ; Passes an <8 x i16> to @test_v4i32_v8i16_helper and consumes the
        ; <4 x i32> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vrev64.16 naming q0.
   1152 declare <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %p)
   1153 define void @test_v4i32_v8i16(<8 x i16>* %p, <4 x i32>* %q) {
   1154 ; SOFT: vmov r1, r0
   1155 ; SOFT: vmov r3, r2
   1156 ; HARD: vrev64.16 q0
   1157     %1 = load <8 x i16>* %p
   1158     %2 = add <8 x i16> %1, %1  ; double the input before the call
   1159     %3 = call <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %2)
   1160     %4 = add <4 x i32> %3, %3  ; double the helper's result
   1161     store <4 x i32> %4, <4 x i32>* %q
   1162     ret void
   1163 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1164 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1165 }
   1166 
   1167 ; CHECK-LABEL: test_v4i32_v16i8:
        ; Passes a <16 x i8> to @test_v4i32_v16i8_helper and consumes the
        ; <4 x i32> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vrev64.8 naming q0.
   1168 declare <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %p)
   1169 define void @test_v4i32_v16i8(<16 x i8>* %p, <4 x i32>* %q) {
   1170 ; SOFT: vmov r1, r0
   1171 ; SOFT: vmov r3, r2
   1172 ; HARD: vrev64.8 q0
   1173     %1 = load <16 x i8>* %p
   1174     %2 = add <16 x i8> %1, %1  ; double the input before the call
   1175     %3 = call <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %2)
   1176     %4 = add <4 x i32> %3, %3  ; double the helper's result
   1177     store <4 x i32> %4, <4 x i32>* %q
   1178     ret void
   1179 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1180 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1181 }
   1182 
   1183 ; CHECK-LABEL: test_v8i16_f128:
        ; Passes an fp128 to @test_v8i16_f128_helper and consumes the <8 x i16> it
        ; returns. The loaded input is added to itself before the call, and the
        ; returned value is added to itself before being stored through %q.
        ; Soft-float run: expects a d-register vmov from r3, r2 followed by one
        ; from r1, r0. There are no hard-float-specific expectations.
   1184 declare <8 x i16> @test_v8i16_f128_helper(fp128 %p)
   1185 define void @test_v8i16_f128(fp128* %p, <8 x i16>* %q) {
   1186     %1 = load fp128* %p
   1187     %2 = fadd fp128 %1, %1  ; double the input before the call
   1188     %3 = call <8 x i16> @test_v8i16_f128_helper(fp128 %2)
   1189     %4 = add <8 x i16> %3, %3  ; double the helper's result
   1190     store <8 x i16> %4, <8 x i16>* %q
   1191     ret void
   1192 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1193 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1194 }
   1195 
   1196 ; CHECK-LABEL: test_v8i16_v2f64:
        ; Passes a <2 x double> to @test_v8i16_v2f64_helper and consumes the
        ; <8 x i16> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vadd.f64 naming d1, then one naming d0.
   1197 declare <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %p)
   1198 define void @test_v8i16_v2f64(<2 x double>* %p, <8 x i16>* %q) {
   1199 ; SOFT: vmov r1, r0
   1200 ; SOFT: vmov r3, r2
   1201 ; HARD: vadd.f64 d1
   1202 ; HARD: vadd.f64 d0
   1203     %1 = load <2 x double>* %p
   1204     %2 = fadd <2 x double> %1, %1  ; double the input before the call
   1205     %3 = call <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %2)
   1206     %4 = add <8 x i16> %3, %3  ; double the helper's result
   1207     store <8 x i16> %4, <8 x i16>* %q
   1208     ret void
   1209 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1210 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1211 }
   1212 
   1213 ; CHECK-LABEL: test_v8i16_v2i64:
        ; Passes a <2 x i64> to @test_v8i16_v2i64_helper and consumes the
        ; <8 x i16> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vadd.i64 naming q0.
   1214 declare <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %p)
   1215 define void @test_v8i16_v2i64(<2 x i64>* %p, <8 x i16>* %q) {
   1216 ; SOFT: vmov r1, r0
   1217 ; SOFT: vmov r3, r2
   1218 ; HARD: vadd.i64 q0
   1219     %1 = load <2 x i64>* %p
   1220     %2 = add <2 x i64> %1, %1  ; double the input before the call
   1221     %3 = call <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %2)
   1222     %4 = add <8 x i16> %3, %3  ; double the helper's result
   1223     store <8 x i16> %4, <8 x i16>* %q
   1224     ret void
   1225 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1226 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1227 }
   1228 
   1229 ; CHECK-LABEL: test_v8i16_v4f32:
        ; Passes a <4 x float> to @test_v8i16_v4f32_helper and consumes the
        ; <8 x i16> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vrev64.32 naming q0.
   1230 declare <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %p)
   1231 define void @test_v8i16_v4f32(<4 x float>* %p, <8 x i16>* %q) {
   1232 ; SOFT: vmov r1, r0
   1233 ; SOFT: vmov r3, r2
   1234 ; HARD: vrev64.32 q0
   1235     %1 = load <4 x float>* %p
   1236     %2 = fadd <4 x float> %1, %1  ; double the input before the call
   1237     %3 = call <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %2)
   1238     %4 = add <8 x i16> %3, %3  ; double the helper's result
   1239     store <8 x i16> %4, <8 x i16>* %q
   1240     ret void
   1241 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1242 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1243 }
   1244 
   1245 ; CHECK-LABEL: test_v8i16_v4i32:
        ; Passes a <4 x i32> to @test_v8i16_v4i32_helper and consumes the
        ; <8 x i16> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vrev64.32 naming q0.
   1246 declare <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %p)
   1247 define void @test_v8i16_v4i32(<4 x i32>* %p, <8 x i16>* %q) {
   1248 ; SOFT: vmov r1, r0
   1249 ; SOFT: vmov r3, r2
   1250 ; HARD: vrev64.32 q0
   1251     %1 = load <4 x i32>* %p
   1252     %2 = add <4 x i32> %1, %1  ; double the input before the call
   1253     %3 = call <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %2)
   1254     %4 = add <8 x i16> %3, %3  ; double the helper's result
   1255     store <8 x i16> %4, <8 x i16>* %q
   1256     ret void
   1257 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1258 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1259 }
   1260 
   1261 ; CHECK-LABEL: test_v8i16_v16i8:
        ; Passes a <16 x i8> to @test_v8i16_v16i8_helper and consumes the
        ; <8 x i16> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vrev64.8 naming q0.
   1262 declare <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %p)
   1263 define void @test_v8i16_v16i8(<16 x i8>* %p, <8 x i16>* %q) {
   1264 ; SOFT: vmov r1, r0
   1265 ; SOFT: vmov r3, r2
   1266 ; HARD: vrev64.8 q0
   1267     %1 = load <16 x i8>* %p
   1268     %2 = add <16 x i8> %1, %1  ; double the input before the call
   1269     %3 = call <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %2)
   1270     %4 = add <8 x i16> %3, %3  ; double the helper's result
   1271     store <8 x i16> %4, <8 x i16>* %q
   1272     ret void
   1273 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1274 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1275 }
   1276 
   1277 ; CHECK-LABEL: test_v16i8_f128:
        ; Passes an fp128 to @test_v16i8_f128_helper and consumes the <16 x i8> it
        ; returns. The loaded input is added to itself before the call, and the
        ; returned value is added to itself before being stored through %q.
        ; Soft-float run: expects a d-register vmov from r3, r2 followed by one
        ; from r1, r0. There are no hard-float-specific expectations.
   1278 declare <16 x i8> @test_v16i8_f128_helper(fp128 %p)
   1279 define void @test_v16i8_f128(fp128* %p, <16 x i8>* %q) {
   1280     %1 = load fp128* %p
   1281     %2 = fadd fp128 %1, %1  ; double the input before the call
   1282     %3 = call <16 x i8> @test_v16i8_f128_helper(fp128 %2)
   1283     %4 = add <16 x i8> %3, %3  ; double the helper's result
   1284     store <16 x i8> %4, <16 x i8>* %q
   1285     ret void
   1286 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1287 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1288 }
   1289 
   1290 ; CHECK-LABEL: test_v16i8_v2f64:
        ; Passes a <2 x double> to @test_v16i8_v2f64_helper and consumes the
        ; <16 x i8> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vadd.f64 naming d1, then one naming d0.
   1291 declare <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %p)
   1292 define void @test_v16i8_v2f64(<2 x double>* %p, <16 x i8>* %q) {
   1293 ; SOFT: vmov r1, r0
   1294 ; SOFT: vmov r3, r2
   1295 ; HARD: vadd.f64 d1
   1296 ; HARD: vadd.f64 d0
   1297     %1 = load <2 x double>* %p
   1298     %2 = fadd <2 x double> %1, %1  ; double the input before the call
   1299     %3 = call <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %2)
   1300     %4 = add <16 x i8> %3, %3  ; double the helper's result
   1301     store <16 x i8> %4, <16 x i8>* %q
   1302     ret void
   1303 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1304 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1305 }
   1306 
   1307 ; CHECK-LABEL: test_v16i8_v2i64:
        ; Passes a <2 x i64> to @test_v16i8_v2i64_helper and consumes the
        ; <16 x i8> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vadd.i64 naming q0.
   1308 declare <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %p)
   1309 define void @test_v16i8_v2i64(<2 x i64>* %p, <16 x i8>* %q) {
   1310 ; SOFT: vmov r1, r0
   1311 ; SOFT: vmov r3, r2
   1312 ; HARD: vadd.i64 q0
   1313     %1 = load <2 x i64>* %p
   1314     %2 = add <2 x i64> %1, %1  ; double the input before the call
   1315     %3 = call <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %2)
   1316     %4 = add <16 x i8> %3, %3  ; double the helper's result
   1317     store <16 x i8> %4, <16 x i8>* %q
   1318     ret void
   1319 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1320 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1321 }
   1322 
   1323 ; CHECK-LABEL: test_v16i8_v4f32:
        ; Passes a <4 x float> to @test_v16i8_v4f32_helper and consumes the
        ; <16 x i8> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vrev64.32 naming q0.
   1324 declare <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %p)
   1325 define void @test_v16i8_v4f32(<4 x float>* %p, <16 x i8>* %q) {
   1326 ; SOFT: vmov r1, r0
   1327 ; SOFT: vmov r3, r2
   1328 ; HARD: vrev64.32 q0
   1329     %1 = load <4 x float>* %p
   1330     %2 = fadd <4 x float> %1, %1  ; double the input before the call
   1331     %3 = call <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %2)
   1332     %4 = add <16 x i8> %3, %3  ; double the helper's result
   1333     store <16 x i8> %4, <16 x i8>* %q
   1334     ret void
   1335 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1336 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1337 }
   1338 
   1339 ; CHECK-LABEL: test_v16i8_v4i32:
        ; Passes a <4 x i32> to @test_v16i8_v4i32_helper and consumes the
        ; <16 x i8> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vrev64.32 naming q0.
   1340 declare <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %p)
   1341 define void @test_v16i8_v4i32(<4 x i32>* %p, <16 x i8>* %q) {
   1342 ; SOFT: vmov r1, r0
   1343 ; SOFT: vmov r3, r2
   1344 ; HARD: vrev64.32 q0
   1345     %1 = load <4 x i32>* %p
   1346     %2 = add <4 x i32> %1, %1  ; double the input before the call
   1347     %3 = call <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %2)
   1348     %4 = add <16 x i8> %3, %3  ; double the helper's result
   1349     store <16 x i8> %4, <16 x i8>* %q
   1350     ret void
   1351 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1352 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1353 }
   1354 
   1355 ; CHECK-LABEL: test_v16i8_v8i16:
        ; Passes an <8 x i16> to @test_v16i8_v8i16_helper and consumes the
        ; <16 x i8> it returns. The loaded input is added to itself before the
        ; call, and the returned value is added to itself before being stored
        ; through %q.
        ; Soft-float run: expects "vmov r1, r0", "vmov r3, r2", then a d-register
        ; vmov from r3, r2 followed by one from r1, r0, in that order.
        ; Hard-float run: expects a vrev64.16 naming q0.
   1356 declare <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %p)
   1357 define void @test_v16i8_v8i16(<8 x i16>* %p, <16 x i8>* %q) {
   1358 ; SOFT: vmov r1, r0
   1359 ; SOFT: vmov r3, r2
   1360 ; HARD: vrev64.16 q0
   1361     %1 = load <8 x i16>* %p
   1362     %2 = add <8 x i16> %1, %1  ; double the input before the call
   1363     %3 = call <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %2)
   1364     %4 = add <16 x i8> %3, %3  ; double the helper's result
   1365     store <16 x i8> %4, <16 x i8>* %q
   1366     ret void
   1367 ; SOFT: vmov {{d[0-9]+}}, r3, r2
   1368 ; SOFT: vmov {{d[0-9]+}}, r1, r0
   1369 }
   1370