Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-NOOPT
      2 ; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OPT
      3 ; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-NOOPT
      4 ; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-OPT
        ; The four invocations above form a 2x2 matrix: NEON assembly syntax
        ; (apple or generic) crossed with -disable-adv-copy-opt (true or false).
        ; Each invocation gives FileCheck a shared prefix (CHECK or GENERIC) plus a
        ; variant prefix carrying a -NOOPT or -OPT suffix, so expectations that
        ; hold for both copy-optimization settings use the shared prefix and only
        ; the differing copies use the suffixed one.
      5 
      6 define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
        ; Adds %a and %b as vectors, then combines lane 0 of that sum with lane 0
        ; of %b in scalar form: the scalar sum is placed in lane 0 of the result
        ; and the scalar difference in lane 1.
        ;
        ; Apple-syntax expectations: one vector add, then a scalar add and a
        ; scalar sub that both read the d-register view of the vector-add result.
      7 ; CHECK-LABEL: bar:
      8 ; CHECK: add.2d	v[[REG:[0-9]+]], v0, v1
      9 ; CHECK: add	d[[REG3:[0-9]+]], d[[REG]], d1
     10 ; CHECK: sub	d[[REG2:[0-9]+]], d[[REG]], d1
     11 ; Without advanced copy optimization, we end up with cross register
     12 ; banks copies that cannot be coalesced.
     13 ; CHECK-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
     14 ; With advanced copy optimization, we end up with just one copy
     15 ; to insert the computed high part into the V register. 
     16 ; CHECK-OPT-NOT: fmov
     17 ; CHECK: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
     18 ; CHECK-NOOPT: fmov d0, [[COPY_REG3]]
     19 ; CHECK-OPT-NOT: fmov
     20 ; CHECK: ins.d v0[1], [[COPY_REG2]]
     21 ; CHECK-NEXT: ret
     22 ;
        ; The same sequence of expectations, written in generic NEON syntax.
     23 ; GENERIC-LABEL: bar:
     24 ; GENERIC: add	v[[REG:[0-9]+]].2d, v0.2d, v1.2d
     25 ; GENERIC: add	d[[REG3:[0-9]+]], d[[REG]], d1
     26 ; GENERIC: sub	d[[REG2:[0-9]+]], d[[REG]], d1
     27 ; GENERIC-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
     28 ; GENERIC-OPT-NOT: fmov
     29 ; GENERIC: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
     30 ; GENERIC-NOOPT: fmov d0, [[COPY_REG3]]
     31 ; GENERIC-OPT-NOT: fmov
     32 ; GENERIC: ins v0.d[1], [[COPY_REG2]]
     33 ; GENERIC-NEXT: ret
     34   %add = add <2 x i64> %a, %b
        ; Lane 0 of the vector sum and lane 0 of %b feed both scalar operations.
     35   %vgetq_lane = extractelement <2 x i64> %add, i32 0
     36   %vgetq_lane2 = extractelement <2 x i64> %b, i32 0
     37   %add3 = add i64 %vgetq_lane, %vgetq_lane2
     38   %sub = sub i64 %vgetq_lane, %vgetq_lane2
        ; Build the result from undef: scalar sum in lane 0, scalar difference in lane 1.
     39   %vecinit = insertelement <2 x i64> undef, i64 %add3, i32 0
     40   %vecinit8 = insertelement <2 x i64> %vecinit, i64 %sub, i32 1
     41   ret <2 x i64> %vecinit8
     42 }
     43 
     44 define double @subdd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
        ; Subtracts lane 0 of %a from lane 0 of %b as i64 and returns the result
        ; bitcast to double. Both syntaxes expect a single scalar sub on d
        ; registers with ret as the very next instruction.
        ; NOTE(review): the d1, d0 operand order presumably reflects %a arriving
        ; in v0 and %b in v1 - confirm against the calling convention.
     45 ; CHECK-LABEL: subdd_su64:
     46 ; CHECK: sub d0, d1, d0
     47 ; CHECK-NEXT: ret
     48 ; GENERIC-LABEL: subdd_su64:
     49 ; GENERIC: sub d0, d1, d0
     50 ; GENERIC-NEXT: ret
     51   %vecext = extractelement <2 x i64> %a, i32 0
     52   %vecext1 = extractelement <2 x i64> %b, i32 0
     53   %sub.i = sub nsw i64 %vecext1, %vecext
        ; The i64 result is reinterpreted as a double, not converted.
     54   %retval = bitcast i64 %sub.i to double
     55   ret double %retval
     56 }
     57 
     58 define double @vaddd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
        ; Adds lane 0 of %b and lane 0 of %a as i64 and returns the result
        ; bitcast to double. Both syntaxes expect a single scalar add on d
        ; registers with ret as the very next instruction.
     59 ; CHECK-LABEL: vaddd_su64:
     60 ; CHECK: add d0, d1, d0
     61 ; CHECK-NEXT: ret
     62 ; GENERIC-LABEL: vaddd_su64:
     63 ; GENERIC: add d0, d1, d0
     64 ; GENERIC-NEXT: ret
     65   %vecext = extractelement <2 x i64> %a, i32 0
     66   %vecext1 = extractelement <2 x i64> %b, i32 0
     67   %add.i = add nsw i64 %vecext1, %vecext
        ; The i64 result is reinterpreted as a double, not converted.
     68   %retval = bitcast i64 %add.i to double
     69   ret double %retval
     70 }
     71 
     72 ; sub MI doesn't access dsub register.
     73 define double @add_sub_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
        ; Adds lane 0 of %b and lane 0 of %a, negates the sum by subtracting it
        ; from zero, and returns the result bitcast to double. The expectations
        ; are a scalar add into d0, then a scalar sub whose first source may be
        ; any d register and whose second source is d0, then ret immediately.
     74 ; CHECK-LABEL: add_sub_su64:
     75 ; CHECK: add d0, d1, d0
     76 ; CHECK: sub d0, {{d[0-9]+}}, d0
     77 ; CHECK-NEXT: ret
     78 ; GENERIC-LABEL: add_sub_su64:
     79 ; GENERIC: add d0, d1, d0
     80 ; GENERIC: sub d0, {{d[0-9]+}}, d0
     81 ; GENERIC-NEXT: ret
     82   %vecext = extractelement <2 x i64> %a, i32 0
     83   %vecext1 = extractelement <2 x i64> %b, i32 0
     84   %add.i = add i64 %vecext1, %vecext
        ; 0 - x: the negation whose minuend the checks above leave unconstrained.
     85   %sub.i = sub i64 0, %add.i
     86   %retval = bitcast i64 %sub.i to double
     87   ret double %retval
     88 }
     89 define double @and_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
        ; Computes the bitwise AND of lane 0 of %b and lane 0 of %a as i64 and
        ; returns the result bitcast to double. Both syntaxes expect a single
        ; 8-byte vector and on v0/v1 with ret as the very next instruction.
     90 ; CHECK-LABEL: and_su64:
     91 ; CHECK: and.8b v0, v1, v0
     92 ; CHECK-NEXT: ret
     93 ; GENERIC-LABEL: and_su64:
     94 ; GENERIC: and v0.8b, v1.8b, v0.8b
     95 ; GENERIC-NEXT: ret
     96   %vecext = extractelement <2 x i64> %a, i32 0
     97   %vecext1 = extractelement <2 x i64> %b, i32 0
        ; Named after the operation, matching %add.i / %sub.i / %xor.i in the
        ; sibling tests.
     98   %and.i = and i64 %vecext1, %vecext
        ; The i64 result is reinterpreted as a double, not converted.
     99   %retval = bitcast i64 %and.i to double
    100   ret double %retval
    101 }
    102 
    103 define double @orr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
        ; Computes the bitwise OR of lane 0 of %b and lane 0 of %a as i64 and
        ; returns the result bitcast to double. Both syntaxes expect a single
        ; 8-byte vector orr on v0/v1 with ret as the very next instruction.
    104 ; CHECK-LABEL: orr_su64:
    105 ; CHECK: orr.8b v0, v1, v0
    106 ; CHECK-NEXT: ret
    107 ; GENERIC-LABEL: orr_su64:
    108 ; GENERIC: orr v0.8b, v1.8b, v0.8b
    109 ; GENERIC-NEXT: ret
    110   %vecext = extractelement <2 x i64> %a, i32 0
    111   %vecext1 = extractelement <2 x i64> %b, i32 0
    112   %or.i = or i64 %vecext1, %vecext
        ; The i64 result is reinterpreted as a double, not converted.
    113   %retval = bitcast i64 %or.i to double
    114   ret double %retval
    115 }
    116 
    117 define double @xorr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
        ; Computes the bitwise XOR of lane 0 of %b and lane 0 of %a as i64 and
        ; returns the result bitcast to double. Both syntaxes expect a single
        ; 8-byte vector eor on v0/v1 with ret as the very next instruction.
    118 ; CHECK-LABEL: xorr_su64:
    119 ; CHECK: eor.8b v0, v1, v0
    120 ; CHECK-NEXT: ret
    121 ; GENERIC-LABEL: xorr_su64:
    122 ; GENERIC: eor v0.8b, v1.8b, v0.8b
    123 ; GENERIC-NEXT: ret
    124   %vecext = extractelement <2 x i64> %a, i32 0
    125   %vecext1 = extractelement <2 x i64> %b, i32 0
    126   %xor.i = xor i64 %vecext1, %vecext
        ; The i64 result is reinterpreted as a double, not converted.
    127   %retval = bitcast i64 %xor.i to double
    128   ret double %retval
    129 }
    130