; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-NOOPT
; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OPT
; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-NOOPT
; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-OPT

; Every function in this file is compiled four ways: with Apple or generic
; NEON assembly syntax, each with the advanced copy optimization disabled
; (the NOOPT prefixes) or enabled (the OPT prefixes).

; Adds %a and %b as vectors, then returns a vector whose lane 0 is
; (sum[0] + b[0]) and whose lane 1 is (sum[0] - b[0]).
define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: bar:
; CHECK: add.2d v[[VSUM:[0-9]+]], v0, v1
; CHECK: add d[[LANE0:[0-9]+]], d[[VSUM]], d1
; CHECK: sub d[[LANE1:[0-9]+]], d[[VSUM]], d1
; With the advanced copy optimization disabled, the lane-0 value makes a
; cross-register-bank round trip (D register -> X register -> d0) whose
; copies cannot be coalesced.
; CHECK-NOOPT: fmov [[LANE0_GPR:x[0-9]+]], d[[LANE0]]
; With the optimization enabled, only one copy remains: the one that moves
; the computed high part so it can be inserted into the V register.
; CHECK-OPT-NOT: fmov
; CHECK: fmov [[LANE1_GPR:x[0-9]+]], d[[LANE1]]
; CHECK-NOOPT: fmov d0, [[LANE0_GPR]]
; CHECK-OPT-NOT: fmov
; CHECK: ins.d v0[1], [[LANE1_GPR]]
; CHECK-NEXT: ret
;
; Same expectations as above, spelled in the generic NEON syntax.
; GENERIC-LABEL: bar:
; GENERIC: add v[[VSUM:[0-9]+]].2d, v0.2d, v1.2d
; GENERIC: add d[[LANE0:[0-9]+]], d[[VSUM]], d1
; GENERIC: sub d[[LANE1:[0-9]+]], d[[VSUM]], d1
; GENERIC-NOOPT: fmov [[LANE0_GPR:x[0-9]+]], d[[LANE0]]
; GENERIC-OPT-NOT: fmov
; GENERIC: fmov [[LANE1_GPR:x[0-9]+]], d[[LANE1]]
; GENERIC-NOOPT: fmov d0, [[LANE0_GPR]]
; GENERIC-OPT-NOT: fmov
; GENERIC: ins v0.d[1], [[LANE1_GPR]]
; GENERIC-NEXT: ret
  %vsum = add <2 x i64> %a, %b
  %sum.lane0 = extractelement <2 x i64> %vsum, i32 0
  %b.lane0 = extractelement <2 x i64> %b, i32 0
  %lane0 = add i64 %sum.lane0, %b.lane0
  %lane1 = sub i64 %sum.lane0, %b.lane0
  %partial = insertelement <2 x i64> undef, i64 %lane0, i32 0
  %result = insertelement <2 x i64> %partial, i64 %lane1, i32 1
  ret <2 x i64> %result
}

; Subtracts lane 0 of %a from lane 0 of %b as i64 and returns the bits as a
; double. The expected code is a scalar sub on D registers directly followed
; by ret.
define double @subdd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: subdd_su64:
; CHECK: sub d0, d1, d0
; CHECK-NEXT: ret
; GENERIC-LABEL: subdd_su64:
; GENERIC: sub d0, d1, d0
; GENERIC-NEXT: ret
  %a.lane0 = extractelement <2 x i64> %a, i32 0
  %b.lane0 = extractelement <2 x i64> %b, i32 0
  %diff = sub nsw i64 %b.lane0, %a.lane0
  %as.double = bitcast i64 %diff to double
  ret double %as.double
}

; Adds lane 0 of %b and lane 0 of %a as i64 and returns the bits as a
; double. The expected code is a scalar add on D registers directly followed
; by ret.
define double @vaddd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: vaddd_su64:
; CHECK: add d0, d1, d0
; CHECK-NEXT: ret
; GENERIC-LABEL: vaddd_su64:
; GENERIC: add d0, d1, d0
; GENERIC-NEXT: ret
  %a.lane0 = extractelement <2 x i64> %a, i32 0
  %b.lane0 = extractelement <2 x i64> %b, i32 0
  %total = add nsw i64 %b.lane0, %a.lane0
  %as.double = bitcast i64 %total to double
  ret double %as.double
}

; In the following test, the sub MI doesn't access the dsub register.
; Computes 0 - (b[0] + a[0]) on i64 and returns the bits as a double. Both
; the add and the negation are expected to be emitted as scalar operations
; on D registers, with the sub directly followed by ret.
define double @add_sub_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: add_sub_su64:
; CHECK: add d0, d1, d0
; CHECK: sub d0, {{d[0-9]+}}, d0
; CHECK-NEXT: ret
; GENERIC-LABEL: add_sub_su64:
; GENERIC: add d0, d1, d0
; GENERIC: sub d0, {{d[0-9]+}}, d0
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %add.i = add i64 %vecext1, %vecext
  %sub.i = sub i64 0, %add.i
  %retval = bitcast i64 %sub.i to double
  ret double %retval
}

; Bitwise AND of lane 0 of %b and lane 0 of %a, returned as a double. The
; expected code is a 64-bit vector and (8b arrangement) directly followed by
; ret.
define double @and_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: and_su64:
; CHECK: and.8b v0, v1, v0
; CHECK-NEXT: ret
; GENERIC-LABEL: and_su64:
; GENERIC: and v0.8b, v1.8b, v0.8b
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  ; Named after the operation it holds, matching %or.i / %xor.i in the
  ; sibling tests.
  %and.i = and i64 %vecext1, %vecext
  %retval = bitcast i64 %and.i to double
  ret double %retval
}

; Bitwise OR of lane 0 of %b and lane 0 of %a, returned as a double. The
; expected code is a 64-bit vector orr (8b arrangement) directly followed by
; ret.
define double @orr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: orr_su64:
; CHECK: orr.8b v0, v1, v0
; CHECK-NEXT: ret
; GENERIC-LABEL: orr_su64:
; GENERIC: orr v0.8b, v1.8b, v0.8b
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %or.i = or i64 %vecext1, %vecext
  %retval = bitcast i64 %or.i to double
  ret double %retval
}

; Bitwise XOR of lane 0 of %b and lane 0 of %a, returned as a double. The
; expected code is a 64-bit vector eor (8b arrangement) directly followed by
; ret.
define double @xorr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: xorr_su64:
; CHECK: eor.8b v0, v1, v0
; CHECK-NEXT: ret
; GENERIC-LABEL: xorr_su64:
; GENERIC: eor v0.8b, v1.8b, v0.8b
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %xor.i = xor i64 %vecext1, %vecext
  %retval = bitcast i64 %xor.i to double
  ret double %retval
}