; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s

; Instruction-selection test for the AArch64 NEON reciprocal and reciprocal
; square-root intrinsics. Each function loads its operand(s), calls a single
; intrinsic, and returns the result; the FileCheck directive in each function
; names the mnemonic that must appear in the generated assembly.
;
; The target is pinned with an explicit triple so the generated assembly does
; not depend on the default triple of the machine running the test.

;------------------------------------------------------------------------------
; llvm.aarch64.neon.frecps, vector forms
;------------------------------------------------------------------------------

define <2 x float> @frecps_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: frecps_2s:
;CHECK: frecps.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @frecps_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: frecps_4s:
;CHECK: frecps.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @frecps_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: frecps_2d:
;CHECK: frecps.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double>, <2 x double>) nounwind readnone

;------------------------------------------------------------------------------
; llvm.aarch64.neon.frsqrts, vector forms
;------------------------------------------------------------------------------

define <2 x float> @frsqrts_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: frsqrts_2s:
;CHECK: frsqrts.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @frsqrts_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: frsqrts_4s:
;CHECK: frsqrts.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @frsqrts_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: frsqrts_2d:
;CHECK: frsqrts.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp2 = load <2 x double>, <2 x double>* %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double>, <2 x double>) nounwind readnone

;------------------------------------------------------------------------------
; llvm.aarch64.neon.frecpe, vector and scalar forms
;------------------------------------------------------------------------------

define <2 x float> @frecpe_2s(<2 x float>* %A) nounwind {
;CHECK-LABEL: frecpe_2s:
;CHECK: frecpe.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp3 = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> %tmp1)
  ret <2 x float> %tmp3
}

define <4 x float> @frecpe_4s(<4 x float>* %A) nounwind {
;CHECK-LABEL: frecpe_4s:
;CHECK: frecpe.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp3 = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> %tmp1)
  ret <4 x float> %tmp3
}

define <2 x double> @frecpe_2d(<2 x double>* %A) nounwind {
;CHECK-LABEL: frecpe_2d:
;CHECK: frecpe.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp3 = call <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double> %tmp1)
  ret <2 x double> %tmp3
}

; Scalar forms: the result is expected in s0 / d0; the source register is
; matched loosely with a FileCheck regex.
define float @frecpe_s(float* %A) nounwind {
;CHECK-LABEL: frecpe_s:
;CHECK: frecpe s0, {{s[0-9]+}}
  %tmp1 = load float, float* %A
  %tmp3 = call float @llvm.aarch64.neon.frecpe.f32(float %tmp1)
  ret float %tmp3
}

define double @frecpe_d(double* %A) nounwind {
;CHECK-LABEL: frecpe_d:
;CHECK: frecpe d0, {{d[0-9]+}}
  %tmp1 = load double, double* %A
  %tmp3 = call double @llvm.aarch64.neon.frecpe.f64(double %tmp1)
  ret double %tmp3
}

declare <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double>) nounwind readnone
declare float @llvm.aarch64.neon.frecpe.f32(float) nounwind readnone
declare double @llvm.aarch64.neon.frecpe.f64(double) nounwind readnone

;------------------------------------------------------------------------------
; llvm.aarch64.neon.frecpx, scalar forms only
;------------------------------------------------------------------------------

define float @frecpx_s(float* %A) nounwind {
;CHECK-LABEL: frecpx_s:
;CHECK: frecpx s0, {{s[0-9]+}}
  %tmp1 = load float, float* %A
  %tmp3 = call float @llvm.aarch64.neon.frecpx.f32(float %tmp1)
  ret float %tmp3
}

define double @frecpx_d(double* %A) nounwind {
;CHECK-LABEL: frecpx_d:
;CHECK: frecpx d0, {{d[0-9]+}}
  %tmp1 = load double, double* %A
  %tmp3 = call double @llvm.aarch64.neon.frecpx.f64(double %tmp1)
  ret double %tmp3
}

declare float @llvm.aarch64.neon.frecpx.f32(float) nounwind readnone
declare double @llvm.aarch64.neon.frecpx.f64(double) nounwind readnone

;------------------------------------------------------------------------------
; llvm.aarch64.neon.frsqrte, vector forms
;------------------------------------------------------------------------------

define <2 x float> @frsqrte_2s(<2 x float>* %A) nounwind {
;CHECK-LABEL: frsqrte_2s:
;CHECK: frsqrte.2s
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp3 = call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> %tmp1)
  ret <2 x float> %tmp3
}

define <4 x float> @frsqrte_4s(<4 x float>* %A) nounwind {
;CHECK-LABEL: frsqrte_4s:
;CHECK: frsqrte.4s
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp3 = call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> %tmp1)
  ret <4 x float> %tmp3
}

define <2 x double> @frsqrte_2d(<2 x double>* %A) nounwind {
;CHECK-LABEL: frsqrte_2d:
;CHECK: frsqrte.2d
  %tmp1 = load <2 x double>, <2 x double>* %A
  %tmp3 = call <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double> %tmp1)
  ret <2 x double> %tmp3
}

;------------------------------------------------------------------------------
; llvm.aarch64.neon.frsqrte, scalar forms
; (the vector declarations are kept here as well; they are referenced by the
; vector frsqrte functions elsewhere in this file)
;------------------------------------------------------------------------------

declare <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double>) nounwind readnone
declare float @llvm.aarch64.neon.frsqrte.f32(float) nounwind readnone
declare double @llvm.aarch64.neon.frsqrte.f64(double) nounwind readnone

; Loads one float, applies the f32 frsqrte intrinsic, and expects the scalar
; instruction to write s0.
define float @frsqrte_s(float* %A) nounwind {
;CHECK-LABEL: frsqrte_s:
;CHECK: frsqrte s0, {{s[0-9]+}}
  %src = load float, float* %A
  %est = call float @llvm.aarch64.neon.frsqrte.f32(float %src)
  ret float %est
}

; Same as above for double; the result is expected in d0.
define double @frsqrte_d(double* %A) nounwind {
;CHECK-LABEL: frsqrte_d:
;CHECK: frsqrte d0, {{d[0-9]+}}
  %src = load double, double* %A
  %est = call double @llvm.aarch64.neon.frsqrte.f64(double %src)
  ret double %est
}

;------------------------------------------------------------------------------
; llvm.aarch64.neon.urecpe, integer vector forms
;------------------------------------------------------------------------------

declare <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32>) nounwind readnone

define <2 x i32> @urecpe_2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: urecpe_2s:
;CHECK: urecpe.2s
  %src = load <2 x i32>, <2 x i32>* %A
  %est = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32> %src)
  ret <2 x i32> %est
}

define <4 x i32> @urecpe_4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: urecpe_4s:
;CHECK: urecpe.4s
  %src = load <4 x i32>, <4 x i32>* %A
  %est = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> %src)
  ret <4 x i32> %est
}

;------------------------------------------------------------------------------
; llvm.aarch64.neon.ursqrte, integer vector forms
;------------------------------------------------------------------------------

declare <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32>) nounwind readnone

define <2 x i32> @ursqrte_2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: ursqrte_2s:
;CHECK: ursqrte.2s
  %src = load <2 x i32>, <2 x i32>* %A
  %est = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %src)
  ret <2 x i32> %est
}

define <4 x i32> @ursqrte_4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: ursqrte_4s:
;CHECK: ursqrte.4s
  %src = load <4 x i32>, <4 x i32>* %A
  %est = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %src)
  ret <4 x i32> %est
}

;------------------------------------------------------------------------------
; llvm.aarch64.neon.frsqrts, scalar forms
; Operands arrive directly as arguments (no loads), so the expected output is
; the single instruction on the argument registers followed immediately by ret.
;------------------------------------------------------------------------------

declare float @llvm.aarch64.neon.frsqrts.f32(float, float) nounwind readnone
declare double @llvm.aarch64.neon.frsqrts.f64(double, double) nounwind readnone

define float @f1(float %a, float %b) nounwind readnone optsize ssp {
; CHECK-LABEL: f1:
; CHECK: frsqrts s0, s0, s1
; CHECK-NEXT: ret
  %step = tail call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b) nounwind
  ret float %step
}

define double @f2(double %a, double %b) nounwind readnone optsize ssp {
; CHECK-LABEL: f2:
; CHECK: frsqrts d0, d0, d1
; CHECK-NEXT: ret
  %step = tail call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b) nounwind
  ret double %step
}