; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,-use-reciprocal-square-root | FileCheck %s --check-prefix=FAULT
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,+use-reciprocal-square-root | FileCheck %s

; Fast-math square root and reciprocal square root on AArch64.
;
; The file is compiled twice:
;   * with the use-reciprocal-square-root feature disabled, verified under the
;     FAULT prefix: the expected code is a plain fsqrt (followed by fmov + fdiv
;     for the 1/sqrt(x) tests);
;   * with the feature enabled, verified under the default prefix: the expected
;     code is an frsqrte estimate refined by fmul/frsqrts steps.
;
; In the expected output below, f32 cases use two frsqrts steps and f64 cases
; use three. The plain sqrt tests additionally end with a compare of the input
; against #0.0 and a select (fcmp/fcsel for scalars, fcmeq/bsl for vectors)
; that yields the input itself when it compares equal to zero; the 1/sqrt(x)
; tests have no such select.
;
; The assertion lines are generated: after changing the IR, regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.

declare float @llvm.sqrt.f32(float) #0
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #0
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0
declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0
declare double @llvm.sqrt.f64(double) #0
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) #0

; Scalar f32 sqrt.
define float @fsqrt(float %a) #0 {
; FAULT-LABEL: fsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt s0, s0
; FAULT-NEXT:    ret
;
; CHECK-LABEL: fsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte s1, s0
; CHECK-NEXT:    fmul s2, s1, s1
; CHECK-NEXT:    frsqrts s2, s0, s2
; CHECK-NEXT:    fmul s1, s1, s2
; CHECK-NEXT:    fmul s2, s1, s1
; CHECK-NEXT:    frsqrts s2, s0, s2
; CHECK-NEXT:    fmul s2, s2, s0
; CHECK-NEXT:    fmul s1, s1, s2
; CHECK-NEXT:    fcmp s0, #0.0
; CHECK-NEXT:    fcsel s0, s0, s1, eq
; CHECK-NEXT:    ret
  %1 = tail call fast float @llvm.sqrt.f32(float %a)
  ret float %1
}

; Scalar f32 sqrt with attribute group #1 ("denormal-fp-math"="ieee").
; The expected output recorded here is identical to @fsqrt.
define float @fsqrt_ieee_denorms(float %a) #1 {
; FAULT-LABEL: fsqrt_ieee_denorms:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt s0, s0
; FAULT-NEXT:    ret
;
; CHECK-LABEL: fsqrt_ieee_denorms:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte s1, s0
; CHECK-NEXT:    fmul s2, s1, s1
; CHECK-NEXT:    frsqrts s2, s0, s2
; CHECK-NEXT:    fmul s1, s1, s2
; CHECK-NEXT:    fmul s2, s1, s1
; CHECK-NEXT:    frsqrts s2, s0, s2
; CHECK-NEXT:    fmul s2, s2, s0
; CHECK-NEXT:    fmul s1, s1, s2
; CHECK-NEXT:    fcmp s0, #0.0
; CHECK-NEXT:    fcsel s0, s0, s1, eq
; CHECK-NEXT:    ret
  %1 = tail call fast float @llvm.sqrt.f32(float %a)
  ret float %1
}

; <2 x float> sqrt.
define <2 x float> @f2sqrt(<2 x float> %a) #0 {
; FAULT-LABEL: f2sqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.2s, v0.2s
; FAULT-NEXT:    ret
;
; CHECK-LABEL: f2sqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v1.2s, v0.2s
; CHECK-NEXT:    fmul v2.2s, v1.2s, v1.2s
; CHECK-NEXT:    frsqrts v2.2s, v0.2s, v2.2s
; CHECK-NEXT:    fmul v1.2s, v1.2s, v2.2s
; CHECK-NEXT:    fmul v2.2s, v1.2s, v1.2s
; CHECK-NEXT:    frsqrts v2.2s, v0.2s, v2.2s
; CHECK-NEXT:    fmul v2.2s, v2.2s, v0.2s
; CHECK-NEXT:    fmul v2.2s, v1.2s, v2.2s
; CHECK-NEXT:    fcmeq v1.2s, v0.2s, #0.0
; CHECK-NEXT:    bsl v1.8b, v0.8b, v2.8b
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
  ret <2 x float> %1
}

; <4 x float> sqrt.
define <4 x float> @f4sqrt(<4 x float> %a) #0 {
; FAULT-LABEL: f4sqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.4s, v0.4s
; FAULT-NEXT:    ret
;
; CHECK-LABEL: f4sqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v1.4s, v0.4s
; CHECK-NEXT:    fmul v2.4s, v1.4s, v1.4s
; CHECK-NEXT:    frsqrts v2.4s, v0.4s, v2.4s
; CHECK-NEXT:    fmul v1.4s, v1.4s, v2.4s
; CHECK-NEXT:    fmul v2.4s, v1.4s, v1.4s
; CHECK-NEXT:    frsqrts v2.4s, v0.4s, v2.4s
; CHECK-NEXT:    fmul v2.4s, v2.4s, v0.4s
; CHECK-NEXT:    fmul v2.4s, v1.4s, v2.4s
; CHECK-NEXT:    fcmeq v1.4s, v0.4s, #0.0
; CHECK-NEXT:    bsl v1.16b, v0.16b, v2.16b
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
  ret <4 x float> %1
}

; <8 x float> sqrt; the expected code operates on two .4s halves (v0 and v1).
define <8 x float> @f8sqrt(<8 x float> %a) #0 {
; FAULT-LABEL: f8sqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.4s, v0.4s
; FAULT-NEXT:    fsqrt v1.4s, v1.4s
; FAULT-NEXT:    ret
;
; CHECK-LABEL: f8sqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v2.4s, v0.4s
; CHECK-NEXT:    fmul v3.4s, v2.4s, v2.4s
; CHECK-NEXT:    frsqrts v3.4s, v0.4s, v3.4s
; CHECK-NEXT:    fmul v2.4s, v2.4s, v3.4s
; CHECK-NEXT:    fmul v3.4s, v2.4s, v2.4s
; CHECK-NEXT:    frsqrts v3.4s, v0.4s, v3.4s
; CHECK-NEXT:    fmul v3.4s, v3.4s, v0.4s
; CHECK-NEXT:    fmul v3.4s, v2.4s, v3.4s
; CHECK-NEXT:    fcmeq v2.4s, v0.4s, #0.0
; CHECK-NEXT:    bsl v2.16b, v0.16b, v3.16b
; CHECK-NEXT:    frsqrte v0.4s, v1.4s
; CHECK-NEXT:    fmul v3.4s, v0.4s, v0.4s
; CHECK-NEXT:    frsqrts v3.4s, v1.4s, v3.4s
; CHECK-NEXT:    fmul v0.4s, v0.4s, v3.4s
; CHECK-NEXT:    fmul v3.4s, v0.4s, v0.4s
; CHECK-NEXT:    frsqrts v3.4s, v1.4s, v3.4s
; CHECK-NEXT:    fmul v3.4s, v3.4s, v1.4s
; CHECK-NEXT:    fmul v0.4s, v0.4s, v3.4s
; CHECK-NEXT:    fcmeq v3.4s, v1.4s, #0.0
; CHECK-NEXT:    bsl v3.16b, v1.16b, v0.16b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    mov v1.16b, v3.16b
; CHECK-NEXT:    ret
  %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
  ret <8 x float> %1
}

; Scalar f64 sqrt.
define double @dsqrt(double %a) #0 {
; FAULT-LABEL: dsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt d0, d0
; FAULT-NEXT:    ret
;
; CHECK-LABEL: dsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte d1, d0
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d2, d2, d0
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fcmp d0, #0.0
; CHECK-NEXT:    fcsel d0, d0, d1, eq
; CHECK-NEXT:    ret
  %1 = tail call fast double @llvm.sqrt.f64(double %a)
  ret double %1
}

; Scalar f64 sqrt with attribute group #1 ("denormal-fp-math"="ieee").
; The expected output recorded here is identical to @dsqrt.
define double @dsqrt_ieee_denorms(double %a) #1 {
; FAULT-LABEL: dsqrt_ieee_denorms:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt d0, d0
; FAULT-NEXT:    ret
;
; CHECK-LABEL: dsqrt_ieee_denorms:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte d1, d0
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d2, d2, d0
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fcmp d0, #0.0
; CHECK-NEXT:    fcsel d0, d0, d1, eq
; CHECK-NEXT:    ret
  %1 = tail call fast double @llvm.sqrt.f64(double %a)
  ret double %1
}

; <2 x double> sqrt.
define <2 x double> @d2sqrt(<2 x double> %a) #0 {
; FAULT-LABEL: d2sqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.2d, v0.2d
; FAULT-NEXT:    ret
;
; CHECK-LABEL: d2sqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v1.2d, v0.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v2.2d, v2.2d, v0.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v2.2d
; CHECK-NEXT:    fcmeq v1.2d, v0.2d, #0.0
; CHECK-NEXT:    bsl v1.16b, v0.16b, v2.16b
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
  ret <2 x double> %1
}

; <4 x double> sqrt; the expected code operates on two .2d halves (v0 and v1).
define <4 x double> @d4sqrt(<4 x double> %a) #0 {
; FAULT-LABEL: d4sqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.2d, v0.2d
; FAULT-NEXT:    fsqrt v1.2d, v1.2d
; FAULT-NEXT:    ret
;
; CHECK-LABEL: d4sqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v2.2d, v0.2d
; CHECK-NEXT:    fmul v3.2d, v2.2d, v2.2d
; CHECK-NEXT:    frsqrts v3.2d, v0.2d, v3.2d
; CHECK-NEXT:    fmul v2.2d, v2.2d, v3.2d
; CHECK-NEXT:    fmul v3.2d, v2.2d, v2.2d
; CHECK-NEXT:    frsqrts v3.2d, v0.2d, v3.2d
; CHECK-NEXT:    fmul v2.2d, v2.2d, v3.2d
; CHECK-NEXT:    fmul v3.2d, v2.2d, v2.2d
; CHECK-NEXT:    frsqrts v3.2d, v0.2d, v3.2d
; CHECK-NEXT:    fmul v3.2d, v3.2d, v0.2d
; CHECK-NEXT:    fmul v3.2d, v2.2d, v3.2d
; CHECK-NEXT:    fcmeq v2.2d, v0.2d, #0.0
; CHECK-NEXT:    bsl v2.16b, v0.16b, v3.16b
; CHECK-NEXT:    frsqrte v0.2d, v1.2d
; CHECK-NEXT:    fmul v3.2d, v0.2d, v0.2d
; CHECK-NEXT:    frsqrts v3.2d, v1.2d, v3.2d
; CHECK-NEXT:    fmul v0.2d, v0.2d, v3.2d
; CHECK-NEXT:    fmul v3.2d, v0.2d, v0.2d
; CHECK-NEXT:    frsqrts v3.2d, v1.2d, v3.2d
; CHECK-NEXT:    fmul v0.2d, v0.2d, v3.2d
; CHECK-NEXT:    fmul v3.2d, v0.2d, v0.2d
; CHECK-NEXT:    frsqrts v3.2d, v1.2d, v3.2d
; CHECK-NEXT:    fmul v3.2d, v3.2d, v1.2d
; CHECK-NEXT:    fmul v0.2d, v0.2d, v3.2d
; CHECK-NEXT:    fcmeq v3.2d, v1.2d, #0.0
; CHECK-NEXT:    bsl v3.16b, v1.16b, v0.16b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    mov v1.16b, v3.16b
; CHECK-NEXT:    ret
  %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
  ret <4 x double> %1
}

; Scalar f32 reciprocal square root: 1.0 / sqrt(a).
define float @frsqrt(float %a) #0 {
; FAULT-LABEL: frsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt s0, s0
; FAULT-NEXT:    fmov s1, #1.00000000
; FAULT-NEXT:    fdiv s0, s1, s0
; FAULT-NEXT:    ret
;
; CHECK-LABEL: frsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte s1, s0
; CHECK-NEXT:    fmul s2, s1, s1
; CHECK-NEXT:    frsqrts s2, s0, s2
; CHECK-NEXT:    fmul s1, s1, s2
; CHECK-NEXT:    fmul s2, s1, s1
; CHECK-NEXT:    frsqrts s0, s0, s2
; CHECK-NEXT:    fmul s0, s1, s0
; CHECK-NEXT:    ret
  %1 = tail call fast float @llvm.sqrt.f32(float %a)
  %2 = fdiv fast float 1.000000e+00, %1
  ret float %2
}

; <2 x float> reciprocal square root.
define <2 x float> @f2rsqrt(<2 x float> %a) #0 {
; FAULT-LABEL: f2rsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.2s, v0.2s
; FAULT-NEXT:    fmov v1.2s, #1.00000000
; FAULT-NEXT:    fdiv v0.2s, v1.2s, v0.2s
; FAULT-NEXT:    ret
;
; CHECK-LABEL: f2rsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v1.2s, v0.2s
; CHECK-NEXT:    fmul v2.2s, v1.2s, v1.2s
; CHECK-NEXT:    frsqrts v2.2s, v0.2s, v2.2s
; CHECK-NEXT:    fmul v1.2s, v1.2s, v2.2s
; CHECK-NEXT:    fmul v2.2s, v1.2s, v1.2s
; CHECK-NEXT:    frsqrts v0.2s, v0.2s, v2.2s
; CHECK-NEXT:    fmul v0.2s, v1.2s, v0.2s
; CHECK-NEXT:    ret
  %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
  %2 = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %1
  ret <2 x float> %2
}

; <4 x float> reciprocal square root.
define <4 x float> @f4rsqrt(<4 x float> %a) #0 {
; FAULT-LABEL: f4rsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.4s, v0.4s
; FAULT-NEXT:    fmov v1.4s, #1.00000000
; FAULT-NEXT:    fdiv v0.4s, v1.4s, v0.4s
; FAULT-NEXT:    ret
;
; CHECK-LABEL: f4rsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v1.4s, v0.4s
; CHECK-NEXT:    fmul v2.4s, v1.4s, v1.4s
; CHECK-NEXT:    frsqrts v2.4s, v0.4s, v2.4s
; CHECK-NEXT:    fmul v1.4s, v1.4s, v2.4s
; CHECK-NEXT:    fmul v2.4s, v1.4s, v1.4s
; CHECK-NEXT:    frsqrts v0.4s, v0.4s, v2.4s
; CHECK-NEXT:    fmul v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    ret
  %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
  %2 = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
  ret <4 x float> %2
}

; <8 x float> reciprocal square root; the expected code interleaves the
; refinement of the two .4s halves.
define <8 x float> @f8rsqrt(<8 x float> %a) #0 {
; FAULT-LABEL: f8rsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v1.4s, v1.4s
; FAULT-NEXT:    fsqrt v0.4s, v0.4s
; FAULT-NEXT:    fmov v2.4s, #1.00000000
; FAULT-NEXT:    fdiv v0.4s, v2.4s, v0.4s
; FAULT-NEXT:    fdiv v1.4s, v2.4s, v1.4s
; FAULT-NEXT:    ret
;
; CHECK-LABEL: f8rsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v2.4s, v0.4s
; CHECK-NEXT:    fmul v4.4s, v2.4s, v2.4s
; CHECK-NEXT:    frsqrte v3.4s, v1.4s
; CHECK-NEXT:    frsqrts v4.4s, v0.4s, v4.4s
; CHECK-NEXT:    fmul v2.4s, v2.4s, v4.4s
; CHECK-NEXT:    fmul v4.4s, v3.4s, v3.4s
; CHECK-NEXT:    frsqrts v4.4s, v1.4s, v4.4s
; CHECK-NEXT:    fmul v3.4s, v3.4s, v4.4s
; CHECK-NEXT:    fmul v4.4s, v2.4s, v2.4s
; CHECK-NEXT:    frsqrts v0.4s, v0.4s, v4.4s
; CHECK-NEXT:    fmul v4.4s, v3.4s, v3.4s
; CHECK-NEXT:    frsqrts v1.4s, v1.4s, v4.4s
; CHECK-NEXT:    fmul v0.4s, v2.4s, v0.4s
; CHECK-NEXT:    fmul v1.4s, v3.4s, v1.4s
; CHECK-NEXT:    ret
  %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
  %2 = fdiv fast <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
  ret <8 x float> %2
}

; Scalar f64 reciprocal square root.
define double @drsqrt(double %a) #0 {
; FAULT-LABEL: drsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt d0, d0
; FAULT-NEXT:    fmov d1, #1.00000000
; FAULT-NEXT:    fdiv d0, d1, d0
; FAULT-NEXT:    ret
;
; CHECK-LABEL: drsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte d1, d0
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d0, d0, d2
; CHECK-NEXT:    fmul d0, d1, d0
; CHECK-NEXT:    ret
  %1 = tail call fast double @llvm.sqrt.f64(double %a)
  %2 = fdiv fast double 1.000000e+00, %1
  ret double %2
}

; <2 x double> reciprocal square root.
define <2 x double> @d2rsqrt(<2 x double> %a) #0 {
; FAULT-LABEL: d2rsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.2d, v0.2d
; FAULT-NEXT:    fmov v1.2d, #1.00000000
; FAULT-NEXT:    fdiv v0.2d, v1.2d, v0.2d
; FAULT-NEXT:    ret
;
; CHECK-LABEL: d2rsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v1.2d, v0.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v0.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v0.2d, v1.2d, v0.2d
; CHECK-NEXT:    ret
  %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
  %2 = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, %1
  ret <2 x double> %2
}

; <4 x double> reciprocal square root; the expected code interleaves the
; refinement of the two .2d halves.
define <4 x double> @d4rsqrt(<4 x double> %a) #0 {
; FAULT-LABEL: d4rsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v1.2d, v1.2d
; FAULT-NEXT:    fsqrt v0.2d, v0.2d
; FAULT-NEXT:    fmov v2.2d, #1.00000000
; FAULT-NEXT:    fdiv v0.2d, v2.2d, v0.2d
; FAULT-NEXT:    fdiv v1.2d, v2.2d, v1.2d
; FAULT-NEXT:    ret
;
; CHECK-LABEL: d4rsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v2.2d, v0.2d
; CHECK-NEXT:    fmul v4.2d, v2.2d, v2.2d
; CHECK-NEXT:    frsqrte v3.2d, v1.2d
; CHECK-NEXT:    frsqrts v4.2d, v0.2d, v4.2d
; CHECK-NEXT:    fmul v2.2d, v2.2d, v4.2d
; CHECK-NEXT:    fmul v4.2d, v3.2d, v3.2d
; CHECK-NEXT:    frsqrts v4.2d, v1.2d, v4.2d
; CHECK-NEXT:    fmul v3.2d, v3.2d, v4.2d
; CHECK-NEXT:    fmul v4.2d, v2.2d, v2.2d
; CHECK-NEXT:    frsqrts v4.2d, v0.2d, v4.2d
; CHECK-NEXT:    fmul v2.2d, v2.2d, v4.2d
; CHECK-NEXT:    fmul v4.2d, v3.2d, v3.2d
; CHECK-NEXT:    frsqrts v4.2d, v1.2d, v4.2d
; CHECK-NEXT:    fmul v3.2d, v3.2d, v4.2d
; CHECK-NEXT:    fmul v4.2d, v2.2d, v2.2d
; CHECK-NEXT:    frsqrts v0.2d, v0.2d, v4.2d
; CHECK-NEXT:    fmul v4.2d, v3.2d, v3.2d
; CHECK-NEXT:    frsqrts v1.2d, v1.2d, v4.2d
; CHECK-NEXT:    fmul v0.2d, v2.2d, v0.2d
; CHECK-NEXT:    fmul v1.2d, v3.2d, v1.2d
; CHECK-NEXT:    ret
  %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
  %2 = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %1
  ret <4 x double> %2
}

; #0 is used by the intrinsic declarations and by every test function except
; the *_ieee_denorms pair, which use #1 (same as #0 plus IEEE denormal mode).
attributes #0 = { "unsafe-fp-math"="true" }
attributes #1 = { "unsafe-fp-math"="true" "denormal-fp-math"="ieee" }