Home | History | Annotate | Download | only in AArch64
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,-use-reciprocal-square-root | FileCheck %s --check-prefix=FAULT
      3 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,+use-reciprocal-square-root | FileCheck %s
      4 
      ; Declarations of the llvm.sqrt intrinsic overloads called by the test
      ; functions in this file: scalar float/double plus the 2-, 4- and 8-lane
      ; float vectors and the 2- and 4-lane double vectors.
      5 declare float @llvm.sqrt.f32(float) #0
      6 declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #0
      7 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0
      8 declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0
      9 declare double @llvm.sqrt.f64(double) #0
     10 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0
     11 declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) #0
     12 
     ; Scalar f32 square root through a fast-math llvm.sqrt.f32 call.
     ; With -use-reciprocal-square-root (the FAULT prefix) a single fsqrt is
     ; expected. With +use-reciprocal-square-root (the default prefix) the
     ; expected code is an frsqrte estimate refined by two frsqrts steps, the
     ; last of which is multiplied by the input, followed by an fcmp/fcsel that
     ; returns the input itself when it compares equal to 0.0.
     13 define float @fsqrt(float %a) #0 {
     14 ; FAULT-LABEL: fsqrt:
     15 ; FAULT:       // %bb.0:
     16 ; FAULT-NEXT:    fsqrt s0, s0
     17 ; FAULT-NEXT:    ret
     18 ;
     19 ; CHECK-LABEL: fsqrt:
     20 ; CHECK:       // %bb.0:
     21 ; CHECK-NEXT:    frsqrte s1, s0
     22 ; CHECK-NEXT:    fmul s2, s1, s1
     23 ; CHECK-NEXT:    frsqrts s2, s0, s2
     24 ; CHECK-NEXT:    fmul s1, s1, s2
     25 ; CHECK-NEXT:    fmul s2, s1, s1
     26 ; CHECK-NEXT:    frsqrts s2, s0, s2
     27 ; CHECK-NEXT:    fmul s2, s2, s0
     28 ; CHECK-NEXT:    fmul s1, s1, s2
     29 ; CHECK-NEXT:    fcmp s0, #0.0
     30 ; CHECK-NEXT:    fcsel s0, s0, s1, eq
     31 ; CHECK-NEXT:    ret
     32   %1 = tail call fast float @llvm.sqrt.f32(float %a)
     33   ret float %1
     34 }
     35 
     ; Same IR body as @fsqrt, but the function uses attribute group #1, which
     ; adds "denormal-fp-math"="ieee" to "unsafe-fp-math"="true".
     ; The expected instruction sequences recorded below are the same as the
     ; ones recorded for @fsqrt under both prefixes.
     36 define float @fsqrt_ieee_denorms(float %a) #1 {
     37 ; FAULT-LABEL: fsqrt_ieee_denorms:
     38 ; FAULT:       // %bb.0:
     39 ; FAULT-NEXT:    fsqrt s0, s0
     40 ; FAULT-NEXT:    ret
     41 ;
     42 ; CHECK-LABEL: fsqrt_ieee_denorms:
     43 ; CHECK:       // %bb.0:
     44 ; CHECK-NEXT:    frsqrte s1, s0
     45 ; CHECK-NEXT:    fmul s2, s1, s1
     46 ; CHECK-NEXT:    frsqrts s2, s0, s2
     47 ; CHECK-NEXT:    fmul s1, s1, s2
     48 ; CHECK-NEXT:    fmul s2, s1, s1
     49 ; CHECK-NEXT:    frsqrts s2, s0, s2
     50 ; CHECK-NEXT:    fmul s2, s2, s0
     51 ; CHECK-NEXT:    fmul s1, s1, s2
     52 ; CHECK-NEXT:    fcmp s0, #0.0
     53 ; CHECK-NEXT:    fcsel s0, s0, s1, eq
     54 ; CHECK-NEXT:    ret
     55   %1 = tail call fast float @llvm.sqrt.f32(float %a)
     56   ret float %1
     57 }
     58 
     ; <2 x float> square root through a fast-math llvm.sqrt.v2f32 call.
     ; FAULT prefix: one vector fsqrt on the .2s arrangement.
     ; Default prefix: frsqrte estimate plus two frsqrts steps on .2s lanes; the
     ; zero-input case is handled per lane with an fcmeq mask against 0.0 and a
     ; bsl that keeps the input where the mask is set, then the result is moved
     ; back into v0.
     59 define <2 x float> @f2sqrt(<2 x float> %a) #0 {
     60 ; FAULT-LABEL: f2sqrt:
     61 ; FAULT:       // %bb.0:
     62 ; FAULT-NEXT:    fsqrt v0.2s, v0.2s
     63 ; FAULT-NEXT:    ret
     64 ;
     65 ; CHECK-LABEL: f2sqrt:
     66 ; CHECK:       // %bb.0:
     67 ; CHECK-NEXT:    frsqrte v1.2s, v0.2s
     68 ; CHECK-NEXT:    fmul v2.2s, v1.2s, v1.2s
     69 ; CHECK-NEXT:    frsqrts v2.2s, v0.2s, v2.2s
     70 ; CHECK-NEXT:    fmul v1.2s, v1.2s, v2.2s
     71 ; CHECK-NEXT:    fmul v2.2s, v1.2s, v1.2s
     72 ; CHECK-NEXT:    frsqrts v2.2s, v0.2s, v2.2s
     73 ; CHECK-NEXT:    fmul v2.2s, v2.2s, v0.2s
     74 ; CHECK-NEXT:    fmul v2.2s, v1.2s, v2.2s
     75 ; CHECK-NEXT:    fcmeq v1.2s, v0.2s, #0.0
     76 ; CHECK-NEXT:    bsl v1.8b, v0.8b, v2.8b
     77 ; CHECK-NEXT:    mov v0.16b, v1.16b
     78 ; CHECK-NEXT:    ret
     79   %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
     80   ret <2 x float> %1
     81 }
     82 
     ; <4 x float> square root through a fast-math llvm.sqrt.v4f32 call.
     ; FAULT prefix: one vector fsqrt on the .4s arrangement.
     ; Default prefix: frsqrte estimate plus two frsqrts steps on .4s lanes,
     ; then an fcmeq-against-0.0 mask and bsl that keep the input in lanes
     ; equal to zero before the result is moved back into v0.
     83 define <4 x float> @f4sqrt(<4 x float> %a) #0 {
     84 ; FAULT-LABEL: f4sqrt:
     85 ; FAULT:       // %bb.0:
     86 ; FAULT-NEXT:    fsqrt v0.4s, v0.4s
     87 ; FAULT-NEXT:    ret
     88 ;
     89 ; CHECK-LABEL: f4sqrt:
     90 ; CHECK:       // %bb.0:
     91 ; CHECK-NEXT:    frsqrte v1.4s, v0.4s
     92 ; CHECK-NEXT:    fmul v2.4s, v1.4s, v1.4s
     93 ; CHECK-NEXT:    frsqrts v2.4s, v0.4s, v2.4s
     94 ; CHECK-NEXT:    fmul v1.4s, v1.4s, v2.4s
     95 ; CHECK-NEXT:    fmul v2.4s, v1.4s, v1.4s
     96 ; CHECK-NEXT:    frsqrts v2.4s, v0.4s, v2.4s
     97 ; CHECK-NEXT:    fmul v2.4s, v2.4s, v0.4s
     98 ; CHECK-NEXT:    fmul v2.4s, v1.4s, v2.4s
     99 ; CHECK-NEXT:    fcmeq v1.4s, v0.4s, #0.0
    100 ; CHECK-NEXT:    bsl v1.16b, v0.16b, v2.16b
    101 ; CHECK-NEXT:    mov v0.16b, v1.16b
    102 ; CHECK-NEXT:    ret
    103   %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
    104   ret <4 x float> %1
    105 }
    106 
    ; <8 x float> square root through a fast-math llvm.sqrt.v8f32 call.
    ; The expected code works on two .4s registers (v0 and v1).
    ; FAULT prefix: one fsqrt per register.
    ; Default prefix: the estimate / two-step refinement / zero-select sequence
    ; is emitted for v0 first and then for v1, and the two results are moved
    ; back into v0 and v1 at the end.
    107 define <8 x float> @f8sqrt(<8 x float> %a) #0 {
    108 ; FAULT-LABEL: f8sqrt:
    109 ; FAULT:       // %bb.0:
    110 ; FAULT-NEXT:    fsqrt v0.4s, v0.4s
    111 ; FAULT-NEXT:    fsqrt v1.4s, v1.4s
    112 ; FAULT-NEXT:    ret
    113 ;
    114 ; CHECK-LABEL: f8sqrt:
    115 ; CHECK:       // %bb.0:
    116 ; CHECK-NEXT:    frsqrte v2.4s, v0.4s
    117 ; CHECK-NEXT:    fmul v3.4s, v2.4s, v2.4s
    118 ; CHECK-NEXT:    frsqrts v3.4s, v0.4s, v3.4s
    119 ; CHECK-NEXT:    fmul v2.4s, v2.4s, v3.4s
    120 ; CHECK-NEXT:    fmul v3.4s, v2.4s, v2.4s
    121 ; CHECK-NEXT:    frsqrts v3.4s, v0.4s, v3.4s
    122 ; CHECK-NEXT:    fmul v3.4s, v3.4s, v0.4s
    123 ; CHECK-NEXT:    fmul v3.4s, v2.4s, v3.4s
    124 ; CHECK-NEXT:    fcmeq v2.4s, v0.4s, #0.0
    125 ; CHECK-NEXT:    bsl v2.16b, v0.16b, v3.16b
    126 ; CHECK-NEXT:    frsqrte v0.4s, v1.4s
    127 ; CHECK-NEXT:    fmul v3.4s, v0.4s, v0.4s
    128 ; CHECK-NEXT:    frsqrts v3.4s, v1.4s, v3.4s
    129 ; CHECK-NEXT:    fmul v0.4s, v0.4s, v3.4s
    130 ; CHECK-NEXT:    fmul v3.4s, v0.4s, v0.4s
    131 ; CHECK-NEXT:    frsqrts v3.4s, v1.4s, v3.4s
    132 ; CHECK-NEXT:    fmul v3.4s, v3.4s, v1.4s
    133 ; CHECK-NEXT:    fmul v0.4s, v0.4s, v3.4s
    134 ; CHECK-NEXT:    fcmeq v3.4s, v1.4s, #0.0
    135 ; CHECK-NEXT:    bsl v3.16b, v1.16b, v0.16b
    136 ; CHECK-NEXT:    mov v0.16b, v2.16b
    137 ; CHECK-NEXT:    mov v1.16b, v3.16b
    138 ; CHECK-NEXT:    ret
    139   %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
    140   ret <8 x float> %1
    141 }
    142 
    ; Scalar f64 square root through a fast-math llvm.sqrt.f64 call.
    ; FAULT prefix: a single fsqrt on d0.
    ; Default prefix: frsqrte estimate refined by three frsqrts steps (one more
    ; than the f32 tests in this file), the last multiplied by the input, then
    ; an fcmp/fcsel that returns the input when it compares equal to 0.0.
    143 define double @dsqrt(double %a) #0 {
    144 ; FAULT-LABEL: dsqrt:
    145 ; FAULT:       // %bb.0:
    146 ; FAULT-NEXT:    fsqrt d0, d0
    147 ; FAULT-NEXT:    ret
    148 ;
    149 ; CHECK-LABEL: dsqrt:
    150 ; CHECK:       // %bb.0:
    151 ; CHECK-NEXT:    frsqrte d1, d0
    152 ; CHECK-NEXT:    fmul d2, d1, d1
    153 ; CHECK-NEXT:    frsqrts d2, d0, d2
    154 ; CHECK-NEXT:    fmul d1, d1, d2
    155 ; CHECK-NEXT:    fmul d2, d1, d1
    156 ; CHECK-NEXT:    frsqrts d2, d0, d2
    157 ; CHECK-NEXT:    fmul d1, d1, d2
    158 ; CHECK-NEXT:    fmul d2, d1, d1
    159 ; CHECK-NEXT:    frsqrts d2, d0, d2
    160 ; CHECK-NEXT:    fmul d2, d2, d0
    161 ; CHECK-NEXT:    fmul d1, d1, d2
    162 ; CHECK-NEXT:    fcmp d0, #0.0
    163 ; CHECK-NEXT:    fcsel d0, d0, d1, eq
    164 ; CHECK-NEXT:    ret
    165   %1 = tail call fast double @llvm.sqrt.f64(double %a)
    166   ret double %1
    167 }
    168 
    ; Same IR body as @dsqrt, but the function uses attribute group #1, which
    ; adds "denormal-fp-math"="ieee" to "unsafe-fp-math"="true".
    ; The expected instruction sequences recorded below are the same as the
    ; ones recorded for @dsqrt under both prefixes.
    169 define double @dsqrt_ieee_denorms(double %a) #1 {
    170 ; FAULT-LABEL: dsqrt_ieee_denorms:
    171 ; FAULT:       // %bb.0:
    172 ; FAULT-NEXT:    fsqrt d0, d0
    173 ; FAULT-NEXT:    ret
    174 ;
    175 ; CHECK-LABEL: dsqrt_ieee_denorms:
    176 ; CHECK:       // %bb.0:
    177 ; CHECK-NEXT:    frsqrte d1, d0
    178 ; CHECK-NEXT:    fmul d2, d1, d1
    179 ; CHECK-NEXT:    frsqrts d2, d0, d2
    180 ; CHECK-NEXT:    fmul d1, d1, d2
    181 ; CHECK-NEXT:    fmul d2, d1, d1
    182 ; CHECK-NEXT:    frsqrts d2, d0, d2
    183 ; CHECK-NEXT:    fmul d1, d1, d2
    184 ; CHECK-NEXT:    fmul d2, d1, d1
    185 ; CHECK-NEXT:    frsqrts d2, d0, d2
    186 ; CHECK-NEXT:    fmul d2, d2, d0
    187 ; CHECK-NEXT:    fmul d1, d1, d2
    188 ; CHECK-NEXT:    fcmp d0, #0.0
    189 ; CHECK-NEXT:    fcsel d0, d0, d1, eq
    190 ; CHECK-NEXT:    ret
    191   %1 = tail call fast double @llvm.sqrt.f64(double %a)
    192   ret double %1
    193 }
    194 
    ; <2 x double> square root through a fast-math llvm.sqrt.v2f64 call.
    ; FAULT prefix: one vector fsqrt on the .2d arrangement.
    ; Default prefix: frsqrte estimate plus three frsqrts steps on .2d lanes,
    ; then an fcmeq-against-0.0 mask and bsl that keep the input in lanes
    ; equal to zero before the result is moved back into v0.
    195 define <2 x double> @d2sqrt(<2 x double> %a) #0 {
    196 ; FAULT-LABEL: d2sqrt:
    197 ; FAULT:       // %bb.0:
    198 ; FAULT-NEXT:    fsqrt v0.2d, v0.2d
    199 ; FAULT-NEXT:    ret
    200 ;
    201 ; CHECK-LABEL: d2sqrt:
    202 ; CHECK:       // %bb.0:
    203 ; CHECK-NEXT:    frsqrte v1.2d, v0.2d
    204 ; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
    205 ; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
    206 ; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
    207 ; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
    208 ; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
    209 ; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
    210 ; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
    211 ; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
    212 ; CHECK-NEXT:    fmul v2.2d, v2.2d, v0.2d
    213 ; CHECK-NEXT:    fmul v2.2d, v1.2d, v2.2d
    214 ; CHECK-NEXT:    fcmeq v1.2d, v0.2d, #0.0
    215 ; CHECK-NEXT:    bsl v1.16b, v0.16b, v2.16b
    216 ; CHECK-NEXT:    mov v0.16b, v1.16b
    217 ; CHECK-NEXT:    ret
    218   %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
    219   ret <2 x double> %1
    220 }
    221 
    ; <4 x double> square root through a fast-math llvm.sqrt.v4f64 call.
    ; The expected code works on two .2d registers (v0 and v1).
    ; FAULT prefix: one fsqrt per register.
    ; Default prefix: the estimate / three-step refinement / zero-select
    ; sequence is emitted for v0 first and then for v1, and the two results are
    ; moved back into v0 and v1 at the end.
    222 define <4 x double> @d4sqrt(<4 x double> %a) #0 {
    223 ; FAULT-LABEL: d4sqrt:
    224 ; FAULT:       // %bb.0:
    225 ; FAULT-NEXT:    fsqrt v0.2d, v0.2d
    226 ; FAULT-NEXT:    fsqrt v1.2d, v1.2d
    227 ; FAULT-NEXT:    ret
    228 ;
    229 ; CHECK-LABEL: d4sqrt:
    230 ; CHECK:       // %bb.0:
    231 ; CHECK-NEXT:    frsqrte v2.2d, v0.2d
    232 ; CHECK-NEXT:    fmul v3.2d, v2.2d, v2.2d
    233 ; CHECK-NEXT:    frsqrts v3.2d, v0.2d, v3.2d
    234 ; CHECK-NEXT:    fmul v2.2d, v2.2d, v3.2d
    235 ; CHECK-NEXT:    fmul v3.2d, v2.2d, v2.2d
    236 ; CHECK-NEXT:    frsqrts v3.2d, v0.2d, v3.2d
    237 ; CHECK-NEXT:    fmul v2.2d, v2.2d, v3.2d
    238 ; CHECK-NEXT:    fmul v3.2d, v2.2d, v2.2d
    239 ; CHECK-NEXT:    frsqrts v3.2d, v0.2d, v3.2d
    240 ; CHECK-NEXT:    fmul v3.2d, v3.2d, v0.2d
    241 ; CHECK-NEXT:    fmul v3.2d, v2.2d, v3.2d
    242 ; CHECK-NEXT:    fcmeq v2.2d, v0.2d, #0.0
    243 ; CHECK-NEXT:    bsl v2.16b, v0.16b, v3.16b
    244 ; CHECK-NEXT:    frsqrte v0.2d, v1.2d
    245 ; CHECK-NEXT:    fmul v3.2d, v0.2d, v0.2d
    246 ; CHECK-NEXT:    frsqrts v3.2d, v1.2d, v3.2d
    247 ; CHECK-NEXT:    fmul v0.2d, v0.2d, v3.2d
    248 ; CHECK-NEXT:    fmul v3.2d, v0.2d, v0.2d
    249 ; CHECK-NEXT:    frsqrts v3.2d, v1.2d, v3.2d
    250 ; CHECK-NEXT:    fmul v0.2d, v0.2d, v3.2d
    251 ; CHECK-NEXT:    fmul v3.2d, v0.2d, v0.2d
    252 ; CHECK-NEXT:    frsqrts v3.2d, v1.2d, v3.2d
    253 ; CHECK-NEXT:    fmul v3.2d, v3.2d, v1.2d
    254 ; CHECK-NEXT:    fmul v0.2d, v0.2d, v3.2d
    255 ; CHECK-NEXT:    fcmeq v3.2d, v1.2d, #0.0
    256 ; CHECK-NEXT:    bsl v3.16b, v1.16b, v0.16b
    257 ; CHECK-NEXT:    mov v0.16b, v2.16b
    258 ; CHECK-NEXT:    mov v1.16b, v3.16b
    259 ; CHECK-NEXT:    ret
    260   %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
    261   ret <4 x double> %1
    262 }
    263 
    ; Scalar f32 reciprocal square root, written as a fast-math fdiv of 1.0 by
    ; a fast-math llvm.sqrt.f32 call.
    ; FAULT prefix: fsqrt, then fmov of the constant 1.0 and an fdiv.
    ; Default prefix: frsqrte estimate refined by two frsqrts steps; unlike the
    ; plain sqrt tests in this file, no compare-with-zero select is expected.
    264 define float @frsqrt(float %a) #0 {
    265 ; FAULT-LABEL: frsqrt:
    266 ; FAULT:       // %bb.0:
    267 ; FAULT-NEXT:    fsqrt s0, s0
    268 ; FAULT-NEXT:    fmov s1, #1.00000000
    269 ; FAULT-NEXT:    fdiv s0, s1, s0
    270 ; FAULT-NEXT:    ret
    271 ;
    272 ; CHECK-LABEL: frsqrt:
    273 ; CHECK:       // %bb.0:
    274 ; CHECK-NEXT:    frsqrte s1, s0
    275 ; CHECK-NEXT:    fmul s2, s1, s1
    276 ; CHECK-NEXT:    frsqrts s2, s0, s2
    277 ; CHECK-NEXT:    fmul s1, s1, s2
    278 ; CHECK-NEXT:    fmul s2, s1, s1
    279 ; CHECK-NEXT:    frsqrts s0, s0, s2
    280 ; CHECK-NEXT:    fmul s0, s1, s0
    281 ; CHECK-NEXT:    ret
    282   %1 = tail call fast float @llvm.sqrt.f32(float %a)
    283   %2 = fdiv fast float 1.000000e+00, %1
    284   ret float %2
    285 }
    286 
    ; <2 x float> reciprocal square root: fast-math fdiv of a splat of 1.0 by a
    ; fast-math llvm.sqrt.v2f32 call.
    ; FAULT prefix: vector fsqrt, fmov of 1.0 into a .2s register, then fdiv.
    ; Default prefix: frsqrte estimate refined by two frsqrts steps on .2s
    ; lanes, with no compare-with-zero select.
    287 define <2 x float> @f2rsqrt(<2 x float> %a) #0 {
    288 ; FAULT-LABEL: f2rsqrt:
    289 ; FAULT:       // %bb.0:
    290 ; FAULT-NEXT:    fsqrt v0.2s, v0.2s
    291 ; FAULT-NEXT:    fmov v1.2s, #1.00000000
    292 ; FAULT-NEXT:    fdiv v0.2s, v1.2s, v0.2s
    293 ; FAULT-NEXT:    ret
    294 ;
    295 ; CHECK-LABEL: f2rsqrt:
    296 ; CHECK:       // %bb.0:
    297 ; CHECK-NEXT:    frsqrte v1.2s, v0.2s
    298 ; CHECK-NEXT:    fmul v2.2s, v1.2s, v1.2s
    299 ; CHECK-NEXT:    frsqrts v2.2s, v0.2s, v2.2s
    300 ; CHECK-NEXT:    fmul v1.2s, v1.2s, v2.2s
    301 ; CHECK-NEXT:    fmul v2.2s, v1.2s, v1.2s
    302 ; CHECK-NEXT:    frsqrts v0.2s, v0.2s, v2.2s
    303 ; CHECK-NEXT:    fmul v0.2s, v1.2s, v0.2s
    304 ; CHECK-NEXT:    ret
    305   %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
    306   %2 = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %1
    307   ret <2 x float> %2
    308 }
    309 
    ; <4 x float> reciprocal square root: fast-math fdiv of a splat of 1.0 by a
    ; fast-math llvm.sqrt.v4f32 call.
    ; FAULT prefix: vector fsqrt, fmov of 1.0 into a .4s register, then fdiv.
    ; Default prefix: frsqrte estimate refined by two frsqrts steps on .4s
    ; lanes, with no compare-with-zero select.
    310 define <4 x float> @f4rsqrt(<4 x float> %a) #0 {
    311 ; FAULT-LABEL: f4rsqrt:
    312 ; FAULT:       // %bb.0:
    313 ; FAULT-NEXT:    fsqrt v0.4s, v0.4s
    314 ; FAULT-NEXT:    fmov v1.4s, #1.00000000
    315 ; FAULT-NEXT:    fdiv v0.4s, v1.4s, v0.4s
    316 ; FAULT-NEXT:    ret
    317 ;
    318 ; CHECK-LABEL: f4rsqrt:
    319 ; CHECK:       // %bb.0:
    320 ; CHECK-NEXT:    frsqrte v1.4s, v0.4s
    321 ; CHECK-NEXT:    fmul v2.4s, v1.4s, v1.4s
    322 ; CHECK-NEXT:    frsqrts v2.4s, v0.4s, v2.4s
    323 ; CHECK-NEXT:    fmul v1.4s, v1.4s, v2.4s
    324 ; CHECK-NEXT:    fmul v2.4s, v1.4s, v1.4s
    325 ; CHECK-NEXT:    frsqrts v0.4s, v0.4s, v2.4s
    326 ; CHECK-NEXT:    fmul v0.4s, v1.4s, v0.4s
    327 ; CHECK-NEXT:    ret
    328   %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
    329   %2 = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
    330   ret <4 x float> %2
    331 }
    332 
    ; <8 x float> reciprocal square root: fast-math fdiv of a splat of 1.0 by a
    ; fast-math llvm.sqrt.v8f32 call. The expected code works on two .4s
    ; registers (v0 and v1).
    ; FAULT prefix: fsqrt on v1 and then v0, a single fmov of 1.0 into v2 that
    ; feeds both fdivs.
    ; Default prefix: each register gets an frsqrte estimate and two frsqrts
    ; steps; the instructions of the two sequences are interleaved rather than
    ; emitted back to back, and no compare-with-zero select is expected.
    333 define <8 x float> @f8rsqrt(<8 x float> %a) #0 {
    334 ; FAULT-LABEL: f8rsqrt:
    335 ; FAULT:       // %bb.0:
    336 ; FAULT-NEXT:    fsqrt v1.4s, v1.4s
    337 ; FAULT-NEXT:    fsqrt v0.4s, v0.4s
    338 ; FAULT-NEXT:    fmov v2.4s, #1.00000000
    339 ; FAULT-NEXT:    fdiv v0.4s, v2.4s, v0.4s
    340 ; FAULT-NEXT:    fdiv v1.4s, v2.4s, v1.4s
    341 ; FAULT-NEXT:    ret
    342 ;
    343 ; CHECK-LABEL: f8rsqrt:
    344 ; CHECK:       // %bb.0:
    345 ; CHECK-NEXT:    frsqrte v2.4s, v0.4s
    346 ; CHECK-NEXT:    fmul v4.4s, v2.4s, v2.4s
    347 ; CHECK-NEXT:    frsqrte v3.4s, v1.4s
    348 ; CHECK-NEXT:    frsqrts v4.4s, v0.4s, v4.4s
    349 ; CHECK-NEXT:    fmul v2.4s, v2.4s, v4.4s
    350 ; CHECK-NEXT:    fmul v4.4s, v3.4s, v3.4s
    351 ; CHECK-NEXT:    frsqrts v4.4s, v1.4s, v4.4s
    352 ; CHECK-NEXT:    fmul v3.4s, v3.4s, v4.4s
    353 ; CHECK-NEXT:    fmul v4.4s, v2.4s, v2.4s
    354 ; CHECK-NEXT:    frsqrts v0.4s, v0.4s, v4.4s
    355 ; CHECK-NEXT:    fmul v4.4s, v3.4s, v3.4s
    356 ; CHECK-NEXT:    frsqrts v1.4s, v1.4s, v4.4s
    357 ; CHECK-NEXT:    fmul v0.4s, v2.4s, v0.4s
    358 ; CHECK-NEXT:    fmul v1.4s, v3.4s, v1.4s
    359 ; CHECK-NEXT:    ret
    360   %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
    361   %2 = fdiv fast <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
    362   ret <8 x float> %2
    363 }
    364 
    ; Scalar f64 reciprocal square root, written as a fast-math fdiv of 1.0 by
    ; a fast-math llvm.sqrt.f64 call.
    ; FAULT prefix: fsqrt, then fmov of the constant 1.0 and an fdiv.
    ; Default prefix: frsqrte estimate refined by three frsqrts steps, with no
    ; compare-with-zero select.
    365 define double @drsqrt(double %a) #0 {
    366 ; FAULT-LABEL: drsqrt:
    367 ; FAULT:       // %bb.0:
    368 ; FAULT-NEXT:    fsqrt d0, d0
    369 ; FAULT-NEXT:    fmov d1, #1.00000000
    370 ; FAULT-NEXT:    fdiv d0, d1, d0
    371 ; FAULT-NEXT:    ret
    372 ;
    373 ; CHECK-LABEL: drsqrt:
    374 ; CHECK:       // %bb.0:
    375 ; CHECK-NEXT:    frsqrte d1, d0
    376 ; CHECK-NEXT:    fmul d2, d1, d1
    377 ; CHECK-NEXT:    frsqrts d2, d0, d2
    378 ; CHECK-NEXT:    fmul d1, d1, d2
    379 ; CHECK-NEXT:    fmul d2, d1, d1
    380 ; CHECK-NEXT:    frsqrts d2, d0, d2
    381 ; CHECK-NEXT:    fmul d1, d1, d2
    382 ; CHECK-NEXT:    fmul d2, d1, d1
    383 ; CHECK-NEXT:    frsqrts d0, d0, d2
    384 ; CHECK-NEXT:    fmul d0, d1, d0
    385 ; CHECK-NEXT:    ret
    386   %1 = tail call fast double @llvm.sqrt.f64(double %a)
    387   %2 = fdiv fast double 1.000000e+00, %1
    388   ret double %2
    389 }
    390 
    ; <2 x double> reciprocal square root: fast-math fdiv of a splat of 1.0 by
    ; a fast-math llvm.sqrt.v2f64 call.
    ; FAULT prefix: vector fsqrt, fmov of 1.0 into a .2d register, then fdiv.
    ; Default prefix: frsqrte estimate refined by three frsqrts steps on .2d
    ; lanes, with no compare-with-zero select.
    391 define <2 x double> @d2rsqrt(<2 x double> %a) #0 {
    392 ; FAULT-LABEL: d2rsqrt:
    393 ; FAULT:       // %bb.0:
    394 ; FAULT-NEXT:    fsqrt v0.2d, v0.2d
    395 ; FAULT-NEXT:    fmov v1.2d, #1.00000000
    396 ; FAULT-NEXT:    fdiv v0.2d, v1.2d, v0.2d
    397 ; FAULT-NEXT:    ret
    398 ;
    399 ; CHECK-LABEL: d2rsqrt:
    400 ; CHECK:       // %bb.0:
    401 ; CHECK-NEXT:    frsqrte v1.2d, v0.2d
    402 ; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
    403 ; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
    404 ; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
    405 ; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
    406 ; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
    407 ; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
    408 ; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
    409 ; CHECK-NEXT:    frsqrts v0.2d, v0.2d, v2.2d
    410 ; CHECK-NEXT:    fmul v0.2d, v1.2d, v0.2d
    411 ; CHECK-NEXT:    ret
    412   %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
    413   %2 = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, %1
    414   ret <2 x double> %2
    415 }
    416 
    ; <4 x double> reciprocal square root: fast-math fdiv of a splat of 1.0 by
    ; a fast-math llvm.sqrt.v4f64 call. The expected code works on two .2d
    ; registers (v0 and v1).
    ; FAULT prefix: fsqrt on v1 and then v0, a single fmov of 1.0 into v2 that
    ; feeds both fdivs.
    ; Default prefix: each register gets an frsqrte estimate and three frsqrts
    ; steps; the instructions of the two sequences are interleaved rather than
    ; emitted back to back, and no compare-with-zero select is expected.
    417 define <4 x double> @d4rsqrt(<4 x double> %a) #0 {
    418 ; FAULT-LABEL: d4rsqrt:
    419 ; FAULT:       // %bb.0:
    420 ; FAULT-NEXT:    fsqrt v1.2d, v1.2d
    421 ; FAULT-NEXT:    fsqrt v0.2d, v0.2d
    422 ; FAULT-NEXT:    fmov v2.2d, #1.00000000
    423 ; FAULT-NEXT:    fdiv v0.2d, v2.2d, v0.2d
    424 ; FAULT-NEXT:    fdiv v1.2d, v2.2d, v1.2d
    425 ; FAULT-NEXT:    ret
    426 ;
    427 ; CHECK-LABEL: d4rsqrt:
    428 ; CHECK:       // %bb.0:
    429 ; CHECK-NEXT:    frsqrte v2.2d, v0.2d
    430 ; CHECK-NEXT:    fmul v4.2d, v2.2d, v2.2d
    431 ; CHECK-NEXT:    frsqrte v3.2d, v1.2d
    432 ; CHECK-NEXT:    frsqrts v4.2d, v0.2d, v4.2d
    433 ; CHECK-NEXT:    fmul v2.2d, v2.2d, v4.2d
    434 ; CHECK-NEXT:    fmul v4.2d, v3.2d, v3.2d
    435 ; CHECK-NEXT:    frsqrts v4.2d, v1.2d, v4.2d
    436 ; CHECK-NEXT:    fmul v3.2d, v3.2d, v4.2d
    437 ; CHECK-NEXT:    fmul v4.2d, v2.2d, v2.2d
    438 ; CHECK-NEXT:    frsqrts v4.2d, v0.2d, v4.2d
    439 ; CHECK-NEXT:    fmul v2.2d, v2.2d, v4.2d
    440 ; CHECK-NEXT:    fmul v4.2d, v3.2d, v3.2d
    441 ; CHECK-NEXT:    frsqrts v4.2d, v1.2d, v4.2d
    442 ; CHECK-NEXT:    fmul v3.2d, v3.2d, v4.2d
    443 ; CHECK-NEXT:    fmul v4.2d, v2.2d, v2.2d
    444 ; CHECK-NEXT:    frsqrts v0.2d, v0.2d, v4.2d
    445 ; CHECK-NEXT:    fmul v4.2d, v3.2d, v3.2d
    446 ; CHECK-NEXT:    frsqrts v1.2d, v1.2d, v4.2d
    447 ; CHECK-NEXT:    fmul v0.2d, v2.2d, v0.2d
    448 ; CHECK-NEXT:    fmul v1.2d, v3.2d, v1.2d
    449 ; CHECK-NEXT:    ret
    450   %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
    451   %2 = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %1
    452   ret <4 x double> %2
    453 }
    454 
    ; Attribute groups referenced by the functions in this file.
    ; #0 only sets "unsafe-fp-math"="true"; #1 sets the same attribute and
    ; additionally "denormal-fp-math"="ieee" (used by the two *_ieee_denorms
    ; functions).
    455 attributes #0 = { "unsafe-fp-math"="true" }
    456 attributes #1 = { "unsafe-fp-math"="true" "denormal-fp-math"="ieee" }
    457 
    458