Home | History | Annotate | Download | only in NVPTX
      1 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-prec-divf32=0 -nvptx-prec-sqrtf32=0 \
      2 ; RUN:   | FileCheck %s
      3 
      4 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
      5 
      6 declare float @llvm.sqrt.f32(float)
      7 declare double @llvm.sqrt.f64(double)
      8 
      9 ; -- reciprocal sqrt --
     10 
     11 ; CHECK-LABEL: test_rsqrt32
     12 define float @test_rsqrt32(float %a) #0 {           ; #0 = "unsafe-fp-math"
     13 ; CHECK: rsqrt.approx.f32
     14   %val = tail call float @llvm.sqrt.f32(float %a)
     15   %ret = fdiv float 1.0, %val                       ; 1/sqrt(a): expected to become the approximate rsqrt
     16   ret float %ret
     17 }
     18 
     19 ; CHECK-LABEL: test_rsqrt_ftz
     20 define float @test_rsqrt_ftz(float %a) #0 #1 {      ; #1 = "nvptx-f32ftz": the .ftz form is expected
     21 ; CHECK: rsqrt.approx.ftz.f32
     22   %val = tail call float @llvm.sqrt.f32(float %a)
     23   %ret = fdiv float 1.0, %val                       ; 1/sqrt(a)
     24   ret float %ret
     25 }
     26 
     27 ; CHECK-LABEL: test_rsqrt64
     28 define double @test_rsqrt64(double %a) #0 {         ; f64 variant of test_rsqrt32
     29 ; CHECK: rsqrt.approx.f64
     30   %val = tail call double @llvm.sqrt.f64(double %a)
     31   %ret = fdiv double 1.0, %val                      ; 1/sqrt(a)
     32   ret double %ret
     33 }
     34 
     35 ; CHECK-LABEL: test_rsqrt64_ftz
     36 define double @test_rsqrt64_ftz(double %a) #0 #1 {  ; #1 = "nvptx-f32ftz"
     37 ; There's no rsqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
     38 ; CHECK: rsqrt.approx.f64
     39   %val = tail call double @llvm.sqrt.f64(double %a)
     40   %ret = fdiv double 1.0, %val                      ; 1/sqrt(a)
     41   ret double %ret
     42 }
     43 
     44 ; -- sqrt --
     45 
     46 ; CHECK-LABEL: test_sqrt32
     47 define float @test_sqrt32(float %a) #0 {            ; #0 = "unsafe-fp-math"
     48 ; CHECK: sqrt.approx.f32
     49   %ret = tail call float @llvm.sqrt.f32(float %a)   ; plain sqrt(a), no reciprocal
     50   ret float %ret
     51 }
     52 
     53 ; CHECK-LABEL: test_sqrt_ftz
     54 define float @test_sqrt_ftz(float %a) #0 #1 {       ; #1 = "nvptx-f32ftz": the .ftz form is expected
     55 ; CHECK: sqrt.approx.ftz.f32
     56   %ret = tail call float @llvm.sqrt.f32(float %a)   ; plain sqrt(a), no reciprocal
     57   ret float %ret
     58 }
     59 
     60 ; CHECK-LABEL: test_sqrt64
     61 define double @test_sqrt64(double %a) #0 {          ; #0 = "unsafe-fp-math"
     62 ; There's no sqrt.approx.f64 instruction; we emit
     63 ; reciprocal(rsqrt.approx.f64(x)).  There's no non-ftz approximate reciprocal,
     64 ; so we just use the ftz version.
     65 ; CHECK: rsqrt.approx.f64
     66 ; CHECK: rcp.approx.ftz.f64
     67   %ret = tail call double @llvm.sqrt.f64(double %a) ; plain sqrt(a), no reciprocal in the IR
     68   ret double %ret
     69 }
     70 
     71 ; CHECK-LABEL: test_sqrt64_ftz
     72 define double @test_sqrt64_ftz(double %a) #0 #1 {   ; #1 = "nvptx-f32ftz"
     73 ; No f64 sqrt.approx exists (ftz or not); same expectation as test_sqrt64: rcp.approx.ftz.f64 of rsqrt.approx.f64.
     74 ; CHECK: rsqrt.approx.f64
     75 ; CHECK: rcp.approx.ftz.f64
     76   %ret = tail call double @llvm.sqrt.f64(double %a) ; plain sqrt(a), no reciprocal in the IR
     77   ret double %ret
     78 }
     79 
     80 ; -- refined sqrt and rsqrt --
     81 ;
     82 ; The sqrt and rsqrt refinement algorithms both emit an rsqrt.approx, followed
     83 ; by some math.
     84 
     85 ; CHECK-LABEL: test_rsqrt32_refined
     86 define float @test_rsqrt32_refined(float %a) #0 #2 {          ; #2 = "reciprocal-estimates"
     87 ; CHECK: rsqrt.approx.f32
     88   %root = tail call float @llvm.sqrt.f32(float %a)            ; sqrt(a)
     89   %recip = fdiv float 1.000000e+00, %root                     ; 1/sqrt(a)
     90   ret float %recip
     91 }
     92 
     93 ; CHECK-LABEL: test_sqrt32_refined
     94 define float @test_sqrt32_refined(float %a) #0 #2 {           ; #2 = "reciprocal-estimates"
     95 ; CHECK: rsqrt.approx.f32
     96   %root = tail call float @llvm.sqrt.f32(float %a)            ; sqrt(a); an rsqrt.approx is still expected
     97   ret float %root
     98 }
     99 
    100 ; CHECK-LABEL: test_rsqrt64_refined
    101 define double @test_rsqrt64_refined(double %a) #0 #2 {        ; #2 = "reciprocal-estimates"
    102 ; CHECK: rsqrt.approx.f64
    103   %root = tail call double @llvm.sqrt.f64(double %a)          ; sqrt(a)
    104   %recip = fdiv double 1.000000e+00, %root                    ; 1/sqrt(a)
    105   ret double %recip
    106 }
    107 
    108 ; CHECK-LABEL: test_sqrt64_refined
    109 define double @test_sqrt64_refined(double %a) #0 #2 {         ; #2 = "reciprocal-estimates"
    110 ; CHECK: rsqrt.approx.f64
    111   %root = tail call double @llvm.sqrt.f64(double %a)          ; sqrt(a); an rsqrt.approx is still expected
    112   ret double %root
    113 }
    114 
    115 ; -- refined sqrt and rsqrt with ftz enabled --
    116 
    117 ; CHECK-LABEL: test_rsqrt32_refined_ftz
    118 define float @test_rsqrt32_refined_ftz(float %a) #0 #1 #2 {   ; #1 = "nvptx-f32ftz", #2 = "reciprocal-estimates"
    119 ; CHECK: rsqrt.approx.ftz.f32
    120   %root = tail call float @llvm.sqrt.f32(float %a)            ; sqrt(a)
    121   %recip = fdiv float 1.000000e+00, %root                     ; 1/sqrt(a)
    122   ret float %recip
    123 }
    124 
    125 ; CHECK-LABEL: test_sqrt32_refined_ftz
    126 define float @test_sqrt32_refined_ftz(float %a) #0 #1 #2 {    ; #1 = "nvptx-f32ftz", #2 = "reciprocal-estimates"
    127 ; CHECK: rsqrt.approx.ftz.f32
    128   %root = tail call float @llvm.sqrt.f32(float %a)            ; sqrt(a); the .ftz rsqrt is expected
    129   ret float %root
    130 }
    131 
    132 ; CHECK-LABEL: test_rsqrt64_refined_ftz
    133 define double @test_rsqrt64_refined_ftz(double %a) #0 #1 #2 { ; #1 = "nvptx-f32ftz", #2 = "reciprocal-estimates"
    134 ; rsqrt.approx has no .ftz form for f64, so the plain f64 instruction is expected here.
    135 ; CHECK: rsqrt.approx.f64
    136   %root = tail call double @llvm.sqrt.f64(double %a)          ; sqrt(a)
    137   %recip = fdiv double 1.000000e+00, %root                    ; 1/sqrt(a)
    138   ret double %recip
    139 }
    140 
    141 ; CHECK-LABEL: test_sqrt64_refined_ftz
    142 define double @test_sqrt64_refined_ftz(double %a) #0 #1 #2 {  ; #1 = "nvptx-f32ftz", #2 = "reciprocal-estimates"
    143 ; CHECK: rsqrt.approx.f64
    144   %root = tail call double @llvm.sqrt.f64(double %a)          ; sqrt(a); non-ftz f64 rsqrt expected despite #1
    145   ret double %root
    146 }
    147 
    148 attributes #0 = { "unsafe-fp-math" = "true" }  ; attached to every test function above
    149 attributes #1 = { "nvptx-f32ftz" = "true" }  ; added by the *_ftz tests
    150 attributes #2 = { "reciprocal-estimates" = "rsqrtf:1,rsqrtd:1,sqrtf:1,sqrtd:1" }  ; added by the *_refined tests; presumably ":1" is the refinement step count -- confirm
    151