; Tests NVPTX lowering of sqrt, div, reciprocal-sqrt, add, sin and cos under
; the "unsafe-fp-math" and "nvptx-f32ftz" function attributes.
      1 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
      2 
      3 declare float @llvm.sqrt.f32(float)
      4 declare double @llvm.sqrt.f64(double)
      5 
      6 ; CHECK-LABEL: sqrt_div(
      7 ; CHECK: sqrt.rn.f32
      8 ; CHECK: div.rn.f32
      9 define float @sqrt_div(float %a, float %b) {
     10   %t1 = tail call float @llvm.sqrt.f32(float %a)
     11   %t2 = fdiv float %t1, %b
     12   ret float %t2
     13 }
     14 
     15 ; CHECK-LABEL: sqrt_div_fast(
     16 ; CHECK: sqrt.approx.f32
     17 ; CHECK: div.approx.f32
     18 define float @sqrt_div_fast(float %a, float %b) #0 {
     19   %t1 = tail call float @llvm.sqrt.f32(float %a)
     20   %t2 = fdiv float %t1, %b
     21   ret float %t2
     22 }
     23 
     24 ; CHECK-LABEL: sqrt_div_ftz(
     25 ; CHECK: sqrt.rn.ftz.f32
     26 ; CHECK: div.rn.ftz.f32
     27 define float @sqrt_div_ftz(float %a, float %b) #1 {
     28   %t1 = tail call float @llvm.sqrt.f32(float %a)
     29   %t2 = fdiv float %t1, %b
     30   ret float %t2
     31 }
     32 
     33 ; CHECK-LABEL: sqrt_div_fast_ftz(
     34 ; CHECK: sqrt.approx.ftz.f32
     35 ; CHECK: div.approx.ftz.f32
     36 define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 {
     37   %t1 = tail call float @llvm.sqrt.f32(float %a)
     38   %t2 = fdiv float %t1, %b
     39   ret float %t2
     40 }
     41 
     42 ; There are no fast-math or ftz versions of sqrt and div for f64.  We use
     43 ; reciprocal(rsqrt(x)) for sqrt(x), and emit a vanilla divide.
     44 
     45 ; CHECK-LABEL: sqrt_div_fast_ftz_f64(
     46 ; CHECK: rsqrt.approx.f64
     47 ; CHECK: rcp.approx.ftz.f64
     48 ; CHECK: div.rn.f64
     49 define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 {
     50   %t1 = tail call double @llvm.sqrt.f64(double %a)
     51   %t2 = fdiv double %t1, %b
     52   ret double %t2
     53 }
     54 
     55 ; CHECK-LABEL: rsqrt(
     56 ; CHECK-NOT: rsqrt.approx
     57 ; CHECK: sqrt.rn.f32
     58 ; CHECK-NOT: rsqrt.approx
     59 define float @rsqrt(float %a) {
     60   %b = tail call float @llvm.sqrt.f32(float %a)
     61   %ret = fdiv float 1.0, %b
     62   ret float %ret
     63 }
     64 
     65 ; CHECK-LABEL: rsqrt_fast(
     66 ; CHECK-NOT: div.
     67 ; CHECK-NOT: sqrt.
     68 ; CHECK: rsqrt.approx.f32
     69 ; CHECK-NOT: div.
     70 ; CHECK-NOT: sqrt.
     71 define float @rsqrt_fast(float %a) #0 {
     72   %b = tail call float @llvm.sqrt.f32(float %a)
     73   %ret = fdiv float 1.0, %b
     74   ret float %ret
     75 }
     76 
     77 ; CHECK-LABEL: rsqrt_fast_ftz(
     78 ; CHECK-NOT: div.
     79 ; CHECK-NOT: sqrt.
     80 ; CHECK: rsqrt.approx.ftz.f32
     81 ; CHECK-NOT: div.
     82 ; CHECK-NOT: sqrt.
     83 define float @rsqrt_fast_ftz(float %a) #0 #1 {
     84   %b = tail call float @llvm.sqrt.f32(float %a)
     85   %ret = fdiv float 1.0, %b
     86   ret float %ret
     87 }
     88 
     89 ; CHECK-LABEL: fadd
     90 ; CHECK: add.rn.f32
     91 define float @fadd(float %a, float %b) {
     92   %t1 = fadd float %a, %b
     93   ret float %t1
     94 }
     95 
     96 ; CHECK-LABEL: fadd_ftz
     97 ; CHECK: add.rn.ftz.f32
     98 define float @fadd_ftz(float %a, float %b) #1 {
     99   %t1 = fadd float %a, %b
    100   ret float %t1
    101 }
    102 
    103 declare float @llvm.sin.f32(float)
    104 declare float @llvm.cos.f32(float)
    105 
    106 ; CHECK-LABEL: fsin_approx
    107 ; CHECK:       sin.approx.f32
    108 define float @fsin_approx(float %a) #0 {
    109   %r = tail call float @llvm.sin.f32(float %a)
    110   ret float %r
    111 }
    112 
    113 ; CHECK-LABEL: fcos_approx
    114 ; CHECK:       cos.approx.f32
    115 define float @fcos_approx(float %a) #0 {
    116   %r = tail call float @llvm.cos.f32(float %a)
    117   ret float %r
    118 }
    119 
    120 ; CHECK-LABEL: repeated_div_recip_allowed
    121 define float @repeated_div_recip_allowed(i1 %pred, float %a, float %b, float %divisor) {
    122 ; CHECK: rcp.rn.f32
    123 ; CHECK: mul.rn.f32
    124 ; CHECK: mul.rn.f32
    125   %x = fdiv arcp float %a, %divisor
    126   %y = fdiv arcp float %b, %divisor
    127   %z = select i1 %pred, float %x, float %y
    128   ret float %z
    129 }
    130 
    131 ; CHECK-LABEL: repeated_div_recip_allowed_ftz
    132 define float @repeated_div_recip_allowed_ftz(i1 %pred, float %a, float %b, float %divisor) #1 {
    133 ; CHECK: rcp.rn.ftz.f32
    134 ; CHECK: mul.rn.ftz.f32
    135 ; CHECK: mul.rn.ftz.f32
    136   %x = fdiv arcp float %a, %divisor
    137   %y = fdiv arcp float %b, %divisor
    138   %z = select i1 %pred, float %x, float %y
    139   ret float %z
    140 }
    141 
    142 ; CHECK-LABEL: repeated_div_fast
    143 define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 {
    144 ; CHECK: rcp.approx.f32
    145 ; CHECK: mul.f32
    146 ; CHECK: mul.f32
    147   %x = fdiv float %a, %divisor
    148   %y = fdiv float %b, %divisor
    149   %z = select i1 %pred, float %x, float %y
    150   ret float %z
    151 }
    152 
    153 ; CHECK-LABEL: repeated_div_fast_ftz
    154 define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #0 #1 {
    155 ; CHECK: rcp.approx.ftz.f32
    156 ; CHECK: mul.ftz.f32
    157 ; CHECK: mul.ftz.f32
    158   %x = fdiv float %a, %divisor
    159   %y = fdiv float %b, %divisor
    160   %z = select i1 %pred, float %x, float %y
    161   ret float %z
    162 }
    163 
    164 attributes #0 = { "unsafe-fp-math" = "true" }
    165 attributes #1 = { "nvptx-f32ftz" = "true" }
    166