; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s

; Verifies which PTX instruction variants are selected for sqrt, div, add,
; sin and cos depending on the function attributes defined at the end of this
; file:
;   #0 sets "unsafe-fp-math" to "true"
;   #1 sets "nvptx-f32ftz" to "true"
;
; Every label pattern below ends in '(' so that it cannot also match a longer
; function name that shares the same prefix (for example fadd vs. fadd_ftz).

declare float @llvm.sqrt.f32(float)
declare double @llvm.sqrt.f64(double)

; No attributes: expects the .rn forms of sqrt and div.
; CHECK-LABEL: sqrt_div(
; CHECK: sqrt.rn.f32
; CHECK: div.rn.f32
define float @sqrt_div(float %a, float %b) {
  %t1 = tail call float @llvm.sqrt.f32(float %a)
  %t2 = fdiv float %t1, %b
  ret float %t2
}

; With #0: expects the .approx forms of sqrt and div.
; CHECK-LABEL: sqrt_div_fast(
; CHECK: sqrt.approx.f32
; CHECK: div.approx.f32
define float @sqrt_div_fast(float %a, float %b) #0 {
  %t1 = tail call float @llvm.sqrt.f32(float %a)
  %t2 = fdiv float %t1, %b
  ret float %t2
}

; With #1: expects the .rn.ftz forms of sqrt and div.
; CHECK-LABEL: sqrt_div_ftz(
; CHECK: sqrt.rn.ftz.f32
; CHECK: div.rn.ftz.f32
define float @sqrt_div_ftz(float %a, float %b) #1 {
  %t1 = tail call float @llvm.sqrt.f32(float %a)
  %t2 = fdiv float %t1, %b
  ret float %t2
}

; With #0 and #1: expects the .approx.ftz forms of sqrt and div.
; CHECK-LABEL: sqrt_div_fast_ftz(
; CHECK: sqrt.approx.ftz.f32
; CHECK: div.approx.ftz.f32
define float @sqrt_div_fast_ftz(float %a, float %b) #0 #1 {
  %t1 = tail call float @llvm.sqrt.f32(float %a)
  %t2 = fdiv float %t1, %b
  ret float %t2
}

; There are no fast-math or ftz versions of sqrt and div for f64.  We use
; reciprocal(rsqrt(x)) for sqrt(x), and emit a vanilla divide.

; CHECK-LABEL: sqrt_div_fast_ftz_f64(
; CHECK: rsqrt.approx.f64
; CHECK: rcp.approx.ftz.f64
; CHECK: div.rn.f64
define double @sqrt_div_fast_ftz_f64(double %a, double %b) #0 #1 {
  %t1 = tail call double @llvm.sqrt.f64(double %a)
  %t2 = fdiv double %t1, %b
  ret double %t2
}

; No attributes: 1.0 / sqrt(a) must keep sqrt.rn.f32 and must not be turned
; into rsqrt.approx.
; CHECK-LABEL: rsqrt(
; CHECK-NOT: rsqrt.approx
; CHECK: sqrt.rn.f32
; CHECK-NOT: rsqrt.approx
define float @rsqrt(float %a) {
  %b = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %b
  ret float %ret
}

; With #0: 1.0 / sqrt(a) is expected to become rsqrt.approx.f32, with no
; separate div or sqrt around it.
; CHECK-LABEL: rsqrt_fast(
; CHECK-NOT: div.
; CHECK-NOT: sqrt.
; CHECK: rsqrt.approx.f32
; CHECK-NOT: div.
; CHECK-NOT: sqrt.
define float @rsqrt_fast(float %a) #0 {
  %b = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %b
  ret float %ret
}

; With #0 and #1: same as rsqrt_fast, but expecting the .ftz variant.
; CHECK-LABEL: rsqrt_fast_ftz(
; CHECK-NOT: div.
; CHECK-NOT: sqrt.
; CHECK: rsqrt.approx.ftz.f32
; CHECK-NOT: div.
; CHECK-NOT: sqrt.
define float @rsqrt_fast_ftz(float %a) #0 #1 {
  %b = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %b
  ret float %ret
}

; No attributes: expects add.rn.f32.
; CHECK-LABEL: fadd(
; CHECK: add.rn.f32
define float @fadd(float %a, float %b) {
  %t1 = fadd float %a, %b
  ret float %t1
}

; With #1: expects add.rn.ftz.f32.
; CHECK-LABEL: fadd_ftz(
; CHECK: add.rn.ftz.f32
define float @fadd_ftz(float %a, float %b) #1 {
  %t1 = fadd float %a, %b
  ret float %t1
}

declare float @llvm.sin.f32(float)
declare float @llvm.cos.f32(float)

; With #0: the sin intrinsic is expected to select sin.approx.f32.
; CHECK-LABEL: fsin_approx(
; CHECK: sin.approx.f32
define float @fsin_approx(float %a) #0 {
  %r = tail call float @llvm.sin.f32(float %a)
  ret float %r
}

; With #0: the cos intrinsic is expected to select cos.approx.f32.
; CHECK-LABEL: fcos_approx(
; CHECK: cos.approx.f32
define float @fcos_approx(float %a) #0 {
  %r = tail call float @llvm.cos.f32(float %a)
  ret float %r
}

; Two arcp divisions by the same divisor: expects an rcp.rn.f32 followed by
; two mul.rn.f32.
; CHECK-LABEL: repeated_div_recip_allowed(
define float @repeated_div_recip_allowed(i1 %pred, float %a, float %b, float %divisor) {
; CHECK: rcp.rn.f32
; CHECK: mul.rn.f32
; CHECK: mul.rn.f32
  %x = fdiv arcp float %a, %divisor
  %y = fdiv arcp float %b, %divisor
  %z = select i1 %pred, float %x, float %y
  ret float %z
}

; Same as repeated_div_recip_allowed, with #1: expects the .ftz variants.
; CHECK-LABEL: repeated_div_recip_allowed_ftz(
define float @repeated_div_recip_allowed_ftz(i1 %pred, float %a, float %b, float %divisor) #1 {
; CHECK: rcp.rn.ftz.f32
; CHECK: mul.rn.ftz.f32
; CHECK: mul.rn.ftz.f32
  %x = fdiv arcp float %a, %divisor
  %y = fdiv arcp float %b, %divisor
  %z = select i1 %pred, float %x, float %y
  ret float %z
}

; Two plain divisions by the same divisor, with #0 on the function instead of
; arcp on the instructions: expects rcp.approx.f32 followed by two mul.f32.
; CHECK-LABEL: repeated_div_fast(
define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 {
; CHECK: rcp.approx.f32
; CHECK: mul.f32
; CHECK: mul.f32
  %x = fdiv float %a, %divisor
  %y = fdiv float %b, %divisor
  %z = select i1 %pred, float %x, float %y
  ret float %z
}

; Same as repeated_div_fast, with #1 added: expects the .ftz variants.
; CHECK-LABEL: repeated_div_fast_ftz(
define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #0 #1 {
; CHECK: rcp.approx.ftz.f32
; CHECK: mul.ftz.f32
; CHECK: mul.ftz.f32
  %x = fdiv float %a, %divisor
  %y = fdiv float %b, %divisor
  %z = select i1 %pred, float %x, float %y
  ret float %z
}

attributes #0 = { "unsafe-fp-math" = "true" }
attributes #1 = { "nvptx-f32ftz" = "true" }