; RUN: llc < %s -march=nvptx -mcpu=sm_20 -nvptx-prec-divf32=0 -nvptx-prec-sqrtf32=0 \
; RUN:   | FileCheck %s

; Verifies the PTX instructions selected for llvm.sqrt and for 1.0 / llvm.sqrt
; on f32 and f64 when every function carries "unsafe-fp-math" (#0) and llc is
; invoked with the two -nvptx-prec-* options above set to 0.
;
; Each function is introduced by a label directive naming it, so that the
; instruction patterns that follow are matched only against that function's
; output and cannot be satisfied by a neighbouring function.

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"

declare float @llvm.sqrt.f32(float)
declare double @llvm.sqrt.f64(double)

; -- reciprocal sqrt --

; CHECK-LABEL: test_rsqrt32
define float @test_rsqrt32(float %a) #0 {
; CHECK: rsqrt.approx.f32
  %val = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %val
  ret float %ret
}

; CHECK-LABEL: test_rsqrt_ftz
define float @test_rsqrt_ftz(float %a) #0 #1 {
; CHECK: rsqrt.approx.ftz.f32
  %val = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %val
  ret float %ret
}

; CHECK-LABEL: test_rsqrt64
define double @test_rsqrt64(double %a) #0 {
; CHECK: rsqrt.approx.f64
  %val = tail call double @llvm.sqrt.f64(double %a)
  %ret = fdiv double 1.0, %val
  ret double %ret
}

; CHECK-LABEL: test_rsqrt64_ftz
define double @test_rsqrt64_ftz(double %a) #0 #1 {
; There's no rsqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
; CHECK: rsqrt.approx.f64
  %val = tail call double @llvm.sqrt.f64(double %a)
  %ret = fdiv double 1.0, %val
  ret double %ret
}

; -- sqrt --

; CHECK-LABEL: test_sqrt32
define float @test_sqrt32(float %a) #0 {
; CHECK: sqrt.approx.f32
  %ret = tail call float @llvm.sqrt.f32(float %a)
  ret float %ret
}

; CHECK-LABEL: test_sqrt_ftz
define float @test_sqrt_ftz(float %a) #0 #1 {
; CHECK: sqrt.approx.ftz.f32
  %ret = tail call float @llvm.sqrt.f32(float %a)
  ret float %ret
}

; CHECK-LABEL: test_sqrt64
define double @test_sqrt64(double %a) #0 {
; There's no sqrt.approx.f64 instruction; we emit
; reciprocal(rsqrt.approx.f64(x)).  There's no non-ftz approximate reciprocal,
; so we just use the ftz version.
; CHECK: rsqrt.approx.f64
; CHECK: rcp.approx.ftz.f64
  %ret = tail call double @llvm.sqrt.f64(double %a)
  ret double %ret
}

; CHECK-LABEL: test_sqrt64_ftz
define double @test_sqrt64_ftz(double %a) #0 #1 {
; There's no sqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
; CHECK: rsqrt.approx.f64
; CHECK: rcp.approx.ftz.f64
  %ret = tail call double @llvm.sqrt.f64(double %a)
  ret double %ret
}

; -- refined sqrt and rsqrt --
;
; The sqrt and rsqrt refinement algorithms both emit an rsqrt.approx, followed
; by some math.

; CHECK-LABEL: test_rsqrt32_refined
define float @test_rsqrt32_refined(float %a) #0 #2 {
; CHECK: rsqrt.approx.f32
  %val = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %val
  ret float %ret
}

; CHECK-LABEL: test_sqrt32_refined
define float @test_sqrt32_refined(float %a) #0 #2 {
; CHECK: rsqrt.approx.f32
  %ret = tail call float @llvm.sqrt.f32(float %a)
  ret float %ret
}

; CHECK-LABEL: test_rsqrt64_refined
define double @test_rsqrt64_refined(double %a) #0 #2 {
; CHECK: rsqrt.approx.f64
  %val = tail call double @llvm.sqrt.f64(double %a)
  %ret = fdiv double 1.0, %val
  ret double %ret
}

; CHECK-LABEL: test_sqrt64_refined
define double @test_sqrt64_refined(double %a) #0 #2 {
; CHECK: rsqrt.approx.f64
  %ret = tail call double @llvm.sqrt.f64(double %a)
  ret double %ret
}

; -- refined sqrt and rsqrt with ftz enabled --

; CHECK-LABEL: test_rsqrt32_refined_ftz
define float @test_rsqrt32_refined_ftz(float %a) #0 #1 #2 {
; CHECK: rsqrt.approx.ftz.f32
  %val = tail call float @llvm.sqrt.f32(float %a)
  %ret = fdiv float 1.0, %val
  ret float %ret
}

; CHECK-LABEL: test_sqrt32_refined_ftz
define float @test_sqrt32_refined_ftz(float %a) #0 #1 #2 {
; CHECK: rsqrt.approx.ftz.f32
  %ret = tail call float @llvm.sqrt.f32(float %a)
  ret float %ret
}

; CHECK-LABEL: test_rsqrt64_refined_ftz
define double @test_rsqrt64_refined_ftz(double %a) #0 #1 #2 {
; There's no rsqrt.approx.ftz.f64, so we just use the non-ftz version.
; CHECK: rsqrt.approx.f64
  %val = tail call double @llvm.sqrt.f64(double %a)
  %ret = fdiv double 1.0, %val
  ret double %ret
}

; CHECK-LABEL: test_sqrt64_refined_ftz
define double @test_sqrt64_refined_ftz(double %a) #0 #1 #2 {
; CHECK: rsqrt.approx.f64
  %ret = tail call double @llvm.sqrt.f64(double %a)
  ret double %ret
}

; Attribute groups used above:
;   #0 - present on every function; marks it "unsafe-fp-math".
;   #1 - present on the *_ftz functions, whose f32 patterns expect the .ftz
;        instruction variants.
;   #2 - present on the *_refined functions; supplies the per-operation
;        "reciprocal-estimates" settings for f32/f64 sqrt and rsqrt.
attributes #0 = { "unsafe-fp-math" = "true" }
attributes #1 = { "nvptx-f32ftz" = "true" }
attributes #2 = { "reciprocal-estimates" = "rsqrtf:1,rsqrtd:1,sqrtf:1,sqrtd:1" }