; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck %s
; RUN: opt -S -amdgpu-codegenprepare %s | FileCheck -check-prefix=NOOP %s
; Make sure this doesn't crash with no triple

; Regression test for how -amdgpu-codegenprepare treats fdiv instructions
; depending on their !fpmath accuracy metadata, fast-math flags, operand shape
; and the fp32-denormals target feature.
;
; Accuracy metadata used throughout (defined at the bottom of the file):
;   !0 = 2.5 ulp, !1 = 0.5 ulp, !2 = 1.0 ulp, !3 = 3.0 ulp
;
; Checks for instructions that carry no metadata end with {{$}} so that an
; unexpectedly attached !fpmath node makes the test fail.

; Second run line only (no target triple): the 2.5 ulp fdiv must be left as is.
; The kernel uses attribute group #1 like the other non-denormal kernels; keep
; every referenced attribute group defined at the bottom of the file.
; NOOP-LABEL: @noop_fdiv_fpmath(
; NOOP: %md.25ulp = fdiv float %a, %b, !fpmath !0
define amdgpu_kernel void @noop_fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 {
  %md.25ulp = fdiv float %a, %b, !fpmath !0
  store volatile float %md.25ulp, float addrspace(1)* %out
  ret void
}

; Scalar f32 division, amdgcn triple:
;   - no metadata, 0.5 ulp and 1.0 ulp stay plain fdiv;
;   - 2.5 ulp and 3.0 ulp become calls to llvm.amdgcn.fdiv.fast and keep their
;     !fpmath node;
;   - fast / arcp divisions with 2.5 ulp stay fdiv.
; CHECK-LABEL: @fdiv_fpmath(
; CHECK: %no.md = fdiv float %a, %b{{$}}
; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1
; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2
; CHECK: %md.25ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
; CHECK: %md.3ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !3
; CHECK: %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
; CHECK: %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
define amdgpu_kernel void @fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 {
  %no.md = fdiv float %a, %b
  store volatile float %no.md, float addrspace(1)* %out

  %md.half.ulp = fdiv float %a, %b, !fpmath !1
  store volatile float %md.half.ulp, float addrspace(1)* %out

  %md.1ulp = fdiv float %a, %b, !fpmath !2
  store volatile float %md.1ulp, float addrspace(1)* %out

  %md.25ulp = fdiv float %a, %b, !fpmath !0
  store volatile float %md.25ulp, float addrspace(1)* %out

  %md.3ulp = fdiv float %a, %b, !fpmath !3
  store volatile float %md.3ulp, float addrspace(1)* %out

  %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
  store volatile float %fast.md.25ulp, float addrspace(1)* %out

  %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
  store volatile float %arcp.md.25ulp, float addrspace(1)* %out

  ret void
}

; Scalar reciprocal pattern (numerator is the constant 1.0): every variant,
; including the 2.5 ulp one, is expected to remain an fdiv.
; CHECK-LABEL: @rcp_fdiv_fpmath(
; CHECK: %no.md = fdiv float 1.000000e+00, %x{{$}}
; CHECK: %md.25ulp = fdiv float 1.000000e+00, %x, !fpmath !0
; CHECK: %md.half.ulp = fdiv float 1.000000e+00, %x, !fpmath !1
; CHECK: %arcp.no.md = fdiv arcp float 1.000000e+00, %x{{$}}
; CHECK: %arcp.25ulp = fdiv arcp float 1.000000e+00, %x, !fpmath !0
; CHECK: %fast.no.md = fdiv fast float 1.000000e+00, %x{{$}}
; CHECK: %fast.25ulp = fdiv fast float 1.000000e+00, %x, !fpmath !0
define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #1 {
  %no.md = fdiv float 1.0, %x
  store volatile float %no.md, float addrspace(1)* %out

  %md.25ulp = fdiv float 1.0, %x, !fpmath !0
  store volatile float %md.25ulp, float addrspace(1)* %out

  %md.half.ulp = fdiv float 1.0, %x, !fpmath !1
  store volatile float %md.half.ulp, float addrspace(1)* %out

  %arcp.no.md = fdiv arcp float 1.0, %x
  store volatile float %arcp.no.md, float addrspace(1)* %out

  %arcp.25ulp = fdiv arcp float 1.0, %x, !fpmath !0
  store volatile float %arcp.25ulp, float addrspace(1)* %out

  %fast.no.md = fdiv fast float 1.0, %x
  store volatile float %fast.no.md, float addrspace(1)* %out

  %fast.25ulp = fdiv fast float 1.0, %x, !fpmath !0
  store volatile float %fast.25ulp, float addrspace(1)* %out

  ret void
}

; <2 x float> division: no metadata, 0.5 ulp and 1.0 ulp stay vector fdivs.
; The 2.5 ulp division is expected to be split per lane: extract both operands,
; call llvm.amdgcn.fdiv.fast on the scalars, and rebuild the vector with
; insertelement (the final insert takes over the original value name).
; CHECK-LABEL: @fdiv_fpmath_vector(
; CHECK: %no.md = fdiv <2 x float> %a, %b{{$}}
; CHECK: %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
; CHECK: %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2

; CHECK: %[[A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
; CHECK: %[[B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
; CHECK: %[[FDIV0:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A0]], float %[[B0]]), !fpmath !0
; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FDIV0]], i64 0
; CHECK: %[[A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
; CHECK: %[[B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
; CHECK: %[[FDIV1:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A1]], float %[[B1]]), !fpmath !0
; CHECK: %md.25ulp = insertelement <2 x float> %[[INS0]], float %[[FDIV1]], i64 1
define amdgpu_kernel void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #1 {
  %no.md = fdiv <2 x float> %a, %b
  store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

  %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
  store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out

  %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2
  store volatile <2 x float> %md.1ulp, <2 x float> addrspace(1)* %out

  %md.25ulp = fdiv <2 x float> %a, %b, !fpmath !0
  store volatile <2 x float> %md.25ulp, <2 x float> addrspace(1)* %out

  ret void
}

; Vector reciprocal pattern with a splat 1.0 numerator: all variants are
; expected to remain vector fdivs.
; CHECK-LABEL: @rcp_fdiv_fpmath_vector(
; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
; CHECK: %md.half.ulp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !1
; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
; CHECK: %arcp.25ulp = fdiv arcp <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0
; CHECK: %fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0
; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
  %no.md = fdiv <2 x float> <float 1.0, float 1.0>, %x
  store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

  %md.half.ulp = fdiv <2 x float> <float 1.0, float 1.0>, %x, !fpmath !1
  store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out

  %arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x
  store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out

  %fast.no.md = fdiv fast <2 x float> <float 1.0, float 1.0>, %x
  store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out

  %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
  store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out

  %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
  store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out

  ret void
}

; Same as above but the constant numerator is <1.0, 2.0> (not a splat): all
; variants are expected to remain vector fdivs.
; CHECK-LABEL: @rcp_fdiv_fpmath_vector_nonsplat(
; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}}
; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}}
; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}}
; CHECK: %arcp.25ulp = fdiv arcp <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0
; CHECK: %fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0
; CHECK: store volatile <2 x float> %fast.25ulp
define amdgpu_kernel void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
  %no.md = fdiv <2 x float> <float 1.0, float 2.0>, %x
  store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

  %arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x
  store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out

  %fast.no.md = fdiv fast <2 x float> <float 1.0, float 2.0>, %x
  store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out

  %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
  store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out

  %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
  store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out

  ret void
}

; Numerator is a non-constant vector whose lane 0 was overwritten with 1.0 via
; insertelement; both divisions are currently expected to stay vector fdivs.
; FIXME: Should be able to get fdiv for 1.0 component
; CHECK-LABEL: @rcp_fdiv_fpmath_vector_partial_constant(
; CHECK: %arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0
; CHECK: store volatile <2 x float> %arcp.25ulp

; CHECK: %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0
; CHECK: store volatile <2 x float> %fast.25ulp
define amdgpu_kernel void @rcp_fdiv_fpmath_vector_partial_constant(<2 x float> addrspace(1)* %out, <2 x float> %x, <2 x float> %y) #1 {
  %x.insert = insertelement <2 x float> %x, float 1.0, i32 0

  %arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0
  store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out

  %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0
  store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out

  ret void
}

; Same divisions as @fdiv_fpmath, but the kernel carries attribute group #2
; ("+fp32-denormals"): none of them is expected to be replaced, including the
; 2.5 ulp and 3.0 ulp ones.
; CHECK-LABEL: @fdiv_fpmath_f32_denormals(
; CHECK: %no.md = fdiv float %a, %b{{$}}
; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1
; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2
; CHECK: %md.25ulp = fdiv float %a, %b, !fpmath !0
; CHECK: %md.3ulp = fdiv float %a, %b, !fpmath !3
; CHECK: %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
; CHECK: %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
define amdgpu_kernel void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
  %no.md = fdiv float %a, %b
  store volatile float %no.md, float addrspace(1)* %out

  %md.half.ulp = fdiv float %a, %b, !fpmath !1
  store volatile float %md.half.ulp, float addrspace(1)* %out

  %md.1ulp = fdiv float %a, %b, !fpmath !2
  store volatile float %md.1ulp, float addrspace(1)* %out

  %md.25ulp = fdiv float %a, %b, !fpmath !0
  store volatile float %md.25ulp, float addrspace(1)* %out

  %md.3ulp = fdiv float %a, %b, !fpmath !3
  store volatile float %md.3ulp, float addrspace(1)* %out

  %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
  store volatile float %fast.md.25ulp, float addrspace(1)* %out

  %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
  store volatile float %arcp.md.25ulp, float addrspace(1)* %out

  ret void
}

; Attribute groups. #1 is the default for the kernels above; #2 additionally
; enables fp32 denormals. #0 is not referenced by any function in this file.
attributes #0 = { nounwind optnone noinline }
attributes #1 = { nounwind }
attributes #2 = { nounwind "target-features"="+fp32-denormals" }

; The printed module must keep the accuracy metadata nodes and their numbering.
; CHECK: !0 = !{float 2.500000e+00}
; CHECK: !1 = !{float 5.000000e-01}
; CHECK: !2 = !{float 1.000000e+00}
; CHECK: !3 = !{float 3.000000e+00}

!0 = !{float 2.500000e+00}
!1 = !{float 5.000000e-01}
!2 = !{float 1.000000e+00}
!3 = !{float 3.000000e+00}