; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GCN-FLUSH %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-EXCEPT,VI,GCN-FLUSH %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-DENORM,GCN-DENORM %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLUSH,GCN-FLUSH %s

; Checks that @llvm.canonicalize.* calls are eliminated when their operand is
; produced by an instruction that already canonicalizes its result, and that a
; canonicalize instruction (v_mul by 1.0 when flushing, v_max with itself when
; denormals are enabled) is still emitted when the operand may be non-canonical.

; GCN-LABEL: {{^}}test_no_fold_canonicalize_loaded_value_f32:
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
; GFX9-DENORM: v_max_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @test_no_fold_canonicalize_loaded_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %v = load float, float addrspace(1)* %gep, align 4
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_fmul_value_f32:
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fmul_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fmul float %load, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_sub_value_f32:
; GCN: v_sub_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sub_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fsub float 15.0, %load
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_add_value_f32:
; GCN: v_add_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_add_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fadd float %load, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_sqrt_value_f32:
; GCN: v_sqrt_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sqrt_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.sqrt.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fceil_value_f32:
; GCN: v_ceil_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fceil_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.ceil.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_floor_value_f32:
; GCN: v_floor_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_floor_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.floor.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fma_value_f32:
; GCN: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fma_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.fma.f32(float %load, float 15.0, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fmuladd_value_f32:
; GCN-FLUSH: v_mac_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GFX9-DENORM: v_fma_f32 [[V:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fmuladd_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.fmuladd.f32(float %load, float 15.0, float 15.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_canonicalize_value_f32:
; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]],
; GCN-FLUSH: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[LOAD]]
; GCN-DENORM: v_max_f32_e32 [[V:v[0-9]+]], [[LOAD]], [[LOAD]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_canonicalize_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = call float @llvm.canonicalize.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fpextend_value_f64_f32:
; GCN: v_cvt_f64_f32_e32 [[V:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f64_f32(float addrspace(1)* %arg, double addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fpext float %load to double
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
  store double %canonicalized, double addrspace(1)* %gep2, align 8
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fpextend_value_f32_f16:
; GCN: v_cvt_f32_f16_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpextend_value_f32_f16(half addrspace(1)* %arg, float addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = fpext half %load to float
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fpround_value_f32_f64:
; GCN: v_cvt_f32_f64_e32 [[V:v[0-9]+]], v[{{[0-9:]+}}]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f32_f64(double addrspace(1)* %arg, float addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %load = load double, double addrspace(1)* %gep, align 8
  %v = fptrunc double %load to float
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fpround_value_f16_f32:
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_f16_f32(float addrspace(1)* %arg, half addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fptrunc float %load to half
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fpround_value_v2f16_v2f32:
; GCN-DAG: v_cvt_f16_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; VI-DAG: v_cvt_f16_f32_sdwa [[V1:v[0-9]+]], v{{[0-9]+}}
; VI: v_or_b32_e32 [[V:v[0-9]+]], [[V0]], [[V1]]
; GFX9: v_cvt_f16_f32_e32 [[V1:v[0-9]+]], v{{[0-9]+}}
; GFX9: v_and_b32_e32 [[V0_16:v[0-9]+]], 0xffff, [[V0]]
; GFX9: v_lshl_or_b32 [[V:v[0-9]+]], [[V1]], 16, [[V0_16]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fpround_value_v2f16_v2f32(<2 x float> addrspace(1)* %arg, <2 x half> addrspace(1)* %out) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %arg, i32 %id
  %load = load <2 x float>, <2 x float> addrspace(1)* %gep, align 8
  %v = fptrunc <2 x float> %load to <2 x half>
  %canonicalized = tail call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %v)
  %gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i32 %id
  store <2 x half> %canonicalized, <2 x half> addrspace(1)* %gep2, align 4
  ret void
}

; GCN-LABEL: test_no_fold_canonicalize_fneg_value_f32:
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, -1.0, v{{[0-9]+}}
; GCN-DENORM: v_max_f32_e64 v{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = fsub float -0.0, %load
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fneg_value_f32:
; GCN: v_xor_b32_e32 [[V:v[0-9]+]], 0x80000000, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fneg_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = fsub float -0.0, %v0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_no_fold_canonicalize_fabs_value_f32:
; GCN-FLUSH: v_mul_f32_e64 v{{[0-9]+}}, 1.0, |v{{[0-9]+}}|
; GCN-DENORM: v_max_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |v{{[0-9]+}}|
define amdgpu_kernel void @test_no_fold_canonicalize_fabs_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.fabs.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_fabs_value_f32:
; GCN: v_and_b32_e32 [[V:v[0-9]+]], 0x7fffffff, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.fabs.f32(float %v0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_sin_value_f32:
; GCN: v_sin_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sin_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.sin.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_cos_value_f32:
; GCN: v_cos_f32_e32 [[V:v[0-9]+]], v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_cos_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.cos.f32(float %load)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_sin_value_f16:
; GCN: v_sin_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], [[V0]]
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_sin_value_f16(half addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = tail call half @llvm.sin.f16(half %load)
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  store half %canonicalized, half addrspace(1)* %gep, align 2
  ret void
}

; GCN-LABEL: test_fold_canonicalize_cos_value_f16:
; GCN: v_cos_f32_e32 [[V0:v[0-9]+]], v{{[0-9]+}}
; GCN: v_cvt_f16_f32_e32 [[V:v[0-9]+]], [[V0]]
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_cos_value_f16(half addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %load = load half, half addrspace(1)* %gep, align 2
  %v = tail call half @llvm.cos.f16(half %load)
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  store half %canonicalized, half addrspace(1)* %gep, align 2
  ret void
}

; GCN-LABEL: test_fold_canonicalize_qNaN_value_f32:
; GCN: v_mov_b32_e32 [[V:v[0-9]+]], 0x7fc00000
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_qNaN_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %canonicalized = tail call float @llvm.canonicalize.f32(float 0x7FF8000000000000)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_minnum_value_from_load_f32:
; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
; GFX9: v_min_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GFX9: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_minnum_value_f32:
; GCN: v_min_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.minnum.f32(float %v0, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; FIXME: Should there be more checks here? minnum with NaN operand is simplified away.

; GCN-LABEL: test_fold_canonicalize_sNaN_value_f32:
; VI: v_add_u32_e32 v{{[0-9]+}}
; GFX9: v_add_co_u32_e32 v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}]
define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float bitcast (i32 2139095041 to float))
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_denorm_value_f32:
; GFX9: v_min_f32_e32 [[RESULT:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
; VI: v_min_f32_e32 [[V0:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
; VI: v_mul_f32_e32 [[RESULT:v[0-9]+]], 1.0, [[V0]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]]
; GFX9-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_denorm_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.minnum.f32(float %load, float bitcast (i32 8388607 to float))
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_maxnum_value_from_load_f32:
; GFX9: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, v{{[0-9]+}}
; VI: v_max_f32_e32 [[V0:v[0-9]+]], 0, v{{[0-9]+}}
; VI: v_mul_f32_e32 [[RESULT:v[0-9]+]], 1.0, [[V0]]
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[RESULT]]
; GFX9-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_from_load_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v = tail call float @llvm.maxnum.f32(float %load, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_maxnum_value_f32:
; GCN: v_max_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
; GCN: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f32(float addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %load = load float, float addrspace(1)* %gep, align 4
  %v0 = fadd float %load, 0.0
  %v = tail call float @llvm.maxnum.f32(float %v0, float 0.0)
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  store float %canonicalized, float addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: test_fold_canonicalize_maxnum_value_f64:
; GCN: v_max_f64 [[V:v\[[0-9]+:[0-9]+\]]], v[{{[0-9:]+}}], 0
; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_f64(double addrspace(1)* %arg) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %load = load double, double addrspace(1)* %gep, align 8
  %v0 = fadd double %load, 0.0
  %v = tail call double @llvm.maxnum.f64(double %v0, double 0.0)
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  store double %canonicalized, double addrspace(1)* %gep, align 8
  ret void
}

; GCN-LABEL: test_no_fold_canonicalize_fmul_value_f32_no_ieee:
; GCN-EXCEPT: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_ps float @test_no_fold_canonicalize_fmul_value_f32_no_ieee(float %arg) {
entry:
  %v = fmul float %arg, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  ret float %canonicalized
}

; GCN-LABEL: test_fold_canonicalize_fmul_nnan_value_f32_no_ieee:
; GCN: v_mul_f32_e32 [[V:v[0-9]+]], 0x41700000, v{{[0-9]+}}
; GCN-NEXT: ; return
; GCN-NOT: 1.0
define amdgpu_ps float @test_fold_canonicalize_fmul_nnan_value_f32_no_ieee(float %arg) {
entry:
  %v = fmul nnan float %arg, 15.0
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  ret float %canonicalized
}

; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f32
; GFX9-DENORM: global_load_dword [[V:v[0-9]+]],
; GFX9-DENORM: global_store_dword v[{{[0-9:]+}}], [[V]]
; GFX9-DENORM-NOT: 1.0
; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32(float addrspace(1)* %arg, float addrspace(1)* %out) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id
  %v = load float, float addrspace(1)* %gep, align 4
  %canonicalized = tail call float @llvm.canonicalize.f32(float %v)
  %gep2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %id
  store float %canonicalized, float addrspace(1)* %gep2, align 4
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f64
; GCN: {{flat|global}}_load_dwordx2 [[V:v\[[0-9:]+\]]],
; GCN: {{flat|global}}_store_dwordx2 v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f64(double addrspace(1)* %arg, double addrspace(1)* %out) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, double addrspace(1)* %arg, i32 %id
  %v = load double, double addrspace(1)* %gep, align 8
  %canonicalized = tail call double @llvm.canonicalize.f64(double %v)
  %gep2 = getelementptr inbounds double, double addrspace(1)* %out, i32 %id
  store double %canonicalized, double addrspace(1)* %gep2, align 8
  ret void
}

; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f16
; GCN: {{flat|global}}_load_ushort [[V:v[0-9]+]],
; GCN: {{flat|global}}_store_short v[{{[0-9:]+}}], [[V]]
; GCN-NOT: 1.0
define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f16(half addrspace(1)* %arg, half addrspace(1)* %out) #1 {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds half, half addrspace(1)* %arg, i32 %id
  %v = load half, half addrspace(1)* %gep, align 2
  %canonicalized = tail call half @llvm.canonicalize.f16(half %v)
  %gep2 = getelementptr inbounds half, half addrspace(1)* %out, i32 %id
  store half %canonicalized, half addrspace(1)* %gep2, align 2
  ret void
}

; Avoid failing the test on FreeBSD11.0 which will match the GCN-NOT: 1.0
; in the .amd_amdgpu_isa "amdgcn-unknown-freebsd11.0--gfx802" directive
; CHECK: .amd_amdgpu_isa

declare float @llvm.canonicalize.f32(float) #0
declare double @llvm.canonicalize.f64(double) #0
declare half @llvm.canonicalize.f16(half) #0
declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare float @llvm.sqrt.f32(float) #0
declare float @llvm.ceil.f32(float) #0
declare float @llvm.floor.f32(float) #0
declare float @llvm.fma.f32(float, float, float) #0
declare float @llvm.fmuladd.f32(float, float, float) #0
declare float @llvm.fabs.f32(float) #0
declare float @llvm.sin.f32(float) #0
declare float @llvm.cos.f32(float) #0
declare half @llvm.sin.f16(half) #0
declare half @llvm.cos.f16(half) #0
declare float @llvm.minnum.f32(float, float) #0
declare float @llvm.maxnum.f32(float, float) #0
declare double @llvm.maxnum.f64(double, double) #0

attributes #0 = { nounwind readnone }
attributes #1 = { "no-nans-fp-math"="true" }