; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s

; Codegen tests for clamping a value to [0.0, 1.0] on AMDGPU.
;
; The IR spells the clamp either as minnum(maxnum(x, 0.0), 1.0) or as a call
; to llvm.amdgcn.fmed3 with the constants 0.0 and 1.0. The check lines pin
; whether the backend emits a single max instruction carrying the "clamp"
; modifier, a v_med3_f32, or a separate max/min pair.
;
; Three configurations are run: the default subtarget (SI prefix), fiji
; (VI prefix) and gfx900 (GFX9 prefix). GFX89 is shared by fiji and gfx900,
; and the GCN prefix is shared by all three runs.
;
; Unless noted otherwise, every kernel loads its input from %aptr indexed by
; the workitem id and stores the result to %out at the same index.

; Basic f32 clamp: the maxnum/minnum pair is expected to become one
; v_max_f32 of the input with itself, carrying the clamp modifier.
; GCN-LABEL: {{^}}v_clamp_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Clamp of a negated input (fsub -0.0, %a): the negation is expected to
; appear as a source modifier on both operands of the clamped max.
; GCN-LABEL: {{^}}v_clamp_neg_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_neg_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %fneg.a = fsub float -0.0, %a
  %max = call float @llvm.maxnum.f32(float %fneg.a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Clamp of -fabs(%a): both the abs and the neg are expected to be folded
; into source modifiers on the clamped max.
; GCN-LABEL: {{^}}v_clamp_negabs_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}
define amdgpu_kernel void @v_clamp_negabs_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %fabs.a = call float @llvm.fabs.f32(float %a)
  %fneg.fabs.a = fsub float -0.0, %fabs.a

  %max = call float @llvm.maxnum.f32(float %fneg.fabs.a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; The lower bound is -0.0 instead of +0.0. The checks expect no clamp
; modifier here: a sign-bit constant is materialized (v_bfrev_b32 of 1) and
; used as the low operand of a v_med3_f32.
; GCN-LABEL: {{^}}v_clamp_negzero_f32:
; GCN-DAG: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[SIGNBIT]], 1.0
define amdgpu_kernel void @v_clamp_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float -0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; The intermediate %max has a second use (the volatile store below), and the
; checks expect a separate v_max_f32 and v_min_f32 rather than a clamp.
; GCN-LABEL: {{^}}v_clamp_multi_use_max_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
define amdgpu_kernel void @v_clamp_multi_use_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  store volatile float %max, float addrspace(1)* undef
  ret void
}

; f16 clamp. The fiji/gfx900 runs expect a clamped v_max_f16. The default
; run expects the clamp modifier on the f16-to-f32 conversion, followed by a
; conversion back to f16.
; GCN-LABEL: {{^}}v_clamp_f16:
; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
; GFX89: v_max_f16_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}

; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], [[A]] clamp{{$}}
; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
define amdgpu_kernel void @v_clamp_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
  %a = load half, half addrspace(1)* %gep0
  %max = call half @llvm.maxnum.f16(half %a, half 0.0)
  %med = call half @llvm.minnum.f16(half %max, half 1.0)

  store half %med, half addrspace(1)* %out.gep
  ret void
}

; f16 clamp of a negated input; the neg is expected as a source modifier on
; v_max_f16 (fiji/gfx900) or on the clamped conversion (default run).
; GCN-LABEL: {{^}}v_clamp_neg_f16:
; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
; GFX89: v_max_f16_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}

; FIXME: Better to fold neg into max
; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]] clamp{{$}}
; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
define amdgpu_kernel void @v_clamp_neg_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
  %a = load half, half addrspace(1)* %gep0
  %fneg.a = fsub half -0.0, %a
  %max = call half @llvm.maxnum.f16(half %fneg.a, half 0.0)
  %med = call half @llvm.minnum.f16(half %max, half 1.0)

  store half %med, half addrspace(1)* %out.gep
  ret void
}

; f16 clamp of -fabs(%a); neg and abs are expected as source modifiers on
; v_max_f16 (fiji/gfx900) or on the clamped conversion (default run).
; GCN-LABEL: {{^}}v_clamp_negabs_f16:
; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
; GFX89: v_max_f16_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}

; FIXME: Better to fold neg/abs into max

; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -|[[A]]| clamp{{$}}
; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
define amdgpu_kernel void @v_clamp_negabs_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
  %a = load half, half addrspace(1)* %gep0
  %fabs.a = call half @llvm.fabs.f16(half %a)
  %fneg.fabs.a = fsub half -0.0, %fabs.a

  %max = call half @llvm.maxnum.f16(half %fneg.fabs.a, half 0.0)
  %med = call half @llvm.minnum.f16(half %max, half 1.0)

  store half %med, half addrspace(1)* %out.gep
  ret void
}

; f64 clamp: the checks expect a clamped v_max_f64 on a 64-bit register pair.
; FIXME: Do f64 instructions support clamp?
; GCN-LABEL: {{^}}v_clamp_f64:
; GCN: {{buffer|flat|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
  %a = load double, double addrspace(1)* %gep0
  %max = call double @llvm.maxnum.f64(double %a, double 0.0)
  %med = call double @llvm.minnum.f64(double %max, double 1.0)

  store double %med, double addrspace(1)* %out.gep
  ret void
}

; f64 clamp of a negated input, with the neg expected as a source modifier.
; GCN-LABEL: {{^}}v_clamp_neg_f64:
; GCN: {{buffer|flat|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -[[A]], -[[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_neg_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
  %a = load double, double addrspace(1)* %gep0
  %fneg.a = fsub double -0.0, %a
  %max = call double @llvm.maxnum.f64(double %fneg.a, double 0.0)
  %med = call double @llvm.minnum.f64(double %max, double 1.0)

  store double %med, double addrspace(1)* %out.gep
  ret void
}

; f64 clamp of -fabs(%a), with neg and abs expected as source modifiers.
; GCN-LABEL: {{^}}v_clamp_negabs_f64:
; GCN: {{buffer|flat|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -|[[A]]|, -|[[A]]| clamp{{$}}
define amdgpu_kernel void @v_clamp_negabs_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
  %a = load double, double addrspace(1)* %gep0
  %fabs.a = call double @llvm.fabs.f64(double %a)
  %fneg.fabs.a = fsub double -0.0, %fabs.a

  %max = call double @llvm.maxnum.f64(double %fneg.fabs.a, double 0.0)
  %med = call double @llvm.minnum.f64(double %max, double 1.0)

  store double %med, double addrspace(1)* %out.gep
  ret void
}

; fmed3 with -0.0 (not +0.0) as the low constant: the checks only require
; that a v_med3_f32 is still emitted.
; GCN-LABEL: {{^}}v_clamp_med3_aby_negzero_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32
define amdgpu_kernel void @v_clamp_med3_aby_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float -0.0, float 1.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; The next six kernels pass 0.0, 1.0 and %a to fmed3 in every operand order.
; In the kernel names "a" is 0.0, "b" is 1.0 and "y" is the loaded value.
; All six are expected to become a clamped v_max_f32.

; fmed3(0.0, 1.0, %a)
; GCN-LABEL: {{^}}v_clamp_med3_aby_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_aby_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(1.0, 0.0, %a)
; GCN-LABEL: {{^}}v_clamp_med3_bay_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_bay_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(%a, 0.0, 1.0)
; GCN-LABEL: {{^}}v_clamp_med3_yab_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_yab_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(%a, 1.0, 0.0)
; GCN-LABEL: {{^}}v_clamp_med3_yba_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_yba_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(0.0, %a, 1.0)
; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_ayb_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(1.0, %a, 0.0)
; GCN-LABEL: {{^}}v_clamp_med3_bya_f32:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_bya_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; The following kernels clamp a compile-time constant; the checks expect the
; result to be materialized directly with a v_mov_b32.

; Constant 4.0 (above the range): expected result 1.0.
; GCN-LABEL: {{^}}v_clamp_constants_to_one_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 1.0
define amdgpu_kernel void @v_clamp_constants_to_one_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 4.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Constant -4.0 (below the range): expected result 0.
; GCN-LABEL: {{^}}v_clamp_constants_to_zero_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
define amdgpu_kernel void @v_clamp_constants_to_zero_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float -4.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Constant 0.5 (inside the range): expected to be kept unchanged.
; GCN-LABEL: {{^}}v_clamp_constant_preserve_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0.5
define amdgpu_kernel void @v_clamp_constant_preserve_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0.5)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Constant with bit pattern 8388607 (0x7fffff, a denormal): the checks expect
; the same bit pattern to be kept.
; GCN-LABEL: {{^}}v_clamp_constant_preserve_denorm_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fffff{{$}}
define amdgpu_kernel void @v_clamp_constant_preserve_denorm_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 8388607 to float))
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Quiet NaN constant with the default attributes: expected result 0.
; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
define amdgpu_kernel void @v_clamp_constant_qnan_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Signaling NaN constant (bit pattern 2139095041 = 0x7f800001) with the
; default attributes: expected result 0.
; GCN-LABEL: {{^}}v_clamp_constant_snan_f32:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
define amdgpu_kernel void @v_clamp_constant_snan_f32(float addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; ---------------------------------------------------------------------
; Test non-default behaviors enabling snans and disabling dx10_clamp
; ---------------------------------------------------------------------

; maxnum/minnum clamp with dx10-clamp disabled (attribute group #2): the
; checks expect a v_med3_f32 instead of the clamp modifier.
; GCN-LABEL: {{^}}v_clamp_f32_no_dx10_clamp:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; dx10-clamp and fp-exceptions both enabled (attribute group #3): the clamp
; modifier is still expected.
; GCN-LABEL: {{^}}v_clamp_f32_snan_dx10clamp:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #3 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; dx10-clamp disabled with fp-exceptions enabled (attribute group #4): the
; checks expect a separate v_max_f32 and v_min_f32.
; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Same attributes as the previous kernel (#4), but the clamped value comes
; from an "fadd nnan"; here the checks expect the add followed by v_med3_f32.
; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp_nnan_src:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 1.0, [[A]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[ADD]], 0, 1.0
define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp_nnan_src(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %add = fadd nnan float %a, 1.0
  %max = call float @llvm.maxnum.f32(float %add, float 0.0)
  %med = call float @llvm.minnum.f32(float %max, float 1.0)

  store float %med, float addrspace(1)* %out.gep
  ret void
}

; The next six kernels repeat the fmed3 operand-order tests with dx10-clamp
; disabled (attribute group #2). When %a is the last operand a clamped
; v_max_f32 is still expected; in the other orders the checks expect a
; v_med3_f32 whose operands follow the IR operand order.

; fmed3(0.0, 1.0, %a)
; GCN-LABEL: {{^}}v_clamp_med3_aby_f32_no_dx10_clamp:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_aby_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(1.0, 0.0, %a)
; GCN-LABEL: {{^}}v_clamp_med3_bay_f32_no_dx10_clamp:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_med3_bay_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(%a, 0.0, 1.0)
; GCN-LABEL: {{^}}v_clamp_med3_yab_f32_no_dx10_clamp:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
define amdgpu_kernel void @v_clamp_med3_yab_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(%a, 1.0, 0.0)
; GCN-LABEL: {{^}}v_clamp_med3_yba_f32_no_dx10_clamp:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 1.0, 0
define amdgpu_kernel void @v_clamp_med3_yba_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(0.0, %a, 1.0)
; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32_no_dx10_clamp:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, 0, [[A]], 1.0
define amdgpu_kernel void @v_clamp_med3_ayb_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; fmed3(1.0, %a, 0.0)
; GCN-LABEL: {{^}}v_clamp_med3_bya_f32_no_dx10_clamp:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GCN: v_med3_f32 v{{[0-9]+}}, 1.0, [[A]], 0
define amdgpu_kernel void @v_clamp_med3_bya_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %a = load float, float addrspace(1)* %gep0
  %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Quiet NaN constant with dx10-clamp disabled (#2): the checks expect the NaN
; bit pattern 0x7fc00000 rather than 0.
; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32_no_dx10_clamp:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fc00000
define amdgpu_kernel void @v_clamp_constant_qnan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; Signaling NaN constant with dx10-clamp disabled (#2): the checks expect the
; input bit pattern 0x7f800001 to be kept.
; GCN-LABEL: {{^}}v_clamp_constant_snan_f32_no_dx10_clamp:
; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7f800001
define amdgpu_kernel void @v_clamp_constant_snan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
  store float %med, float addrspace(1)* %out.gep
  ret void
}

; The remaining <2 x half> kernels only have instruction checks for the
; gfx900 run; the other runs just match the function label (and the load,
; where one is listed).

; Packed f16 clamp: expected to become a clamped v_pk_max_f16 that is the
; first instruction to use the loaded register.
; GCN-LABEL: {{^}}v_clamp_v2f16:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GFX9-NOT: [[A]]
; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> zeroinitializer)
  %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)

  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
  ret void
}

; One element of each bound vector is undef; the packed clamp is still
; expected.
; GCN-LABEL: {{^}}v_clamp_v2f16_undef_elt:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GFX9-NOT: [[A]]
; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_v2f16_undef_elt(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half undef, half 0.0>)
  %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half undef>)

  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
  ret void
}

; The lower bound vector is <2.0, 0.0>, so this is not a 0..1 clamp in both
; lanes; a separate packed max and min are expected.
; GCN-LABEL: {{^}}v_clamp_v2f16_not_zero:
; GFX9: v_pk_max_f16
; GFX9: v_pk_min_f16
define amdgpu_kernel void @v_clamp_v2f16_not_zero(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 0.0>)
  %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)

  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
  ret void
}

; The upper bound vector is <0.0, 1.0>, so this is not a 0..1 clamp in both
; lanes; a separate packed max and min are expected.
; GCN-LABEL: {{^}}v_clamp_v2f16_not_one:
; GFX9: v_pk_max_f16
; GFX9: v_pk_min_f16
define amdgpu_kernel void @v_clamp_v2f16_not_one(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 0.0, half 0.0>)
  %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 0.0, half 1.0>)

  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
  ret void
}

; Packed clamp of a fully negated vector: expected as neg_lo/neg_hi modifiers
; on both operands of the clamped v_pk_max_f16.
; GCN-LABEL: {{^}}v_clamp_neg_v2f16:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GFX9-NOT: [[A]]
; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_lo:[1,1] neg_hi:[1,1] clamp{{$}}
define amdgpu_kernel void @v_clamp_neg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
  %fneg.a = fsub <2 x half> <half -0.0, half -0.0>, %a
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %fneg.a, <2 x half> zeroinitializer)
  %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)

  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
  ret void
}

; Packed clamp of -fabs(%a): the abs is expected as an explicit v_and_b32
; with 0x7fff7fff, and the negation as neg_lo/neg_hi modifiers on the
; clamped v_pk_max_f16.
; GCN-LABEL: {{^}}v_clamp_negabs_v2f16:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, [[A]]
; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[ABS]], [[ABS]] neg_lo:[1,1] neg_hi:[1,1] clamp{{$}}
define amdgpu_kernel void @v_clamp_negabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
  %fabs.a = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a)
  %fneg.fabs.a = fsub <2 x half> <half -0.0, half -0.0>, %fabs.a

  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %fneg.fabs.a, <2 x half> zeroinitializer)
  %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)

  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
  ret void
}

; Only element 0 is negated before the clamp: expected as a neg_lo modifier.
; GCN-LABEL: {{^}}v_clamp_neglo_v2f16:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GFX9-NOT: [[A]]
; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_lo:[1,1] clamp{{$}}
define amdgpu_kernel void @v_clamp_neglo_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
  %lo = extractelement <2 x half> %a, i32 0
  %neg.lo = fsub half -0.0, %lo
  %neg.lo.vec = insertelement <2 x half> %a, half %neg.lo, i32 0
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.lo.vec, <2 x half> zeroinitializer)
  %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)

  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
  ret void
}

; Only element 1 is negated before the clamp: expected as a neg_hi modifier.
; GCN-LABEL: {{^}}v_clamp_neghi_v2f16:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GFX9-NOT: [[A]]
; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_hi:[1,1] clamp{{$}}
define amdgpu_kernel void @v_clamp_neghi_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
  %hi = extractelement <2 x half> %a, i32 1
  %neg.hi = fsub half -0.0, %hi
  %neg.hi.vec = insertelement <2 x half> %a, half %neg.hi, i32 1
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.hi.vec, <2 x half> zeroinitializer)
  %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)

  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
  ret void
}

; The two elements are swapped by a shufflevector before the clamp: expected
; as op_sel/op_sel_hi modifiers on the clamped v_pk_max_f16.
; GCN-LABEL: {{^}}v_clamp_v2f16_shuffle:
; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
; GFX9-NOT: [[A]]
; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] op_sel:[1,1] op_sel_hi:[0,0] clamp{{$}}
define amdgpu_kernel void @v_clamp_v2f16_shuffle(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
  %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
  %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
  %shuf = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0>
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer)
  %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)

  store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
  ret void
}

; The clamped value is itself a maxnum of two different fadd results. The
; checks expect the clamp modifier on a v_max_f32 whose two sources are the
; two distinct add results. This kernel uses fixed offsets 0..2 from %aptr
; and stores to %out[3]; it does not use the workitem id.
; GCN-LABEL: {{^}}v_clamp_diff_source_f32:
; GCN: v_add_f32_e32 [[A:v[0-9]+]]
; GCN: v_add_f32_e32 [[B:v[0-9]+]]
; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[B]] clamp{{$}}
define amdgpu_kernel void @v_clamp_diff_source_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0
{
  %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 0
  %gep1 = getelementptr float, float addrspace(1)* %aptr, i32 1
  %gep2 = getelementptr float, float addrspace(1)* %aptr, i32 2
  %l0 = load float, float addrspace(1)* %gep0
  %l1 = load float, float addrspace(1)* %gep1
  %l2 = load float, float addrspace(1)* %gep2
  %a = fadd nsz float %l0, %l1
  %b = fadd nsz float %l0, %l2
  %res = call nsz float @llvm.maxnum.f32(float %a, float %b)
  %max = call nsz float @llvm.maxnum.f32(float %res, float 0.0)
  %min = call nsz float @llvm.minnum.f32(float %max, float 1.0)
  %out.gep = getelementptr float, float addrspace(1)* %out, i32 3
  store float %min, float addrspace(1)* %out.gep
  ret void
}

; Intrinsics used by the kernels above.
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fabs.f32(float) #1
declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1
declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
declare double @llvm.fabs.f64(double) #1
declare double @llvm.minnum.f64(double, double) #1
declare double @llvm.maxnum.f64(double, double) #1
declare half @llvm.fabs.f16(half) #1
declare half @llvm.minnum.f16(half, half) #1
declare half @llvm.maxnum.f16(half, half) #1
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1

; Attribute groups:
;   #0 - default kernels (no target-features override)
;   #1 - intrinsic declarations
;   #2 - dx10-clamp off, fp-exceptions off
;   #3 - dx10-clamp on,  fp-exceptions on
;   #4 - dx10-clamp off, fp-exceptions on
; NOTE(review): the section header above associates these settings with
; "enabling snans"; presumably +fp-exceptions is what makes the backend treat
; signaling NaNs as significant -- confirm against the AMDGPU backend docs.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "target-features"="-dx10-clamp,-fp-exceptions" "no-nans-fp-math"="false" }
attributes #3 = { nounwind "target-features"="+dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }
attributes #4 = { nounwind "target-features"="-dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }