; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GCN,VI %s

; Codegen checks for fabs / fneg applied to the operands (or to the result) of
; a select whose value feeds a floating point add or multiply. The checks
; record whether the operation ends up as a source modifier on the user
; (|v| or -v), is folded into a constant operand, or stays an explicit
; v_and / v_or / v_xor on the sign bit.
;
; Each kernel takes its inputs from volatile loads of an undef global pointer
; and writes its results with volatile stores; the select condition is always
; (%c == 0).

; --------------------------------------------------------------------------------
; fabs on the select operands
; --------------------------------------------------------------------------------

; Both select operands are fabs. The expected code selects the raw values and
; applies a single |x| modifier on the add.
; GCN-LABEL: {{^}}add_select_fabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
define amdgpu_kernel void @add_select_fabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fabs.x, float %fabs.y
  %add = fadd float %select, %z
  ; Volatile, like the result stores of every other kernel in this file.
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Same as above, but fabs(x) has a second arithmetic user, which also takes it
; as a source modifier.
; GCN-LABEL: {{^}}add_select_multi_use_lhs_fabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[W:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[X]]|, [[W]]
define amdgpu_kernel void @add_select_multi_use_lhs_fabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %w = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fabs.x, float %fabs.y
  %add0 = fadd float %select, %z
  %add1 = fadd float %fabs.x, %w
  store volatile float %add0, float addrspace(1)* undef
  store volatile float %add1, float addrspace(1)* undef
  ret void
}

; fabs(x) is also stored directly, so it has to be materialized with v_and for
; the store while the select still uses the raw value.
; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_add_f32_e64 [[ADD:v[0-9]+]], |[[SELECT]]|, [[Z]]
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]

; GCN: buffer_store_dword [[ADD]]
; GCN: buffer_store_dword [[X_ABS]]
define amdgpu_kernel void @add_select_multi_store_use_lhs_fabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fabs.x, float %fabs.y
  %add0 = fadd float %select, %z
  store volatile float %add0, float addrspace(1)* undef
  store volatile float %fabs.x, float addrspace(1)* undef
  ret void
}

; Mirror of the multi-use lhs case with the extra user on fabs(y).
; GCN-LABEL: {{^}}add_select_multi_use_rhs_fabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[W:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[Y]]|, [[W]]
define amdgpu_kernel void @add_select_multi_use_rhs_fabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %w = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fabs.x, float %fabs.y
  %add0 = fadd float %select, %z
  %add1 = fadd float %fabs.y, %w
  store volatile float %add0, float addrspace(1)* undef
  store volatile float %add1, float addrspace(1)* undef
  ret void
}

; Only one operand is fabs and the other is a plain variable. The fabs stays
; an explicit v_and feeding the select.
; GCN-LABEL: {{^}}add_select_fabs_var_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X_ABS]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fabs_var_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float %fabs.x, float %y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; fabs against a negative constant: the fabs stays an explicit v_and.
; GCN-LABEL: {{^}}add_select_fabs_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[FABS_X]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @add_select_fabs_negk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float %fabs, float -1.0
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; FIXME: fabs should fold away
; GCN-LABEL: {{^}}add_select_fabs_negk_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[X]]
define amdgpu_kernel void @add_select_fabs_negk_negk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float -2.0, float -1.0
  %fabs = call float @llvm.fabs.f32(float %select)
  %add = fadd float %fabs, %x
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Baseline without fabs or fneg: a select of two positive inline constants.
; GCN-LABEL: {{^}}add_select_posk_posk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
define amdgpu_kernel void @add_select_posk_posk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float 2.0, float 1.0
  %add = fadd float %select, %x
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Negative constant in the true position: the compare is emitted inverted
; (v_cmp_ne) and the operand order of the select matches the fabs_negk case.
; GCN-LABEL: {{^}}add_select_negk_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN-DAG: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]]
; GCN-DAG: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[FABS_X]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @add_select_negk_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float -1.0, float %fabs
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; As above with -1024.0 (0xc4800000), which is materialized with v_mov.
; GCN-LABEL: {{^}}add_select_negliteralk_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xc4800000

; GCN-DAG: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]]
; GCN-DAG: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[FABS_X]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @add_select_negliteralk_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float -1024.0, float %fabs
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; fabs against a positive constant: the select takes the raw value and the
; |x| modifier moves onto the add.
; GCN-LABEL: {{^}}add_select_fabs_posk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]]
define amdgpu_kernel void @add_select_fabs_posk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float %fabs, float 1.0
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Same with the constant in the true position (inverted compare).
; GCN-LABEL: {{^}}add_select_posk_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Y]]
define amdgpu_kernel void @add_select_posk_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs = call float @llvm.fabs.f32(float %x)
  %select = select i1 %cmp, float 1.0, float %fabs
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; --------------------------------------------------------------------------------
; fneg on the select operands
; --------------------------------------------------------------------------------

; Both operands are fneg: the select takes the raw values and the add becomes
; a subtract of the select result.
; GCN-LABEL: {{^}}add_select_fneg_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_fneg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %fneg.y = fsub float -0.0, %y
  %select = select i1 %cmp, float %fneg.x, float %fneg.y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; fneg(x) has a second arithmetic user, which also turns into a subtract.
; GCN-LABEL: {{^}}add_select_multi_use_lhs_fneg_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[W:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[X]]
define amdgpu_kernel void @add_select_multi_use_lhs_fneg_fneg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %w = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %fneg.y = fsub float -0.0, %y
  %select = select i1 %cmp, float %fneg.x, float %fneg.y
  %add0 = fadd float %select, %z
  %add1 = fadd float %fneg.x, %w
  store volatile float %add0, float addrspace(1)* undef
  store volatile float %add1, float addrspace(1)* undef
  ret void
}

; fneg(x) is also stored directly, so it is materialized with v_xor for the
; store while the select still uses the raw value.
; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fneg_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_xor_b32_e32 [[NEG_X:v[0-9]+]], 0x80000000, [[X]]
; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[Z]], [[SELECT]]

; GCN: buffer_store_dword [[ADD]]
; GCN: buffer_store_dword [[NEG_X]]
define amdgpu_kernel void @add_select_multi_store_use_lhs_fneg_fneg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %fneg.y = fsub float -0.0, %y
  %select = select i1 %cmp, float %fneg.x, float %fneg.y
  %add0 = fadd float %select, %z
  store volatile float %add0, float addrspace(1)* undef
  store volatile float %fneg.x, float addrspace(1)* undef
  ret void
}

; Mirror of the multi-use lhs case with the extra user on fneg(y).
; GCN-LABEL: {{^}}add_select_multi_use_rhs_fneg_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]
; GCN: buffer_load_dword [[W:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[Y]]
define amdgpu_kernel void @add_select_multi_use_rhs_fneg_fneg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %w = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %fneg.y = fsub float -0.0, %y
  %select = select i1 %cmp, float %fneg.x, float %fneg.y
  %add0 = fadd float %select, %z
  %add1 = fadd float %fneg.y, %w
  store volatile float %add0, float addrspace(1)* undef
  store volatile float %add1, float addrspace(1)* undef
  ret void
}

; Only one operand is fneg and the other is a plain variable. The fneg stays
; an explicit v_xor feeding the select.
; GCN-LABEL: {{^}}add_select_fneg_var_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_xor_b32_e32 [[X_NEG:v[0-9]+]], 0x80000000, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X_NEG]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fneg_var_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float %fneg.x, float %y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; fneg against a constant: the constant is negated instead (-1.0 becomes 1.0)
; and the add becomes a subtract.
; GCN-LABEL: {{^}}add_select_fneg_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_negk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float %fneg.x, float -1.0
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; The constant 0x3FC45F3060000000 is expected negated as the literal
; 0xbe22f983, materialized with v_mov on both targets.
; GCN-LABEL: {{^}}add_select_fneg_inv2pi_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_inv2pi_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float %fneg.x, float 0x3FC45F3060000000
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; The negated constant is 0x3e22f983. The tahiti run expects it in a v_mov,
; the fiji run expects it as the inline operand 0.15915494.
; GCN-LABEL: {{^}}add_select_fneg_neginv2pi_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983

; SI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
; VI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0.15915494, [[X]], vcc

; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_neginv2pi_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float %fneg.x, float 0xBFC45F3060000000
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Baseline without fneg: a select of two negative inline constants.
; GCN-LABEL: {{^}}add_select_negk_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_cmp_eq_u32_e64
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
define amdgpu_kernel void @add_select_negk_negk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float -2.0, float -1.0
  %add = fadd float %select, %x
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Baseline with two negative literals: -2048.0 (0xc5000000) and -4096.0
; (0xc5800000) are both materialized with v_mov.
; GCN-LABEL: {{^}}add_select_negliteralk_negliteralk_f32:
; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 0xc5000000
; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0xc5800000
; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_cmp_eq_u32_e64
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K1]], [[K0]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
define amdgpu_kernel void @add_select_negliteralk_negliteralk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float -2048.0, float -4096.0
  %add = fadd float %select, %x
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; fneg of a select of two constants: the constants are kept and the add
; becomes a subtract.
; GCN-LABEL: {{^}}add_select_fneg_negk_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[X]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_negk_negk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %select = select i1 %cmp, float -2.0, float -1.0
  %fneg.x = fsub float -0.0, %select
  %add = fadd float %fneg.x, %x
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Negative constant in the true position, fneg in the false position.
; GCN-LABEL: {{^}}add_select_negk_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_negk_fneg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float -1.0, float %fneg.x
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; fneg against a positive constant: the constant is negated (1.0 becomes
; -1.0) and the add becomes a subtract.
; GCN-LABEL: {{^}}add_select_fneg_posk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_fneg_posk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float %fneg.x, float 1.0
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Same with the constant in the true position (inverted compare).
; GCN-LABEL: {{^}}add_select_posk_fneg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
define amdgpu_kernel void @add_select_posk_fneg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.0, %x
  %select = select i1 %cmp, float 1.0, float %fneg.x
  %add = fadd float %select, %y
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; --------------------------------------------------------------------------------
; Mixed fabs / fneg / fneg(fabs) select operands
; --------------------------------------------------------------------------------

; The two operands carry different modifiers, so each is materialized
; separately (v_or for -|x|, v_and for |y|) before the select.
; GCN-LABEL: {{^}}add_select_negfabs_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_or_b32_e32 [[X_NEG_ABS:v[0-9]+]], 0x80000000, [[X]]
; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X_NEG_ABS]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_negfabs_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fneg.fabs.x, float %fabs.y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Operands swapped relative to the previous kernel.
; GCN-LABEL: {{^}}add_select_fabs_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_or_b32_e32 [[Y_NEG_ABS:v[0-9]+]], 0x80000000, [[Y]]
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_NEG_ABS]], [[X_ABS]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fabs_negfabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y
  %select = select i1 %cmp, float %fabs.x, float %fneg.fabs.y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; fneg on one operand and fabs on the other: v_xor and v_and before the select.
; GCN-LABEL: {{^}}add_select_neg_fabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_xor_b32_e32 [[X_NEG:v[0-9]+]], 0x80000000, [[X]]
; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X_NEG]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_neg_fabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.000000e+00, %x
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %select = select i1 %cmp, float %fneg.x, float %fabs.y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Operands swapped relative to the previous kernel.
; GCN-LABEL: {{^}}add_select_fabs_neg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
; GCN-DAG: v_xor_b32_e32 [[Y_NEG:v[0-9]+]], 0x80000000, [[Y]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_NEG]], [[X_ABS]], vcc
; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
define amdgpu_kernel void @add_select_fabs_neg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.y = fsub float -0.000000e+00, %y
  %select = select i1 %cmp, float %fabs.x, float %fneg.y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; Both operands are negated, so the common fneg is pulled out into a subtract
; and only the fabs stays as v_and.
; GCN-LABEL: {{^}}add_select_neg_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_and_b32_e32 [[Y_ABS:v[0-9]+]], 0x7fffffff, [[Y]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y_ABS]], [[X]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
define amdgpu_kernel void @add_select_neg_negfabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fneg.x = fsub float -0.000000e+00, %x
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y
  %select = select i1 %cmp, float %fneg.x, float %fneg.fabs.y
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; As above with fneg(y) in the true position and -|x| in the false position.
; GCN-LABEL: {{^}}add_select_negfabs_neg_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[X_ABS]], [[Y]], vcc
; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
define amdgpu_kernel void @add_select_negfabs_neg_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %fneg.y = fsub float -0.000000e+00, %y
  %select = select i1 %cmp, float %fneg.y, float %fneg.fabs.x
  %add = fadd float %select, %z
  store volatile float %add, float addrspace(1)* undef
  ret void
}

; --------------------------------------------------------------------------------
; fneg(fabs) against a constant, feeding a multiply
; --------------------------------------------------------------------------------

; -|x| against +4.0: the constant is negated, the fabs stays as v_and and the
; fneg becomes a source modifier on the multiply.
; GCN-LABEL: {{^}}mul_select_negfabs_posk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN-DAG: v_cmp_eq_u32_e64 vcc,
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -4.0, [[X_ABS]], vcc
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_negfabs_posk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %select = select i1 %cmp, float %fneg.fabs.x, float 4.0
  %mul = fmul float %select, %y
  store volatile float %mul, float addrspace(1)* undef
  ret void
}

; Same with the constant in the true position (inverted compare).
; GCN-LABEL: {{^}}mul_select_posk_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN-DAG: v_cmp_ne_u32_e64 vcc, s{{[0-9]+}}, 0
; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -4.0, [[X_ABS]], vcc
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_posk_negfabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %select = select i1 %cmp, float 4.0, float %fneg.fabs.x
  %mul = fmul float %select, %y
  store volatile float %mul, float addrspace(1)* undef
  ret void
}

; -|x| against -4.0: the select takes the raw x and +4.0, and the whole -|v|
; modifier is applied on the multiply.
; GCN-LABEL: {{^}}mul_select_negfabs_negk_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 4.0, [[X]], vcc
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
define amdgpu_kernel void @mul_select_negfabs_negk_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %select = select i1 %cmp, float %fneg.fabs.x, float -4.0
  %mul = fmul float %select, %y
  store volatile float %mul, float addrspace(1)* undef
  ret void
}

; Same with the constant in the true position (inverted compare).
; GCN-LABEL: {{^}}mul_select_negk_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_cmp_ne_u32_e64 vcc
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 4.0, [[X]], vcc
; GCN: v_mul_f32_e64 v{{[0-9]+}}, -|[[SELECT]]|, [[Y]]
define amdgpu_kernel void @mul_select_negk_negfabs_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
  %select = select i1 %cmp, float -4.0, float %fneg.fabs.x
  %mul = fmul float %select, %y
  store volatile float %mul, float addrspace(1)* undef
  ret void
}

; --------------------------------------------------------------------------------
; Don't fold if fneg can fold into the source
; --------------------------------------------------------------------------------

; fneg(x + 4.0) is expected as (-4.0 - x) feeding the select directly.
; GCN-LABEL: {{^}}select_fneg_posk_src_add_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], -4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_add_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %add = fadd float %x, 4.0
  %fneg = fsub float -0.0, %add
  %select = select i1 %cmp, float %fneg, float 2.0
  store volatile float %select, float addrspace(1)* undef
  ret void
}

; fneg(x - 4.0) is expected as (4.0 - x).
; GCN-LABEL: {{^}}select_fneg_posk_src_sub_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], 4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_sub_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %add = fsub float %x, 4.0
  %fneg = fsub float -0.0, %add
  %select = select i1 %cmp, float %fneg, float 2.0
  store volatile float %select, float addrspace(1)* undef
  ret void
}

; fneg(x * 4.0) is expected as (-4.0 * x).
; GCN-LABEL: {{^}}select_fneg_posk_src_mul_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MUL]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_mul_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %mul = fmul float %x, 4.0
  %fneg = fsub float -0.0, %mul
  %select = select i1 %cmp, float %fneg, float 2.0
  store volatile float %select, float addrspace(1)* undef
  ret void
}

; fneg(fma(x, 4.0, z)) is expected as fma(x, -4.0, -z).
; GCN-LABEL: {{^}}select_fneg_posk_src_fma_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[X]], -4.0, -[[Z]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[FMA]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_fma_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fma = call float @llvm.fma.f32(float %x, float 4.0, float %z)
  %fneg = fsub float -0.0, %fma
  %select = select i1 %cmp, float %fneg, float 2.0
  store volatile float %select, float addrspace(1)* undef
  ret void
}

; fneg(fmuladd(x, 4.0, z)), by analogy with the fma case above, is expected as
; a multiply-add with negated constant and addend. The multiply-add result is
; captured explicitly so the select check does not depend on it being
; allocated to the same register as the first load.
; GCN-LABEL: {{^}}select_fneg_posk_src_fmad_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_mad_f32 [[MAD:v[0-9]+]], [[X]], -4.0, -[[Z]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MAD]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_fmad_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %z = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %fmad = call float @llvm.fmuladd.f32(float %x, float 4.0, float %z)
  %fneg = fsub float -0.0, %fmad
  %select = select i1 %cmp, float %fneg, float 2.0
  store volatile float %select, float addrspace(1)* undef
  ret void
}

; FIXME: This one should fold to rcp
; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {
  %x = load volatile float, float addrspace(1)* undef
  %y = load volatile float, float addrspace(1)* undef
  %cmp = icmp eq i32 %c, 0
  %rcp = call float @llvm.amdgcn.rcp.f32(float %x)
  %fneg = fsub float -0.0, %rcp
  %select = select i1 %cmp, float %fneg, float 2.0
  store volatile float %select, float addrspace(1)* undef
  ret void
}

declare float @llvm.fabs.f32(float) #1
declare float @llvm.fma.f32(float, float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare float @llvm.amdgcn.rcp.f32(float) #1
declare float @llvm.amdgcn.rcp.legacy(float) #1
declare float @llvm.amdgcn.fmul.legacy(float, float) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }