; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32FLUSH %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32DENORM %s
; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32FLUSH %s
; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32DENORM %s

; Each function below feeds an fpext of a multiply (or multiply-add) into an
; fadd/fsub. The four llc invocations above compile the file for gfx900 and
; gfx803, each once with fp32 denormals disabled and once with them enabled.
; On gfx900 with fp32 denormals disabled the f16->f32 cases are expected to
; select v_mad_mix_f32; with denormals enabled the separate multiply, convert
; and add instructions are expected to remain.

; fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)

; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f32:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
; GFX9-F32DENORM-NEXT: v_add_f32
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fadd_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
entry:
  %mul = fmul half %x, %y
  %mul.ext = fpext half %mul to float
  %add = fadd float %mul.ext, %z
  ret float %add
}

; f16->f64 is not free.
; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f64:
; GFX89: v_mul_f16
; GFX89: v_cvt_f32_f16
; GFX89: v_cvt_f64_f32
; GFX89: v_add_f64
define double @fadd_fpext_fmul_f16_to_f64(half %x, half %y, double %z) #0 {
entry:
  %mul = fmul half %x, %y
  %mul.ext = fpext half %mul to double
  %add = fadd double %mul.ext, %z
  ret double %add
}

; f32->f64 is not free.
; GCN-LABEL: {{^}}fadd_fpext_fmul_f32_to_f64:
; GCN: v_mul_f32
; GCN: v_cvt_f64_f32
; GCN: v_add_f64
define double @fadd_fpext_fmul_f32_to_f64(float %x, float %y, double %z) #0 {
entry:
  %mul = fmul float %x, %y
  %mul.ext = fpext float %mul to double
  %add = fadd double %mul.ext, %z
  ret double %add
}

; fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f32_commute:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
; GFX9-F32DENORM-NEXT: v_add_f32
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fadd_fpext_fmul_f16_to_f32_commute(half %x, half %y, float %z) #0 {
entry:
  %mul = fmul half %x, %y
  %mul.ext = fpext half %mul to float
  %add = fadd float %z, %mul.ext
  ret float %add
}

; fold (fadd (fma x, y, (fpext (fmul u, v))), z)
;   -> (fma x, y, (fma (fpext u), (fpext v), z))

; GCN-LABEL: {{^}}fadd_muladd_fpext_fmul_f16_to_f32:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
; GFX9-F32DENORM-NEXT: v_fma_f32
; GFX9-F32DENORM-NEXT: v_add_f32
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fadd_muladd_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {
entry:
  %mul = fmul half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)
  %add = fadd float %fma, %z
  ret float %add
}

; fold (fadd x, (fma y, z, (fpext (fmul u, v))))
;   -> (fma y, z, (fma (fpext u), (fpext v), x))
; GCN-LABEL: {{^}}fadd_muladd_fpext_fmul_f16_to_f32_commute:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
; GFX9-F32DENORM-NEXT: v_fma_f32
; GFX9-F32DENORM-NEXT: v_add_f32
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fadd_muladd_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) #0 {
entry:
  %mul = fmul half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)
  %add = fadd float %z, %fma
  ret float %add
}

; Same shape as fadd_muladd_fpext_fmul_f16_to_f32, but the inner multiply-add
; is spelled as separate 'contract' fmul/fadd instructions rather than a call
; to llvm.fmuladd.
; GCN-LABEL: {{^}}fadd_fmad_fpext_fmul_f16_to_f32:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v2, v2, v3
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX9-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX9-F32DENORM-NEXT: v_add_f32
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fadd_fmad_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {
entry:
  %mul = fmul half %u, %v
  %mul.ext = fpext half %mul to float
  %mul1 = fmul contract float %x, %y
  %fmad = fadd contract float %mul1, %mul.ext
  %add = fadd float %fmad, %z
  ret float %add
}

; fold (fadd (fma x, y, (fpext (fmul u, v))), z)
;   -> (fma x, y, (fma (fpext u), (fpext v), z))

; GCN-LABEL: {{^}}fadd_fma_fpext_fmul_f16_to_f32:
; GCN: s_waitcnt
; GFX89: v_mul_f16
; GFX89: v_cvt_f32_f16
; GFX89: v_fma_f32
; GFX89: v_add_f32
define float @fadd_fma_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {
entry:
  %mul = fmul contract half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fma.f32(float %x, float %y, float %mul.ext)
  %add = fadd float %fma, %z
  ret float %add
}

; As fadd_fma_fpext_fmul_f16_to_f32, with the operands of the outer fadd
; swapped.
; GCN-LABEL: {{^}}fadd_fma_fpext_fmul_f16_to_f32_commute:
; GCN: s_waitcnt
; GFX89: v_mul_f16
; GFX89: v_cvt_f32_f16
; GFX89: v_fma_f32
; GFX89: v_add_f32
define float @fadd_fma_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) #0 {
entry:
  %mul = fmul contract half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fma.f32(float %x, float %y, float %mul.ext)
  %add = fadd float %z, %fma
  ret float %add
}

; fold (fadd x, (fpext (fma y, z, (fmul u, v))))
;   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))

; GCN-LABEL: {{^}}fadd_fpext_fmuladd_f16_to_f32:
; GFX9: v_mul_f16
; GFX9: v_fma_legacy_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_add_f32_e32
define float @fadd_fpext_fmuladd_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
entry:
  %mul = fmul contract half %u, %v
  %fma = call half @llvm.fmuladd.f16(half %y, half %z, half %mul)
  %ext.fma = fpext half %fma to float
  %add = fadd float %x, %ext.fma
  ret float %add
}

; As fadd_fpext_fmuladd_f16_to_f32, using llvm.fma.f16 instead of
; llvm.fmuladd.f16.
; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32:
; GFX9: v_mul_f16
; GFX9: v_fma_legacy_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_add_f32_e32
define float @fadd_fpext_fma_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {
entry:
  %mul = fmul contract half %u, %v
  %fma = call half @llvm.fma.f16(half %y, half %z, half %mul)
  %ext.fma = fpext half %fma to float
  %add = fadd float %x, %ext.fma
  ret float %add
}

; As fadd_fpext_fma_f16_to_f32, with the operands of the outer fadd swapped.
; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32_commute:
; GFX9: v_mul_f16
; GFX9: v_fma_legacy_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_add_f32_e32
define float @fadd_fpext_fma_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 {
entry:
  %mul = fmul contract half %u, %v
  %fma = call half @llvm.fma.f16(half %y, half %z, half %mul)
  %ext.fma = fpext half %fma to float
  %add = fadd float %ext.fma, %x
  ret float %add
}

; fold (fsub (fpext (fmul x, y)), z)
;   -> (fma (fpext x), (fpext y), (fneg z))

; GCN-LABEL: {{^}}fsub_fpext_fmul_f16_to_f32:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]{{$}}
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fsub_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
entry:
  %mul = fmul half %x, %y
  %mul.ext = fpext half %mul to float
  %add = fsub float %mul.ext, %z
  ret float %add
}

; fold (fsub x, (fpext (fmul y, z)))
;   -> (fma (fneg (fpext y)), (fpext z), x)

; GCN-LABEL: {{^}}fsub_fpext_fmul_f16_to_f32_commute:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, -v1, v2, v0 op_sel_hi:[1,1,0]
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16_e32
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32
; GFX9-F32DENORM-NEXT: v_sub_f32_e32
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fsub_fpext_fmul_f16_to_f32_commute(float %x, half %y, half %z) #0 {
entry:
  %mul = fmul contract half %y, %z
  %mul.ext = fpext half %mul to float
  %add = fsub contract float %x, %mul.ext
  ret float %add
}

; fold (fsub (fpext (fneg (fmul x, y))), z)
;   -> (fneg (fma (fpext x), (fpext y), z))

; GCN-LABEL: {{^}}fsub_fpext_fneg_fmul_f16_to_f32:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]{{$}}
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fsub_fpext_fneg_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
entry:
  %mul = fmul half %x, %y
  ; The negation is written as a subtraction from -0.0.
  %neg.mul = fsub half -0.0, %mul
  %neg.mul.ext = fpext half %neg.mul to float
  %add = fsub float %neg.mul.ext, %z
  ret float %add
}

; fold (fsub (fneg (fpext (fmul x, y))), z)
;   -> (fneg (fma (fpext x), (fpext y), z))

; GCN-LABEL: {{^}}fsub_fneg_fpext_fmul_f16_to_f32:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]{{$}}
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fsub_fneg_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
entry:
  %mul = fmul half %x, %y
  %mul.ext = fpext half %mul to float
  ; The negation is written as a subtraction from -0.0.
  %neg.mul.ext = fsub float -0.0, %mul.ext
  %add = fsub float %neg.mul.ext, %z
  ret float %add
}

; fold (fsub (fmad x, y, (fpext (fmul u, v))), z)
;   -> (fmad x, y, (fmad (fpext u), (fpext v), (fneg z)))
; GCN-LABEL: {{^}}fsub_muladd_fpext_mul_f16_to_f32:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v3, v4, -v2 op_sel_hi:[1,1,0]{{$}}
; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v3, v3, v4
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX9-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v3
; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fsub_muladd_fpext_mul_f16_to_f32(float %x, float %y, float %z, half %u, half %v) #0 {
entry:
  %mul = fmul half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)
  %add = fsub float %fma, %z
  ret float %add
}

; fold (fsub (fpext (fmad x, y, (fmul u, v))), z)
;   -> (fmad (fpext x), (fpext y),
;            (fmad (fpext u), (fpext v), (fneg z)))

; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32:
; GFX9: v_mul_f16
; GFX9: v_fma_legacy_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_sub_f32
; GCN: s_setpc_b64
define float @fsub_fpext_muladd_mul_f16_to_f32(half %x, half %y, float %z, half %u, half %v) #0 {
entry:
  %mul = fmul half %u, %v
  %fma = call half @llvm.fmuladd.f16(half %x, half %y, half %mul)
  %fma.ext = fpext half %fma to float
  %add = fsub float %fma.ext, %z
  ret float %add
}

; fold (fsub x, (fmad y, z, (fpext (fmul u, v))))
;   -> (fmad (fneg y), z, (fmad (fneg (fpext u)), (fpext v), x))
; GCN-LABEL: {{^}}fsub_muladd_fpext_mul_f16_to_f32_commute:
; GCN: s_waitcnt
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, -v3, v4, v0 op_sel_hi:[1,1,0]{{$}}
; GFX9-F32FLUSH-NEXT: v_mad_f32 v0, -v1, v2, v0{{$}}
; GFX9-F32FLUSH-NEXT: s_setpc_b64

; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v3, v3, v4
; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX9-F32DENORM-NEXT: v_fma_f32 v1, v1, v2, v3
; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX9-F32DENORM-NEXT: s_setpc_b64
define float @fsub_muladd_fpext_mul_f16_to_f32_commute(float %x, float %y, float %z, half %u, half %v) #0 {
entry:
  %mul = fmul half %u, %v
  %mul.ext = fpext half %mul to float
  %fma = call float @llvm.fmuladd.f32(float %y, float %z, float %mul.ext)
  %add = fsub float %x, %fma
  ret float %add
}

; fold (fsub x, (fpext (fma y, z, (fmul u, v))))
;   -> (fma (fneg (fpext y)), (fpext z),
;           (fma (fneg (fpext u)), (fpext v), x))
; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32_commute:
; GCN: s_waitcnt
; GFX9-NEXT: v_mul_f16_e32 v3, v3, v4
; GFX9-NEXT: v_fma_legacy_f16 v1, v1, v2, v3
; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64
define float @fsub_fpext_muladd_mul_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 {
entry:
  %mul = fmul half %u, %v
  %fma = call half @llvm.fmuladd.f16(half %y, half %z, half %mul)
  %fma.ext = fpext half %fma to float
  %add = fsub float %x, %fma.ext
  ret float %add
}

declare float @llvm.fmuladd.f32(float, float, float) #0
declare float @llvm.fma.f32(float, float, float) #0
declare half @llvm.fmuladd.f16(half, half, half) #0
declare half @llvm.fma.f16(half, half, half) #0

attributes #0 = { nounwind readnone speculatable }