Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32FLUSH %s
      2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32DENORM %s
      3 ; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32FLUSH %s
      4 ; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32DENORM %s
      5 
      6 ;  fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
      7 
      8 ; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f32:
      9 ; GCN: s_waitcnt
     10 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
     11 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
     12 
     13 ; GFX9-F32DENORM-NEXT: v_mul_f16
     14 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
     15 ; GFX9-F32DENORM-NEXT: v_add_f32
     16 define float @fadd_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {  ; returns fpext(x * y) + z
     17 entry:
     18   %mul = fmul half %x, %y                  ; product computed in half, no fast-math flags
     19   %mul.ext = fpext half %mul to float      ; widen the half product to float
     20   %add = fadd float %mul.ext, %z           ; extended product is the left operand of the add
     21   ret float %add
     22 }
     23 
     24 ; f16->f64 is not free.
     25 ; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f64:
     26 ; GFX89: v_mul_f16
     27 ; GFX89: v_cvt_f32_f16
     28 ; GFX89: v_cvt_f64_f32
     29 ; GFX89: v_add_f64
     30 define double @fadd_fpext_fmul_f16_to_f64(half %x, half %y, double %z) #0 {  ; returns fpext(x * y) + z with a half-to-double extension
     31 entry:
     32   %mul = fmul half %x, %y                  ; product computed in half
     33   %mul.ext = fpext half %mul to double     ; half is extended directly to double in the IR
     34   %add = fadd double %mul.ext, %z          ; add performed in double
     35   ret double %add
     36 }
     37 
     38 ; f32->f64 is not free.
     39 ; GCN-LABEL: {{^}}fadd_fpext_fmul_f32_to_f64:
     40 ; GCN: v_mul_f32
     41 ; GCN: v_cvt_f64_f32
     42 ; GCN: v_add_f64
     43 define double @fadd_fpext_fmul_f32_to_f64(float %x, float %y, double %z) #0 {  ; returns fpext(x * y) + z with a float-to-double extension
     44 entry:
     45   %mul = fmul float %x, %y                 ; product computed in float
     46   %mul.ext = fpext float %mul to double    ; widen the float product to double
     47   %add = fadd double %mul.ext, %z          ; add performed in double
     48   ret double %add
     49 }
     50 
     51 ; fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
     52 ; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f32_commute:
     53 ; GCN: s_waitcnt
     54 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
     55 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
     56 
     57 ; GFX9-F32DENORM-NEXT: v_mul_f16
     58 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
     59 ; GFX9-F32DENORM-NEXT: v_add_f32
     60 ; GFX9-F32DENORM-NEXT: s_setpc_b64
     61 define float @fadd_fpext_fmul_f16_to_f32_commute(half %x, half %y, float %z) #0 {  ; returns z + fpext(x * y)
     62 entry:
     63   %mul = fmul half %x, %y                  ; product computed in half
     64   %mul.ext = fpext half %mul to float      ; widen the half product to float
     65   %add = fadd float %z, %mul.ext           ; commuted form: extended product is the right operand
     66   ret float %add
     67 }
     68 
     69 ; fold (fadd (fma x, y, (fpext (fmul u, v))), z)
     70 ;   -> (fma x, y, (fma (fpext u), (fpext v), z))
     71 
     72 ; GCN-LABEL: {{^}}fadd_muladd_fpext_fmul_f16_to_f32:
     73 ; GCN: s_waitcnt
     74 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
     75 ; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
     76 ; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
     77 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
     78 
     79 ; GFX9-F32DENORM-NEXT: v_mul_f16
     80 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
     81 ; GFX9-F32DENORM-NEXT: v_fma_f32
     82 ; GFX9-F32DENORM-NEXT: v_add_f32
     83 ; GFX9-F32DENORM-NEXT: s_setpc_b64
     84 define float @fadd_muladd_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {  ; returns fmuladd(x, y, fpext(u * v)) + z
     85 entry:
     86   %mul = fmul half %u, %v                  ; inner product computed in half
     87   %mul.ext = fpext half %mul to float      ; widen the inner product to float
     88   %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)  ; extended product is the addend of the muladd
     89   %add = fadd float %fma, %z               ; muladd result is the left operand of the outer add
     90   ret float %add
     91 }
     92 
     93 ; fold (fadd x, (fma y, z, (fpext (fmul u, v))))
     94 ;   -> (fma y, z, (fma (fpext u), (fpext v), x))
     95 ; GCN-LABEL: {{^}}fadd_muladd_fpext_fmul_f16_to_f32_commute:
     96 ; GCN: s_waitcnt
     97 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
     98 ; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
     99 ; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
    100 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
    101 
    102 ; GFX9-F32DENORM-NEXT: v_mul_f16
    103 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16
    104 ; GFX9-F32DENORM-NEXT: v_fma_f32
    105 ; GFX9-F32DENORM-NEXT: v_add_f32
    106 ; GFX9-F32DENORM-NEXT: s_setpc_b64
    107 define float @fadd_muladd_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) #0 {  ; returns z + fmuladd(x, y, fpext(u * v))
    108 entry:
    109   %mul = fmul half %u, %v                  ; inner product computed in half
    110   %mul.ext = fpext half %mul to float      ; widen the inner product to float
    111   %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)  ; extended product is the addend of the muladd
    112   %add = fadd float %z, %fma               ; commuted form: muladd result is the right operand
    113   ret float %add
    114 }
    115 
    116 ; GCN-LABEL: {{^}}fadd_fmad_fpext_fmul_f16_to_f32:
    117 ; GCN: s_waitcnt
    118 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0]
    119 ; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
    120 ; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
    121 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
    122 
    123 ; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v2, v2, v3
    124 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v2, v2
    125 ; GFX9-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v2
    126 define float @fadd_fmad_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {  ; returns ((x * y) + fpext(u * v)) + z, with the multiply-add spelled as separate fmul/fadd
    127 entry:
    128   %mul = fmul half %u, %v                  ; inner half product, no fast-math flags
    129   %mul.ext = fpext half %mul to float      ; widen the inner product to float
    130   %mul1 = fmul contract float %x, %y       ; float product carrying the contract flag
    131   %fmad = fadd contract float %mul1, %mul.ext  ; contract-flagged add of the two products
    132   %add = fadd float %fmad, %z              ; outer add has no contract flag
    133   ret float %add
    134 }
    135 
    136 ; fold (fadd (fma x, y, (fpext (fmul u, v))), z)
    137 ;   -> (fma x, y, (fma (fpext u), (fpext v), z))
    138 
    139 ; GCN-LABEL: {{^}}fadd_fma_fpext_fmul_f16_to_f32:
    140 ; GCN: s_waitcnt
    141 ; GFX89: v_mul_f16
    142 ; GFX89: v_cvt_f32_f16
    143 ; GFX89: v_fma_f32
    144 ; GFX89: v_add_f32
    145 define float @fadd_fma_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 {  ; returns fma(x, y, fpext(u * v)) + z
    146 entry:
    147   %mul = fmul contract half %u, %v         ; inner half product carrying the contract flag
    148   %mul.ext = fpext half %mul to float      ; widen the inner product to float
    149   %fma = call float @llvm.fma.f32(float %x, float %y, float %mul.ext)  ; explicit fma intrinsic rather than fmuladd
    150   %add = fadd float %fma, %z               ; fma result is the left operand; no contract flag on this add
    151   ret float %add
    152 }
    153 
    154 ; GCN-LABEL: {{^}}fadd_fma_fpext_fmul_f16_to_f32_commute:
    155 ; GCN: s_waitcnt
    156 ; GFX89: v_mul_f16
    157 ; GFX89: v_cvt_f32_f16
    158 ; GFX89: v_fma_f32
    159 ; GFX89: v_add_f32
    160 define float @fadd_fma_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) #0 {  ; returns z + fma(x, y, fpext(u * v))
    161 entry:
    162   %mul = fmul contract half %u, %v         ; inner half product carrying the contract flag
    163   %mul.ext = fpext half %mul to float      ; widen the inner product to float
    164   %fma = call float @llvm.fma.f32(float %x, float %y, float %mul.ext)  ; explicit fma intrinsic rather than fmuladd
    165   %add = fadd float %z, %fma               ; commuted form: fma result is the right operand
    166   ret float %add
    167 }
    168 
    169 ; fold (fadd x, (fpext (fma y, z, (fmul u, v))))
    170 ;   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
    171 
    172 ; GCN-LABEL: {{^}}fadd_fpext_fmuladd_f16_to_f32:
    173 ; GFX9: v_mul_f16
    174 ; GFX9: v_fma_legacy_f16
    175 ; GFX9: v_cvt_f32_f16
    176 ; GFX9: v_add_f32_e32
    177 define float @fadd_fpext_fmuladd_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {  ; returns x + fpext(fmuladd(y, z, u * v))
    178 entry:
    179   %mul = fmul contract half %u, %v         ; inner half product carrying the contract flag
    180   %fma = call half @llvm.fmuladd.f16(half %y, half %z, half %mul)  ; muladd performed entirely in half
    181   %ext.fma = fpext half %fma to float      ; the extension is applied to the muladd result, not to the product
    182   %add = fadd float %x, %ext.fma           ; extended value is the right operand of the add
    183   ret float %add
    184 }
    185 
    186 ; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32:
    187 ; GFX9: v_mul_f16
    188 ; GFX9: v_fma_legacy_f16
    189 ; GFX9: v_cvt_f32_f16
    190 ; GFX9: v_add_f32_e32
    191 define float @fadd_fpext_fma_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 {  ; returns x + fpext(fma(y, z, u * v))
    192 entry:
    193   %mul = fmul contract half %u, %v         ; inner half product carrying the contract flag
    194   %fma = call half @llvm.fma.f16(half %y, half %z, half %mul)  ; explicit half fma intrinsic
    195   %ext.fma = fpext half %fma to float      ; the extension is applied to the fma result
    196   %add = fadd float %x, %ext.fma           ; extended value is the right operand of the add
    197   ret float %add
    198 }
    199 
    200 ; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32_commute:
    201 ; GFX9: v_mul_f16
    202 ; GFX9: v_fma_legacy_f16
    203 ; GFX9: v_cvt_f32_f16
    204 ; GFX9: v_add_f32_e32
    205 define float @fadd_fpext_fma_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 {  ; returns fpext(fma(y, z, u * v)) + x
    206 entry:
    207   %mul = fmul contract half %u, %v         ; inner half product carrying the contract flag
    208   %fma = call half @llvm.fma.f16(half %y, half %z, half %mul)  ; explicit half fma intrinsic
    209   %ext.fma = fpext half %fma to float      ; the extension is applied to the fma result
    210   %add = fadd float %ext.fma, %x           ; commuted form: extended value is the left operand
    211   ret float %add
    212 }
    213 
    214 ; fold (fsub (fpext (fmul x, y)), z)
    215 ;   -> (fma (fpext x), (fpext y), (fneg z))
    216 
    217 ; GCN-LABEL: {{^}}fsub_fpext_fmul_f16_to_f32:
    218 ; GCN: s_waitcnt
    219 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]{{$}}
    220 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
    221 
    222 ; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
    223 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
    224 ; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
    225 ; GFX9-F32DENORM-NEXT: s_setpc_b64
    226 define float @fsub_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {  ; returns fpext(x * y) - z
    227 entry:
    228   %mul = fmul half %x, %y                  ; product computed in half, no fast-math flags
    229   %mul.ext = fpext half %mul to float      ; widen the half product to float
    230   %add = fsub float %mul.ext, %z           ; despite the name %add this is a subtraction; product is the minuend
    231   ret float %add
    232 }
    233 
    234 ; fold (fsub x, (fpext (fmul y, z)))
    235 ;   -> (fma (fneg (fpext y)), (fpext z), x)
    236 
    237 ; GCN-LABEL: {{^}}fsub_fpext_fmul_f16_to_f32_commute:
    238 ; GCN: s_waitcnt
    239 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, -v1, v2, v0 op_sel_hi:[1,1,0]
    240 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
    241 
    242 ; GFX9-F32DENORM-NEXT: v_mul_f16_e32
    243 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32
    244 ; GFX9-F32DENORM-NEXT: v_sub_f32_e32
    245 ; GFX9-F32DENORM-NEXT: s_setpc_b64
    246 define float @fsub_fpext_fmul_f16_to_f32_commute(float %x, half %y, half %z) #0 {  ; returns x - fpext(y * z)
    247 entry:
    248   %mul = fmul contract half %y, %z         ; half product carrying the contract flag
    249   %mul.ext = fpext half %mul to float      ; widen the half product to float
    250   %add = fsub contract float %x, %mul.ext  ; despite the name %add this is a subtraction; product is the subtrahend
    251   ret float %add
    252 }
    253 
    254 ; fold (fsub (fpext (fneg (fmul x, y))), z)
    255 ;   -> (fneg (fma (fpext x), (fpext y), z))
    256 
    257 ; GCN-LABEL: {{^}}fsub_fpext_fneg_fmul_f16_to_f32:
    258 ; GCN: s_waitcnt
    259 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]{{$}}
    260 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
    261 
    262 ; GFX9-F32DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1
    263 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
    264 ; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
    265 ; GFX9-F32DENORM-NEXT: s_setpc_b64
    266 define float @fsub_fpext_fneg_fmul_f16_to_f32(half %x, half %y, float %z) #0 {  ; returns fpext(-(x * y)) - z
    267 entry:
    268   %mul = fmul half %x, %y                  ; product computed in half
    269   %neg.mul = fsub half -0.0, %mul          ; negation written as a subtraction from -0.0, applied before the extension
    270   %neg.mul.ext = fpext half %neg.mul to float  ; widen the negated product to float
    271   %add = fsub float %neg.mul.ext, %z       ; despite the name %add this is a subtraction
    272   ret float %add
    273 }
    274 
    275 ; fold (fsub (fneg (fpext (fmul x, y))), z)
    276 ;   -> (fneg (fma (fpext x), (fpext y), z))
    277 
    278 ; GCN-LABEL: {{^}}fsub_fneg_fpext_fmul_f16_to_f32:
    279 ; GCN: s_waitcnt
    280 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]{{$}}
    281 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
    282 
    283 ; GFX9-F32DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1
    284 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0
    285 ; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
    286 ; GFX9-F32DENORM-NEXT: s_setpc_b64
    287 define float @fsub_fneg_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {  ; returns -(fpext(x * y)) - z
    288 entry:
    289   %mul = fmul half %x, %y                  ; product computed in half
    290   %mul.ext = fpext half %mul to float      ; widen the product to float first
    291   %neg.mul.ext = fsub float -0.0, %mul.ext ; negation written as a subtraction from -0.0, applied after the extension
    292   %add = fsub float %neg.mul.ext, %z       ; despite the name %add this is a subtraction
    293   ret float %add
    294 }
    295 
    296 ; fold (fsub (fmad x, y, (fpext (fmul u, v))), z)
    297 ;    -> (fmad x, y, (fmad (fpext u), (fpext v), (fneg z)))
    298 ; GCN-LABEL: {{^}}fsub_muladd_fpext_mul_f16_to_f32:
    299 ; GCN: s_waitcnt
    300 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v3, v4, -v2 op_sel_hi:[1,1,0]{{$}}
    301 ; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1
    302 ; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2
    303 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
    304 
    305 ; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v3, v3, v4
    306 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3
    307 ; GFX9-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v3
    308 ; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2
    309 ; GFX9-F32DENORM-NEXT: s_setpc_b64
    310 define float @fsub_muladd_fpext_mul_f16_to_f32(float %x, float %y, float %z, half %u, half %v) #0 {  ; returns fmuladd(x, y, fpext(u * v)) - z
    311 entry:
    312   %mul = fmul half %u, %v                  ; inner product computed in half
    313   %mul.ext = fpext half %mul to float      ; widen the inner product to float
    314   %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)  ; extended product is the addend of the muladd
    315   %add = fsub float %fma, %z               ; despite the name %add this is a subtraction; muladd result is the minuend
    316   ret float %add
    317 }
    318 
    319 ;  fold (fsub (fpext (fmad x, y, (fmul u, v))), z)
    320 ;    -> (fmad (fpext x), (fpext y),
    321 ;            (fmad (fpext u), (fpext v), (fneg z)))
    322 
    323 ; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32:
    324 ; GFX9: v_mul_f16
    325 ; GFX9: v_fma_legacy_f16
    326 ; GFX9: v_cvt_f32_f16
    327 ; GFX9: v_sub_f32
    328 ; GCN: s_setpc_b64
    329 define float @fsub_fpext_muladd_mul_f16_to_f32(half %x, half %y, float %z, half %u, half %v) #0 {  ; returns fpext(fmuladd(x, y, u * v)) - z
    330 entry:
    331   %mul = fmul half %u, %v                  ; inner product computed in half
    332   %fma = call half @llvm.fmuladd.f16(half %x, half %y, half %mul)  ; muladd performed entirely in half
    333   %fma.ext = fpext half %fma to float      ; the extension is applied to the muladd result
    334   %add = fsub float %fma.ext, %z           ; despite the name %add this is a subtraction; extended value is the minuend
    335   ret float %add
    336 }
    337 
    338 ; fold (fsub x, (fmad y, z, (fpext (fmul u, v))))
    339 ;   -> (fmad (fneg y), z, (fmad (fneg (fpext u)), (fpext v), x))
    340 ; GCN-LABEL: {{^}}fsub_muladd_fpext_mul_f16_to_f32_commute:
    341 ; GCN: s_waitcnt
    342 ; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, -v3, v4, v0 op_sel_hi:[1,1,0]{{$}}
    343 ; GFX9-F32FLUSH-NEXT: v_mad_f32 v0, -v1, v2, v0{{$}}
    344 ; GFX9-F32FLUSH-NEXT: s_setpc_b64
    345 
    346 ; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v3, v3, v4
    347 ; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3
    348 ; GFX9-F32DENORM-NEXT: v_fma_f32 v1, v1, v2, v3
    349 ; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v1
    350 ; GFX9-F32DENORM-NEXT: s_setpc_b64
    351 define float @fsub_muladd_fpext_mul_f16_to_f32_commute(float %x, float %y, float %z, half %u, half %v) #0 {  ; returns x - fmuladd(y, z, fpext(u * v))
    352 entry:
    353   %mul = fmul half %u, %v                  ; inner product computed in half
    354   %mul.ext = fpext half %mul to float      ; widen the inner product to float
    355   %fma = call float @llvm.fmuladd.f32(float %y, float %z, float %mul.ext)  ; muladd multiplies y and z here, not x and y
    356   %add = fsub float %x, %fma               ; despite the name %add this is a subtraction; muladd result is the subtrahend
    357   ret float %add
    358 }
    359 
    360 ; fold (fsub x, (fpext (fma y, z, (fmul u, v))))
    361 ;    -> (fma (fneg (fpext y)), (fpext z),
    362 ;            (fma (fneg (fpext u)), (fpext v), x))
    363 ; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32_commute:
    364 ; GCN: s_waitcnt
    365 ; GFX9-NEXT: v_mul_f16_e32 v3, v3, v4
    366 ; GFX9-NEXT: v_fma_legacy_f16 v1, v1, v2, v3
    367 ; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1
    368 ; GFX9-NEXT: v_sub_f32_e32 v0, v0, v1
    369 ; GFX9-NEXT: s_setpc_b64
    370 define float @fsub_fpext_muladd_mul_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 {  ; returns x - fpext(fmuladd(y, z, u * v))
    371 entry:
    372   %mul = fmul half %u, %v                  ; inner product computed in half
    373   %fma = call half @llvm.fmuladd.f16(half %y, half %z, half %mul)  ; muladd performed entirely in half
    374   %fma.ext = fpext half %fma to float      ; the extension is applied to the muladd result
    375   %add = fsub float %x, %fma.ext           ; despite the name %add this is a subtraction; extended value is the subtrahend
    376   ret float %add
    377 }
    378 
    379 declare float @llvm.fmuladd.f32(float, float, float) #0  ; called by the float *_muladd_* tests
    380 declare float @llvm.fma.f32(float, float, float) #0      ; called by the fadd_fma_fpext_* tests
    381 declare half @llvm.fmuladd.f16(half, half, half) #0      ; called by the *_fpext_fmuladd_* and *_fpext_muladd_* tests
    382 declare half @llvm.fma.f16(half, half, half) #0          ; called by the fadd_fpext_fma_* tests
    383 
    384 attributes #0 = { nounwind readnone speculatable }  ; attribute group #0, referenced by every define and declare shown in this file
    385