Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX900,GFX9 %s
      2 ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX906,GFX9 %s
      3 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s
      4 ; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s
      5 
      6 ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo:
      7 ; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x00,0x03,0x0a,0x1c]
      8 ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x00,0x03,0x0a,0x1c]
      9 ; VI: v_mac_f32
     10 ; CI: v_mad_f32
     11 define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
     12   %src0.ext = fpext half %src0 to float
     13   %src1.ext = fpext half %src1 to float
     14   %src2.ext = fpext half %src2 to float
     15   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
     16   ret float %result
     17 }
     18 
     19 ; GCN-LABEL: {{^}}v_mad_mix_f32_f16hi_f16hi_f16hi_int:
     20 ; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding
     21 ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding
     22 ; CIVI: v_mac_f32
     23 define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src2) #0 {
     24   %src0.hi = lshr i32 %src0, 16
     25   %src1.hi = lshr i32 %src1, 16
     26   %src2.hi = lshr i32 %src2, 16
     27   %src0.i16 = trunc i32 %src0.hi to i16
     28   %src1.i16 = trunc i32 %src1.hi to i16
     29   %src2.i16 = trunc i32 %src2.hi to i16
     30   %src0.fp16 = bitcast i16 %src0.i16 to half
     31   %src1.fp16 = bitcast i16 %src1.i16 to half
     32   %src2.fp16 = bitcast i16 %src2.i16 to half
     33   %src0.ext = fpext half %src0.fp16 to float
     34   %src1.ext = fpext half %src1.fp16 to float
     35   %src2.ext = fpext half %src2.fp16 to float
     36   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
     37   ret float %result
     38 }
     39 
     40 ; GCN-LABEL: {{^}}v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
     41 ; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding
     42 ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding
     43 ; VI: v_mac_f32
     44 ; CI: v_mad_f32
     45 define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
     46   %src0.hi = extractelement <2 x half> %src0, i32 1
     47   %src1.hi = extractelement <2 x half> %src1, i32 1
     48   %src2.hi = extractelement <2 x half> %src2, i32 1
     49   %src0.ext = fpext half %src0.hi to float
     50   %src1.ext = fpext half %src1.hi to float
     51   %src2.ext = fpext half %src2.hi to float
     52   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
     53   ret float %result
     54 }
     55 
     56 ; GCN-LABEL: {{^}}v_mad_mix_v2f32:
     57 ; GFX900: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
     58 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
     59 ; GFX900-NEXT: v_mov_b32_e32 v1, v3
     60 
     61 ; GFX906: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
     62 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
     63 ; GFX906-NEXT: v_mov_b32_e32 v1, v3
     64 
     65 ; CIVI: v_mac_f32
     66 define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
     67   %src0.ext = fpext <2 x half> %src0 to <2 x float>
     68   %src1.ext = fpext <2 x half> %src1 to <2 x float>
     69   %src2.ext = fpext <2 x half> %src2 to <2 x float>
     70   %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
     71   ret <2 x float> %result
     72 }
     73 
     74 ; GCN-LABEL: {{^}}v_mad_mix_v2f32_shuffle:
     75 ; GCN: s_waitcnt
     76 ; GFX900: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
     77 ; GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
     78 ; GFX900-NEXT: v_mov_b32_e32 v0, v3
     79 ; GFX900-NEXT: s_setpc_b64
     80 
     81 ; GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
     82 ; GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
     83 ; GFX906-NEXT: v_mov_b32_e32 v0, v3
     84 ; GFX906-NEXT: s_setpc_b64
     85 
     86 ; CIVI: v_mac_f32
     87 define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
     88   %src0.shuf = shufflevector <2 x half> %src0, <2 x half> undef, <2 x i32> <i32 1, i32 0>
     89   %src1.shuf = shufflevector <2 x half> %src1, <2 x half> undef, <2 x i32> <i32 0, i32 1>
     90   %src2.shuf = shufflevector <2 x half> %src2, <2 x half> undef, <2 x i32> <i32 1, i32 1>
     91   %src0.ext = fpext <2 x half> %src0.shuf to <2 x float>
     92   %src1.ext = fpext <2 x half> %src1.shuf to <2 x float>
     93   %src2.ext = fpext <2 x half> %src2.shuf to <2 x float>
     94   %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
     95   ret <2 x float> %result
     96 }
     97 
     98 ; GCN-LABEL: {{^}}v_mad_mix_f32_negf16lo_f16lo_f16lo:
     99 ; GFX900: s_waitcnt
    100 ; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
    101 ; GFX900-NEXT: s_setpc_b64
    102 
    103 ; GFX906: s_waitcnt
    104 ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2  op_sel_hi:[1,1,1] ; encoding
    105 ; GFX906-NEXT: s_setpc_b64
    106 
    107 ; CIVI: v_mad_f32
    108 define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
    109   %src0.ext = fpext half %src0 to float
    110   %src1.ext = fpext half %src1 to float
    111   %src2.ext = fpext half %src2 to float
    112   %src0.ext.neg = fsub float -0.0, %src0.ext
    113   %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg, float %src1.ext, float %src2.ext)
    114   ret float %result
    115 }
    116 
    117 ; GCN-LABEL: {{^}}v_mad_mix_f32_absf16lo_f16lo_f16lo:
    118 ; GFX900: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
    119 ; GFX906: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
    120 
    121 ; CIVI: v_mad_f32
    122 define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
    123   %src0.ext = fpext half %src0 to float
    124   %src1.ext = fpext half %src1 to float
    125   %src2.ext = fpext half %src2 to float
    126   %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
    127   %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext)
    128   ret float %result
    129 }
    130 
    131 ; GCN-LABEL: {{^}}v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
    132 ; GFX900: s_waitcnt
    133 ; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
    134 ; GFX900-NEXT: s_setpc_b64
    135 
    136 ; GFX906: s_waitcnt
    137 ; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
    138 ; GFX906-NEXT: s_setpc_b64
    139 
    140 ; CIVI: v_mad_f32
    141 define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
    142   %src0.ext = fpext half %src0 to float
    143   %src1.ext = fpext half %src1 to float
    144   %src2.ext = fpext half %src2 to float
    145   %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
    146   %src0.ext.neg.abs = fsub float -0.0, %src0.ext.abs
    147   %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg.abs, float %src1.ext, float %src2.ext)
    148   ret float %result
    149 }
    150 
    151 ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32:
    152 ; GCN: s_waitcnt
    153 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
    154 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
    155 ; GFX9-NEXT: s_setpc_b64
    156 
    157 ; CIVI: v_mad_f32
    158 define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 {
    159   %src0.ext = fpext half %src0 to float
    160   %src1.ext = fpext half %src1 to float
    161   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
    162   ret float %result
    163 }
    164 
    165 ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_negf32:
    166 ; GCN: s_waitcnt
    167 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] ; encoding
    168 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] ; encoding
    169 ; GFX9-NEXT: s_setpc_b64
    170 
    171 ; CIVI: v_mad_f32
    172 define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %src2) #0 {
    173   %src0.ext = fpext half %src0 to float
    174   %src1.ext = fpext half %src1 to float
    175   %src2.neg = fsub float -0.0, %src2
    176   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg)
    177   ret float %result
    178 }
    179 
    180 ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_absf32:
    181 ; GCN: s_waitcnt
    182 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0] ; encoding
    183 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0] ; encoding
    184 ; GFX9-NEXT: s_setpc_b64
    185 
    186 ; CIVI: v_mad_f32
    187 define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %src2) #0 {
    188   %src0.ext = fpext half %src0 to float
    189   %src1.ext = fpext half %src1 to float
    190   %src2.abs = call float @llvm.fabs.f32(float %src2)
    191   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.abs)
    192   ret float %result
    193 }
    194 
    195 ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_negabsf32:
    196 ; GCN: s_waitcnt
    197 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0] ; encoding
    198 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0] ; encoding
    199 ; GFX9-NEXT: s_setpc_b64
    200 
    201 ; CIVI: v_mad_f32
    202 define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float %src2) #0 {
    203   %src0.ext = fpext half %src0 to float
    204   %src1.ext = fpext half %src1 to float
    205   %src2.abs = call float @llvm.fabs.f32(float %src2)
    206   %src2.neg.abs = fsub float -0.0, %src2.abs
    207   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg.abs)
    208   ret float %result
    209 }
    210 
    211 ; TODO: Fold inline immediates. Need to be careful because it is an
    212 ; f16 inline immediate that may be converted to f32, not an actual f32
    213 ; inline immediate.
    214 
    215 ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32imm1:
    216 ; GCN: s_waitcnt
    217 ; GFX9: v_mov_b32_e32 v2, 1.0
    218 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
    219 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
    220 
    221 ; CIVI: v_mad_f32 v0, v0, v1, 1.0
    222 ; GCN-NEXT: s_setpc_b64
    223 define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 {
    224   %src0.ext = fpext half %src0 to float
    225   %src1.ext = fpext half %src1 to float
    226   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 1.0)
    227   ret float %result
    228 }
    229 
    230 ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
    231 ; GCN: s_waitcnt
    232 ; GFX9: v_mov_b32_e32 v2, 0.15915494
    233 ; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
    234 ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
    235 ; VI: v_mad_f32 v0, v0, v1, 0.15915494
    236 define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 {
    237   %src0.ext = fpext half %src0 to float
    238   %src1.ext = fpext half %src1 to float
    239   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 0x3FC45F3060000000)
    240   ret float %result
    241 }
    242 
    243 ; Attempt to break inline immediate folding. If the operand is
    244 ; interpreted as f32, the inline immediate is really the f16 inline
    245 ; imm value converted to f32.
    246 ;	fpext f16 1/2pi = 0x3e230000
    247 ;	      f32 1/2pi = 0x3e22f983
    248 ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
    249 ; GFX9: v_mov_b32_e32 v2, 0x3e230000
    250 ; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
    251 ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
    252 
    253 ; CIVI: v_madak_f32 v0, v0, v1, 0x3e230000
    254 define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) #0 {
    255   %src0.ext = fpext half %src0 to float
    256   %src1.ext = fpext half %src1 to float
    257   %src2 = fpext half 0xH3118 to float
    258   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
    259   ret float %result
    260 }
    261 
    262 ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
    263 ; GFX9: v_mov_b32_e32 v2, 0x367c0000
    264 ; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
    265 ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
    266 
    267 ; CIVI: v_madak_f32 v0, v0, v1, 0x367c0000
    268 define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
    269   %src0.ext = fpext half %src0 to float
    270   %src1.ext = fpext half %src1 to float
    271   %src2 = fpext half 0xH003F to float
    272   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
    273   ret float %result
    274 }
    275 
    276 ; GCN-LABEL: {{^}}v_mad_mix_v2f32_f32imm1:
    277 ; GFX9: v_mov_b32_e32 v3, 1.0
    278 ; GFX900: v_mad_mix_f32 v2, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
    279 ; GFX900: v_mad_mix_f32 v0, v0, v1, v3 op_sel_hi:[1,1,0] ; encoding
    280 ; GFX900: v_mov_b32_e32 v1, v2
    281 
    282 ; GFX906: v_fma_mix_f32 v2, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
    283 ; GFX906: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[1,1,0] ; encoding
    284 ; GFX906: v_mov_b32_e32 v1, v2
    285 define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) #0 {
    286   %src0.ext = fpext <2 x half> %src0 to <2 x float>
    287   %src1.ext = fpext <2 x half> %src1 to <2 x float>
    288   %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 1.0, float 1.0>)
    289   ret <2 x float> %result
    290 }
    291 
    292 ; GCN-LABEL: {{^}}v_mad_mix_v2f32_cvtf16imminv2pi:
    293 ; GFX9: v_mov_b32_e32 v3, 0x3e230000
    294 
    295 ; GFX900: v_mad_mix_f32 v2, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
    296 ; GFX900: v_mad_mix_f32 v0, v0, v1, v3 op_sel_hi:[1,1,0] ; encoding
    297 ; GFX900: v_mov_b32_e32 v1, v2
    298 
    299 ; GFX906: v_fma_mix_f32 v2, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
    300 ; GFX906: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[1,1,0] ; encoding
    301 ; GFX906: v_mov_b32_e32 v1, v2
    302 define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
    303   %src0.ext = fpext <2 x half> %src0 to <2 x float>
    304   %src1.ext = fpext <2 x half> %src1 to <2 x float>
    305   %src2 = fpext <2 x half> <half 0xH3118, half 0xH3118> to <2 x float>
    306   %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2)
    307   ret <2 x float> %result
    308 }
    309 
    310 ; GCN-LABEL: {{^}}v_mad_mix_v2f32_f32imminv2pi:
    311 ; GFX9: v_mov_b32_e32 v3, 0.15915494
    312 
    313 ; GFX900: v_mad_mix_f32 v2, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
    314 ; GFX900: v_mad_mix_f32 v0, v0, v1, v3 op_sel_hi:[1,1,0] ; encoding
    315 ; GFX900: v_mov_b32_e32 v1, v2
    316 
    317 ; GFX906: v_fma_mix_f32 v2, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
    318 ; GFX906: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[1,1,0] ; encoding
    319 ; GFX906: v_mov_b32_e32 v1, v2
        ; The addend is a genuine f32 constant splat (not an fpext of an f16
        ; constant); the checks expect it to be materialized with v_mov_b32
        ; and used as an f32 source (op_sel_hi bit 2 clear) by both mix ops.
    320 define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
    321   %src0.ext = fpext <2 x half> %src0 to <2 x float>
    322   %src1.ext = fpext <2 x half> %src1 to <2 x float>
    324   %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 0x3FC45F3060000000, float 0x3FC45F3060000000>)
    325   ret <2 x float> %result
    326 }
    327 
    328 ; GCN-LABEL: {{^}}v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
    329 ; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; encoding
    330 ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; encoding
    331 ; VI: v_mac_f32_e64 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}}
    332 ; CI: v_mad_f32 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}}
    333 define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
    334   %src0.hi = extractelement <2 x half> %src0, i32 1
    335   %src1.hi = extractelement <2 x half> %src1, i32 1
    336   %src2.hi = extractelement <2 x half> %src2, i32 1
    337   %src0.ext = fpext half %src0.hi to float
    338   %src1.ext = fpext half %src1.hi to float
    339   %src2.ext = fpext half %src2.hi to float
    340   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
    341   %max = call float @llvm.maxnum.f32(float %result, float 0.0)
    342   %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
    343   ret float %clamp
    344 }
    345 
    346 ; GCN-LABEL: {{^}}no_mix_simple:
    347 ; GCN: s_waitcnt
    348 ; GCN-NEXT: v_{{mad|fma}}_f32 v0, v0, v1, v2
    349 ; GCN-NEXT: s_setpc_b64
        ; All three operands are already f32 (no fpext from half), so the
        ; checks expect an ordinary v_mad_f32/v_fma_f32 rather than a mix op.
    350 define float @no_mix_simple(float %src0, float %src1, float %src2) #0 {
    351   %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
    352   ret float %result
    353 }
    354 
    355 ; GCN-LABEL: {{^}}no_mix_simple_fabs:
    356 ; GCN: s_waitcnt
    357 ; CIVI-NEXT: v_mad_f32 v0, |v0|, v1, v2
    358 ; GFX900-NEXT: v_mad_f32 v0, |v0|, v1, v2
    359 ; GFX906-NEXT: v_fma_f32 v0, v1, |v0|, v2
    360 ; GCN-NEXT: s_setpc_b64
        ; f32 operands with fabs applied to src0: the checks expect the |v0|
        ; source modifier on a plain f32 mad/fma, with no mix instruction.
    361 define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 {
    362   %src0.fabs = call float @llvm.fabs.f32(float %src0)
    363   %result = call float @llvm.fmuladd.f32(float %src0.fabs, float %src1, float %src2)
    364   ret float %result
    365 }
    366 
    367 ; FIXME: Should be able to select in this case.
    368 ; All sources are converted from f16, so it doesn't matter that
    369 ; v_mad_mix_f32 flushes denormals.
    370 
    371 ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
    372 ; GFX900: v_cvt_f32_f16
    373 ; GFX900: v_cvt_f32_f16
    374 ; GFX900: v_cvt_f32_f16
    375 ; GFX900: v_fma_f32
    376 define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %src1, half %src2) #1 {
    377   %src0.ext = fpext half %src0 to float
    378   %src1.ext = fpext half %src1 to float
    379   %src2.ext = fpext half %src2 to float
    380   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
    381   ret float %result
    382 }
    383 
    384 ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_denormals:
    385 ; GFX900: v_cvt_f32_f16
    386 ; GFX900: v_cvt_f32_f16
    387 ; GFX900: v_fma_f32
    388 
    389 ; GFX906-NOT: v_cvt_f32_f16
    390 ; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
    391 define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, float %src2) #1 {
    392   %src0.ext = fpext half %src0 to float
    393   %src1.ext = fpext half %src1 to float
    394   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
    395   ret float %result
    396 }
    397 
    398 ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
    399 ; GFX9: v_cvt_f32_f16
    400 ; GFX9: v_cvt_f32_f16
    401 ; GFX9: v_cvt_f32_f16
    402 ; GFX9: v_mul_f32
    403 ; GFX9: v_add_f32
    404 define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, half %src2) #1 {
    405   %src0.ext = fpext half %src0 to float
    406   %src1.ext = fpext half %src1 to float
    407   %src2.ext = fpext half %src2 to float
    408   %mul = fmul float %src0.ext, %src1.ext
    409   %result = fadd float %mul, %src2.ext
    410   ret float %result
    411 }
    412 
    413 ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
    414 ; GFX9: v_cvt_f32_f16
    415 ; GFX9: v_cvt_f32_f16
    416 ; GFX9: v_mul_f32
    417 ; GFX9: v_add_f32
    418 define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, float %src2) #1 {
    419   %src0.ext = fpext half %src0 to float
    420   %src1.ext = fpext half %src1 to float
    421   %mul = fmul float %src0.ext, %src1.ext
    422   %result = fadd float %mul, %src2
    423   ret float %result
    424 }
    425 
    426 ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
    427 ; GCN: s_waitcnt
    428 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
    429 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
    430 ; GFX9-NEXT: s_setpc_b64
    431 define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, half %src2) #0 {
    432   %src0.ext = fpext half %src0 to float
    433   %src1.ext = fpext half %src1 to float
    434   %src2.ext = fpext half %src2 to float
    435   %mul = fmul contract float %src0.ext, %src1.ext
    436   %result = fadd contract float %mul, %src2.ext
    437   ret float %result
    438 }
    439 
    440 ; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
    441 ; GCN: s_waitcnt
    442 ; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
    443 ; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
    444 ; GFX9-NEXT: s_setpc_b64
    445 define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, float %src2) #0 {
    446   %src0.ext = fpext half %src0 to float
    447   %src1.ext = fpext half %src1 to float
    448   %mul = fmul contract float %src0.ext, %src1.ext
    449   %result = fadd contract float %mul, %src2
    450   ret float %result
    451 }
    452 
    453 ; GCN-LABEL: {{^}}v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
    454 ; GFX9: s_waitcnt
    455 ; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
    456 ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
    457 ; GFX9-NEXT: s_setpc_b64
    458 
    459 ; CIVI: v_mad_f32
    460 define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
    461   %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
    462   %src0 = extractelement <2 x half> %src0.arg.bc, i32 0
    463   %src0.neg = fsub half -0.0, %src0
    464   %src0.ext = fpext half %src0.neg to float
    465   %src1.ext = fpext half %src1 to float
    466   %src2.ext = fpext half %src2 to float
    467 ;  %src0.ext.neg = fsub float -0.0, %src0.ext
    468   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
    469   ret float %result
    470 }
    471 
    472 ; Make sure we don't fold pre-cvt fneg if we already have a fabs
    473 ; GCN-LABEL: {{^}}v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
    474 ; GFX900: s_waitcnt
    475 define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
    476   %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
    477   %src0 = extractelement <2 x half> %src0.arg.bc, i32 1
    478   %src0.neg = fsub half -0.0, %src0
    479   %src0.ext = fpext half %src0.neg to float
    480   %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
    481   %src1.ext = fpext half %src1 to float
    482   %src2.ext = fpext half %src2 to float
    483   %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext)
    484   ret float %result
    485 }
    486 
    487 ; GCN-LABEL: {{^}}v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
    488 ; GFX9: s_waitcnt
    489 ; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
    490 ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
    491 ; GFX9-NEXT: s_setpc_b64
    492 define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
    493   %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
    494   %src0 = extractelement <2 x half> %src0.arg.bc, i32 1
    495   %src0.abs = call half @llvm.fabs.f16(half %src0)
    496   %src0.ext = fpext half %src0.abs to float
    497   %src1.ext = fpext half %src1 to float
    498   %src2.ext = fpext half %src2 to float
    499   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
    500   ret float %result
    501 }
    502 
    503 ; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
    504 ; GFX9: s_waitcnt
    505 ; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
    506 ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
    507 ; GFX9-NEXT: s_setpc_b64
    508 define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
    509   %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
    510   %fneg = fsub <2 x half> <half -0.0, half -0.0>, %src0.arg.bc
    511   %src0 = extractelement <2 x half> %fneg, i32 1
    512   %src0.ext = fpext half %src0 to float
    513   %src1.ext = fpext half %src1 to float
    514   %src2.ext = fpext half %src2 to float
    515   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
    516   ret float %result
    517 }
    518 
    519 ; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
    520 ; GFX9: s_waitcnt
    521 ; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
    522 ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
    523 ; GFX9-NEXT: s_setpc_b64
    524 define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
    525   %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
    526   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc)
    527   %src0 = extractelement <2 x half> %fabs, i32 1
    528   %src0.ext = fpext half %src0 to float
    529   %src1.ext = fpext half %src1 to float
    530   %src2.ext = fpext half %src2 to float
    531   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
    532   ret float %result
    533 }
    534 
    535 ; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
    536 ; GFX9: s_waitcnt
    537 ; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
    538 ; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
    539 ; GFX9-NEXT: s_setpc_b64
    540 define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
    541   %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
    542   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc)
    543   %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
    544   %src0 = extractelement <2 x half> %fneg.fabs, i32 1
    545   %src0.ext = fpext half %src0 to float
    546   %src1.ext = fpext half %src1 to float
    547   %src2.ext = fpext half %src2 to float
    548   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
    549   ret float %result
    550 }
    551 
    552 declare half @llvm.fabs.f16(half) #2
    553 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
    554 declare float @llvm.fabs.f32(float) #2
    555 declare float @llvm.minnum.f32(float, float) #2
    556 declare float @llvm.maxnum.f32(float, float) #2
    557 declare float @llvm.fmuladd.f32(float, float, float) #2
    558 declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #2
    559 
    560 attributes #0 = { nounwind "target-features"="-fp32-denormals" }
    561 attributes #1 = { nounwind "target-features"="+fp32-denormals" }
    562 attributes #2 = { nounwind readnone speculatable }
    563