Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
      2 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s
      3 ; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s
      4 
      5 ; GCN-LABEL: {{^}}mixlo_simple:
      6 ; GCN: s_waitcnt
      7 ; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2{{$}}
      8 ; GFX9-NEXT: s_setpc_b64
      9 
     10 ; CIVI: v_mac_f32_e32
     11 ; CIVI: v_cvt_f16_f32_e32
        ; Baseline case. All three operands are already f32, they are combined
        ; with llvm.fmuladd.f32, and the f32 result is truncated to half.
        ; The gfx900 expectation is a single v_mad_mixlo_f16 with no op_sel_hi
        ; modifier, followed directly by the return. The fiji and hawaii
        ; expectation is an f32 mac plus a separate f32-to-f16 convert.
        ; The label pattern is anchored at the start of the line, matching the
        ; other labels in this file.
     12 define half @mixlo_simple(float %src0, float %src1, float %src2) #0 {
     13   %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
     14   %cvt.result = fptrunc float %result to half
     15   ret half %cvt.result
     16 }
     17 
     18 ; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f16lo:
     19 ; GFX9: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
     20 ; CI: v_mac_f32
     21 ; CIVI: v_cvt_f16_f32
        ; All three operands are half values that are extended to f32 before
        ; the llvm.fmuladd.f32 call, and the f32 result is truncated back to half.
        ; The gfx900 expectation is one v_mad_mixlo_f16 carrying
        ; op_sel_hi:[1,1,1] and nothing after it on the line.
        ; NOTE(review) op_sel_hi presumably flags each source as an f16 input;
        ; confirm against the ISA documentation.
        ; Only hawaii is checked for the f32 mac here; both fiji and hawaii are
        ; checked for the f32-to-f16 convert.
     22 define half @v_mad_mixlo_f16_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
     23   %src0.ext = fpext half %src0 to float
     24   %src1.ext = fpext half %src1 to float
     25   %src2.ext = fpext half %src2 to float
     26   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
     27   %cvt.result = fptrunc float %result to half
     28   ret half %cvt.result
     29 }
     30 
     31 ; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f32:
     32 ; GCN: s_waitcnt
     33 ; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
     34 ; GFX9-NEXT: s_setpc_b64
     35 
     36 ; CIVI: v_mac_f32
        ; Mixed-precision operands. The two multiplicands are half values
        ; extended to f32, while the addend %src2 is passed in as f32 directly.
        ; The fmuladd result is truncated to half.
        ; The gfx900 expectation is one v_mad_mixlo_f16 with op_sel_hi:[1,1,0],
        ; immediately followed by the return. The fiji and hawaii expectation
        ; only requires an f32 mac to appear.
     37 define half @v_mad_mixlo_f16_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 {
     38   %src0.ext = fpext half %src0 to float
     39   %src1.ext = fpext half %src1 to float
     40   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
     41   %cvt.result = fptrunc float %result to half
     42   ret half %cvt.result
     43 }
     44 
     45 ; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
     46 ; GCN: s_waitcnt
     47 ; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp{{$}}
     48 ; GFX9-NEXT: s_setpc_b64
     49 
     50 ; CIVI: v_mac_f32_e32 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]$}}
        ; Same operand mix as the unclamped f16, f16, f32 case, but the half
        ; result is bounded to the range 0.0 to 1.0 with maxnum then minnum
        ; AFTER the truncation to half.
        ; The gfx900 expectation is that this f16 clamp is folded into the
        ; single v_mad_mixlo_f16 as a trailing clamp modifier.
        ; The fiji and hawaii pattern ends the mac line right after the third
        ; register, so no clamp modifier is expected on the f32 mac there.
     51 define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %src1, float %src2) #0 {
     52   %src0.ext = fpext half %src0 to float
     53   %src1.ext = fpext half %src1 to float
     54   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
     55   %cvt.result = fptrunc float %result to half
     56   %max = call half @llvm.maxnum.f16(half %cvt.result, half 0.0)
     57   %clamp = call half @llvm.minnum.f16(half %max, half 1.0)
     58   ret half %clamp
     59 }
     60 
     61 ; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
     62 ; GCN: s_waitcnt
     63 ; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp{{$}}
     64 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
     65 ; GFX9-NEXT: s_setpc_b64
     66 
     67 ; CIVI: v_mac_f32_e64 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}}
        ; Here the 0.0 to 1.0 bound (maxnum then minnum) is applied to the f32
        ; fmuladd result BEFORE the truncation to half.
        ; The gfx900 expectation is therefore the f32-result v_mad_mix_f32 with
        ; a clamp modifier, followed by a separate f32-to-f16 convert, rather
        ; than a single v_mad_mixlo_f16.
        ; The fiji and hawaii expectation is an f32 mac carrying the clamp.
     68 define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src1, float %src2) #0 {
     69   %src0.ext = fpext half %src0 to float
     70   %src1.ext = fpext half %src1 to float
     71   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
     72   %max = call float @llvm.maxnum.f32(float %result, float 0.0)
     73   %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
     74   %cvt.result = fptrunc float %clamp to half
     75   ret half %cvt.result
     76 }
     77 
     78 ; FIXME: Should be able to avoid extra register because first
     79 ; operation only clobbers relevant lane.
     80 ; GCN-LABEL: {{^}}v_mad_mix_v2f32:
     81 ; GCN: s_waitcnt
     82 ; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
     83 ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]{{$}}
     84 ; GFX9-NEXT: v_mov_b32_e32 v0, v3
     85 ; GFX9-NEXT: s_setpc_b64
        ; Two-element vector form. Each <2 x half> operand is extended to
        ; <2 x float>, combined with llvm.fmuladd.v2f32, and truncated back to
        ; <2 x half>.
        ; The gfx900 expectation is a mixlo / mixhi pair that both write v3,
        ; with the mixhi additionally carrying op_sel:[1,1,1], and then a move
        ; of v3 into v0 before the return. Only gfx900 output is checked
        ; beyond the label and the initial s_waitcnt.
     86 define <2 x half> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
     87   %src0.ext = fpext <2 x half> %src0 to <2 x float>
     88   %src1.ext = fpext <2 x half> %src1 to <2 x float>
     89   %src2.ext = fpext <2 x half> %src2 to <2 x float>
     90   %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
     91   %cvt.result = fptrunc <2 x float> %result to <2 x half>
     92   ret <2 x half> %cvt.result
     93 }
     94 
     95 ; GCN-LABEL: {{^}}v_mad_mix_v3f32:
     96 ; GCN: s_waitcnt
     97 ; GFX9-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
     98 ; GFX9-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
     99 ; GFX9-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
    100 ; GFX9-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
    101 ; GFX9-NEXT: v_mov_b32_e32 v0, v6
    102 ; GFX9-NEXT: v_mov_b32_e32 v1, v7
    103 ; GFX9-NEXT: s_setpc_b64
        ; Three-element vector form of the extend, fmuladd, truncate sequence.
        ; The gfx900 expectation reads each argument from two registers
        ; (v0/v1, v2/v3, v4/v5) and emits two mixlo and two mixhi instructions
        ; into v6 and v7, i.e. four lanes are computed for a three-element
        ; vector, then moves v6 and v7 into v0 and v1.
    104 define <3 x half> @v_mad_mix_v3f32(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
    105   %src0.ext = fpext <3 x half> %src0 to <3 x float>
    106   %src1.ext = fpext <3 x half> %src1 to <3 x float>
    107   %src2.ext = fpext <3 x half> %src2 to <3 x float>
    108   %result = tail call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %src0.ext, <3 x float> %src1.ext, <3 x float> %src2.ext)
    109   %cvt.result = fptrunc <3 x float> %result to <3 x half>
    110   ret <3 x half> %cvt.result
    111 }
    112 
    113 ; GCN-LABEL: {{^}}v_mad_mix_v4f32:
    114 ; GCN: s_waitcnt
    115 ; GFX9-NEXT: v_mad_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1]
    116 ; GFX9-NEXT: v_mad_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1]
    117 ; GFX9-NEXT: v_mad_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
    118 ; GFX9-NEXT: v_mad_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
    119 ; GFX9-NEXT: v_mov_b32_e32 v0, v7
    120 ; GFX9-NEXT: v_mov_b32_e32 v1, v6
    121 ; GFX9-NEXT: s_setpc_b64
        ; Four-element vector form of the extend, fmuladd, truncate sequence.
        ; The gfx900 expectation is two mixlo / mixhi pairs. The pair reading
        ; v0, v2, v4 writes v7 and the pair reading v1, v3, v5 writes v6, and
        ; the results are then moved to v0 and v1 respectively.
        ; The exact instruction order and register numbers are part of the
        ; expectation because every line uses a NEXT-style match.
    122 define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
    123   %src0.ext = fpext <4 x half> %src0 to <4 x float>
    124   %src1.ext = fpext <4 x half> %src1 to <4 x float>
    125   %src2.ext = fpext <4 x half> %src2 to <4 x float>
    126   %result = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %src0.ext, <4 x float> %src1.ext, <4 x float> %src2.ext)
    127   %cvt.result = fptrunc <4 x float> %result to <4 x half>
    128   ret <4 x half> %cvt.result
    129 }
    130 
    131 ; FIXME: Fold clamp
    132 ; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_postcvt:
    133 ; GFX9: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
    134 ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp{{$}}
    135 ; GFX9-NEXT: v_mov_b32_e32 v0, v3
    136 ; GFX9-NEXT: s_setpc_b64
        ; Two-element vector case where the 0.0 to 1.0 bound (maxnum against
        ; zeroinitializer, then minnum against 1.0 in both lanes) is applied to
        ; the <2 x half> value AFTER the truncation.
        ; The gfx900 expectation has a clamp modifier on both the mixlo and the
        ; mixhi instruction.
        ; NOTE(review) since the expected output already shows the clamp folded
        ; into both instructions, the "Fold clamp" FIXME above may be stale;
        ; confirm before removing it.
    137 define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
    138   %src0.ext = fpext <2 x half> %src0 to <2 x float>
    139   %src1.ext = fpext <2 x half> %src1 to <2 x float>
    140   %src2.ext = fpext <2 x half> %src2 to <2 x float>
    141   %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
    142   %cvt.result = fptrunc <2 x float> %result to <2 x half>
    143   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %cvt.result, <2 x half> zeroinitializer)
    144   %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
    145   ret <2 x half> %clamp
    146 }
    147 
    148 ; FIXME: Should be packed into 2 registers per argument?
    149 ; GCN-LABEL: {{^}}v_mad_mix_v3f32_clamp_postcvt:
    150 ; GCN: s_waitcnt
    151 ; GFX9-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
    152 ; GFX9-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] clamp
    153 ; GFX9-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
    154 ; GFX9-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
    155 ; GFX9-NEXT: v_mov_b32_e32 v0, v6
    156 ; GFX9-NEXT: v_mov_b32_e32 v1, v7
    157 ; GFX9-NEXT: s_setpc_b64
        ; Three-element vector case with the 0.0 to 1.0 bound applied to the
        ; <3 x half> value after the truncation.
        ; The gfx900 expectation mirrors the unclamped three-element test (two
        ; mixlo and two mixhi into v6 and v7, then moves to v0 and v1), with a
        ; clamp modifier on all four instructions.
    158 define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
    159   %src0.ext = fpext <3 x half> %src0 to <3 x float>
    160   %src1.ext = fpext <3 x half> %src1 to <3 x float>
    161   %src2.ext = fpext <3 x half> %src2 to <3 x float>
    162   %result = tail call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %src0.ext, <3 x float> %src1.ext, <3 x float> %src2.ext)
    163   %cvt.result = fptrunc <3 x float> %result to <3 x half>
    164   %max = call <3 x half> @llvm.maxnum.v3f16(<3 x half> %cvt.result, <3 x half> zeroinitializer)
    165   %clamp = call <3 x half> @llvm.minnum.v3f16(<3 x half> %max, <3 x half> <half 1.0, half 1.0, half 1.0>)
    166   ret <3 x half> %clamp
    167 }
    168 
    169 ; GCN-LABEL: {{^}}v_mad_mix_v4f32_clamp_postcvt:
    170 ; GCN: s_waitcnt
    171 ; GFX9-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
    172 ; GFX9-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
    173 ; GFX9-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
    174 ; GFX9-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
    175 ; GFX9-NEXT: v_mov_b32_e32 v0, v6
    176 ; GFX9-NEXT: v_mov_b32_e32 v1, v2
    177 ; GFX9-NEXT: s_setpc_b64
        ; Four-element vector case with the 0.0 to 1.0 bound applied to the
        ; <4 x half> value after the truncation.
        ; The gfx900 expectation is two clamped mixlo / mixhi pairs. The first
        ; pair writes v6; the second pair writes v2, reusing an input register
        ; once the first pair has read it. The results are then moved to v0
        ; and v1.
    178 define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
    179   %src0.ext = fpext <4 x half> %src0 to <4 x float>
    180   %src1.ext = fpext <4 x half> %src1 to <4 x float>
    181   %src2.ext = fpext <4 x half> %src2 to <4 x float>
    182   %result = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %src0.ext, <4 x float> %src1.ext, <4 x float> %src2.ext)
    183   %cvt.result = fptrunc <4 x float> %result to <4 x half>
    184   %max = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %cvt.result, <4 x half> zeroinitializer)
    185   %clamp = call <4 x half> @llvm.minnum.v4f16(<4 x half> %max, <4 x half> <half 1.0, half 1.0, half 1.0, half 1.0>)
    186   ret <4 x half> %clamp
    187 }
    188 
    189 ; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_postcvt_lo:
    190 ; GCN: s_waitcnt
    191 ; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
    192 ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
    193 ; GFX9-NEXT: v_mov_b32_e32 v0, v3
    194 ; GFX9-NEXT: s_setpc_b64
        ; Only element 0 of the truncated <2 x half> result is bounded to the
        ; range 0.0 to 1.0. It is extracted, passed through maxnum and minnum,
        ; and inserted back, leaving element 1 untouched.
        ; The gfx900 expectation accordingly has the clamp modifier on the
        ; mixlo instruction only.
    195 define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
    196   %src0.ext = fpext <2 x half> %src0 to <2 x float>
    197   %src1.ext = fpext <2 x half> %src1 to <2 x float>
    198   %src2.ext = fpext <2 x half> %src2 to <2 x float>
    199   %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
    200   %cvt.result = fptrunc <2 x float> %result to <2 x half>
    201   %cvt.lo = extractelement <2 x half> %cvt.result, i32 0
    202   %max.lo = call half @llvm.maxnum.f16(half %cvt.lo, half 0.0)
    203   %clamp.lo = call half @llvm.minnum.f16(half %max.lo, half 1.0)
    204   %insert = insertelement <2 x half> %cvt.result, half %clamp.lo, i32 0
    205   ret <2 x half> %insert
    206 }
    207 
    208 ; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_postcvt_hi:
    209 ; GCN: s_waitcnt
    210 ; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
    211 ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
    212 ; GFX9-NEXT: v_mov_b32_e32 v0, v3
    213 ; GFX9-NEXT: s_setpc_b64
        ; Counterpart of the element-0 test. Only element 1 of the truncated
        ; <2 x half> result is extracted, bounded to 0.0 to 1.0 with maxnum and
        ; minnum, and inserted back.
        ; The gfx900 expectation accordingly has the clamp modifier on the
        ; mixhi instruction only.
    214 define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
    215   %src0.ext = fpext <2 x half> %src0 to <2 x float>
    216   %src1.ext = fpext <2 x half> %src1 to <2 x float>
    217   %src2.ext = fpext <2 x half> %src2 to <2 x float>
    218   %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
    219   %cvt.result = fptrunc <2 x float> %result to <2 x half>
    220   %cvt.hi = extractelement <2 x half> %cvt.result, i32 1
    221   %max.hi = call half @llvm.maxnum.f16(half %cvt.hi, half 0.0)
    222   %clamp.hi = call half @llvm.minnum.f16(half %max.hi, half 1.0)
    223   %insert = insertelement <2 x half> %cvt.result, half %clamp.hi, i32 1
    224   ret <2 x half> %insert
    225 }
    226 
    227 ; FIXME: Should be able to use mixlo/mixhi
    228 ; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_precvt:
    229 ; GFX9: v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
    230 ; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
    231 ; GFX9: v_cvt_f16_f32_e32 v1, v3
    232 ; GFX9: v_cvt_f16_f32_e32 v0, v0
    233 ; GFX9: v_and_b32_e32 v1, 0xffff, v1
    234 ; GFX9: v_lshl_or_b32 v0, v0, 16, v1
    235 ; GFX9: s_setpc_b64
        ; Two-element vector case where the 0.0 to 1.0 bound is applied to the
        ; <2 x float> fmuladd result BEFORE the truncation to <2 x half>.
        ; The gfx900 expectation is two clamped f32-result v_mad_mix_f32
        ; instructions, two f32-to-f16 converts, and then an and / shift-or
        ; sequence that packs the two halves into v0. The FIXME above records
        ; that mixlo / mixhi are not used here yet.
    236 define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
    237   %src0.ext = fpext <2 x half> %src0 to <2 x float>
    238   %src1.ext = fpext <2 x half> %src1 to <2 x float>
    239   %src2.ext = fpext <2 x half> %src2 to <2 x float>
    240   %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
    241   %max = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %result, <2 x float> zeroinitializer)
    242   %clamp = call <2 x float> @llvm.minnum.v2f32(<2 x float> %max, <2 x float> <float 1.0, float 1.0>)
    243   %cvt.result = fptrunc <2 x float> %clamp to <2 x half>
    244   ret <2 x half> %cvt.result
    245 }
    246 
    247 ; FIXME: Handling undef 4th component
    248 ; GCN-LABEL: {{^}}v_mad_mix_v3f32_clamp_precvt:
    249 ; GFX9: v_mad_mix_f32 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
    250 ; GFX9: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
    251 ; GFX9: v_mad_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
    252 ; GFX9: v_mad_mix_f32 v1, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
    253 
    254 ; GFX9: v_cvt_f16_f32
    255 ; GFX9: v_cvt_f16_f32
    256 ; GFX9: v_cvt_f16_f32
    257 ; GFX9: v_cvt_f16_f32
        ; Three-element vector case with the 0.0 to 1.0 bound applied to the
        ; <3 x float> fmuladd result before the truncation to <3 x half>.
        ; The gfx900 expectation is four v_mad_mix_f32 instructions followed by
        ; four f32-to-f16 converts. The first three carry a clamp modifier and
        ; the fourth does not, which lines up with the FIXME above about the
        ; undefined fourth component still being computed.
    258 define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
    259   %src0.ext = fpext <3 x half> %src0 to <3 x float>
    260   %src1.ext = fpext <3 x half> %src1 to <3 x float>
    261   %src2.ext = fpext <3 x half> %src2 to <3 x float>
    262   %result = tail call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %src0.ext, <3 x float> %src1.ext, <3 x float> %src2.ext)
    263   %max = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %result, <3 x float> zeroinitializer)
    264   %clamp = call <3 x float> @llvm.minnum.v3f32(<3 x float> %max, <3 x float> <float 1.0, float 1.0, float 1.0>)
    265   %cvt.result = fptrunc <3 x float> %clamp to <3 x half>
    266   ret <3 x half> %cvt.result
    267 }
    268 
    269 ; GCN-LABEL: {{^}}v_mad_mix_v4f32_clamp_precvt:
    270 ; GFX9: v_mad_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
    271 ; GFX9: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
    272 ; GFX9: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
    273 ; GFX9: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
    274 
    275 ; GFX9: v_cvt_f16_f32
    276 ; GFX9: v_cvt_f16_f32
    277 ; GFX9: v_cvt_f16_f32
    278 ; GFX9: v_cvt_f16_f32
        ; Four-element vector case with the 0.0 to 1.0 bound applied to the
        ; <4 x float> fmuladd result before the truncation to <4 x half>.
        ; The gfx900 expectation is four v_mad_mix_f32 instructions, all
        ; carrying a clamp modifier, followed by four f32-to-f16 converts.
        ; The matches are not NEXT-style, so other instructions may appear
        ; between the expected lines.
    279 define <4 x half> @v_mad_mix_v4f32_clamp_precvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
    280   %src0.ext = fpext <4 x half> %src0 to <4 x float>
    281   %src1.ext = fpext <4 x half> %src1 to <4 x float>
    282   %src2.ext = fpext <4 x half> %src2 to <4 x float>
    283   %result = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %src0.ext, <4 x float> %src1.ext, <4 x float> %src2.ext)
    284   %max = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %result, <4 x float> zeroinitializer)
    285   %clamp = call <4 x float> @llvm.minnum.v4f32(<4 x float> %max, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
    286   %cvt.result = fptrunc <4 x float> %clamp to <4 x half>
    287   ret <4 x half> %cvt.result
    288 }
    289 
        ; Intrinsic declarations referenced by the test functions above.
        ; minnum and maxnum are declared for scalar, 2-, 3- and 4-element
        ; vectors of both half and float; fmuladd is declared for float only.
        ; Half-precision minnum, used as the upper bound of the post-convert clamps.
    290 declare half @llvm.minnum.f16(half, half) #1
    291 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
    292 declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>) #1
    293 declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>) #1
    294 
        ; Half-precision maxnum, used as the lower bound of the post-convert clamps.
    295 declare half @llvm.maxnum.f16(half, half) #1
    296 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
    297 declare <3 x half> @llvm.maxnum.v3f16(<3 x half>, <3 x half>) #1
    298 declare <4 x half> @llvm.maxnum.v4f16(<4 x half>, <4 x half>) #1
    299 
        ; Single-precision minnum, used as the upper bound of the pre-convert clamps.
    300 declare float @llvm.minnum.f32(float, float) #1
    301 declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #1
    302 declare <3 x float> @llvm.minnum.v3f32(<3 x float>, <3 x float>) #1
    303 declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1
    304 
        ; Single-precision maxnum, used as the lower bound of the pre-convert clamps.
    305 declare float @llvm.maxnum.f32(float, float) #1
    306 declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #1
    307 declare <3 x float> @llvm.maxnum.v3f32(<3 x float>, <3 x float>) #1
    308 declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #1
    309 
        ; Multiply-add intrinsic that every test function feeds into fptrunc
        ; (directly or via a clamp).
    310 declare float @llvm.fmuladd.f32(float, float, float) #1
    311 declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
    312 declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) #1
    313 declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
    314 
        ; Attribute group 0 is attached to every test function definition;
        ; attribute group 1 is attached to every intrinsic declaration.
    315 attributes #0 = { nounwind }
    316 attributes #1 = { nounwind readnone speculatable }
    317