; Source-browser navigation residue (Home | History | Annotate | Download | only in AMDGPU); not part of the test.
      1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
      2 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s
      3 ; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s
      4 
      5 ; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo:
      6 ; GFX9: s_waitcnt
      7 ; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2
      8 ; GFX9-NEXT: s_setpc_b64
      9 define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo(half %src0, half %src1, half %src2) #0 {
     10   %src0.ext = fpext half %src0 to float
     11   %src1.ext = fpext half %src1 to float
     12   %src2.ext = fpext half %src2 to float
     13   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
     14   %cvt.result = fptrunc float %result to half
     15   %vec.result = insertelement <2 x half> undef, half %cvt.result, i32 1
     16   ret <2 x half> %vec.result
     17 }
     18 
     19 ; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo:
     20 ; GFX9: s_waitcnt
     21 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x3c00
     22 ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2
     23 ; GFX9-NEXT: v_mov_b32_e32 v0, v3
     24 ; GFX9-NEXT: s_setpc_b64
     25 define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo(half %src0, half %src1, half %src2) #0 {
     26   %src0.ext = fpext half %src0 to float
     27   %src1.ext = fpext half %src1 to float
     28   %src2.ext = fpext half %src2 to float
     29   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
     30   %cvt.result = fptrunc float %result to half
     31   %vec.result = insertelement <2 x half> <half 1.0, half undef>, half %cvt.result, i32 1
     32   ret <2 x half> %vec.result
     33 }
     34 
     35 ; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
     36 ; GFX9: s_waitcnt
     37 ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2
     38 ; GFX9-NEXT: v_mov_b32_e32 v0, v3
     39 ; GFX9-NEXT: s_setpc_b64
     40 define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo(half %src0, half %src1, half %src2, half %lo) #0 {
     41   %src0.ext = fpext half %src0 to float
     42   %src1.ext = fpext half %src1 to float
     43   %src2.ext = fpext half %src2 to float
     44   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
     45   %cvt.result = fptrunc float %result to half
     46   %vec = insertelement <2 x half> undef, half %lo, i32 0
     47   %vec.result = insertelement <2 x half> %vec, half %cvt.result, i32 1
     48   ret <2 x half> %vec.result
     49 }
     50 
     51 ; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
     52 ; GFX9: v_mov_b32_e32 v3, 0
     53 ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2
     54 ; GFX9-NEXT: v_mov_b32_e32 v0, v3
     55 ; GFX9-NEXT: s_setpc_b64
     56 define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack(half %src0, half %src1, half %src2) #0 {
     57   %src0.ext = fpext half %src0 to float
     58   %src1.ext = fpext half %src1 to float
     59   %src2.ext = fpext half %src2 to float
     60   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
     61   %cvt.result = fptrunc float %result to half
     62   %bc = bitcast half %cvt.result to i16
     63   %ext = zext i16 %bc to i32
     64   %shr = shl i32 %ext, 16
     65   ret i32 %shr
     66 }
     67 
     68 ; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext:
     69 ; GFX9: v_mov_b32_e32 v3, 0
     70 ; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2
     71 ; GFX9-NEXT: v_mov_b32_e32 v0, v3
     72 ; GFX9-NEXT: s_setpc_b64
     73 define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext(half %src0, half %src1, half %src2) #0 {
     74   %src0.ext = fpext half %src0 to float
     75   %src1.ext = fpext half %src1 to float
     76   %src2.ext = fpext half %src2 to float
     77   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
     78   %cvt.result = fptrunc float %result to half
     79   %bc = bitcast half %cvt.result to i16
     80   %ext = sext i16 %bc to i32
     81   %shr = shl i32 %ext, 16
     82   ret i32 %shr
     83 }
     84 
     85 ; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
     86 ; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
     87 ; GFX9: v_cvt_f16_f32_e32 v0, v0
     88 define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt(half %src0, half %src1, half %src2) #0 {
     89   %src0.ext = fpext half %src0 to float
     90   %src1.ext = fpext half %src1 to float
     91   %src2.ext = fpext half %src2 to float
     92   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
     93   %max = call float @llvm.maxnum.f32(float %result, float 0.0)
     94   %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
     95   %cvt.result = fptrunc float %clamp to half
     96   %vec.result = insertelement <2 x half> undef, half %cvt.result, i32 1
     97   ret <2 x half> %vec.result
     98 }
     99 
    100 ; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
    101 ; GCN: s_waitcnt
    102 ; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
    103 ; GFX9-NEXT: s_setpc_b64
    104 define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt(half %src0, half %src1, half %src2) #0 {
    105   %src0.ext = fpext half %src0 to float
    106   %src1.ext = fpext half %src1 to float
    107   %src2.ext = fpext half %src2 to float
    108   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
    109   %cvt.result = fptrunc float %result to half
    110   %max = call half @llvm.maxnum.f16(half %cvt.result, half 0.0)
    111   %clamp = call half @llvm.minnum.f16(half %max, half 1.0)
    112   %vec.result = insertelement <2 x half> undef, half %clamp, i32 1
    113   ret <2 x half> %vec.result
    114 }
    115 
    116 
    117 ; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
    118 ; GCN: s_waitcnt
    119 ; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
    120 ; GFX9-NEXT: global_store_short v{{\[[0-9]+:[0-9]+\]}}, v3
    121 ; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
    122 ; GFX9-NEXT: s_waitcnt vmcnt(0)
    123 ; GFX9-NEXT: s_setpc_b64
    124 define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use(half %src0, half %src1, half %src2) #0 {
    125   %src0.ext = fpext half %src0 to float
    126   %src1.ext = fpext half %src1 to float
    127   %src2.ext = fpext half %src2 to float
    128   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
    129   %cvt.result = fptrunc float %result to half
    130   store volatile half %cvt.result, half addrspace(1)* undef
    131   %max = call half @llvm.maxnum.f16(half %cvt.result, half 0.0)
    132   %clamp = call half @llvm.minnum.f16(half %max, half 1.0)
    133   %vec.result = insertelement <2 x half> undef, half %clamp, i32 1
    134   ret <2 x half> %vec.result
    135 }
    136 
    137 declare half @llvm.minnum.f16(half, half) #1
    138 declare half @llvm.maxnum.f16(half, half) #1
    139 declare float @llvm.minnum.f32(float, float) #1
    140 declare float @llvm.maxnum.f32(float, float) #1
    141 declare float @llvm.fmuladd.f32(float, float, float) #1
    142 declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
    143 
    144 attributes #0 = { nounwind }
    145 attributes #1 = { nounwind readnone speculatable }
    146