; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s

; Codegen checks for the pattern
;   fptrunc(llvm.fmuladd.f32(fpext a, fpext b, fpext c)) to half
; when the truncated value ends up in the upper 16 bits of a 32-bit result
; (element 1 of a <2 x half>, or an i32 shifted left by 16).
;
; The instruction-level expectations below are written only for the gfx900
; invocation. The fiji and hawaii invocations pass the CIVI, VI and CI check
; prefixes, but no check line in this file uses them, so for those targets
; only the GCN-prefixed label and s_waitcnt checks apply.
; NOTE(review): newer FileCheck configurations may reject unused check
; prefixes - confirm against the FileCheck version in use.

; High half written into an undef vector: the gfx900 checks expect a single
; v_mad_mixhi_f16 that writes v0 directly, with no extra moves.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo:
; GFX9: s_waitcnt
; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  %vec.result = insertelement <2 x half> undef, half %cvt.result, i32 1
  ret <2 x half> %vec.result
}

; Low element is the constant 1.0 (0x3c00 as an IEEE half). The gfx900 checks
; expect the constant to be materialized in v3 first, v_mad_mixhi_f16 to write
; into that same register, and the packed value then copied to v0.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo:
; GFX9: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v3, 0x3c00
; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  %vec.result = insertelement <2 x half> <half 1.0, half undef>, half %cvt.result, i32 1
  ret <2 x half> %vec.result
}

; Low element comes from the fourth argument %lo. The gfx900 checks expect
; v_mad_mixhi_f16 to write v3 and the result to be copied to v0.
; NOTE(review): presumably v3 is the register %lo arrives in - confirm against
; the calling convention.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
; GFX9: s_waitcnt
; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo(half %src0, half %src1, half %src2, half %lo) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  %vec = insertelement <2 x half> undef, half %lo, i32 0
  %vec.result = insertelement <2 x half> %vec, half %cvt.result, i32 1
  ret <2 x half> %vec.result
}

; Integer packing: the half result is bitcast to i16, zero-extended to i32 and
; shifted left by 16, so the low 16 bits of the return value are zero. The
; gfx900 checks expect v3 to be zeroed, v_mad_mixhi_f16 to write v3, and the
; result to be copied to v0.
; Note: despite its name, %shr holds the result of a left shift (shl).
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
; GFX9: v_mov_b32_e32 v3, 0
; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64
define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  %bc = bitcast half %cvt.result to i16
  %ext = zext i16 %bc to i32
  %shr = shl i32 %ext, 16
  ret i32 %shr
}

; Same as the intpack test but with sext instead of zext. The extension bits
; are all shifted out by the shl of 16, so the expected gfx900 code is
; identical to the zext variant.
; Note: despite its name, %shr holds the result of a left shift (shl).
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext:
; GFX9: v_mov_b32_e32 v3, 0
; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64
define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  %bc = bitcast half %cvt.result to i16
  %ext = sext i16 %bc to i32
  %shr = shl i32 %ext, 16
  ret i32 %shr
}

; Clamp applied BEFORE the conversion: maxnum(x, 0.0) / minnum(x, 1.0) operate
; on the f32 fmuladd result, and only then is the value truncated to half.
; The gfx900 checks expect an f32 v_mad_mix_f32 carrying the clamp modifier,
; followed by a separate v_cvt_f16_f32.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
; GFX9: v_cvt_f16_f32_e32 v0, v0
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %max = call float @llvm.maxnum.f32(float %result, float 0.0)
  %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
  %cvt.result = fptrunc float %clamp to half
  %vec.result = insertelement <2 x half> undef, half %cvt.result, i32 1
  ret <2 x half> %vec.result
}

; Clamp applied AFTER the conversion: maxnum/minnum operate on the truncated
; half value. The gfx900 checks expect a single v_mad_mixhi_f16 carrying the
; clamp modifier.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  %max = call half @llvm.maxnum.f16(half %cvt.result, half 0.0)
  %clamp = call half @llvm.minnum.f16(half %max, half 1.0)
  %vec.result = insertelement <2 x half> undef, half %clamp, i32 1
  ret <2 x half> %vec.result
}


; Post-conversion clamp where the unclamped half has a second use: it is also
; written by a volatile store to an undef addrspace(1) pointer. The gfx900
; checks expect an unclamped v_mad_mixlo_f16 feeding global_store_short, and a
; separate clamped v_mad_mixhi_f16 producing the returned value.
; GCN-LABEL: {{^}}v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
; GFX9-NEXT: global_store_short v{{\[[0-9]+:[0-9]+\]}}, v3
; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  store volatile half %cvt.result, half addrspace(1)* undef
  %max = call half @llvm.maxnum.f16(half %cvt.result, half 0.0)
  %clamp = call half @llvm.minnum.f16(half %max, half 1.0)
  %vec.result = insertelement <2 x half> undef, half %clamp, i32 1
  ret <2 x half> %vec.result
}

; Intrinsics used by the tests above.
; Note: @llvm.fmuladd.v2f32 is declared here but is not called by any function
; in this file.
declare half @llvm.minnum.f16(half, half) #1
declare half @llvm.maxnum.f16(half, half) #1
declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1

; #0 is attached to every test function; #1 to every intrinsic declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }