; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s

; Every function in this file computes llvm.fmuladd in f32 (scalar or vector)
; and truncates the result to half, optionally clamping to [0.0, 1.0] with a
; maxnum/minnum pair either after or before the truncation. The gfx900 run
; expects v_mad_mixlo_f16 / v_mad_mixhi_f16 (or v_mad_mix_f32 when the clamp
; precedes the conversion); the fiji and hawaii runs expect a plain f32
; multiply-accumulate where they are checked at all.

; All three operands are already float, so the gfx900 output is expected to
; carry no op_sel_hi modifier.
; GCN-LABEL: {{^}}mixlo_simple:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2{{$}}
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mac_f32_e32
; CIVI: v_cvt_f16_f32_e32
define half @mixlo_simple(float %src0, float %src1, float %src2) #0 {
  %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
  %cvt.result = fptrunc float %result to half
  ret half %cvt.result
}

; All three sources are half values extended to float before the fmuladd;
; the gfx900 output is expected to use op_sel_hi:[1,1,1].
; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f16lo:
; GFX9: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
; CI: v_mac_f32
; CIVI: v_cvt_f16_f32
define half @v_mad_mixlo_f16_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %cvt.result = fptrunc float %result to half
  ret half %cvt.result
}

; Only the two multiplicands are extended from half; the addend is a float
; argument, and the expected modifier is op_sel_hi:[1,1,0].
; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f32:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}}
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mac_f32
define half @v_mad_mixlo_f16_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  %cvt.result = fptrunc float %result to half
  ret half %cvt.result
}

; The [0.0, 1.0] clamp is applied to the half result (after the fptrunc);
; gfx900 is expected to fold it into the v_mad_mixlo_f16 as a clamp modifier.
; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp{{$}}
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mac_f32_e32 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]$}}
define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  %cvt.result = fptrunc float %result to half
  %max = call half @llvm.maxnum.f16(half %cvt.result, half 0.0)
  %clamp = call half @llvm.minnum.f16(half %max, half 1.0)
  ret half %clamp
}

; The [0.0, 1.0] clamp is applied to the float result (before the fptrunc);
; gfx900 is expected to emit a clamped v_mad_mix_f32 followed by a separate
; v_cvt_f16_f32 rather than a single mixlo instruction.
; GCN-LABEL: {{^}}v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp{{$}}
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mac_f32_e64 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}}
define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  %max = call float @llvm.maxnum.f32(float %result, float 0.0)
  %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
  %cvt.result = fptrunc float %clamp to half
  ret half %cvt.result
}

; FIXME: Should be able to avoid extra register because first
; operation only clobbers relevant lane.
; <2 x half> variant: all operands are extended to <2 x float>, fed to
; llvm.fmuladd.v2f32 and truncated back. The gfx900 checks expect one
; mixlo/mixhi pair writing v3, followed by a copy of v3 into v0.
; GCN-LABEL: {{^}}v_mad_mix_v2f32:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]{{$}}
; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]{{$}}
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %src2.ext = fpext <2 x half> %src2 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
  %cvt.result = fptrunc <2 x float> %result to <2 x half>
  ret <2 x half> %cvt.result
}

; <3 x half> variant. The gfx900 checks expect two mixlo/mixhi pairs (four
; instructions) writing v6 and v7, then copies into v0 and v1.
; GCN-LABEL: {{^}}v_mad_mix_v3f32:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mov_b32_e32 v0, v6
; GFX9-NEXT: v_mov_b32_e32 v1, v7
; GFX9-NEXT: s_setpc_b64
define <3 x half> @v_mad_mix_v3f32(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
  %src0.ext = fpext <3 x half> %src0 to <3 x float>
  %src1.ext = fpext <3 x half> %src1 to <3 x float>
  %src2.ext = fpext <3 x half> %src2 to <3 x float>
  %result = tail call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %src0.ext, <3 x float> %src1.ext, <3 x float> %src2.ext)
  %cvt.result = fptrunc <3 x float> %result to <3 x half>
  ret <3 x half> %cvt.result
}

; <4 x half> variant. Same instruction mix as the <3 x half> case, but the
; expected register assignment and instruction order differ (v7 holds the
; pair built from v0/v2/v4, v6 the pair built from v1/v3/v5).
; GCN-LABEL: {{^}}v_mad_mix_v4f32:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mad_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mad_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mad_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mov_b32_e32 v0, v7
; GFX9-NEXT: v_mov_b32_e32 v1, v6
; GFX9-NEXT: s_setpc_b64
define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
  %src0.ext = fpext <4 x half> %src0 to <4 x float>
  %src1.ext = fpext <4 x half> %src1 to <4 x float>
  %src2.ext = fpext <4 x half> %src2 to <4 x float>
  %result = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %src0.ext, <4 x float> %src1.ext, <4 x float> %src2.ext)
  %cvt.result = fptrunc <4 x float> %result to <4 x half>
  ret <4 x half> %cvt.result
}

; <2 x half> variant with the [0.0, 1.0] clamp applied to the half vector
; after the fptrunc.
; NOTE(review): the checks below already expect a clamp modifier on both the
; mixlo and the mixhi instruction, so the FIXME that follows may be stale --
; confirm before removing it.
; FIXME: Fold clamp
; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_postcvt:
; GFX9: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp{{$}}
; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp{{$}}
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %src2.ext = fpext <2 x half> %src2 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
  %cvt.result = fptrunc <2 x float> %result to <2 x half>
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %cvt.result, <2 x half> zeroinitializer)
  %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
  ret <2 x half> %clamp
}

; FIXME: Should be packed into 2 registers per argument?
; <3 x half> variant with the [0.0, 1.0] clamp applied after the fptrunc.
; The gfx900 checks expect the clamp modifier on all four mixlo/mixhi
; instructions.
; GCN-LABEL: {{^}}v_mad_mix_v3f32_clamp_postcvt:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mov_b32_e32 v0, v6
; GFX9-NEXT: v_mov_b32_e32 v1, v7
; GFX9-NEXT: s_setpc_b64
define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
  %src0.ext = fpext <3 x half> %src0 to <3 x float>
  %src1.ext = fpext <3 x half> %src1 to <3 x float>
  %src2.ext = fpext <3 x half> %src2 to <3 x float>
  %result = tail call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %src0.ext, <3 x float> %src1.ext, <3 x float> %src2.ext)
  %cvt.result = fptrunc <3 x float> %result to <3 x half>
  %max = call <3 x half> @llvm.maxnum.v3f16(<3 x half> %cvt.result, <3 x half> zeroinitializer)
  %clamp = call <3 x half> @llvm.minnum.v3f16(<3 x half> %max, <3 x half> <half 1.0, half 1.0, half 1.0>)
  ret <3 x half> %clamp
}

; <4 x half> variant with the clamp applied after the fptrunc. Each mixlo is
; expected to be immediately followed by its matching mixhi, and the second
; pair is expected to reuse v2 as its destination.
; GCN-LABEL: {{^}}v_mad_mix_v4f32_clamp_postcvt:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mov_b32_e32 v0, v6
; GFX9-NEXT: v_mov_b32_e32 v1, v2
; GFX9-NEXT: s_setpc_b64
define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
  %src0.ext = fpext <4 x half> %src0 to <4 x float>
  %src1.ext = fpext <4 x half> %src1 to <4 x float>
  %src2.ext = fpext <4 x half> %src2 to <4 x float>
  %result = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %src0.ext, <4 x float> %src1.ext, <4 x float> %src2.ext)
  %cvt.result = fptrunc <4 x float> %result to <4 x half>
  %max = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %cvt.result, <4 x half> zeroinitializer)
  %clamp = call <4 x half> @llvm.minnum.v4f16(<4 x half> %max, <4 x half> <half 1.0, half 1.0, half 1.0, half 1.0>)
  ret <4 x half> %clamp
}

; Only element 0 of the truncated <2 x half> result is clamped and then
; reinserted; the checks expect the clamp modifier on the mixlo instruction
; only.
; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_postcvt_lo:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %src2.ext = fpext <2 x half> %src2 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
  %cvt.result = fptrunc <2 x float> %result to <2 x half>
  %cvt.lo = extractelement <2 x half> %cvt.result, i32 0
  %max.lo = call half @llvm.maxnum.f16(half %cvt.lo, half 0.0)
  %clamp.lo = call half @llvm.minnum.f16(half %max.lo, half 1.0)
  %insert = insertelement <2 x half> %cvt.result, half %clamp.lo, i32 0
  ret <2 x half> %insert
}

; Only element 1 of the truncated <2 x half> result is clamped and then
; reinserted; the checks expect the clamp modifier on the mixhi instruction
; only.
; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_postcvt_hi:
; GCN: s_waitcnt
; GFX9-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX9-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64
define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %src2.ext = fpext <2 x half> %src2 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
  %cvt.result = fptrunc <2 x float> %result to <2 x half>
  %cvt.hi = extractelement <2 x half> %cvt.result, i32 1
  %max.hi = call half @llvm.maxnum.f16(half %cvt.hi, half 0.0)
  %clamp.hi = call half @llvm.minnum.f16(half %max.hi, half 1.0)
  %insert = insertelement <2 x half> %cvt.result, half %clamp.hi, i32 1
  ret <2 x half> %insert
}

; <2 x half> variant with the clamp applied to the <2 x float> value before
; the fptrunc. The current expected output is two clamped v_mad_mix_f32
; instructions, two conversions, and an and/shift-or sequence to pack the
; halves.
; FIXME: Should be able to use mixlo/mixhi
; GCN-LABEL: {{^}}v_mad_mix_v2f32_clamp_precvt:
; GFX9: v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9: v_cvt_f16_f32_e32 v1, v3
; GFX9: v_cvt_f16_f32_e32 v0, v0
; GFX9: v_and_b32_e32 v1, 0xffff, v1
; GFX9: v_lshl_or_b32 v0, v0, 16, v1
; GFX9: s_setpc_b64
define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %src2.ext = fpext <2 x half> %src2 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
  %max = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %result, <2 x float> zeroinitializer)
  %clamp = call <2 x float> @llvm.minnum.v2f32(<2 x float> %max, <2 x float> <float 1.0, float 1.0>)
  %cvt.result = fptrunc <2 x float> %clamp to <2 x half>
  ret <2 x half> %cvt.result
}

; <3 x half> variant with the clamp applied before the fptrunc. Four
; v_mad_mix_f32 instructions are expected even though only three elements are
; live; the fourth expected instruction carries no clamp modifier.
; FIXME: Handling undef 4th component
; GCN-LABEL: {{^}}v_mad_mix_v3f32_clamp_precvt:
; GFX9: v_mad_mix_f32 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp
; GFX9: v_mad_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp
; GFX9: v_mad_mix_f32 v1, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1]

; GFX9: v_cvt_f16_f32
; GFX9: v_cvt_f16_f32
; GFX9: v_cvt_f16_f32
; GFX9: v_cvt_f16_f32
define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 {
  %src0.ext = fpext <3 x half> %src0 to <3 x float>
  %src1.ext = fpext <3 x half> %src1 to <3 x float>
  %src2.ext = fpext <3 x half> %src2 to <3 x float>
  %result = tail call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %src0.ext, <3 x float> %src1.ext, <3 x float> %src2.ext)
  %max = call <3 x float> @llvm.maxnum.v3f32(<3 x float> %result, <3 x float> zeroinitializer)
  %clamp = call <3 x float> @llvm.minnum.v3f32(<3 x float> %max, <3 x float> <float 1.0, float 1.0, float 1.0>)
  %cvt.result = fptrunc <3 x float> %clamp to <3 x half>
  ret <3 x half> %cvt.result
}

; <4 x half> variant with the clamp applied before the fptrunc. All four
; expected v_mad_mix_f32 instructions carry the clamp modifier, followed by
; four conversions.
; GCN-LABEL: {{^}}v_mad_mix_v4f32_clamp_precvt:
; GFX9: v_mad_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp
; GFX9: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX9: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp

; GFX9: v_cvt_f16_f32
; GFX9: v_cvt_f16_f32
; GFX9: v_cvt_f16_f32
; GFX9: v_cvt_f16_f32
define <4 x half> @v_mad_mix_v4f32_clamp_precvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 {
  %src0.ext = fpext <4 x half> %src0 to <4 x float>
  %src1.ext = fpext <4 x half> %src1 to <4 x float>
  %src2.ext = fpext <4 x half> %src2 to <4 x float>
  %result = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %src0.ext, <4 x float> %src1.ext, <4 x float> %src2.ext)
  %max = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %result, <4 x float> zeroinitializer)
  %clamp = call <4 x float> @llvm.minnum.v4f32(<4 x float> %max, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
  %cvt.result = fptrunc <4 x float> %clamp to <4 x half>
  ret <4 x half> %cvt.result
}

; Intrinsic declarations used by the tests above: minnum/maxnum for the clamp
; pattern (half and float, scalar through 4 elements) and fmuladd for the
; multiply-add itself.
declare half @llvm.minnum.f16(half, half) #1
declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>) #1
declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>) #1

declare half @llvm.maxnum.f16(half, half) #1
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
declare <3 x half> @llvm.maxnum.v3f16(<3 x half>, <3 x half>) #1
declare <4 x half> @llvm.maxnum.v4f16(<4 x half>, <4 x half>) #1

declare float @llvm.minnum.f32(float, float) #1
declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #1
declare <3 x float> @llvm.minnum.v3f32(<3 x float>, <3 x float>) #1
declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1

declare float @llvm.maxnum.f32(float, float) #1
declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #1
declare <3 x float> @llvm.maxnum.v3f32(<3 x float>, <3 x float>) #1
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #1

declare float @llvm.fmuladd.f32(float, float, float) #1
declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) #1
declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1

; #0 is attached to every test function; #1 to every intrinsic declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }