; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX900,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX906,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s

; Codegen checks for f32 multiply-add whose operands are extended from f16.
; The gfx900 run expects v_mad_mix_f32 and the gfx906 run expects
; v_fma_mix_f32; the fiji and hawaii runs expect plain f32 mad/mac forms.

; All three sources are scalar half arguments extended to float.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo:
; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x00,0x03,0x0a,0x1c]
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x00,0x03,0x0a,0x1c]
; VI: v_mac_f32
; CI: v_mad_f32
define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; The upper 16 bits of each i32 are taken with lshr/trunc/bitcast before the
; extension; the mix instruction is expected to carry op_sel on every source.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16hi_f16hi_f16hi_int:
; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding
; CIVI: v_mac_f32
define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src2) #0 {
  %src0.hi = lshr i32 %src0, 16
  %src1.hi = lshr i32 %src1, 16
  %src2.hi = lshr i32 %src2, 16
  %src0.i16 = trunc i32 %src0.hi to i16
  %src1.i16 = trunc i32 %src1.hi to i16
  %src2.i16 = trunc i32 %src2.hi to i16
  %src0.fp16 = bitcast i16 %src0.i16 to half
  %src1.fp16 = bitcast i16 %src1.i16 to half
  %src2.fp16 = bitcast i16 %src2.i16 to half
  %src0.ext = fpext half %src0.fp16 to float
  %src1.ext = fpext half %src1.fp16 to float
  %src2.ext = fpext half %src2.fp16 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; Same as the previous test, but element 1 is taken with extractelement from
; <2 x half> arguments.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding
; VI: v_mac_f32
; CI: v_mad_f32
define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.hi = extractelement <2 x half> %src0, i32 1
  %src1.hi = extractelement <2 x half> %src1, i32 1
  %src2.hi = extractelement <2 x half> %src2, i32 1
  %src0.ext = fpext half %src0.hi to float
  %src1.ext = fpext half %src1.hi to float
  %src2.ext = fpext half %src2.hi to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; Vector form. Two mix instructions are expected, one with op_sel set on all
; sources and one without, followed by a move of the first result into v1.
; GCN-LABEL: {{^}}v_mad_mix_v2f32:
; GFX900: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX900-NEXT: v_mov_b32_e32 v1, v3

; GFX906: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: v_mov_b32_e32 v1, v3

; CIVI: v_mac_f32
define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %src2.ext = fpext <2 x half> %src2 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
  ret <2 x float> %result
}

; Vector form with each source shuffled before the extension (masks <1,0>,
; <0,1> and <1,1>). The expected op_sel values differ per source accordingly.
; Both gfx9 runs pin the first mix instruction directly after s_waitcnt.
; GCN-LABEL: {{^}}v_mad_mix_v2f32_shuffle:
; GCN: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
; GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
; GFX900-NEXT: v_mov_b32_e32 v0, v3
; GFX900-NEXT: s_setpc_b64

; GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
; GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
; GFX906-NEXT: v_mov_b32_e32 v0, v3
; GFX906-NEXT: s_setpc_b64

; CIVI: v_mac_f32
define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.shuf = shufflevector <2 x half> %src0, <2 x half> undef, <2 x i32> <i32 1, i32 0>
  %src1.shuf = shufflevector <2 x half> %src1, <2 x half> undef, <2 x i32> <i32 0, i32 1>
  %src2.shuf = shufflevector <2 x half> %src2, <2 x half> undef, <2 x i32> <i32 1, i32 1>
  %src0.ext = fpext <2 x half> %src0.shuf to <2 x float>
  %src1.ext = fpext <2 x half> %src1.shuf to <2 x float>
  %src2.ext = fpext <2 x half> %src2.shuf to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
  ret <2 x float> %result
}

; src0 is negated after the extension; the negation is expected as a source
; modifier on the mix instruction.
; GCN-LABEL: {{^}}v_mad_mix_f32_negf16lo_f16lo_f16lo:
; GFX900: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
; GFX900-NEXT: s_setpc_b64

; GFX906: s_waitcnt
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
; GFX906-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %src0.ext.neg = fsub float -0.0, %src0.ext
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg, float %src1.ext, float %src2.ext)
  ret float %result
}

; fabs of the extended src0 is expected as an |v0| source modifier.
; GCN-LABEL: {{^}}v_mad_mix_f32_absf16lo_f16lo_f16lo:
; GFX900: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX906: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]

; CIVI: v_mad_f32
define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext)
  ret float %result
}

; fneg(fabs(src0)) after the extension is expected as a -|v0| source modifier.
; GCN-LABEL: {{^}}v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
; GFX900: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX900-NEXT: s_setpc_b64

; GFX906: s_waitcnt
; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
  %src0.ext.neg.abs = fsub float -0.0, %src0.ext.abs
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg.abs, float %src1.ext, float %src2.ext)
  ret float %result
}

; src2 is already a float argument, so op_sel_hi is expected to be [1,1,0].
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32:
; GCN: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  ret float %result
}

; Negated float src2, expected as a -v2 source modifier.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_negf32:
; GCN: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] ; encoding
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] ; encoding
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.neg = fsub float -0.0, %src2
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg)
  ret float %result
}

; fabs of float src2, expected as a |v2| source modifier.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_absf32:
; GCN: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0] ; encoding
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0] ; encoding
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.abs = call float @llvm.fabs.f32(float %src2)
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.abs)
  ret float %result
}

; fneg(fabs(src2)) on the float operand, expected as a -|v2| source modifier.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_negabsf32:
; GCN: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0] ; encoding
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0] ; encoding
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.abs = call float @llvm.fabs.f32(float %src2)
  %src2.neg.abs = fsub float -0.0, %src2.abs
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg.abs)
  ret float %result
}

; TODO: Fold inline immediates. Need to be careful because it is an
; f16 inline immediate that may be converted to f32, not an actual f32
; inline immediate.

; The f32 constant 1.0 is currently expected to be materialized with a
; v_mov_b32 on the gfx9 runs rather than folded into the mix instruction.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32imm1:
; GCN: s_waitcnt
; GFX9: v_mov_b32_e32 v2, 1.0
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding

; CIVI: v_mad_f32 v0, v0, v1, 1.0
; GCN-NEXT: s_setpc_b64
define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 1.0)
  ret float %result
}

; Same as above with the f32 constant 0x3FC45F3060000000, which the checks
; print as 0.15915494.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; GCN: s_waitcnt
; GFX9: v_mov_b32_e32 v2, 0.15915494
; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
; VI: v_mad_f32 v0, v0, v1, 0.15915494
define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 0x3FC45F3060000000)
  ret float %result
}

; Attempt to break inline immediate folding. If the operand is
; interpreted as f32, the inline immediate is really the f16 inline
; imm value converted to f32.
; fpext f16 1/2pi = 0x3e230000
; f32 1/2pi = 0x3e22f983
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; GFX9: v_mov_b32_e32 v2, 0x3e230000
; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding

; CIVI: v_madak_f32 v0, v0, v1, 0x3e230000
define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2 = fpext half 0xH3118 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  ret float %result
}

; src2 is the half constant 0xH003F extended to float; the checks expect the
; literal 0x367c0000.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; GFX9: v_mov_b32_e32 v2, 0x367c0000
; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding

; CIVI: v_madak_f32 v0, v0, v1, 0x367c0000
define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2 = fpext half 0xH003F to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  ret float %result
}

; Vector form with a splat f32 1.0 addend; one v_mov_b32 is expected to feed
; both mix instructions.
; GCN-LABEL: {{^}}v_mad_mix_v2f32_f32imm1:
; GFX9: v_mov_b32_e32 v3, 1.0
; GFX900: v_mad_mix_f32 v2, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
; GFX900: v_mad_mix_f32 v0, v0, v1, v3 op_sel_hi:[1,1,0] ; encoding
; GFX900: v_mov_b32_e32 v1, v2

; GFX906: v_fma_mix_f32 v2, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
; GFX906: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[1,1,0] ; encoding
; GFX906: v_mov_b32_e32 v1, v2
define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 1.0, float 1.0>)
  ret <2 x float> %result
}

; Vector form whose addend is a splat of half 0xH3118 extended to float.
; GCN-LABEL: {{^}}v_mad_mix_v2f32_cvtf16imminv2pi:
; GFX9: v_mov_b32_e32 v3, 0x3e230000

; GFX900: v_mad_mix_f32 v2, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
; GFX900: v_mad_mix_f32 v0, v0, v1, v3 op_sel_hi:[1,1,0] ; encoding
; GFX900: v_mov_b32_e32 v1, v2

; GFX906: v_fma_mix_f32 v2, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
; GFX906: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[1,1,0] ; encoding
; GFX906: v_mov_b32_e32 v1, v2
define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %src2 = fpext <2 x half> <half 0xH3118, half 0xH3118> to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2)
  ret <2 x float> %result
}

; Vector form whose addend is a splat of the f32 constant 0x3FC45F3060000000,
; passed directly to the call.
; GCN-LABEL: {{^}}v_mad_mix_v2f32_f32imminv2pi:
; GFX9: v_mov_b32_e32 v3, 0.15915494

; GFX900: v_mad_mix_f32 v2, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
; GFX900: v_mad_mix_f32 v0, v0, v1, v3 op_sel_hi:[1,1,0] ; encoding
; GFX900: v_mov_b32_e32 v1, v2

; GFX906: v_fma_mix_f32 v2, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
; GFX906: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[1,1,0] ; encoding
; GFX906: v_mov_b32_e32 v1, v2
define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
  %src0.ext = fpext <2 x half> %src0 to <2 x float>
  %src1.ext = fpext <2 x half> %src1 to <2 x float>
  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 0x3FC45F3060000000, float 0x3FC45F3060000000>)
  ret <2 x float> %result
}

; The result is clamped to [0.0, 1.0] with maxnum/minnum; the checks expect
; the clamp bit on the multiply-add itself.
; GCN-LABEL: {{^}}v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; encoding
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; encoding
; VI: v_mac_f32_e64 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}}
; CI: v_mad_f32 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}}
define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
  %src0.hi = extractelement <2 x half> %src0, i32 1
  %src1.hi = extractelement <2 x half> %src1, i32 1
  %src2.hi = extractelement <2 x half> %src2, i32 1
  %src0.ext = fpext half %src0.hi to float
  %src1.ext = fpext half %src1.hi to float
  %src2.ext = fpext half %src2.hi to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  %max = call float @llvm.maxnum.f32(float %result, float 0.0)
  %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
  ret float %clamp
}

; All operands are plain floats, so a regular f32 mad/fma is expected instead
; of a mix instruction.
; GCN-LABEL: {{^}}no_mix_simple:
; GCN: s_waitcnt
; GCN-NEXT: v_{{mad|fma}}_f32 v0, v0, v1, v2
; GCN-NEXT: s_setpc_b64
define float @no_mix_simple(float %src0, float %src1, float %src2) #0 {
  %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
  ret float %result
}

; Plain float operands with fabs on src0; the gfx906 run expects the operands
; in a different order from the other runs.
; GCN-LABEL: {{^}}no_mix_simple_fabs:
; GCN: s_waitcnt
; CIVI-NEXT: v_mad_f32 v0, |v0|, v1, v2
; GFX900-NEXT: v_mad_f32 v0, |v0|, v1, v2
; GFX906-NEXT: v_fma_f32 v0, v1, |v0|, v2
; GCN-NEXT: s_setpc_b64
define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 {
  %src0.fabs = call float @llvm.fabs.f32(float %src0)
  %result = call float @llvm.fmuladd.f32(float %src0.fabs, float %src1, float %src2)
  ret float %result
}

; FIXME: Should be able to select in this case.
; All sources are converted from f16, so it doesn't matter that
; v_mad_mix_f32 flushes.

; Uses attribute group #1 (fp32 denormals enabled). Only the gfx900 output is
; checked here.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
; GFX900: v_cvt_f32_f16
; GFX900: v_cvt_f32_f16
; GFX900: v_cvt_f32_f16
; GFX900: v_fma_f32
define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %src1, half %src2) #1 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; With fp32 denormals enabled and a float src2, the gfx900 run expects
; explicit conversions plus v_fma_f32, while the gfx906 run still expects
; v_fma_mix_f32 with no conversions.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_denormals:
; GFX900: v_cvt_f32_f16
; GFX900: v_cvt_f32_f16
; GFX900: v_fma_f32

; GFX906-NOT: v_cvt_f32_f16
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, float %src2) #1 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
  ret float %result
}

; Separate fmul and fadd without the contract flag, denormals enabled. The
; gfx9 runs expect them to stay a v_mul_f32 / v_add_f32 pair.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
; GFX9: v_cvt_f32_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_mul_f32
; GFX9: v_add_f32
define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, half %src2) #1 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %mul = fmul float %src0.ext, %src1.ext
  %result = fadd float %mul, %src2.ext
  ret float %result
}

; Same as the previous test with a float src2.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
; GFX9: v_cvt_f32_f16
; GFX9: v_cvt_f32_f16
; GFX9: v_mul_f32
; GFX9: v_add_f32
define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, float %src2) #1 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %mul = fmul float %src0.ext, %src1.ext
  %result = fadd float %mul, %src2
  ret float %result
}

; fmul/fadd carrying the contract flag, denormals disabled (#0). The gfx9 runs
; expect a single mix instruction.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
; GCN: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
; GFX9-NEXT: s_setpc_b64
define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, half %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %mul = fmul contract float %src0.ext, %src1.ext
  %result = fadd contract float %mul, %src2.ext
  ret float %result
}

; Same as the previous test with a float src2.
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
; GCN: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
; GFX9-NEXT: s_setpc_b64
define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, float %src2) #0 {
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %mul = fmul contract float %src0.ext, %src1.ext
  %result = fadd contract float %mul, %src2
  ret float %result
}

; The negation is applied to the half value before the fpext (there is no
; f32-level negation in this function); it is still expected as a -v0 source
; modifier.
; GCN-LABEL: {{^}}v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
; GFX9: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
; GFX9-NEXT: s_setpc_b64

; CIVI: v_mad_f32
define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
  %src0 = extractelement <2 x half> %src0.arg.bc, i32 0
  %src0.neg = fsub half -0.0, %src0
  %src0.ext = fpext half %src0.neg to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; Make sure we don't fold pre-cvt fneg if we already have a fabs
; NOTE(review): only the s_waitcnt is checked on gfx900 here; the instructions
; actually selected are not pinned by this test.
; GCN-LABEL: {{^}}v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
; GFX900: s_waitcnt
define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
  %src0 = extractelement <2 x half> %src0.arg.bc, i32 1
  %src0.neg = fsub half -0.0, %src0
  %src0.ext = fpext half %src0.neg to float
  %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext)
  ret float %result
}

; f16 fabs applied to element 1 before the fpext; expected as |v0| with
; op_sel:[1,0,0].
; GCN-LABEL: {{^}}v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
; GFX9: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX9-NEXT: s_setpc_b64
define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
  %src0 = extractelement <2 x half> %src0.arg.bc, i32 1
  %src0.abs = call half @llvm.fabs.f16(half %src0)
  %src0.ext = fpext half %src0.abs to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; The negation is applied to the whole <2 x half> vector before element 1 is
; extracted; expected as -v0 with op_sel:[1,0,0].
; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
; GFX9: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX9-NEXT: s_setpc_b64
define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
  %fneg = fsub <2 x half> <half -0.0, half -0.0>, %src0.arg.bc
  %src0 = extractelement <2 x half> %fneg, i32 1
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; Vector fabs applied before element 1 is extracted; expected as |v0| with
; op_sel:[1,0,0].
; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
; GFX9: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX9-NEXT: s_setpc_b64
define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc)
  %src0 = extractelement <2 x half> %fabs, i32 1
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; Vector fabs followed by vector negation before element 1 is extracted;
; expected as -|v0| with op_sel:[1,0,0].
; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
; GFX9: s_waitcnt
; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX9-NEXT: s_setpc_b64
define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc)
  %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
  %src0 = extractelement <2 x half> %fneg.fabs, i32 1
  %src0.ext = fpext half %src0 to float
  %src1.ext = fpext half %src1 to float
  %src2.ext = fpext half %src2 to float
  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
  ret float %result
}

; Intrinsics used by the tests in this file.
declare half @llvm.fabs.f16(half) #2
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
declare float @llvm.fabs.f32(float) #2
declare float @llvm.minnum.f32(float, float) #2
declare float @llvm.maxnum.f32(float, float) #2
declare float @llvm.fmuladd.f32(float, float, float) #2
declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #2

; #0 turns the fp32-denormals target feature off and #1 turns it on; #2 is the
; attribute set applied to the intrinsic declarations.
attributes #0 = { nounwind "target-features"="-fp32-denormals" }
attributes #1 = { nounwind "target-features"="+fp32-denormals" }
attributes #2 = { nounwind readnone speculatable }