1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA --check-prefix=FMA-INFS 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-INFS 4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-INFS 5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512-INFS 6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=FMA --check-prefix=FMA-NOINFS 7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-NOINFS 8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-NOINFS 9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512-NOINFS 10 11 ; 12 ; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z) 13 ; 14 15 define float @test_f32_fmadd(float %a0, float %a1, float %a2) { 16 ; FMA-LABEL: test_f32_fmadd: 17 ; FMA: # %bb.0: 18 ; FMA-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 19 ; FMA-NEXT: retq 20 ; 21 ; FMA4-LABEL: test_f32_fmadd: 22 ; FMA4: # %bb.0: 23 ; FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 24 ; FMA4-NEXT: retq 25 ; 26 ; AVX512-LABEL: test_f32_fmadd: 27 ; AVX512: # %bb.0: 28 ; AVX512-NEXT: 
vfmadd213ss %xmm2, %xmm1, %xmm0 29 ; AVX512-NEXT: retq 30 %x = fmul float %a0, %a1 31 %res = fadd float %x, %a2 32 ret float %res 33 } 34 35 define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { 36 ; FMA-LABEL: test_4f32_fmadd: 37 ; FMA: # %bb.0: 38 ; FMA-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 39 ; FMA-NEXT: retq 40 ; 41 ; FMA4-LABEL: test_4f32_fmadd: 42 ; FMA4: # %bb.0: 43 ; FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 44 ; FMA4-NEXT: retq 45 ; 46 ; AVX512-LABEL: test_4f32_fmadd: 47 ; AVX512: # %bb.0: 48 ; AVX512-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 49 ; AVX512-NEXT: retq 50 %x = fmul <4 x float> %a0, %a1 51 %res = fadd <4 x float> %x, %a2 52 ret <4 x float> %res 53 } 54 55 define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { 56 ; FMA-LABEL: test_8f32_fmadd: 57 ; FMA: # %bb.0: 58 ; FMA-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 59 ; FMA-NEXT: retq 60 ; 61 ; FMA4-LABEL: test_8f32_fmadd: 62 ; FMA4: # %bb.0: 63 ; FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 64 ; FMA4-NEXT: retq 65 ; 66 ; AVX512-LABEL: test_8f32_fmadd: 67 ; AVX512: # %bb.0: 68 ; AVX512-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 69 ; AVX512-NEXT: retq 70 %x = fmul <8 x float> %a0, %a1 71 %res = fadd <8 x float> %x, %a2 72 ret <8 x float> %res 73 } 74 75 define double @test_f64_fmadd(double %a0, double %a1, double %a2) { 76 ; FMA-LABEL: test_f64_fmadd: 77 ; FMA: # %bb.0: 78 ; FMA-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 79 ; FMA-NEXT: retq 80 ; 81 ; FMA4-LABEL: test_f64_fmadd: 82 ; FMA4: # %bb.0: 83 ; FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 84 ; FMA4-NEXT: retq 85 ; 86 ; AVX512-LABEL: test_f64_fmadd: 87 ; AVX512: # %bb.0: 88 ; AVX512-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 89 ; AVX512-NEXT: retq 90 %x = fmul double %a0, %a1 91 %res = fadd double %x, %a2 92 ret double %res 93 } 94 95 define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { 96 ; FMA-LABEL: test_2f64_fmadd: 97 ; FMA: # %bb.0: 98 ; 
FMA-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 99 ; FMA-NEXT: retq 100 ; 101 ; FMA4-LABEL: test_2f64_fmadd: 102 ; FMA4: # %bb.0: 103 ; FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 104 ; FMA4-NEXT: retq 105 ; 106 ; AVX512-LABEL: test_2f64_fmadd: 107 ; AVX512: # %bb.0: 108 ; AVX512-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 109 ; AVX512-NEXT: retq 110 %x = fmul <2 x double> %a0, %a1 111 %res = fadd <2 x double> %x, %a2 112 ret <2 x double> %res 113 } 114 115 define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { 116 ; FMA-LABEL: test_4f64_fmadd: 117 ; FMA: # %bb.0: 118 ; FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 119 ; FMA-NEXT: retq 120 ; 121 ; FMA4-LABEL: test_4f64_fmadd: 122 ; FMA4: # %bb.0: 123 ; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 124 ; FMA4-NEXT: retq 125 ; 126 ; AVX512-LABEL: test_4f64_fmadd: 127 ; AVX512: # %bb.0: 128 ; AVX512-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 129 ; AVX512-NEXT: retq 130 %x = fmul <4 x double> %a0, %a1 131 %res = fadd <4 x double> %x, %a2 132 ret <4 x double> %res 133 } 134 135 ; 136 ; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z) 137 ; 138 139 define float @test_f32_fmsub(float %a0, float %a1, float %a2) { 140 ; FMA-LABEL: test_f32_fmsub: 141 ; FMA: # %bb.0: 142 ; FMA-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 143 ; FMA-NEXT: retq 144 ; 145 ; FMA4-LABEL: test_f32_fmsub: 146 ; FMA4: # %bb.0: 147 ; FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0 148 ; FMA4-NEXT: retq 149 ; 150 ; AVX512-LABEL: test_f32_fmsub: 151 ; AVX512: # %bb.0: 152 ; AVX512-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 153 ; AVX512-NEXT: retq 154 %x = fmul float %a0, %a1 155 %res = fsub float %x, %a2 156 ret float %res 157 } 158 159 define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { 160 ; FMA-LABEL: test_4f32_fmsub: 161 ; FMA: # %bb.0: 162 ; FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 163 ; FMA-NEXT: retq 164 ; 165 ; FMA4-LABEL: test_4f32_fmsub: 166 ; FMA4: # %bb.0: 167 ; FMA4-NEXT: vfmsubps %xmm2, %xmm1, 
%xmm0, %xmm0 168 ; FMA4-NEXT: retq 169 ; 170 ; AVX512-LABEL: test_4f32_fmsub: 171 ; AVX512: # %bb.0: 172 ; AVX512-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 173 ; AVX512-NEXT: retq 174 %x = fmul <4 x float> %a0, %a1 175 %res = fsub <4 x float> %x, %a2 176 ret <4 x float> %res 177 } 178 179 define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { 180 ; FMA-LABEL: test_8f32_fmsub: 181 ; FMA: # %bb.0: 182 ; FMA-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 183 ; FMA-NEXT: retq 184 ; 185 ; FMA4-LABEL: test_8f32_fmsub: 186 ; FMA4: # %bb.0: 187 ; FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 188 ; FMA4-NEXT: retq 189 ; 190 ; AVX512-LABEL: test_8f32_fmsub: 191 ; AVX512: # %bb.0: 192 ; AVX512-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 193 ; AVX512-NEXT: retq 194 %x = fmul <8 x float> %a0, %a1 195 %res = fsub <8 x float> %x, %a2 196 ret <8 x float> %res 197 } 198 199 define double @test_f64_fmsub(double %a0, double %a1, double %a2) { 200 ; FMA-LABEL: test_f64_fmsub: 201 ; FMA: # %bb.0: 202 ; FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 203 ; FMA-NEXT: retq 204 ; 205 ; FMA4-LABEL: test_f64_fmsub: 206 ; FMA4: # %bb.0: 207 ; FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0 208 ; FMA4-NEXT: retq 209 ; 210 ; AVX512-LABEL: test_f64_fmsub: 211 ; AVX512: # %bb.0: 212 ; AVX512-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 213 ; AVX512-NEXT: retq 214 %x = fmul double %a0, %a1 215 %res = fsub double %x, %a2 216 ret double %res 217 } 218 219 define <2 x double> @test_2f64_fmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { 220 ; FMA-LABEL: test_2f64_fmsub: 221 ; FMA: # %bb.0: 222 ; FMA-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 223 ; FMA-NEXT: retq 224 ; 225 ; FMA4-LABEL: test_2f64_fmsub: 226 ; FMA4: # %bb.0: 227 ; FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 228 ; FMA4-NEXT: retq 229 ; 230 ; AVX512-LABEL: test_2f64_fmsub: 231 ; AVX512: # %bb.0: 232 ; AVX512-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 233 ; AVX512-NEXT: retq 234 %x = fmul <2 x double> %a0, %a1 235 %res = fsub <2 x 
double> %x, %a2 236 ret <2 x double> %res 237 } 238 239 define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { 240 ; FMA-LABEL: test_4f64_fmsub: 241 ; FMA: # %bb.0: 242 ; FMA-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 243 ; FMA-NEXT: retq 244 ; 245 ; FMA4-LABEL: test_4f64_fmsub: 246 ; FMA4: # %bb.0: 247 ; FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 248 ; FMA4-NEXT: retq 249 ; 250 ; AVX512-LABEL: test_4f64_fmsub: 251 ; AVX512: # %bb.0: 252 ; AVX512-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 253 ; AVX512-NEXT: retq 254 %x = fmul <4 x double> %a0, %a1 255 %res = fsub <4 x double> %x, %a2 256 ret <4 x double> %res 257 } 258 259 ; 260 ; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z) 261 ; 262 263 define float @test_f32_fnmadd(float %a0, float %a1, float %a2) { 264 ; FMA-LABEL: test_f32_fnmadd: 265 ; FMA: # %bb.0: 266 ; FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 267 ; FMA-NEXT: retq 268 ; 269 ; FMA4-LABEL: test_f32_fnmadd: 270 ; FMA4: # %bb.0: 271 ; FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 272 ; FMA4-NEXT: retq 273 ; 274 ; AVX512-LABEL: test_f32_fnmadd: 275 ; AVX512: # %bb.0: 276 ; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 277 ; AVX512-NEXT: retq 278 %x = fmul float %a0, %a1 279 %res = fsub float %a2, %x 280 ret float %res 281 } 282 283 define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { 284 ; FMA-LABEL: test_4f32_fnmadd: 285 ; FMA: # %bb.0: 286 ; FMA-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 287 ; FMA-NEXT: retq 288 ; 289 ; FMA4-LABEL: test_4f32_fnmadd: 290 ; FMA4: # %bb.0: 291 ; FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 292 ; FMA4-NEXT: retq 293 ; 294 ; AVX512-LABEL: test_4f32_fnmadd: 295 ; AVX512: # %bb.0: 296 ; AVX512-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 297 ; AVX512-NEXT: retq 298 %x = fmul <4 x float> %a0, %a1 299 %res = fsub <4 x float> %a2, %x 300 ret <4 x float> %res 301 } 302 303 define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { 304 ; 
FMA-LABEL: test_8f32_fnmadd: 305 ; FMA: # %bb.0: 306 ; FMA-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 307 ; FMA-NEXT: retq 308 ; 309 ; FMA4-LABEL: test_8f32_fnmadd: 310 ; FMA4: # %bb.0: 311 ; FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 312 ; FMA4-NEXT: retq 313 ; 314 ; AVX512-LABEL: test_8f32_fnmadd: 315 ; AVX512: # %bb.0: 316 ; AVX512-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 317 ; AVX512-NEXT: retq 318 %x = fmul <8 x float> %a0, %a1 319 %res = fsub <8 x float> %a2, %x 320 ret <8 x float> %res 321 } 322 323 define double @test_f64_fnmadd(double %a0, double %a1, double %a2) { 324 ; FMA-LABEL: test_f64_fnmadd: 325 ; FMA: # %bb.0: 326 ; FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 327 ; FMA-NEXT: retq 328 ; 329 ; FMA4-LABEL: test_f64_fnmadd: 330 ; FMA4: # %bb.0: 331 ; FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0 332 ; FMA4-NEXT: retq 333 ; 334 ; AVX512-LABEL: test_f64_fnmadd: 335 ; AVX512: # %bb.0: 336 ; AVX512-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 337 ; AVX512-NEXT: retq 338 %x = fmul double %a0, %a1 339 %res = fsub double %a2, %x 340 ret double %res 341 } 342 343 define <2 x double> @test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { 344 ; FMA-LABEL: test_2f64_fnmadd: 345 ; FMA: # %bb.0: 346 ; FMA-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 347 ; FMA-NEXT: retq 348 ; 349 ; FMA4-LABEL: test_2f64_fnmadd: 350 ; FMA4: # %bb.0: 351 ; FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 352 ; FMA4-NEXT: retq 353 ; 354 ; AVX512-LABEL: test_2f64_fnmadd: 355 ; AVX512: # %bb.0: 356 ; AVX512-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 357 ; AVX512-NEXT: retq 358 %x = fmul <2 x double> %a0, %a1 359 %res = fsub <2 x double> %a2, %x 360 ret <2 x double> %res 361 } 362 363 define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { 364 ; FMA-LABEL: test_4f64_fnmadd: 365 ; FMA: # %bb.0: 366 ; FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 367 ; FMA-NEXT: retq 368 ; 369 ; FMA4-LABEL: test_4f64_fnmadd: 370 ; FMA4: # %bb.0: 371 ; FMA4-NEXT: 
vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 372 ; FMA4-NEXT: retq 373 ; 374 ; AVX512-LABEL: test_4f64_fnmadd: 375 ; AVX512: # %bb.0: 376 ; AVX512-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 377 ; AVX512-NEXT: retq 378 %x = fmul <4 x double> %a0, %a1 379 %res = fsub <4 x double> %a2, %x 380 ret <4 x double> %res 381 } 382 383 ; 384 ; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z) 385 ; 386 387 define float @test_f32_fnmsub(float %a0, float %a1, float %a2) { 388 ; FMA-LABEL: test_f32_fnmsub: 389 ; FMA: # %bb.0: 390 ; FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 391 ; FMA-NEXT: retq 392 ; 393 ; FMA4-LABEL: test_f32_fnmsub: 394 ; FMA4: # %bb.0: 395 ; FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0 396 ; FMA4-NEXT: retq 397 ; 398 ; AVX512-LABEL: test_f32_fnmsub: 399 ; AVX512: # %bb.0: 400 ; AVX512-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 401 ; AVX512-NEXT: retq 402 %x = fmul float %a0, %a1 403 %y = fsub float -0.000000e+00, %x 404 %res = fsub float %y, %a2 405 ret float %res 406 } 407 408 define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { 409 ; FMA-LABEL: test_4f32_fnmsub: 410 ; FMA: # %bb.0: 411 ; FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 412 ; FMA-NEXT: retq 413 ; 414 ; FMA4-LABEL: test_4f32_fnmsub: 415 ; FMA4: # %bb.0: 416 ; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 417 ; FMA4-NEXT: retq 418 ; 419 ; AVX512-LABEL: test_4f32_fnmsub: 420 ; AVX512: # %bb.0: 421 ; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 422 ; AVX512-NEXT: retq 423 %x = fmul <4 x float> %a0, %a1 424 %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x 425 %res = fsub <4 x float> %y, %a2 426 ret <4 x float> %res 427 } 428 429 define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { 430 ; FMA-LABEL: test_8f32_fnmsub: 431 ; FMA: # %bb.0: 432 ; FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 433 ; FMA-NEXT: retq 434 ; 435 ; FMA4-LABEL: test_8f32_fnmsub: 436 ; FMA4: # %bb.0: 437 ; 
FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 438 ; FMA4-NEXT: retq 439 ; 440 ; AVX512-LABEL: test_8f32_fnmsub: 441 ; AVX512: # %bb.0: 442 ; AVX512-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 443 ; AVX512-NEXT: retq 444 %x = fmul <8 x float> %a0, %a1 445 %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x 446 %res = fsub <8 x float> %y, %a2 447 ret <8 x float> %res 448 } 449 450 define double @test_f64_fnmsub(double %a0, double %a1, double %a2) { 451 ; FMA-LABEL: test_f64_fnmsub: 452 ; FMA: # %bb.0: 453 ; FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 454 ; FMA-NEXT: retq 455 ; 456 ; FMA4-LABEL: test_f64_fnmsub: 457 ; FMA4: # %bb.0: 458 ; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 459 ; FMA4-NEXT: retq 460 ; 461 ; AVX512-LABEL: test_f64_fnmsub: 462 ; AVX512: # %bb.0: 463 ; AVX512-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 464 ; AVX512-NEXT: retq 465 %x = fmul double %a0, %a1 466 %y = fsub double -0.000000e+00, %x 467 %res = fsub double %y, %a2 468 ret double %res 469 } 470 471 define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { 472 ; FMA-LABEL: test_2f64_fnmsub: 473 ; FMA: # %bb.0: 474 ; FMA-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 475 ; FMA-NEXT: retq 476 ; 477 ; FMA4-LABEL: test_2f64_fnmsub: 478 ; FMA4: # %bb.0: 479 ; FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 480 ; FMA4-NEXT: retq 481 ; 482 ; AVX512-LABEL: test_2f64_fnmsub: 483 ; AVX512: # %bb.0: 484 ; AVX512-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 485 ; AVX512-NEXT: retq 486 %x = fmul <2 x double> %a0, %a1 487 %y = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x 488 %res = fsub <2 x double> %y, %a2 489 ret <2 x double> %res 490 } 491 492 define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { 493 ; FMA-LABEL: test_4f64_fnmsub: 494 ; FMA: # %bb.0: 495 ; FMA-NEXT: vfnmsub213pd 
%ymm2, %ymm1, %ymm0 496 ; FMA-NEXT: retq 497 ; 498 ; FMA4-LABEL: test_4f64_fnmsub: 499 ; FMA4: # %bb.0: 500 ; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 501 ; FMA4-NEXT: retq 502 ; 503 ; AVX512-LABEL: test_4f64_fnmsub: 504 ; AVX512: # %bb.0: 505 ; AVX512-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 506 ; AVX512-NEXT: retq 507 %x = fmul <4 x double> %a0, %a1 508 %y = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x 509 %res = fsub <4 x double> %y, %a2 510 ret <4 x double> %res 511 } 512 513 ; 514 ; Load Folding Patterns 515 ; 516 517 define <4 x float> @test_4f32_fmadd_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) { 518 ; FMA-LABEL: test_4f32_fmadd_load: 519 ; FMA: # %bb.0: 520 ; FMA-NEXT: vfmadd132ps (%rdi), %xmm1, %xmm0 521 ; FMA-NEXT: retq 522 ; 523 ; FMA4-LABEL: test_4f32_fmadd_load: 524 ; FMA4: # %bb.0: 525 ; FMA4-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 526 ; FMA4-NEXT: retq 527 ; 528 ; AVX512-LABEL: test_4f32_fmadd_load: 529 ; AVX512: # %bb.0: 530 ; AVX512-NEXT: vfmadd132ps (%rdi), %xmm1, %xmm0 531 ; AVX512-NEXT: retq 532 %x = load <4 x float>, <4 x float>* %a0 533 %y = fmul <4 x float> %x, %a1 534 %res = fadd <4 x float> %y, %a2 535 ret <4 x float> %res 536 } 537 538 define <2 x double> @test_2f64_fmsub_load(<2 x double>* %a0, <2 x double> %a1, <2 x double> %a2) { 539 ; FMA-LABEL: test_2f64_fmsub_load: 540 ; FMA: # %bb.0: 541 ; FMA-NEXT: vfmsub132pd (%rdi), %xmm1, %xmm0 542 ; FMA-NEXT: retq 543 ; 544 ; FMA4-LABEL: test_2f64_fmsub_load: 545 ; FMA4: # %bb.0: 546 ; FMA4-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0 547 ; FMA4-NEXT: retq 548 ; 549 ; AVX512-LABEL: test_2f64_fmsub_load: 550 ; AVX512: # %bb.0: 551 ; AVX512-NEXT: vfmsub132pd (%rdi), %xmm1, %xmm0 552 ; AVX512-NEXT: retq 553 %x = load <2 x double>, <2 x double>* %a0 554 %y = fmul <2 x double> %x, %a1 555 %res = fsub <2 x double> %y, %a2 556 ret <2 x double> %res 557 } 558 559 ; 560 ; Patterns (+ fneg variants): mul(add(1.0,x),y), 
mul(sub(1.0,x),y), mul(sub(x,1.0),y) 561 ; 562 563 define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) { 564 ; FMA-INFS-LABEL: test_v4f32_mul_add_x_one_y: 565 ; FMA-INFS: # %bb.0: 566 ; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 567 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 568 ; FMA-INFS-NEXT: retq 569 ; 570 ; FMA4-INFS-LABEL: test_v4f32_mul_add_x_one_y: 571 ; FMA4-INFS: # %bb.0: 572 ; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 573 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 574 ; FMA4-INFS-NEXT: retq 575 ; 576 ; AVX512-INFS-LABEL: test_v4f32_mul_add_x_one_y: 577 ; AVX512-INFS: # %bb.0: 578 ; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 579 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 580 ; AVX512-INFS-NEXT: retq 581 ; 582 ; FMA-NOINFS-LABEL: test_v4f32_mul_add_x_one_y: 583 ; FMA-NOINFS: # %bb.0: 584 ; FMA-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 585 ; FMA-NOINFS-NEXT: retq 586 ; 587 ; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_one_y: 588 ; FMA4-NOINFS: # %bb.0: 589 ; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 590 ; FMA4-NOINFS-NEXT: retq 591 ; 592 ; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_one_y: 593 ; AVX512-NOINFS: # %bb.0: 594 ; AVX512-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 595 ; AVX512-NOINFS-NEXT: retq 596 %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> 597 %m = fmul <4 x float> %a, %y 598 ret <4 x float> %m 599 } 600 601 define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) { 602 ; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_one: 603 ; FMA-INFS: # %bb.0: 604 ; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 605 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 606 ; FMA-INFS-NEXT: retq 607 ; 608 ; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_one: 609 ; FMA4-INFS: # %bb.0: 610 ; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 611 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 612 ; FMA4-INFS-NEXT: retq 613 ; 614 ; AVX512-INFS-LABEL: 
test_v4f32_mul_y_add_x_one: 615 ; AVX512-INFS: # %bb.0: 616 ; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 617 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 618 ; AVX512-INFS-NEXT: retq 619 ; 620 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_one: 621 ; FMA-NOINFS: # %bb.0: 622 ; FMA-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 623 ; FMA-NOINFS-NEXT: retq 624 ; 625 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one: 626 ; FMA4-NOINFS: # %bb.0: 627 ; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 628 ; FMA4-NOINFS-NEXT: retq 629 ; 630 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one: 631 ; AVX512-NOINFS: # %bb.0: 632 ; AVX512-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 633 ; AVX512-NOINFS-NEXT: retq 634 %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> 635 %m = fmul <4 x float> %y, %a 636 ret <4 x float> %m 637 } 638 639 define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) { 640 ; FMA-INFS-LABEL: test_v4f32_mul_add_x_negone_y: 641 ; FMA-INFS: # %bb.0: 642 ; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 643 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 644 ; FMA-INFS-NEXT: retq 645 ; 646 ; FMA4-INFS-LABEL: test_v4f32_mul_add_x_negone_y: 647 ; FMA4-INFS: # %bb.0: 648 ; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 649 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 650 ; FMA4-INFS-NEXT: retq 651 ; 652 ; AVX512-INFS-LABEL: test_v4f32_mul_add_x_negone_y: 653 ; AVX512-INFS: # %bb.0: 654 ; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 655 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 656 ; AVX512-INFS-NEXT: retq 657 ; 658 ; FMA-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y: 659 ; FMA-NOINFS: # %bb.0: 660 ; FMA-NOINFS-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 661 ; FMA-NOINFS-NEXT: retq 662 ; 663 ; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y: 664 ; FMA4-NOINFS: # %bb.0: 665 ; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 666 ; FMA4-NOINFS-NEXT: retq 667 ; 668 ; AVX512-NOINFS-LABEL: 
test_v4f32_mul_add_x_negone_y: 669 ; AVX512-NOINFS: # %bb.0: 670 ; AVX512-NOINFS-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 671 ; AVX512-NOINFS-NEXT: retq 672 %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> 673 %m = fmul <4 x float> %a, %y 674 ret <4 x float> %m 675 } 676 677 define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) { 678 ; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_negone: 679 ; FMA-INFS: # %bb.0: 680 ; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 681 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 682 ; FMA-INFS-NEXT: retq 683 ; 684 ; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_negone: 685 ; FMA4-INFS: # %bb.0: 686 ; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 687 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 688 ; FMA4-INFS-NEXT: retq 689 ; 690 ; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_negone: 691 ; AVX512-INFS: # %bb.0: 692 ; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0 693 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 694 ; AVX512-INFS-NEXT: retq 695 ; 696 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone: 697 ; FMA-NOINFS: # %bb.0: 698 ; FMA-NOINFS-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 699 ; FMA-NOINFS-NEXT: retq 700 ; 701 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone: 702 ; FMA4-NOINFS: # %bb.0: 703 ; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 704 ; FMA4-NOINFS-NEXT: retq 705 ; 706 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone: 707 ; AVX512-NOINFS: # %bb.0: 708 ; AVX512-NOINFS-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 709 ; AVX512-NOINFS-NEXT: retq 710 %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> 711 %m = fmul <4 x float> %y, %a 712 ret <4 x float> %m 713 } 714 715 define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) { 716 ; FMA-INFS-LABEL: test_v4f32_mul_sub_one_x_y: 717 ; FMA-INFS: # %bb.0: 718 ; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 719 ; FMA-INFS-NEXT: 
vsubps %xmm0, %xmm2, %xmm0 720 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 721 ; FMA-INFS-NEXT: retq 722 ; 723 ; FMA4-INFS-LABEL: test_v4f32_mul_sub_one_x_y: 724 ; FMA4-INFS: # %bb.0: 725 ; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 726 ; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 727 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 728 ; FMA4-INFS-NEXT: retq 729 ; 730 ; AVX512-INFS-LABEL: test_v4f32_mul_sub_one_x_y: 731 ; AVX512-INFS: # %bb.0: 732 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] 733 ; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 734 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 735 ; AVX512-INFS-NEXT: retq 736 ; 737 ; FMA-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y: 738 ; FMA-NOINFS: # %bb.0: 739 ; FMA-NOINFS-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 740 ; FMA-NOINFS-NEXT: retq 741 ; 742 ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y: 743 ; FMA4-NOINFS: # %bb.0: 744 ; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0 745 ; FMA4-NOINFS-NEXT: retq 746 ; 747 ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y: 748 ; AVX512-NOINFS: # %bb.0: 749 ; AVX512-NOINFS-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 750 ; AVX512-NOINFS-NEXT: retq 751 %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x 752 %m = fmul <4 x float> %s, %y 753 ret <4 x float> %m 754 } 755 756 define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) { 757 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_one_x: 758 ; FMA-INFS: # %bb.0: 759 ; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 760 ; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 761 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 762 ; FMA-INFS-NEXT: retq 763 ; 764 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_one_x: 765 ; FMA4-INFS: # %bb.0: 766 ; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 767 ; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 768 ; FMA4-INFS-NEXT: vmulps 
%xmm0, %xmm1, %xmm0 769 ; FMA4-INFS-NEXT: retq 770 ; 771 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_one_x: 772 ; AVX512-INFS: # %bb.0: 773 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] 774 ; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 775 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 776 ; AVX512-INFS-NEXT: retq 777 ; 778 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x: 779 ; FMA-NOINFS: # %bb.0: 780 ; FMA-NOINFS-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 781 ; FMA-NOINFS-NEXT: retq 782 ; 783 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x: 784 ; FMA4-NOINFS: # %bb.0: 785 ; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0 786 ; FMA4-NOINFS-NEXT: retq 787 ; 788 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x: 789 ; AVX512-NOINFS: # %bb.0: 790 ; AVX512-NOINFS-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0 791 ; AVX512-NOINFS-NEXT: retq 792 %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x 793 %m = fmul <4 x float> %y, %s 794 ret <4 x float> %m 795 } 796 797 define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) { 798 ; FMA-INFS-LABEL: test_v4f32_mul_sub_negone_x_y: 799 ; FMA-INFS: # %bb.0: 800 ; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00] 801 ; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 802 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 803 ; FMA-INFS-NEXT: retq 804 ; 805 ; FMA4-INFS-LABEL: test_v4f32_mul_sub_negone_x_y: 806 ; FMA4-INFS: # %bb.0: 807 ; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00] 808 ; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 809 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 810 ; FMA4-INFS-NEXT: retq 811 ; 812 ; AVX512-INFS-LABEL: test_v4f32_mul_sub_negone_x_y: 813 ; AVX512-INFS: # %bb.0: 814 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1,-1,-1,-1] 815 ; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 816 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 817 ; AVX512-INFS-NEXT: retq 818 ; 819 
; FMA-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y: 820 ; FMA-NOINFS: # %bb.0: 821 ; FMA-NOINFS-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 822 ; FMA-NOINFS-NEXT: retq 823 ; 824 ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y: 825 ; FMA4-NOINFS: # %bb.0: 826 ; FMA4-NOINFS-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0 827 ; FMA4-NOINFS-NEXT: retq 828 ; 829 ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y: 830 ; AVX512-NOINFS: # %bb.0: 831 ; AVX512-NOINFS-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 832 ; AVX512-NOINFS-NEXT: retq 833 %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x 834 %m = fmul <4 x float> %s, %y 835 ret <4 x float> %m 836 } 837 838 define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) { 839 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_negone_x: 840 ; FMA-INFS: # %bb.0: 841 ; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00] 842 ; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 843 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 844 ; FMA-INFS-NEXT: retq 845 ; 846 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_negone_x: 847 ; FMA4-INFS: # %bb.0: 848 ; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00] 849 ; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 850 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 851 ; FMA4-INFS-NEXT: retq 852 ; 853 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_negone_x: 854 ; AVX512-INFS: # %bb.0: 855 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1,-1,-1,-1] 856 ; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 857 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 858 ; AVX512-INFS-NEXT: retq 859 ; 860 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x: 861 ; FMA-NOINFS: # %bb.0: 862 ; FMA-NOINFS-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 863 ; FMA-NOINFS-NEXT: retq 864 ; 865 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x: 866 ; FMA4-NOINFS: # %bb.0: 867 ; FMA4-NOINFS-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0 868 ; 
FMA4-NOINFS-NEXT: retq 869 ; 870 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x: 871 ; AVX512-NOINFS: # %bb.0: 872 ; AVX512-NOINFS-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0 873 ; AVX512-NOINFS-NEXT: retq 874 %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x 875 %m = fmul <4 x float> %y, %s 876 ret <4 x float> %m 877 } 878 879 define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) { 880 ; FMA-INFS-LABEL: test_v4f32_mul_sub_x_one_y: 881 ; FMA-INFS: # %bb.0: 882 ; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 883 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 884 ; FMA-INFS-NEXT: retq 885 ; 886 ; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_one_y: 887 ; FMA4-INFS: # %bb.0: 888 ; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 889 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 890 ; FMA4-INFS-NEXT: retq 891 ; 892 ; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_one_y: 893 ; AVX512-INFS: # %bb.0: 894 ; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0 895 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 896 ; AVX512-INFS-NEXT: retq 897 ; 898 ; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y: 899 ; FMA-NOINFS: # %bb.0: 900 ; FMA-NOINFS-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 901 ; FMA-NOINFS-NEXT: retq 902 ; 903 ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y: 904 ; FMA4-NOINFS: # %bb.0: 905 ; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 906 ; FMA4-NOINFS-NEXT: retq 907 ; 908 ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y: 909 ; AVX512-NOINFS: # %bb.0: 910 ; AVX512-NOINFS-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 911 ; AVX512-NOINFS-NEXT: retq 912 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> 913 %m = fmul <4 x float> %s, %y 914 ret <4 x float> %m 915 } 916 917 define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) { 918 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one: 919 ; FMA-INFS: # %bb.0: 920 ; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 921 ; FMA-INFS-NEXT: vmulps 
%xmm0, %xmm1, %xmm0 922 ; FMA-INFS-NEXT: retq 923 ; 924 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one: 925 ; FMA4-INFS: # %bb.0: 926 ; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 927 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 928 ; FMA4-INFS-NEXT: retq 929 ; 930 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one: 931 ; AVX512-INFS: # %bb.0: 932 ; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0 933 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 934 ; AVX512-INFS-NEXT: retq 935 ; 936 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one: 937 ; FMA-NOINFS: # %bb.0: 938 ; FMA-NOINFS-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 939 ; FMA-NOINFS-NEXT: retq 940 ; 941 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one: 942 ; FMA4-NOINFS: # %bb.0: 943 ; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0 944 ; FMA4-NOINFS-NEXT: retq 945 ; 946 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one: 947 ; AVX512-NOINFS: # %bb.0: 948 ; AVX512-NOINFS-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0 949 ; AVX512-NOINFS-NEXT: retq 950 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> 951 %m = fmul <4 x float> %y, %s 952 ret <4 x float> %m 953 } 954 955 define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) { 956 ; FMA-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: 957 ; FMA-INFS: # %bb.0: 958 ; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 959 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 960 ; FMA-INFS-NEXT: retq 961 ; 962 ; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: 963 ; FMA4-INFS: # %bb.0: 964 ; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 965 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 966 ; FMA4-INFS-NEXT: retq 967 ; 968 ; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: 969 ; AVX512-INFS: # %bb.0: 970 ; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0 971 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 972 ; AVX512-INFS-NEXT: retq 973 ; 974 ; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y: 975 ; FMA-NOINFS: # %bb.0: 976 ; 
FMA-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 977 ; FMA-NOINFS-NEXT: retq 978 ; 979 ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y: 980 ; FMA4-NOINFS: # %bb.0: 981 ; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 982 ; FMA4-NOINFS-NEXT: retq 983 ; 984 ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y: 985 ; AVX512-NOINFS: # %bb.0: 986 ; AVX512-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 987 ; AVX512-NOINFS-NEXT: retq 988 %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> 989 %m = fmul <4 x float> %s, %y 990 ret <4 x float> %m 991 } 992 ; Checks y * (x - (-1.0)), the fmul operands swapped relative to the previous test. INFS runs expect vsubps then vmulps. NOINFS runs expect one fmadd. 993 define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) { 994 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: 995 ; FMA-INFS: # %bb.0: 996 ; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 997 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 998 ; FMA-INFS-NEXT: retq 999 ; 1000 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1001 ; FMA4-INFS: # %bb.0: 1002 ; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 1003 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1004 ; FMA4-INFS-NEXT: retq 1005 ; 1006 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1007 ; AVX512-INFS: # %bb.0: 1008 ; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0 1009 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1010 ; AVX512-INFS-NEXT: retq 1011 ; 1012 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1013 ; FMA-NOINFS: # %bb.0: 1014 ; FMA-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 1015 ; FMA-NOINFS-NEXT: retq 1016 ; 1017 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1018 ; FMA4-NOINFS: # %bb.0: 1019 ; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0 1020 ; FMA4-NOINFS-NEXT: retq 1021 ; 1022 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1023 ; AVX512-NOINFS: # %bb.0: 1024 ; AVX512-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0 1025 ; AVX512-NOINFS-NEXT: retq 1026 %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> 1027 %m = fmul <4 x float>
%y, %s 1028 ret <4 x float> %m 1029 } 1030 1031 ; 1032 ; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y)) 1033 ; 1034 ; Scalar float case. INFS runs expect an explicit 1.0 - t (vsubss), a vmulss, then one fmadd. NOINFS runs (-enable-no-infs-fp-math) expect an fnmadd followed by an fmadd. 1035 define float @test_f32_interp(float %x, float %y, float %t) { 1036 ; FMA-INFS-LABEL: test_f32_interp: 1037 ; FMA-INFS: # %bb.0: 1038 ; FMA-INFS-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero 1039 ; FMA-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3 1040 ; FMA-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1 1041 ; FMA-INFS-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0 1042 ; FMA-INFS-NEXT: retq 1043 ; 1044 ; FMA4-INFS-LABEL: test_f32_interp: 1045 ; FMA4-INFS: # %bb.0: 1046 ; FMA4-INFS-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero 1047 ; FMA4-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3 1048 ; FMA4-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1 1049 ; FMA4-INFS-NEXT: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0 1050 ; FMA4-INFS-NEXT: retq 1051 ; 1052 ; AVX512-INFS-LABEL: test_f32_interp: 1053 ; AVX512-INFS: # %bb.0: 1054 ; AVX512-INFS-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero 1055 ; AVX512-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3 1056 ; AVX512-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1 1057 ; AVX512-INFS-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0 1058 ; AVX512-INFS-NEXT: retq 1059 ; 1060 ; FMA-NOINFS-LABEL: test_f32_interp: 1061 ; FMA-NOINFS: # %bb.0: 1062 ; FMA-NOINFS-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1 1063 ; FMA-NOINFS-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0 1064 ; FMA-NOINFS-NEXT: retq 1065 ; 1066 ; FMA4-NOINFS-LABEL: test_f32_interp: 1067 ; FMA4-NOINFS: # %bb.0: 1068 ; FMA4-NOINFS-NEXT: vfnmaddss %xmm1, %xmm1, %xmm2, %xmm1 1069 ; FMA4-NOINFS-NEXT: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0 1070 ; FMA4-NOINFS-NEXT: retq 1071 ; 1072 ; AVX512-NOINFS-LABEL: test_f32_interp: 1073 ; AVX512-NOINFS: # %bb.0: 1074 ; AVX512-NOINFS-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1 1075 ; AVX512-NOINFS-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0 1076 ; AVX512-NOINFS-NEXT: retq 1077 %t1 = fsub float 1.0, %t 1078 %tx = fmul float %x, %t 1079 %ty = fmul float %y, %t1 1080 %r = fadd float %tx, %ty 1081 ret float %r
1082 } 1083 1084 define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) { 1085 ; FMA-INFS-LABEL: test_v4f32_interp: 1086 ; FMA-INFS: # %bb.0: 1087 ; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 1088 ; FMA-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3 1089 ; FMA-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1 1090 ; FMA-INFS-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0 1091 ; FMA-INFS-NEXT: retq 1092 ; 1093 ; FMA4-INFS-LABEL: test_v4f32_interp: 1094 ; FMA4-INFS: # %bb.0: 1095 ; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 1096 ; FMA4-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3 1097 ; FMA4-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1 1098 ; FMA4-INFS-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0 1099 ; FMA4-INFS-NEXT: retq 1100 ; 1101 ; AVX512-INFS-LABEL: test_v4f32_interp: 1102 ; AVX512-INFS: # %bb.0: 1103 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1] 1104 ; AVX512-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3 1105 ; AVX512-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1 1106 ; AVX512-INFS-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0 1107 ; AVX512-INFS-NEXT: retq 1108 ; 1109 ; FMA-NOINFS-LABEL: test_v4f32_interp: 1110 ; FMA-NOINFS: # %bb.0: 1111 ; FMA-NOINFS-NEXT: vfnmadd213ps %xmm1, %xmm2, %xmm1 1112 ; FMA-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0 1113 ; FMA-NOINFS-NEXT: retq 1114 ; 1115 ; FMA4-NOINFS-LABEL: test_v4f32_interp: 1116 ; FMA4-NOINFS: # %bb.0: 1117 ; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm2, %xmm1 1118 ; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0 1119 ; FMA4-NOINFS-NEXT: retq 1120 ; 1121 ; AVX512-NOINFS-LABEL: test_v4f32_interp: 1122 ; AVX512-NOINFS: # %bb.0: 1123 ; AVX512-NOINFS-NEXT: vfnmadd213ps %xmm1, %xmm2, %xmm1 1124 ; AVX512-NOINFS-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0 1125 ; AVX512-NOINFS-NEXT: retq 1126 %t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t 1127 %tx = fmul <4 x float> %x, %t 1128 %ty = fmul <4 x float> %y, %t1 1129 %r 
= fadd <4 x float> %tx, %ty 1130 ret <4 x float> %r 1131 } 1132 1133 define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) { 1134 ; FMA-INFS-LABEL: test_v8f32_interp: 1135 ; FMA-INFS: # %bb.0: 1136 ; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 1137 ; FMA-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3 1138 ; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 1139 ; FMA-INFS-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0 1140 ; FMA-INFS-NEXT: retq 1141 ; 1142 ; FMA4-INFS-LABEL: test_v8f32_interp: 1143 ; FMA4-INFS: # %bb.0: 1144 ; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 1145 ; FMA4-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3 1146 ; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 1147 ; FMA4-INFS-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0 1148 ; FMA4-INFS-NEXT: retq 1149 ; 1150 ; AVX512-INFS-LABEL: test_v8f32_interp: 1151 ; AVX512-INFS: # %bb.0: 1152 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] 1153 ; AVX512-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3 1154 ; AVX512-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 1155 ; AVX512-INFS-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0 1156 ; AVX512-INFS-NEXT: retq 1157 ; 1158 ; FMA-NOINFS-LABEL: test_v8f32_interp: 1159 ; FMA-NOINFS: # %bb.0: 1160 ; FMA-NOINFS-NEXT: vfnmadd213ps %ymm1, %ymm2, %ymm1 1161 ; FMA-NOINFS-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0 1162 ; FMA-NOINFS-NEXT: retq 1163 ; 1164 ; FMA4-NOINFS-LABEL: test_v8f32_interp: 1165 ; FMA4-NOINFS: # %bb.0: 1166 ; FMA4-NOINFS-NEXT: vfnmaddps %ymm1, %ymm1, %ymm2, %ymm1 1167 ; FMA4-NOINFS-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0 1168 ; FMA4-NOINFS-NEXT: retq 1169 ; 1170 ; AVX512-NOINFS-LABEL: test_v8f32_interp: 1171 ; AVX512-NOINFS: # %bb.0: 1172 ; AVX512-NOINFS-NEXT: vfnmadd213ps %ymm1, %ymm2, %ymm1 1173 ; AVX512-NOINFS-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0 1174 ; 
AVX512-NOINFS-NEXT: retq 1175 %t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t 1176 %tx = fmul <8 x float> %x, %t 1177 %ty = fmul <8 x float> %y, %t1 1178 %r = fadd <8 x float> %tx, %ty 1179 ret <8 x float> %r 1180 } 1181 1182 define double @test_f64_interp(double %x, double %y, double %t) { 1183 ; FMA-INFS-LABEL: test_f64_interp: 1184 ; FMA-INFS: # %bb.0: 1185 ; FMA-INFS-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero 1186 ; FMA-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3 1187 ; FMA-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1 1188 ; FMA-INFS-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0 1189 ; FMA-INFS-NEXT: retq 1190 ; 1191 ; FMA4-INFS-LABEL: test_f64_interp: 1192 ; FMA4-INFS: # %bb.0: 1193 ; FMA4-INFS-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero 1194 ; FMA4-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3 1195 ; FMA4-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1 1196 ; FMA4-INFS-NEXT: vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0 1197 ; FMA4-INFS-NEXT: retq 1198 ; 1199 ; AVX512-INFS-LABEL: test_f64_interp: 1200 ; AVX512-INFS: # %bb.0: 1201 ; AVX512-INFS-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero 1202 ; AVX512-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3 1203 ; AVX512-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1 1204 ; AVX512-INFS-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0 1205 ; AVX512-INFS-NEXT: retq 1206 ; 1207 ; FMA-NOINFS-LABEL: test_f64_interp: 1208 ; FMA-NOINFS: # %bb.0: 1209 ; FMA-NOINFS-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1 1210 ; FMA-NOINFS-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0 1211 ; FMA-NOINFS-NEXT: retq 1212 ; 1213 ; FMA4-NOINFS-LABEL: test_f64_interp: 1214 ; FMA4-NOINFS: # %bb.0: 1215 ; FMA4-NOINFS-NEXT: vfnmaddsd %xmm1, %xmm1, %xmm2, %xmm1 1216 ; FMA4-NOINFS-NEXT: vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0 1217 ; FMA4-NOINFS-NEXT: retq 1218 ; 1219 ; AVX512-NOINFS-LABEL: test_f64_interp: 1220 ; AVX512-NOINFS: # %bb.0: 1221 ; AVX512-NOINFS-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1 1222 ; AVX512-NOINFS-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0 1223 ; AVX512-NOINFS-NEXT: retq 
1224 %t1 = fsub double 1.0, %t 1225 %tx = fmul double %x, %t 1226 %ty = fmul double %y, %t1 1227 %r = fadd double %tx, %ty 1228 ret double %r 1229 } 1230 1231 define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) { 1232 ; FMA-INFS-LABEL: test_v2f64_interp: 1233 ; FMA-INFS: # %bb.0: 1234 ; FMA-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00] 1235 ; FMA-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3 1236 ; FMA-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1 1237 ; FMA-INFS-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0 1238 ; FMA-INFS-NEXT: retq 1239 ; 1240 ; FMA4-INFS-LABEL: test_v2f64_interp: 1241 ; FMA4-INFS: # %bb.0: 1242 ; FMA4-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00] 1243 ; FMA4-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3 1244 ; FMA4-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1 1245 ; FMA4-INFS-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0 1246 ; FMA4-INFS-NEXT: retq 1247 ; 1248 ; AVX512-INFS-LABEL: test_v2f64_interp: 1249 ; AVX512-INFS: # %bb.0: 1250 ; AVX512-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00] 1251 ; AVX512-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3 1252 ; AVX512-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1 1253 ; AVX512-INFS-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0 1254 ; AVX512-INFS-NEXT: retq 1255 ; 1256 ; FMA-NOINFS-LABEL: test_v2f64_interp: 1257 ; FMA-NOINFS: # %bb.0: 1258 ; FMA-NOINFS-NEXT: vfnmadd213pd %xmm1, %xmm2, %xmm1 1259 ; FMA-NOINFS-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0 1260 ; FMA-NOINFS-NEXT: retq 1261 ; 1262 ; FMA4-NOINFS-LABEL: test_v2f64_interp: 1263 ; FMA4-NOINFS: # %bb.0: 1264 ; FMA4-NOINFS-NEXT: vfnmaddpd %xmm1, %xmm1, %xmm2, %xmm1 1265 ; FMA4-NOINFS-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0 1266 ; FMA4-NOINFS-NEXT: retq 1267 ; 1268 ; AVX512-NOINFS-LABEL: test_v2f64_interp: 1269 ; AVX512-NOINFS: # %bb.0: 1270 ; AVX512-NOINFS-NEXT: vfnmadd213pd %xmm1, %xmm2, %xmm1 1271 ; AVX512-NOINFS-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0 1272 ; AVX512-NOINFS-NEXT: retq 1273 %t1 = fsub <2 x double> <double 1.0, double 
1.0>, %t 1274 %tx = fmul <2 x double> %x, %t 1275 %ty = fmul <2 x double> %y, %t1 1276 %r = fadd <2 x double> %tx, %ty 1277 ret <2 x double> %r 1278 } 1279 1280 define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) { 1281 ; FMA-INFS-LABEL: test_v4f64_interp: 1282 ; FMA-INFS: # %bb.0: 1283 ; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 1284 ; FMA-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3 1285 ; FMA-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1 1286 ; FMA-INFS-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0 1287 ; FMA-INFS-NEXT: retq 1288 ; 1289 ; FMA4-INFS-LABEL: test_v4f64_interp: 1290 ; FMA4-INFS: # %bb.0: 1291 ; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] 1292 ; FMA4-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3 1293 ; FMA4-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1 1294 ; FMA4-INFS-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0 1295 ; FMA4-INFS-NEXT: retq 1296 ; 1297 ; AVX512-INFS-LABEL: test_v4f64_interp: 1298 ; AVX512-INFS: # %bb.0: 1299 ; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} ymm3 = [1,1,1,1] 1300 ; AVX512-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3 1301 ; AVX512-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1 1302 ; AVX512-INFS-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0 1303 ; AVX512-INFS-NEXT: retq 1304 ; 1305 ; FMA-NOINFS-LABEL: test_v4f64_interp: 1306 ; FMA-NOINFS: # %bb.0: 1307 ; FMA-NOINFS-NEXT: vfnmadd213pd %ymm1, %ymm2, %ymm1 1308 ; FMA-NOINFS-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0 1309 ; FMA-NOINFS-NEXT: retq 1310 ; 1311 ; FMA4-NOINFS-LABEL: test_v4f64_interp: 1312 ; FMA4-NOINFS: # %bb.0: 1313 ; FMA4-NOINFS-NEXT: vfnmaddpd %ymm1, %ymm1, %ymm2, %ymm1 1314 ; FMA4-NOINFS-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0 1315 ; FMA4-NOINFS-NEXT: retq 1316 ; 1317 ; AVX512-NOINFS-LABEL: test_v4f64_interp: 1318 ; AVX512-NOINFS: # %bb.0: 1319 ; AVX512-NOINFS-NEXT: vfnmadd213pd %ymm1, %ymm2, %ymm1 1320 ; AVX512-NOINFS-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0 1321 ; AVX512-NOINFS-NEXT: retq 1322 
%t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t 1323 %tx = fmul <4 x double> %x, %t 1324 %ty = fmul <4 x double> %y, %t1 1325 %r = fadd <4 x double> %tx, %ty 1326 ret <4 x double> %r 1327 } 1328 1329 ; 1330 ; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z) 1331 ; 1332 ; Checks -(a0*a1 + a2), with the negation written as fsub from -0.0. Every run expects a single fnmsub. 1333 define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 1334 ; FMA-LABEL: test_v4f32_fneg_fmadd: 1335 ; FMA: # %bb.0: 1336 ; FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 1337 ; FMA-NEXT: retq 1338 ; 1339 ; FMA4-LABEL: test_v4f32_fneg_fmadd: 1340 ; FMA4: # %bb.0: 1341 ; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 1342 ; FMA4-NEXT: retq 1343 ; 1344 ; AVX512-LABEL: test_v4f32_fneg_fmadd: 1345 ; AVX512: # %bb.0: 1346 ; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 1347 ; AVX512-NEXT: retq 1348 %mul = fmul <4 x float> %a0, %a1 1349 %add = fadd <4 x float> %mul, %a2 1350 %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add 1351 ret <4 x float> %neg 1352 } 1353 ; Checks -(a0*a1 - a2). Every run expects a single fnmadd. 1354 define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 1355 ; FMA-LABEL: test_v4f64_fneg_fmsub: 1356 ; FMA: # %bb.0: 1357 ; FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 1358 ; FMA-NEXT: retq 1359 ; 1360 ; FMA4-LABEL: test_v4f64_fneg_fmsub: 1361 ; FMA4: # %bb.0: 1362 ; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 1363 ; FMA4-NEXT: retq 1364 ; 1365 ; AVX512-LABEL: test_v4f64_fneg_fmsub: 1366 ; AVX512: # %bb.0: 1367 ; AVX512-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 1368 ; AVX512-NEXT: retq 1369 %mul = fmul <4 x double> %a0, %a1 1370 %sub = fsub <4 x double> %mul, %a2 1371 %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub 1372 ret <4 x double> %neg 1373 } 1374 ; Checks -(-(a0*a1) + a2). Every run expects a single fmsub. 1375 define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 1376 ; FMA-LABEL: test_v4f32_fneg_fnmadd: 1377 ; FMA: # %bb.0: 1378 ; FMA-NEXT: vfmsub213ps %xmm2, %xmm1,
%xmm0 1379 ; FMA-NEXT: retq 1380 ; 1381 ; FMA4-LABEL: test_v4f32_fneg_fnmadd: 1382 ; FMA4: # %bb.0: 1383 ; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 1384 ; FMA4-NEXT: retq 1385 ; 1386 ; AVX512-LABEL: test_v4f32_fneg_fnmadd: 1387 ; AVX512: # %bb.0: 1388 ; AVX512-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 1389 ; AVX512-NEXT: retq 1390 %mul = fmul <4 x float> %a0, %a1 1391 %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul 1392 %add = fadd <4 x float> %neg0, %a2 1393 %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add 1394 ret <4 x float> %neg1 1395 } 1396 ; Checks -(-(a0*a1) - a2), with both negations written as fsub from -0.0. Every run expects a single fmadd. 1397 define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 1398 ; FMA-LABEL: test_v4f64_fneg_fnmsub: 1399 ; FMA: # %bb.0: 1400 ; FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 1401 ; FMA-NEXT: retq 1402 ; 1403 ; FMA4-LABEL: test_v4f64_fneg_fnmsub: 1404 ; FMA4: # %bb.0: 1405 ; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 1406 ; FMA4-NEXT: retq 1407 ; 1408 ; AVX512-LABEL: test_v4f64_fneg_fnmsub: 1409 ; AVX512: # %bb.0: 1410 ; AVX512-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 1411 ; AVX512-NEXT: retq 1412 %mul = fmul <4 x double> %a0, %a1 1413 %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul 1414 %sub = fsub <4 x double> %neg0, %a2 1415 %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub 1416 ret <4 x double> %neg1 1417 } 1418 1419 ; 1420 ; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) 1421 ; 1422 ; Checks x*<1,2,3,4> + x*<4,3,2,1>. Every run expects a single vmulps with a RIP-relative memory operand and no FMA instruction. 1423 define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 { 1424 ; FMA-LABEL: test_v4f32_fma_x_c1_fmul_x_c2: 1425 ; FMA: # %bb.0: 1426 ; FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 1427 ; FMA-NEXT: retq 1428 ; 1429 ; FMA4-LABEL: test_v4f32_fma_x_c1_fmul_x_c2: 1430 ; FMA4: # %bb.0: 1431 ; FMA4-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 1432 ; FMA4-NEXT: retq 1433 ; 1434 ; AVX512-LABEL: test_v4f32_fma_x_c1_fmul_x_c2: 1435 ; AVX512: # %bb.0:
1436 ; AVX512-NEXT: vmulps {{.*}}(%rip){1to4}, %xmm0, %xmm0 1437 ; AVX512-NEXT: retq 1438 %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0> 1439 %m1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0> 1440 %a = fadd <4 x float> %m0, %m1 1441 ret <4 x float> %a 1442 } 1443 1444 ; 1445 ; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) 1446 ; 1447 ; Checks (x*<1,2,3,4>)*<4,3,2,1> + y. Every run expects a single fmadd that takes a RIP-relative memory operand. 1448 define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 { 1449 ; FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: 1450 ; FMA: # %bb.0: 1451 ; FMA-NEXT: vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0 1452 ; FMA-NEXT: retq 1453 ; 1454 ; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: 1455 ; FMA4: # %bb.0: 1456 ; FMA4-NEXT: vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0 1457 ; FMA4-NEXT: retq 1458 ; 1459 ; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: 1460 ; AVX512: # %bb.0: 1461 ; AVX512-NEXT: vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0 1462 ; AVX512-NEXT: retq 1463 %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0> 1464 %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0> 1465 %a = fadd <4 x float> %m1, %y 1466 ret <4 x float> %a 1467 } 1468 1469 ; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0) 1470 ; Scalar double case with nsz on the fmul. Every run expects vxorpd of a register with itself followed by one fnmsub. 1471 define double @test_f64_fneg_fmul(double %x, double %y) #0 { 1472 ; FMA-LABEL: test_f64_fneg_fmul: 1473 ; FMA: # %bb.0: 1474 ; FMA-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1475 ; FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 1476 ; FMA-NEXT: retq 1477 ; 1478 ; FMA4-LABEL: test_f64_fneg_fmul: 1479 ; FMA4: # %bb.0: 1480 ; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1481 ; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 1482 ; FMA4-NEXT: retq 1483 ; 1484 ; AVX512-LABEL: test_f64_fneg_fmul: 1485 ; AVX512: # %bb.0: 1486 ; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1487 ; AVX512-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 1488 ; AVX512-NEXT: retq 1489 %m = fmul nsz double %x, %y 1490 %n = fsub double -0.0, %m 1491 ret double %n 1492 } 1493 ; Same fneg(fmul nsz) shape on a four-lane float vector. Every run expects vxorps followed by one fnmsub. 1494 define <4 x float>
@test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 { 1495 ; FMA-LABEL: test_v4f32_fneg_fmul: 1496 ; FMA: # %bb.0: 1497 ; FMA-NEXT: vxorps %xmm2, %xmm2, %xmm2 1498 ; FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 1499 ; FMA-NEXT: retq 1500 ; 1501 ; FMA4-LABEL: test_v4f32_fneg_fmul: 1502 ; FMA4: # %bb.0: 1503 ; FMA4-NEXT: vxorps %xmm2, %xmm2, %xmm2 1504 ; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 1505 ; FMA4-NEXT: retq 1506 ; 1507 ; AVX512-LABEL: test_v4f32_fneg_fmul: 1508 ; AVX512: # %bb.0: 1509 ; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 1510 ; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 1511 ; AVX512-NEXT: retq 1512 %m = fmul nsz <4 x float> %x, %y 1513 %n = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %m 1514 ret <4 x float> %n 1515 } 1516 ; Same fneg(fmul nsz) shape on a four-lane double vector. Every run expects vxorpd followed by one fnmsub on ymm registers. 1517 define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 { 1518 ; FMA-LABEL: test_v4f64_fneg_fmul: 1519 ; FMA: # %bb.0: 1520 ; FMA-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1521 ; FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 1522 ; FMA-NEXT: retq 1523 ; 1524 ; FMA4-LABEL: test_v4f64_fneg_fmul: 1525 ; FMA4: # %bb.0: 1526 ; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1527 ; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 1528 ; FMA4-NEXT: retq 1529 ; 1530 ; AVX512-LABEL: test_v4f64_fneg_fmul: 1531 ; AVX512: # %bb.0: 1532 ; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1533 ; AVX512-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 1534 ; AVX512-NEXT: retq 1535 %m = fmul nsz <4 x double> %x, %y 1536 %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m 1537 ret <4 x double> %n 1538 } 1539 ; Same IR as the previous test except the fmul has no nsz flag. Every run expects vmulpd followed by vxorpd with a memory operand, and no FMA instruction. 1540 define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 { 1541 ; FMA-LABEL: test_v4f64_fneg_fmul_no_nsz: 1542 ; FMA: # %bb.0: 1543 ; FMA-NEXT: vmulpd %ymm1, %ymm0, %ymm0 1544 ; FMA-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0 1545 ; FMA-NEXT: retq 1546 ; 1547 ; FMA4-LABEL: test_v4f64_fneg_fmul_no_nsz: 1548 ; FMA4: # %bb.0: 1549 ; FMA4-NEXT: vmulpd %ymm1, %ymm0, %ymm0
1550 ; FMA4-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0 1551 ; FMA4-NEXT: retq 1552 ; 1553 ; AVX512-LABEL: test_v4f64_fneg_fmul_no_nsz: 1554 ; AVX512: # %bb.0: 1555 ; AVX512-NEXT: vmulpd %ymm1, %ymm0, %ymm0 1556 ; AVX512-NEXT: vxorpd {{.*}}(%rip){1to4}, %ymm0, %ymm0 1557 ; AVX512-NEXT: retq 1558 %m = fmul <4 x double> %x, %y 1559 %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m 1560 ret <4 x double> %n 1561 } 1562 ; Attribute group referenced as #0 by the define lines above that carry it. It sets the unsafe-fp-math function attribute to true. 1563 attributes #0 = { "unsafe-fp-math"="true" } 1564