; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=AVX512

; Exercises the instruction sequences llc emits for fmul/fadd/fsub/fneg IR
; patterns when run with -fp-contract=fast on the FMA, FMA4 and AVX512
; configurations selected by the RUN lines above. The check lines are
; autogenerated; regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.

;
; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z)
;

define float @test_f32_fmadd(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ss %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul float %a0, %a1
  %res = fadd float %x, %a2
  ret float %res
}

define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %x = fmul <4 x float> %a0, %a1
  %res = fadd <4 x float> %x, %a2
  ret <4 x float> %res
}

define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f32_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f32_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %x = fmul <8 x float> %a0, %a1
  %res = fadd <8 x float> %x, %a2
  ret <8 x float> %res
}

define double @test_f64_fmadd(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213sd %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul double %a0, %a1
  %res = fadd double %x, %a2
  ret double %res
}

define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %x = fmul <2 x double> %a0, %a1
  %res = fadd <2 x double> %x, %a2
  ret <2 x double> %res
}

define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f64_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f64_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %x = fmul <4 x double> %a0, %a1
  %res = fadd <4 x double> %x, %a2
  ret <4 x double> %res
}

;
; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
;

define float @test_f32_fmsub(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ss %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul float %a0, %a1
  %res = fsub float %x, %a2
  ret float %res
}

define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %x = fmul <4 x float> %a0, %a1
  %res = fsub <4 x float> %x, %a2
  ret <4 x float> %res
}

define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f32_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f32_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %x = fmul <8 x float> %a0, %a1
  %res = fsub <8 x float> %x, %a2
  ret <8 x float> %res
}

define double @test_f64_fmsub(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213sd %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul double %a0, %a1
  %res = fsub double %x, %a2
  ret double %res
}

define <2 x double> @test_2f64_fmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %x = fmul <2 x double> %a0, %a1
  %res = fsub <2 x double> %x, %a2
  ret <2 x double> %res
}

define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f64_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f64_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %x = fmul <4 x double> %a0, %a1
  %res = fsub <4 x double> %x, %a2
  ret <4 x double> %res
}

;
; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
;

define float @test_f32_fnmadd(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul float %a0, %a1
  %res = fsub float %a2, %x
  ret float %res
}

define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %x = fmul <4 x float> %a0, %a1
  %res = fsub <4 x float> %a2, %x
  ret <4 x float> %res
}

define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f32_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f32_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %x = fmul <8 x float> %a0, %a1
  %res = fsub <8 x float> %a2, %x
  ret <8 x float> %res
}

define double @test_f64_fnmadd(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213sd %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul double %a0, %a1
  %res = fsub double %a2, %x
  ret double %res
}

define <2 x double> @test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %x = fmul <2 x double> %a0, %a1
  %res = fsub <2 x double> %a2, %x
  ret <2 x double> %res
}

define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f64_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f64_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %x = fmul <4 x double> %a0, %a1
  %res = fsub <4 x double> %a2, %x
  ret <4 x double> %res
}

;
; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
;

define float @test_f32_fnmsub(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ss %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul float %a0, %a1
  %y = fsub float -0.000000e+00, %x
  %res = fsub float %y, %a2
  ret float %res
}

define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %x = fmul <4 x float> %a0, %a1
  %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
  %res = fsub <4 x float> %y, %a2
  ret <4 x float> %res
}

define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f32_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f32_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %x = fmul <8 x float> %a0, %a1
  %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
  %res = fsub <8 x float> %y, %a2
  ret <8 x float> %res
}

define double @test_f64_fnmsub(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213sd %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul double %a0, %a1
  %y = fsub double -0.000000e+00, %x
  %res = fsub double %y, %a2
  ret double %res
}

define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %x = fmul <2 x double> %a0, %a1
  %y = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x
  %res = fsub <2 x double> %y, %a2
  ret <2 x double> %res
}

define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f64_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f64_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %x = fmul <4 x double> %a0, %a1
  %y = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
  %res = fsub <4 x double> %y, %a2
  ret <4 x double> %res
}

;
; Load Folding Patterns
;

define <4 x float> @test_4f32_fmadd_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fmadd_load:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd132ps (%rdi), %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fmadd_load:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fmadd_load:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovaps (%rdi), %xmm2
; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vmovaps %zmm2, %zmm0
; AVX512-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %a0
  %y = fmul <4 x float> %x, %a1
  %res = fadd <4 x float> %y, %a2
  ret <4 x float> %res
}

define <2 x double> @test_2f64_fmsub_load(<2 x double>* %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fmsub_load:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub132pd (%rdi), %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fmsub_load:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fmsub_load:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovapd (%rdi), %xmm2
; AVX512-NEXT:    vfmsub213pd %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vmovaps %zmm2, %zmm0
; AVX512-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %a0
  %y = fmul <2 x double> %x, %a1
  %res = fsub <2 x double> %y, %a2
  ret <2 x double> %res
}

;
; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
;

define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_add_x_one_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_add_x_one_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_add_x_one_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <4 x float> %a, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_y_add_x_one:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_y_add_x_one:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_y_add_x_one:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <4 x float> %y, %a
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_add_x_negone_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_add_x_negone_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_add_x_negone_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <4 x float> %a, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_y_add_x_negone:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_y_add_x_negone:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_y_add_x_negone:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <4 x float> %y, %a
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_sub_one_x_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_sub_one_x_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_sub_one_x_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
  %m = fmul <4 x float> %s, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_y_sub_one_x:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_y_sub_one_x:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_y_sub_one_x:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
  %m = fmul <4 x float> %y, %s
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_sub_negone_x_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_sub_negone_x_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_sub_negone_x_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
  %m = fmul <4 x float> %s, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_y_sub_negone_x:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_y_sub_negone_x:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_y_sub_negone_x:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
  %m = fmul <4 x float> %y, %s
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_sub_x_one_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_sub_x_one_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_sub_x_one_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <4 x float> %s, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_y_sub_x_one:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_y_sub_x_one:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_y_sub_x_one:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <4 x float> %y, %s
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_sub_x_negone_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_sub_x_negone_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_sub_x_negone_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <4 x float> %s, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_y_sub_x_negone:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_mul_y_sub_x_negone:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_mul_y_sub_x_negone:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <4 x float> %y, %s
  ret <4 x float> %m
}

;
; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
;

define float @test_f32_interp(float %x, float %y, float %t) {
; FMA-LABEL: test_f32_interp:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ss %xmm1, %xmm2, %xmm1
; FMA-NEXT:    vfmadd213ss %xmm1, %xmm2, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_interp:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddss %xmm1, %xmm1, %xmm2, %xmm1
; FMA4-NEXT:    vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_interp:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213ss %xmm1, %xmm2, %xmm1
; AVX512-NEXT:    vfmadd213ss %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vmovaps %zmm2, %zmm0
; AVX512-NEXT:    retq
  %t1 = fsub float 1.0, %t
  %tx = fmul float %x, %t
  %ty = fmul float %y, %t1
  %r = fadd float %tx, %ty
  ret float %r
}

define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) {
; FMA-LABEL: test_v4f32_interp:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %xmm1, %xmm2, %xmm1
; FMA-NEXT:    vfmadd213ps %xmm1, %xmm2, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_interp:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm2, %xmm1
; FMA4-NEXT:    vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_interp:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovaps %zmm2, %zmm3
; AVX512-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm3
; AVX512-NEXT:    vfmadd213ps %xmm3, %xmm2, %xmm0
; AVX512-NEXT:    retq
  %t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
  %tx = fmul <4 x float> %x, %t
  %ty = fmul <4 x float> %y, %t1
  %r = fadd <4 x float> %tx, %ty
  ret <4 x float> %r
}

define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) {
; FMA-LABEL: test_v8f32_interp:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %ymm1, %ymm2, %ymm1
; FMA-NEXT:    vfmadd213ps %ymm1, %ymm2, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f32_interp:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %ymm1, %ymm1, %ymm2, %ymm1
; FMA4-NEXT:    vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f32_interp:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovaps %zmm2, %zmm3
; AVX512-NEXT:    vfnmadd213ps %ymm1, %ymm1, %ymm3
; AVX512-NEXT:    vfmadd213ps %ymm3, %ymm2, %ymm0
; AVX512-NEXT:    retq
  %t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
  %tx = fmul <8 x float> %x, %t
  %ty = fmul <8 x float> %y, %t1
  %r = fadd <8 x float> %tx, %ty
  ret <8 x float> %r
}

define double @test_f64_interp(double %x, double %y, double %t) {
; FMA-LABEL: test_f64_interp:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213sd %xmm1, %xmm2, %xmm1
; FMA-NEXT:    vfmadd213sd %xmm1, %xmm2, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_interp:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddsd %xmm1, %xmm1, %xmm2, %xmm1
; FMA4-NEXT:    vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_interp:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213sd %xmm1, %xmm2, %xmm1
; AVX512-NEXT:    vfmadd213sd %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vmovaps %zmm2, %zmm0
; AVX512-NEXT:    retq
  %t1 = fsub double 1.0, %t
  %tx = fmul double %x, %t
  %ty = fmul double %y, %t1
  %r = fadd double %tx, %ty
  ret double %r
}

define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) {
; FMA-LABEL: test_v2f64_interp:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %xmm1, %xmm2, %xmm1
; FMA-NEXT:    vfmadd213pd %xmm1, %xmm2, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v2f64_interp:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %xmm1, %xmm1, %xmm2, %xmm1
; FMA4-NEXT:    vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v2f64_interp:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovaps %zmm2, %zmm3
; AVX512-NEXT:    vfnmadd213pd %xmm1, %xmm1, %xmm3
; AVX512-NEXT:    vfmadd213pd %xmm3, %xmm2, %xmm0
; AVX512-NEXT:    retq
  %t1 = fsub <2 x double> <double 1.0, double 1.0>, %t
  %tx = fmul <2 x double> %x, %t
  %ty = fmul <2 x double> %y, %t1
  %r = fadd <2 x double> %tx, %ty
  ret <2 x double> %r
}

define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) {
; FMA-LABEL: test_v4f64_interp:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm1, %ymm2, %ymm1
; FMA-NEXT:    vfmadd213pd %ymm1, %ymm2, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f64_interp:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm1, %ymm1, %ymm2, %ymm1
; FMA4-NEXT:    vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f64_interp:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovaps %zmm2, %zmm3
; AVX512-NEXT:    vfnmadd213pd %ymm1, %ymm1, %ymm3
; AVX512-NEXT:    vfmadd213pd %ymm3, %ymm2, %ymm0
; AVX512-NEXT:    retq
  %t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t
  %tx = fmul <4 x double> %x, %t
  %ty = fmul <4 x double> %y, %t1
  %r = fadd <4 x double> %tx, %ty
  ret <4 x double> %r
}

;
; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
;

define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; FMA-LABEL: test_v4f32_fneg_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_fneg_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_fneg_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %mul = fmul <4 x float> %a0, %a1
  %add = fadd <4 x float> %mul, %a2
  %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
  ret <4 x float> %neg
}

define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; FMA-LABEL: test_v4f64_fneg_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f64_fneg_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f64_fneg_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %mul = fmul <4 x double> %a0, %a1
  %sub = fsub <4 x double> %mul, %a2
  %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
  ret <4 x double> %neg
}

define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; FMA-LABEL: test_v4f32_fneg_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_fneg_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_fneg_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %mul = fmul <4 x float> %a0, %a1
  %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
  %add = fadd <4 x float> %neg0, %a2
  %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
  ret <4 x float> %neg1
}

define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; FMA-LABEL: test_v4f64_fneg_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f64_fneg_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f64_fneg_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %mul = fmul <4 x double> %a0, %a1
  %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
  %sub = fsub <4 x double> %neg0, %a2
  %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
  ret <4 x double> %neg1
}

;
; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
;

define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {
; FMA-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
; FMA:       # BB#0:
; FMA-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
; FMA4:       # BB#0:
; FMA4-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmulps {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
  %m1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0>
  %a  = fadd <4 x float> %m0, %m1
  ret <4 x float> %a
}

;
; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
;

define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 {
; FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd231ps {{.*}}(%rip), %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %m0 = fmul <4 x float> %x,  <float 1.0, float 2.0, float 3.0, float 4.0>
  %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0>
  %a  = fadd <4 x float> %m1, %y
  ret <4 x float> %a
}

;
; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0)
;

define double @test_f64_fneg_fmul(double %x, double %y) #0 {
; FMA-LABEL: test_f64_fneg_fmul:
; FMA:       # BB#0:
; FMA-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; FMA-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_fneg_fmul:
; FMA4:       # BB#0:
; FMA4-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; FMA4-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_fneg_fmul:
; AVX512:       # BB#0:
; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vfnmsub213sd %xmm2, %xmm0, %xmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %m = fmul nsz double %x, %y
  %n = fsub double -0.0, %m
  ret double %n
}

define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 {
; FMA-LABEL: test_v4f32_fneg_fmul:
; FMA:       # BB#0:
; FMA-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f32_fneg_fmul:
; FMA4:       # BB#0:
; FMA4-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_fneg_fmul:
; AVX512:       # BB#0:
; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %m = fmul nsz <4 x float> %x, %y
  %n = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %m
  ret <4 x float> %n
}

define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 {
; FMA-LABEL: test_v4f64_fneg_fmul:
; FMA:       # BB#0:
; FMA-NEXT:    vxorpd %ymm2, %ymm2, %ymm2
; FMA-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v4f64_fneg_fmul:
; FMA4:       # BB#0:
; FMA4-NEXT:    vxorpd %ymm2, %ymm2, %ymm2
; FMA4-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
1173 ; 1174 ; AVX512-LABEL: test_v4f64_fneg_fmul: 1175 ; AVX512: # BB#0: 1176 ; AVX512-NEXT: vxorps %ymm2, %ymm2, %ymm2 1177 ; AVX512-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 1178 ; AVX512-NEXT: retq 1179 %m = fmul nsz <4 x double> %x, %y 1180 %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m 1181 ret <4 x double> %n 1182 } 1183 1184 define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 { 1185 ; ALL-LABEL: test_v4f64_fneg_fmul_no_nsz: 1186 ; ALL: # BB#0: 1187 ; ALL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 1188 ; ALL-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0 1189 ; ALL-NEXT: retq 1190 %m = fmul <4 x double> %x, %y 1191 %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m 1192 ret <4 x double> %n 1193 } 1194 1195 attributes #0 = { "unsafe-fp-math"="true" } 1196