; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=AVX512

; Every function below carries one set of expected instructions per check
; prefix. The run that enables both +fma and +fma4 is verified against the
; FMA4 expectations.

;
; Multiply feeding an add: (fadd (fmul x, y), z) -> (fmadd x, y, z)
;

define float @test_f32_fmadd(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fmadd:
; FMA: # BB#0:
; FMA-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_f32_fmadd:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f32_fmadd:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1
; AVX512-NEXT: vmovaps %zmm1, %zmm0
; AVX512-NEXT: retq
  %mul = fmul float %a0, %a1
  %fma = fadd float %mul, %a2
  ret float %fma
}

define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fmadd:
; FMA: # BB#0:
; FMA-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_4f32_fmadd:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f32_fmadd:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
  %mul = fmul <4 x float> %a0, %a1
  %fma = fadd <4 x float> %mul, %a2
  ret <4 x float> %fma
}

define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fmadd:
; FMA: # BB#0:
; FMA-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_8f32_fmadd:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f32_fmadd:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
; AVX512-NEXT: retq
  %mul = fmul <8 x float> %a0, %a1
  %fma = fadd <8 x float> %mul, %a2
  ret <8 x float> %fma
}

define double @test_f64_fmadd(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fmadd:
; FMA: # BB#0:
; FMA-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_f64_fmadd:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f64_fmadd:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1
; AVX512-NEXT: vmovaps %zmm1, %zmm0
; AVX512-NEXT: retq
  %mul = fmul double %a0, %a1
  %fma = fadd double %mul, %a2
  ret double %fma
}

define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fmadd:
; FMA: # BB#0:
; FMA-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_2f64_fmadd:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_2f64_fmadd:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
  %mul = fmul <2 x double> %a0, %a1
  %fma = fadd <2 x double> %mul, %a2
  ret <2 x double> %fma
}

define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fmadd:
; FMA: # BB#0:
; FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_4f64_fmadd:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f64_fmadd:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT: retq
  %mul = fmul <4 x double> %a0, %a1
  %fma = fadd <4 x double> %mul, %a2
  ret <4 x double> %fma
}

;
; Multiply feeding a subtract: (fsub (fmul x, y), z) -> (fmsub x, y, z)
;

define float @test_f32_fmsub(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fmsub:
; FMA: # BB#0:
; FMA-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_f32_fmsub:
; FMA4: # BB#0:
; FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f32_fmsub:
; AVX512: # BB#0:
; AVX512-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1
; AVX512-NEXT: vmovaps %zmm1, %zmm0
; AVX512-NEXT: retq
  %mul = fmul float %a0, %a1
  %fms = fsub float %mul, %a2
  ret float %fms
}

define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fmsub:
; FMA: # BB#0:
; FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_4f32_fmsub:
; FMA4: # BB#0:
; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f32_fmsub:
; AVX512: # BB#0:
; AVX512-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
  %mul = fmul <4 x float> %a0, %a1
  %fms = fsub <4 x float> %mul, %a2
  ret <4 x float> %fms
}

define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fmsub:
; FMA: # BB#0:
; FMA-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_8f32_fmsub:
; FMA4: # BB#0:
; FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f32_fmsub:
; AVX512: # BB#0:
; AVX512-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
; AVX512-NEXT: retq
  %mul = fmul <8 x float> %a0, %a1
  %fms = fsub <8 x float> %mul, %a2
  ret <8 x float> %fms
}

define double @test_f64_fmsub(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fmsub:
; FMA: # BB#0:
; FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_f64_fmsub:
; FMA4: # BB#0:
; FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f64_fmsub:
; AVX512: # BB#0:
; AVX512-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1
; AVX512-NEXT: vmovaps %zmm1, %zmm0
; AVX512-NEXT: retq
  %mul = fmul double %a0, %a1
  %fms = fsub double %mul, %a2
  ret double %fms
}

define <2 x double> @test_2f64_fmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fmsub:
; FMA: # BB#0:
; FMA-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_2f64_fmsub:
; FMA4: # BB#0:
; FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_2f64_fmsub:
; AVX512: # BB#0:
; AVX512-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
  %mul = fmul <2 x double> %a0, %a1
  %fms = fsub <2 x double> %mul, %a2
  ret <2 x double> %fms
}

define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fmsub:
; FMA: # BB#0:
; FMA-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_4f64_fmsub:
; FMA4: # BB#0:
; FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f64_fmsub:
; AVX512: # BB#0:
; AVX512-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT: retq
  %mul = fmul <4 x double> %a0, %a1
  %fms = fsub <4 x double> %mul, %a2
  ret <4 x double> %fms
}

;
; Pattern: (fsub z,
; (fmul x, y)) -> (fnmadd x, y, z)
;

define float @test_f32_fnmadd(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fnmadd:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_f32_fnmadd:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f32_fnmadd:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1
; AVX512-NEXT: vmovaps %zmm1, %zmm0
; AVX512-NEXT: retq
  %mul = fmul float %a0, %a1
  %fnma = fsub float %a2, %mul
  ret float %fnma
}

define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fnmadd:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_4f32_fnmadd:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f32_fnmadd:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
  %mul = fmul <4 x float> %a0, %a1
  %fnma = fsub <4 x float> %a2, %mul
  ret <4 x float> %fnma
}

define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fnmadd:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_8f32_fnmadd:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f32_fnmadd:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; AVX512-NEXT: retq
  %mul = fmul <8 x float> %a0, %a1
  %fnma = fsub <8 x float> %a2, %mul
  ret <8 x float> %fnma
}

define double @test_f64_fnmadd(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fnmadd:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_f64_fnmadd:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f64_fnmadd:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1
; AVX512-NEXT: vmovaps %zmm1, %zmm0
; AVX512-NEXT: retq
  %mul = fmul double %a0, %a1
  %fnma = fsub double %a2, %mul
  ret double %fnma
}

define <2 x double> @test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fnmadd:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_2f64_fnmadd:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_2f64_fnmadd:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
  %mul = fmul <2 x double> %a0, %a1
  %fnma = fsub <2 x double> %a2, %mul
  ret <2 x double> %fnma
}

define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fnmadd:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_4f64_fnmadd:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f64_fnmadd:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT: retq
  %mul = fmul <4 x double> %a0, %a1
  %fnma = fsub <4 x double> %a2, %mul
  ret <4 x double> %fnma
}

;
; Negated multiply feeding a subtract:
;   (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
; The negation is spelled as a subtraction from -0.0.
;

define float @test_f32_fnmsub(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fnmsub:
; FMA: # BB#0:
; FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_f32_fnmsub:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f32_fnmsub:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1
; AVX512-NEXT: vmovaps %zmm1, %zmm0
; AVX512-NEXT: retq
  %mul = fmul float %a0, %a1
  %neg = fsub float -0.000000e+00, %mul
  %fnms = fsub float %neg, %a2
  ret float %fnms
}

define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fnmsub:
; FMA: # BB#0:
; FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_4f32_fnmsub:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f32_fnmsub:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
  %mul = fmul <4 x float> %a0, %a1
  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %mul
  %fnms = fsub <4 x float> %neg, %a2
  ret <4 x float> %fnms
}

define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fnmsub:
; FMA: # BB#0:
; FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_8f32_fnmsub:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_8f32_fnmsub:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
; AVX512-NEXT: retq
  %mul = fmul <8 x float> %a0, %a1
  %neg = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %mul
  %fnms = fsub <8 x float> %neg, %a2
  ret <8 x float> %fnms
}

define double @test_f64_fnmsub(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fnmsub:
; FMA: # BB#0:
; FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_f64_fnmsub:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f64_fnmsub:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1
; AVX512-NEXT: vmovaps %zmm1, %zmm0
; AVX512-NEXT: retq
  %mul = fmul double %a0, %a1
  %neg = fsub double -0.000000e+00, %mul
  %fnms = fsub double %neg, %a2
  ret double %fnms
}

define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fnmsub:
; FMA: # BB#0:
; FMA-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_2f64_fnmsub:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_2f64_fnmsub:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
  %mul = fmul <2 x double> %a0, %a1
  %neg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %mul
  %fnms = fsub <2 x double> %neg, %a2
  ret <2 x double> %fnms
}

define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fnmsub:
; FMA: # BB#0:
; FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_4f64_fnmsub:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f64_fnmsub:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT: retq
  %mul = fmul <4 x double> %a0, %a1
  %neg = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %mul
  %fnms = fsub <4 x double> %neg, %a2
  ret <4 x double> %fnms
}

;
; Load folding: one multiplicand is read from memory through %a0
;

define <4 x float> @test_4f32_fmadd_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fmadd_load:
; FMA: # BB#0:
; FMA-NEXT: vfmadd132ps (%rdi), %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_4f32_fmadd_load:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_4f32_fmadd_load:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps (%rdi), %xmm2
; AVX512-NEXT: vfmadd213ps %xmm1, %xmm0, %xmm2
; AVX512-NEXT: vmovaps %zmm2, %zmm0
; AVX512-NEXT: retq
  %ld = load <4 x float>, <4 x float>* %a0
  %mul = fmul <4 x float> %ld, %a1
  %fma = fadd <4 x float> %mul, %a2
  ret <4 x float> %fma
}

define <2 x double> @test_2f64_fmsub_load(<2 x double>* %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fmsub_load:
; FMA: # BB#0:
; FMA-NEXT: vfmsub132pd (%rdi), %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_2f64_fmsub_load:
; FMA4: # BB#0:
; FMA4-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_2f64_fmsub_load:
; AVX512: # BB#0:
; AVX512-NEXT: vmovapd (%rdi), %xmm2
; AVX512-NEXT: vfmsub213pd %xmm1, %xmm0, %xmm2
; AVX512-NEXT: vmovaps %zmm2, %zmm0
; AVX512-NEXT: retq
  %ld = load <2 x double>, <2 x double>* %a0
  %mul = fmul <2 x double> %ld, %a1
  %fms = fsub <2 x double> %mul, %a2
  ret <2 x double> %fms
}

;
; Multiplying by (x +/- 1.0), plus the negated forms:
;   mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
; Each shape is tested with both operand orders of the final fmul.
;

define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_add_x_one_y:
; FMA: # BB#0:
; FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_mul_add_x_one_y:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_mul_add_x_one_y:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT: retq
  %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %mul = fmul <4 x float> %add, %y
  ret <4 x float> %mul
}

define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_y_add_x_one:
; FMA: # BB#0:
; FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_mul_y_add_x_one:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_mul_y_add_x_one:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT: retq
  %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %mul = fmul <4 x float> %y, %add
  ret <4 x float> %mul
}

define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_add_x_negone_y:
; FMA: # BB#0:
; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_mul_add_x_negone_y:
; FMA4: # BB#0:
; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_mul_add_x_negone_y:
; AVX512: # BB#0:
; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT: retq
  %add = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
  %mul = fmul <4 x float> %add, %y
  ret <4 x float> %mul
}

define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_y_add_x_negone:
; FMA: # BB#0:
; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_mul_y_add_x_negone:
; FMA4: # BB#0:
; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_mul_y_add_x_negone:
; AVX512: # BB#0:
; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT: retq
  %add = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
  %mul = fmul <4 x float> %y, %add
  ret <4 x float> %mul
}

define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_sub_one_x_y:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_mul_sub_one_x_y:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_mul_sub_one_x_y:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT: retq
  %sub = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
  %mul = fmul <4 x float> %sub, %y
  ret <4 x float> %mul
}

define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_y_sub_one_x:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_mul_y_sub_one_x:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_mul_y_sub_one_x:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT: retq
  %sub = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
  %mul = fmul <4 x float> %y, %sub
  ret <4 x float> %mul
}

define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_sub_negone_x_y:
; FMA: # BB#0:
; FMA-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_mul_sub_negone_x_y:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_mul_sub_negone_x_y:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT: retq
  %sub = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
  %mul = fmul <4 x float> %sub, %y
  ret <4 x float> %mul
}

define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_y_sub_negone_x:
; FMA: # BB#0:
; FMA-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_mul_y_sub_negone_x:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_mul_y_sub_negone_x:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT: retq
  %sub = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
  %mul = fmul <4 x float> %y, %sub
  ret <4 x float> %mul
}

define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_sub_x_one_y:
; FMA: # BB#0:
; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_mul_sub_x_one_y:
; FMA4: # BB#0:
; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_mul_sub_x_one_y:
; AVX512: # BB#0:
; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT: retq
  %sub = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %mul = fmul <4 x float> %sub, %y
  ret <4 x float> %mul
}

define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_y_sub_x_one:
; FMA: # BB#0:
; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_mul_y_sub_x_one:
; FMA4: # BB#0:
; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_mul_y_sub_x_one:
; AVX512: # BB#0:
; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT: retq
  %sub = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %mul = fmul <4 x float> %y, %sub
  ret <4 x float> %mul
}

define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_sub_x_negone_y:
; FMA: # BB#0:
; FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_mul_sub_x_negone_y:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_mul_sub_x_negone_y:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT: retq
  %sub = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
  %mul = fmul <4 x float> %sub, %y
  ret <4 x float> %mul
}

define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) {
; FMA-LABEL: test_v4f32_mul_y_sub_x_negone:
; FMA: # BB#0:
; FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_mul_y_sub_x_negone:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_mul_y_sub_x_negone:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
; AVX512-NEXT: retq
  %sub = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
  %mul = fmul <4 x float> %y, %sub
  ret <4 x float> %mul
}

;
; Linear interpolation between x and y by t:
;   add(mul(x,t),mul(sub(1.0,t),y))
;

define float @test_f32_interp(float %x, float %y, float %t) {
; FMA-LABEL: test_f32_interp:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1
; FMA-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_f32_interp:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddss %xmm1, %xmm1, %xmm2, %xmm1
; FMA4-NEXT: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f32_interp:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1
; AVX512-NEXT: vfmadd213ss %xmm1, %xmm0, %xmm2
; AVX512-NEXT: vmovaps %zmm2, %zmm0
; AVX512-NEXT: retq
  %omt = fsub float 1.0, %t
  %xt = fmul float %x, %t
  %yt = fmul float %y, %omt
  %lerp = fadd float %xt, %yt
  ret float %lerp
}

define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) {
; FMA-LABEL: test_v4f32_interp:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213ps %xmm1, %xmm2, %xmm1
; FMA-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_interp:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm2, %xmm1
; FMA4-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_interp:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps %zmm2, %zmm3
; AVX512-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm3
; AVX512-NEXT: vfmadd213ps %xmm3, %xmm2, %xmm0
; AVX512-NEXT: retq
  %omt = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
  %xt = fmul <4 x float> %x, %t
  %yt = fmul <4 x float> %y, %omt
  %lerp = fadd <4 x float> %xt, %yt
  ret <4 x float> %lerp
}

define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) {
; FMA-LABEL: test_v8f32_interp:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213ps %ymm1, %ymm2, %ymm1
; FMA-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v8f32_interp:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddps %ymm1, %ymm1, %ymm2, %ymm1
; FMA4-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v8f32_interp:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps %zmm2, %zmm3
; AVX512-NEXT: vfnmadd213ps %ymm1, %ymm1, %ymm3
; AVX512-NEXT: vfmadd213ps %ymm3, %ymm2, %ymm0
; AVX512-NEXT: retq
  %omt = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
  %xt = fmul <8 x float> %x, %t
  %yt = fmul <8 x float> %y, %omt
  %lerp = fadd <8 x float> %xt, %yt
  ret <8 x float> %lerp
}

define double @test_f64_interp(double %x, double %y, double %t) {
; FMA-LABEL: test_f64_interp:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1
; FMA-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_f64_interp:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddsd %xmm1, %xmm1, %xmm2, %xmm1
; FMA4-NEXT: vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_f64_interp:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1
; AVX512-NEXT: vfmadd213sd %xmm1, %xmm0, %xmm2
; AVX512-NEXT: vmovaps %zmm2, %zmm0
; AVX512-NEXT: retq
  %omt = fsub double 1.0, %t
  %xt = fmul double %x, %t
  %yt = fmul double %y, %omt
  %lerp = fadd double %xt, %yt
  ret double %lerp
}

define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) {
; FMA-LABEL: test_v2f64_interp:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213pd %xmm1, %xmm2, %xmm1
; FMA-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v2f64_interp:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddpd %xmm1, %xmm1, %xmm2, %xmm1
; FMA4-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v2f64_interp:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps %zmm2, %zmm3
; AVX512-NEXT: vfnmadd213pd %xmm1, %xmm1, %xmm3
; AVX512-NEXT: vfmadd213pd %xmm3, %xmm2, %xmm0
; AVX512-NEXT: retq
  %omt = fsub <2 x double> <double 1.0, double 1.0>, %t
  %xt = fmul <2 x double> %x, %t
  %yt = fmul <2 x double> %y, %omt
  %lerp = fadd <2 x double> %xt, %yt
  ret <2 x double> %lerp
}

define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) {
; FMA-LABEL: test_v4f64_interp:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213pd %ymm1, %ymm2, %ymm1
; FMA-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f64_interp:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddpd %ymm1, %ymm1, %ymm2, %ymm1
; FMA4-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f64_interp:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps %zmm2, %zmm3
; AVX512-NEXT: vfnmadd213pd %ymm1, %ymm1, %ymm3
; AVX512-NEXT: vfmadd213pd %ymm3, %ymm2, %ymm0
; AVX512-NEXT: retq
  %omt = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t
  %xt = fmul <4 x double> %x, %t
  %yt = fmul <4 x double> %y, %omt
  %lerp = fadd <4 x double> %xt, %yt
  ret <4 x double> %lerp
}

;
; Negating a whole multiply-add: (fneg (fma x, y, z)) -> (fma x, -y, -z)
;

define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; FMA-LABEL: test_v4f32_fneg_fmadd:
; FMA: # BB#0:
; FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_fneg_fmadd:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_fneg_fmadd:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
  %prod = fmul <4 x float> %a0, %a1
  %sum = fadd <4 x float> %prod, %a2
  %res = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %sum
  ret <4 x float> %res
}

define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; FMA-LABEL: test_v4f64_fneg_fmsub:
; FMA: # BB#0:
; FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f64_fneg_fmsub:
; FMA4: # BB#0:
; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f64_fneg_fmsub:
; AVX512: # BB#0:
; AVX512-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT: retq
  %prod = fmul <4 x double> %a0, %a1
  %diff = fsub <4 x double> %prod, %a2
  %res = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %diff
  ret <4 x double> %res
}

define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; FMA-LABEL: test_v4f32_fneg_fnmadd:
; FMA: # BB#0:
; FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_fneg_fnmadd:
; FMA4: # BB#0:
; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_fneg_fnmadd:
; AVX512: # BB#0:
; AVX512-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
; AVX512-NEXT: retq
  %prod = fmul <4 x float> %a0, %a1
  %nprod = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %prod
  %sum = fadd <4 x float> %nprod, %a2
  %res = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %sum
  ret <4 x float> %res
}

define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; FMA-LABEL: test_v4f64_fneg_fnmsub:
; FMA: # BB#0:
; FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f64_fneg_fnmsub:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f64_fneg_fnmsub:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
; AVX512-NEXT: retq
  %prod = fmul <4 x double> %a0, %a1
  %nprod = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %prod
  %diff = fsub <4 x double> %nprod, %a2
  %res = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %diff
  ret <4 x double> %res
}

;
; Two constant multiplies of the same x, summed:
;   (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
;

define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {
; FMA-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
; FMA: # BB#0:
; FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
; FMA4: # BB#0:
; FMA4-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
; AVX512: # BB#0:
; AVX512-NEXT: vmulps {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT: retq
  %mul0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
  %mul1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0>
  %sum = fadd <4 x float> %mul0, %mul1
  ret <4 x float> %sum
}

;
; Chained constant multiplies feeding an add:
;   (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
;

define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 {
; FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
; FMA: # BB#0:
; FMA-NEXT: vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0
; FMA-NEXT: retq
;
; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
; FMA4: # BB#0:
; FMA4-NEXT: vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; FMA4-NEXT: retq
;
; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
; AVX512: # BB#0:
; AVX512-NEXT: vfmadd231ps {{.*}}(%rip), %xmm0, %xmm1
; AVX512-NEXT: vmovaps %zmm1, %zmm0
; AVX512-NEXT: retq
  %mul0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
  %mul1 = fmul <4 x float> %mul0, <float 4.0, float 3.0, float 2.0, float 1.0>
  %sum = fadd <4 x float> %mul1, %y
  ret <4 x float> %sum
}

; Pattern: (fneg (fmul x, y))
-> (fnmsub x, y, 0) 1114 1115 define double @test_f64_fneg_fmul(double %x, double %y) #0 { 1116 ; FMA-LABEL: test_f64_fneg_fmul: 1117 ; FMA: # BB#0: 1118 ; FMA-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1119 ; FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 1120 ; FMA-NEXT: retq 1121 ; 1122 ; FMA4-LABEL: test_f64_fneg_fmul: 1123 ; FMA4: # BB#0: 1124 ; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1125 ; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 1126 ; FMA4-NEXT: retq 1127 ; 1128 ; AVX512-LABEL: test_f64_fneg_fmul: 1129 ; AVX512: # BB#0: 1130 ; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 1131 ; AVX512-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 1132 ; AVX512-NEXT: vmovaps %zmm1, %zmm0 1133 ; AVX512-NEXT: retq 1134 %m = fmul nsz double %x, %y 1135 %n = fsub double -0.0, %m 1136 ret double %n 1137 } 1138 1139 define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 { 1140 ; FMA-LABEL: test_v4f32_fneg_fmul: 1141 ; FMA: # BB#0: 1142 ; FMA-NEXT: vxorps %xmm2, %xmm2, %xmm2 1143 ; FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 1144 ; FMA-NEXT: retq 1145 ; 1146 ; FMA4-LABEL: test_v4f32_fneg_fmul: 1147 ; FMA4: # BB#0: 1148 ; FMA4-NEXT: vxorps %xmm2, %xmm2, %xmm2 1149 ; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 1150 ; FMA4-NEXT: retq 1151 ; 1152 ; AVX512-LABEL: test_v4f32_fneg_fmul: 1153 ; AVX512: # BB#0: 1154 ; AVX512-NEXT: vpxord %xmm2, %xmm2, %xmm2 1155 ; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 1156 ; AVX512-NEXT: retq 1157 %m = fmul nsz <4 x float> %x, %y 1158 %n = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %m 1159 ret <4 x float> %n 1160 } 1161 1162 define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 { 1163 ; FMA-LABEL: test_v4f64_fneg_fmul: 1164 ; FMA: # BB#0: 1165 ; FMA-NEXT: vxorpd %ymm2, %ymm2, %ymm2 1166 ; FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 1167 ; FMA-NEXT: retq 1168 ; 1169 ; FMA4-LABEL: test_v4f64_fneg_fmul: 1170 ; FMA4: # BB#0: 1171 ; FMA4-NEXT: vxorpd %ymm2, %ymm2, %ymm2 1172 ; FMA4-NEXT: vfnmsubpd 
%ymm2, %ymm1, %ymm0, %ymm0 1173 ; FMA4-NEXT: retq 1174 ; 1175 ; AVX512-LABEL: test_v4f64_fneg_fmul: 1176 ; AVX512: # BB#0: 1177 ; AVX512-NEXT: vpxord %ymm2, %ymm2, %ymm2 1178 ; AVX512-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 1179 ; AVX512-NEXT: retq 1180 %m = fmul nsz <4 x double> %x, %y 1181 %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m 1182 ret <4 x double> %n 1183 } 1184 1185 define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 { 1186 ; ALL-LABEL: test_v4f64_fneg_fmul_no_nsz: 1187 ; ALL: # BB#0: 1188 ; ALL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 1189 ; ALL-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0 1190 ; ALL-NEXT: retq 1191 %m = fmul <4 x double> %x, %y 1192 %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m 1193 ret <4 x double> %n 1194 } 1195 1196 attributes #0 = { "unsafe-fp-math"="true" } 1197