Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=AVX512
      6 
      7 ;
      8 ; Pattern: (fadd (fmul x, y), z) -> (fmadd x, y, z)
      9 ;
     10 
     11 define float @test_f32_fmadd(float %a0, float %a1, float %a2) {
     12 ; FMA-LABEL: test_f32_fmadd:
     13 ; FMA:       # BB#0:
     14 ; FMA-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0
     15 ; FMA-NEXT:    retq
     16 ;
     17 ; FMA4-LABEL: test_f32_fmadd:
     18 ; FMA4:       # BB#0:
     19 ; FMA4-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
     20 ; FMA4-NEXT:    retq
     21 ;
     22 ; AVX512-LABEL: test_f32_fmadd:
     23 ; AVX512:       # BB#0:
     24 ; AVX512-NEXT:    vfmadd213ss %xmm2, %xmm0, %xmm1
     25 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
     26 ; AVX512-NEXT:    retq
          ; Scalar float a0 * a1 + a2, written as a separate fmul and fadd.
          ; The FMA, FMA4 and AVX512 checks each expect one fused multiply-add;
          ; the AVX512 checks also expect a vmovaps from %zmm1 to %zmm0 before retq.
     27   %x = fmul float %a0, %a1
     28   %res = fadd float %x, %a2
     29   ret float %res
     30 }
     31 
     32 define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
     33 ; FMA-LABEL: test_4f32_fmadd:
     34 ; FMA:       # BB#0:
     35 ; FMA-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
     36 ; FMA-NEXT:    retq
     37 ;
     38 ; FMA4-LABEL: test_4f32_fmadd:
     39 ; FMA4:       # BB#0:
     40 ; FMA4-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
     41 ; FMA4-NEXT:    retq
     42 ;
     43 ; AVX512-LABEL: test_4f32_fmadd:
     44 ; AVX512:       # BB#0:
     45 ; AVX512-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
     46 ; AVX512-NEXT:    retq
          ; <4 x float> a0 * a1 + a2, written as a separate fmul and fadd.
          ; The FMA, FMA4 and AVX512 checks each expect one packed fused
          ; multiply-add on xmm registers.
     47   %x = fmul <4 x float> %a0, %a1
     48   %res = fadd <4 x float> %x, %a2
     49   ret <4 x float> %res
     50 }
     51 
     52 define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
     53 ; FMA-LABEL: test_8f32_fmadd:
     54 ; FMA:       # BB#0:
     55 ; FMA-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
     56 ; FMA-NEXT:    retq
     57 ;
     58 ; FMA4-LABEL: test_8f32_fmadd:
     59 ; FMA4:       # BB#0:
     60 ; FMA4-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
     61 ; FMA4-NEXT:    retq
     62 ;
     63 ; AVX512-LABEL: test_8f32_fmadd:
     64 ; AVX512:       # BB#0:
     65 ; AVX512-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
     66 ; AVX512-NEXT:    retq
          ; <8 x float> a0 * a1 + a2, written as a separate fmul and fadd.
          ; The FMA, FMA4 and AVX512 checks each expect one packed fused
          ; multiply-add on ymm registers.
     67   %x = fmul <8 x float> %a0, %a1
     68   %res = fadd <8 x float> %x, %a2
     69   ret <8 x float> %res
     70 }
     71 
     72 define double @test_f64_fmadd(double %a0, double %a1, double %a2) {
     73 ; FMA-LABEL: test_f64_fmadd:
     74 ; FMA:       # BB#0:
     75 ; FMA-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0
     76 ; FMA-NEXT:    retq
     77 ;
     78 ; FMA4-LABEL: test_f64_fmadd:
     79 ; FMA4:       # BB#0:
     80 ; FMA4-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
     81 ; FMA4-NEXT:    retq
     82 ;
     83 ; AVX512-LABEL: test_f64_fmadd:
     84 ; AVX512:       # BB#0:
     85 ; AVX512-NEXT:    vfmadd213sd %xmm2, %xmm0, %xmm1
     86 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
     87 ; AVX512-NEXT:    retq
          ; Scalar double a0 * a1 + a2, written as a separate fmul and fadd.
          ; The FMA, FMA4 and AVX512 checks each expect one fused multiply-add;
          ; the AVX512 checks also expect a vmovaps from %zmm1 to %zmm0 before retq.
     88   %x = fmul double %a0, %a1
     89   %res = fadd double %x, %a2
     90   ret double %res
     91 }
     92 
     93 define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
     94 ; FMA-LABEL: test_2f64_fmadd:
     95 ; FMA:       # BB#0:
     96 ; FMA-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
     97 ; FMA-NEXT:    retq
     98 ;
     99 ; FMA4-LABEL: test_2f64_fmadd:
    100 ; FMA4:       # BB#0:
    101 ; FMA4-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
    102 ; FMA4-NEXT:    retq
    103 ;
    104 ; AVX512-LABEL: test_2f64_fmadd:
    105 ; AVX512:       # BB#0:
    106 ; AVX512-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
    107 ; AVX512-NEXT:    retq
          ; <2 x double> a0 * a1 + a2, written as a separate fmul and fadd.
          ; The FMA, FMA4 and AVX512 checks each expect one packed fused
          ; multiply-add on xmm registers.
    108   %x = fmul <2 x double> %a0, %a1
    109   %res = fadd <2 x double> %x, %a2
    110   ret <2 x double> %res
    111 }
    112 
    113 define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
    114 ; FMA-LABEL: test_4f64_fmadd:
    115 ; FMA:       # BB#0:
    116 ; FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
    117 ; FMA-NEXT:    retq
    118 ;
    119 ; FMA4-LABEL: test_4f64_fmadd:
    120 ; FMA4:       # BB#0:
    121 ; FMA4-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
    122 ; FMA4-NEXT:    retq
    123 ;
    124 ; AVX512-LABEL: test_4f64_fmadd:
    125 ; AVX512:       # BB#0:
    126 ; AVX512-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
    127 ; AVX512-NEXT:    retq
          ; <4 x double> a0 * a1 + a2, written as a separate fmul and fadd.
          ; The FMA, FMA4 and AVX512 checks each expect one packed fused
          ; multiply-add on ymm registers.
    128   %x = fmul <4 x double> %a0, %a1
    129   %res = fadd <4 x double> %x, %a2
    130   ret <4 x double> %res
    131 }
    132 
    133 ;
    134 ; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
    135 ;
    136 
    137 define float @test_f32_fmsub(float %a0, float %a1, float %a2) {
    138 ; FMA-LABEL: test_f32_fmsub:
    139 ; FMA:       # BB#0:
    140 ; FMA-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0
    141 ; FMA-NEXT:    retq
    142 ;
    143 ; FMA4-LABEL: test_f32_fmsub:
    144 ; FMA4:       # BB#0:
    145 ; FMA4-NEXT:    vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
    146 ; FMA4-NEXT:    retq
    147 ;
    148 ; AVX512-LABEL: test_f32_fmsub:
    149 ; AVX512:       # BB#0:
    150 ; AVX512-NEXT:    vfmsub213ss %xmm2, %xmm0, %xmm1
    151 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
    152 ; AVX512-NEXT:    retq
          ; Scalar float a0 * a1 - a2, written as a separate fmul and fsub.
          ; The FMA, FMA4 and AVX512 checks each expect one fused multiply-subtract;
          ; the AVX512 checks also expect a vmovaps from %zmm1 to %zmm0 before retq.
    153   %x = fmul float %a0, %a1
    154   %res = fsub float %x, %a2
    155   ret float %res
    156 }
    157 
    158 define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
    159 ; FMA-LABEL: test_4f32_fmsub:
    160 ; FMA:       # BB#0:
    161 ; FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
    162 ; FMA-NEXT:    retq
    163 ;
    164 ; FMA4-LABEL: test_4f32_fmsub:
    165 ; FMA4:       # BB#0:
    166 ; FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
    167 ; FMA4-NEXT:    retq
    168 ;
    169 ; AVX512-LABEL: test_4f32_fmsub:
    170 ; AVX512:       # BB#0:
    171 ; AVX512-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
    172 ; AVX512-NEXT:    retq
          ; <4 x float> a0 * a1 - a2, written as a separate fmul and fsub.
          ; The FMA, FMA4 and AVX512 checks each expect one packed fused
          ; multiply-subtract on xmm registers.
    173   %x = fmul <4 x float> %a0, %a1
    174   %res = fsub <4 x float> %x, %a2
    175   ret <4 x float> %res
    176 }
    177 
    178 define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
    179 ; FMA-LABEL: test_8f32_fmsub:
    180 ; FMA:       # BB#0:
    181 ; FMA-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0
    182 ; FMA-NEXT:    retq
    183 ;
    184 ; FMA4-LABEL: test_8f32_fmsub:
    185 ; FMA4:       # BB#0:
    186 ; FMA4-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
    187 ; FMA4-NEXT:    retq
    188 ;
    189 ; AVX512-LABEL: test_8f32_fmsub:
    190 ; AVX512:       # BB#0:
    191 ; AVX512-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0
    192 ; AVX512-NEXT:    retq
          ; <8 x float> a0 * a1 - a2, written as a separate fmul and fsub.
          ; The FMA, FMA4 and AVX512 checks each expect one packed fused
          ; multiply-subtract on ymm registers.
    193   %x = fmul <8 x float> %a0, %a1
    194   %res = fsub <8 x float> %x, %a2
    195   ret <8 x float> %res
    196 }
    197 
    198 define double @test_f64_fmsub(double %a0, double %a1, double %a2) {
    199 ; FMA-LABEL: test_f64_fmsub:
    200 ; FMA:       # BB#0:
    201 ; FMA-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0
    202 ; FMA-NEXT:    retq
    203 ;
    204 ; FMA4-LABEL: test_f64_fmsub:
    205 ; FMA4:       # BB#0:
    206 ; FMA4-NEXT:    vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
    207 ; FMA4-NEXT:    retq
    208 ;
    209 ; AVX512-LABEL: test_f64_fmsub:
    210 ; AVX512:       # BB#0:
    211 ; AVX512-NEXT:    vfmsub213sd %xmm2, %xmm0, %xmm1
    212 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
    213 ; AVX512-NEXT:    retq
          ; Scalar double a0 * a1 - a2, written as a separate fmul and fsub.
          ; The FMA, FMA4 and AVX512 checks each expect one fused multiply-subtract;
          ; the AVX512 checks also expect a vmovaps from %zmm1 to %zmm0 before retq.
    214   %x = fmul double %a0, %a1
    215   %res = fsub double %x, %a2
    216   ret double %res
    217 }
    218 
    219 define <2 x double> @test_2f64_fmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
    220 ; FMA-LABEL: test_2f64_fmsub:
    221 ; FMA:       # BB#0:
    222 ; FMA-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0
    223 ; FMA-NEXT:    retq
    224 ;
    225 ; FMA4-LABEL: test_2f64_fmsub:
    226 ; FMA4:       # BB#0:
    227 ; FMA4-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
    228 ; FMA4-NEXT:    retq
    229 ;
    230 ; AVX512-LABEL: test_2f64_fmsub:
    231 ; AVX512:       # BB#0:
    232 ; AVX512-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0
    233 ; AVX512-NEXT:    retq
          ; <2 x double> a0 * a1 - a2, written as a separate fmul and fsub.
          ; The FMA, FMA4 and AVX512 checks each expect one packed fused
          ; multiply-subtract on xmm registers.
    234   %x = fmul <2 x double> %a0, %a1
    235   %res = fsub <2 x double> %x, %a2
    236   ret <2 x double> %res
    237 }
    238 
    239 define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
    240 ; FMA-LABEL: test_4f64_fmsub:
    241 ; FMA:       # BB#0:
    242 ; FMA-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0
    243 ; FMA-NEXT:    retq
    244 ;
    245 ; FMA4-LABEL: test_4f64_fmsub:
    246 ; FMA4:       # BB#0:
    247 ; FMA4-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
    248 ; FMA4-NEXT:    retq
    249 ;
    250 ; AVX512-LABEL: test_4f64_fmsub:
    251 ; AVX512:       # BB#0:
    252 ; AVX512-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0
    253 ; AVX512-NEXT:    retq
          ; <4 x double> a0 * a1 - a2, written as a separate fmul and fsub.
          ; The FMA, FMA4 and AVX512 checks each expect one packed fused
          ; multiply-subtract on ymm registers.
    254   %x = fmul <4 x double> %a0, %a1
    255   %res = fsub <4 x double> %x, %a2
    256   ret <4 x double> %res
    257 }
    258 
    259 ;
    260 ; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
    261 ;
    262 
    263 define float @test_f32_fnmadd(float %a0, float %a1, float %a2) {
    264 ; FMA-LABEL: test_f32_fnmadd:
    265 ; FMA:       # BB#0:
    266 ; FMA-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0
    267 ; FMA-NEXT:    retq
    268 ;
    269 ; FMA4-LABEL: test_f32_fnmadd:
    270 ; FMA4:       # BB#0:
    271 ; FMA4-NEXT:    vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
    272 ; FMA4-NEXT:    retq
    273 ;
    274 ; AVX512-LABEL: test_f32_fnmadd:
    275 ; AVX512:       # BB#0:
    276 ; AVX512-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm1
    277 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
    278 ; AVX512-NEXT:    retq
          ; Scalar float a2 - a0 * a1: the fmul result is the subtrahend of the fsub.
          ; The FMA, FMA4 and AVX512 checks each expect one vfnmadd-family instruction;
          ; the AVX512 checks also expect a vmovaps from %zmm1 to %zmm0 before retq.
    279   %x = fmul float %a0, %a1
    280   %res = fsub float %a2, %x
    281   ret float %res
    282 }
    283 
    284 define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
    285 ; FMA-LABEL: test_4f32_fnmadd:
    286 ; FMA:       # BB#0:
    287 ; FMA-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
    288 ; FMA-NEXT:    retq
    289 ;
    290 ; FMA4-LABEL: test_4f32_fnmadd:
    291 ; FMA4:       # BB#0:
    292 ; FMA4-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
    293 ; FMA4-NEXT:    retq
    294 ;
    295 ; AVX512-LABEL: test_4f32_fnmadd:
    296 ; AVX512:       # BB#0:
    297 ; AVX512-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
    298 ; AVX512-NEXT:    retq
          ; <4 x float> a2 - a0 * a1: the fmul result is the subtrahend of the fsub.
          ; The FMA, FMA4 and AVX512 checks each expect one packed vfnmadd-family
          ; instruction on xmm registers.
    299   %x = fmul <4 x float> %a0, %a1
    300   %res = fsub <4 x float> %a2, %x
    301   ret <4 x float> %res
    302 }
    303 
    304 define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
    305 ; FMA-LABEL: test_8f32_fnmadd:
    306 ; FMA:       # BB#0:
    307 ; FMA-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
    308 ; FMA-NEXT:    retq
    309 ;
    310 ; FMA4-LABEL: test_8f32_fnmadd:
    311 ; FMA4:       # BB#0:
    312 ; FMA4-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
    313 ; FMA4-NEXT:    retq
    314 ;
    315 ; AVX512-LABEL: test_8f32_fnmadd:
    316 ; AVX512:       # BB#0:
    317 ; AVX512-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
    318 ; AVX512-NEXT:    retq
          ; <8 x float> a2 - a0 * a1: the fmul result is the subtrahend of the fsub.
          ; The FMA, FMA4 and AVX512 checks each expect one packed vfnmadd-family
          ; instruction on ymm registers.
    319   %x = fmul <8 x float> %a0, %a1
    320   %res = fsub <8 x float> %a2, %x
    321   ret <8 x float> %res
    322 }
    323 
    324 define double @test_f64_fnmadd(double %a0, double %a1, double %a2) {
    325 ; FMA-LABEL: test_f64_fnmadd:
    326 ; FMA:       # BB#0:
    327 ; FMA-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0
    328 ; FMA-NEXT:    retq
    329 ;
    330 ; FMA4-LABEL: test_f64_fnmadd:
    331 ; FMA4:       # BB#0:
    332 ; FMA4-NEXT:    vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
    333 ; FMA4-NEXT:    retq
    334 ;
    335 ; AVX512-LABEL: test_f64_fnmadd:
    336 ; AVX512:       # BB#0:
    337 ; AVX512-NEXT:    vfnmadd213sd %xmm2, %xmm0, %xmm1
    338 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
    339 ; AVX512-NEXT:    retq
          ; Scalar double a2 - a0 * a1: the fmul result is the subtrahend of the fsub.
          ; The FMA, FMA4 and AVX512 checks each expect one vfnmadd-family instruction;
          ; the AVX512 checks also expect a vmovaps from %zmm1 to %zmm0 before retq.
    340   %x = fmul double %a0, %a1
    341   %res = fsub double %a2, %x
    342   ret double %res
    343 }
    344 
    345 define <2 x double> @test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
    346 ; FMA-LABEL: test_2f64_fnmadd:
    347 ; FMA:       # BB#0:
    348 ; FMA-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0
    349 ; FMA-NEXT:    retq
    350 ;
    351 ; FMA4-LABEL: test_2f64_fnmadd:
    352 ; FMA4:       # BB#0:
    353 ; FMA4-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
    354 ; FMA4-NEXT:    retq
    355 ;
    356 ; AVX512-LABEL: test_2f64_fnmadd:
    357 ; AVX512:       # BB#0:
    358 ; AVX512-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0
    359 ; AVX512-NEXT:    retq
          ; <2 x double> a2 - a0 * a1: the fmul result is the subtrahend of the fsub.
          ; The FMA, FMA4 and AVX512 checks each expect one packed vfnmadd-family
          ; instruction on xmm registers.
    360   %x = fmul <2 x double> %a0, %a1
    361   %res = fsub <2 x double> %a2, %x
    362   ret <2 x double> %res
    363 }
    364 
    365 define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
    366 ; FMA-LABEL: test_4f64_fnmadd:
    367 ; FMA:       # BB#0:
    368 ; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
    369 ; FMA-NEXT:    retq
    370 ;
    371 ; FMA4-LABEL: test_4f64_fnmadd:
    372 ; FMA4:       # BB#0:
    373 ; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
    374 ; FMA4-NEXT:    retq
    375 ;
    376 ; AVX512-LABEL: test_4f64_fnmadd:
    377 ; AVX512:       # BB#0:
    378 ; AVX512-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
    379 ; AVX512-NEXT:    retq
          ; <4 x double> a2 - a0 * a1: the fmul result is the subtrahend of the fsub.
          ; The FMA, FMA4 and AVX512 checks each expect one packed vfnmadd-family
          ; instruction on ymm registers.
    380   %x = fmul <4 x double> %a0, %a1
    381   %res = fsub <4 x double> %a2, %x
    382   ret <4 x double> %res
    383 }
    384 
    385 ;
    386 ; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
    387 ;
    388 
    389 define float @test_f32_fnmsub(float %a0, float %a1, float %a2) {
    390 ; FMA-LABEL: test_f32_fnmsub:
    391 ; FMA:       # BB#0:
    392 ; FMA-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0
    393 ; FMA-NEXT:    retq
    394 ;
    395 ; FMA4-LABEL: test_f32_fnmsub:
    396 ; FMA4:       # BB#0:
    397 ; FMA4-NEXT:    vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
    398 ; FMA4-NEXT:    retq
    399 ;
    400 ; AVX512-LABEL: test_f32_fnmsub:
    401 ; AVX512:       # BB#0:
    402 ; AVX512-NEXT:    vfnmsub213ss %xmm2, %xmm0, %xmm1
    403 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
    404 ; AVX512-NEXT:    retq
          ; Scalar float -(a0 * a1) - a2: the product is negated by subtracting it
          ; from -0.0, then a2 is subtracted. The FMA, FMA4 and AVX512 checks each
          ; expect one vfnmsub-family instruction; the AVX512 checks also expect a
          ; vmovaps from %zmm1 to %zmm0 before retq.
    405   %x = fmul float %a0, %a1
    406   %y = fsub float -0.000000e+00, %x
    407   %res = fsub float %y, %a2
    408   ret float %res
    409 }
    410 
    411 define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
    412 ; FMA-LABEL: test_4f32_fnmsub:
    413 ; FMA:       # BB#0:
    414 ; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
    415 ; FMA-NEXT:    retq
    416 ;
    417 ; FMA4-LABEL: test_4f32_fnmsub:
    418 ; FMA4:       # BB#0:
    419 ; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
    420 ; FMA4-NEXT:    retq
    421 ;
    422 ; AVX512-LABEL: test_4f32_fnmsub:
    423 ; AVX512:       # BB#0:
    424 ; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
    425 ; AVX512-NEXT:    retq
          ; <4 x float> -(a0 * a1) - a2: the product is negated by subtracting it
          ; from a -0.0 splat, then a2 is subtracted. The FMA, FMA4 and AVX512
          ; checks each expect one packed vfnmsub-family instruction on xmm registers.
    426   %x = fmul <4 x float> %a0, %a1
    427   %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
    428   %res = fsub <4 x float> %y, %a2
    429   ret <4 x float> %res
    430 }
    431 
    432 define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
    433 ; FMA-LABEL: test_8f32_fnmsub:
    434 ; FMA:       # BB#0:
    435 ; FMA-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0
    436 ; FMA-NEXT:    retq
    437 ;
    438 ; FMA4-LABEL: test_8f32_fnmsub:
    439 ; FMA4:       # BB#0:
    440 ; FMA4-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
    441 ; FMA4-NEXT:    retq
    442 ;
    443 ; AVX512-LABEL: test_8f32_fnmsub:
    444 ; AVX512:       # BB#0:
    445 ; AVX512-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0
    446 ; AVX512-NEXT:    retq
          ; <8 x float> -(a0 * a1) - a2: the product is negated by subtracting it
          ; from a -0.0 splat, then a2 is subtracted. The FMA, FMA4 and AVX512
          ; checks each expect one packed vfnmsub-family instruction on ymm registers.
    447   %x = fmul <8 x float> %a0, %a1
    448   %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
    449   %res = fsub <8 x float> %y, %a2
    450   ret <8 x float> %res
    451 }
    452 
    453 define double @test_f64_fnmsub(double %a0, double %a1, double %a2) {
    454 ; FMA-LABEL: test_f64_fnmsub:
    455 ; FMA:       # BB#0:
    456 ; FMA-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
    457 ; FMA-NEXT:    retq
    458 ;
    459 ; FMA4-LABEL: test_f64_fnmsub:
    460 ; FMA4:       # BB#0:
    461 ; FMA4-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
    462 ; FMA4-NEXT:    retq
    463 ;
    464 ; AVX512-LABEL: test_f64_fnmsub:
    465 ; AVX512:       # BB#0:
    466 ; AVX512-NEXT:    vfnmsub213sd %xmm2, %xmm0, %xmm1
    467 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
    468 ; AVX512-NEXT:    retq
          ; Scalar double -(a0 * a1) - a2: the product is negated by subtracting it
          ; from -0.0, then a2 is subtracted. The FMA, FMA4 and AVX512 checks each
          ; expect one vfnmsub-family instruction; the AVX512 checks also expect a
          ; vmovaps from %zmm1 to %zmm0 before retq.
    469   %x = fmul double %a0, %a1
    470   %y = fsub double -0.000000e+00, %x
    471   %res = fsub double %y, %a2
    472   ret double %res
    473 }
    474 
    475 define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
    476 ; FMA-LABEL: test_2f64_fnmsub:
    477 ; FMA:       # BB#0:
    478 ; FMA-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0
    479 ; FMA-NEXT:    retq
    480 ;
    481 ; FMA4-LABEL: test_2f64_fnmsub:
    482 ; FMA4:       # BB#0:
    483 ; FMA4-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
    484 ; FMA4-NEXT:    retq
    485 ;
    486 ; AVX512-LABEL: test_2f64_fnmsub:
    487 ; AVX512:       # BB#0:
    488 ; AVX512-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0
    489 ; AVX512-NEXT:    retq
          ; <2 x double> -(a0 * a1) - a2: the product is negated by subtracting it
          ; from a -0.0 splat, then a2 is subtracted. The FMA, FMA4 and AVX512
          ; checks each expect one packed vfnmsub-family instruction on xmm registers.
    490   %x = fmul <2 x double> %a0, %a1
    491   %y = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x
    492   %res = fsub <2 x double> %y, %a2
    493   ret <2 x double> %res
    494 }
    495 
    496 define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
    497 ; FMA-LABEL: test_4f64_fnmsub:
    498 ; FMA:       # BB#0:
    499 ; FMA-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
    500 ; FMA-NEXT:    retq
    501 ;
    502 ; FMA4-LABEL: test_4f64_fnmsub:
    503 ; FMA4:       # BB#0:
    504 ; FMA4-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
    505 ; FMA4-NEXT:    retq
    506 ;
    507 ; AVX512-LABEL: test_4f64_fnmsub:
    508 ; AVX512:       # BB#0:
    509 ; AVX512-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
    510 ; AVX512-NEXT:    retq
          ; <4 x double> -(a0 * a1) - a2: the product is negated by subtracting it
          ; from a -0.0 splat, then a2 is subtracted. The FMA, FMA4 and AVX512
          ; checks each expect one packed vfnmsub-family instruction on ymm registers.
    511   %x = fmul <4 x double> %a0, %a1
    512   %y = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
    513   %res = fsub <4 x double> %y, %a2
    514   ret <4 x double> %res
    515 }
    516 
    517 ;
    518 ; Load Folding Patterns
    519 ;
    520 
    521 define <4 x float> @test_4f32_fmadd_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
    522 ; FMA-LABEL: test_4f32_fmadd_load:
    523 ; FMA:       # BB#0:
    524 ; FMA-NEXT:    vfmadd132ps (%rdi), %xmm1, %xmm0
    525 ; FMA-NEXT:    retq
    526 ;
    527 ; FMA4-LABEL: test_4f32_fmadd_load:
    528 ; FMA4:       # BB#0:
    529 ; FMA4-NEXT:    vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
    530 ; FMA4-NEXT:    retq
    531 ;
    532 ; AVX512-LABEL: test_4f32_fmadd_load:
    533 ; AVX512:       # BB#0:
    534 ; AVX512-NEXT:    vmovaps (%rdi), %xmm2
    535 ; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm0, %xmm2
    536 ; AVX512-NEXT:    vmovaps %zmm2, %zmm0
    537 ; AVX512-NEXT:    retq
          ; %a0 is a pointer here: the loaded <4 x float> is multiplied by a1 and
          ; a2 is added. The FMA and FMA4 checks expect the load folded into the
          ; fused instruction as a (%rdi) memory operand. The AVX512 checks expect
          ; a separate vmovaps load, the fused op, and a copy from %zmm2 to %zmm0.
    538   %x = load <4 x float>, <4 x float>* %a0
    539   %y = fmul <4 x float> %x, %a1
    540   %res = fadd <4 x float> %y, %a2
    541   ret <4 x float> %res
    542 }
    543 
    544 define <2 x double> @test_2f64_fmsub_load(<2 x double>* %a0, <2 x double> %a1, <2 x double> %a2) {
    545 ; FMA-LABEL: test_2f64_fmsub_load:
    546 ; FMA:       # BB#0:
    547 ; FMA-NEXT:    vfmsub132pd (%rdi), %xmm1, %xmm0
    548 ; FMA-NEXT:    retq
    549 ;
    550 ; FMA4-LABEL: test_2f64_fmsub_load:
    551 ; FMA4:       # BB#0:
    552 ; FMA4-NEXT:    vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0
    553 ; FMA4-NEXT:    retq
    554 ;
    555 ; AVX512-LABEL: test_2f64_fmsub_load:
    556 ; AVX512:       # BB#0:
    557 ; AVX512-NEXT:    vmovapd (%rdi), %xmm2
    558 ; AVX512-NEXT:    vfmsub213pd %xmm1, %xmm0, %xmm2
    559 ; AVX512-NEXT:    vmovaps %zmm2, %zmm0
    560 ; AVX512-NEXT:    retq
          ; %a0 is a pointer here: the loaded <2 x double> is multiplied by a1 and
          ; a2 is subtracted. The FMA and FMA4 checks expect the load folded into
          ; the fused instruction as a (%rdi) memory operand. The AVX512 checks
          ; expect a separate vmovapd load, the fused op, and a copy from %zmm2
          ; to %zmm0.
    561   %x = load <2 x double>, <2 x double>* %a0
    562   %y = fmul <2 x double> %x, %a1
    563   %res = fsub <2 x double> %y, %a2
    564   ret <2 x double> %res
    565 }
    566 
    567 ;
    568 ; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
    569 ;
    570 
    571 define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) {
    572 ; FMA-LABEL: test_v4f32_mul_add_x_one_y:
    573 ; FMA:       # BB#0:
    574 ; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    575 ; FMA-NEXT:    retq
    576 ;
    577 ; FMA4-LABEL: test_v4f32_mul_add_x_one_y:
    578 ; FMA4:       # BB#0:
    579 ; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
    580 ; FMA4-NEXT:    retq
    581 ;
    582 ; AVX512-LABEL: test_v4f32_mul_add_x_one_y:
    583 ; AVX512:       # BB#0:
    584 ; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    585 ; AVX512-NEXT:    retq
          ; (x + 1.0) * y, with the sum as the first fmul operand. The checks expect
          ; no separate add: a single fused multiply-add in which %xmm1 appears as
          ; both a multiplicand and the addend (x * y + y).
    586   %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
    587   %m = fmul <4 x float> %a, %y
    588   ret <4 x float> %m
    589 }
    590 
    591 define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) {
    592 ; FMA-LABEL: test_v4f32_mul_y_add_x_one:
    593 ; FMA:       # BB#0:
    594 ; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    595 ; FMA-NEXT:    retq
    596 ;
    597 ; FMA4-LABEL: test_v4f32_mul_y_add_x_one:
    598 ; FMA4:       # BB#0:
    599 ; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
    600 ; FMA4-NEXT:    retq
    601 ;
    602 ; AVX512-LABEL: test_v4f32_mul_y_add_x_one:
    603 ; AVX512:       # BB#0:
    604 ; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    605 ; AVX512-NEXT:    retq
          ; y * (x + 1.0), the commuted form with the sum as the second fmul operand.
          ; The checks expect the same single fused multiply-add as the uncommuted
          ; form, with %xmm1 as both a multiplicand and the addend (x * y + y).
    606   %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
    607   %m = fmul <4 x float> %y, %a
    608   ret <4 x float> %m
    609 }
    610 
    611 define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) {
    612 ; FMA-LABEL: test_v4f32_mul_add_x_negone_y:
    613 ; FMA:       # BB#0:
    614 ; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    615 ; FMA-NEXT:    retq
    616 ;
    617 ; FMA4-LABEL: test_v4f32_mul_add_x_negone_y:
    618 ; FMA4:       # BB#0:
    619 ; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
    620 ; FMA4-NEXT:    retq
    621 ;
    622 ; AVX512-LABEL: test_v4f32_mul_add_x_negone_y:
    623 ; AVX512:       # BB#0:
    624 ; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    625 ; AVX512-NEXT:    retq
          ; (x + -1.0) * y, with the sum as the first fmul operand. The checks expect
          ; a single fused multiply-subtract in which %xmm1 appears as both a
          ; multiplicand and the subtrahend (x * y - y).
    626   %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
    627   %m = fmul <4 x float> %a, %y
    628   ret <4 x float> %m
    629 }
    630 
    631 define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) {
    632 ; FMA-LABEL: test_v4f32_mul_y_add_x_negone:
    633 ; FMA:       # BB#0:
    634 ; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    635 ; FMA-NEXT:    retq
    636 ;
    637 ; FMA4-LABEL: test_v4f32_mul_y_add_x_negone:
    638 ; FMA4:       # BB#0:
    639 ; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
    640 ; FMA4-NEXT:    retq
    641 ;
    642 ; AVX512-LABEL: test_v4f32_mul_y_add_x_negone:
    643 ; AVX512:       # BB#0:
    644 ; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    645 ; AVX512-NEXT:    retq
          ; y * (x + -1.0), the commuted form with the sum as the second fmul
          ; operand. The checks expect the same single fused multiply-subtract as
          ; the uncommuted form (x * y - y).
    646   %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
    647   %m = fmul <4 x float> %y, %a
    648   ret <4 x float> %m
    649 }
    650 
    651 define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
    652 ; FMA-LABEL: test_v4f32_mul_sub_one_x_y:
    653 ; FMA:       # BB#0:
    654 ; FMA-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
    655 ; FMA-NEXT:    retq
    656 ;
    657 ; FMA4-LABEL: test_v4f32_mul_sub_one_x_y:
    658 ; FMA4:       # BB#0:
    659 ; FMA4-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
    660 ; FMA4-NEXT:    retq
    661 ;
    662 ; AVX512-LABEL: test_v4f32_mul_sub_one_x_y:
    663 ; AVX512:       # BB#0:
    664 ; AVX512-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
    665 ; AVX512-NEXT:    retq
          ; (1.0 - x) * y, with the difference as the first fmul operand. The checks
          ; expect a single vfnmadd-family instruction in which %xmm1 appears as
          ; both a multiplicand and the addend (-(x * y) + y).
    666   %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
    667   %m = fmul <4 x float> %s, %y
    668   ret <4 x float> %m
    669 }
    670 
    671 define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
    672 ; FMA-LABEL: test_v4f32_mul_y_sub_one_x:
    673 ; FMA:       # BB#0:
    674 ; FMA-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
    675 ; FMA-NEXT:    retq
    676 ;
    677 ; FMA4-LABEL: test_v4f32_mul_y_sub_one_x:
    678 ; FMA4:       # BB#0:
    679 ; FMA4-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
    680 ; FMA4-NEXT:    retq
    681 ;
    682 ; AVX512-LABEL: test_v4f32_mul_y_sub_one_x:
    683 ; AVX512:       # BB#0:
    684 ; AVX512-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
    685 ; AVX512-NEXT:    retq
          ; y * (1.0 - x), the commuted form with the difference as the second fmul
          ; operand. The checks expect the same single vfnmadd-family instruction
          ; as the uncommuted form (-(x * y) + y).
    686   %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
    687   %m = fmul <4 x float> %y, %s
    688   ret <4 x float> %m
    689 }
    690 
    691 define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) {
    692 ; FMA-LABEL: test_v4f32_mul_sub_negone_x_y:
    693 ; FMA:       # BB#0:
    694 ; FMA-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
    695 ; FMA-NEXT:    retq
    696 ;
    697 ; FMA4-LABEL: test_v4f32_mul_sub_negone_x_y:
    698 ; FMA4:       # BB#0:
    699 ; FMA4-NEXT:    vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
    700 ; FMA4-NEXT:    retq
    701 ;
    702 ; AVX512-LABEL: test_v4f32_mul_sub_negone_x_y:
    703 ; AVX512:       # BB#0:
    704 ; AVX512-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
    705 ; AVX512-NEXT:    retq
          ; (-1.0 - x) * y, with the difference as the first fmul operand. The checks
          ; expect a single vfnmsub-family instruction in which %xmm1 appears as
          ; both a multiplicand and the subtrahend (-(x * y) - y).
    706   %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
    707   %m = fmul <4 x float> %s, %y
    708   ret <4 x float> %m
    709 }
    710 
    711 define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) {
    712 ; FMA-LABEL: test_v4f32_mul_y_sub_negone_x:
    713 ; FMA:       # BB#0:
    714 ; FMA-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
    715 ; FMA-NEXT:    retq
    716 ;
    717 ; FMA4-LABEL: test_v4f32_mul_y_sub_negone_x:
    718 ; FMA4:       # BB#0:
    719 ; FMA4-NEXT:    vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
    720 ; FMA4-NEXT:    retq
    721 ;
    722 ; AVX512-LABEL: test_v4f32_mul_y_sub_negone_x:
    723 ; AVX512:       # BB#0:
    724 ; AVX512-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
    725 ; AVX512-NEXT:    retq
          ; y * (-1.0 - x), the commuted form with the difference as the second fmul
          ; operand. The checks expect the same single vfnmsub-family instruction
          ; as the uncommuted form (-(x * y) - y).
    726   %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
    727   %m = fmul <4 x float> %y, %s
    728   ret <4 x float> %m
    729 }
    730 
    731 define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
    732 ; FMA-LABEL: test_v4f32_mul_sub_x_one_y:
    733 ; FMA:       # BB#0:
    734 ; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    735 ; FMA-NEXT:    retq
    736 ;
    737 ; FMA4-LABEL: test_v4f32_mul_sub_x_one_y:
    738 ; FMA4:       # BB#0:
    739 ; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
    740 ; FMA4-NEXT:    retq
    741 ;
    742 ; AVX512-LABEL: test_v4f32_mul_sub_x_one_y:
    743 ; AVX512:       # BB#0:
    744 ; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    745 ; AVX512-NEXT:    retq
          ; (x - 1.0) * y, with the difference as the first fmul operand. The checks
          ; expect a single fused multiply-subtract in which %xmm1 appears as both
          ; a multiplicand and the subtrahend (x * y - y).
    746   %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
    747   %m = fmul <4 x float> %s, %y
    748   ret <4 x float> %m
    749 }
    750 
    751 define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {
    752 ; FMA-LABEL: test_v4f32_mul_y_sub_x_one:
    753 ; FMA:       # BB#0:
    754 ; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    755 ; FMA-NEXT:    retq
    756 ;
    757 ; FMA4-LABEL: test_v4f32_mul_y_sub_x_one:
    758 ; FMA4:       # BB#0:
    759 ; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
    760 ; FMA4-NEXT:    retq
    761 ;
    762 ; AVX512-LABEL: test_v4f32_mul_y_sub_x_one:
    763 ; AVX512:       # BB#0:
    764 ; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    765 ; AVX512-NEXT:    retq
          ; y * (x - 1.0), the commuted form with the difference as the second fmul
          ; operand. The checks expect the same single fused multiply-subtract as
          ; the uncommuted form (x * y - y).
    766   %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
    767   %m = fmul <4 x float> %y, %s
    768   ret <4 x float> %m
    769 }
    770 
    771 define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) {
    772 ; FMA-LABEL: test_v4f32_mul_sub_x_negone_y:
    773 ; FMA:       # BB#0:
    774 ; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    775 ; FMA-NEXT:    retq
    776 ;
    777 ; FMA4-LABEL: test_v4f32_mul_sub_x_negone_y:
    778 ; FMA4:       # BB#0:
    779 ; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
    780 ; FMA4-NEXT:    retq
    781 ;
    782 ; AVX512-LABEL: test_v4f32_mul_sub_x_negone_y:
    783 ; AVX512:       # BB#0:
    784 ; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    785 ; AVX512-NEXT:    retq
          ; (x - -1.0) * y, with the difference as the first fmul operand. The checks
          ; expect a single fused multiply-add in which %xmm1 appears as both a
          ; multiplicand and the addend (x * y + y).
    786   %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
    787   %m = fmul <4 x float> %s, %y
    788   ret <4 x float> %m
    789 }
    790 
    791 define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) {
    792 ; FMA-LABEL: test_v4f32_mul_y_sub_x_negone:
    793 ; FMA:       # BB#0:
    794 ; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    795 ; FMA-NEXT:    retq
    796 ;
    797 ; FMA4-LABEL: test_v4f32_mul_y_sub_x_negone:
    798 ; FMA4:       # BB#0:
    799 ; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
    800 ; FMA4-NEXT:    retq
    801 ;
    802 ; AVX512-LABEL: test_v4f32_mul_y_sub_x_negone:
    803 ; AVX512:       # BB#0:
    804 ; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    805 ; AVX512-NEXT:    retq
          ; y * (x - -1.0), the commuted form with the difference as the second fmul
          ; operand. The checks expect the same single fused multiply-add as the
          ; uncommuted form (x * y + y).
    806   %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
    807   %m = fmul <4 x float> %y, %s
    808   ret <4 x float> %m
    809 }
    810 
    811 ;
    812 ; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
    813 ;
    814 
    815 define float @test_f32_interp(float %x, float %y, float %t) {
    816 ; FMA-LABEL: test_f32_interp:
    817 ; FMA:       # BB#0:
    818 ; FMA-NEXT:    vfnmadd213ss %xmm1, %xmm2, %xmm1
    819 ; FMA-NEXT:    vfmadd213ss %xmm1, %xmm2, %xmm0
    820 ; FMA-NEXT:    retq
    821 ;
    822 ; FMA4-LABEL: test_f32_interp:
    823 ; FMA4:       # BB#0:
    824 ; FMA4-NEXT:    vfnmaddss %xmm1, %xmm1, %xmm2, %xmm1
    825 ; FMA4-NEXT:    vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
    826 ; FMA4-NEXT:    retq
    827 ;
    828 ; AVX512-LABEL: test_f32_interp:
    829 ; AVX512:       # BB#0:
    830 ; AVX512-NEXT:    vfnmadd213ss %xmm1, %xmm2, %xmm1
    831 ; AVX512-NEXT:    vfmadd213ss %xmm1, %xmm0, %xmm2
    832 ; AVX512-NEXT:    vmovaps %zmm2, %zmm0
    833 ; AVX512-NEXT:    retq
          ; Scalar float interpolation x * t + y * (1.0 - t). The checks expect two
          ; chained fused ops and no standalone multiply or subtract: a vfnmadd
          ; whose result is the addend of the following vfmadd. The AVX512 checks
          ; also expect a vmovaps from %zmm2 to %zmm0 before retq.
    834   %t1 = fsub float 1.0, %t
    835   %tx = fmul float %x, %t
    836   %ty = fmul float %y, %t1
    837   %r = fadd float %tx, %ty
    838   ret float %r
    839 }
    840 
    841 define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) {
    842 ; FMA-LABEL: test_v4f32_interp:
    843 ; FMA:       # BB#0:
    844 ; FMA-NEXT:    vfnmadd213ps %xmm1, %xmm2, %xmm1
    845 ; FMA-NEXT:    vfmadd213ps %xmm1, %xmm2, %xmm0
    846 ; FMA-NEXT:    retq
    847 ;
    848 ; FMA4-LABEL: test_v4f32_interp:
    849 ; FMA4:       # BB#0:
    850 ; FMA4-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm2, %xmm1
    851 ; FMA4-NEXT:    vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
    852 ; FMA4-NEXT:    retq
    853 ;
    854 ; AVX512-LABEL: test_v4f32_interp:
    855 ; AVX512:       # BB#0:
    856 ; AVX512-NEXT:    vmovaps %zmm2, %zmm3
    857 ; AVX512-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm3
    858 ; AVX512-NEXT:    vfmadd213ps %xmm3, %xmm2, %xmm0
    859 ; AVX512-NEXT:    retq
          ; <4 x float> interpolation x * t + y * (1.0 - t). The checks expect two
          ; chained fused ops and no standalone multiply or subtract: a vfnmadd
          ; whose result is the addend of the following vfmadd. The AVX512 checks
          ; first copy %zmm2 into %zmm3 and use %xmm3 as the vfnmadd destination.
    860   %t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
    861   %tx = fmul <4 x float> %x, %t
    862   %ty = fmul <4 x float> %y, %t1
    863   %r = fadd <4 x float> %tx, %ty
    864   ret <4 x float> %r
    865 }
    866 
    867 define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) { ; per-lane interpolation on <8 x float>: x*t + y*(1.0 - t)
    868 ; FMA-LABEL: test_v8f32_interp:
    869 ; FMA:       # BB#0:
    870 ; FMA-NEXT:    vfnmadd213ps %ymm1, %ymm2, %ymm1
    871 ; FMA-NEXT:    vfmadd213ps %ymm1, %ymm2, %ymm0
    872 ; FMA-NEXT:    retq
    873 ;
    874 ; FMA4-LABEL: test_v8f32_interp:
    875 ; FMA4:       # BB#0:
    876 ; FMA4-NEXT:    vfnmaddps %ymm1, %ymm1, %ymm2, %ymm1
    877 ; FMA4-NEXT:    vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
    878 ; FMA4-NEXT:    retq
    879 ;
    880 ; AVX512-LABEL: test_v8f32_interp:
    881 ; AVX512:       # BB#0:
    882 ; AVX512-NEXT:    vmovaps %zmm2, %zmm3
    883 ; AVX512-NEXT:    vfnmadd213ps %ymm1, %ymm1, %ymm3
    884 ; AVX512-NEXT:    vfmadd213ps %ymm3, %ymm2, %ymm0
    885 ; AVX512-NEXT:    retq
    886   %t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t ; t1 = 1.0 - t in every lane
    887   %tx = fmul <8 x float> %x, %t ; tx = x * t
    888   %ty = fmul <8 x float> %y, %t1 ; ty = y * (1.0 - t)
    889   %r = fadd <8 x float> %tx, %ty ; checks above expect an fnmadd producing y - t*y, then an fmadd folding in x*t (ymm-width)
    890   ret <8 x float> %r
    891 }
    892 
    893 define double @test_f64_interp(double %x, double %y, double %t) { ; scalar double interpolation: x*t + y*(1.0 - t)
    894 ; FMA-LABEL: test_f64_interp:
    895 ; FMA:       # BB#0:
    896 ; FMA-NEXT:    vfnmadd213sd %xmm1, %xmm2, %xmm1
    897 ; FMA-NEXT:    vfmadd213sd %xmm1, %xmm2, %xmm0
    898 ; FMA-NEXT:    retq
    899 ;
    900 ; FMA4-LABEL: test_f64_interp:
    901 ; FMA4:       # BB#0:
    902 ; FMA4-NEXT:    vfnmaddsd %xmm1, %xmm1, %xmm2, %xmm1
    903 ; FMA4-NEXT:    vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0
    904 ; FMA4-NEXT:    retq
    905 ;
    906 ; AVX512-LABEL: test_f64_interp:
    907 ; AVX512:       # BB#0:
    908 ; AVX512-NEXT:    vfnmadd213sd %xmm1, %xmm2, %xmm1
    909 ; AVX512-NEXT:    vfmadd213sd %xmm1, %xmm0, %xmm2
    910 ; AVX512-NEXT:    vmovaps %zmm2, %zmm0
    911 ; AVX512-NEXT:    retq
    912   %t1 = fsub double 1.0, %t ; t1 = 1.0 - t
    913   %tx = fmul double %x, %t ; tx = x * t
    914   %ty = fmul double %y, %t1 ; ty = y * (1.0 - t)
    915   %r = fadd double %tx, %ty ; checks above expect y*(1-t) formed as y - t*y by an fnmadd, then x*t folded in by an fmadd
    916   ret double %r
    917 }
    918 
    919 define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) { ; per-lane interpolation on <2 x double>: x*t + y*(1.0 - t)
    920 ; FMA-LABEL: test_v2f64_interp:
    921 ; FMA:       # BB#0:
    922 ; FMA-NEXT:    vfnmadd213pd %xmm1, %xmm2, %xmm1
    923 ; FMA-NEXT:    vfmadd213pd %xmm1, %xmm2, %xmm0
    924 ; FMA-NEXT:    retq
    925 ;
    926 ; FMA4-LABEL: test_v2f64_interp:
    927 ; FMA4:       # BB#0:
    928 ; FMA4-NEXT:    vfnmaddpd %xmm1, %xmm1, %xmm2, %xmm1
    929 ; FMA4-NEXT:    vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
    930 ; FMA4-NEXT:    retq
    931 ;
    932 ; AVX512-LABEL: test_v2f64_interp:
    933 ; AVX512:       # BB#0:
    934 ; AVX512-NEXT:    vmovaps %zmm2, %zmm3
    935 ; AVX512-NEXT:    vfnmadd213pd %xmm1, %xmm1, %xmm3
    936 ; AVX512-NEXT:    vfmadd213pd %xmm3, %xmm2, %xmm0
    937 ; AVX512-NEXT:    retq
    938   %t1 = fsub <2 x double> <double 1.0, double 1.0>, %t ; t1 = 1.0 - t in every lane
    939   %tx = fmul <2 x double> %x, %t ; tx = x * t
    940   %ty = fmul <2 x double> %y, %t1 ; ty = y * (1.0 - t)
    941   %r = fadd <2 x double> %tx, %ty ; checks above expect an fnmadd producing y - t*y, then an fmadd folding in x*t
    942   ret <2 x double> %r
    943 }
    944 
    945 define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) { ; per-lane interpolation on <4 x double>: x*t + y*(1.0 - t)
    946 ; FMA-LABEL: test_v4f64_interp:
    947 ; FMA:       # BB#0:
    948 ; FMA-NEXT:    vfnmadd213pd %ymm1, %ymm2, %ymm1
    949 ; FMA-NEXT:    vfmadd213pd %ymm1, %ymm2, %ymm0
    950 ; FMA-NEXT:    retq
    951 ;
    952 ; FMA4-LABEL: test_v4f64_interp:
    953 ; FMA4:       # BB#0:
    954 ; FMA4-NEXT:    vfnmaddpd %ymm1, %ymm1, %ymm2, %ymm1
    955 ; FMA4-NEXT:    vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
    956 ; FMA4-NEXT:    retq
    957 ;
    958 ; AVX512-LABEL: test_v4f64_interp:
    959 ; AVX512:       # BB#0:
    960 ; AVX512-NEXT:    vmovaps %zmm2, %zmm3
    961 ; AVX512-NEXT:    vfnmadd213pd %ymm1, %ymm1, %ymm3
    962 ; AVX512-NEXT:    vfmadd213pd %ymm3, %ymm2, %ymm0
    963 ; AVX512-NEXT:    retq
    964   %t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t ; t1 = 1.0 - t in every lane
    965   %tx = fmul <4 x double> %x, %t ; tx = x * t
    966   %ty = fmul <4 x double> %y, %t1 ; ty = y * (1.0 - t)
    967   %r = fadd <4 x double> %tx, %ty ; checks above expect an fnmadd producing y - t*y, then an fmadd folding in x*t (ymm-width)
    968   ret <4 x double> %r
    969 }
    970 
    971 ;
    972 ; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
    973 ;
    974 
    975 define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { ; -(a0*a1 + a2): checks expect a single fnmsub
    976 ; FMA-LABEL: test_v4f32_fneg_fmadd:
    977 ; FMA:       # BB#0:
    978 ; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
    979 ; FMA-NEXT:    retq
    980 ;
    981 ; FMA4-LABEL: test_v4f32_fneg_fmadd:
    982 ; FMA4:       # BB#0:
    983 ; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
    984 ; FMA4-NEXT:    retq
    985 ;
    986 ; AVX512-LABEL: test_v4f32_fneg_fmadd:
    987 ; AVX512:       # BB#0:
    988 ; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
    989 ; AVX512-NEXT:    retq
    990   %mul = fmul <4 x float> %a0, %a1 ; mul = a0 * a1
    991   %add = fadd <4 x float> %mul, %a2 ; add = a0*a1 + a2 (the fmadd being negated)
    992   %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add ; negation written as -0.0 - add
    993   ret <4 x float> %neg
    994 }
    995 
    996 define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { ; -(a0*a1 - a2): checks expect a single fnmadd
    997 ; FMA-LABEL: test_v4f64_fneg_fmsub:
    998 ; FMA:       # BB#0:
    999 ; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
   1000 ; FMA-NEXT:    retq
   1001 ;
   1002 ; FMA4-LABEL: test_v4f64_fneg_fmsub:
   1003 ; FMA4:       # BB#0:
   1004 ; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
   1005 ; FMA4-NEXT:    retq
   1006 ;
   1007 ; AVX512-LABEL: test_v4f64_fneg_fmsub:
   1008 ; AVX512:       # BB#0:
   1009 ; AVX512-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
   1010 ; AVX512-NEXT:    retq
   1011   %mul = fmul <4 x double> %a0, %a1 ; mul = a0 * a1
   1012   %sub = fsub <4 x double> %mul, %a2 ; sub = a0*a1 - a2 (the fmsub being negated)
   1013   %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub ; negation written as -0.0 - sub
   1014   ret <4 x double> %neg
   1015 }
   1016 
   1017 define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { ; -(-(a0*a1) + a2): checks expect a single fmsub
   1018 ; FMA-LABEL: test_v4f32_fneg_fnmadd:
   1019 ; FMA:       # BB#0:
   1020 ; FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
   1021 ; FMA-NEXT:    retq
   1022 ;
   1023 ; FMA4-LABEL: test_v4f32_fneg_fnmadd:
   1024 ; FMA4:       # BB#0:
   1025 ; FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
   1026 ; FMA4-NEXT:    retq
   1027 ;
   1028 ; AVX512-LABEL: test_v4f32_fneg_fnmadd:
   1029 ; AVX512:       # BB#0:
   1030 ; AVX512-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
   1031 ; AVX512-NEXT:    retq
   1032   %mul = fmul <4 x float> %a0, %a1 ; mul = a0 * a1
   1033   %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul ; neg0 = -(a0*a1), written as -0.0 - mul
   1034   %add = fadd <4 x float> %neg0, %a2 ; add = -(a0*a1) + a2 (the fnmadd being negated)
   1035   %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add ; outer negation, written as -0.0 - add
   1036   ret <4 x float> %neg1
   1037 }
   1038 
   1039 define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { ; -(-(a0*a1) - a2): checks expect a single fmadd
   1040 ; FMA-LABEL: test_v4f64_fneg_fnmsub:
   1041 ; FMA:       # BB#0:
   1042 ; FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
   1043 ; FMA-NEXT:    retq
   1044 ;
   1045 ; FMA4-LABEL: test_v4f64_fneg_fnmsub:
   1046 ; FMA4:       # BB#0:
   1047 ; FMA4-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
   1048 ; FMA4-NEXT:    retq
   1049 ;
   1050 ; AVX512-LABEL: test_v4f64_fneg_fnmsub:
   1051 ; AVX512:       # BB#0:
   1052 ; AVX512-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
   1053 ; AVX512-NEXT:    retq
   1054   %mul = fmul <4 x double> %a0, %a1 ; mul = a0 * a1
   1055   %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul ; neg0 = -(a0*a1), written as -0.0 - mul
   1056   %sub = fsub <4 x double> %neg0, %a2 ; sub = -(a0*a1) - a2 (the fnmsub being negated)
   1057   %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub ; outer negation, written as -0.0 - sub
   1058   ret <4 x double> %neg1
   1059 }
   1060 
   1061 ;
   1062 ; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
   1063 ;
   1064 
   1065 define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 { ; x*c1 + x*c2 with constant vectors: checks expect one vmulps by a rip-relative constant
   1066 ; FMA-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
   1067 ; FMA:       # BB#0:
   1068 ; FMA-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0
   1069 ; FMA-NEXT:    retq
   1070 ;
   1071 ; FMA4-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
   1072 ; FMA4:       # BB#0:
   1073 ; FMA4-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0
   1074 ; FMA4-NEXT:    retq
   1075 ;
   1076 ; AVX512-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
   1077 ; AVX512:       # BB#0:
   1078 ; AVX512-NEXT:    vmulps {{.*}}(%rip){1to4}, %xmm0, %xmm0
   1079 ; AVX512-NEXT:    retq
   1080   %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0> ; m0 = x * c1
   1081   %m1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0> ; m1 = x * c2; every lane of c1 + c2 is 5.0
   1082   %a  = fadd <4 x float> %m0, %m1 ; a = x*c1 + x*c2, expected to become x * (c1 + c2)
   1083   ret <4 x float> %a
   1084 }
   1085 
   1086 ;
   1087 ; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
   1088 ;
   1089 
   1090 define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 { ; (x*c1)*c2 + y with constant vectors: checks expect one fmadd with a rip-relative constant operand
   1091 ; FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
   1092 ; FMA:       # BB#0:
   1093 ; FMA-NEXT:    vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0
   1094 ; FMA-NEXT:    retq
   1095 ;
   1096 ; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
   1097 ; FMA4:       # BB#0:
   1098 ; FMA4-NEXT:    vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0
   1099 ; FMA4-NEXT:    retq
   1100 ;
   1101 ; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
   1102 ; AVX512:       # BB#0:
   1103 ; AVX512-NEXT:    vfmadd231ps {{.*}}(%rip), %xmm0, %xmm1
   1104 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
   1105 ; AVX512-NEXT:    retq
   1106   %m0 = fmul <4 x float> %x,  <float 1.0, float 2.0, float 3.0, float 4.0> ; m0 = x * c1
   1107   %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0> ; m1 = (x * c1) * c2; lane-wise c1*c2 = <4, 6, 6, 4>
   1108   %a  = fadd <4 x float> %m1, %y ; a = (x*c1)*c2 + y, expected to become fma(x, c1*c2, y)
   1109   ret <4 x float> %a
   1110 }
   1111 
   1112 ; Pattern: (fneg (fmul nsz x, y)) -> (fnmsub x, y, 0); the fmul must carry nsz (see the no_nsz test below)
   1113 
   1114 define double @test_f64_fneg_fmul(double %x, double %y) #0 { ; -(x*y) on scalar double with nsz on the multiply: checks expect a zeroed register plus one fnmsub
   1115 ; FMA-LABEL: test_f64_fneg_fmul:
   1116 ; FMA:       # BB#0:
   1117 ; FMA-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1118 ; FMA-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
   1119 ; FMA-NEXT:    retq
   1120 ;
   1121 ; FMA4-LABEL: test_f64_fneg_fmul:
   1122 ; FMA4:       # BB#0:
   1123 ; FMA4-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1124 ; FMA4-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
   1125 ; FMA4-NEXT:    retq
   1126 ;
   1127 ; AVX512-LABEL: test_f64_fneg_fmul:
   1128 ; AVX512:       # BB#0:
   1129 ; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1130 ; AVX512-NEXT:    vfnmsub213sd %xmm2, %xmm0, %xmm1
   1131 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
   1132 ; AVX512-NEXT:    retq
   1133   %m = fmul nsz double %x, %y ; m = x * y; nsz is the flag that distinguishes this from the no_nsz test
   1134   %n = fsub double -0.0, %m ; negation written as -0.0 - m
   1135   ret double %n
   1136 }
   1137 
   1138 define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 { ; -(x*y) on <4 x float> with nsz on the multiply: checks expect a zeroed register plus one fnmsub
   1139 ; FMA-LABEL: test_v4f32_fneg_fmul:
   1140 ; FMA:       # BB#0:
   1141 ; FMA-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1142 ; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
   1143 ; FMA-NEXT:    retq
   1144 ;
   1145 ; FMA4-LABEL: test_v4f32_fneg_fmul:
   1146 ; FMA4:       # BB#0:
   1147 ; FMA4-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1148 ; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
   1149 ; FMA4-NEXT:    retq
   1150 ;
   1151 ; AVX512-LABEL: test_v4f32_fneg_fmul:
   1152 ; AVX512:       # BB#0:
   1153 ; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1154 ; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
   1155 ; AVX512-NEXT:    retq
   1156   %m = fmul nsz <4 x float> %x, %y ; m = x * y; nsz is the flag that distinguishes this from the no_nsz test
   1157   %n = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %m ; negation written as -0.0 - m
   1158   ret <4 x float> %n
   1159 }
   1160 
   1161 define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 { ; -(x*y) on <4 x double> with nsz on the multiply: checks expect a zeroed register plus one fnmsub
   1162 ; FMA-LABEL: test_v4f64_fneg_fmul:
   1163 ; FMA:       # BB#0:
   1164 ; FMA-NEXT:    vxorpd %ymm2, %ymm2, %ymm2
   1165 ; FMA-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
   1166 ; FMA-NEXT:    retq
   1167 ;
   1168 ; FMA4-LABEL: test_v4f64_fneg_fmul:
   1169 ; FMA4:       # BB#0:
   1170 ; FMA4-NEXT:    vxorpd %ymm2, %ymm2, %ymm2
   1171 ; FMA4-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
   1172 ; FMA4-NEXT:    retq
   1173 ;
   1174 ; AVX512-LABEL: test_v4f64_fneg_fmul:
   1175 ; AVX512:       # BB#0:
   1176 ; AVX512-NEXT:    vxorps %ymm2, %ymm2, %ymm2
   1177 ; AVX512-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
   1178 ; AVX512-NEXT:    retq
   1179   %m = fmul nsz <4 x double> %x, %y ; m = x * y; nsz is the flag that distinguishes this from the no_nsz test
   1180   %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m ; negation written as -0.0 - m
   1181   ret <4 x double> %n
   1182 }
   1183 
   1184 define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 { ; negative test: -(x*y) without nsz; checks expect a plain vmulpd followed by a vxorpd, no fused op
   1185 ; ALL-LABEL: test_v4f64_fneg_fmul_no_nsz:
   1186 ; ALL:       # BB#0:
   1187 ; ALL-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
   1188 ; ALL-NEXT:    vxorpd {{.*}}(%rip), %ymm0, %ymm0
   1189 ; ALL-NEXT:    retq
   1190   %m = fmul <4 x double> %x, %y ; m = x * y, deliberately without the nsz flag
   1191   %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m ; negation written as -0.0 - m
   1192   ret <4 x double> %n
   1193 }
   1194 
   1195 attributes #0 = { "unsafe-fp-math"="true" } ; attribute group referenced as #0 by the fneg and constant-folding tests above
   1196