Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; Verifies that fmul feeding fadd/fsub is contracted into fused multiply-add instructions (FMA3, FMA4 and AVX512 targets) under -fp-contract=fast.
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=AVX512
      7 
      8 ;
      9 ; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z)
     10 ;
     11 
     12 define float @test_f32_fmadd(float %a0, float %a1, float %a2) { ; scalar float a0*a1 + a2; assertions expect one fused multiply-add (the AVX512 run also expects a result register copy)
     13 ; FMA-LABEL: test_f32_fmadd:
     14 ; FMA:       # BB#0:
     15 ; FMA-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0
     16 ; FMA-NEXT:    retq
     17 ;
     18 ; FMA4-LABEL: test_f32_fmadd:
     19 ; FMA4:       # BB#0:
     20 ; FMA4-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
     21 ; FMA4-NEXT:    retq
     22 ;
     23 ; AVX512-LABEL: test_f32_fmadd:
     24 ; AVX512:       # BB#0:
     25 ; AVX512-NEXT:    vfmadd213ss %xmm2, %xmm0, %xmm1
     26 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
     27 ; AVX512-NEXT:    retq
     28   %x = fmul float %a0, %a1 ; product a0*a1
     29   %res = fadd float %x, %a2 ; product plus a2
     30   ret float %res
     31 }
     32 
     33 define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; <4 x float> a0*a1 + a2; assertions expect one fused multiply-add
     34 ; FMA-LABEL: test_4f32_fmadd:
     35 ; FMA:       # BB#0:
     36 ; FMA-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
     37 ; FMA-NEXT:    retq
     38 ;
     39 ; FMA4-LABEL: test_4f32_fmadd:
     40 ; FMA4:       # BB#0:
     41 ; FMA4-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
     42 ; FMA4-NEXT:    retq
     43 ;
     44 ; AVX512-LABEL: test_4f32_fmadd:
     45 ; AVX512:       # BB#0:
     46 ; AVX512-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
     47 ; AVX512-NEXT:    retq
     48   %x = fmul <4 x float> %a0, %a1 ; lane-wise product a0*a1
     49   %res = fadd <4 x float> %x, %a2 ; product plus a2
     50   ret <4 x float> %res
     51 }
     52 
     53 define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { ; <8 x float> a0*a1 + a2; assertions expect one fused multiply-add on ymm registers
     54 ; FMA-LABEL: test_8f32_fmadd:
     55 ; FMA:       # BB#0:
     56 ; FMA-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
     57 ; FMA-NEXT:    retq
     58 ;
     59 ; FMA4-LABEL: test_8f32_fmadd:
     60 ; FMA4:       # BB#0:
     61 ; FMA4-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
     62 ; FMA4-NEXT:    retq
     63 ;
     64 ; AVX512-LABEL: test_8f32_fmadd:
     65 ; AVX512:       # BB#0:
     66 ; AVX512-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
     67 ; AVX512-NEXT:    retq
     68   %x = fmul <8 x float> %a0, %a1 ; lane-wise product a0*a1
     69   %res = fadd <8 x float> %x, %a2 ; product plus a2
     70   ret <8 x float> %res
     71 }
     72 
     73 define double @test_f64_fmadd(double %a0, double %a1, double %a2) { ; scalar double a0*a1 + a2; assertions expect one fused multiply-add (the AVX512 run also expects a result register copy)
     74 ; FMA-LABEL: test_f64_fmadd:
     75 ; FMA:       # BB#0:
     76 ; FMA-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0
     77 ; FMA-NEXT:    retq
     78 ;
     79 ; FMA4-LABEL: test_f64_fmadd:
     80 ; FMA4:       # BB#0:
     81 ; FMA4-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
     82 ; FMA4-NEXT:    retq
     83 ;
     84 ; AVX512-LABEL: test_f64_fmadd:
     85 ; AVX512:       # BB#0:
     86 ; AVX512-NEXT:    vfmadd213sd %xmm2, %xmm0, %xmm1
     87 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
     88 ; AVX512-NEXT:    retq
     89   %x = fmul double %a0, %a1 ; product a0*a1
     90   %res = fadd double %x, %a2 ; product plus a2
     91   ret double %res
     92 }
     93 
     94 define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { ; <2 x double> a0*a1 + a2; assertions expect one fused multiply-add
     95 ; FMA-LABEL: test_2f64_fmadd:
     96 ; FMA:       # BB#0:
     97 ; FMA-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
     98 ; FMA-NEXT:    retq
     99 ;
    100 ; FMA4-LABEL: test_2f64_fmadd:
    101 ; FMA4:       # BB#0:
    102 ; FMA4-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
    103 ; FMA4-NEXT:    retq
    104 ;
    105 ; AVX512-LABEL: test_2f64_fmadd:
    106 ; AVX512:       # BB#0:
    107 ; AVX512-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
    108 ; AVX512-NEXT:    retq
    109   %x = fmul <2 x double> %a0, %a1 ; lane-wise product a0*a1
    110   %res = fadd <2 x double> %x, %a2 ; product plus a2
    111   ret <2 x double> %res
    112 }
    113 
    114 define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { ; <4 x double> a0*a1 + a2; assertions expect one fused multiply-add on ymm registers
    115 ; FMA-LABEL: test_4f64_fmadd:
    116 ; FMA:       # BB#0:
    117 ; FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
    118 ; FMA-NEXT:    retq
    119 ;
    120 ; FMA4-LABEL: test_4f64_fmadd:
    121 ; FMA4:       # BB#0:
    122 ; FMA4-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
    123 ; FMA4-NEXT:    retq
    124 ;
    125 ; AVX512-LABEL: test_4f64_fmadd:
    126 ; AVX512:       # BB#0:
    127 ; AVX512-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
    128 ; AVX512-NEXT:    retq
    129   %x = fmul <4 x double> %a0, %a1 ; lane-wise product a0*a1
    130   %res = fadd <4 x double> %x, %a2 ; product plus a2
    131   ret <4 x double> %res
    132 }
    133 
    134 ;
    135 ; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
    136 ;
    137 
    138 define float @test_f32_fmsub(float %a0, float %a1, float %a2) { ; scalar float a0*a1 - a2; assertions expect one fused multiply-subtract (the AVX512 run also expects a result register copy)
    139 ; FMA-LABEL: test_f32_fmsub:
    140 ; FMA:       # BB#0:
    141 ; FMA-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0
    142 ; FMA-NEXT:    retq
    143 ;
    144 ; FMA4-LABEL: test_f32_fmsub:
    145 ; FMA4:       # BB#0:
    146 ; FMA4-NEXT:    vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
    147 ; FMA4-NEXT:    retq
    148 ;
    149 ; AVX512-LABEL: test_f32_fmsub:
    150 ; AVX512:       # BB#0:
    151 ; AVX512-NEXT:    vfmsub213ss %xmm2, %xmm0, %xmm1
    152 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
    153 ; AVX512-NEXT:    retq
    154   %x = fmul float %a0, %a1 ; product a0*a1
    155   %res = fsub float %x, %a2 ; product minus a2
    156   ret float %res
    157 }
    158 
    159 define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; <4 x float> a0*a1 - a2; assertions expect one fused multiply-subtract
    160 ; FMA-LABEL: test_4f32_fmsub:
    161 ; FMA:       # BB#0:
    162 ; FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
    163 ; FMA-NEXT:    retq
    164 ;
    165 ; FMA4-LABEL: test_4f32_fmsub:
    166 ; FMA4:       # BB#0:
    167 ; FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
    168 ; FMA4-NEXT:    retq
    169 ;
    170 ; AVX512-LABEL: test_4f32_fmsub:
    171 ; AVX512:       # BB#0:
    172 ; AVX512-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
    173 ; AVX512-NEXT:    retq
    174   %x = fmul <4 x float> %a0, %a1 ; lane-wise product a0*a1
    175   %res = fsub <4 x float> %x, %a2 ; product minus a2
    176   ret <4 x float> %res
    177 }
    178 
    179 define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { ; <8 x float> a0*a1 - a2; assertions expect one fused multiply-subtract on ymm registers
    180 ; FMA-LABEL: test_8f32_fmsub:
    181 ; FMA:       # BB#0:
    182 ; FMA-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0
    183 ; FMA-NEXT:    retq
    184 ;
    185 ; FMA4-LABEL: test_8f32_fmsub:
    186 ; FMA4:       # BB#0:
    187 ; FMA4-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
    188 ; FMA4-NEXT:    retq
    189 ;
    190 ; AVX512-LABEL: test_8f32_fmsub:
    191 ; AVX512:       # BB#0:
    192 ; AVX512-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0
    193 ; AVX512-NEXT:    retq
    194   %x = fmul <8 x float> %a0, %a1 ; lane-wise product a0*a1
    195   %res = fsub <8 x float> %x, %a2 ; product minus a2
    196   ret <8 x float> %res
    197 }
    198 
    199 define double @test_f64_fmsub(double %a0, double %a1, double %a2) { ; scalar double a0*a1 - a2; assertions expect one fused multiply-subtract (the AVX512 run also expects a result register copy)
    200 ; FMA-LABEL: test_f64_fmsub:
    201 ; FMA:       # BB#0:
    202 ; FMA-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0
    203 ; FMA-NEXT:    retq
    204 ;
    205 ; FMA4-LABEL: test_f64_fmsub:
    206 ; FMA4:       # BB#0:
    207 ; FMA4-NEXT:    vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
    208 ; FMA4-NEXT:    retq
    209 ;
    210 ; AVX512-LABEL: test_f64_fmsub:
    211 ; AVX512:       # BB#0:
    212 ; AVX512-NEXT:    vfmsub213sd %xmm2, %xmm0, %xmm1
    213 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
    214 ; AVX512-NEXT:    retq
    215   %x = fmul double %a0, %a1 ; product a0*a1
    216   %res = fsub double %x, %a2 ; product minus a2
    217   ret double %res
    218 }
    219 
    220 define <2 x double> @test_2f64_fmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { ; <2 x double> a0*a1 - a2; assertions expect one fused multiply-subtract
    221 ; FMA-LABEL: test_2f64_fmsub:
    222 ; FMA:       # BB#0:
    223 ; FMA-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0
    224 ; FMA-NEXT:    retq
    225 ;
    226 ; FMA4-LABEL: test_2f64_fmsub:
    227 ; FMA4:       # BB#0:
    228 ; FMA4-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
    229 ; FMA4-NEXT:    retq
    230 ;
    231 ; AVX512-LABEL: test_2f64_fmsub:
    232 ; AVX512:       # BB#0:
    233 ; AVX512-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0
    234 ; AVX512-NEXT:    retq
    235   %x = fmul <2 x double> %a0, %a1 ; lane-wise product a0*a1
    236   %res = fsub <2 x double> %x, %a2 ; product minus a2
    237   ret <2 x double> %res
    238 }
    239 
    240 define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { ; <4 x double> a0*a1 - a2; assertions expect one fused multiply-subtract on ymm registers
    241 ; FMA-LABEL: test_4f64_fmsub:
    242 ; FMA:       # BB#0:
    243 ; FMA-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0
    244 ; FMA-NEXT:    retq
    245 ;
    246 ; FMA4-LABEL: test_4f64_fmsub:
    247 ; FMA4:       # BB#0:
    248 ; FMA4-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
    249 ; FMA4-NEXT:    retq
    250 ;
    251 ; AVX512-LABEL: test_4f64_fmsub:
    252 ; AVX512:       # BB#0:
    253 ; AVX512-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0
    254 ; AVX512-NEXT:    retq
    255   %x = fmul <4 x double> %a0, %a1 ; lane-wise product a0*a1
    256   %res = fsub <4 x double> %x, %a2 ; product minus a2
    257   ret <4 x double> %res
    258 }
    259 
    260 ;
    261 ; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
    262 ;
    263 
    264 define float @test_f32_fnmadd(float %a0, float %a1, float %a2) { ; scalar float a2 - a0*a1; assertions expect one fused negated multiply-add (the AVX512 run also expects a result register copy)
    265 ; FMA-LABEL: test_f32_fnmadd:
    266 ; FMA:       # BB#0:
    267 ; FMA-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0
    268 ; FMA-NEXT:    retq
    269 ;
    270 ; FMA4-LABEL: test_f32_fnmadd:
    271 ; FMA4:       # BB#0:
    272 ; FMA4-NEXT:    vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
    273 ; FMA4-NEXT:    retq
    274 ;
    275 ; AVX512-LABEL: test_f32_fnmadd:
    276 ; AVX512:       # BB#0:
    277 ; AVX512-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm1
    278 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
    279 ; AVX512-NEXT:    retq
    280   %x = fmul float %a0, %a1 ; product a0*a1
    281   %res = fsub float %a2, %x ; a2 minus the product
    282   ret float %res
    283 }
    284 
    285 define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; <4 x float> a2 - a0*a1; assertions expect one fused negated multiply-add
    286 ; FMA-LABEL: test_4f32_fnmadd:
    287 ; FMA:       # BB#0:
    288 ; FMA-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
    289 ; FMA-NEXT:    retq
    290 ;
    291 ; FMA4-LABEL: test_4f32_fnmadd:
    292 ; FMA4:       # BB#0:
    293 ; FMA4-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
    294 ; FMA4-NEXT:    retq
    295 ;
    296 ; AVX512-LABEL: test_4f32_fnmadd:
    297 ; AVX512:       # BB#0:
    298 ; AVX512-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
    299 ; AVX512-NEXT:    retq
    300   %x = fmul <4 x float> %a0, %a1 ; lane-wise product a0*a1
    301   %res = fsub <4 x float> %a2, %x ; a2 minus the product
    302   ret <4 x float> %res
    303 }
    304 
    305 define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { ; <8 x float> a2 - a0*a1; assertions expect one fused negated multiply-add on ymm registers
    306 ; FMA-LABEL: test_8f32_fnmadd:
    307 ; FMA:       # BB#0:
    308 ; FMA-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
    309 ; FMA-NEXT:    retq
    310 ;
    311 ; FMA4-LABEL: test_8f32_fnmadd:
    312 ; FMA4:       # BB#0:
    313 ; FMA4-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
    314 ; FMA4-NEXT:    retq
    315 ;
    316 ; AVX512-LABEL: test_8f32_fnmadd:
    317 ; AVX512:       # BB#0:
    318 ; AVX512-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
    319 ; AVX512-NEXT:    retq
    320   %x = fmul <8 x float> %a0, %a1 ; lane-wise product a0*a1
    321   %res = fsub <8 x float> %a2, %x ; a2 minus the product
    322   ret <8 x float> %res
    323 }
    324 
    325 define double @test_f64_fnmadd(double %a0, double %a1, double %a2) { ; scalar double a2 - a0*a1; assertions expect one fused negated multiply-add (the AVX512 run also expects a result register copy)
    326 ; FMA-LABEL: test_f64_fnmadd:
    327 ; FMA:       # BB#0:
    328 ; FMA-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0
    329 ; FMA-NEXT:    retq
    330 ;
    331 ; FMA4-LABEL: test_f64_fnmadd:
    332 ; FMA4:       # BB#0:
    333 ; FMA4-NEXT:    vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
    334 ; FMA4-NEXT:    retq
    335 ;
    336 ; AVX512-LABEL: test_f64_fnmadd:
    337 ; AVX512:       # BB#0:
    338 ; AVX512-NEXT:    vfnmadd213sd %xmm2, %xmm0, %xmm1
    339 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
    340 ; AVX512-NEXT:    retq
    341   %x = fmul double %a0, %a1 ; product a0*a1
    342   %res = fsub double %a2, %x ; a2 minus the product
    343   ret double %res
    344 }
    345 
    346 define <2 x double> @test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { ; <2 x double> a2 - a0*a1; assertions expect one fused negated multiply-add
    347 ; FMA-LABEL: test_2f64_fnmadd:
    348 ; FMA:       # BB#0:
    349 ; FMA-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0
    350 ; FMA-NEXT:    retq
    351 ;
    352 ; FMA4-LABEL: test_2f64_fnmadd:
    353 ; FMA4:       # BB#0:
    354 ; FMA4-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
    355 ; FMA4-NEXT:    retq
    356 ;
    357 ; AVX512-LABEL: test_2f64_fnmadd:
    358 ; AVX512:       # BB#0:
    359 ; AVX512-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0
    360 ; AVX512-NEXT:    retq
    361   %x = fmul <2 x double> %a0, %a1 ; lane-wise product a0*a1
    362   %res = fsub <2 x double> %a2, %x ; a2 minus the product
    363   ret <2 x double> %res
    364 }
    365 
    366 define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { ; <4 x double> a2 - a0*a1; assertions expect one fused negated multiply-add on ymm registers
    367 ; FMA-LABEL: test_4f64_fnmadd:
    368 ; FMA:       # BB#0:
    369 ; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
    370 ; FMA-NEXT:    retq
    371 ;
    372 ; FMA4-LABEL: test_4f64_fnmadd:
    373 ; FMA4:       # BB#0:
    374 ; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
    375 ; FMA4-NEXT:    retq
    376 ;
    377 ; AVX512-LABEL: test_4f64_fnmadd:
    378 ; AVX512:       # BB#0:
    379 ; AVX512-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
    380 ; AVX512-NEXT:    retq
    381   %x = fmul <4 x double> %a0, %a1 ; lane-wise product a0*a1
    382   %res = fsub <4 x double> %a2, %x ; a2 minus the product
    383   ret <4 x double> %res
    384 }
    385 
    386 ;
    387 ; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
    388 ;
    389 
    390 define float @test_f32_fnmsub(float %a0, float %a1, float %a2) { ; scalar float -(a0*a1) - a2; assertions expect one fused negated multiply-subtract (the AVX512 run also expects a result register copy)
    391 ; FMA-LABEL: test_f32_fnmsub:
    392 ; FMA:       # BB#0:
    393 ; FMA-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0
    394 ; FMA-NEXT:    retq
    395 ;
    396 ; FMA4-LABEL: test_f32_fnmsub:
    397 ; FMA4:       # BB#0:
    398 ; FMA4-NEXT:    vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
    399 ; FMA4-NEXT:    retq
    400 ;
    401 ; AVX512-LABEL: test_f32_fnmsub:
    402 ; AVX512:       # BB#0:
    403 ; AVX512-NEXT:    vfnmsub213ss %xmm2, %xmm0, %xmm1
    404 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
    405 ; AVX512-NEXT:    retq
    406   %x = fmul float %a0, %a1 ; product a0*a1
    407   %y = fsub float -0.000000e+00, %x ; negation of the product, written as -0.0 - x
    408   %res = fsub float %y, %a2 ; negated product minus a2
    409   ret float %res
    410 }
    411 
    412 define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; <4 x float> -(a0*a1) - a2; assertions expect one fused negated multiply-subtract
    413 ; FMA-LABEL: test_4f32_fnmsub:
    414 ; FMA:       # BB#0:
    415 ; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
    416 ; FMA-NEXT:    retq
    417 ;
    418 ; FMA4-LABEL: test_4f32_fnmsub:
    419 ; FMA4:       # BB#0:
    420 ; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
    421 ; FMA4-NEXT:    retq
    422 ;
    423 ; AVX512-LABEL: test_4f32_fnmsub:
    424 ; AVX512:       # BB#0:
    425 ; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
    426 ; AVX512-NEXT:    retq
    427   %x = fmul <4 x float> %a0, %a1 ; lane-wise product a0*a1
    428   %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x ; negation of the product, written as -0.0 - x
    429   %res = fsub <4 x float> %y, %a2 ; negated product minus a2
    430   ret <4 x float> %res
    431 }
    432 
    433 define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { ; <8 x float> -(a0*a1) - a2; assertions expect one fused negated multiply-subtract on ymm registers
    434 ; FMA-LABEL: test_8f32_fnmsub:
    435 ; FMA:       # BB#0:
    436 ; FMA-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0
    437 ; FMA-NEXT:    retq
    438 ;
    439 ; FMA4-LABEL: test_8f32_fnmsub:
    440 ; FMA4:       # BB#0:
    441 ; FMA4-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
    442 ; FMA4-NEXT:    retq
    443 ;
    444 ; AVX512-LABEL: test_8f32_fnmsub:
    445 ; AVX512:       # BB#0:
    446 ; AVX512-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0
    447 ; AVX512-NEXT:    retq
    448   %x = fmul <8 x float> %a0, %a1 ; lane-wise product a0*a1
    449   %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x ; negation of the product, written as -0.0 - x
    450   %res = fsub <8 x float> %y, %a2 ; negated product minus a2
    451   ret <8 x float> %res
    452 }
    453 
    454 define double @test_f64_fnmsub(double %a0, double %a1, double %a2) { ; scalar double -(a0*a1) - a2; assertions expect one fused negated multiply-subtract (the AVX512 run also expects a result register copy)
    455 ; FMA-LABEL: test_f64_fnmsub:
    456 ; FMA:       # BB#0:
    457 ; FMA-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
    458 ; FMA-NEXT:    retq
    459 ;
    460 ; FMA4-LABEL: test_f64_fnmsub:
    461 ; FMA4:       # BB#0:
    462 ; FMA4-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
    463 ; FMA4-NEXT:    retq
    464 ;
    465 ; AVX512-LABEL: test_f64_fnmsub:
    466 ; AVX512:       # BB#0:
    467 ; AVX512-NEXT:    vfnmsub213sd %xmm2, %xmm0, %xmm1
    468 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
    469 ; AVX512-NEXT:    retq
    470   %x = fmul double %a0, %a1 ; product a0*a1
    471   %y = fsub double -0.000000e+00, %x ; negation of the product, written as -0.0 - x
    472   %res = fsub double %y, %a2 ; negated product minus a2
    473   ret double %res
    474 }
    475 
    476 define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { ; <2 x double> -(a0*a1) - a2; assertions expect one fused negated multiply-subtract
    477 ; FMA-LABEL: test_2f64_fnmsub:
    478 ; FMA:       # BB#0:
    479 ; FMA-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0
    480 ; FMA-NEXT:    retq
    481 ;
    482 ; FMA4-LABEL: test_2f64_fnmsub:
    483 ; FMA4:       # BB#0:
    484 ; FMA4-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
    485 ; FMA4-NEXT:    retq
    486 ;
    487 ; AVX512-LABEL: test_2f64_fnmsub:
    488 ; AVX512:       # BB#0:
    489 ; AVX512-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0
    490 ; AVX512-NEXT:    retq
    491   %x = fmul <2 x double> %a0, %a1 ; lane-wise product a0*a1
    492   %y = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x ; negation of the product, written as -0.0 - x
    493   %res = fsub <2 x double> %y, %a2 ; negated product minus a2
    494   ret <2 x double> %res
    495 }
    496 
    497 define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { ; <4 x double> -(a0*a1) - a2; assertions expect one fused negated multiply-subtract on ymm registers
    498 ; FMA-LABEL: test_4f64_fnmsub:
    499 ; FMA:       # BB#0:
    500 ; FMA-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
    501 ; FMA-NEXT:    retq
    502 ;
    503 ; FMA4-LABEL: test_4f64_fnmsub:
    504 ; FMA4:       # BB#0:
    505 ; FMA4-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
    506 ; FMA4-NEXT:    retq
    507 ;
    508 ; AVX512-LABEL: test_4f64_fnmsub:
    509 ; AVX512:       # BB#0:
    510 ; AVX512-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
    511 ; AVX512-NEXT:    retq
    512   %x = fmul <4 x double> %a0, %a1 ; lane-wise product a0*a1
    513   %y = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x ; negation of the product, written as -0.0 - x
    514   %res = fsub <4 x double> %y, %a2 ; negated product minus a2
    515   ret <4 x double> %res
    516 }
    517 
    518 ;
    519 ; Load Folding Patterns
    520 ;
    521 
    522 define <4 x float> @test_4f32_fmadd_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) { ; (load a0)*a1 + a2; the FMA and FMA4 prefixes expect the load as a memory operand of the fused instruction, the AVX512 prefix expects a separate vmovaps load
    523 ; FMA-LABEL: test_4f32_fmadd_load:
    524 ; FMA:       # BB#0:
    525 ; FMA-NEXT:    vfmadd132ps (%rdi), %xmm1, %xmm0
    526 ; FMA-NEXT:    retq
    527 ;
    528 ; FMA4-LABEL: test_4f32_fmadd_load:
    529 ; FMA4:       # BB#0:
    530 ; FMA4-NEXT:    vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
    531 ; FMA4-NEXT:    retq
    532 ;
    533 ; AVX512-LABEL: test_4f32_fmadd_load:
    534 ; AVX512:       # BB#0:
    535 ; AVX512-NEXT:    vmovaps (%rdi), %xmm2
    536 ; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm0, %xmm2
    537 ; AVX512-NEXT:    vmovaps %zmm2, %zmm0
    538 ; AVX512-NEXT:    retq
    539   %x = load <4 x float>, <4 x float>* %a0 ; first multiplicand comes from memory
    540   %y = fmul <4 x float> %x, %a1 ; loaded value times a1
    541   %res = fadd <4 x float> %y, %a2 ; product plus a2
    542   ret <4 x float> %res
    543 }
    544 
    545 define <2 x double> @test_2f64_fmsub_load(<2 x double>* %a0, <2 x double> %a1, <2 x double> %a2) { ; (load a0)*a1 - a2; the FMA and FMA4 prefixes expect the load as a memory operand of the fused instruction, the AVX512 prefix expects a separate vmovapd load
    546 ; FMA-LABEL: test_2f64_fmsub_load:
    547 ; FMA:       # BB#0:
    548 ; FMA-NEXT:    vfmsub132pd (%rdi), %xmm1, %xmm0
    549 ; FMA-NEXT:    retq
    550 ;
    551 ; FMA4-LABEL: test_2f64_fmsub_load:
    552 ; FMA4:       # BB#0:
    553 ; FMA4-NEXT:    vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0
    554 ; FMA4-NEXT:    retq
    555 ;
    556 ; AVX512-LABEL: test_2f64_fmsub_load:
    557 ; AVX512:       # BB#0:
    558 ; AVX512-NEXT:    vmovapd (%rdi), %xmm2
    559 ; AVX512-NEXT:    vfmsub213pd %xmm1, %xmm0, %xmm2
    560 ; AVX512-NEXT:    vmovaps %zmm2, %zmm0
    561 ; AVX512-NEXT:    retq
    562   %x = load <2 x double>, <2 x double>* %a0 ; first multiplicand comes from memory
    563   %y = fmul <2 x double> %x, %a1 ; loaded value times a1
    564   %res = fsub <2 x double> %y, %a2 ; product minus a2
    565   ret <2 x double> %res
    566 }
    567 
    568 ;
    569 ; Patterns (+ -1.0 constant variants, each in both fmul operand orders): mul(add(x,1.0),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
    570 ;
    571 
    572 define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) { ; (x + 1.0) * y; assertions expect a single fused multiply-add that uses y as both multiplicand and addend (x*y + y)
    573 ; FMA-LABEL: test_v4f32_mul_add_x_one_y:
    574 ; FMA:       # BB#0:
    575 ; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    576 ; FMA-NEXT:    retq
    577 ;
    578 ; FMA4-LABEL: test_v4f32_mul_add_x_one_y:
    579 ; FMA4:       # BB#0:
    580 ; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
    581 ; FMA4-NEXT:    retq
    582 ;
    583 ; AVX512-LABEL: test_v4f32_mul_add_x_one_y:
    584 ; AVX512:       # BB#0:
    585 ; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    586 ; AVX512-NEXT:    retq
    587   %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> ; x + 1.0 in every lane
    588   %m = fmul <4 x float> %a, %y ; sum is the left fmul operand
    589   ret <4 x float> %m
    590 }
    591 
    592 define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) { ; y * (x + 1.0); commuted fmul operands, same expected fused multiply-add (x*y + y)
    593 ; FMA-LABEL: test_v4f32_mul_y_add_x_one:
    594 ; FMA:       # BB#0:
    595 ; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    596 ; FMA-NEXT:    retq
    597 ;
    598 ; FMA4-LABEL: test_v4f32_mul_y_add_x_one:
    599 ; FMA4:       # BB#0:
    600 ; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
    601 ; FMA4-NEXT:    retq
    602 ;
    603 ; AVX512-LABEL: test_v4f32_mul_y_add_x_one:
    604 ; AVX512:       # BB#0:
    605 ; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    606 ; AVX512-NEXT:    retq
    607   %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> ; x + 1.0 in every lane
    608   %m = fmul <4 x float> %y, %a ; sum is the right fmul operand
    609   ret <4 x float> %m
    610 }
    611 
    612 define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) { ; (x + -1.0) * y; assertions expect a single fused multiply-subtract (x*y - y)
    613 ; FMA-LABEL: test_v4f32_mul_add_x_negone_y:
    614 ; FMA:       # BB#0:
    615 ; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    616 ; FMA-NEXT:    retq
    617 ;
    618 ; FMA4-LABEL: test_v4f32_mul_add_x_negone_y:
    619 ; FMA4:       # BB#0:
    620 ; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
    621 ; FMA4-NEXT:    retq
    622 ;
    623 ; AVX512-LABEL: test_v4f32_mul_add_x_negone_y:
    624 ; AVX512:       # BB#0:
    625 ; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    626 ; AVX512-NEXT:    retq
    627   %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> ; x + (-1.0) in every lane
    628   %m = fmul <4 x float> %a, %y ; sum is the left fmul operand
    629   ret <4 x float> %m
    630 }
    631 
    632 define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) { ; y * (x + -1.0); commuted fmul operands, same expected fused multiply-subtract (x*y - y)
    633 ; FMA-LABEL: test_v4f32_mul_y_add_x_negone:
    634 ; FMA:       # BB#0:
    635 ; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    636 ; FMA-NEXT:    retq
    637 ;
    638 ; FMA4-LABEL: test_v4f32_mul_y_add_x_negone:
    639 ; FMA4:       # BB#0:
    640 ; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
    641 ; FMA4-NEXT:    retq
    642 ;
    643 ; AVX512-LABEL: test_v4f32_mul_y_add_x_negone:
    644 ; AVX512:       # BB#0:
    645 ; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    646 ; AVX512-NEXT:    retq
    647   %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> ; x + (-1.0) in every lane
    648   %m = fmul <4 x float> %y, %a ; sum is the right fmul operand
    649   ret <4 x float> %m
    650 }
    651 
    652 define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) { ; (1.0 - x) * y; assertions expect a single fused negated multiply-add (-(x*y) + y)
    653 ; FMA-LABEL: test_v4f32_mul_sub_one_x_y:
    654 ; FMA:       # BB#0:
    655 ; FMA-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
    656 ; FMA-NEXT:    retq
    657 ;
    658 ; FMA4-LABEL: test_v4f32_mul_sub_one_x_y:
    659 ; FMA4:       # BB#0:
    660 ; FMA4-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
    661 ; FMA4-NEXT:    retq
    662 ;
    663 ; AVX512-LABEL: test_v4f32_mul_sub_one_x_y:
    664 ; AVX512:       # BB#0:
    665 ; AVX512-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
    666 ; AVX512-NEXT:    retq
    667   %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x ; 1.0 - x in every lane
    668   %m = fmul <4 x float> %s, %y ; difference is the left fmul operand
    669   ret <4 x float> %m
    670 }
    671 
    672 define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) { ; y * (1.0 - x); commuted fmul operands, same expected fused negated multiply-add (-(x*y) + y)
    673 ; FMA-LABEL: test_v4f32_mul_y_sub_one_x:
    674 ; FMA:       # BB#0:
    675 ; FMA-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
    676 ; FMA-NEXT:    retq
    677 ;
    678 ; FMA4-LABEL: test_v4f32_mul_y_sub_one_x:
    679 ; FMA4:       # BB#0:
    680 ; FMA4-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
    681 ; FMA4-NEXT:    retq
    682 ;
    683 ; AVX512-LABEL: test_v4f32_mul_y_sub_one_x:
    684 ; AVX512:       # BB#0:
    685 ; AVX512-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
    686 ; AVX512-NEXT:    retq
    687   %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x ; 1.0 - x in every lane
    688   %m = fmul <4 x float> %y, %s ; difference is the right fmul operand
    689   ret <4 x float> %m
    690 }
    691 
    692 define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) { ; (-1.0 - x) * y; assertions expect a single fused negated multiply-subtract (-(x*y) - y)
    693 ; FMA-LABEL: test_v4f32_mul_sub_negone_x_y:
    694 ; FMA:       # BB#0:
    695 ; FMA-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
    696 ; FMA-NEXT:    retq
    697 ;
    698 ; FMA4-LABEL: test_v4f32_mul_sub_negone_x_y:
    699 ; FMA4:       # BB#0:
    700 ; FMA4-NEXT:    vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
    701 ; FMA4-NEXT:    retq
    702 ;
    703 ; AVX512-LABEL: test_v4f32_mul_sub_negone_x_y:
    704 ; AVX512:       # BB#0:
    705 ; AVX512-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
    706 ; AVX512-NEXT:    retq
    707   %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x ; -1.0 - x in every lane
    708   %m = fmul <4 x float> %s, %y ; difference is the left fmul operand
    709   ret <4 x float> %m
    710 }
    711 
    712 define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) { ; y * (-1.0 - x); commuted fmul operands, same expected fused negated multiply-subtract (-(x*y) - y)
    713 ; FMA-LABEL: test_v4f32_mul_y_sub_negone_x:
    714 ; FMA:       # BB#0:
    715 ; FMA-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
    716 ; FMA-NEXT:    retq
    717 ;
    718 ; FMA4-LABEL: test_v4f32_mul_y_sub_negone_x:
    719 ; FMA4:       # BB#0:
    720 ; FMA4-NEXT:    vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
    721 ; FMA4-NEXT:    retq
    722 ;
    723 ; AVX512-LABEL: test_v4f32_mul_y_sub_negone_x:
    724 ; AVX512:       # BB#0:
    725 ; AVX512-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
    726 ; AVX512-NEXT:    retq
    727   %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x ; -1.0 - x in every lane
    728   %m = fmul <4 x float> %y, %s ; difference is the right fmul operand
    729   ret <4 x float> %m
    730 }
    731 
    732 define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) { ; (x - 1.0) * y; assertions expect a single fused multiply-subtract (x*y - y)
    733 ; FMA-LABEL: test_v4f32_mul_sub_x_one_y:
    734 ; FMA:       # BB#0:
    735 ; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    736 ; FMA-NEXT:    retq
    737 ;
    738 ; FMA4-LABEL: test_v4f32_mul_sub_x_one_y:
    739 ; FMA4:       # BB#0:
    740 ; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
    741 ; FMA4-NEXT:    retq
    742 ;
    743 ; AVX512-LABEL: test_v4f32_mul_sub_x_one_y:
    744 ; AVX512:       # BB#0:
    745 ; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    746 ; AVX512-NEXT:    retq
    747   %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> ; x - 1.0 in every lane
    748   %m = fmul <4 x float> %s, %y ; difference is the left fmul operand
    749   ret <4 x float> %m
    750 }
    751 
    752 define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) { ; y * (x - 1.0); commuted fmul operands, same expected fused multiply-subtract (x*y - y)
    753 ; FMA-LABEL: test_v4f32_mul_y_sub_x_one:
    754 ; FMA:       # BB#0:
    755 ; FMA-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    756 ; FMA-NEXT:    retq
    757 ;
    758 ; FMA4-LABEL: test_v4f32_mul_y_sub_x_one:
    759 ; FMA4:       # BB#0:
    760 ; FMA4-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
    761 ; FMA4-NEXT:    retq
    762 ;
    763 ; AVX512-LABEL: test_v4f32_mul_y_sub_x_one:
    764 ; AVX512:       # BB#0:
    765 ; AVX512-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    766 ; AVX512-NEXT:    retq
    767   %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> ; x - 1.0 in every lane
    768   %m = fmul <4 x float> %y, %s ; difference is the right fmul operand
    769   ret <4 x float> %m
    770 }
    771 
    772 define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) { ; (x - -1.0) * y; assertions expect a single fused multiply-add (x*y + y)
    773 ; FMA-LABEL: test_v4f32_mul_sub_x_negone_y:
    774 ; FMA:       # BB#0:
    775 ; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    776 ; FMA-NEXT:    retq
    777 ;
    778 ; FMA4-LABEL: test_v4f32_mul_sub_x_negone_y:
    779 ; FMA4:       # BB#0:
    780 ; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
    781 ; FMA4-NEXT:    retq
    782 ;
    783 ; AVX512-LABEL: test_v4f32_mul_sub_x_negone_y:
    784 ; AVX512:       # BB#0:
    785 ; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    786 ; AVX512-NEXT:    retq
    787   %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> ; x - (-1.0) in every lane
    788   %m = fmul <4 x float> %s, %y ; difference is the left fmul operand
    789   ret <4 x float> %m
    790 }
    791 
    792 define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) { ; y * (x - -1.0); commuted fmul operands, same expected fused multiply-add (x*y + y)
    793 ; FMA-LABEL: test_v4f32_mul_y_sub_x_negone:
    794 ; FMA:       # BB#0:
    795 ; FMA-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    796 ; FMA-NEXT:    retq
    797 ;
    798 ; FMA4-LABEL: test_v4f32_mul_y_sub_x_negone:
    799 ; FMA4:       # BB#0:
    800 ; FMA4-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
    801 ; FMA4-NEXT:    retq
    802 ;
    803 ; AVX512-LABEL: test_v4f32_mul_y_sub_x_negone:
    804 ; AVX512:       # BB#0:
    805 ; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    806 ; AVX512-NEXT:    retq
    807   %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> ; x - (-1.0) in every lane
    808   %m = fmul <4 x float> %y, %s ; difference is the right fmul operand
    809   ret <4 x float> %m
    810 }
    811 
    812 ;
    813 ; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
    814 ;
    815 
    816 define float @test_f32_interp(float %x, float %y, float %t) { ; scalar interpolation x*t + y*(1.0 - t); assertions expect a negated multiply-add followed by a multiply-add (the AVX512 run also expects a result register copy)
    817 ; FMA-LABEL: test_f32_interp:
    818 ; FMA:       # BB#0:
    819 ; FMA-NEXT:    vfnmadd213ss %xmm1, %xmm2, %xmm1
    820 ; FMA-NEXT:    vfmadd213ss %xmm1, %xmm2, %xmm0
    821 ; FMA-NEXT:    retq
    822 ;
    823 ; FMA4-LABEL: test_f32_interp:
    824 ; FMA4:       # BB#0:
    825 ; FMA4-NEXT:    vfnmaddss %xmm1, %xmm1, %xmm2, %xmm1
    826 ; FMA4-NEXT:    vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
    827 ; FMA4-NEXT:    retq
    828 ;
    829 ; AVX512-LABEL: test_f32_interp:
    830 ; AVX512:       # BB#0:
    831 ; AVX512-NEXT:    vfnmadd213ss %xmm1, %xmm2, %xmm1
    832 ; AVX512-NEXT:    vfmadd213ss %xmm1, %xmm0, %xmm2
    833 ; AVX512-NEXT:    vmovaps %zmm2, %zmm0
    834 ; AVX512-NEXT:    retq
    835   %t1 = fsub float 1.0, %t ; complementary weight 1.0 - t
    836   %tx = fmul float %x, %t ; x weighted by t
    837   %ty = fmul float %y, %t1 ; y weighted by 1.0 - t
    838   %r = fadd float %tx, %ty ; sum of the two weighted terms
    839   ret float %r
    840 }
    841 
    842 define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) { ; <4 x float> interpolation x*t + y*(1.0 - t); assertions expect a negated multiply-add followed by a multiply-add (the AVX512 run first copies t into a scratch register)
    843 ; FMA-LABEL: test_v4f32_interp:
    844 ; FMA:       # BB#0:
    845 ; FMA-NEXT:    vfnmadd213ps %xmm1, %xmm2, %xmm1
    846 ; FMA-NEXT:    vfmadd213ps %xmm1, %xmm2, %xmm0
    847 ; FMA-NEXT:    retq
    848 ;
    849 ; FMA4-LABEL: test_v4f32_interp:
    850 ; FMA4:       # BB#0:
    851 ; FMA4-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm2, %xmm1
    852 ; FMA4-NEXT:    vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
    853 ; FMA4-NEXT:    retq
    854 ;
    855 ; AVX512-LABEL: test_v4f32_interp:
    856 ; AVX512:       # BB#0:
    857 ; AVX512-NEXT:    vmovaps %zmm2, %zmm3
    858 ; AVX512-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm3
    859 ; AVX512-NEXT:    vfmadd213ps %xmm3, %xmm2, %xmm0
    860 ; AVX512-NEXT:    retq
    861   %t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t ; complementary weight 1.0 - t
    862   %tx = fmul <4 x float> %x, %t ; x weighted by t
    863   %ty = fmul <4 x float> %y, %t1 ; y weighted by 1.0 - t
    864   %r = fadd <4 x float> %tx, %ty ; sum of the two weighted terms
    865   ret <4 x float> %r
    866 }
    867 
    868 define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) { ; 8 x float lane-wise x*t + y*(1.0 - t); the checks expect one vfnmadd-family and one vfmadd-family instruction on ymm registers (plus a register copy in one run)
    869 ; FMA-LABEL: test_v8f32_interp:
    870 ; FMA:       # BB#0:
    871 ; FMA-NEXT:    vfnmadd213ps %ymm1, %ymm2, %ymm1
    872 ; FMA-NEXT:    vfmadd213ps %ymm1, %ymm2, %ymm0
    873 ; FMA-NEXT:    retq
    874 ;
    875 ; FMA4-LABEL: test_v8f32_interp:
    876 ; FMA4:       # BB#0:
    877 ; FMA4-NEXT:    vfnmaddps %ymm1, %ymm1, %ymm2, %ymm1
    878 ; FMA4-NEXT:    vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
    879 ; FMA4-NEXT:    retq
    880 ;
    881 ; AVX512-LABEL: test_v8f32_interp:
    882 ; AVX512:       # BB#0:
    883 ; AVX512-NEXT:    vmovaps %zmm2, %zmm3
    884 ; AVX512-NEXT:    vfnmadd213ps %ymm1, %ymm1, %ymm3
    885 ; AVX512-NEXT:    vfmadd213ps %ymm3, %ymm2, %ymm0
    886 ; AVX512-NEXT:    retq
    887   %t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t ; splat(1.0) - t
    888   %tx = fmul <8 x float> %x, %t ; x * t
    889   %ty = fmul <8 x float> %y, %t1 ; y * (1.0 - t)
    890   %r = fadd <8 x float> %tx, %ty ; sum of the two products
    891   ret <8 x float> %r
    892 }
    893 
    894 define double @test_f64_interp(double %x, double %y, double %t) { ; scalar double x*t + y*(1.0 - t); the checks expect one vfnmadd-family and one vfmadd-family instruction (plus a register copy in one run)
    895 ; FMA-LABEL: test_f64_interp:
    896 ; FMA:       # BB#0:
    897 ; FMA-NEXT:    vfnmadd213sd %xmm1, %xmm2, %xmm1
    898 ; FMA-NEXT:    vfmadd213sd %xmm1, %xmm2, %xmm0
    899 ; FMA-NEXT:    retq
    900 ;
    901 ; FMA4-LABEL: test_f64_interp:
    902 ; FMA4:       # BB#0:
    903 ; FMA4-NEXT:    vfnmaddsd %xmm1, %xmm1, %xmm2, %xmm1
    904 ; FMA4-NEXT:    vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0
    905 ; FMA4-NEXT:    retq
    906 ;
    907 ; AVX512-LABEL: test_f64_interp:
    908 ; AVX512:       # BB#0:
    909 ; AVX512-NEXT:    vfnmadd213sd %xmm1, %xmm2, %xmm1
    910 ; AVX512-NEXT:    vfmadd213sd %xmm1, %xmm0, %xmm2
    911 ; AVX512-NEXT:    vmovaps %zmm2, %zmm0
    912 ; AVX512-NEXT:    retq
    913   %t1 = fsub double 1.0, %t ; 1.0 - t
    914   %tx = fmul double %x, %t ; x * t
    915   %ty = fmul double %y, %t1 ; y * (1.0 - t)
    916   %r = fadd double %tx, %ty ; sum of the two products
    917   ret double %r
    918 }
    919 
    920 define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) { ; 2 x double lane-wise x*t + y*(1.0 - t); the checks expect one vfnmadd-family and one vfmadd-family instruction (plus a register copy in one run)
    921 ; FMA-LABEL: test_v2f64_interp:
    922 ; FMA:       # BB#0:
    923 ; FMA-NEXT:    vfnmadd213pd %xmm1, %xmm2, %xmm1
    924 ; FMA-NEXT:    vfmadd213pd %xmm1, %xmm2, %xmm0
    925 ; FMA-NEXT:    retq
    926 ;
    927 ; FMA4-LABEL: test_v2f64_interp:
    928 ; FMA4:       # BB#0:
    929 ; FMA4-NEXT:    vfnmaddpd %xmm1, %xmm1, %xmm2, %xmm1
    930 ; FMA4-NEXT:    vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
    931 ; FMA4-NEXT:    retq
    932 ;
    933 ; AVX512-LABEL: test_v2f64_interp:
    934 ; AVX512:       # BB#0:
    935 ; AVX512-NEXT:    vmovaps %zmm2, %zmm3
    936 ; AVX512-NEXT:    vfnmadd213pd %xmm1, %xmm1, %xmm3
    937 ; AVX512-NEXT:    vfmadd213pd %xmm3, %xmm2, %xmm0
    938 ; AVX512-NEXT:    retq
    939   %t1 = fsub <2 x double> <double 1.0, double 1.0>, %t ; splat(1.0) - t
    940   %tx = fmul <2 x double> %x, %t ; x * t
    941   %ty = fmul <2 x double> %y, %t1 ; y * (1.0 - t)
    942   %r = fadd <2 x double> %tx, %ty ; sum of the two products
    943   ret <2 x double> %r
    944 }
    945 
    946 define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) { ; 4 x double lane-wise x*t + y*(1.0 - t); the checks expect one vfnmadd-family and one vfmadd-family instruction on ymm registers (plus a register copy in one run)
    947 ; FMA-LABEL: test_v4f64_interp:
    948 ; FMA:       # BB#0:
    949 ; FMA-NEXT:    vfnmadd213pd %ymm1, %ymm2, %ymm1
    950 ; FMA-NEXT:    vfmadd213pd %ymm1, %ymm2, %ymm0
    951 ; FMA-NEXT:    retq
    952 ;
    953 ; FMA4-LABEL: test_v4f64_interp:
    954 ; FMA4:       # BB#0:
    955 ; FMA4-NEXT:    vfnmaddpd %ymm1, %ymm1, %ymm2, %ymm1
    956 ; FMA4-NEXT:    vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
    957 ; FMA4-NEXT:    retq
    958 ;
    959 ; AVX512-LABEL: test_v4f64_interp:
    960 ; AVX512:       # BB#0:
    961 ; AVX512-NEXT:    vmovaps %zmm2, %zmm3
    962 ; AVX512-NEXT:    vfnmadd213pd %ymm1, %ymm1, %ymm3
    963 ; AVX512-NEXT:    vfmadd213pd %ymm3, %ymm2, %ymm0
    964 ; AVX512-NEXT:    retq
    965   %t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t ; splat(1.0) - t
    966   %tx = fmul <4 x double> %x, %t ; x * t
    967   %ty = fmul <4 x double> %y, %t1 ; y * (1.0 - t)
    968   %r = fadd <4 x double> %tx, %ty ; sum of the two products
    969   ret <4 x double> %r
    970 }
    971 
    972 ;
    973 ; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
    974 ;
    975 
    976 define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { ; -((a0*a1) + a2) on 4 x float; every run's checks expect a single vfnmsub-family instruction
    977 ; FMA-LABEL: test_v4f32_fneg_fmadd:
    978 ; FMA:       # BB#0:
    979 ; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
    980 ; FMA-NEXT:    retq
    981 ;
    982 ; FMA4-LABEL: test_v4f32_fneg_fmadd:
    983 ; FMA4:       # BB#0:
    984 ; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
    985 ; FMA4-NEXT:    retq
    986 ;
    987 ; AVX512-LABEL: test_v4f32_fneg_fmadd:
    988 ; AVX512:       # BB#0:
    989 ; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
    990 ; AVX512-NEXT:    retq
    991   %mul = fmul <4 x float> %a0, %a1 ; a0 * a1
    992   %add = fadd <4 x float> %mul, %a2 ; (a0 * a1) + a2
    993   %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add ; subtract from -0.0, i.e. negate %add
    994   ret <4 x float> %neg
    995 }
    996 
    997 define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { ; -((a0*a1) - a2) on 4 x double; every run's checks expect a single vfnmadd-family instruction
    998 ; FMA-LABEL: test_v4f64_fneg_fmsub:
    999 ; FMA:       # BB#0:
   1000 ; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
   1001 ; FMA-NEXT:    retq
   1002 ;
   1003 ; FMA4-LABEL: test_v4f64_fneg_fmsub:
   1004 ; FMA4:       # BB#0:
   1005 ; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
   1006 ; FMA4-NEXT:    retq
   1007 ;
   1008 ; AVX512-LABEL: test_v4f64_fneg_fmsub:
   1009 ; AVX512:       # BB#0:
   1010 ; AVX512-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
   1011 ; AVX512-NEXT:    retq
   1012   %mul = fmul <4 x double> %a0, %a1 ; a0 * a1
   1013   %sub = fsub <4 x double> %mul, %a2 ; (a0 * a1) - a2
   1014   %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub ; subtract from -0.0, i.e. negate %sub
   1015   ret <4 x double> %neg
   1016 }
   1017 
   1018 define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { ; -(-(a0*a1) + a2) on 4 x float; every run's checks expect a single vfmsub-family instruction
   1019 ; FMA-LABEL: test_v4f32_fneg_fnmadd:
   1020 ; FMA:       # BB#0:
   1021 ; FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
   1022 ; FMA-NEXT:    retq
   1023 ;
   1024 ; FMA4-LABEL: test_v4f32_fneg_fnmadd:
   1025 ; FMA4:       # BB#0:
   1026 ; FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
   1027 ; FMA4-NEXT:    retq
   1028 ;
   1029 ; AVX512-LABEL: test_v4f32_fneg_fnmadd:
   1030 ; AVX512:       # BB#0:
   1031 ; AVX512-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
   1032 ; AVX512-NEXT:    retq
   1033   %mul = fmul <4 x float> %a0, %a1 ; a0 * a1
   1034   %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul ; -(a0 * a1)
   1035   %add = fadd <4 x float> %neg0, %a2 ; -(a0 * a1) + a2
   1036   %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add ; negate the sum
   1037   ret <4 x float> %neg1
   1038 }
   1039 
   1040 define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { ; -(-(a0*a1) - a2) on 4 x double; every run's checks expect a single vfmadd-family instruction
   1041 ; FMA-LABEL: test_v4f64_fneg_fnmsub:
   1042 ; FMA:       # BB#0:
   1043 ; FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
   1044 ; FMA-NEXT:    retq
   1045 ;
   1046 ; FMA4-LABEL: test_v4f64_fneg_fnmsub:
   1047 ; FMA4:       # BB#0:
   1048 ; FMA4-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
   1049 ; FMA4-NEXT:    retq
   1050 ;
   1051 ; AVX512-LABEL: test_v4f64_fneg_fnmsub:
   1052 ; AVX512:       # BB#0:
   1053 ; AVX512-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
   1054 ; AVX512-NEXT:    retq
   1055   %mul = fmul <4 x double> %a0, %a1 ; a0 * a1
   1056   %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul ; -(a0 * a1)
   1057   %sub = fsub <4 x double> %neg0, %a2 ; -(a0 * a1) - a2
   1058   %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub ; negate the difference
   1059   ret <4 x double> %neg1
   1060 }
   1061 
   1062 ;
   1063 ; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
   1064 ;
   1065 
   1066 define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 { ; x*c1 + x*c2 with two constant vectors; every run's checks expect a single vmulps by a rip-relative constant
   1067 ; FMA-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
   1068 ; FMA:       # BB#0:
   1069 ; FMA-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0
   1070 ; FMA-NEXT:    retq
   1071 ;
   1072 ; FMA4-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
   1073 ; FMA4:       # BB#0:
   1074 ; FMA4-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0
   1075 ; FMA4-NEXT:    retq
   1076 ;
   1077 ; AVX512-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
   1078 ; AVX512:       # BB#0:
   1079 ; AVX512-NEXT:    vmulps {{.*}}(%rip){1to4}, %xmm0, %xmm0
   1080 ; AVX512-NEXT:    retq
   1081   %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0> ; x * c1
   1082   %m1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0> ; x * c2; c1 + c2 is 5.0 in every lane
   1083   %a  = fadd <4 x float> %m0, %m1 ; x*c1 + x*c2
   1084   ret <4 x float> %a
   1085 }
   1086 
   1087 ;
   1088 ; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
   1089 ;
   1090 
   1091 define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 { ; (x*c1)*c2 + y with two constant vectors; the checks expect a single vfmadd-family instruction with a rip-relative constant operand (plus a register copy in one run)
   1092 ; FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
   1093 ; FMA:       # BB#0:
   1094 ; FMA-NEXT:    vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0
   1095 ; FMA-NEXT:    retq
   1096 ;
   1097 ; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
   1098 ; FMA4:       # BB#0:
   1099 ; FMA4-NEXT:    vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0
   1100 ; FMA4-NEXT:    retq
   1101 ;
   1102 ; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
   1103 ; AVX512:       # BB#0:
   1104 ; AVX512-NEXT:    vfmadd231ps {{.*}}(%rip), %xmm0, %xmm1
   1105 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
   1106 ; AVX512-NEXT:    retq
   1107   %m0 = fmul <4 x float> %x,  <float 1.0, float 2.0, float 3.0, float 4.0> ; x * c1
   1108   %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0> ; (x * c1) * c2; lane-wise c1*c2 is <4.0, 6.0, 6.0, 4.0>
   1109   %a  = fadd <4 x float> %m1, %y ; add y to the doubly-scaled x
   1110   ret <4 x float> %a
   1111 }
   1112 
   1113 ; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0), applied when the fmul carries the nsz flag
   1114 
   1115 define double @test_f64_fneg_fmul(double %x, double %y) #0 { ; -(x*y) on scalar double with nsz on the multiply; the checks expect a register-zeroing xor followed by a vfnmsub-family instruction (plus a register copy in one run)
   1116 ; FMA-LABEL: test_f64_fneg_fmul:
   1117 ; FMA:       # BB#0:
   1118 ; FMA-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1119 ; FMA-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
   1120 ; FMA-NEXT:    retq
   1121 ;
   1122 ; FMA4-LABEL: test_f64_fneg_fmul:
   1123 ; FMA4:       # BB#0:
   1124 ; FMA4-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1125 ; FMA4-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
   1126 ; FMA4-NEXT:    retq
   1127 ;
   1128 ; AVX512-LABEL: test_f64_fneg_fmul:
   1129 ; AVX512:       # BB#0:
   1130 ; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1131 ; AVX512-NEXT:    vfnmsub213sd %xmm2, %xmm0, %xmm1
   1132 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
   1133 ; AVX512-NEXT:    retq
   1134   %m = fmul nsz double %x, %y ; x * y, with the nsz fast-math flag on the multiply
   1135   %n = fsub double -0.0, %m ; subtract from -0.0, i.e. negate %m
   1136   ret double %n
   1137 }
   1138 
   1139 define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 { ; -(x*y) on 4 x float with nsz on the multiply; the checks expect a register-zeroing xor followed by a vfnmsub-family instruction
   1140 ; FMA-LABEL: test_v4f32_fneg_fmul:
   1141 ; FMA:       # BB#0:
   1142 ; FMA-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1143 ; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
   1144 ; FMA-NEXT:    retq
   1145 ;
   1146 ; FMA4-LABEL: test_v4f32_fneg_fmul:
   1147 ; FMA4:       # BB#0:
   1148 ; FMA4-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1149 ; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
   1150 ; FMA4-NEXT:    retq
   1151 ;
   1152 ; AVX512-LABEL: test_v4f32_fneg_fmul:
   1153 ; AVX512:       # BB#0:
   1154 ; AVX512-NEXT:    vpxord %xmm2, %xmm2, %xmm2
   1155 ; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
   1156 ; AVX512-NEXT:    retq
   1157   %m = fmul nsz <4 x float> %x, %y ; x * y, with the nsz fast-math flag on the multiply
   1158   %n = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %m ; subtract from -0.0, i.e. negate %m
   1159   ret <4 x float> %n
   1160 }
   1161 
   1162 define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 { ; -(x*y) on 4 x double with nsz on the multiply; the checks expect a register-zeroing xor followed by a vfnmsub-family instruction
   1163 ; FMA-LABEL: test_v4f64_fneg_fmul:
   1164 ; FMA:       # BB#0:
   1165 ; FMA-NEXT:    vxorpd %ymm2, %ymm2, %ymm2
   1166 ; FMA-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
   1167 ; FMA-NEXT:    retq
   1168 ;
   1169 ; FMA4-LABEL: test_v4f64_fneg_fmul:
   1170 ; FMA4:       # BB#0:
   1171 ; FMA4-NEXT:    vxorpd %ymm2, %ymm2, %ymm2
   1172 ; FMA4-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
   1173 ; FMA4-NEXT:    retq
   1174 ;
   1175 ; AVX512-LABEL: test_v4f64_fneg_fmul:
   1176 ; AVX512:       # BB#0:
   1177 ; AVX512-NEXT:    vpxord %ymm2, %ymm2, %ymm2
   1178 ; AVX512-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
   1179 ; AVX512-NEXT:    retq
   1180   %m = fmul nsz <4 x double> %x, %y ; x * y, with the nsz fast-math flag on the multiply
   1181   %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m ; subtract from -0.0, i.e. negate %m
   1182   ret <4 x double> %n
   1183 }
   1184 
   1185 define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 { ; same IR as test_v4f64_fneg_fmul but without nsz on the multiply; the shared checks expect a plain vmulpd followed by a vxorpd with a rip-relative constant, i.e. no fused instruction
   1186 ; ALL-LABEL: test_v4f64_fneg_fmul_no_nsz:
   1187 ; ALL:       # BB#0:
   1188 ; ALL-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
   1189 ; ALL-NEXT:    vxorpd {{.*}}(%rip), %ymm0, %ymm0
   1190 ; ALL-NEXT:    retq
   1191   %m = fmul <4 x double> %x, %y ; x * y, no fast-math flags on the multiply
   1192   %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m ; subtract from -0.0, i.e. negate %m
   1193   ret <4 x double> %n
   1194 }
   1195 
   1196 attributes #0 = { "unsafe-fp-math"="true" } ; attribute group referenced as #0 by the function definitions above
   1197