; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA --check-prefix=FMA-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=FMA --check-prefix=FMA-NOINFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-NOINFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-NOINFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512-NOINFS
     10 
;
; Pattern: (fadd (fmul x, y), z) -> (fmadd x, y, z)
;
     14 
     15 define float @test_f32_fmadd(float %a0, float %a1, float %a2) {
     16 ; FMA-LABEL: test_f32_fmadd:
     17 ; FMA:       # %bb.0:
     18 ; FMA-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0
     19 ; FMA-NEXT:    retq
     20 ;
     21 ; FMA4-LABEL: test_f32_fmadd:
     22 ; FMA4:       # %bb.0:
     23 ; FMA4-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
     24 ; FMA4-NEXT:    retq
     25 ;
     26 ; AVX512-LABEL: test_f32_fmadd:
     27 ; AVX512:       # %bb.0:
     28 ; AVX512-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0
     29 ; AVX512-NEXT:    retq
     30   %x = fmul float %a0, %a1
     31   %res = fadd float %x, %a2
     32   ret float %res
     33 }
     34 
     35 define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
     36 ; FMA-LABEL: test_4f32_fmadd:
     37 ; FMA:       # %bb.0:
     38 ; FMA-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
     39 ; FMA-NEXT:    retq
     40 ;
     41 ; FMA4-LABEL: test_4f32_fmadd:
     42 ; FMA4:       # %bb.0:
     43 ; FMA4-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
     44 ; FMA4-NEXT:    retq
     45 ;
     46 ; AVX512-LABEL: test_4f32_fmadd:
     47 ; AVX512:       # %bb.0:
     48 ; AVX512-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0
     49 ; AVX512-NEXT:    retq
     50   %x = fmul <4 x float> %a0, %a1
     51   %res = fadd <4 x float> %x, %a2
     52   ret <4 x float> %res
     53 }
     54 
     55 define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
     56 ; FMA-LABEL: test_8f32_fmadd:
     57 ; FMA:       # %bb.0:
     58 ; FMA-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
     59 ; FMA-NEXT:    retq
     60 ;
     61 ; FMA4-LABEL: test_8f32_fmadd:
     62 ; FMA4:       # %bb.0:
     63 ; FMA4-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
     64 ; FMA4-NEXT:    retq
     65 ;
     66 ; AVX512-LABEL: test_8f32_fmadd:
     67 ; AVX512:       # %bb.0:
     68 ; AVX512-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0
     69 ; AVX512-NEXT:    retq
     70   %x = fmul <8 x float> %a0, %a1
     71   %res = fadd <8 x float> %x, %a2
     72   ret <8 x float> %res
     73 }
     74 
     75 define double @test_f64_fmadd(double %a0, double %a1, double %a2) {
     76 ; FMA-LABEL: test_f64_fmadd:
     77 ; FMA:       # %bb.0:
     78 ; FMA-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0
     79 ; FMA-NEXT:    retq
     80 ;
     81 ; FMA4-LABEL: test_f64_fmadd:
     82 ; FMA4:       # %bb.0:
     83 ; FMA4-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
     84 ; FMA4-NEXT:    retq
     85 ;
     86 ; AVX512-LABEL: test_f64_fmadd:
     87 ; AVX512:       # %bb.0:
     88 ; AVX512-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0
     89 ; AVX512-NEXT:    retq
     90   %x = fmul double %a0, %a1
     91   %res = fadd double %x, %a2
     92   ret double %res
     93 }
     94 
     95 define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
     96 ; FMA-LABEL: test_2f64_fmadd:
     97 ; FMA:       # %bb.0:
     98 ; FMA-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
     99 ; FMA-NEXT:    retq
    100 ;
    101 ; FMA4-LABEL: test_2f64_fmadd:
    102 ; FMA4:       # %bb.0:
    103 ; FMA4-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
    104 ; FMA4-NEXT:    retq
    105 ;
    106 ; AVX512-LABEL: test_2f64_fmadd:
    107 ; AVX512:       # %bb.0:
    108 ; AVX512-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0
    109 ; AVX512-NEXT:    retq
    110   %x = fmul <2 x double> %a0, %a1
    111   %res = fadd <2 x double> %x, %a2
    112   ret <2 x double> %res
    113 }
    114 
    115 define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
    116 ; FMA-LABEL: test_4f64_fmadd:
    117 ; FMA:       # %bb.0:
    118 ; FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
    119 ; FMA-NEXT:    retq
    120 ;
    121 ; FMA4-LABEL: test_4f64_fmadd:
    122 ; FMA4:       # %bb.0:
    123 ; FMA4-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
    124 ; FMA4-NEXT:    retq
    125 ;
    126 ; AVX512-LABEL: test_4f64_fmadd:
    127 ; AVX512:       # %bb.0:
    128 ; AVX512-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
    129 ; AVX512-NEXT:    retq
    130   %x = fmul <4 x double> %a0, %a1
    131   %res = fadd <4 x double> %x, %a2
    132   ret <4 x double> %res
    133 }
    134 
;
; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
;
    138 
    139 define float @test_f32_fmsub(float %a0, float %a1, float %a2) {
    140 ; FMA-LABEL: test_f32_fmsub:
    141 ; FMA:       # %bb.0:
    142 ; FMA-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0
    143 ; FMA-NEXT:    retq
    144 ;
    145 ; FMA4-LABEL: test_f32_fmsub:
    146 ; FMA4:       # %bb.0:
    147 ; FMA4-NEXT:    vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
    148 ; FMA4-NEXT:    retq
    149 ;
    150 ; AVX512-LABEL: test_f32_fmsub:
    151 ; AVX512:       # %bb.0:
    152 ; AVX512-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0
    153 ; AVX512-NEXT:    retq
    154   %x = fmul float %a0, %a1
    155   %res = fsub float %x, %a2
    156   ret float %res
    157 }
    158 
    159 define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
    160 ; FMA-LABEL: test_4f32_fmsub:
    161 ; FMA:       # %bb.0:
    162 ; FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
    163 ; FMA-NEXT:    retq
    164 ;
    165 ; FMA4-LABEL: test_4f32_fmsub:
    166 ; FMA4:       # %bb.0:
    167 ; FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
    168 ; FMA4-NEXT:    retq
    169 ;
    170 ; AVX512-LABEL: test_4f32_fmsub:
    171 ; AVX512:       # %bb.0:
    172 ; AVX512-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
    173 ; AVX512-NEXT:    retq
    174   %x = fmul <4 x float> %a0, %a1
    175   %res = fsub <4 x float> %x, %a2
    176   ret <4 x float> %res
    177 }
    178 
    179 define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
    180 ; FMA-LABEL: test_8f32_fmsub:
    181 ; FMA:       # %bb.0:
    182 ; FMA-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0
    183 ; FMA-NEXT:    retq
    184 ;
    185 ; FMA4-LABEL: test_8f32_fmsub:
    186 ; FMA4:       # %bb.0:
    187 ; FMA4-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
    188 ; FMA4-NEXT:    retq
    189 ;
    190 ; AVX512-LABEL: test_8f32_fmsub:
    191 ; AVX512:       # %bb.0:
    192 ; AVX512-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0
    193 ; AVX512-NEXT:    retq
    194   %x = fmul <8 x float> %a0, %a1
    195   %res = fsub <8 x float> %x, %a2
    196   ret <8 x float> %res
    197 }
    198 
    199 define double @test_f64_fmsub(double %a0, double %a1, double %a2) {
    200 ; FMA-LABEL: test_f64_fmsub:
    201 ; FMA:       # %bb.0:
    202 ; FMA-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0
    203 ; FMA-NEXT:    retq
    204 ;
    205 ; FMA4-LABEL: test_f64_fmsub:
    206 ; FMA4:       # %bb.0:
    207 ; FMA4-NEXT:    vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
    208 ; FMA4-NEXT:    retq
    209 ;
    210 ; AVX512-LABEL: test_f64_fmsub:
    211 ; AVX512:       # %bb.0:
    212 ; AVX512-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0
    213 ; AVX512-NEXT:    retq
    214   %x = fmul double %a0, %a1
    215   %res = fsub double %x, %a2
    216   ret double %res
    217 }
    218 
    219 define <2 x double> @test_2f64_fmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
    220 ; FMA-LABEL: test_2f64_fmsub:
    221 ; FMA:       # %bb.0:
    222 ; FMA-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0
    223 ; FMA-NEXT:    retq
    224 ;
    225 ; FMA4-LABEL: test_2f64_fmsub:
    226 ; FMA4:       # %bb.0:
    227 ; FMA4-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
    228 ; FMA4-NEXT:    retq
    229 ;
    230 ; AVX512-LABEL: test_2f64_fmsub:
    231 ; AVX512:       # %bb.0:
    232 ; AVX512-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0
    233 ; AVX512-NEXT:    retq
    234   %x = fmul <2 x double> %a0, %a1
    235   %res = fsub <2 x double> %x, %a2
    236   ret <2 x double> %res
    237 }
    238 
    239 define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
    240 ; FMA-LABEL: test_4f64_fmsub:
    241 ; FMA:       # %bb.0:
    242 ; FMA-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0
    243 ; FMA-NEXT:    retq
    244 ;
    245 ; FMA4-LABEL: test_4f64_fmsub:
    246 ; FMA4:       # %bb.0:
    247 ; FMA4-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
    248 ; FMA4-NEXT:    retq
    249 ;
    250 ; AVX512-LABEL: test_4f64_fmsub:
    251 ; AVX512:       # %bb.0:
    252 ; AVX512-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0
    253 ; AVX512-NEXT:    retq
    254   %x = fmul <4 x double> %a0, %a1
    255   %res = fsub <4 x double> %x, %a2
    256   ret <4 x double> %res
    257 }
    258 
;
; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
;
    262 
    263 define float @test_f32_fnmadd(float %a0, float %a1, float %a2) {
    264 ; FMA-LABEL: test_f32_fnmadd:
    265 ; FMA:       # %bb.0:
    266 ; FMA-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0
    267 ; FMA-NEXT:    retq
    268 ;
    269 ; FMA4-LABEL: test_f32_fnmadd:
    270 ; FMA4:       # %bb.0:
    271 ; FMA4-NEXT:    vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
    272 ; FMA4-NEXT:    retq
    273 ;
    274 ; AVX512-LABEL: test_f32_fnmadd:
    275 ; AVX512:       # %bb.0:
    276 ; AVX512-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0
    277 ; AVX512-NEXT:    retq
    278   %x = fmul float %a0, %a1
    279   %res = fsub float %a2, %x
    280   ret float %res
    281 }
    282 
    283 define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
    284 ; FMA-LABEL: test_4f32_fnmadd:
    285 ; FMA:       # %bb.0:
    286 ; FMA-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
    287 ; FMA-NEXT:    retq
    288 ;
    289 ; FMA4-LABEL: test_4f32_fnmadd:
    290 ; FMA4:       # %bb.0:
    291 ; FMA4-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
    292 ; FMA4-NEXT:    retq
    293 ;
    294 ; AVX512-LABEL: test_4f32_fnmadd:
    295 ; AVX512:       # %bb.0:
    296 ; AVX512-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0
    297 ; AVX512-NEXT:    retq
    298   %x = fmul <4 x float> %a0, %a1
    299   %res = fsub <4 x float> %a2, %x
    300   ret <4 x float> %res
    301 }
    302 
    303 define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
    304 ; FMA-LABEL: test_8f32_fnmadd:
    305 ; FMA:       # %bb.0:
    306 ; FMA-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
    307 ; FMA-NEXT:    retq
    308 ;
    309 ; FMA4-LABEL: test_8f32_fnmadd:
    310 ; FMA4:       # %bb.0:
    311 ; FMA4-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
    312 ; FMA4-NEXT:    retq
    313 ;
    314 ; AVX512-LABEL: test_8f32_fnmadd:
    315 ; AVX512:       # %bb.0:
    316 ; AVX512-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0
    317 ; AVX512-NEXT:    retq
    318   %x = fmul <8 x float> %a0, %a1
    319   %res = fsub <8 x float> %a2, %x
    320   ret <8 x float> %res
    321 }
    322 
    323 define double @test_f64_fnmadd(double %a0, double %a1, double %a2) {
    324 ; FMA-LABEL: test_f64_fnmadd:
    325 ; FMA:       # %bb.0:
    326 ; FMA-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0
    327 ; FMA-NEXT:    retq
    328 ;
    329 ; FMA4-LABEL: test_f64_fnmadd:
    330 ; FMA4:       # %bb.0:
    331 ; FMA4-NEXT:    vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
    332 ; FMA4-NEXT:    retq
    333 ;
    334 ; AVX512-LABEL: test_f64_fnmadd:
    335 ; AVX512:       # %bb.0:
    336 ; AVX512-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0
    337 ; AVX512-NEXT:    retq
    338   %x = fmul double %a0, %a1
    339   %res = fsub double %a2, %x
    340   ret double %res
    341 }
    342 
    343 define <2 x double> @test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
    344 ; FMA-LABEL: test_2f64_fnmadd:
    345 ; FMA:       # %bb.0:
    346 ; FMA-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0
    347 ; FMA-NEXT:    retq
    348 ;
    349 ; FMA4-LABEL: test_2f64_fnmadd:
    350 ; FMA4:       # %bb.0:
    351 ; FMA4-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
    352 ; FMA4-NEXT:    retq
    353 ;
    354 ; AVX512-LABEL: test_2f64_fnmadd:
    355 ; AVX512:       # %bb.0:
    356 ; AVX512-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0
    357 ; AVX512-NEXT:    retq
    358   %x = fmul <2 x double> %a0, %a1
    359   %res = fsub <2 x double> %a2, %x
    360   ret <2 x double> %res
    361 }
    362 
    363 define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
    364 ; FMA-LABEL: test_4f64_fnmadd:
    365 ; FMA:       # %bb.0:
    366 ; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
    367 ; FMA-NEXT:    retq
    368 ;
    369 ; FMA4-LABEL: test_4f64_fnmadd:
    370 ; FMA4:       # %bb.0:
    371 ; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
    372 ; FMA4-NEXT:    retq
    373 ;
    374 ; AVX512-LABEL: test_4f64_fnmadd:
    375 ; AVX512:       # %bb.0:
    376 ; AVX512-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
    377 ; AVX512-NEXT:    retq
    378   %x = fmul <4 x double> %a0, %a1
    379   %res = fsub <4 x double> %a2, %x
    380   ret <4 x double> %res
    381 }
    382 
;
; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
;
    386 
    387 define float @test_f32_fnmsub(float %a0, float %a1, float %a2) {
    388 ; FMA-LABEL: test_f32_fnmsub:
    389 ; FMA:       # %bb.0:
    390 ; FMA-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0
    391 ; FMA-NEXT:    retq
    392 ;
    393 ; FMA4-LABEL: test_f32_fnmsub:
    394 ; FMA4:       # %bb.0:
    395 ; FMA4-NEXT:    vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
    396 ; FMA4-NEXT:    retq
    397 ;
    398 ; AVX512-LABEL: test_f32_fnmsub:
    399 ; AVX512:       # %bb.0:
    400 ; AVX512-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0
    401 ; AVX512-NEXT:    retq
    402   %x = fmul float %a0, %a1
    403   %y = fsub float -0.000000e+00, %x
    404   %res = fsub float %y, %a2
    405   ret float %res
    406 }
    407 
    408 define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
    409 ; FMA-LABEL: test_4f32_fnmsub:
    410 ; FMA:       # %bb.0:
    411 ; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
    412 ; FMA-NEXT:    retq
    413 ;
    414 ; FMA4-LABEL: test_4f32_fnmsub:
    415 ; FMA4:       # %bb.0:
    416 ; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
    417 ; FMA4-NEXT:    retq
    418 ;
    419 ; AVX512-LABEL: test_4f32_fnmsub:
    420 ; AVX512:       # %bb.0:
    421 ; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
    422 ; AVX512-NEXT:    retq
    423   %x = fmul <4 x float> %a0, %a1
    424   %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
    425   %res = fsub <4 x float> %y, %a2
    426   ret <4 x float> %res
    427 }
    428 
    429 define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
    430 ; FMA-LABEL: test_8f32_fnmsub:
    431 ; FMA:       # %bb.0:
    432 ; FMA-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0
    433 ; FMA-NEXT:    retq
    434 ;
    435 ; FMA4-LABEL: test_8f32_fnmsub:
    436 ; FMA4:       # %bb.0:
    437 ; FMA4-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
    438 ; FMA4-NEXT:    retq
    439 ;
    440 ; AVX512-LABEL: test_8f32_fnmsub:
    441 ; AVX512:       # %bb.0:
    442 ; AVX512-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0
    443 ; AVX512-NEXT:    retq
    444   %x = fmul <8 x float> %a0, %a1
    445   %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
    446   %res = fsub <8 x float> %y, %a2
    447   ret <8 x float> %res
    448 }
    449 
    450 define double @test_f64_fnmsub(double %a0, double %a1, double %a2) {
    451 ; FMA-LABEL: test_f64_fnmsub:
    452 ; FMA:       # %bb.0:
    453 ; FMA-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
    454 ; FMA-NEXT:    retq
    455 ;
    456 ; FMA4-LABEL: test_f64_fnmsub:
    457 ; FMA4:       # %bb.0:
    458 ; FMA4-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
    459 ; FMA4-NEXT:    retq
    460 ;
    461 ; AVX512-LABEL: test_f64_fnmsub:
    462 ; AVX512:       # %bb.0:
    463 ; AVX512-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
    464 ; AVX512-NEXT:    retq
    465   %x = fmul double %a0, %a1
    466   %y = fsub double -0.000000e+00, %x
    467   %res = fsub double %y, %a2
    468   ret double %res
    469 }
    470 
    471 define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
    472 ; FMA-LABEL: test_2f64_fnmsub:
    473 ; FMA:       # %bb.0:
    474 ; FMA-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0
    475 ; FMA-NEXT:    retq
    476 ;
    477 ; FMA4-LABEL: test_2f64_fnmsub:
    478 ; FMA4:       # %bb.0:
    479 ; FMA4-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
    480 ; FMA4-NEXT:    retq
    481 ;
    482 ; AVX512-LABEL: test_2f64_fnmsub:
    483 ; AVX512:       # %bb.0:
    484 ; AVX512-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0
    485 ; AVX512-NEXT:    retq
    486   %x = fmul <2 x double> %a0, %a1
    487   %y = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x
    488   %res = fsub <2 x double> %y, %a2
    489   ret <2 x double> %res
    490 }
    491 
    492 define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
    493 ; FMA-LABEL: test_4f64_fnmsub:
    494 ; FMA:       # %bb.0:
    495 ; FMA-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
    496 ; FMA-NEXT:    retq
    497 ;
    498 ; FMA4-LABEL: test_4f64_fnmsub:
    499 ; FMA4:       # %bb.0:
    500 ; FMA4-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
    501 ; FMA4-NEXT:    retq
    502 ;
    503 ; AVX512-LABEL: test_4f64_fnmsub:
    504 ; AVX512:       # %bb.0:
    505 ; AVX512-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
    506 ; AVX512-NEXT:    retq
    507   %x = fmul <4 x double> %a0, %a1
    508   %y = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
    509   %res = fsub <4 x double> %y, %a2
    510   ret <4 x double> %res
    511 }
    512 
;
; Load Folding Patterns
;
    516 
    517 define <4 x float> @test_4f32_fmadd_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
    518 ; FMA-LABEL: test_4f32_fmadd_load:
    519 ; FMA:       # %bb.0:
    520 ; FMA-NEXT:    vfmadd132ps (%rdi), %xmm1, %xmm0
    521 ; FMA-NEXT:    retq
    522 ;
    523 ; FMA4-LABEL: test_4f32_fmadd_load:
    524 ; FMA4:       # %bb.0:
    525 ; FMA4-NEXT:    vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
    526 ; FMA4-NEXT:    retq
    527 ;
    528 ; AVX512-LABEL: test_4f32_fmadd_load:
    529 ; AVX512:       # %bb.0:
    530 ; AVX512-NEXT:    vfmadd132ps (%rdi), %xmm1, %xmm0
    531 ; AVX512-NEXT:    retq
    532   %x = load <4 x float>, <4 x float>* %a0
    533   %y = fmul <4 x float> %x, %a1
    534   %res = fadd <4 x float> %y, %a2
    535   ret <4 x float> %res
    536 }
    537 
    538 define <2 x double> @test_2f64_fmsub_load(<2 x double>* %a0, <2 x double> %a1, <2 x double> %a2) {
    539 ; FMA-LABEL: test_2f64_fmsub_load:
    540 ; FMA:       # %bb.0:
    541 ; FMA-NEXT:    vfmsub132pd (%rdi), %xmm1, %xmm0
    542 ; FMA-NEXT:    retq
    543 ;
    544 ; FMA4-LABEL: test_2f64_fmsub_load:
    545 ; FMA4:       # %bb.0:
    546 ; FMA4-NEXT:    vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0
    547 ; FMA4-NEXT:    retq
    548 ;
    549 ; AVX512-LABEL: test_2f64_fmsub_load:
    550 ; AVX512:       # %bb.0:
    551 ; AVX512-NEXT:    vfmsub132pd (%rdi), %xmm1, %xmm0
    552 ; AVX512-NEXT:    retq
    553   %x = load <2 x double>, <2 x double>* %a0
    554   %y = fmul <2 x double> %x, %a1
    555   %res = fsub <2 x double> %y, %a2
    556   ret <2 x double> %res
    557 }
    558 
;
; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
;
    562 
    563 define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) {
    564 ; FMA-INFS-LABEL: test_v4f32_mul_add_x_one_y:
    565 ; FMA-INFS:       # %bb.0:
    566 ; FMA-INFS-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
    567 ; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
    568 ; FMA-INFS-NEXT:    retq
    569 ;
    570 ; FMA4-INFS-LABEL: test_v4f32_mul_add_x_one_y:
    571 ; FMA4-INFS:       # %bb.0:
    572 ; FMA4-INFS-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
    573 ; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
    574 ; FMA4-INFS-NEXT:    retq
    575 ;
    576 ; AVX512-INFS-LABEL: test_v4f32_mul_add_x_one_y:
    577 ; AVX512-INFS:       # %bb.0:
    578 ; AVX512-INFS-NEXT:    vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
    579 ; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
    580 ; AVX512-INFS-NEXT:    retq
    581 ;
    582 ; FMA-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
    583 ; FMA-NOINFS:       # %bb.0:
    584 ; FMA-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    585 ; FMA-NOINFS-NEXT:    retq
    586 ;
    587 ; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
    588 ; FMA4-NOINFS:       # %bb.0:
    589 ; FMA4-NOINFS-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
    590 ; FMA4-NOINFS-NEXT:    retq
    591 ;
    592 ; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
    593 ; AVX512-NOINFS:       # %bb.0:
    594 ; AVX512-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    595 ; AVX512-NOINFS-NEXT:    retq
    596   %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
    597   %m = fmul <4 x float> %a, %y
    598   ret <4 x float> %m
    599 }
    600 
    601 define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) {
    602 ; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_one:
    603 ; FMA-INFS:       # %bb.0:
    604 ; FMA-INFS-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
    605 ; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
    606 ; FMA-INFS-NEXT:    retq
    607 ;
    608 ; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_one:
    609 ; FMA4-INFS:       # %bb.0:
    610 ; FMA4-INFS-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
    611 ; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
    612 ; FMA4-INFS-NEXT:    retq
    613 ;
    614 ; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_one:
    615 ; AVX512-INFS:       # %bb.0:
    616 ; AVX512-INFS-NEXT:    vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
    617 ; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
    618 ; AVX512-INFS-NEXT:    retq
    619 ;
    620 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
    621 ; FMA-NOINFS:       # %bb.0:
    622 ; FMA-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    623 ; FMA-NOINFS-NEXT:    retq
    624 ;
    625 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
    626 ; FMA4-NOINFS:       # %bb.0:
    627 ; FMA4-NOINFS-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
    628 ; FMA4-NOINFS-NEXT:    retq
    629 ;
    630 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
    631 ; AVX512-NOINFS:       # %bb.0:
    632 ; AVX512-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    633 ; AVX512-NOINFS-NEXT:    retq
    634   %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
    635   %m = fmul <4 x float> %y, %a
    636   ret <4 x float> %m
    637 }
    638 
    639 define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) {
    640 ; FMA-INFS-LABEL: test_v4f32_mul_add_x_negone_y:
    641 ; FMA-INFS:       # %bb.0:
    642 ; FMA-INFS-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
    643 ; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
    644 ; FMA-INFS-NEXT:    retq
    645 ;
    646 ; FMA4-INFS-LABEL: test_v4f32_mul_add_x_negone_y:
    647 ; FMA4-INFS:       # %bb.0:
    648 ; FMA4-INFS-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
    649 ; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
    650 ; FMA4-INFS-NEXT:    retq
    651 ;
    652 ; AVX512-INFS-LABEL: test_v4f32_mul_add_x_negone_y:
    653 ; AVX512-INFS:       # %bb.0:
    654 ; AVX512-INFS-NEXT:    vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
    655 ; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
    656 ; AVX512-INFS-NEXT:    retq
    657 ;
    658 ; FMA-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
    659 ; FMA-NOINFS:       # %bb.0:
    660 ; FMA-NOINFS-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    661 ; FMA-NOINFS-NEXT:    retq
    662 ;
    663 ; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
    664 ; FMA4-NOINFS:       # %bb.0:
    665 ; FMA4-NOINFS-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
    666 ; FMA4-NOINFS-NEXT:    retq
    667 ;
    668 ; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
    669 ; AVX512-NOINFS:       # %bb.0:
    670 ; AVX512-NOINFS-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    671 ; AVX512-NOINFS-NEXT:    retq
    672   %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
    673   %m = fmul <4 x float> %a, %y
    674   ret <4 x float> %m
    675 }
    676 
    677 define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) {
    678 ; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_negone:
    679 ; FMA-INFS:       # %bb.0:
    680 ; FMA-INFS-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
    681 ; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
    682 ; FMA-INFS-NEXT:    retq
    683 ;
    684 ; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_negone:
    685 ; FMA4-INFS:       # %bb.0:
    686 ; FMA4-INFS-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
    687 ; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
    688 ; FMA4-INFS-NEXT:    retq
    689 ;
    690 ; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_negone:
    691 ; AVX512-INFS:       # %bb.0:
    692 ; AVX512-INFS-NEXT:    vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
    693 ; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
    694 ; AVX512-INFS-NEXT:    retq
    695 ;
    696 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
    697 ; FMA-NOINFS:       # %bb.0:
    698 ; FMA-NOINFS-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    699 ; FMA-NOINFS-NEXT:    retq
    700 ;
    701 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
    702 ; FMA4-NOINFS:       # %bb.0:
    703 ; FMA4-NOINFS-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
    704 ; FMA4-NOINFS-NEXT:    retq
    705 ;
    706 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
    707 ; AVX512-NOINFS:       # %bb.0:
    708 ; AVX512-NOINFS-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    709 ; AVX512-NOINFS-NEXT:    retq
    710   %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
    711   %m = fmul <4 x float> %y, %a
    712   ret <4 x float> %m
    713 }
    714 
    715 define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
    716 ; FMA-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
    717 ; FMA-INFS:       # %bb.0:
    718 ; FMA-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
    719 ; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
    720 ; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
    721 ; FMA-INFS-NEXT:    retq
    722 ;
    723 ; FMA4-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
    724 ; FMA4-INFS:       # %bb.0:
    725 ; FMA4-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
    726 ; FMA4-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
    727 ; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
    728 ; FMA4-INFS-NEXT:    retq
    729 ;
    730 ; AVX512-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
    731 ; AVX512-INFS:       # %bb.0:
    732 ; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1]
    733 ; AVX512-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
    734 ; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
    735 ; AVX512-INFS-NEXT:    retq
    736 ;
    737 ; FMA-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
    738 ; FMA-NOINFS:       # %bb.0:
    739 ; FMA-NOINFS-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
    740 ; FMA-NOINFS-NEXT:    retq
    741 ;
    742 ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
    743 ; FMA4-NOINFS:       # %bb.0:
    744 ; FMA4-NOINFS-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
    745 ; FMA4-NOINFS-NEXT:    retq
    746 ;
    747 ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
    748 ; AVX512-NOINFS:       # %bb.0:
    749 ; AVX512-NOINFS-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
    750 ; AVX512-NOINFS-NEXT:    retq
    751   %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
    752   %m = fmul <4 x float> %s, %y
    753   ret <4 x float> %m
    754 }
    755 
    756 define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
    757 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
    758 ; FMA-INFS:       # %bb.0:
    759 ; FMA-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
    760 ; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
    761 ; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
    762 ; FMA-INFS-NEXT:    retq
    763 ;
    764 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
    765 ; FMA4-INFS:       # %bb.0:
    766 ; FMA4-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
    767 ; FMA4-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
    768 ; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
    769 ; FMA4-INFS-NEXT:    retq
    770 ;
    771 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
    772 ; AVX512-INFS:       # %bb.0:
    773 ; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1,1,1,1]
    774 ; AVX512-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
    775 ; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
    776 ; AVX512-INFS-NEXT:    retq
    777 ;
    778 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
    779 ; FMA-NOINFS:       # %bb.0:
    780 ; FMA-NOINFS-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
    781 ; FMA-NOINFS-NEXT:    retq
    782 ;
    783 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
    784 ; FMA4-NOINFS:       # %bb.0:
    785 ; FMA4-NOINFS-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
    786 ; FMA4-NOINFS-NEXT:    retq
    787 ;
    788 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
    789 ; AVX512-NOINFS:       # %bb.0:
    790 ; AVX512-NOINFS-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm0
    791 ; AVX512-NOINFS-NEXT:    retq
    792   %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
    793   %m = fmul <4 x float> %y, %s
    794   ret <4 x float> %m
    795 }
    796 
    797 define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) { ; computes (-1.0 - x) * y; checks expect sub+mul by default and a single fnmsub under -enable-no-infs-fp-math
    798 ; FMA-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
    799 ; FMA-INFS:       # %bb.0:
    800 ; FMA-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
    801 ; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
    802 ; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
    803 ; FMA-INFS-NEXT:    retq
    804 ;
    805 ; FMA4-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
    806 ; FMA4-INFS:       # %bb.0:
    807 ; FMA4-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
    808 ; FMA4-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
    809 ; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
    810 ; FMA4-INFS-NEXT:    retq
    811 ;
    812 ; AVX512-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
    813 ; AVX512-INFS:       # %bb.0:
    814 ; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-1,-1,-1,-1]
    815 ; AVX512-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
    816 ; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
    817 ; AVX512-INFS-NEXT:    retq
    818 ;
    819 ; FMA-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
    820 ; FMA-NOINFS:       # %bb.0:
    821 ; FMA-NOINFS-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
    822 ; FMA-NOINFS-NEXT:    retq
    823 ;
    824 ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
    825 ; FMA4-NOINFS:       # %bb.0:
    826 ; FMA4-NOINFS-NEXT:    vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
    827 ; FMA4-NOINFS-NEXT:    retq
    828 ;
    829 ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
    830 ; AVX512-NOINFS:       # %bb.0:
    831 ; AVX512-NOINFS-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
    832 ; AVX512-NOINFS-NEXT:    retq
    833   %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x ; s = -1.0 - x in every lane
    834   %m = fmul <4 x float> %s, %y ; m = s * y, with the fsub result as the first fmul operand
    835   ret <4 x float> %m
    836 }
    837 
    838 define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) { ; computes y * (-1.0 - x); checks expect sub+mul by default and a single fnmsub under -enable-no-infs-fp-math
    839 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
    840 ; FMA-INFS:       # %bb.0:
    841 ; FMA-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
    842 ; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
    843 ; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
    844 ; FMA-INFS-NEXT:    retq
    845 ;
    846 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
    847 ; FMA4-INFS:       # %bb.0:
    848 ; FMA4-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
    849 ; FMA4-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
    850 ; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
    851 ; FMA4-INFS-NEXT:    retq
    852 ;
    853 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
    854 ; AVX512-INFS:       # %bb.0:
    855 ; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-1,-1,-1,-1]
    856 ; AVX512-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
    857 ; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
    858 ; AVX512-INFS-NEXT:    retq
    859 ;
    860 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
    861 ; FMA-NOINFS:       # %bb.0:
    862 ; FMA-NOINFS-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
    863 ; FMA-NOINFS-NEXT:    retq
    864 ;
    865 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
    866 ; FMA4-NOINFS:       # %bb.0:
    867 ; FMA4-NOINFS-NEXT:    vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
    868 ; FMA4-NOINFS-NEXT:    retq
    869 ;
    870 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
    871 ; AVX512-NOINFS:       # %bb.0:
    872 ; AVX512-NOINFS-NEXT:    vfnmsub213ps %xmm1, %xmm1, %xmm0
    873 ; AVX512-NOINFS-NEXT:    retq
    874   %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x ; s = -1.0 - x in every lane
    875   %m = fmul <4 x float> %y, %s ; m = y * s, with the fsub result as the second fmul operand
    876   ret <4 x float> %m
    877 }
    878 
    879 define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) { ; computes (x - 1.0) * y; checks expect sub+mul by default and a single fmsub under -enable-no-infs-fp-math
    880 ; FMA-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
    881 ; FMA-INFS:       # %bb.0:
    882 ; FMA-INFS-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
    883 ; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
    884 ; FMA-INFS-NEXT:    retq
    885 ;
    886 ; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
    887 ; FMA4-INFS:       # %bb.0:
    888 ; FMA4-INFS-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
    889 ; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
    890 ; FMA4-INFS-NEXT:    retq
    891 ;
    892 ; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
    893 ; AVX512-INFS:       # %bb.0:
    894 ; AVX512-INFS-NEXT:    vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
    895 ; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
    896 ; AVX512-INFS-NEXT:    retq
    897 ;
    898 ; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:
    899 ; FMA-NOINFS:       # %bb.0:
    900 ; FMA-NOINFS-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    901 ; FMA-NOINFS-NEXT:    retq
    902 ;
    903 ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:
    904 ; FMA4-NOINFS:       # %bb.0:
    905 ; FMA4-NOINFS-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
    906 ; FMA4-NOINFS-NEXT:    retq
    907 ;
    908 ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:
    909 ; AVX512-NOINFS:       # %bb.0:
    910 ; AVX512-NOINFS-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    911 ; AVX512-NOINFS-NEXT:    retq
    912   %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> ; s = x - 1.0 in every lane
    913   %m = fmul <4 x float> %s, %y ; m = s * y, with the fsub result as the first fmul operand
    914   ret <4 x float> %m
    915 }
    916 
    917 define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) { ; computes y * (x - 1.0); checks expect sub+mul by default and a single fmsub under -enable-no-infs-fp-math
    918 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
    919 ; FMA-INFS:       # %bb.0:
    920 ; FMA-INFS-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
    921 ; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
    922 ; FMA-INFS-NEXT:    retq
    923 ;
    924 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
    925 ; FMA4-INFS:       # %bb.0:
    926 ; FMA4-INFS-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
    927 ; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
    928 ; FMA4-INFS-NEXT:    retq
    929 ;
    930 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
    931 ; AVX512-INFS:       # %bb.0:
    932 ; AVX512-INFS-NEXT:    vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
    933 ; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
    934 ; AVX512-INFS-NEXT:    retq
    935 ;
    936 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:
    937 ; FMA-NOINFS:       # %bb.0:
    938 ; FMA-NOINFS-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    939 ; FMA-NOINFS-NEXT:    retq
    940 ;
    941 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:
    942 ; FMA4-NOINFS:       # %bb.0:
    943 ; FMA4-NOINFS-NEXT:    vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
    944 ; FMA4-NOINFS-NEXT:    retq
    945 ;
    946 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:
    947 ; AVX512-NOINFS:       # %bb.0:
    948 ; AVX512-NOINFS-NEXT:    vfmsub213ps %xmm1, %xmm1, %xmm0
    949 ; AVX512-NOINFS-NEXT:    retq
    950   %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> ; s = x - 1.0 in every lane
    951   %m = fmul <4 x float> %y, %s ; m = y * s, with the fsub result as the second fmul operand
    952   ret <4 x float> %m
    953 }
    954 
    955 define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) { ; computes (x - (-1.0)) * y; checks expect sub+mul by default and a single fmadd under -enable-no-infs-fp-math
    956 ; FMA-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
    957 ; FMA-INFS:       # %bb.0:
    958 ; FMA-INFS-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
    959 ; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
    960 ; FMA-INFS-NEXT:    retq
    961 ;
    962 ; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
    963 ; FMA4-INFS:       # %bb.0:
    964 ; FMA4-INFS-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
    965 ; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
    966 ; FMA4-INFS-NEXT:    retq
    967 ;
    968 ; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
    969 ; AVX512-INFS:       # %bb.0:
    970 ; AVX512-INFS-NEXT:    vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
    971 ; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
    972 ; AVX512-INFS-NEXT:    retq
    973 ;
    974 ; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:
    975 ; FMA-NOINFS:       # %bb.0:
    976 ; FMA-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    977 ; FMA-NOINFS-NEXT:    retq
    978 ;
    979 ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:
    980 ; FMA4-NOINFS:       # %bb.0:
    981 ; FMA4-NOINFS-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
    982 ; FMA4-NOINFS-NEXT:    retq
    983 ;
    984 ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:
    985 ; AVX512-NOINFS:       # %bb.0:
    986 ; AVX512-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
    987 ; AVX512-NOINFS-NEXT:    retq
    988   %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> ; s = x - (-1.0) in every lane
    989   %m = fmul <4 x float> %s, %y ; m = s * y, with the fsub result as the first fmul operand
    990   ret <4 x float> %m
    991 }
    992 
    993 define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) { ; computes y * (x - (-1.0)); checks expect sub+mul by default and a single fmadd under -enable-no-infs-fp-math
    994 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
    995 ; FMA-INFS:       # %bb.0:
    996 ; FMA-INFS-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
    997 ; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
    998 ; FMA-INFS-NEXT:    retq
    999 ;
   1000 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
   1001 ; FMA4-INFS:       # %bb.0:
   1002 ; FMA4-INFS-NEXT:    vsubps {{.*}}(%rip), %xmm0, %xmm0
   1003 ; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
   1004 ; FMA4-INFS-NEXT:    retq
   1005 ;
   1006 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
   1007 ; AVX512-INFS:       # %bb.0:
   1008 ; AVX512-INFS-NEXT:    vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0
   1009 ; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
   1010 ; AVX512-INFS-NEXT:    retq
   1011 ;
   1012 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:
   1013 ; FMA-NOINFS:       # %bb.0:
   1014 ; FMA-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
   1015 ; FMA-NOINFS-NEXT:    retq
   1016 ;
   1017 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:
   1018 ; FMA4-NOINFS:       # %bb.0:
   1019 ; FMA4-NOINFS-NEXT:    vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
   1020 ; FMA4-NOINFS-NEXT:    retq
   1021 ;
   1022 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:
   1023 ; AVX512-NOINFS:       # %bb.0:
   1024 ; AVX512-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm1, %xmm0
   1025 ; AVX512-NOINFS-NEXT:    retq
   1026   %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> ; s = x - (-1.0) in every lane
   1027   %m = fmul <4 x float> %y, %s ; m = y * s, with the fsub result as the second fmul operand
   1028   ret <4 x float> %m
   1029 }
   1030 
   1031 ;
   1032 ; Interpolation Patterns: add(mul(x,t),mul(y,sub(1.0,t)))
   1033 ;
   1034 
   1035 define float @test_f32_interp(float %x, float %y, float %t) { ; scalar f32 interpolation x*t + y*(1.0 - t); checks expect sub+mul+fmadd by default and fnmadd+fmadd under -enable-no-infs-fp-math
   1036 ; FMA-INFS-LABEL: test_f32_interp:
   1037 ; FMA-INFS:       # %bb.0:
   1038 ; FMA-INFS-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
   1039 ; FMA-INFS-NEXT:    vsubss %xmm2, %xmm3, %xmm3
   1040 ; FMA-INFS-NEXT:    vmulss %xmm3, %xmm1, %xmm1
   1041 ; FMA-INFS-NEXT:    vfmadd213ss %xmm1, %xmm2, %xmm0
   1042 ; FMA-INFS-NEXT:    retq
   1043 ;
   1044 ; FMA4-INFS-LABEL: test_f32_interp:
   1045 ; FMA4-INFS:       # %bb.0:
   1046 ; FMA4-INFS-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
   1047 ; FMA4-INFS-NEXT:    vsubss %xmm2, %xmm3, %xmm3
   1048 ; FMA4-INFS-NEXT:    vmulss %xmm3, %xmm1, %xmm1
   1049 ; FMA4-INFS-NEXT:    vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
   1050 ; FMA4-INFS-NEXT:    retq
   1051 ;
   1052 ; AVX512-INFS-LABEL: test_f32_interp:
   1053 ; AVX512-INFS:       # %bb.0:
   1054 ; AVX512-INFS-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
   1055 ; AVX512-INFS-NEXT:    vsubss %xmm2, %xmm3, %xmm3
   1056 ; AVX512-INFS-NEXT:    vmulss %xmm3, %xmm1, %xmm1
   1057 ; AVX512-INFS-NEXT:    vfmadd213ss %xmm1, %xmm2, %xmm0
   1058 ; AVX512-INFS-NEXT:    retq
   1059 ;
   1060 ; FMA-NOINFS-LABEL: test_f32_interp:
   1061 ; FMA-NOINFS:       # %bb.0:
   1062 ; FMA-NOINFS-NEXT:    vfnmadd213ss %xmm1, %xmm2, %xmm1
   1063 ; FMA-NOINFS-NEXT:    vfmadd213ss %xmm1, %xmm2, %xmm0
   1064 ; FMA-NOINFS-NEXT:    retq
   1065 ;
   1066 ; FMA4-NOINFS-LABEL: test_f32_interp:
   1067 ; FMA4-NOINFS:       # %bb.0:
   1068 ; FMA4-NOINFS-NEXT:    vfnmaddss %xmm1, %xmm1, %xmm2, %xmm1
   1069 ; FMA4-NOINFS-NEXT:    vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
   1070 ; FMA4-NOINFS-NEXT:    retq
   1071 ;
   1072 ; AVX512-NOINFS-LABEL: test_f32_interp:
   1073 ; AVX512-NOINFS:       # %bb.0:
   1074 ; AVX512-NOINFS-NEXT:    vfnmadd213ss %xmm1, %xmm2, %xmm1
   1075 ; AVX512-NOINFS-NEXT:    vfmadd213ss %xmm1, %xmm2, %xmm0
   1076 ; AVX512-NOINFS-NEXT:    retq
   1077   %t1 = fsub float 1.0, %t ; t1 = 1.0 - t
   1078   %tx = fmul float %x, %t ; tx = x * t
   1079   %ty = fmul float %y, %t1 ; ty = y * (1.0 - t)
   1080   %r = fadd float %tx, %ty ; r = x*t + y*(1.0 - t)
   1081   ret float %r
   1082 }
   1083 
   1084 define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) { ; <4 x float> interpolation x*t + y*(1.0 - t); checks expect sub+mul+fmadd by default and fnmadd+fmadd under -enable-no-infs-fp-math
   1085 ; FMA-INFS-LABEL: test_v4f32_interp:
   1086 ; FMA-INFS:       # %bb.0:
   1087 ; FMA-INFS-NEXT:    vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
   1088 ; FMA-INFS-NEXT:    vsubps %xmm2, %xmm3, %xmm3
   1089 ; FMA-INFS-NEXT:    vmulps %xmm3, %xmm1, %xmm1
   1090 ; FMA-INFS-NEXT:    vfmadd213ps %xmm1, %xmm2, %xmm0
   1091 ; FMA-INFS-NEXT:    retq
   1092 ;
   1093 ; FMA4-INFS-LABEL: test_v4f32_interp:
   1094 ; FMA4-INFS:       # %bb.0:
   1095 ; FMA4-INFS-NEXT:    vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
   1096 ; FMA4-INFS-NEXT:    vsubps %xmm2, %xmm3, %xmm3
   1097 ; FMA4-INFS-NEXT:    vmulps %xmm3, %xmm1, %xmm1
   1098 ; FMA4-INFS-NEXT:    vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
   1099 ; FMA4-INFS-NEXT:    retq
   1100 ;
   1101 ; AVX512-INFS-LABEL: test_v4f32_interp:
   1102 ; AVX512-INFS:       # %bb.0:
   1103 ; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm3 = [1,1,1,1]
   1104 ; AVX512-INFS-NEXT:    vsubps %xmm2, %xmm3, %xmm3
   1105 ; AVX512-INFS-NEXT:    vmulps %xmm3, %xmm1, %xmm1
   1106 ; AVX512-INFS-NEXT:    vfmadd213ps %xmm1, %xmm2, %xmm0
   1107 ; AVX512-INFS-NEXT:    retq
   1108 ;
   1109 ; FMA-NOINFS-LABEL: test_v4f32_interp:
   1110 ; FMA-NOINFS:       # %bb.0:
   1111 ; FMA-NOINFS-NEXT:    vfnmadd213ps %xmm1, %xmm2, %xmm1
   1112 ; FMA-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm2, %xmm0
   1113 ; FMA-NOINFS-NEXT:    retq
   1114 ;
   1115 ; FMA4-NOINFS-LABEL: test_v4f32_interp:
   1116 ; FMA4-NOINFS:       # %bb.0:
   1117 ; FMA4-NOINFS-NEXT:    vfnmaddps %xmm1, %xmm1, %xmm2, %xmm1
   1118 ; FMA4-NOINFS-NEXT:    vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
   1119 ; FMA4-NOINFS-NEXT:    retq
   1120 ;
   1121 ; AVX512-NOINFS-LABEL: test_v4f32_interp:
   1122 ; AVX512-NOINFS:       # %bb.0:
   1123 ; AVX512-NOINFS-NEXT:    vfnmadd213ps %xmm1, %xmm2, %xmm1
   1124 ; AVX512-NOINFS-NEXT:    vfmadd213ps %xmm1, %xmm2, %xmm0
   1125 ; AVX512-NOINFS-NEXT:    retq
   1126   %t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t ; t1 = 1.0 - t in every lane
   1127   %tx = fmul <4 x float> %x, %t ; tx = x * t
   1128   %ty = fmul <4 x float> %y, %t1 ; ty = y * (1.0 - t)
   1129   %r = fadd <4 x float> %tx, %ty ; r = x*t + y*(1.0 - t)
   1130   ret <4 x float> %r
   1131 }
   1132 
   1133 define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) { ; <8 x float> interpolation x*t + y*(1.0 - t); checks expect sub+mul+fmadd by default and fnmadd+fmadd under -enable-no-infs-fp-math
   1134 ; FMA-INFS-LABEL: test_v8f32_interp:
   1135 ; FMA-INFS:       # %bb.0:
   1136 ; FMA-INFS-NEXT:    vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
   1137 ; FMA-INFS-NEXT:    vsubps %ymm2, %ymm3, %ymm3
   1138 ; FMA-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
   1139 ; FMA-INFS-NEXT:    vfmadd213ps %ymm1, %ymm2, %ymm0
   1140 ; FMA-INFS-NEXT:    retq
   1141 ;
   1142 ; FMA4-INFS-LABEL: test_v8f32_interp:
   1143 ; FMA4-INFS:       # %bb.0:
   1144 ; FMA4-INFS-NEXT:    vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
   1145 ; FMA4-INFS-NEXT:    vsubps %ymm2, %ymm3, %ymm3
   1146 ; FMA4-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
   1147 ; FMA4-INFS-NEXT:    vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
   1148 ; FMA4-INFS-NEXT:    retq
   1149 ;
   1150 ; AVX512-INFS-LABEL: test_v8f32_interp:
   1151 ; AVX512-INFS:       # %bb.0:
   1152 ; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
   1153 ; AVX512-INFS-NEXT:    vsubps %ymm2, %ymm3, %ymm3
   1154 ; AVX512-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
   1155 ; AVX512-INFS-NEXT:    vfmadd213ps %ymm1, %ymm2, %ymm0
   1156 ; AVX512-INFS-NEXT:    retq
   1157 ;
   1158 ; FMA-NOINFS-LABEL: test_v8f32_interp:
   1159 ; FMA-NOINFS:       # %bb.0:
   1160 ; FMA-NOINFS-NEXT:    vfnmadd213ps %ymm1, %ymm2, %ymm1
   1161 ; FMA-NOINFS-NEXT:    vfmadd213ps %ymm1, %ymm2, %ymm0
   1162 ; FMA-NOINFS-NEXT:    retq
   1163 ;
   1164 ; FMA4-NOINFS-LABEL: test_v8f32_interp:
   1165 ; FMA4-NOINFS:       # %bb.0:
   1166 ; FMA4-NOINFS-NEXT:    vfnmaddps %ymm1, %ymm1, %ymm2, %ymm1
   1167 ; FMA4-NOINFS-NEXT:    vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
   1168 ; FMA4-NOINFS-NEXT:    retq
   1169 ;
   1170 ; AVX512-NOINFS-LABEL: test_v8f32_interp:
   1171 ; AVX512-NOINFS:       # %bb.0:
   1172 ; AVX512-NOINFS-NEXT:    vfnmadd213ps %ymm1, %ymm2, %ymm1
   1173 ; AVX512-NOINFS-NEXT:    vfmadd213ps %ymm1, %ymm2, %ymm0
   1174 ; AVX512-NOINFS-NEXT:    retq
   1175   %t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t ; t1 = 1.0 - t in every lane
   1176   %tx = fmul <8 x float> %x, %t ; tx = x * t
   1177   %ty = fmul <8 x float> %y, %t1 ; ty = y * (1.0 - t)
   1178   %r = fadd <8 x float> %tx, %ty ; r = x*t + y*(1.0 - t)
   1179   ret <8 x float> %r
   1180 }
   1181 
   1182 define double @test_f64_interp(double %x, double %y, double %t) { ; scalar f64 interpolation x*t + y*(1.0 - t); checks expect sub+mul+fmadd by default and fnmadd+fmadd under -enable-no-infs-fp-math
   1183 ; FMA-INFS-LABEL: test_f64_interp:
   1184 ; FMA-INFS:       # %bb.0:
   1185 ; FMA-INFS-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
   1186 ; FMA-INFS-NEXT:    vsubsd %xmm2, %xmm3, %xmm3
   1187 ; FMA-INFS-NEXT:    vmulsd %xmm3, %xmm1, %xmm1
   1188 ; FMA-INFS-NEXT:    vfmadd213sd %xmm1, %xmm2, %xmm0
   1189 ; FMA-INFS-NEXT:    retq
   1190 ;
   1191 ; FMA4-INFS-LABEL: test_f64_interp:
   1192 ; FMA4-INFS:       # %bb.0:
   1193 ; FMA4-INFS-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
   1194 ; FMA4-INFS-NEXT:    vsubsd %xmm2, %xmm3, %xmm3
   1195 ; FMA4-INFS-NEXT:    vmulsd %xmm3, %xmm1, %xmm1
   1196 ; FMA4-INFS-NEXT:    vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0
   1197 ; FMA4-INFS-NEXT:    retq
   1198 ;
   1199 ; AVX512-INFS-LABEL: test_f64_interp:
   1200 ; AVX512-INFS:       # %bb.0:
   1201 ; AVX512-INFS-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
   1202 ; AVX512-INFS-NEXT:    vsubsd %xmm2, %xmm3, %xmm3
   1203 ; AVX512-INFS-NEXT:    vmulsd %xmm3, %xmm1, %xmm1
   1204 ; AVX512-INFS-NEXT:    vfmadd213sd %xmm1, %xmm2, %xmm0
   1205 ; AVX512-INFS-NEXT:    retq
   1206 ;
   1207 ; FMA-NOINFS-LABEL: test_f64_interp:
   1208 ; FMA-NOINFS:       # %bb.0:
   1209 ; FMA-NOINFS-NEXT:    vfnmadd213sd %xmm1, %xmm2, %xmm1
   1210 ; FMA-NOINFS-NEXT:    vfmadd213sd %xmm1, %xmm2, %xmm0
   1211 ; FMA-NOINFS-NEXT:    retq
   1212 ;
   1213 ; FMA4-NOINFS-LABEL: test_f64_interp:
   1214 ; FMA4-NOINFS:       # %bb.0:
   1215 ; FMA4-NOINFS-NEXT:    vfnmaddsd %xmm1, %xmm1, %xmm2, %xmm1
   1216 ; FMA4-NOINFS-NEXT:    vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0
   1217 ; FMA4-NOINFS-NEXT:    retq
   1218 ;
   1219 ; AVX512-NOINFS-LABEL: test_f64_interp:
   1220 ; AVX512-NOINFS:       # %bb.0:
   1221 ; AVX512-NOINFS-NEXT:    vfnmadd213sd %xmm1, %xmm2, %xmm1
   1222 ; AVX512-NOINFS-NEXT:    vfmadd213sd %xmm1, %xmm2, %xmm0
   1223 ; AVX512-NOINFS-NEXT:    retq
   1224   %t1 = fsub double 1.0, %t ; t1 = 1.0 - t
   1225   %tx = fmul double %x, %t ; tx = x * t
   1226   %ty = fmul double %y, %t1 ; ty = y * (1.0 - t)
   1227   %r = fadd double %tx, %ty ; r = x*t + y*(1.0 - t)
   1228   ret double %r
   1229 }
   1230 
   1231 define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) { ; <2 x double> interpolation x*t + y*(1.0 - t); checks expect sub+mul+fmadd by default and fnmadd+fmadd under -enable-no-infs-fp-math
   1232 ; FMA-INFS-LABEL: test_v2f64_interp:
   1233 ; FMA-INFS:       # %bb.0:
   1234 ; FMA-INFS-NEXT:    vmovapd {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00]
   1235 ; FMA-INFS-NEXT:    vsubpd %xmm2, %xmm3, %xmm3
   1236 ; FMA-INFS-NEXT:    vmulpd %xmm3, %xmm1, %xmm1
   1237 ; FMA-INFS-NEXT:    vfmadd213pd %xmm1, %xmm2, %xmm0
   1238 ; FMA-INFS-NEXT:    retq
   1239 ;
   1240 ; FMA4-INFS-LABEL: test_v2f64_interp:
   1241 ; FMA4-INFS:       # %bb.0:
   1242 ; FMA4-INFS-NEXT:    vmovapd {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00]
   1243 ; FMA4-INFS-NEXT:    vsubpd %xmm2, %xmm3, %xmm3
   1244 ; FMA4-INFS-NEXT:    vmulpd %xmm3, %xmm1, %xmm1
   1245 ; FMA4-INFS-NEXT:    vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
   1246 ; FMA4-INFS-NEXT:    retq
   1247 ;
   1248 ; AVX512-INFS-LABEL: test_v2f64_interp:
   1249 ; AVX512-INFS:       # %bb.0:
   1250 ; AVX512-INFS-NEXT:    vmovapd {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00]
   1251 ; AVX512-INFS-NEXT:    vsubpd %xmm2, %xmm3, %xmm3
   1252 ; AVX512-INFS-NEXT:    vmulpd %xmm3, %xmm1, %xmm1
   1253 ; AVX512-INFS-NEXT:    vfmadd213pd %xmm1, %xmm2, %xmm0
   1254 ; AVX512-INFS-NEXT:    retq
   1255 ;
   1256 ; FMA-NOINFS-LABEL: test_v2f64_interp:
   1257 ; FMA-NOINFS:       # %bb.0:
   1258 ; FMA-NOINFS-NEXT:    vfnmadd213pd %xmm1, %xmm2, %xmm1
   1259 ; FMA-NOINFS-NEXT:    vfmadd213pd %xmm1, %xmm2, %xmm0
   1260 ; FMA-NOINFS-NEXT:    retq
   1261 ;
   1262 ; FMA4-NOINFS-LABEL: test_v2f64_interp:
   1263 ; FMA4-NOINFS:       # %bb.0:
   1264 ; FMA4-NOINFS-NEXT:    vfnmaddpd %xmm1, %xmm1, %xmm2, %xmm1
   1265 ; FMA4-NOINFS-NEXT:    vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
   1266 ; FMA4-NOINFS-NEXT:    retq
   1267 ;
   1268 ; AVX512-NOINFS-LABEL: test_v2f64_interp:
   1269 ; AVX512-NOINFS:       # %bb.0:
   1270 ; AVX512-NOINFS-NEXT:    vfnmadd213pd %xmm1, %xmm2, %xmm1
   1271 ; AVX512-NOINFS-NEXT:    vfmadd213pd %xmm1, %xmm2, %xmm0
   1272 ; AVX512-NOINFS-NEXT:    retq
   1273   %t1 = fsub <2 x double> <double 1.0, double 1.0>, %t ; t1 = 1.0 - t in every lane
   1274   %tx = fmul <2 x double> %x, %t ; tx = x * t
   1275   %ty = fmul <2 x double> %y, %t1 ; ty = y * (1.0 - t)
   1276   %r = fadd <2 x double> %tx, %ty ; r = x*t + y*(1.0 - t)
   1277   ret <2 x double> %r
   1278 }
   1279 
   1280 define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) { ; <4 x double> interpolation x*t + y*(1.0 - t); checks expect sub+mul+fmadd by default and fnmadd+fmadd under -enable-no-infs-fp-math
   1281 ; FMA-INFS-LABEL: test_v4f64_interp:
   1282 ; FMA-INFS:       # %bb.0:
   1283 ; FMA-INFS-NEXT:    vmovapd {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
   1284 ; FMA-INFS-NEXT:    vsubpd %ymm2, %ymm3, %ymm3
   1285 ; FMA-INFS-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
   1286 ; FMA-INFS-NEXT:    vfmadd213pd %ymm1, %ymm2, %ymm0
   1287 ; FMA-INFS-NEXT:    retq
   1288 ;
   1289 ; FMA4-INFS-LABEL: test_v4f64_interp:
   1290 ; FMA4-INFS:       # %bb.0:
   1291 ; FMA4-INFS-NEXT:    vmovapd {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
   1292 ; FMA4-INFS-NEXT:    vsubpd %ymm2, %ymm3, %ymm3
   1293 ; FMA4-INFS-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
   1294 ; FMA4-INFS-NEXT:    vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
   1295 ; FMA4-INFS-NEXT:    retq
   1296 ;
   1297 ; AVX512-INFS-LABEL: test_v4f64_interp:
   1298 ; AVX512-INFS:       # %bb.0:
   1299 ; AVX512-INFS-NEXT:    vbroadcastsd {{.*#+}} ymm3 = [1,1,1,1]
   1300 ; AVX512-INFS-NEXT:    vsubpd %ymm2, %ymm3, %ymm3
   1301 ; AVX512-INFS-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
   1302 ; AVX512-INFS-NEXT:    vfmadd213pd %ymm1, %ymm2, %ymm0
   1303 ; AVX512-INFS-NEXT:    retq
   1304 ;
   1305 ; FMA-NOINFS-LABEL: test_v4f64_interp:
   1306 ; FMA-NOINFS:       # %bb.0:
   1307 ; FMA-NOINFS-NEXT:    vfnmadd213pd %ymm1, %ymm2, %ymm1
   1308 ; FMA-NOINFS-NEXT:    vfmadd213pd %ymm1, %ymm2, %ymm0
   1309 ; FMA-NOINFS-NEXT:    retq
   1310 ;
   1311 ; FMA4-NOINFS-LABEL: test_v4f64_interp:
   1312 ; FMA4-NOINFS:       # %bb.0:
   1313 ; FMA4-NOINFS-NEXT:    vfnmaddpd %ymm1, %ymm1, %ymm2, %ymm1
   1314 ; FMA4-NOINFS-NEXT:    vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
   1315 ; FMA4-NOINFS-NEXT:    retq
   1316 ;
   1317 ; AVX512-NOINFS-LABEL: test_v4f64_interp:
   1318 ; AVX512-NOINFS:       # %bb.0:
   1319 ; AVX512-NOINFS-NEXT:    vfnmadd213pd %ymm1, %ymm2, %ymm1
   1320 ; AVX512-NOINFS-NEXT:    vfmadd213pd %ymm1, %ymm2, %ymm0
   1321 ; AVX512-NOINFS-NEXT:    retq
   1322   %t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t ; t1 = 1.0 - t in every lane
   1323   %tx = fmul <4 x double> %x, %t ; tx = x * t
   1324   %ty = fmul <4 x double> %y, %t1 ; ty = y * (1.0 - t)
   1325   %r = fadd <4 x double> %tx, %ty ; r = x*t + y*(1.0 - t)
   1326   ret <4 x double> %r
   1327 }
   1328 
   1329 ;
   1330 ; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
   1331 ;
   1332 
   1333 define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { ; computes -(a0*a1 + a2); every run is expected to emit a single fnmsub
   1334 ; FMA-LABEL: test_v4f32_fneg_fmadd:
   1335 ; FMA:       # %bb.0:
   1336 ; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
   1337 ; FMA-NEXT:    retq
   1338 ;
   1339 ; FMA4-LABEL: test_v4f32_fneg_fmadd:
   1340 ; FMA4:       # %bb.0:
   1341 ; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
   1342 ; FMA4-NEXT:    retq
   1343 ;
   1344 ; AVX512-LABEL: test_v4f32_fneg_fmadd:
   1345 ; AVX512:       # %bb.0:
   1346 ; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
   1347 ; AVX512-NEXT:    retq
   1348   %mul = fmul <4 x float> %a0, %a1 ; mul = a0 * a1
   1349   %add = fadd <4 x float> %mul, %a2 ; add = a0*a1 + a2
   1350   %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add ; negation spelled as (-0.0 - add)
   1351   ret <4 x float> %neg
   1352 }
   1353 
   1354 define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { ; computes -(a0*a1 - a2); every run is expected to emit a single fnmadd
   1355 ; FMA-LABEL: test_v4f64_fneg_fmsub:
   1356 ; FMA:       # %bb.0:
   1357 ; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
   1358 ; FMA-NEXT:    retq
   1359 ;
   1360 ; FMA4-LABEL: test_v4f64_fneg_fmsub:
   1361 ; FMA4:       # %bb.0:
   1362 ; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
   1363 ; FMA4-NEXT:    retq
   1364 ;
   1365 ; AVX512-LABEL: test_v4f64_fneg_fmsub:
   1366 ; AVX512:       # %bb.0:
   1367 ; AVX512-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
   1368 ; AVX512-NEXT:    retq
   1369   %mul = fmul <4 x double> %a0, %a1 ; mul = a0 * a1
   1370   %sub = fsub <4 x double> %mul, %a2 ; sub = a0*a1 - a2
   1371   %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub ; negation spelled as (-0.0 - sub)
   1372   ret <4 x double> %neg
   1373 }
   1374 
   1375 define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { ; computes -(-(a0*a1) + a2); every run is expected to emit a single fmsub
   1376 ; FMA-LABEL: test_v4f32_fneg_fnmadd:
   1377 ; FMA:       # %bb.0:
   1378 ; FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
   1379 ; FMA-NEXT:    retq
   1380 ;
   1381 ; FMA4-LABEL: test_v4f32_fneg_fnmadd:
   1382 ; FMA4:       # %bb.0:
   1383 ; FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
   1384 ; FMA4-NEXT:    retq
   1385 ;
   1386 ; AVX512-LABEL: test_v4f32_fneg_fnmadd:
   1387 ; AVX512:       # %bb.0:
   1388 ; AVX512-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
   1389 ; AVX512-NEXT:    retq
   1390   %mul = fmul <4 x float> %a0, %a1 ; mul = a0 * a1
   1391   %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul ; neg0 = -(a0*a1), spelled as (-0.0 - mul)
   1392   %add = fadd <4 x float> %neg0, %a2 ; add = -(a0*a1) + a2
   1393   %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add ; outer negation, spelled as (-0.0 - add)
   1394   ret <4 x float> %neg1
   1395 }
   1396 
   1397 define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { ; computes -(-(a0*a1) - a2); every run is expected to emit a single fmadd
   1398 ; FMA-LABEL: test_v4f64_fneg_fnmsub:
   1399 ; FMA:       # %bb.0:
   1400 ; FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
   1401 ; FMA-NEXT:    retq
   1402 ;
   1403 ; FMA4-LABEL: test_v4f64_fneg_fnmsub:
   1404 ; FMA4:       # %bb.0:
   1405 ; FMA4-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
   1406 ; FMA4-NEXT:    retq
   1407 ;
   1408 ; AVX512-LABEL: test_v4f64_fneg_fnmsub:
   1409 ; AVX512:       # %bb.0:
   1410 ; AVX512-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
   1411 ; AVX512-NEXT:    retq
   1412   %mul = fmul <4 x double> %a0, %a1 ; mul = a0 * a1
   1413   %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul ; neg0 = -(a0*a1), spelled as (-0.0 - mul)
   1414   %sub = fsub <4 x double> %neg0, %a2 ; sub = -(a0*a1) - a2
   1415   %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub ; outer negation, spelled as (-0.0 - sub)
   1416   ret <4 x double> %neg1
   1417 }
   1418 
   1419 ;
   1420 ; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
   1421 ;
   1422 
   1423 define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 { ; computes x*c1 + x*c2 for two constant vectors; every run is expected to emit one vmulps by a constant loaded from memory
   1424 ; FMA-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
   1425 ; FMA:       # %bb.0:
   1426 ; FMA-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0
   1427 ; FMA-NEXT:    retq
   1428 ;
   1429 ; FMA4-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
   1430 ; FMA4:       # %bb.0:
   1431 ; FMA4-NEXT:    vmulps {{.*}}(%rip), %xmm0, %xmm0
   1432 ; FMA4-NEXT:    retq
   1433 ;
   1434 ; AVX512-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
   1435 ; AVX512:       # %bb.0:
   1436 ; AVX512-NEXT:    vmulps {{.*}}(%rip){1to4}, %xmm0, %xmm0
   1437 ; AVX512-NEXT:    retq
   1438   %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0> ; m0 = x * c1
   1439   %m1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0> ; m1 = x * c2; lane-wise c1 + c2 is 5.0 in every lane
   1440   %a  = fadd <4 x float> %m0, %m1 ; a = x*c1 + x*c2
   1441   ret <4 x float> %a
   1442 }
   1443 
   1444 ;
   1445 ; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
   1446 ;
   1447 
   1448 define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 { ; computes (x*c1)*c2 + y for two constant vectors; every run is expected to emit one fmadd with a memory constant operand
   1449 ; FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
   1450 ; FMA:       # %bb.0:
   1451 ; FMA-NEXT:    vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0
   1452 ; FMA-NEXT:    retq
   1453 ;
   1454 ; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
   1455 ; FMA4:       # %bb.0:
   1456 ; FMA4-NEXT:    vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0
   1457 ; FMA4-NEXT:    retq
   1458 ;
   1459 ; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
   1460 ; AVX512:       # %bb.0:
   1461 ; AVX512-NEXT:    vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0
   1462 ; AVX512-NEXT:    retq
   1463   %m0 = fmul <4 x float> %x,  <float 1.0, float 2.0, float 3.0, float 4.0> ; m0 = x * c1
   1464   %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0> ; m1 = (x * c1) * c2; lane-wise c1 * c2 is <4.0, 6.0, 6.0, 4.0>
   1465   %a  = fadd <4 x float> %m1, %y ; a = (x*c1)*c2 + y
   1466   ret <4 x float> %a
   1467 }
   1468 
   1469 ; Pattern: (fneg (fmul nsz x, y)) -> (fnmsub x, y, 0); without nsz the fmul and a sign-flipping xor are kept
   1470 
   1471 define double @test_f64_fneg_fmul(double %x, double %y) #0 { ; computes -(x*y) with an nsz fmul; every run is expected to zero a register and emit a single fnmsub
   1472 ; FMA-LABEL: test_f64_fneg_fmul:
   1473 ; FMA:       # %bb.0:
   1474 ; FMA-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1475 ; FMA-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
   1476 ; FMA-NEXT:    retq
   1477 ;
   1478 ; FMA4-LABEL: test_f64_fneg_fmul:
   1479 ; FMA4:       # %bb.0:
   1480 ; FMA4-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1481 ; FMA4-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
   1482 ; FMA4-NEXT:    retq
   1483 ;
   1484 ; AVX512-LABEL: test_f64_fneg_fmul:
   1485 ; AVX512:       # %bb.0:
   1486 ; AVX512-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1487 ; AVX512-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
   1488 ; AVX512-NEXT:    retq
   1489   %m = fmul nsz double %x, %y ; m = x * y, carrying the nsz fast-math flag
   1490   %n = fsub double -0.0, %m ; negation spelled as (-0.0 - m)
   1491   ret double %n
   1492 }
   1493 
   1494 define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 { ; computes -(x*y) with an nsz fmul; every run is expected to zero a register and emit a single fnmsub
   1495 ; FMA-LABEL: test_v4f32_fneg_fmul:
   1496 ; FMA:       # %bb.0:
   1497 ; FMA-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1498 ; FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
   1499 ; FMA-NEXT:    retq
   1500 ;
   1501 ; FMA4-LABEL: test_v4f32_fneg_fmul:
   1502 ; FMA4:       # %bb.0:
   1503 ; FMA4-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1504 ; FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
   1505 ; FMA4-NEXT:    retq
   1506 ;
   1507 ; AVX512-LABEL: test_v4f32_fneg_fmul:
   1508 ; AVX512:       # %bb.0:
   1509 ; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1510 ; AVX512-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
   1511 ; AVX512-NEXT:    retq
   1512   %m = fmul nsz <4 x float> %x, %y ; m = x * y, carrying the nsz fast-math flag
   1513   %n = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %m ; negation spelled as (-0.0 - m)
   1514   ret <4 x float> %n
   1515 }
   1516 
   1517 define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 { ; computes -(x*y) with an nsz fmul; every run is expected to zero a register and emit a single fnmsub
   1518 ; FMA-LABEL: test_v4f64_fneg_fmul:
   1519 ; FMA:       # %bb.0:
   1520 ; FMA-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1521 ; FMA-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
   1522 ; FMA-NEXT:    retq
   1523 ;
   1524 ; FMA4-LABEL: test_v4f64_fneg_fmul:
   1525 ; FMA4:       # %bb.0:
   1526 ; FMA4-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1527 ; FMA4-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
   1528 ; FMA4-NEXT:    retq
   1529 ;
   1530 ; AVX512-LABEL: test_v4f64_fneg_fmul:
   1531 ; AVX512:       # %bb.0:
   1532 ; AVX512-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1533 ; AVX512-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0
   1534 ; AVX512-NEXT:    retq
   1535   %m = fmul nsz <4 x double> %x, %y ; m = x * y, carrying the nsz fast-math flag
   1536   %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m ; negation spelled as (-0.0 - m)
   1537   ret <4 x double> %n
   1538 }
   1539 
   1540 define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 { ; computes -(x*y) with no nsz on the fmul; every run is expected to keep vmulpd followed by a vxorpd with a memory constant
   1541 ; FMA-LABEL: test_v4f64_fneg_fmul_no_nsz:
   1542 ; FMA:       # %bb.0:
   1543 ; FMA-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
   1544 ; FMA-NEXT:    vxorpd {{.*}}(%rip), %ymm0, %ymm0
   1545 ; FMA-NEXT:    retq
   1546 ;
   1547 ; FMA4-LABEL: test_v4f64_fneg_fmul_no_nsz:
   1548 ; FMA4:       # %bb.0:
   1549 ; FMA4-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
   1550 ; FMA4-NEXT:    vxorpd {{.*}}(%rip), %ymm0, %ymm0
   1551 ; FMA4-NEXT:    retq
   1552 ;
   1553 ; AVX512-LABEL: test_v4f64_fneg_fmul_no_nsz:
   1554 ; AVX512:       # %bb.0:
   1555 ; AVX512-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
   1556 ; AVX512-NEXT:    vxorpd {{.*}}(%rip){1to4}, %ymm0, %ymm0
   1557 ; AVX512-NEXT:    retq
   1558   %m = fmul <4 x double> %x, %y ; m = x * y, without any fast-math flag on the instruction
   1559   %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m ; negation spelled as (-0.0 - m)
   1560   ret <4 x double> %n
   1561 }
   1562 
   1563 attributes #0 = { "unsafe-fp-math"="true" } ; attribute group applied to the functions declared with #0
   1564