Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA4
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=FMA4
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -fp-contract=fast | FileCheck %s --check-prefix=AVX512
      6 
      7 ;
      8 ; Pattern: (fadd (fmul x, y), z) -> (fmadd x, y, z)
      9 ;
     10 
     11 define <16 x float> @test_16f32_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
        ; Multiplies %a0 by %a1 and adds %a2 on <16 x float>. The assertions expect
        ; the fmul/fadd pair to become fused multiply-adds: two ymm vfmadd213ps for
        ; the FMA run, two ymm vfmaddps for the FMA4 runs, and a single zmm
        ; vfmadd213ps for the AVX512 run.
     12 ; FMA-LABEL: test_16f32_fmadd:
     13 ; FMA:       # BB#0:
     14 ; FMA-NEXT:    vfmadd213ps %ymm4, %ymm2, %ymm0
     15 ; FMA-NEXT:    vfmadd213ps %ymm5, %ymm3, %ymm1
     16 ; FMA-NEXT:    retq
     17 ;
     18 ; FMA4-LABEL: test_16f32_fmadd:
     19 ; FMA4:       # BB#0:
     20 ; FMA4-NEXT:    vfmaddps %ymm4, %ymm2, %ymm0, %ymm0
     21 ; FMA4-NEXT:    vfmaddps %ymm5, %ymm3, %ymm1, %ymm1
     22 ; FMA4-NEXT:    retq
     23 ;
     24 ; AVX512-LABEL: test_16f32_fmadd:
     25 ; AVX512:       # BB#0:
     26 ; AVX512-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0
     27 ; AVX512-NEXT:    retq
     28   %x = fmul <16 x float> %a0, %a1
     29   %res = fadd <16 x float> %x, %a2
     30   ret <16 x float> %res
     31 }
     32 
     33 define <8 x double> @test_8f64_fmadd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
        ; <8 x double> counterpart of the fmadd test: %a0 * %a1 + %a2. Expected
        ; output is two ymm vfmadd213pd (FMA run), two ymm vfmaddpd (FMA4 runs), or
        ; one zmm vfmadd213pd (AVX512 run).
     34 ; FMA-LABEL: test_8f64_fmadd:
     35 ; FMA:       # BB#0:
     36 ; FMA-NEXT:    vfmadd213pd %ymm4, %ymm2, %ymm0
     37 ; FMA-NEXT:    vfmadd213pd %ymm5, %ymm3, %ymm1
     38 ; FMA-NEXT:    retq
     39 ;
     40 ; FMA4-LABEL: test_8f64_fmadd:
     41 ; FMA4:       # BB#0:
     42 ; FMA4-NEXT:    vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
     43 ; FMA4-NEXT:    vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
     44 ; FMA4-NEXT:    retq
     45 ;
     46 ; AVX512-LABEL: test_8f64_fmadd:
     47 ; AVX512:       # BB#0:
     48 ; AVX512-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0
     49 ; AVX512-NEXT:    retq
     50   %x = fmul <8 x double> %a0, %a1
     51   %res = fadd <8 x double> %x, %a2
     52   ret <8 x double> %res
     53 }
     54 
     55 ;
     56 ; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
     57 ;
     58 
     59 define <16 x float> @test_16f32_fmsub(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
        ; Computes %a0 * %a1 - %a2 on <16 x float>. The assertions expect the
        ; fmul/fsub pair to fuse into vfmsub213ps (FMA and AVX512 runs) or vfmsubps
        ; (FMA4 runs); the ymm variants are emitted twice, the zmm variant once.
     60 ; FMA-LABEL: test_16f32_fmsub:
     61 ; FMA:       # BB#0:
     62 ; FMA-NEXT:    vfmsub213ps %ymm4, %ymm2, %ymm0
     63 ; FMA-NEXT:    vfmsub213ps %ymm5, %ymm3, %ymm1
     64 ; FMA-NEXT:    retq
     65 ;
     66 ; FMA4-LABEL: test_16f32_fmsub:
     67 ; FMA4:       # BB#0:
     68 ; FMA4-NEXT:    vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
     69 ; FMA4-NEXT:    vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
     70 ; FMA4-NEXT:    retq
     71 ;
     72 ; AVX512-LABEL: test_16f32_fmsub:
     73 ; AVX512:       # BB#0:
     74 ; AVX512-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0
     75 ; AVX512-NEXT:    retq
     76   %x = fmul <16 x float> %a0, %a1
     77   %res = fsub <16 x float> %x, %a2
     78   ret <16 x float> %res
     79 }
     80 
     81 define <8 x double> @test_8f64_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
        ; <8 x double> counterpart of the fmsub test: %a0 * %a1 - %a2. Expected
        ; output is two ymm vfmsub213pd (FMA run), two ymm vfmsubpd (FMA4 runs), or
        ; one zmm vfmsub213pd (AVX512 run).
     82 ; FMA-LABEL: test_8f64_fmsub:
     83 ; FMA:       # BB#0:
     84 ; FMA-NEXT:    vfmsub213pd %ymm4, %ymm2, %ymm0
     85 ; FMA-NEXT:    vfmsub213pd %ymm5, %ymm3, %ymm1
     86 ; FMA-NEXT:    retq
     87 ;
     88 ; FMA4-LABEL: test_8f64_fmsub:
     89 ; FMA4:       # BB#0:
     90 ; FMA4-NEXT:    vfmsubpd %ymm4, %ymm2, %ymm0, %ymm0
     91 ; FMA4-NEXT:    vfmsubpd %ymm5, %ymm3, %ymm1, %ymm1
     92 ; FMA4-NEXT:    retq
     93 ;
     94 ; AVX512-LABEL: test_8f64_fmsub:
     95 ; AVX512:       # BB#0:
     96 ; AVX512-NEXT:    vfmsub213pd %zmm2, %zmm1, %zmm0
     97 ; AVX512-NEXT:    retq
     98   %x = fmul <8 x double> %a0, %a1
     99   %res = fsub <8 x double> %x, %a2
    100   ret <8 x double> %res
    101 }
    102 
    103 ;
    104 ; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
    105 ;
    106 
    107 define <16 x float> @test_16f32_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
        ; Computes %a2 - (%a0 * %a1) on <16 x float>, i.e. the product is the
        ; subtrahend. The assertions expect a negated-multiply-add: vfnmadd213ps
        ; (FMA and AVX512 runs) or vfnmaddps (FMA4 runs).
    108 ; FMA-LABEL: test_16f32_fnmadd:
    109 ; FMA:       # BB#0:
    110 ; FMA-NEXT:    vfnmadd213ps %ymm4, %ymm2, %ymm0
    111 ; FMA-NEXT:    vfnmadd213ps %ymm5, %ymm3, %ymm1
    112 ; FMA-NEXT:    retq
    113 ;
    114 ; FMA4-LABEL: test_16f32_fnmadd:
    115 ; FMA4:       # BB#0:
    116 ; FMA4-NEXT:    vfnmaddps %ymm4, %ymm2, %ymm0, %ymm0
    117 ; FMA4-NEXT:    vfnmaddps %ymm5, %ymm3, %ymm1, %ymm1
    118 ; FMA4-NEXT:    retq
    119 ;
    120 ; AVX512-LABEL: test_16f32_fnmadd:
    121 ; AVX512:       # BB#0:
    122 ; AVX512-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0
    123 ; AVX512-NEXT:    retq
    124   %x = fmul <16 x float> %a0, %a1
    125   %res = fsub <16 x float> %a2, %x
    126   ret <16 x float> %res
    127 }
    128 
    129 define <8 x double> @test_8f64_fnmadd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
        ; <8 x double> counterpart of the fnmadd test: %a2 - (%a0 * %a1). Expected
        ; output is two ymm vfnmadd213pd (FMA run), two ymm vfnmaddpd (FMA4 runs),
        ; or one zmm vfnmadd213pd (AVX512 run).
    130 ; FMA-LABEL: test_8f64_fnmadd:
    131 ; FMA:       # BB#0:
    132 ; FMA-NEXT:    vfnmadd213pd %ymm4, %ymm2, %ymm0
    133 ; FMA-NEXT:    vfnmadd213pd %ymm5, %ymm3, %ymm1
    134 ; FMA-NEXT:    retq
    135 ;
    136 ; FMA4-LABEL: test_8f64_fnmadd:
    137 ; FMA4:       # BB#0:
    138 ; FMA4-NEXT:    vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
    139 ; FMA4-NEXT:    vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
    140 ; FMA4-NEXT:    retq
    141 ;
    142 ; AVX512-LABEL: test_8f64_fnmadd:
    143 ; AVX512:       # BB#0:
    144 ; AVX512-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0
    145 ; AVX512-NEXT:    retq
    146   %x = fmul <8 x double> %a0, %a1
    147   %res = fsub <8 x double> %a2, %x
    148   ret <8 x double> %res
    149 }
    150 
    151 ;
    152 ; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
    153 ;
    154 
    155 define <16 x float> @test_16f32_fnmsub(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
        ; Computes -(%a0 * %a1) - %a2 on <16 x float>. The assertions expect the
        ; negate/multiply/subtract chain to collapse into vfnmsub213ps (FMA and
        ; AVX512 runs) or vfnmsubps (FMA4 runs), with no separate negation.
    156 ; FMA-LABEL: test_16f32_fnmsub:
    157 ; FMA:       # BB#0:
    158 ; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm2, %ymm0
    159 ; FMA-NEXT:    vfnmsub213ps %ymm5, %ymm3, %ymm1
    160 ; FMA-NEXT:    retq
    161 ;
    162 ; FMA4-LABEL: test_16f32_fnmsub:
    163 ; FMA4:       # BB#0:
    164 ; FMA4-NEXT:    vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
    165 ; FMA4-NEXT:    vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
    166 ; FMA4-NEXT:    retq
    167 ;
    168 ; AVX512-LABEL: test_16f32_fnmsub:
    169 ; AVX512:       # BB#0:
    170 ; AVX512-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
    171 ; AVX512-NEXT:    retq
    172   %x = fmul <16 x float> %a0, %a1
        ; Subtracting %x from an all -0.0 vector is how this test spells fneg.
    173   %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
    174   %res = fsub <16 x float> %y, %a2
    175   ret <16 x float> %res
    176 }
    177 
    178 define <8 x double> @test_8f64_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
        ; <8 x double> counterpart of the fnmsub test: -(%a0 * %a1) - %a2. Expected
        ; output is two ymm vfnmsub213pd (FMA run), two ymm vfnmsubpd (FMA4 runs),
        ; or one zmm vfnmsub213pd (AVX512 run).
    179 ; FMA-LABEL: test_8f64_fnmsub:
    180 ; FMA:       # BB#0:
    181 ; FMA-NEXT:    vfnmsub213pd %ymm4, %ymm2, %ymm0
    182 ; FMA-NEXT:    vfnmsub213pd %ymm5, %ymm3, %ymm1
    183 ; FMA-NEXT:    retq
    184 ;
    185 ; FMA4-LABEL: test_8f64_fnmsub:
    186 ; FMA4:       # BB#0:
    187 ; FMA4-NEXT:    vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
    188 ; FMA4-NEXT:    vfnmsubpd %ymm5, %ymm3, %ymm1, %ymm1
    189 ; FMA4-NEXT:    retq
    190 ;
    191 ; AVX512-LABEL: test_8f64_fnmsub:
    192 ; AVX512:       # BB#0:
    193 ; AVX512-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0
    194 ; AVX512-NEXT:    retq
    195   %x = fmul <8 x double> %a0, %a1
        ; Subtracting %x from an all -0.0 vector is how this test spells fneg.
    196   %y = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
    197   %res = fsub <8 x double> %y, %a2
    198   ret <8 x double> %res
    199 }
    200 
    201 ;
    202 ; Load Folding Patterns
    203 ;
    204 
    205 define <16 x float> @test_16f32_fmadd_load(<16 x float>* %a0, <16 x float> %a1, <16 x float> %a2) {
        ; Loads a <16 x float> through pointer %a0, multiplies it by %a1 and adds
        ; %a2. For the FMA and FMA4 runs the assertions expect the load to be folded
        ; into the fused instruction as a memory operand, at (%rdi) and 32(%rdi).
        ; For the AVX512 run the recorded output keeps a separate vmovaps load and
        ; a trailing register copy, so no folding is asserted there.
    206 ; FMA-LABEL: test_16f32_fmadd_load:
    207 ; FMA:       # BB#0:
    208 ; FMA-NEXT:    vfmadd132ps (%rdi), %ymm2, %ymm0
    209 ; FMA-NEXT:    vfmadd132ps 32(%rdi), %ymm3, %ymm1
    210 ; FMA-NEXT:    retq
    211 ;
    212 ; FMA4-LABEL: test_16f32_fmadd_load:
    213 ; FMA4:       # BB#0:
    214 ; FMA4-NEXT:    vfmaddps %ymm2, (%rdi), %ymm0, %ymm0
    215 ; FMA4-NEXT:    vfmaddps %ymm3, 32(%rdi), %ymm1, %ymm1
    216 ; FMA4-NEXT:    retq
    217 ;
    218 ; AVX512-LABEL: test_16f32_fmadd_load:
    219 ; AVX512:       # BB#0:
    220 ; AVX512-NEXT:    vmovaps (%rdi), %zmm2
    221 ; AVX512-NEXT:    vfmadd213ps %zmm1, %zmm0, %zmm2
    222 ; AVX512-NEXT:    vmovaps %zmm2, %zmm0
    223 ; AVX512-NEXT:    retq
    224   %x = load <16 x float>, <16 x float>* %a0
    225   %y = fmul <16 x float> %x, %a1
    226   %res = fadd <16 x float> %y, %a2
    227   ret <16 x float> %res
    228 }
    229 
    230 define <8 x double> @test_8f64_fmsub_load(<8 x double>* %a0, <8 x double> %a1, <8 x double> %a2) {
        ; Loads an <8 x double> through pointer %a0, multiplies it by %a1 and
        ; subtracts %a2. For the FMA and FMA4 runs the assertions expect the load to
        ; be folded into the fused instruction as a memory operand, at (%rdi) and
        ; 32(%rdi). For the AVX512 run the recorded output keeps a separate vmovapd
        ; load and a trailing register copy, so no folding is asserted there.
    231 ; FMA-LABEL: test_8f64_fmsub_load:
    232 ; FMA:       # BB#0:
    233 ; FMA-NEXT:    vfmsub132pd (%rdi), %ymm2, %ymm0
    234 ; FMA-NEXT:    vfmsub132pd 32(%rdi), %ymm3, %ymm1
    235 ; FMA-NEXT:    retq
    236 ;
    237 ; FMA4-LABEL: test_8f64_fmsub_load:
    238 ; FMA4:       # BB#0:
    239 ; FMA4-NEXT:    vfmsubpd %ymm2, (%rdi), %ymm0, %ymm0
    240 ; FMA4-NEXT:    vfmsubpd %ymm3, 32(%rdi), %ymm1, %ymm1
    241 ; FMA4-NEXT:    retq
    242 ;
    243 ; AVX512-LABEL: test_8f64_fmsub_load:
    244 ; AVX512:       # BB#0:
    245 ; AVX512-NEXT:    vmovapd (%rdi), %zmm2
    246 ; AVX512-NEXT:    vfmsub213pd %zmm1, %zmm0, %zmm2
    247 ; AVX512-NEXT:    vmovaps %zmm2, %zmm0
    248 ; AVX512-NEXT:    retq
    249   %x = load <8 x double>, <8 x double>* %a0
    250   %y = fmul <8 x double> %x, %a1
    251   %res = fsub <8 x double> %y, %a2
    252   ret <8 x double> %res
    253 }
    254 
    255 ;
    256 ; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
    257 ;
    258 
    259 define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %y) {
        ; Computes (%x + 1.0) * %y on <16 x float>. The assertions expect this to be
        ; rewritten as %x * %y + %y: a fused multiply-add whose multiplier and
        ; addend are the same register (the one holding %y).
    260 ; FMA-LABEL: test_v16f32_mul_add_x_one_y:
    261 ; FMA:       # BB#0:
    262 ; FMA-NEXT:    vfmadd213ps %ymm2, %ymm2, %ymm0
    263 ; FMA-NEXT:    vfmadd213ps %ymm3, %ymm3, %ymm1
    264 ; FMA-NEXT:    retq
    265 ;
    266 ; FMA4-LABEL: test_v16f32_mul_add_x_one_y:
    267 ; FMA4:       # BB#0:
    268 ; FMA4-NEXT:    vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
    269 ; FMA4-NEXT:    vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
    270 ; FMA4-NEXT:    retq
    271 ;
    272 ; AVX512-LABEL: test_v16f32_mul_add_x_one_y:
    273 ; AVX512:       # BB#0:
    274 ; AVX512-NEXT:    vfmadd213ps %zmm1, %zmm1, %zmm0
    275 ; AVX512-NEXT:    retq
    276   %a = fadd <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
    277   %m = fmul <16 x float> %a, %y
    278   ret <16 x float> %m
    279 }
    280 
    281 define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y) {
        ; Computes %y * (%x + 1.0) on <8 x double>; same fold as the (x + 1.0) * y
        ; case but with the fmul operands commuted. Expected output is a fused
        ; multiply-add equivalent to %x * %y + %y.
    282 ; FMA-LABEL: test_v8f64_mul_y_add_x_one:
    283 ; FMA:       # BB#0:
    284 ; FMA-NEXT:    vfmadd213pd %ymm2, %ymm2, %ymm0
    285 ; FMA-NEXT:    vfmadd213pd %ymm3, %ymm3, %ymm1
    286 ; FMA-NEXT:    retq
    287 ;
    288 ; FMA4-LABEL: test_v8f64_mul_y_add_x_one:
    289 ; FMA4:       # BB#0:
    290 ; FMA4-NEXT:    vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
    291 ; FMA4-NEXT:    vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
    292 ; FMA4-NEXT:    retq
    293 ;
    294 ; AVX512-LABEL: test_v8f64_mul_y_add_x_one:
    295 ; AVX512:       # BB#0:
    296 ; AVX512-NEXT:    vfmadd213pd %zmm1, %zmm1, %zmm0
    297 ; AVX512-NEXT:    retq
    298   %a = fadd <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
    299   %m = fmul <8 x double> %y, %a
    300   ret <8 x double> %m
    301 }
    302 
    303 define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float> %y) {
        ; Computes (%x + -1.0) * %y on <16 x float>. The assertions expect this to
        ; be rewritten as %x * %y - %y: a fused multiply-subtract whose multiplier
        ; and subtrahend are the same register (the one holding %y).
    304 ; FMA-LABEL: test_v16f32_mul_add_x_negone_y:
    305 ; FMA:       # BB#0:
    306 ; FMA-NEXT:    vfmsub213ps %ymm2, %ymm2, %ymm0
    307 ; FMA-NEXT:    vfmsub213ps %ymm3, %ymm3, %ymm1
    308 ; FMA-NEXT:    retq
    309 ;
    310 ; FMA4-LABEL: test_v16f32_mul_add_x_negone_y:
    311 ; FMA4:       # BB#0:
    312 ; FMA4-NEXT:    vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
    313 ; FMA4-NEXT:    vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
    314 ; FMA4-NEXT:    retq
    315 ;
    316 ; AVX512-LABEL: test_v16f32_mul_add_x_negone_y:
    317 ; AVX512:       # BB#0:
    318 ; AVX512-NEXT:    vfmsub213ps %zmm1, %zmm1, %zmm0
    319 ; AVX512-NEXT:    retq
    320   %a = fadd <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
    321   %m = fmul <16 x float> %a, %y
    322   ret <16 x float> %m
    323 }
    324 
    325 define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double> %y) {
        ; Computes %y * (%x + -1.0) on <8 x double>; same fold as the
        ; (x + -1.0) * y case with the fmul operands commuted. Expected output is a
        ; fused multiply-subtract equivalent to %x * %y - %y.
    326 ; FMA-LABEL: test_v8f64_mul_y_add_x_negone:
    327 ; FMA:       # BB#0:
    328 ; FMA-NEXT:    vfmsub213pd %ymm2, %ymm2, %ymm0
    329 ; FMA-NEXT:    vfmsub213pd %ymm3, %ymm3, %ymm1
    330 ; FMA-NEXT:    retq
    331 ;
    332 ; FMA4-LABEL: test_v8f64_mul_y_add_x_negone:
    333 ; FMA4:       # BB#0:
    334 ; FMA4-NEXT:    vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
    335 ; FMA4-NEXT:    vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
    336 ; FMA4-NEXT:    retq
    337 ;
    338 ; AVX512-LABEL: test_v8f64_mul_y_add_x_negone:
    339 ; AVX512:       # BB#0:
    340 ; AVX512-NEXT:    vfmsub213pd %zmm1, %zmm1, %zmm0
    341 ; AVX512-NEXT:    retq
    342   %a = fadd <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
    343   %m = fmul <8 x double> %y, %a
    344   ret <8 x double> %m
    345 }
    346 
    347 define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %y) {
        ; Computes (1.0 - %x) * %y on <16 x float>. The assertions expect this to be
        ; rewritten as -(%x * %y) + %y: a negated-multiply-add with %y as both
        ; multiplier and addend.
    348 ; FMA-LABEL: test_v16f32_mul_sub_one_x_y:
    349 ; FMA:       # BB#0:
    350 ; FMA-NEXT:    vfnmadd213ps %ymm2, %ymm2, %ymm0
    351 ; FMA-NEXT:    vfnmadd213ps %ymm3, %ymm3, %ymm1
    352 ; FMA-NEXT:    retq
    353 ;
    354 ; FMA4-LABEL: test_v16f32_mul_sub_one_x_y:
    355 ; FMA4:       # BB#0:
    356 ; FMA4-NEXT:    vfnmaddps %ymm2, %ymm2, %ymm0, %ymm0
    357 ; FMA4-NEXT:    vfnmaddps %ymm3, %ymm3, %ymm1, %ymm1
    358 ; FMA4-NEXT:    retq
    359 ;
    360 ; AVX512-LABEL: test_v16f32_mul_sub_one_x_y:
    361 ; AVX512:       # BB#0:
    362 ; AVX512-NEXT:    vfnmadd213ps %zmm1, %zmm1, %zmm0
    363 ; AVX512-NEXT:    retq
    364   %s = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
    365   %m = fmul <16 x float> %s, %y
    366   ret <16 x float> %m
    367 }
    368 
    369 define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y) {
        ; Computes %y * (1.0 - %x) on <8 x double>; same fold as the (1.0 - x) * y
        ; case with the fmul operands commuted. Expected output is a
        ; negated-multiply-add equivalent to -(%x * %y) + %y.
    370 ; FMA-LABEL: test_v8f64_mul_y_sub_one_x:
    371 ; FMA:       # BB#0:
    372 ; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm2, %ymm0
    373 ; FMA-NEXT:    vfnmadd213pd %ymm3, %ymm3, %ymm1
    374 ; FMA-NEXT:    retq
    375 ;
    376 ; FMA4-LABEL: test_v8f64_mul_y_sub_one_x:
    377 ; FMA4:       # BB#0:
    378 ; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm2, %ymm0, %ymm0
    379 ; FMA4-NEXT:    vfnmaddpd %ymm3, %ymm3, %ymm1, %ymm1
    380 ; FMA4-NEXT:    retq
    381 ;
    382 ; AVX512-LABEL: test_v8f64_mul_y_sub_one_x:
    383 ; AVX512:       # BB#0:
    384 ; AVX512-NEXT:    vfnmadd213pd %zmm1, %zmm1, %zmm0
    385 ; AVX512-NEXT:    retq
    386   %s = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %x
    387   %m = fmul <8 x double> %y, %s
    388   ret <8 x double> %m
    389 }
    390 
    391 define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float> %y) {
        ; Computes (-1.0 - %x) * %y on <16 x float>. The assertions expect this to
        ; be rewritten as -(%x * %y) - %y: a negated-multiply-subtract with %y as
        ; both multiplier and subtrahend.
    392 ; FMA-LABEL: test_v16f32_mul_sub_negone_x_y:
    393 ; FMA:       # BB#0:
    394 ; FMA-NEXT:    vfnmsub213ps %ymm2, %ymm2, %ymm0
    395 ; FMA-NEXT:    vfnmsub213ps %ymm3, %ymm3, %ymm1
    396 ; FMA-NEXT:    retq
    397 ;
    398 ; FMA4-LABEL: test_v16f32_mul_sub_negone_x_y:
    399 ; FMA4:       # BB#0:
    400 ; FMA4-NEXT:    vfnmsubps %ymm2, %ymm2, %ymm0, %ymm0
    401 ; FMA4-NEXT:    vfnmsubps %ymm3, %ymm3, %ymm1, %ymm1
    402 ; FMA4-NEXT:    retq
    403 ;
    404 ; AVX512-LABEL: test_v16f32_mul_sub_negone_x_y:
    405 ; AVX512:       # BB#0:
    406 ; AVX512-NEXT:    vfnmsub213ps %zmm1, %zmm1, %zmm0
    407 ; AVX512-NEXT:    retq
    408   %s = fsub <16 x float> <float -1.0, float -1.0, float -1.0, float -1.0,float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>, %x
    409   %m = fmul <16 x float> %s, %y
    410   ret <16 x float> %m
    411 }
    412 
    413 define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double> %y) {
        ; Computes %y * (-1.0 - %x) on <8 x double>; same fold as the
        ; (-1.0 - x) * y case with the fmul operands commuted. Expected output is a
        ; negated-multiply-subtract equivalent to -(%x * %y) - %y.
    414 ; FMA-LABEL: test_v8f64_mul_y_sub_negone_x:
    415 ; FMA:       # BB#0:
    416 ; FMA-NEXT:    vfnmsub213pd %ymm2, %ymm2, %ymm0
    417 ; FMA-NEXT:    vfnmsub213pd %ymm3, %ymm3, %ymm1
    418 ; FMA-NEXT:    retq
    419 ;
    420 ; FMA4-LABEL: test_v8f64_mul_y_sub_negone_x:
    421 ; FMA4:       # BB#0:
    422 ; FMA4-NEXT:    vfnmsubpd %ymm2, %ymm2, %ymm0, %ymm0
    423 ; FMA4-NEXT:    vfnmsubpd %ymm3, %ymm3, %ymm1, %ymm1
    424 ; FMA4-NEXT:    retq
    425 ;
    426 ; AVX512-LABEL: test_v8f64_mul_y_sub_negone_x:
    427 ; AVX512:       # BB#0:
    428 ; AVX512-NEXT:    vfnmsub213pd %zmm1, %zmm1, %zmm0
    429 ; AVX512-NEXT:    retq
    430   %s = fsub <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>, %x
    431   %m = fmul <8 x double> %y, %s
    432   ret <8 x double> %m
    433 }
    434 
    435 define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) {
        ; Computes (%x - 1.0) * %y on <16 x float>. The assertions expect this to be
        ; rewritten as %x * %y - %y: a fused multiply-subtract with %y as both
        ; multiplier and subtrahend.
    436 ; FMA-LABEL: test_v16f32_mul_sub_x_one_y:
    437 ; FMA:       # BB#0:
    438 ; FMA-NEXT:    vfmsub213ps %ymm2, %ymm2, %ymm0
    439 ; FMA-NEXT:    vfmsub213ps %ymm3, %ymm3, %ymm1
    440 ; FMA-NEXT:    retq
    441 ;
    442 ; FMA4-LABEL: test_v16f32_mul_sub_x_one_y:
    443 ; FMA4:       # BB#0:
    444 ; FMA4-NEXT:    vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
    445 ; FMA4-NEXT:    vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
    446 ; FMA4-NEXT:    retq
    447 ;
    448 ; AVX512-LABEL: test_v16f32_mul_sub_x_one_y:
    449 ; AVX512:       # BB#0:
    450 ; AVX512-NEXT:    vfmsub213ps %zmm1, %zmm1, %zmm0
    451 ; AVX512-NEXT:    retq
    452   %s = fsub <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
    453   %m = fmul <16 x float> %s, %y
    454   ret <16 x float> %m
    455 }
    456 
    457 define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) {
        ; Computes %y * (%x - 1.0) on <8 x double>; same fold as the (x - 1.0) * y
        ; case with the fmul operands commuted. Expected output is a fused
        ; multiply-subtract equivalent to %x * %y - %y.
    458 ; FMA-LABEL: test_v8f64_mul_y_sub_x_one:
    459 ; FMA:       # BB#0:
    460 ; FMA-NEXT:    vfmsub213pd %ymm2, %ymm2, %ymm0
    461 ; FMA-NEXT:    vfmsub213pd %ymm3, %ymm3, %ymm1
    462 ; FMA-NEXT:    retq
    463 ;
    464 ; FMA4-LABEL: test_v8f64_mul_y_sub_x_one:
    465 ; FMA4:       # BB#0:
    466 ; FMA4-NEXT:    vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
    467 ; FMA4-NEXT:    vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
    468 ; FMA4-NEXT:    retq
    469 ;
    470 ; AVX512-LABEL: test_v8f64_mul_y_sub_x_one:
    471 ; AVX512:       # BB#0:
    472 ; AVX512-NEXT:    vfmsub213pd %zmm1, %zmm1, %zmm0
    473 ; AVX512-NEXT:    retq
    474   %s = fsub <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
    475   %m = fmul <8 x double> %y, %s
    476   ret <8 x double> %m
    477 }
    478 
    479 define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {
        ; Computes (%x - -1.0) * %y on <16 x float>. The assertions expect this to
        ; be rewritten as %x * %y + %y: a fused multiply-add with %y as both
        ; multiplier and addend.
    480 ; FMA-LABEL: test_v16f32_mul_sub_x_negone_y:
    481 ; FMA:       # BB#0:
    482 ; FMA-NEXT:    vfmadd213ps %ymm2, %ymm2, %ymm0
    483 ; FMA-NEXT:    vfmadd213ps %ymm3, %ymm3, %ymm1
    484 ; FMA-NEXT:    retq
    485 ;
    486 ; FMA4-LABEL: test_v16f32_mul_sub_x_negone_y:
    487 ; FMA4:       # BB#0:
    488 ; FMA4-NEXT:    vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
    489 ; FMA4-NEXT:    vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
    490 ; FMA4-NEXT:    retq
    491 ;
    492 ; AVX512-LABEL: test_v16f32_mul_sub_x_negone_y:
    493 ; AVX512:       # BB#0:
    494 ; AVX512-NEXT:    vfmadd213ps %zmm1, %zmm1, %zmm0
    495 ; AVX512-NEXT:    retq
    496   %s = fsub <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
    497   %m = fmul <16 x float> %s, %y
    498   ret <16 x float> %m
    499 }
    500 
    501 define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {
        ; Computes %y * (%x - -1.0) on <8 x double>; same fold as the
        ; (x - -1.0) * y case with the fmul operands commuted. Expected output is a
        ; fused multiply-add equivalent to %x * %y + %y.
    502 ; FMA-LABEL: test_v8f64_mul_y_sub_x_negone:
    503 ; FMA:       # BB#0:
    504 ; FMA-NEXT:    vfmadd213pd %ymm2, %ymm2, %ymm0
    505 ; FMA-NEXT:    vfmadd213pd %ymm3, %ymm3, %ymm1
    506 ; FMA-NEXT:    retq
    507 ;
    508 ; FMA4-LABEL: test_v8f64_mul_y_sub_x_negone:
    509 ; FMA4:       # BB#0:
    510 ; FMA4-NEXT:    vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
    511 ; FMA4-NEXT:    vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
    512 ; FMA4-NEXT:    retq
    513 ;
    514 ; AVX512-LABEL: test_v8f64_mul_y_sub_x_negone:
    515 ; AVX512:       # BB#0:
    516 ; AVX512-NEXT:    vfmadd213pd %zmm1, %zmm1, %zmm0
    517 ; AVX512-NEXT:    retq
    518   %s = fsub <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
    519   %m = fmul <8 x double> %y, %s
    520   ret <8 x double> %m
    521 }
    522 
    523 ;
    524 ; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
    525 ;
    526 
    527 define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x float> %t) {
        ; Linear interpolation on <16 x float>: %x * %t + %y * (1.0 - %t). The
        ; assertions expect two chained fused operations per register: a
        ; negated-multiply-add producing %y - %t * %y, followed by a multiply-add
        ; that adds %x * %t to that intermediate. The AVX512 run additionally
        ; expects a vmovaps copy of %t before the first fused operation.
    528 ; FMA-LABEL: test_v16f32_interp:
    529 ; FMA:       # BB#0:
    530 ; FMA-NEXT:    vfnmadd213ps %ymm3, %ymm5, %ymm3
    531 ; FMA-NEXT:    vfnmadd213ps %ymm2, %ymm4, %ymm2
    532 ; FMA-NEXT:    vfmadd213ps %ymm2, %ymm4, %ymm0
    533 ; FMA-NEXT:    vfmadd213ps %ymm3, %ymm5, %ymm1
    534 ; FMA-NEXT:    retq
    535 ;
    536 ; FMA4-LABEL: test_v16f32_interp:
    537 ; FMA4:       # BB#0:
    538 ; FMA4-NEXT:    vfnmaddps %ymm3, %ymm3, %ymm5, %ymm3
    539 ; FMA4-NEXT:    vfnmaddps %ymm2, %ymm2, %ymm4, %ymm2
    540 ; FMA4-NEXT:    vfmaddps %ymm2, %ymm4, %ymm0, %ymm0
    541 ; FMA4-NEXT:    vfmaddps %ymm3, %ymm5, %ymm1, %ymm1
    542 ; FMA4-NEXT:    retq
    543 ;
    544 ; AVX512-LABEL: test_v16f32_interp:
    545 ; AVX512:       # BB#0:
    546 ; AVX512-NEXT:    vmovaps %zmm2, %zmm3
    547 ; AVX512-NEXT:    vfnmadd213ps %zmm1, %zmm1, %zmm3
    548 ; AVX512-NEXT:    vfmadd213ps %zmm3, %zmm2, %zmm0
    549 ; AVX512-NEXT:    retq
    550   %t1 = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
    551   %tx = fmul <16 x float> %x, %t
    552   %ty = fmul <16 x float> %y, %t1
    553   %r = fadd <16 x float> %tx, %ty
    554   ret <16 x float> %r
    555 }
    556 
    557 define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x double> %t) {
        ; Linear interpolation on <8 x double>: %x * %t + %y * (1.0 - %t). The
        ; assertions expect two chained fused operations per register: a
        ; negated-multiply-add producing %y - %t * %y, followed by a multiply-add
        ; that adds %x * %t to that intermediate. The AVX512 run additionally
        ; expects a register copy of %t before the first fused operation.
    558 ; FMA-LABEL: test_v8f64_interp:
    559 ; FMA:       # BB#0:
    560 ; FMA-NEXT:    vfnmadd213pd %ymm3, %ymm5, %ymm3
    561 ; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm4, %ymm2
    562 ; FMA-NEXT:    vfmadd213pd %ymm2, %ymm4, %ymm0
    563 ; FMA-NEXT:    vfmadd213pd %ymm3, %ymm5, %ymm1
    564 ; FMA-NEXT:    retq
    565 ;
    566 ; FMA4-LABEL: test_v8f64_interp:
    567 ; FMA4:       # BB#0:
    568 ; FMA4-NEXT:    vfnmaddpd %ymm3, %ymm3, %ymm5, %ymm3
    569 ; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm2, %ymm4, %ymm2
    570 ; FMA4-NEXT:    vfmaddpd %ymm2, %ymm4, %ymm0, %ymm0
    571 ; FMA4-NEXT:    vfmaddpd %ymm3, %ymm5, %ymm1, %ymm1
    572 ; FMA4-NEXT:    retq
    573 ;
    574 ; AVX512-LABEL: test_v8f64_interp:
    575 ; AVX512:       # BB#0:
    576 ; AVX512-NEXT:    vmovaps %zmm2, %zmm3
    577 ; AVX512-NEXT:    vfnmadd213pd %zmm1, %zmm1, %zmm3
    578 ; AVX512-NEXT:    vfmadd213pd %zmm3, %zmm2, %zmm0
    579 ; AVX512-NEXT:    retq
    580   %t1 = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
    581   %tx = fmul <8 x double> %x, %t
    582   %ty = fmul <8 x double> %y, %t1
    583   %r = fadd <8 x double> %tx, %ty
    584   ret <8 x double> %r
    585 }
    586 
    587 ;
    588 ; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
    589 ;
    590 
    591 define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
        ; Computes -(%a0 * %a1 + %a2) on <16 x float>. The assertions expect the
        ; outer negation to be absorbed into the fused operation, yielding
        ; vfnmsub213ps (FMA and AVX512 runs) or vfnmsubps (FMA4 runs).
        ; NOTE(review): attribute group #0 is defined outside this excerpt;
        ; presumably it relaxes FP semantics so the negation fold is legal - confirm.
    592 ; FMA-LABEL: test_v16f32_fneg_fmadd:
    593 ; FMA:       # BB#0:
    594 ; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm2, %ymm0
    595 ; FMA-NEXT:    vfnmsub213ps %ymm5, %ymm3, %ymm1
    596 ; FMA-NEXT:    retq
    597 ;
    598 ; FMA4-LABEL: test_v16f32_fneg_fmadd:
    599 ; FMA4:       # BB#0:
    600 ; FMA4-NEXT:    vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
    601 ; FMA4-NEXT:    vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
    602 ; FMA4-NEXT:    retq
    603 ;
    604 ; AVX512-LABEL: test_v16f32_fneg_fmadd:
    605 ; AVX512:       # BB#0:
    606 ; AVX512-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
    607 ; AVX512-NEXT:    retq
    608   %mul = fmul <16 x float> %a0, %a1
    609   %add = fadd <16 x float> %mul, %a2
        ; Subtracting from an all -0.0 vector negates %add.
    610   %neg = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
    611   ret <16 x float> %neg
    612 }
    613 
    614 define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
        ; Computes -(%a0 * %a1 - %a2) on <8 x double>. The assertions expect the
        ; outer negation to be absorbed into the fused operation, yielding
        ; vfnmadd213pd (FMA and AVX512 runs) or vfnmaddpd (FMA4 runs).
        ; NOTE(review): attribute group #0 is defined outside this excerpt;
        ; presumably it relaxes FP semantics so the negation fold is legal - confirm.
    615 ; FMA-LABEL: test_v8f64_fneg_fmsub:
    616 ; FMA:       # BB#0:
    617 ; FMA-NEXT:    vfnmadd213pd %ymm4, %ymm2, %ymm0
    618 ; FMA-NEXT:    vfnmadd213pd %ymm5, %ymm3, %ymm1
    619 ; FMA-NEXT:    retq
    620 ;
    621 ; FMA4-LABEL: test_v8f64_fneg_fmsub:
    622 ; FMA4:       # BB#0:
    623 ; FMA4-NEXT:    vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
    624 ; FMA4-NEXT:    vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
    625 ; FMA4-NEXT:    retq
    626 ;
    627 ; AVX512-LABEL: test_v8f64_fneg_fmsub:
    628 ; AVX512:       # BB#0:
    629 ; AVX512-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0
    630 ; AVX512-NEXT:    retq
    631   %mul = fmul <8 x double> %a0, %a1
    632   %sub = fsub <8 x double> %mul, %a2
        ; Subtracting from an all -0.0 vector negates %sub.
    633   %neg = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
    634   ret <8 x double> %neg
    635 }
    636 
    637 define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
        ; Computes -(-(%a0 * %a1) + %a2) on <16 x float>. The assertions expect both
        ; negations to be absorbed, leaving a plain fused multiply-subtract:
        ; vfmsub213ps (FMA and AVX512 runs) or vfmsubps (FMA4 runs).
        ; NOTE(review): attribute group #0 is defined outside this excerpt;
        ; presumably it relaxes FP semantics so the negation fold is legal - confirm.
    638 ; FMA-LABEL: test_v16f32_fneg_fnmadd:
    639 ; FMA:       # BB#0:
    640 ; FMA-NEXT:    vfmsub213ps %ymm4, %ymm2, %ymm0
    641 ; FMA-NEXT:    vfmsub213ps %ymm5, %ymm3, %ymm1
    642 ; FMA-NEXT:    retq
    643 ;
    644 ; FMA4-LABEL: test_v16f32_fneg_fnmadd:
    645 ; FMA4:       # BB#0:
    646 ; FMA4-NEXT:    vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
    647 ; FMA4-NEXT:    vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
    648 ; FMA4-NEXT:    retq
    649 ;
    650 ; AVX512-LABEL: test_v16f32_fneg_fnmadd:
    651 ; AVX512:       # BB#0:
    652 ; AVX512-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0
    653 ; AVX512-NEXT:    retq
    654   %mul = fmul <16 x float> %a0, %a1
        ; Inner negation: -0.0 - %mul.
    655   %neg0 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %mul
    656   %add = fadd <16 x float> %neg0, %a2
        ; Outer negation: -0.0 - %add.
    657   %neg1 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
    658   ret <16 x float> %neg1
    659 }
    660 
    661 define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
        ; Computes -(-(%a0 * %a1) - %a2) on <8 x double>. The assertions expect both
        ; negations to be absorbed, leaving a plain fused multiply-add:
        ; vfmadd213pd (FMA and AVX512 runs) or vfmaddpd (FMA4 runs).
        ; NOTE(review): attribute group #0 is defined outside this excerpt;
        ; presumably it relaxes FP semantics so the negation fold is legal - confirm.
    662 ; FMA-LABEL: test_v8f64_fneg_fnmsub:
    663 ; FMA:       # BB#0:
    664 ; FMA-NEXT:    vfmadd213pd %ymm4, %ymm2, %ymm0
    665 ; FMA-NEXT:    vfmadd213pd %ymm5, %ymm3, %ymm1
    666 ; FMA-NEXT:    retq
    667 ;
    668 ; FMA4-LABEL: test_v8f64_fneg_fnmsub:
    669 ; FMA4:       # BB#0:
    670 ; FMA4-NEXT:    vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
    671 ; FMA4-NEXT:    vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
    672 ; FMA4-NEXT:    retq
    673 ;
    674 ; AVX512-LABEL: test_v8f64_fneg_fnmsub:
    675 ; AVX512:       # BB#0:
    676 ; AVX512-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0
    677 ; AVX512-NEXT:    retq
    678   %mul = fmul <8 x double> %a0, %a1
        ; Inner negation: -0.0 - %mul.
    679   %neg0 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %mul
    680   %sub = fsub <8 x double> %neg0, %a2
        ; Outer negation: -0.0 - %sub.
    681   %neg1 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
    682   ret <8 x double> %neg1
    683 }
    684 
    685 ;
    686 ; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    687 ;
    688 
    689 define <16 x float> @test_v16f32_fma_x_c1_fmul_x_c2(<16 x float> %x) #0 {
        ; Computes %x * c1 + %x * c2 for two constant <16 x float> vectors. The
        ; assertions expect no fused instruction at all: just vmulps of %x by a
        ; RIP-relative memory operand (two ymm multiplies, or one zmm multiply for
        ; the AVX512 run), i.e. the two constants combined into one multiplier.
        ; NOTE(review): attribute group #0 is defined outside this excerpt;
        ; presumably it permits the reassociation this fold needs - confirm.
    690 ; FMA-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
    691 ; FMA:       # BB#0:
    692 ; FMA-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0
    693 ; FMA-NEXT:    vmulps {{.*}}(%rip), %ymm1, %ymm1
    694 ; FMA-NEXT:    retq
    695 ;
    696 ; FMA4-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
    697 ; FMA4:       # BB#0:
    698 ; FMA4-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0
    699 ; FMA4-NEXT:    vmulps {{.*}}(%rip), %ymm1, %ymm1
    700 ; FMA4-NEXT:    retq
    701 ;
    702 ; AVX512-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
    703 ; AVX512:       # BB#0:
    704 ; AVX512-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0
    705 ; AVX512-NEXT:    retq
    706   %m0 = fmul <16 x float> %x, <float 17.0, float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0>
    707   %m1 = fmul <16 x float> %x, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
    708   %a  = fadd <16 x float> %m0, %m1
    709   ret <16 x float> %a
    710 }
    711 
    712 ;
    713 ; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
    714 ;
    715 
    716 define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float> %y) #0 {
        ; Computes (%x * c1) * c2 + %y for two constant <16 x float> vectors. The
        ; assertions expect the two multiplies to collapse so that only one fused
        ; multiply-add per register remains, taking its constant multiplier from a
        ; RIP-relative memory operand. The AVX512 run accumulates into the register
        ; holding %y (vfmadd231ps) and then copies the result into zmm0.
        ; NOTE(review): attribute group #0 is defined outside this excerpt;
        ; presumably it permits the reassociation this fold needs - confirm.
    717 ; FMA-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
    718 ; FMA:       # BB#0:
    719 ; FMA-NEXT:    vfmadd132ps {{.*}}(%rip), %ymm2, %ymm0
    720 ; FMA-NEXT:    vfmadd132ps {{.*}}(%rip), %ymm3, %ymm1
    721 ; FMA-NEXT:    retq
    722 ;
    723 ; FMA4-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
    724 ; FMA4:       # BB#0:
    725 ; FMA4-NEXT:    vfmaddps %ymm2, {{.*}}(%rip), %ymm0, %ymm0
    726 ; FMA4-NEXT:    vfmaddps %ymm3, {{.*}}(%rip), %ymm1, %ymm1
    727 ; FMA4-NEXT:    retq
    728 ;
    729 ; AVX512-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
    730 ; AVX512:       # BB#0:
    731 ; AVX512-NEXT:    vfmadd231ps {{.*}}(%rip), %zmm0, %zmm1
    732 ; AVX512-NEXT:    vmovaps %zmm1, %zmm0
    733 ; AVX512-NEXT:    retq
    734   %m0 = fmul <16 x float> %x,  <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>
    735   %m1 = fmul <16 x float> %m0, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
    736   %a  = fadd <16 x float> %m1, %y
    737   ret <16 x float> %a
    738 }
    739 
    740 ; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0)
    741 
    742 define <16 x float> @test_v16f32_fneg_fmul(<16 x float> %x, <16 x float> %y) #0 {
        ; Negates an nsz-flagged product of %x and %y; the negation is written
        ; as (fsub <-0.0 splat>, %m). The expected code zeroes a scratch
        ; register (vxorps for the FMA and FMA4 prefixes, vpxord for the AVX512
        ; prefix) and uses it as the addend of a vfnmsub instruction; no
        ; separate multiply or sign-flip instruction is expected.
    743 ; FMA-LABEL: test_v16f32_fneg_fmul:
    744 ; FMA:       # BB#0:
    745 ; FMA-NEXT:    vxorps %ymm4, %ymm4, %ymm4
    746 ; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm2, %ymm0
    747 ; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm3, %ymm1
    748 ; FMA-NEXT:    retq
    749 ;
    750 ; FMA4-LABEL: test_v16f32_fneg_fmul:
    751 ; FMA4:       # BB#0:
    752 ; FMA4-NEXT:    vxorps %ymm4, %ymm4, %ymm4
    753 ; FMA4-NEXT:    vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
    754 ; FMA4-NEXT:    vfnmsubps %ymm4, %ymm3, %ymm1, %ymm1
    755 ; FMA4-NEXT:    retq
    756 ;
    757 ; AVX512-LABEL: test_v16f32_fneg_fmul:
    758 ; AVX512:       # BB#0:
    759 ; AVX512-NEXT:    vpxord %zmm2, %zmm2, %zmm2
    760 ; AVX512-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
    761 ; AVX512-NEXT:    retq
    762   %m = fmul nsz <16 x float> %x, %y
    763   %n = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %m
    764   ret <16 x float> %n
    765 }
    766 
    767 define <8 x double> @test_v8f64_fneg_fmul(<8 x double> %x, <8 x double> %y) #0 {
        ; <8 x double> form of the fneg-of-fmul test: an nsz-flagged product is
        ; negated via (fsub <-0.0 splat>, %m). The expected code zeroes a
        ; scratch register (vxorpd for the FMA and FMA4 prefixes, vpxord for
        ; the AVX512 prefix) and feeds it as the addend of vfnmsub...pd; no
        ; separate multiply or sign-flip instruction is expected.
    768 ; FMA-LABEL: test_v8f64_fneg_fmul:
    769 ; FMA:       # BB#0:
    770 ; FMA-NEXT:    vxorpd %ymm4, %ymm4, %ymm4
    771 ; FMA-NEXT:    vfnmsub213pd %ymm4, %ymm2, %ymm0
    772 ; FMA-NEXT:    vfnmsub213pd %ymm4, %ymm3, %ymm1
    773 ; FMA-NEXT:    retq
    774 ;
    775 ; FMA4-LABEL: test_v8f64_fneg_fmul:
    776 ; FMA4:       # BB#0:
    777 ; FMA4-NEXT:    vxorpd %ymm4, %ymm4, %ymm4
    778 ; FMA4-NEXT:    vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
    779 ; FMA4-NEXT:    vfnmsubpd %ymm4, %ymm3, %ymm1, %ymm1
    780 ; FMA4-NEXT:    retq
    781 ;
    782 ; AVX512-LABEL: test_v8f64_fneg_fmul:
    783 ; AVX512:       # BB#0:
    784 ; AVX512-NEXT:    vpxord %zmm2, %zmm2, %zmm2
    785 ; AVX512-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0
    786 ; AVX512-NEXT:    retq
    787   %m = fmul nsz <8 x double> %x, %y
    788   %n = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %m
    789   ret <8 x double> %n
    790 }
    791 
    792 define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %y) #0 {
        ; Same IR shape as test_v8f64_fneg_fmul, except that the fmul carries
        ; no nsz flag. The expected code forms no vfnmsub: it keeps a plain
        ; vmulpd followed by a vxorpd. For the FMA and FMA4 prefixes the xor
        ; mask is a vector of 9223372036854775808 (2^63, i.e. only the top bit
        ; of each 64-bit lane set); for the AVX512 prefix the xor operand is a
        ; wildcard-matched memory operand (presumably the same mask; its value
        ; is not checked here).
    793 ; FMA-LABEL: test_v8f64_fneg_fmul_no_nsz:
    794 ; FMA:       # BB#0:
    795 ; FMA-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
    796 ; FMA-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
    797 ; FMA-NEXT:    vmovapd {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
    798 ; FMA-NEXT:    vxorpd %ymm2, %ymm0, %ymm0
    799 ; FMA-NEXT:    vxorpd %ymm2, %ymm1, %ymm1
    800 ; FMA-NEXT:    retq
    801 ;
    802 ; FMA4-LABEL: test_v8f64_fneg_fmul_no_nsz:
    803 ; FMA4:       # BB#0:
    804 ; FMA4-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
    805 ; FMA4-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
    806 ; FMA4-NEXT:    vmovapd {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
    807 ; FMA4-NEXT:    vxorpd %ymm2, %ymm0, %ymm0
    808 ; FMA4-NEXT:    vxorpd %ymm2, %ymm1, %ymm1
    809 ; FMA4-NEXT:    retq
    810 ;
    811 ; AVX512-LABEL: test_v8f64_fneg_fmul_no_nsz:
    812 ; AVX512:       # BB#0:
    813 ; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
    814 ; AVX512-NEXT:    vxorpd {{.*}}(%rip), %zmm0, %zmm0
    815 ; AVX512-NEXT:    retq
    816   %m = fmul <8 x double> %x, %y
    817   %n = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %m
    818   ret <8 x double> %n
    819 }
    820 
    821 attributes #0 = { "unsafe-fp-math"="true" }
        ; Attribute group #0 sets the "unsafe-fp-math" function attribute to
        ; "true"; it applies to every function in this file whose definition is
        ; tagged with #0.
    822