Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
      8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
      9 
     10 ;
     11 ; VFMADD
     12 ;
     13 
     14 define void @test_vfmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
        ; Scheduling-annotation test for the 128-bit packed-double VFMADD forms.
        ; The inline asm at the bottom of this function emits vfmadd132pd,
        ; vfmadd213pd and vfmadd231pd twice: first with register operands
        ; ($2, $1, $0 -- the three "x"-constrained arguments %a2, %a1, %a0), then
        ; with $3 (the "*m"-constrained pointer %a3) in place of $2.
        ; The per-CPU expectation groups below pin the text llc prints for each
        ; instruction, including its "sched: [N:M]" suffix from -print-schedule
        ; (presumably latency : reciprocal throughput -- TODO confirm).
        ; The NOTE at the top of the file states that these assertions were
        ; autogenerated by utils/update_llc_test_checks.py.
     15 ; GENERIC-LABEL: test_vfmaddpd_128:
     16 ; GENERIC:       # %bb.0:
     17 ; GENERIC-NEXT:    #APP
     18 ; GENERIC-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
     19 ; GENERIC-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
     20 ; GENERIC-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
     21 ; GENERIC-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
     22 ; GENERIC-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
     23 ; GENERIC-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
     24 ; GENERIC-NEXT:    #NO_APP
     25 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     26 ;
     27 ; HASWELL-LABEL: test_vfmaddpd_128:
     28 ; HASWELL:       # %bb.0:
     29 ; HASWELL-NEXT:    #APP
     30 ; HASWELL-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
     31 ; HASWELL-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
     32 ; HASWELL-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
     33 ; HASWELL-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
     34 ; HASWELL-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
     35 ; HASWELL-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
     36 ; HASWELL-NEXT:    #NO_APP
     37 ; HASWELL-NEXT:    retq # sched: [7:1.00]
     38 ;
     39 ; BROADWELL-LABEL: test_vfmaddpd_128:
     40 ; BROADWELL:       # %bb.0:
     41 ; BROADWELL-NEXT:    #APP
     42 ; BROADWELL-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
     43 ; BROADWELL-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
     44 ; BROADWELL-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
     45 ; BROADWELL-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
     46 ; BROADWELL-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
     47 ; BROADWELL-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
     48 ; BROADWELL-NEXT:    #NO_APP
     49 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
     50 ;
     51 ; SKYLAKE-LABEL: test_vfmaddpd_128:
     52 ; SKYLAKE:       # %bb.0:
     53 ; SKYLAKE-NEXT:    #APP
     54 ; SKYLAKE-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
     55 ; SKYLAKE-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
     56 ; SKYLAKE-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
     57 ; SKYLAKE-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
     58 ; SKYLAKE-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
     59 ; SKYLAKE-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
     60 ; SKYLAKE-NEXT:    #NO_APP
     61 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
     62 ;
     63 ; KNL-LABEL: test_vfmaddpd_128:
     64 ; KNL:       # %bb.0:
     65 ; KNL-NEXT:    #APP
     66 ; KNL-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
     67 ; KNL-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
     68 ; KNL-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
     69 ; KNL-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
     70 ; KNL-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
     71 ; KNL-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
     72 ; KNL-NEXT:    #NO_APP
     73 ; KNL-NEXT:    retq # sched: [7:1.00]
     74 ;
     75 ; SKX-LABEL: test_vfmaddpd_128:
     76 ; SKX:       # %bb.0:
     77 ; SKX-NEXT:    #APP
     78 ; SKX-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
     79 ; SKX-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
     80 ; SKX-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
     81 ; SKX-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
     82 ; SKX-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
     83 ; SKX-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
     84 ; SKX-NEXT:    #NO_APP
     85 ; SKX-NEXT:    retq # sched: [7:1.00]
     86 ;
     87 ; ZNVER1-LABEL: test_vfmaddpd_128:
     88 ; ZNVER1:       # %bb.0:
     89 ; ZNVER1-NEXT:    #APP
     90 ; ZNVER1-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
     91 ; ZNVER1-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
     92 ; ZNVER1-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
     93 ; ZNVER1-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50]
     94 ; ZNVER1-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50]
     95 ; ZNVER1-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50]
     96 ; ZNVER1-NEXT:    #NO_APP
     97 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Six instructions in one asm blob: three register forms, then three memory forms.
     98   tail call void asm "vfmadd132pd $2, $1, $0 \0A\09 vfmadd213pd $2, $1, $0 \0A\09 vfmadd231pd $2, $1, $0 \0A\09 vfmadd132pd $3, $1, $0 \0A\09 vfmadd213pd $3, $1, $0 \0A\09 vfmadd231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
     99   ret void
    100 }
    101 
    102 define void @test_vfmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
        ; Scheduling-annotation test for the 256-bit packed-double VFMADD forms.
        ; The inline asm at the bottom of this function emits vfmadd132pd,
        ; vfmadd213pd and vfmadd231pd twice: first with register operands
        ; ($2, $1, $0 -- the three "x"-constrained arguments %a2, %a1, %a0), then
        ; with $3 (the "*m"-constrained pointer %a3) in place of $2.
        ; The per-CPU expectation groups below pin the text llc prints for each
        ; instruction, including its "sched: [N:M]" suffix from -print-schedule
        ; (presumably latency : reciprocal throughput -- TODO confirm).
        ; Every group except the KNL one also expects a vzeroupper between
        ; #NO_APP and retq; the KNL group goes straight from #NO_APP to retq.
     103 ; GENERIC-LABEL: test_vfmaddpd_256:
     104 ; GENERIC:       # %bb.0:
     105 ; GENERIC-NEXT:    #APP
     106 ; GENERIC-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
     107 ; GENERIC-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
     108 ; GENERIC-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
     109 ; GENERIC-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50]
     110 ; GENERIC-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50]
     111 ; GENERIC-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50]
     112 ; GENERIC-NEXT:    #NO_APP
     113 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
     114 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     115 ;
     116 ; HASWELL-LABEL: test_vfmaddpd_256:
     117 ; HASWELL:       # %bb.0:
     118 ; HASWELL-NEXT:    #APP
     119 ; HASWELL-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
     120 ; HASWELL-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
     121 ; HASWELL-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
     122 ; HASWELL-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
     123 ; HASWELL-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
     124 ; HASWELL-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
     125 ; HASWELL-NEXT:    #NO_APP
     126 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
     127 ; HASWELL-NEXT:    retq # sched: [7:1.00]
     128 ;
     129 ; BROADWELL-LABEL: test_vfmaddpd_256:
     130 ; BROADWELL:       # %bb.0:
     131 ; BROADWELL-NEXT:    #APP
     132 ; BROADWELL-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
     133 ; BROADWELL-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
     134 ; BROADWELL-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
     135 ; BROADWELL-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
     136 ; BROADWELL-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
     137 ; BROADWELL-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
     138 ; BROADWELL-NEXT:    #NO_APP
     139 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
     140 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
     141 ;
     142 ; SKYLAKE-LABEL: test_vfmaddpd_256:
     143 ; SKYLAKE:       # %bb.0:
     144 ; SKYLAKE-NEXT:    #APP
     145 ; SKYLAKE-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
     146 ; SKYLAKE-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
     147 ; SKYLAKE-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
     148 ; SKYLAKE-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
     149 ; SKYLAKE-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
     150 ; SKYLAKE-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
     151 ; SKYLAKE-NEXT:    #NO_APP
     152 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
     153 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
     154 ;
     155 ; KNL-LABEL: test_vfmaddpd_256:
     156 ; KNL:       # %bb.0:
     157 ; KNL-NEXT:    #APP
     158 ; KNL-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
     159 ; KNL-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
     160 ; KNL-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
     161 ; KNL-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
     162 ; KNL-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
     163 ; KNL-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
     164 ; KNL-NEXT:    #NO_APP
     165 ; KNL-NEXT:    retq # sched: [7:1.00]
     166 ;
     167 ; SKX-LABEL: test_vfmaddpd_256:
     168 ; SKX:       # %bb.0:
     169 ; SKX-NEXT:    #APP
     170 ; SKX-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
     171 ; SKX-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
     172 ; SKX-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
     173 ; SKX-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
     174 ; SKX-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
     175 ; SKX-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
     176 ; SKX-NEXT:    #NO_APP
     177 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
     178 ; SKX-NEXT:    retq # sched: [7:1.00]
     179 ;
     180 ; ZNVER1-LABEL: test_vfmaddpd_256:
     181 ; ZNVER1:       # %bb.0:
     182 ; ZNVER1-NEXT:    #APP
     183 ; ZNVER1-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
     184 ; ZNVER1-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
     185 ; ZNVER1-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
     186 ; ZNVER1-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
     187 ; ZNVER1-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
     188 ; ZNVER1-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
     189 ; ZNVER1-NEXT:    #NO_APP
     190 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
     191 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Six instructions in one asm blob: three register forms, then three memory forms.
     192   tail call void asm "vfmadd132pd $2, $1, $0 \0A\09 vfmadd213pd $2, $1, $0 \0A\09 vfmadd231pd $2, $1, $0 \0A\09 vfmadd132pd $3, $1, $0 \0A\09 vfmadd213pd $3, $1, $0 \0A\09 vfmadd231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
     193   ret void
     194 }
    195 
    196 define void @test_vfmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
        ; Scheduling-annotation test for the 128-bit packed-single VFMADD forms.
        ; The inline asm at the bottom of this function emits vfmadd132ps,
        ; vfmadd213ps and vfmadd231ps twice: first with register operands
        ; ($2, $1, $0 -- the three "x"-constrained arguments %a2, %a1, %a0), then
        ; with $3 (the "*m"-constrained pointer %a3) in place of $2.
        ; The per-CPU expectation groups below pin the text llc prints for each
        ; instruction, including its "sched: [N:M]" suffix from -print-schedule
        ; (presumably latency : reciprocal throughput -- TODO confirm).
        ; The NOTE at the top of the file states that these assertions were
        ; autogenerated by utils/update_llc_test_checks.py.
     197 ; GENERIC-LABEL: test_vfmaddps_128:
     198 ; GENERIC:       # %bb.0:
     199 ; GENERIC-NEXT:    #APP
     200 ; GENERIC-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
     201 ; GENERIC-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
     202 ; GENERIC-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
     203 ; GENERIC-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
     204 ; GENERIC-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
     205 ; GENERIC-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
     206 ; GENERIC-NEXT:    #NO_APP
     207 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     208 ;
     209 ; HASWELL-LABEL: test_vfmaddps_128:
     210 ; HASWELL:       # %bb.0:
     211 ; HASWELL-NEXT:    #APP
     212 ; HASWELL-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
     213 ; HASWELL-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
     214 ; HASWELL-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
     215 ; HASWELL-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
     216 ; HASWELL-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
     217 ; HASWELL-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
     218 ; HASWELL-NEXT:    #NO_APP
     219 ; HASWELL-NEXT:    retq # sched: [7:1.00]
     220 ;
     221 ; BROADWELL-LABEL: test_vfmaddps_128:
     222 ; BROADWELL:       # %bb.0:
     223 ; BROADWELL-NEXT:    #APP
     224 ; BROADWELL-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
     225 ; BROADWELL-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
     226 ; BROADWELL-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
     227 ; BROADWELL-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
     228 ; BROADWELL-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
     229 ; BROADWELL-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
     230 ; BROADWELL-NEXT:    #NO_APP
     231 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
     232 ;
     233 ; SKYLAKE-LABEL: test_vfmaddps_128:
     234 ; SKYLAKE:       # %bb.0:
     235 ; SKYLAKE-NEXT:    #APP
     236 ; SKYLAKE-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
     237 ; SKYLAKE-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
     238 ; SKYLAKE-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
     239 ; SKYLAKE-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
     240 ; SKYLAKE-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
     241 ; SKYLAKE-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
     242 ; SKYLAKE-NEXT:    #NO_APP
     243 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
     244 ;
     245 ; KNL-LABEL: test_vfmaddps_128:
     246 ; KNL:       # %bb.0:
     247 ; KNL-NEXT:    #APP
     248 ; KNL-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
     249 ; KNL-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
     250 ; KNL-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
     251 ; KNL-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
     252 ; KNL-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
     253 ; KNL-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
     254 ; KNL-NEXT:    #NO_APP
     255 ; KNL-NEXT:    retq # sched: [7:1.00]
     256 ;
     257 ; SKX-LABEL: test_vfmaddps_128:
     258 ; SKX:       # %bb.0:
     259 ; SKX-NEXT:    #APP
     260 ; SKX-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
     261 ; SKX-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
     262 ; SKX-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
     263 ; SKX-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
     264 ; SKX-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
     265 ; SKX-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
     266 ; SKX-NEXT:    #NO_APP
     267 ; SKX-NEXT:    retq # sched: [7:1.00]
     268 ;
     269 ; ZNVER1-LABEL: test_vfmaddps_128:
     270 ; ZNVER1:       # %bb.0:
     271 ; ZNVER1-NEXT:    #APP
     272 ; ZNVER1-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
     273 ; ZNVER1-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
     274 ; ZNVER1-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
     275 ; ZNVER1-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50]
     276 ; ZNVER1-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50]
     277 ; ZNVER1-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50]
     278 ; ZNVER1-NEXT:    #NO_APP
     279 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Six instructions in one asm blob: three register forms, then three memory forms.
     280   tail call void asm "vfmadd132ps $2, $1, $0 \0A\09 vfmadd213ps $2, $1, $0 \0A\09 vfmadd231ps $2, $1, $0 \0A\09 vfmadd132ps $3, $1, $0 \0A\09 vfmadd213ps $3, $1, $0 \0A\09 vfmadd231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
     281   ret void
     282 }
    283 
    284 define void @test_vfmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
        ; Scheduling-annotation test for the 256-bit packed-single VFMADD forms.
        ; The inline asm at the bottom of this function emits vfmadd132ps,
        ; vfmadd213ps and vfmadd231ps twice: first with register operands
        ; ($2, $1, $0 -- the three "x"-constrained arguments %a2, %a1, %a0), then
        ; with $3 (the "*m"-constrained pointer %a3) in place of $2.
        ; The per-CPU expectation groups below pin the text llc prints for each
        ; instruction, including its "sched: [N:M]" suffix from -print-schedule
        ; (presumably latency : reciprocal throughput -- TODO confirm).
        ; Every group except the KNL one also expects a vzeroupper between
        ; #NO_APP and retq; the KNL group goes straight from #NO_APP to retq.
     285 ; GENERIC-LABEL: test_vfmaddps_256:
     286 ; GENERIC:       # %bb.0:
     287 ; GENERIC-NEXT:    #APP
     288 ; GENERIC-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
     289 ; GENERIC-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
     290 ; GENERIC-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
     291 ; GENERIC-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50]
     292 ; GENERIC-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50]
     293 ; GENERIC-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50]
     294 ; GENERIC-NEXT:    #NO_APP
     295 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
     296 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     297 ;
     298 ; HASWELL-LABEL: test_vfmaddps_256:
     299 ; HASWELL:       # %bb.0:
     300 ; HASWELL-NEXT:    #APP
     301 ; HASWELL-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
     302 ; HASWELL-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
     303 ; HASWELL-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
     304 ; HASWELL-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
     305 ; HASWELL-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
     306 ; HASWELL-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
     307 ; HASWELL-NEXT:    #NO_APP
     308 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
     309 ; HASWELL-NEXT:    retq # sched: [7:1.00]
     310 ;
     311 ; BROADWELL-LABEL: test_vfmaddps_256:
     312 ; BROADWELL:       # %bb.0:
     313 ; BROADWELL-NEXT:    #APP
     314 ; BROADWELL-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
     315 ; BROADWELL-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
     316 ; BROADWELL-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
     317 ; BROADWELL-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
     318 ; BROADWELL-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
     319 ; BROADWELL-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
     320 ; BROADWELL-NEXT:    #NO_APP
     321 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
     322 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
     323 ;
     324 ; SKYLAKE-LABEL: test_vfmaddps_256:
     325 ; SKYLAKE:       # %bb.0:
     326 ; SKYLAKE-NEXT:    #APP
     327 ; SKYLAKE-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
     328 ; SKYLAKE-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
     329 ; SKYLAKE-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
     330 ; SKYLAKE-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
     331 ; SKYLAKE-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
     332 ; SKYLAKE-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
     333 ; SKYLAKE-NEXT:    #NO_APP
     334 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
     335 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
     336 ;
     337 ; KNL-LABEL: test_vfmaddps_256:
     338 ; KNL:       # %bb.0:
     339 ; KNL-NEXT:    #APP
     340 ; KNL-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
     341 ; KNL-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
     342 ; KNL-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
     343 ; KNL-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
     344 ; KNL-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
     345 ; KNL-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
     346 ; KNL-NEXT:    #NO_APP
     347 ; KNL-NEXT:    retq # sched: [7:1.00]
     348 ;
     349 ; SKX-LABEL: test_vfmaddps_256:
     350 ; SKX:       # %bb.0:
     351 ; SKX-NEXT:    #APP
     352 ; SKX-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
     353 ; SKX-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
     354 ; SKX-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
     355 ; SKX-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
     356 ; SKX-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
     357 ; SKX-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
     358 ; SKX-NEXT:    #NO_APP
     359 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
     360 ; SKX-NEXT:    retq # sched: [7:1.00]
     361 ;
     362 ; ZNVER1-LABEL: test_vfmaddps_256:
     363 ; ZNVER1:       # %bb.0:
     364 ; ZNVER1-NEXT:    #APP
     365 ; ZNVER1-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
     366 ; ZNVER1-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
     367 ; ZNVER1-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
     368 ; ZNVER1-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
     369 ; ZNVER1-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
     370 ; ZNVER1-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
     371 ; ZNVER1-NEXT:    #NO_APP
     372 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
     373 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Six instructions in one asm blob: three register forms, then three memory forms.
     374   tail call void asm "vfmadd132ps $2, $1, $0 \0A\09 vfmadd213ps $2, $1, $0 \0A\09 vfmadd231ps $2, $1, $0 \0A\09 vfmadd132ps $3, $1, $0 \0A\09 vfmadd213ps $3, $1, $0 \0A\09 vfmadd231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
     375   ret void
     376 }
    377 
    378 define void @test_vfmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
        ; Scheduling-annotation test for the scalar-double VFMADD forms.
        ; The operands are still declared as <2 x double> vectors (and a pointer
        ; to one) even though the mnemonics are the scalar "sd" variants.
        ; The inline asm at the bottom of this function emits vfmadd132sd,
        ; vfmadd213sd and vfmadd231sd twice: first with register operands
        ; ($2, $1, $0 -- the three "x"-constrained arguments %a2, %a1, %a0), then
        ; with $3 (the "*m"-constrained pointer %a3) in place of $2.
        ; The per-CPU expectation groups below pin the text llc prints for each
        ; instruction, including its "sched: [N:M]" suffix from -print-schedule
        ; (presumably latency : reciprocal throughput -- TODO confirm).
     379 ; GENERIC-LABEL: test_vfmaddsd_128:
     380 ; GENERIC:       # %bb.0:
     381 ; GENERIC-NEXT:    #APP
     382 ; GENERIC-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
     383 ; GENERIC-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
     384 ; GENERIC-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
     385 ; GENERIC-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
     386 ; GENERIC-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
     387 ; GENERIC-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
     388 ; GENERIC-NEXT:    #NO_APP
     389 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     390 ;
     391 ; HASWELL-LABEL: test_vfmaddsd_128:
     392 ; HASWELL:       # %bb.0:
     393 ; HASWELL-NEXT:    #APP
     394 ; HASWELL-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
     395 ; HASWELL-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
     396 ; HASWELL-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
     397 ; HASWELL-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
     398 ; HASWELL-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
     399 ; HASWELL-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
     400 ; HASWELL-NEXT:    #NO_APP
     401 ; HASWELL-NEXT:    retq # sched: [7:1.00]
     402 ;
     403 ; BROADWELL-LABEL: test_vfmaddsd_128:
     404 ; BROADWELL:       # %bb.0:
     405 ; BROADWELL-NEXT:    #APP
     406 ; BROADWELL-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
     407 ; BROADWELL-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
     408 ; BROADWELL-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
     409 ; BROADWELL-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
     410 ; BROADWELL-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
     411 ; BROADWELL-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
     412 ; BROADWELL-NEXT:    #NO_APP
     413 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
     414 ;
     415 ; SKYLAKE-LABEL: test_vfmaddsd_128:
     416 ; SKYLAKE:       # %bb.0:
     417 ; SKYLAKE-NEXT:    #APP
     418 ; SKYLAKE-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
     419 ; SKYLAKE-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
     420 ; SKYLAKE-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
     421 ; SKYLAKE-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
     422 ; SKYLAKE-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
     423 ; SKYLAKE-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
     424 ; SKYLAKE-NEXT:    #NO_APP
     425 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
     426 ;
     427 ; KNL-LABEL: test_vfmaddsd_128:
     428 ; KNL:       # %bb.0:
     429 ; KNL-NEXT:    #APP
     430 ; KNL-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
     431 ; KNL-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
     432 ; KNL-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
     433 ; KNL-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
     434 ; KNL-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
     435 ; KNL-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
     436 ; KNL-NEXT:    #NO_APP
     437 ; KNL-NEXT:    retq # sched: [7:1.00]
     438 ;
     439 ; SKX-LABEL: test_vfmaddsd_128:
     440 ; SKX:       # %bb.0:
     441 ; SKX-NEXT:    #APP
     442 ; SKX-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
     443 ; SKX-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
     444 ; SKX-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
     445 ; SKX-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
     446 ; SKX-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
     447 ; SKX-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
     448 ; SKX-NEXT:    #NO_APP
     449 ; SKX-NEXT:    retq # sched: [7:1.00]
     450 ;
     451 ; ZNVER1-LABEL: test_vfmaddsd_128:
     452 ; ZNVER1:       # %bb.0:
     453 ; ZNVER1-NEXT:    #APP
     454 ; ZNVER1-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
     455 ; ZNVER1-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
     456 ; ZNVER1-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
     457 ; ZNVER1-NEXT:    vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50]
     458 ; ZNVER1-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50]
     459 ; ZNVER1-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50]
     460 ; ZNVER1-NEXT:    #NO_APP
     461 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Six instructions in one asm blob: three register forms, then three memory forms.
     462   tail call void asm "vfmadd132sd $2, $1, $0 \0A\09 vfmadd213sd $2, $1, $0 \0A\09 vfmadd231sd $2, $1, $0 \0A\09 vfmadd132sd $3, $1, $0 \0A\09 vfmadd213sd $3, $1, $0 \0A\09 vfmadd231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
     463   ret void
     464 }
    465 
    466 define void @test_vfmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
        ; Scheduling-info test for the scalar single-precision VFMADD forms
        ; (vfmadd132ss / vfmadd213ss / vfmadd231ss). The per-CPU assertion blocks
        ; below pin the "sched: [...]" annotation llc prints after each instruction
        ; (presumably latency and reciprocal throughput - confirm against the
        ; -print-schedule documentation). They are autogenerated (see the NOTE at
        ; the top of the file), so regenerate them with the update script instead
        ; of editing them by hand.
    467 ; GENERIC-LABEL: test_vfmaddss_128:
    468 ; GENERIC:       # %bb.0:
    469 ; GENERIC-NEXT:    #APP
    470 ; GENERIC-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
    471 ; GENERIC-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
    472 ; GENERIC-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
    473 ; GENERIC-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
    474 ; GENERIC-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
    475 ; GENERIC-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
    476 ; GENERIC-NEXT:    #NO_APP
    477 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    478 ;
    479 ; HASWELL-LABEL: test_vfmaddss_128:
    480 ; HASWELL:       # %bb.0:
    481 ; HASWELL-NEXT:    #APP
    482 ; HASWELL-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
    483 ; HASWELL-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
    484 ; HASWELL-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
    485 ; HASWELL-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
    486 ; HASWELL-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
    487 ; HASWELL-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
    488 ; HASWELL-NEXT:    #NO_APP
    489 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    490 ;
    491 ; BROADWELL-LABEL: test_vfmaddss_128:
    492 ; BROADWELL:       # %bb.0:
    493 ; BROADWELL-NEXT:    #APP
    494 ; BROADWELL-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
    495 ; BROADWELL-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
    496 ; BROADWELL-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
    497 ; BROADWELL-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
    498 ; BROADWELL-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
    499 ; BROADWELL-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
    500 ; BROADWELL-NEXT:    #NO_APP
    501 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    502 ;
    503 ; SKYLAKE-LABEL: test_vfmaddss_128:
    504 ; SKYLAKE:       # %bb.0:
    505 ; SKYLAKE-NEXT:    #APP
    506 ; SKYLAKE-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
    507 ; SKYLAKE-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
    508 ; SKYLAKE-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
    509 ; SKYLAKE-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
    510 ; SKYLAKE-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
    511 ; SKYLAKE-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
    512 ; SKYLAKE-NEXT:    #NO_APP
    513 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    514 ;
    515 ; KNL-LABEL: test_vfmaddss_128:
    516 ; KNL:       # %bb.0:
    517 ; KNL-NEXT:    #APP
    518 ; KNL-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
    519 ; KNL-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
    520 ; KNL-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
    521 ; KNL-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
    522 ; KNL-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
    523 ; KNL-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
    524 ; KNL-NEXT:    #NO_APP
    525 ; KNL-NEXT:    retq # sched: [7:1.00]
    526 ;
    527 ; SKX-LABEL: test_vfmaddss_128:
    528 ; SKX:       # %bb.0:
    529 ; SKX-NEXT:    #APP
    530 ; SKX-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
    531 ; SKX-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
    532 ; SKX-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
    533 ; SKX-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
    534 ; SKX-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
    535 ; SKX-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
    536 ; SKX-NEXT:    #NO_APP
    537 ; SKX-NEXT:    retq # sched: [7:1.00]
    538 ;
    539 ; ZNVER1-LABEL: test_vfmaddss_128:
    540 ; ZNVER1:       # %bb.0:
    541 ; ZNVER1-NEXT:    #APP
    542 ; ZNVER1-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
    543 ; ZNVER1-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
    544 ; ZNVER1-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
    545 ; ZNVER1-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50]
    546 ; ZNVER1-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50]
    547 ; ZNVER1-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50]
    548 ; ZNVER1-NEXT:    #NO_APP
    549 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Each of the three forms is emitted twice: first with register operand
        ; $2 (%a2), then with memory operand $3 (the "*m" constraint on %a3).
    550   tail call void asm "vfmadd132ss $2, $1, $0 \0A\09 vfmadd213ss $2, $1, $0 \0A\09 vfmadd231ss $2, $1, $0 \0A\09 vfmadd132ss $3, $1, $0 \0A\09 vfmadd213ss $3, $1, $0 \0A\09 vfmadd231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
    551   ret void
    552 }
    553 
    554 ;
    555 ; VFMADDSUB
    556 ;
    557 
    558 define void @test_vfmaddsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
        ; Scheduling-info test for the 128-bit packed double-precision VFMADDSUB
        ; forms (vfmaddsub132pd / vfmaddsub213pd / vfmaddsub231pd). The per-CPU
        ; assertion blocks below pin the "sched: [...]" annotation llc prints after
        ; each instruction (presumably latency and reciprocal throughput - confirm
        ; against the -print-schedule documentation). They are autogenerated (see
        ; the NOTE at the top of the file), so regenerate them with the update
        ; script instead of editing them by hand.
    559 ; GENERIC-LABEL: test_vfmaddsubpd_128:
    560 ; GENERIC:       # %bb.0:
    561 ; GENERIC-NEXT:    #APP
    562 ; GENERIC-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
    563 ; GENERIC-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
    564 ; GENERIC-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
    565 ; GENERIC-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
    566 ; GENERIC-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
    567 ; GENERIC-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
    568 ; GENERIC-NEXT:    #NO_APP
    569 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    570 ;
    571 ; HASWELL-LABEL: test_vfmaddsubpd_128:
    572 ; HASWELL:       # %bb.0:
    573 ; HASWELL-NEXT:    #APP
    574 ; HASWELL-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
    575 ; HASWELL-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
    576 ; HASWELL-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
    577 ; HASWELL-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50]
    578 ; HASWELL-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50]
    579 ; HASWELL-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50]
    580 ; HASWELL-NEXT:    #NO_APP
    581 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    582 ;
    583 ; BROADWELL-LABEL: test_vfmaddsubpd_128:
    584 ; BROADWELL:       # %bb.0:
    585 ; BROADWELL-NEXT:    #APP
    586 ; BROADWELL-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
    587 ; BROADWELL-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
    588 ; BROADWELL-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
    589 ; BROADWELL-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
    590 ; BROADWELL-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
    591 ; BROADWELL-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
    592 ; BROADWELL-NEXT:    #NO_APP
    593 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    594 ;
    595 ; SKYLAKE-LABEL: test_vfmaddsubpd_128:
    596 ; SKYLAKE:       # %bb.0:
    597 ; SKYLAKE-NEXT:    #APP
    598 ; SKYLAKE-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
    599 ; SKYLAKE-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
    600 ; SKYLAKE-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
    601 ; SKYLAKE-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
    602 ; SKYLAKE-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
    603 ; SKYLAKE-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
    604 ; SKYLAKE-NEXT:    #NO_APP
    605 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    606 ;
    607 ; KNL-LABEL: test_vfmaddsubpd_128:
    608 ; KNL:       # %bb.0:
    609 ; KNL-NEXT:    #APP
    610 ; KNL-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
    611 ; KNL-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
    612 ; KNL-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
    613 ; KNL-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50]
    614 ; KNL-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50]
    615 ; KNL-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50]
    616 ; KNL-NEXT:    #NO_APP
    617 ; KNL-NEXT:    retq # sched: [7:1.00]
    618 ;
    619 ; SKX-LABEL: test_vfmaddsubpd_128:
    620 ; SKX:       # %bb.0:
    621 ; SKX-NEXT:    #APP
    622 ; SKX-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
    623 ; SKX-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
    624 ; SKX-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
    625 ; SKX-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
    626 ; SKX-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
    627 ; SKX-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
    628 ; SKX-NEXT:    #NO_APP
    629 ; SKX-NEXT:    retq # sched: [7:1.00]
    630 ;
    631 ; ZNVER1-LABEL: test_vfmaddsubpd_128:
    632 ; ZNVER1:       # %bb.0:
    633 ; ZNVER1-NEXT:    #APP
    634 ; ZNVER1-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
    635 ; ZNVER1-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
    636 ; ZNVER1-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
    637 ; ZNVER1-NEXT:    vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [12:0.50]
    638 ; ZNVER1-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [12:0.50]
    639 ; ZNVER1-NEXT:    vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [12:0.50]
    640 ; ZNVER1-NEXT:    #NO_APP
    641 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Each of the three forms is emitted twice: first with register operand
        ; $2 (%a2), then with memory operand $3 (the "*m" constraint on %a3).
    642   tail call void asm "vfmaddsub132pd $2, $1, $0 \0A\09 vfmaddsub213pd $2, $1, $0 \0A\09 vfmaddsub231pd $2, $1, $0 \0A\09 vfmaddsub132pd $3, $1, $0 \0A\09 vfmaddsub213pd $3, $1, $0 \0A\09 vfmaddsub231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
    643   ret void
    644 }
    645 
    646 define void @test_vfmaddsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
        ; Scheduling-info test for the 256-bit packed double-precision VFMADDSUB
        ; forms (vfmaddsub132pd / vfmaddsub213pd / vfmaddsub231pd on ymm registers).
        ; The per-CPU assertion blocks below pin the "sched: [...]" annotation llc
        ; prints after each instruction (presumably latency and reciprocal
        ; throughput - confirm against the -print-schedule documentation). Every
        ; CPU block except the KNL one also expects a vzeroupper before the return.
        ; The assertions are autogenerated (see the NOTE at the top of the file),
        ; so regenerate them with the update script instead of editing them by hand.
    647 ; GENERIC-LABEL: test_vfmaddsubpd_256:
    648 ; GENERIC:       # %bb.0:
    649 ; GENERIC-NEXT:    #APP
    650 ; GENERIC-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
    651 ; GENERIC-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
    652 ; GENERIC-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
    653 ; GENERIC-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50]
    654 ; GENERIC-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50]
    655 ; GENERIC-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50]
    656 ; GENERIC-NEXT:    #NO_APP
    657 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
    658 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    659 ;
    660 ; HASWELL-LABEL: test_vfmaddsubpd_256:
    661 ; HASWELL:       # %bb.0:
    662 ; HASWELL-NEXT:    #APP
    663 ; HASWELL-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
    664 ; HASWELL-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
    665 ; HASWELL-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
    666 ; HASWELL-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
    667 ; HASWELL-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
    668 ; HASWELL-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
    669 ; HASWELL-NEXT:    #NO_APP
    670 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
    671 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    672 ;
    673 ; BROADWELL-LABEL: test_vfmaddsubpd_256:
    674 ; BROADWELL:       # %bb.0:
    675 ; BROADWELL-NEXT:    #APP
    676 ; BROADWELL-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
    677 ; BROADWELL-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
    678 ; BROADWELL-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
    679 ; BROADWELL-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
    680 ; BROADWELL-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
    681 ; BROADWELL-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
    682 ; BROADWELL-NEXT:    #NO_APP
    683 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
    684 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    685 ;
    686 ; SKYLAKE-LABEL: test_vfmaddsubpd_256:
    687 ; SKYLAKE:       # %bb.0:
    688 ; SKYLAKE-NEXT:    #APP
    689 ; SKYLAKE-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
    690 ; SKYLAKE-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
    691 ; SKYLAKE-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
    692 ; SKYLAKE-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
    693 ; SKYLAKE-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
    694 ; SKYLAKE-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
    695 ; SKYLAKE-NEXT:    #NO_APP
    696 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
    697 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    698 ;
    699 ; KNL-LABEL: test_vfmaddsubpd_256:
    700 ; KNL:       # %bb.0:
    701 ; KNL-NEXT:    #APP
    702 ; KNL-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
    703 ; KNL-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
    704 ; KNL-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
    705 ; KNL-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
    706 ; KNL-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
    707 ; KNL-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
    708 ; KNL-NEXT:    #NO_APP
    709 ; KNL-NEXT:    retq # sched: [7:1.00]
    710 ;
    711 ; SKX-LABEL: test_vfmaddsubpd_256:
    712 ; SKX:       # %bb.0:
    713 ; SKX-NEXT:    #APP
    714 ; SKX-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
    715 ; SKX-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
    716 ; SKX-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
    717 ; SKX-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
    718 ; SKX-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
    719 ; SKX-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
    720 ; SKX-NEXT:    #NO_APP
    721 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
    722 ; SKX-NEXT:    retq # sched: [7:1.00]
    723 ;
    724 ; ZNVER1-LABEL: test_vfmaddsubpd_256:
    725 ; ZNVER1:       # %bb.0:
    726 ; ZNVER1-NEXT:    #APP
    727 ; ZNVER1-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
    728 ; ZNVER1-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
    729 ; ZNVER1-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
    730 ; ZNVER1-NEXT:    vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
    731 ; ZNVER1-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
    732 ; ZNVER1-NEXT:    vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
    733 ; ZNVER1-NEXT:    #NO_APP
    734 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
    735 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Each of the three forms is emitted twice: first with register operand
        ; $2 (%a2), then with memory operand $3 (the "*m" constraint on %a3).
    736   tail call void asm "vfmaddsub132pd $2, $1, $0 \0A\09 vfmaddsub213pd $2, $1, $0 \0A\09 vfmaddsub231pd $2, $1, $0 \0A\09 vfmaddsub132pd $3, $1, $0 \0A\09 vfmaddsub213pd $3, $1, $0 \0A\09 vfmaddsub231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
    737   ret void
    738 }
    739 
    740 define void @test_vfmaddsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
        ; Scheduling-info test for the 128-bit packed single-precision VFMADDSUB
        ; forms (vfmaddsub132ps / vfmaddsub213ps / vfmaddsub231ps). The per-CPU
        ; assertion blocks below pin the "sched: [...]" annotation llc prints after
        ; each instruction (presumably latency and reciprocal throughput - confirm
        ; against the -print-schedule documentation). They are autogenerated (see
        ; the NOTE at the top of the file), so regenerate them with the update
        ; script instead of editing them by hand.
    741 ; GENERIC-LABEL: test_vfmaddsubps_128:
    742 ; GENERIC:       # %bb.0:
    743 ; GENERIC-NEXT:    #APP
    744 ; GENERIC-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
    745 ; GENERIC-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
    746 ; GENERIC-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
    747 ; GENERIC-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
    748 ; GENERIC-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
    749 ; GENERIC-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
    750 ; GENERIC-NEXT:    #NO_APP
    751 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    752 ;
    753 ; HASWELL-LABEL: test_vfmaddsubps_128:
    754 ; HASWELL:       # %bb.0:
    755 ; HASWELL-NEXT:    #APP
    756 ; HASWELL-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
    757 ; HASWELL-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
    758 ; HASWELL-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
    759 ; HASWELL-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50]
    760 ; HASWELL-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50]
    761 ; HASWELL-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50]
    762 ; HASWELL-NEXT:    #NO_APP
    763 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    764 ;
    765 ; BROADWELL-LABEL: test_vfmaddsubps_128:
    766 ; BROADWELL:       # %bb.0:
    767 ; BROADWELL-NEXT:    #APP
    768 ; BROADWELL-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
    769 ; BROADWELL-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
    770 ; BROADWELL-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
    771 ; BROADWELL-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
    772 ; BROADWELL-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
    773 ; BROADWELL-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
    774 ; BROADWELL-NEXT:    #NO_APP
    775 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    776 ;
    777 ; SKYLAKE-LABEL: test_vfmaddsubps_128:
    778 ; SKYLAKE:       # %bb.0:
    779 ; SKYLAKE-NEXT:    #APP
    780 ; SKYLAKE-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
    781 ; SKYLAKE-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
    782 ; SKYLAKE-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
    783 ; SKYLAKE-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
    784 ; SKYLAKE-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
    785 ; SKYLAKE-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
    786 ; SKYLAKE-NEXT:    #NO_APP
    787 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    788 ;
    789 ; KNL-LABEL: test_vfmaddsubps_128:
    790 ; KNL:       # %bb.0:
    791 ; KNL-NEXT:    #APP
    792 ; KNL-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
    793 ; KNL-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
    794 ; KNL-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
    795 ; KNL-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50]
    796 ; KNL-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50]
    797 ; KNL-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50]
    798 ; KNL-NEXT:    #NO_APP
    799 ; KNL-NEXT:    retq # sched: [7:1.00]
    800 ;
    801 ; SKX-LABEL: test_vfmaddsubps_128:
    802 ; SKX:       # %bb.0:
    803 ; SKX-NEXT:    #APP
    804 ; SKX-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
    805 ; SKX-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
    806 ; SKX-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
    807 ; SKX-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
    808 ; SKX-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
    809 ; SKX-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
    810 ; SKX-NEXT:    #NO_APP
    811 ; SKX-NEXT:    retq # sched: [7:1.00]
    812 ;
    813 ; ZNVER1-LABEL: test_vfmaddsubps_128:
    814 ; ZNVER1:       # %bb.0:
    815 ; ZNVER1-NEXT:    #APP
    816 ; ZNVER1-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
    817 ; ZNVER1-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
    818 ; ZNVER1-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
    819 ; ZNVER1-NEXT:    vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [12:0.50]
    820 ; ZNVER1-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [12:0.50]
    821 ; ZNVER1-NEXT:    vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [12:0.50]
    822 ; ZNVER1-NEXT:    #NO_APP
    823 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Each of the three forms is emitted twice: first with register operand
        ; $2 (%a2), then with memory operand $3 (the "*m" constraint on %a3).
    824   tail call void asm "vfmaddsub132ps $2, $1, $0 \0A\09 vfmaddsub213ps $2, $1, $0 \0A\09 vfmaddsub231ps $2, $1, $0 \0A\09 vfmaddsub132ps $3, $1, $0 \0A\09 vfmaddsub213ps $3, $1, $0 \0A\09 vfmaddsub231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
    825   ret void
    826 }
    827 
    828 define void @test_vfmaddsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
        ; Scheduling-info test for the 256-bit packed single-precision VFMADDSUB
        ; forms (vfmaddsub132ps / vfmaddsub213ps / vfmaddsub231ps on ymm registers).
        ; The per-CPU assertion blocks below pin the "sched: [...]" annotation llc
        ; prints after each instruction (presumably latency and reciprocal
        ; throughput - confirm against the -print-schedule documentation). Every
        ; CPU block except the KNL one also expects a vzeroupper before the return.
        ; The assertions are autogenerated (see the NOTE at the top of the file),
        ; so regenerate them with the update script instead of editing them by hand.
    829 ; GENERIC-LABEL: test_vfmaddsubps_256:
    830 ; GENERIC:       # %bb.0:
    831 ; GENERIC-NEXT:    #APP
    832 ; GENERIC-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
    833 ; GENERIC-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
    834 ; GENERIC-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
    835 ; GENERIC-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50]
    836 ; GENERIC-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50]
    837 ; GENERIC-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50]
    838 ; GENERIC-NEXT:    #NO_APP
    839 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
    840 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    841 ;
    842 ; HASWELL-LABEL: test_vfmaddsubps_256:
    843 ; HASWELL:       # %bb.0:
    844 ; HASWELL-NEXT:    #APP
    845 ; HASWELL-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
    846 ; HASWELL-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
    847 ; HASWELL-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
    848 ; HASWELL-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
    849 ; HASWELL-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
    850 ; HASWELL-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
    851 ; HASWELL-NEXT:    #NO_APP
    852 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
    853 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    854 ;
    855 ; BROADWELL-LABEL: test_vfmaddsubps_256:
    856 ; BROADWELL:       # %bb.0:
    857 ; BROADWELL-NEXT:    #APP
    858 ; BROADWELL-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
    859 ; BROADWELL-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
    860 ; BROADWELL-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
    861 ; BROADWELL-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
    862 ; BROADWELL-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
    863 ; BROADWELL-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
    864 ; BROADWELL-NEXT:    #NO_APP
    865 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
    866 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    867 ;
    868 ; SKYLAKE-LABEL: test_vfmaddsubps_256:
    869 ; SKYLAKE:       # %bb.0:
    870 ; SKYLAKE-NEXT:    #APP
    871 ; SKYLAKE-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
    872 ; SKYLAKE-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
    873 ; SKYLAKE-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
    874 ; SKYLAKE-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
    875 ; SKYLAKE-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
    876 ; SKYLAKE-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
    877 ; SKYLAKE-NEXT:    #NO_APP
    878 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
    879 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    880 ;
    881 ; KNL-LABEL: test_vfmaddsubps_256:
    882 ; KNL:       # %bb.0:
    883 ; KNL-NEXT:    #APP
    884 ; KNL-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
    885 ; KNL-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
    886 ; KNL-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
    887 ; KNL-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
    888 ; KNL-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
    889 ; KNL-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
    890 ; KNL-NEXT:    #NO_APP
    891 ; KNL-NEXT:    retq # sched: [7:1.00]
    892 ;
    893 ; SKX-LABEL: test_vfmaddsubps_256:
    894 ; SKX:       # %bb.0:
    895 ; SKX-NEXT:    #APP
    896 ; SKX-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
    897 ; SKX-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
    898 ; SKX-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
    899 ; SKX-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
    900 ; SKX-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
    901 ; SKX-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
    902 ; SKX-NEXT:    #NO_APP
    903 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
    904 ; SKX-NEXT:    retq # sched: [7:1.00]
    905 ;
    906 ; ZNVER1-LABEL: test_vfmaddsubps_256:
    907 ; ZNVER1:       # %bb.0:
    908 ; ZNVER1-NEXT:    #APP
    909 ; ZNVER1-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
    910 ; ZNVER1-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
    911 ; ZNVER1-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
    912 ; ZNVER1-NEXT:    vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
    913 ; ZNVER1-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
    914 ; ZNVER1-NEXT:    vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
    915 ; ZNVER1-NEXT:    #NO_APP
    916 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
    917 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Each of the three forms is emitted twice: first with register operand
        ; $2 (%a2), then with memory operand $3 (the "*m" constraint on %a3).
    918   tail call void asm "vfmaddsub132ps $2, $1, $0 \0A\09 vfmaddsub213ps $2, $1, $0 \0A\09 vfmaddsub231ps $2, $1, $0 \0A\09 vfmaddsub132ps $3, $1, $0 \0A\09 vfmaddsub213ps $3, $1, $0 \0A\09 vfmaddsub231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
    919   ret void
    920 }
    921 
    922 ;
    923 ; VFMSUBADD
    924 ;
    925 
    926 define void @test_vfmsubaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
    927 ; GENERIC-LABEL: test_vfmsubaddpd_128:
    928 ; GENERIC:       # %bb.0:
    929 ; GENERIC-NEXT:    #APP
    930 ; GENERIC-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
    931 ; GENERIC-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
    932 ; GENERIC-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
    933 ; GENERIC-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
    934 ; GENERIC-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
    935 ; GENERIC-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
    936 ; GENERIC-NEXT:    #NO_APP
    937 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    938 ;
    939 ; HASWELL-LABEL: test_vfmsubaddpd_128:
    940 ; HASWELL:       # %bb.0:
    941 ; HASWELL-NEXT:    #APP
    942 ; HASWELL-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
    943 ; HASWELL-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
    944 ; HASWELL-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
    945 ; HASWELL-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50]
    946 ; HASWELL-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50]
    947 ; HASWELL-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50]
    948 ; HASWELL-NEXT:    #NO_APP
    949 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    950 ;
    951 ; BROADWELL-LABEL: test_vfmsubaddpd_128:
    952 ; BROADWELL:       # %bb.0:
    953 ; BROADWELL-NEXT:    #APP
    954 ; BROADWELL-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
    955 ; BROADWELL-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
    956 ; BROADWELL-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
    957 ; BROADWELL-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
    958 ; BROADWELL-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
    959 ; BROADWELL-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
    960 ; BROADWELL-NEXT:    #NO_APP
    961 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    962 ;
    963 ; SKYLAKE-LABEL: test_vfmsubaddpd_128:
    964 ; SKYLAKE:       # %bb.0:
    965 ; SKYLAKE-NEXT:    #APP
    966 ; SKYLAKE-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
    967 ; SKYLAKE-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
    968 ; SKYLAKE-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
    969 ; SKYLAKE-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
    970 ; SKYLAKE-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
    971 ; SKYLAKE-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
    972 ; SKYLAKE-NEXT:    #NO_APP
    973 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    974 ;
    975 ; KNL-LABEL: test_vfmsubaddpd_128:
    976 ; KNL:       # %bb.0:
    977 ; KNL-NEXT:    #APP
    978 ; KNL-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
    979 ; KNL-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
    980 ; KNL-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
    981 ; KNL-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50]
    982 ; KNL-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50]
    983 ; KNL-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50]
    984 ; KNL-NEXT:    #NO_APP
    985 ; KNL-NEXT:    retq # sched: [7:1.00]
    986 ;
    987 ; SKX-LABEL: test_vfmsubaddpd_128:
    988 ; SKX:       # %bb.0:
    989 ; SKX-NEXT:    #APP
    990 ; SKX-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
    991 ; SKX-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
    992 ; SKX-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
    993 ; SKX-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
    994 ; SKX-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
    995 ; SKX-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
    996 ; SKX-NEXT:    #NO_APP
    997 ; SKX-NEXT:    retq # sched: [7:1.00]
    998 ;
    999 ; ZNVER1-LABEL: test_vfmsubaddpd_128:
   1000 ; ZNVER1:       # %bb.0:
   1001 ; ZNVER1-NEXT:    #APP
   1002 ; ZNVER1-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
   1003 ; ZNVER1-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
   1004 ; ZNVER1-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
   1005 ; ZNVER1-NEXT:    vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [12:0.50]
   1006 ; ZNVER1-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [12:0.50]
   1007 ; ZNVER1-NEXT:    vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [12:0.50]
   1008 ; ZNVER1-NEXT:    #NO_APP
   1009 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1010   tail call void asm "vfmsubadd132pd $2, $1, $0 \0A\09 vfmsubadd213pd $2, $1, $0 \0A\09 vfmsubadd231pd $2, $1, $0 \0A\09 vfmsubadd132pd $3, $1, $0 \0A\09 vfmsubadd213pd $3, $1, $0 \0A\09 vfmsubadd231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
   1011   ret void
   1012 }
   1013 
    ; Scheduling-annotation test for the 256-bit (ymm) packed-double
    ; vfmsubadd132pd / vfmsubadd213pd / vfmsubadd231pd instructions.
    ; The per-CPU expectation groups below were autogenerated by
    ; utils/update_llc_test_checks.py (see the NOTE at the top of the file).
    ; Every CPU group except the KNL one also expects a vzeroupper before retq.
    1014 define void @test_vfmsubaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
    1015 ; GENERIC-LABEL: test_vfmsubaddpd_256:
    1016 ; GENERIC:       # %bb.0:
    1017 ; GENERIC-NEXT:    #APP
    1018 ; GENERIC-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
    1019 ; GENERIC-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
    1020 ; GENERIC-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
    1021 ; GENERIC-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50]
    1022 ; GENERIC-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50]
    1023 ; GENERIC-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50]
    1024 ; GENERIC-NEXT:    #NO_APP
    1025 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
    1026 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    1027 ;
    1028 ; HASWELL-LABEL: test_vfmsubaddpd_256:
    1029 ; HASWELL:       # %bb.0:
    1030 ; HASWELL-NEXT:    #APP
    1031 ; HASWELL-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
    1032 ; HASWELL-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
    1033 ; HASWELL-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
    1034 ; HASWELL-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
    1035 ; HASWELL-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
    1036 ; HASWELL-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
    1037 ; HASWELL-NEXT:    #NO_APP
    1038 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
    1039 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    1040 ;
    1041 ; BROADWELL-LABEL: test_vfmsubaddpd_256:
    1042 ; BROADWELL:       # %bb.0:
    1043 ; BROADWELL-NEXT:    #APP
    1044 ; BROADWELL-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
    1045 ; BROADWELL-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
    1046 ; BROADWELL-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
    1047 ; BROADWELL-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
    1048 ; BROADWELL-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
    1049 ; BROADWELL-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
    1050 ; BROADWELL-NEXT:    #NO_APP
    1051 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
    1052 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    1053 ;
    1054 ; SKYLAKE-LABEL: test_vfmsubaddpd_256:
    1055 ; SKYLAKE:       # %bb.0:
    1056 ; SKYLAKE-NEXT:    #APP
    1057 ; SKYLAKE-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
    1058 ; SKYLAKE-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
    1059 ; SKYLAKE-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
    1060 ; SKYLAKE-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
    1061 ; SKYLAKE-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
    1062 ; SKYLAKE-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
    1063 ; SKYLAKE-NEXT:    #NO_APP
    1064 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
    1065 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    1066 ;
    1067 ; KNL-LABEL: test_vfmsubaddpd_256:
    1068 ; KNL:       # %bb.0:
    1069 ; KNL-NEXT:    #APP
    1070 ; KNL-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
    1071 ; KNL-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
    1072 ; KNL-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
    1073 ; KNL-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
    1074 ; KNL-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
    1075 ; KNL-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
    1076 ; KNL-NEXT:    #NO_APP
    1077 ; KNL-NEXT:    retq # sched: [7:1.00]
    1078 ;
    1079 ; SKX-LABEL: test_vfmsubaddpd_256:
    1080 ; SKX:       # %bb.0:
    1081 ; SKX-NEXT:    #APP
    1082 ; SKX-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
    1083 ; SKX-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
    1084 ; SKX-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
    1085 ; SKX-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
    1086 ; SKX-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
    1087 ; SKX-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
    1088 ; SKX-NEXT:    #NO_APP
    1089 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
    1090 ; SKX-NEXT:    retq # sched: [7:1.00]
    1091 ;
    1092 ; ZNVER1-LABEL: test_vfmsubaddpd_256:
    1093 ; ZNVER1:       # %bb.0:
    1094 ; ZNVER1-NEXT:    #APP
    1095 ; ZNVER1-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
    1096 ; ZNVER1-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
    1097 ; ZNVER1-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
    1098 ; ZNVER1-NEXT:    vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
    1099 ; ZNVER1-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
    1100 ; ZNVER1-NEXT:    vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
    1101 ; ZNVER1-NEXT:    #NO_APP
    1102 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
    1103 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    ; Inline asm operands: $0, $1 and $2 are the three vector arguments ("x"
    ; constraints) and $3 is the pointer argument, passed as an indirect memory
    ; operand ("*m"). The first three instructions take $2 as their source and
    ; the last three take $3, which is why the expectations above list three
    ; register-only forms followed by three forms with a "mem" operand.
    1104   tail call void asm "vfmsubadd132pd $2, $1, $0 \0A\09 vfmsubadd213pd $2, $1, $0 \0A\09 vfmsubadd231pd $2, $1, $0 \0A\09 vfmsubadd132pd $3, $1, $0 \0A\09 vfmsubadd213pd $3, $1, $0 \0A\09 vfmsubadd231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
    1105   ret void
    1106 }
   1107 
    ; Scheduling-annotation test for the 128-bit (xmm) packed-single
    ; vfmsubadd132ps / vfmsubadd213ps / vfmsubadd231ps instructions.
    ; The per-CPU expectation groups below were autogenerated by
    ; utils/update_llc_test_checks.py (see the NOTE at the top of the file).
    1108 define void @test_vfmsubaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
    1109 ; GENERIC-LABEL: test_vfmsubaddps_128:
    1110 ; GENERIC:       # %bb.0:
    1111 ; GENERIC-NEXT:    #APP
    1112 ; GENERIC-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
    1113 ; GENERIC-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
    1114 ; GENERIC-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
    1115 ; GENERIC-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
    1116 ; GENERIC-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
    1117 ; GENERIC-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
    1118 ; GENERIC-NEXT:    #NO_APP
    1119 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    1120 ;
    1121 ; HASWELL-LABEL: test_vfmsubaddps_128:
    1122 ; HASWELL:       # %bb.0:
    1123 ; HASWELL-NEXT:    #APP
    1124 ; HASWELL-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
    1125 ; HASWELL-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
    1126 ; HASWELL-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
    1127 ; HASWELL-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50]
    1128 ; HASWELL-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50]
    1129 ; HASWELL-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50]
    1130 ; HASWELL-NEXT:    #NO_APP
    1131 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    1132 ;
    1133 ; BROADWELL-LABEL: test_vfmsubaddps_128:
    1134 ; BROADWELL:       # %bb.0:
    1135 ; BROADWELL-NEXT:    #APP
    1136 ; BROADWELL-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
    1137 ; BROADWELL-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
    1138 ; BROADWELL-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
    1139 ; BROADWELL-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
    1140 ; BROADWELL-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
    1141 ; BROADWELL-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
    1142 ; BROADWELL-NEXT:    #NO_APP
    1143 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    1144 ;
    1145 ; SKYLAKE-LABEL: test_vfmsubaddps_128:
    1146 ; SKYLAKE:       # %bb.0:
    1147 ; SKYLAKE-NEXT:    #APP
    1148 ; SKYLAKE-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
    1149 ; SKYLAKE-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
    1150 ; SKYLAKE-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
    1151 ; SKYLAKE-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
    1152 ; SKYLAKE-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
    1153 ; SKYLAKE-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
    1154 ; SKYLAKE-NEXT:    #NO_APP
    1155 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    1156 ;
    1157 ; KNL-LABEL: test_vfmsubaddps_128:
    1158 ; KNL:       # %bb.0:
    1159 ; KNL-NEXT:    #APP
    1160 ; KNL-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
    1161 ; KNL-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
    1162 ; KNL-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
    1163 ; KNL-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50]
    1164 ; KNL-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50]
    1165 ; KNL-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50]
    1166 ; KNL-NEXT:    #NO_APP
    1167 ; KNL-NEXT:    retq # sched: [7:1.00]
    1168 ;
    1169 ; SKX-LABEL: test_vfmsubaddps_128:
    1170 ; SKX:       # %bb.0:
    1171 ; SKX-NEXT:    #APP
    1172 ; SKX-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
    1173 ; SKX-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
    1174 ; SKX-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
    1175 ; SKX-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
    1176 ; SKX-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
    1177 ; SKX-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
    1178 ; SKX-NEXT:    #NO_APP
    1179 ; SKX-NEXT:    retq # sched: [7:1.00]
    1180 ;
    1181 ; ZNVER1-LABEL: test_vfmsubaddps_128:
    1182 ; ZNVER1:       # %bb.0:
    1183 ; ZNVER1-NEXT:    #APP
    1184 ; ZNVER1-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
    1185 ; ZNVER1-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
    1186 ; ZNVER1-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
    1187 ; ZNVER1-NEXT:    vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [12:0.50]
    1188 ; ZNVER1-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [12:0.50]
    1189 ; ZNVER1-NEXT:    vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [12:0.50]
    1190 ; ZNVER1-NEXT:    #NO_APP
    1191 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    ; Inline asm operands: $0, $1 and $2 are the three vector arguments ("x"
    ; constraints) and $3 is the pointer argument, passed as an indirect memory
    ; operand ("*m"). The first three instructions take $2 as their source and
    ; the last three take $3, which is why the expectations above list three
    ; register-only forms followed by three forms with a "mem" operand.
    1192   tail call void asm "vfmsubadd132ps $2, $1, $0 \0A\09 vfmsubadd213ps $2, $1, $0 \0A\09 vfmsubadd231ps $2, $1, $0 \0A\09 vfmsubadd132ps $3, $1, $0 \0A\09 vfmsubadd213ps $3, $1, $0 \0A\09 vfmsubadd231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
    1193   ret void
    1194 }
   1195 
    ; Scheduling-annotation test for the 256-bit (ymm) packed-single
    ; vfmsubadd132ps / vfmsubadd213ps / vfmsubadd231ps instructions.
    ; The per-CPU expectation groups below were autogenerated by
    ; utils/update_llc_test_checks.py (see the NOTE at the top of the file).
    ; Every CPU group except the KNL one also expects a vzeroupper before retq.
    1196 define void @test_vfmsubaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
    1197 ; GENERIC-LABEL: test_vfmsubaddps_256:
    1198 ; GENERIC:       # %bb.0:
    1199 ; GENERIC-NEXT:    #APP
    1200 ; GENERIC-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
    1201 ; GENERIC-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
    1202 ; GENERIC-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
    1203 ; GENERIC-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50]
    1204 ; GENERIC-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50]
    1205 ; GENERIC-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50]
    1206 ; GENERIC-NEXT:    #NO_APP
    1207 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
    1208 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    1209 ;
    1210 ; HASWELL-LABEL: test_vfmsubaddps_256:
    1211 ; HASWELL:       # %bb.0:
    1212 ; HASWELL-NEXT:    #APP
    1213 ; HASWELL-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
    1214 ; HASWELL-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
    1215 ; HASWELL-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
    1216 ; HASWELL-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
    1217 ; HASWELL-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
    1218 ; HASWELL-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
    1219 ; HASWELL-NEXT:    #NO_APP
    1220 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
    1221 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    1222 ;
    1223 ; BROADWELL-LABEL: test_vfmsubaddps_256:
    1224 ; BROADWELL:       # %bb.0:
    1225 ; BROADWELL-NEXT:    #APP
    1226 ; BROADWELL-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
    1227 ; BROADWELL-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
    1228 ; BROADWELL-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
    1229 ; BROADWELL-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
    1230 ; BROADWELL-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
    1231 ; BROADWELL-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
    1232 ; BROADWELL-NEXT:    #NO_APP
    1233 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
    1234 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    1235 ;
    1236 ; SKYLAKE-LABEL: test_vfmsubaddps_256:
    1237 ; SKYLAKE:       # %bb.0:
    1238 ; SKYLAKE-NEXT:    #APP
    1239 ; SKYLAKE-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
    1240 ; SKYLAKE-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
    1241 ; SKYLAKE-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
    1242 ; SKYLAKE-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
    1243 ; SKYLAKE-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
    1244 ; SKYLAKE-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
    1245 ; SKYLAKE-NEXT:    #NO_APP
    1246 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
    1247 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    1248 ;
    1249 ; KNL-LABEL: test_vfmsubaddps_256:
    1250 ; KNL:       # %bb.0:
    1251 ; KNL-NEXT:    #APP
    1252 ; KNL-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
    1253 ; KNL-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
    1254 ; KNL-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
    1255 ; KNL-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
    1256 ; KNL-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
    1257 ; KNL-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
    1258 ; KNL-NEXT:    #NO_APP
    1259 ; KNL-NEXT:    retq # sched: [7:1.00]
    1260 ;
    1261 ; SKX-LABEL: test_vfmsubaddps_256:
    1262 ; SKX:       # %bb.0:
    1263 ; SKX-NEXT:    #APP
    1264 ; SKX-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
    1265 ; SKX-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
    1266 ; SKX-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
    1267 ; SKX-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
    1268 ; SKX-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
    1269 ; SKX-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
    1270 ; SKX-NEXT:    #NO_APP
    1271 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
    1272 ; SKX-NEXT:    retq # sched: [7:1.00]
    1273 ;
    1274 ; ZNVER1-LABEL: test_vfmsubaddps_256:
    1275 ; ZNVER1:       # %bb.0:
    1276 ; ZNVER1-NEXT:    #APP
    1277 ; ZNVER1-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
    1278 ; ZNVER1-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
    1279 ; ZNVER1-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
    1280 ; ZNVER1-NEXT:    vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
    1281 ; ZNVER1-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
    1282 ; ZNVER1-NEXT:    vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
    1283 ; ZNVER1-NEXT:    #NO_APP
    1284 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
    1285 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    ; Inline asm operands: $0, $1 and $2 are the three vector arguments ("x"
    ; constraints) and $3 is the pointer argument, passed as an indirect memory
    ; operand ("*m"). The first three instructions take $2 as their source and
    ; the last three take $3, which is why the expectations above list three
    ; register-only forms followed by three forms with a "mem" operand.
    1286   tail call void asm "vfmsubadd132ps $2, $1, $0 \0A\09 vfmsubadd213ps $2, $1, $0 \0A\09 vfmsubadd231ps $2, $1, $0 \0A\09 vfmsubadd132ps $3, $1, $0 \0A\09 vfmsubadd213ps $3, $1, $0 \0A\09 vfmsubadd231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
    1287   ret void
    1288 }
   1289 
   1290 ;
   1291 ; VFMSUB
   1292 ;
   1293 
    ; Scheduling-annotation test for the 128-bit (xmm) packed-double
    ; vfmsub132pd / vfmsub213pd / vfmsub231pd instructions.
    ; The per-CPU expectation groups below were autogenerated by
    ; utils/update_llc_test_checks.py (see the NOTE at the top of the file).
    1294 define void @test_vfmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
    1295 ; GENERIC-LABEL: test_vfmsubpd_128:
    1296 ; GENERIC:       # %bb.0:
    1297 ; GENERIC-NEXT:    #APP
    1298 ; GENERIC-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
    1299 ; GENERIC-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
    1300 ; GENERIC-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
    1301 ; GENERIC-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
    1302 ; GENERIC-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
    1303 ; GENERIC-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
    1304 ; GENERIC-NEXT:    #NO_APP
    1305 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    1306 ;
    1307 ; HASWELL-LABEL: test_vfmsubpd_128:
    1308 ; HASWELL:       # %bb.0:
    1309 ; HASWELL-NEXT:    #APP
    1310 ; HASWELL-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
    1311 ; HASWELL-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
    1312 ; HASWELL-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
    1313 ; HASWELL-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50]
    1314 ; HASWELL-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50]
    1315 ; HASWELL-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50]
    1316 ; HASWELL-NEXT:    #NO_APP
    1317 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    1318 ;
    1319 ; BROADWELL-LABEL: test_vfmsubpd_128:
    1320 ; BROADWELL:       # %bb.0:
    1321 ; BROADWELL-NEXT:    #APP
    1322 ; BROADWELL-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
    1323 ; BROADWELL-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
    1324 ; BROADWELL-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
    1325 ; BROADWELL-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
    1326 ; BROADWELL-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
    1327 ; BROADWELL-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
    1328 ; BROADWELL-NEXT:    #NO_APP
    1329 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    1330 ;
    1331 ; SKYLAKE-LABEL: test_vfmsubpd_128:
    1332 ; SKYLAKE:       # %bb.0:
    1333 ; SKYLAKE-NEXT:    #APP
    1334 ; SKYLAKE-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
    1335 ; SKYLAKE-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
    1336 ; SKYLAKE-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
    1337 ; SKYLAKE-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
    1338 ; SKYLAKE-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
    1339 ; SKYLAKE-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
    1340 ; SKYLAKE-NEXT:    #NO_APP
    1341 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    1342 ;
    1343 ; KNL-LABEL: test_vfmsubpd_128:
    1344 ; KNL:       # %bb.0:
    1345 ; KNL-NEXT:    #APP
    1346 ; KNL-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
    1347 ; KNL-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
    1348 ; KNL-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
    1349 ; KNL-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50]
    1350 ; KNL-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50]
    1351 ; KNL-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50]
    1352 ; KNL-NEXT:    #NO_APP
    1353 ; KNL-NEXT:    retq # sched: [7:1.00]
    1354 ;
    1355 ; SKX-LABEL: test_vfmsubpd_128:
    1356 ; SKX:       # %bb.0:
    1357 ; SKX-NEXT:    #APP
    1358 ; SKX-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
    1359 ; SKX-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
    1360 ; SKX-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
    1361 ; SKX-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
    1362 ; SKX-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
    1363 ; SKX-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
    1364 ; SKX-NEXT:    #NO_APP
    1365 ; SKX-NEXT:    retq # sched: [7:1.00]
    1366 ;
    1367 ; ZNVER1-LABEL: test_vfmsubpd_128:
    1368 ; ZNVER1:       # %bb.0:
    1369 ; ZNVER1-NEXT:    #APP
    1370 ; ZNVER1-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
    1371 ; ZNVER1-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
    1372 ; ZNVER1-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
    1373 ; ZNVER1-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50]
    1374 ; ZNVER1-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50]
    1375 ; ZNVER1-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50]
    1376 ; ZNVER1-NEXT:    #NO_APP
    1377 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    ; Inline asm operands: $0, $1 and $2 are the three vector arguments ("x"
    ; constraints) and $3 is the pointer argument, passed as an indirect memory
    ; operand ("*m"). The first three instructions take $2 as their source and
    ; the last three take $3, which is why the expectations above list three
    ; register-only forms followed by three forms with a "mem" operand.
    1378   tail call void asm "vfmsub132pd $2, $1, $0 \0A\09 vfmsub213pd $2, $1, $0 \0A\09 vfmsub231pd $2, $1, $0 \0A\09 vfmsub132pd $3, $1, $0 \0A\09 vfmsub213pd $3, $1, $0 \0A\09 vfmsub231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
    1379   ret void
    1380 }
   1381 
    ; Scheduling-annotation test for the 256-bit (ymm) packed-double
    ; vfmsub132pd / vfmsub213pd / vfmsub231pd instructions.
    ; The per-CPU expectation groups below were autogenerated by
    ; utils/update_llc_test_checks.py (see the NOTE at the top of the file).
    ; Every CPU group except the KNL one also expects a vzeroupper before retq.
    1382 define void @test_vfmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
    1383 ; GENERIC-LABEL: test_vfmsubpd_256:
    1384 ; GENERIC:       # %bb.0:
    1385 ; GENERIC-NEXT:    #APP
    1386 ; GENERIC-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
    1387 ; GENERIC-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
    1388 ; GENERIC-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
    1389 ; GENERIC-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50]
    1390 ; GENERIC-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50]
    1391 ; GENERIC-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50]
    1392 ; GENERIC-NEXT:    #NO_APP
    1393 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
    1394 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    1395 ;
    1396 ; HASWELL-LABEL: test_vfmsubpd_256:
    1397 ; HASWELL:       # %bb.0:
    1398 ; HASWELL-NEXT:    #APP
    1399 ; HASWELL-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
    1400 ; HASWELL-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
    1401 ; HASWELL-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
    1402 ; HASWELL-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
    1403 ; HASWELL-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
    1404 ; HASWELL-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
    1405 ; HASWELL-NEXT:    #NO_APP
    1406 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
    1407 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    1408 ;
    1409 ; BROADWELL-LABEL: test_vfmsubpd_256:
    1410 ; BROADWELL:       # %bb.0:
    1411 ; BROADWELL-NEXT:    #APP
    1412 ; BROADWELL-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
    1413 ; BROADWELL-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
    1414 ; BROADWELL-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
    1415 ; BROADWELL-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
    1416 ; BROADWELL-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
    1417 ; BROADWELL-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
    1418 ; BROADWELL-NEXT:    #NO_APP
    1419 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
    1420 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    1421 ;
    1422 ; SKYLAKE-LABEL: test_vfmsubpd_256:
    1423 ; SKYLAKE:       # %bb.0:
    1424 ; SKYLAKE-NEXT:    #APP
    1425 ; SKYLAKE-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
    1426 ; SKYLAKE-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
    1427 ; SKYLAKE-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
    1428 ; SKYLAKE-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
    1429 ; SKYLAKE-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
    1430 ; SKYLAKE-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
    1431 ; SKYLAKE-NEXT:    #NO_APP
    1432 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
    1433 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    1434 ;
    1435 ; KNL-LABEL: test_vfmsubpd_256:
    1436 ; KNL:       # %bb.0:
    1437 ; KNL-NEXT:    #APP
    1438 ; KNL-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
    1439 ; KNL-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
    1440 ; KNL-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
    1441 ; KNL-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
    1442 ; KNL-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
    1443 ; KNL-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
    1444 ; KNL-NEXT:    #NO_APP
    1445 ; KNL-NEXT:    retq # sched: [7:1.00]
    1446 ;
    1447 ; SKX-LABEL: test_vfmsubpd_256:
    1448 ; SKX:       # %bb.0:
    1449 ; SKX-NEXT:    #APP
    1450 ; SKX-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
    1451 ; SKX-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
    1452 ; SKX-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
    1453 ; SKX-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
    1454 ; SKX-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
    1455 ; SKX-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
    1456 ; SKX-NEXT:    #NO_APP
    1457 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
    1458 ; SKX-NEXT:    retq # sched: [7:1.00]
    1459 ;
    1460 ; ZNVER1-LABEL: test_vfmsubpd_256:
    1461 ; ZNVER1:       # %bb.0:
    1462 ; ZNVER1-NEXT:    #APP
    1463 ; ZNVER1-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
    1464 ; ZNVER1-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
    1465 ; ZNVER1-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
    1466 ; ZNVER1-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
    1467 ; ZNVER1-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
    1468 ; ZNVER1-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
    1469 ; ZNVER1-NEXT:    #NO_APP
    1470 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
    1471 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    ; Inline asm operands: $0, $1 and $2 are the three vector arguments ("x"
    ; constraints) and $3 is the pointer argument, passed as an indirect memory
    ; operand ("*m"). The first three instructions take $2 as their source and
    ; the last three take $3, which is why the expectations above list three
    ; register-only forms followed by three forms with a "mem" operand.
    1472   tail call void asm "vfmsub132pd $2, $1, $0 \0A\09 vfmsub213pd $2, $1, $0 \0A\09 vfmsub231pd $2, $1, $0 \0A\09 vfmsub132pd $3, $1, $0 \0A\09 vfmsub213pd $3, $1, $0 \0A\09 vfmsub231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
    1473   ret void
    1474 }
   1475 
   1476 define void @test_vfmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
        ; Scheduling test for the 128-bit packed single-precision VFMSUB
        ; instructions (132/213/231 forms), each issued once with all-register
        ; operands and once with a memory operand. The autogenerated assertions
        ; below pin the `sched: [...]` annotation that -print-schedule emits for
        ; every instruction under each CPU selected by the RUN lines.
   1477 ; GENERIC-LABEL: test_vfmsubps_128:
   1478 ; GENERIC:       # %bb.0:
   1479 ; GENERIC-NEXT:    #APP
   1480 ; GENERIC-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
   1481 ; GENERIC-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
   1482 ; GENERIC-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
   1483 ; GENERIC-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
   1484 ; GENERIC-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
   1485 ; GENERIC-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
   1486 ; GENERIC-NEXT:    #NO_APP
   1487 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1488 ;
   1489 ; HASWELL-LABEL: test_vfmsubps_128:
   1490 ; HASWELL:       # %bb.0:
   1491 ; HASWELL-NEXT:    #APP
   1492 ; HASWELL-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
   1493 ; HASWELL-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
   1494 ; HASWELL-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
   1495 ; HASWELL-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50]
   1496 ; HASWELL-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50]
   1497 ; HASWELL-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50]
   1498 ; HASWELL-NEXT:    #NO_APP
   1499 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1500 ;
   1501 ; BROADWELL-LABEL: test_vfmsubps_128:
   1502 ; BROADWELL:       # %bb.0:
   1503 ; BROADWELL-NEXT:    #APP
   1504 ; BROADWELL-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
   1505 ; BROADWELL-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
   1506 ; BROADWELL-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
   1507 ; BROADWELL-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
   1508 ; BROADWELL-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
   1509 ; BROADWELL-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
   1510 ; BROADWELL-NEXT:    #NO_APP
   1511 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1512 ;
   1513 ; SKYLAKE-LABEL: test_vfmsubps_128:
   1514 ; SKYLAKE:       # %bb.0:
   1515 ; SKYLAKE-NEXT:    #APP
   1516 ; SKYLAKE-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
   1517 ; SKYLAKE-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
   1518 ; SKYLAKE-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
   1519 ; SKYLAKE-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
   1520 ; SKYLAKE-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
   1521 ; SKYLAKE-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
   1522 ; SKYLAKE-NEXT:    #NO_APP
   1523 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1524 ;
   1525 ; KNL-LABEL: test_vfmsubps_128:
   1526 ; KNL:       # %bb.0:
   1527 ; KNL-NEXT:    #APP
   1528 ; KNL-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
   1529 ; KNL-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
   1530 ; KNL-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
   1531 ; KNL-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50]
   1532 ; KNL-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50]
   1533 ; KNL-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50]
   1534 ; KNL-NEXT:    #NO_APP
   1535 ; KNL-NEXT:    retq # sched: [7:1.00]
   1536 ;
   1537 ; SKX-LABEL: test_vfmsubps_128:
   1538 ; SKX:       # %bb.0:
   1539 ; SKX-NEXT:    #APP
   1540 ; SKX-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
   1541 ; SKX-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
   1542 ; SKX-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
   1543 ; SKX-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
   1544 ; SKX-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
   1545 ; SKX-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
   1546 ; SKX-NEXT:    #NO_APP
   1547 ; SKX-NEXT:    retq # sched: [7:1.00]
   1548 ;
   1549 ; ZNVER1-LABEL: test_vfmsubps_128:
   1550 ; ZNVER1:       # %bb.0:
   1551 ; ZNVER1-NEXT:    #APP
   1552 ; ZNVER1-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
   1553 ; ZNVER1-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
   1554 ; ZNVER1-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
   1555 ; ZNVER1-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50]
   1556 ; ZNVER1-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50]
   1557 ; ZNVER1-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50]
   1558 ; ZNVER1-NEXT:    #NO_APP
   1559 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; $0-$2 are bound to %a0-%a2 through "x" constraints and $3 to the
          ; pointer %a3 through "*m"; the first three instructions take $2, the
          ; last three take $3 (printed as `mem` in the expected output above).
   1560   tail call void asm "vfmsub132ps $2, $1, $0 \0A\09 vfmsub213ps $2, $1, $0 \0A\09 vfmsub231ps $2, $1, $0 \0A\09 vfmsub132ps $3, $1, $0 \0A\09 vfmsub213ps $3, $1, $0 \0A\09 vfmsub231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
   1561   ret void
   1562 }
   1563 
   1564 define void @test_vfmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
        ; Scheduling test for the 256-bit packed single-precision VFMSUB
        ; instructions (132/213/231 forms), each issued once with all-register
        ; operands and once with a memory operand. The autogenerated assertions
        ; below pin the `sched: [...]` annotation that -print-schedule emits for
        ; every instruction under each CPU selected by the RUN lines, including
        ; the trailing vzeroupper where one is expected before the return.
   1565 ; GENERIC-LABEL: test_vfmsubps_256:
   1566 ; GENERIC:       # %bb.0:
   1567 ; GENERIC-NEXT:    #APP
   1568 ; GENERIC-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
   1569 ; GENERIC-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
   1570 ; GENERIC-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
   1571 ; GENERIC-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50]
   1572 ; GENERIC-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50]
   1573 ; GENERIC-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50]
   1574 ; GENERIC-NEXT:    #NO_APP
   1575 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   1576 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1577 ;
   1578 ; HASWELL-LABEL: test_vfmsubps_256:
   1579 ; HASWELL:       # %bb.0:
   1580 ; HASWELL-NEXT:    #APP
   1581 ; HASWELL-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
   1582 ; HASWELL-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
   1583 ; HASWELL-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
   1584 ; HASWELL-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
   1585 ; HASWELL-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
   1586 ; HASWELL-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
   1587 ; HASWELL-NEXT:    #NO_APP
   1588 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
   1589 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1590 ;
   1591 ; BROADWELL-LABEL: test_vfmsubps_256:
   1592 ; BROADWELL:       # %bb.0:
   1593 ; BROADWELL-NEXT:    #APP
   1594 ; BROADWELL-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
   1595 ; BROADWELL-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
   1596 ; BROADWELL-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
   1597 ; BROADWELL-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
   1598 ; BROADWELL-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
   1599 ; BROADWELL-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
   1600 ; BROADWELL-NEXT:    #NO_APP
   1601 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
   1602 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1603 ;
   1604 ; SKYLAKE-LABEL: test_vfmsubps_256:
   1605 ; SKYLAKE:       # %bb.0:
   1606 ; SKYLAKE-NEXT:    #APP
   1607 ; SKYLAKE-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
   1608 ; SKYLAKE-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
   1609 ; SKYLAKE-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
   1610 ; SKYLAKE-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
   1611 ; SKYLAKE-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
   1612 ; SKYLAKE-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
   1613 ; SKYLAKE-NEXT:    #NO_APP
   1614 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
   1615 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1616 ;
   1617 ; KNL-LABEL: test_vfmsubps_256:
   1618 ; KNL:       # %bb.0:
   1619 ; KNL-NEXT:    #APP
   1620 ; KNL-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
   1621 ; KNL-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
   1622 ; KNL-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
   1623 ; KNL-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
   1624 ; KNL-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
   1625 ; KNL-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
   1626 ; KNL-NEXT:    #NO_APP
   1627 ; KNL-NEXT:    retq # sched: [7:1.00]
   1628 ;
   1629 ; SKX-LABEL: test_vfmsubps_256:
   1630 ; SKX:       # %bb.0:
   1631 ; SKX-NEXT:    #APP
   1632 ; SKX-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
   1633 ; SKX-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
   1634 ; SKX-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
   1635 ; SKX-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
   1636 ; SKX-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
   1637 ; SKX-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
   1638 ; SKX-NEXT:    #NO_APP
   1639 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   1640 ; SKX-NEXT:    retq # sched: [7:1.00]
   1641 ;
   1642 ; ZNVER1-LABEL: test_vfmsubps_256:
   1643 ; ZNVER1:       # %bb.0:
   1644 ; ZNVER1-NEXT:    #APP
   1645 ; ZNVER1-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
   1646 ; ZNVER1-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
   1647 ; ZNVER1-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
   1648 ; ZNVER1-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
   1649 ; ZNVER1-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
   1650 ; ZNVER1-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
   1651 ; ZNVER1-NEXT:    #NO_APP
   1652 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
   1653 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; $0-$2 are bound to %a0-%a2 through "x" constraints and $3 to the
          ; pointer %a3 through "*m"; the first three instructions take $2, the
          ; last three take $3 (printed as `mem` in the expected output above).
   1654   tail call void asm "vfmsub132ps $2, $1, $0 \0A\09 vfmsub213ps $2, $1, $0 \0A\09 vfmsub231ps $2, $1, $0 \0A\09 vfmsub132ps $3, $1, $0 \0A\09 vfmsub213ps $3, $1, $0 \0A\09 vfmsub231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
   1655   ret void
   1656 }
   1657 
   1658 define void @test_vfmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
        ; Scheduling test for the scalar double-precision VFMSUB instructions
        ; (132/213/231 forms), each issued once with all-register operands and
        ; once with a memory operand. The autogenerated assertions below pin the
        ; `sched: [...]` annotation that -print-schedule emits for every
        ; instruction under each CPU selected by the RUN lines.
   1659 ; GENERIC-LABEL: test_vfmsubsd_128:
   1660 ; GENERIC:       # %bb.0:
   1661 ; GENERIC-NEXT:    #APP
   1662 ; GENERIC-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
   1663 ; GENERIC-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
   1664 ; GENERIC-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
   1665 ; GENERIC-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
   1666 ; GENERIC-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
   1667 ; GENERIC-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
   1668 ; GENERIC-NEXT:    #NO_APP
   1669 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1670 ;
   1671 ; HASWELL-LABEL: test_vfmsubsd_128:
   1672 ; HASWELL:       # %bb.0:
   1673 ; HASWELL-NEXT:    #APP
   1674 ; HASWELL-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
   1675 ; HASWELL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
   1676 ; HASWELL-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
   1677 ; HASWELL-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
   1678 ; HASWELL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
   1679 ; HASWELL-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
   1680 ; HASWELL-NEXT:    #NO_APP
   1681 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1682 ;
   1683 ; BROADWELL-LABEL: test_vfmsubsd_128:
   1684 ; BROADWELL:       # %bb.0:
   1685 ; BROADWELL-NEXT:    #APP
   1686 ; BROADWELL-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
   1687 ; BROADWELL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
   1688 ; BROADWELL-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
   1689 ; BROADWELL-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
   1690 ; BROADWELL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
   1691 ; BROADWELL-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
   1692 ; BROADWELL-NEXT:    #NO_APP
   1693 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1694 ;
   1695 ; SKYLAKE-LABEL: test_vfmsubsd_128:
   1696 ; SKYLAKE:       # %bb.0:
   1697 ; SKYLAKE-NEXT:    #APP
   1698 ; SKYLAKE-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
   1699 ; SKYLAKE-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
   1700 ; SKYLAKE-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
   1701 ; SKYLAKE-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
   1702 ; SKYLAKE-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
   1703 ; SKYLAKE-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
   1704 ; SKYLAKE-NEXT:    #NO_APP
   1705 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1706 ;
   1707 ; KNL-LABEL: test_vfmsubsd_128:
   1708 ; KNL:       # %bb.0:
   1709 ; KNL-NEXT:    #APP
   1710 ; KNL-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
   1711 ; KNL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
   1712 ; KNL-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
   1713 ; KNL-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
   1714 ; KNL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
   1715 ; KNL-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
   1716 ; KNL-NEXT:    #NO_APP
   1717 ; KNL-NEXT:    retq # sched: [7:1.00]
   1718 ;
   1719 ; SKX-LABEL: test_vfmsubsd_128:
   1720 ; SKX:       # %bb.0:
   1721 ; SKX-NEXT:    #APP
   1722 ; SKX-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
   1723 ; SKX-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
   1724 ; SKX-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
   1725 ; SKX-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
   1726 ; SKX-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
   1727 ; SKX-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
   1728 ; SKX-NEXT:    #NO_APP
   1729 ; SKX-NEXT:    retq # sched: [7:1.00]
   1730 ;
   1731 ; ZNVER1-LABEL: test_vfmsubsd_128:
   1732 ; ZNVER1:       # %bb.0:
   1733 ; ZNVER1-NEXT:    #APP
   1734 ; ZNVER1-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
   1735 ; ZNVER1-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
   1736 ; ZNVER1-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
   1737 ; ZNVER1-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50]
   1738 ; ZNVER1-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50]
   1739 ; ZNVER1-NEXT:    vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50]
   1740 ; ZNVER1-NEXT:    #NO_APP
   1741 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; $0-$2 are bound to %a0-%a2 through "x" constraints and $3 to the
          ; pointer %a3 through "*m"; the first three instructions take $2, the
          ; last three take $3 (printed as `mem` in the expected output above).
   1742   tail call void asm "vfmsub132sd $2, $1, $0 \0A\09 vfmsub213sd $2, $1, $0 \0A\09 vfmsub231sd $2, $1, $0 \0A\09 vfmsub132sd $3, $1, $0 \0A\09 vfmsub213sd $3, $1, $0 \0A\09 vfmsub231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
   1743   ret void
   1744 }
   1745 
   1746 define void @test_vfmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
        ; Scheduling test for the scalar single-precision VFMSUB instructions
        ; (132/213/231 forms), each issued once with all-register operands and
        ; once with a memory operand. The autogenerated assertions below pin the
        ; `sched: [...]` annotation that -print-schedule emits for every
        ; instruction under each CPU selected by the RUN lines.
   1747 ; GENERIC-LABEL: test_vfmsubss_128:
   1748 ; GENERIC:       # %bb.0:
   1749 ; GENERIC-NEXT:    #APP
   1750 ; GENERIC-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
   1751 ; GENERIC-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
   1752 ; GENERIC-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
   1753 ; GENERIC-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
   1754 ; GENERIC-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
   1755 ; GENERIC-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
   1756 ; GENERIC-NEXT:    #NO_APP
   1757 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1758 ;
   1759 ; HASWELL-LABEL: test_vfmsubss_128:
   1760 ; HASWELL:       # %bb.0:
   1761 ; HASWELL-NEXT:    #APP
   1762 ; HASWELL-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
   1763 ; HASWELL-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
   1764 ; HASWELL-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
   1765 ; HASWELL-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
   1766 ; HASWELL-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
   1767 ; HASWELL-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
   1768 ; HASWELL-NEXT:    #NO_APP
   1769 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1770 ;
   1771 ; BROADWELL-LABEL: test_vfmsubss_128:
   1772 ; BROADWELL:       # %bb.0:
   1773 ; BROADWELL-NEXT:    #APP
   1774 ; BROADWELL-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
   1775 ; BROADWELL-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
   1776 ; BROADWELL-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
   1777 ; BROADWELL-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
   1778 ; BROADWELL-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
   1779 ; BROADWELL-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
   1780 ; BROADWELL-NEXT:    #NO_APP
   1781 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1782 ;
   1783 ; SKYLAKE-LABEL: test_vfmsubss_128:
   1784 ; SKYLAKE:       # %bb.0:
   1785 ; SKYLAKE-NEXT:    #APP
   1786 ; SKYLAKE-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
   1787 ; SKYLAKE-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
   1788 ; SKYLAKE-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
   1789 ; SKYLAKE-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
   1790 ; SKYLAKE-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
   1791 ; SKYLAKE-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
   1792 ; SKYLAKE-NEXT:    #NO_APP
   1793 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1794 ;
   1795 ; KNL-LABEL: test_vfmsubss_128:
   1796 ; KNL:       # %bb.0:
   1797 ; KNL-NEXT:    #APP
   1798 ; KNL-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
   1799 ; KNL-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
   1800 ; KNL-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
   1801 ; KNL-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
   1802 ; KNL-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
   1803 ; KNL-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
   1804 ; KNL-NEXT:    #NO_APP
   1805 ; KNL-NEXT:    retq # sched: [7:1.00]
   1806 ;
   1807 ; SKX-LABEL: test_vfmsubss_128:
   1808 ; SKX:       # %bb.0:
   1809 ; SKX-NEXT:    #APP
   1810 ; SKX-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
   1811 ; SKX-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
   1812 ; SKX-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
   1813 ; SKX-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
   1814 ; SKX-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
   1815 ; SKX-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
   1816 ; SKX-NEXT:    #NO_APP
   1817 ; SKX-NEXT:    retq # sched: [7:1.00]
   1818 ;
   1819 ; ZNVER1-LABEL: test_vfmsubss_128:
   1820 ; ZNVER1:       # %bb.0:
   1821 ; ZNVER1-NEXT:    #APP
   1822 ; ZNVER1-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
   1823 ; ZNVER1-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
   1824 ; ZNVER1-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
   1825 ; ZNVER1-NEXT:    vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50]
   1826 ; ZNVER1-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50]
   1827 ; ZNVER1-NEXT:    vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50]
   1828 ; ZNVER1-NEXT:    #NO_APP
   1829 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; $0-$2 are bound to %a0-%a2 through "x" constraints and $3 to the
          ; pointer %a3 through "*m"; the first three instructions take $2, the
          ; last three take $3 (printed as `mem` in the expected output above).
   1830   tail call void asm "vfmsub132ss $2, $1, $0 \0A\09 vfmsub213ss $2, $1, $0 \0A\09 vfmsub231ss $2, $1, $0 \0A\09 vfmsub132ss $3, $1, $0 \0A\09 vfmsub213ss $3, $1, $0 \0A\09 vfmsub231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
   1831   ret void
   1832 }
   1833 
   1834 ;
   1835 ; VFNMADD
   1836 ;
   1837 
   1838 define void @test_vfnmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
        ; Scheduling test for the 128-bit packed double-precision VFNMADD
        ; instructions (132/213/231 forms), each issued once with all-register
        ; operands and once with a memory operand. The autogenerated assertions
        ; below pin the `sched: [...]` annotation that -print-schedule emits for
        ; every instruction under each CPU selected by the RUN lines.
   1839 ; GENERIC-LABEL: test_vfnmaddpd_128:
   1840 ; GENERIC:       # %bb.0:
   1841 ; GENERIC-NEXT:    #APP
   1842 ; GENERIC-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   1843 ; GENERIC-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   1844 ; GENERIC-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   1845 ; GENERIC-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
   1846 ; GENERIC-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
   1847 ; GENERIC-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
   1848 ; GENERIC-NEXT:    #NO_APP
   1849 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1850 ;
   1851 ; HASWELL-LABEL: test_vfnmaddpd_128:
   1852 ; HASWELL:       # %bb.0:
   1853 ; HASWELL-NEXT:    #APP
   1854 ; HASWELL-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   1855 ; HASWELL-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   1856 ; HASWELL-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   1857 ; HASWELL-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50]
   1858 ; HASWELL-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50]
   1859 ; HASWELL-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50]
   1860 ; HASWELL-NEXT:    #NO_APP
   1861 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1862 ;
   1863 ; BROADWELL-LABEL: test_vfnmaddpd_128:
   1864 ; BROADWELL:       # %bb.0:
   1865 ; BROADWELL-NEXT:    #APP
   1866 ; BROADWELL-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   1867 ; BROADWELL-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   1868 ; BROADWELL-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   1869 ; BROADWELL-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
   1870 ; BROADWELL-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
   1871 ; BROADWELL-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
   1872 ; BROADWELL-NEXT:    #NO_APP
   1873 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1874 ;
   1875 ; SKYLAKE-LABEL: test_vfnmaddpd_128:
   1876 ; SKYLAKE:       # %bb.0:
   1877 ; SKYLAKE-NEXT:    #APP
   1878 ; SKYLAKE-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
   1879 ; SKYLAKE-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
   1880 ; SKYLAKE-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
   1881 ; SKYLAKE-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
   1882 ; SKYLAKE-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
   1883 ; SKYLAKE-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
   1884 ; SKYLAKE-NEXT:    #NO_APP
   1885 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1886 ;
   1887 ; KNL-LABEL: test_vfnmaddpd_128:
   1888 ; KNL:       # %bb.0:
   1889 ; KNL-NEXT:    #APP
   1890 ; KNL-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   1891 ; KNL-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   1892 ; KNL-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   1893 ; KNL-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50]
   1894 ; KNL-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50]
   1895 ; KNL-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50]
   1896 ; KNL-NEXT:    #NO_APP
   1897 ; KNL-NEXT:    retq # sched: [7:1.00]
   1898 ;
   1899 ; SKX-LABEL: test_vfnmaddpd_128:
   1900 ; SKX:       # %bb.0:
   1901 ; SKX-NEXT:    #APP
   1902 ; SKX-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
   1903 ; SKX-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
   1904 ; SKX-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
   1905 ; SKX-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
   1906 ; SKX-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
   1907 ; SKX-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
   1908 ; SKX-NEXT:    #NO_APP
   1909 ; SKX-NEXT:    retq # sched: [7:1.00]
   1910 ;
   1911 ; ZNVER1-LABEL: test_vfnmaddpd_128:
   1912 ; ZNVER1:       # %bb.0:
   1913 ; ZNVER1-NEXT:    #APP
   1914 ; ZNVER1-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   1915 ; ZNVER1-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   1916 ; ZNVER1-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   1917 ; ZNVER1-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50]
   1918 ; ZNVER1-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50]
   1919 ; ZNVER1-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50]
   1920 ; ZNVER1-NEXT:    #NO_APP
   1921 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; $0-$2 are bound to %a0-%a2 through "x" constraints and $3 to the
          ; pointer %a3 through "*m"; the first three instructions take $2, the
          ; last three take $3 (printed as `mem` in the expected output above).
   1922   tail call void asm "vfnmadd132pd $2, $1, $0 \0A\09 vfnmadd213pd $2, $1, $0 \0A\09 vfnmadd231pd $2, $1, $0 \0A\09 vfnmadd132pd $3, $1, $0 \0A\09 vfnmadd213pd $3, $1, $0 \0A\09 vfnmadd231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
   1923   ret void
   1924 }
   1925 
   1926 define void @test_vfnmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
   1927 ; GENERIC-LABEL: test_vfnmaddpd_256:
   1928 ; GENERIC:       # %bb.0:
   1929 ; GENERIC-NEXT:    #APP
   1930 ; GENERIC-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
   1931 ; GENERIC-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
   1932 ; GENERIC-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
   1933 ; GENERIC-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50]
   1934 ; GENERIC-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50]
   1935 ; GENERIC-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50]
   1936 ; GENERIC-NEXT:    #NO_APP
   1937 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   1938 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1939 ;
   1940 ; HASWELL-LABEL: test_vfnmaddpd_256:
   1941 ; HASWELL:       # %bb.0:
   1942 ; HASWELL-NEXT:    #APP
   1943 ; HASWELL-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
   1944 ; HASWELL-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
   1945 ; HASWELL-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
   1946 ; HASWELL-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
   1947 ; HASWELL-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
   1948 ; HASWELL-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
   1949 ; HASWELL-NEXT:    #NO_APP
   1950 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
   1951 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1952 ;
   1953 ; BROADWELL-LABEL: test_vfnmaddpd_256:
   1954 ; BROADWELL:       # %bb.0:
   1955 ; BROADWELL-NEXT:    #APP
   1956 ; BROADWELL-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
   1957 ; BROADWELL-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
   1958 ; BROADWELL-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
   1959 ; BROADWELL-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
   1960 ; BROADWELL-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
   1961 ; BROADWELL-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
   1962 ; BROADWELL-NEXT:    #NO_APP
   1963 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
   1964 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1965 ;
   1966 ; SKYLAKE-LABEL: test_vfnmaddpd_256:
   1967 ; SKYLAKE:       # %bb.0:
   1968 ; SKYLAKE-NEXT:    #APP
   1969 ; SKYLAKE-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
   1970 ; SKYLAKE-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
   1971 ; SKYLAKE-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
   1972 ; SKYLAKE-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
   1973 ; SKYLAKE-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
   1974 ; SKYLAKE-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
   1975 ; SKYLAKE-NEXT:    #NO_APP
   1976 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
   1977 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1978 ;
   1979 ; KNL-LABEL: test_vfnmaddpd_256:
   1980 ; KNL:       # %bb.0:
   1981 ; KNL-NEXT:    #APP
   1982 ; KNL-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
   1983 ; KNL-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
   1984 ; KNL-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
   1985 ; KNL-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
   1986 ; KNL-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
   1987 ; KNL-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
   1988 ; KNL-NEXT:    #NO_APP
   1989 ; KNL-NEXT:    retq # sched: [7:1.00]
   1990 ;
   1991 ; SKX-LABEL: test_vfnmaddpd_256:
   1992 ; SKX:       # %bb.0:
   1993 ; SKX-NEXT:    #APP
   1994 ; SKX-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
   1995 ; SKX-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
   1996 ; SKX-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
   1997 ; SKX-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
   1998 ; SKX-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
   1999 ; SKX-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
   2000 ; SKX-NEXT:    #NO_APP
   2001 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   2002 ; SKX-NEXT:    retq # sched: [7:1.00]
   2003 ;
   2004 ; ZNVER1-LABEL: test_vfnmaddpd_256:
   2005 ; ZNVER1:       # %bb.0:
   2006 ; ZNVER1-NEXT:    #APP
   2007 ; ZNVER1-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
   2008 ; ZNVER1-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
   2009 ; ZNVER1-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
   2010 ; ZNVER1-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
   2011 ; ZNVER1-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
   2012 ; ZNVER1-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
   2013 ; ZNVER1-NEXT:    #NO_APP
   2014 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
   2015 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   2016   tail call void asm "vfnmadd132pd $2, $1, $0 \0A\09 vfnmadd213pd $2, $1, $0 \0A\09 vfnmadd231pd $2, $1, $0 \0A\09 vfnmadd132pd $3, $1, $0 \0A\09 vfnmadd213pd $3, $1, $0 \0A\09 vfnmadd231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
   2017   ret void
   2018 }
   2019 
   2020 define void @test_vfnmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
        ; Scheduling checks for the 128-bit packed-single vfnmadd instructions
        ; (132, 213 and 231 operand orders). The inline asm at the end of this
        ; function issues each order twice: first with three register operands,
        ; then with the third source taken from memory. Every per-cpu block of
        ; assertions below expects those six instructions in that order, each
        ; followed by its "sched" annotation, and then the return.
   2021 ; GENERIC-LABEL: test_vfnmaddps_128:
   2022 ; GENERIC:       # %bb.0:
   2023 ; GENERIC-NEXT:    #APP
   2024 ; GENERIC-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   2025 ; GENERIC-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   2026 ; GENERIC-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   2027 ; GENERIC-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
   2028 ; GENERIC-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
   2029 ; GENERIC-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
   2030 ; GENERIC-NEXT:    #NO_APP
   2031 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2032 ;
   2033 ; HASWELL-LABEL: test_vfnmaddps_128:
   2034 ; HASWELL:       # %bb.0:
   2035 ; HASWELL-NEXT:    #APP
   2036 ; HASWELL-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   2037 ; HASWELL-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   2038 ; HASWELL-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   2039 ; HASWELL-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50]
   2040 ; HASWELL-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50]
   2041 ; HASWELL-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50]
   2042 ; HASWELL-NEXT:    #NO_APP
   2043 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2044 ;
   2045 ; BROADWELL-LABEL: test_vfnmaddps_128:
   2046 ; BROADWELL:       # %bb.0:
   2047 ; BROADWELL-NEXT:    #APP
   2048 ; BROADWELL-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   2049 ; BROADWELL-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   2050 ; BROADWELL-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   2051 ; BROADWELL-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
   2052 ; BROADWELL-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
   2053 ; BROADWELL-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
   2054 ; BROADWELL-NEXT:    #NO_APP
   2055 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2056 ;
   2057 ; SKYLAKE-LABEL: test_vfnmaddps_128:
   2058 ; SKYLAKE:       # %bb.0:
   2059 ; SKYLAKE-NEXT:    #APP
   2060 ; SKYLAKE-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
   2061 ; SKYLAKE-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
   2062 ; SKYLAKE-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
   2063 ; SKYLAKE-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
   2064 ; SKYLAKE-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
   2065 ; SKYLAKE-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
   2066 ; SKYLAKE-NEXT:    #NO_APP
   2067 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2068 ;
   2069 ; KNL-LABEL: test_vfnmaddps_128:
   2070 ; KNL:       # %bb.0:
   2071 ; KNL-NEXT:    #APP
   2072 ; KNL-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   2073 ; KNL-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   2074 ; KNL-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   2075 ; KNL-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50]
   2076 ; KNL-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50]
   2077 ; KNL-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50]
   2078 ; KNL-NEXT:    #NO_APP
   2079 ; KNL-NEXT:    retq # sched: [7:1.00]
   2080 ;
   2081 ; SKX-LABEL: test_vfnmaddps_128:
   2082 ; SKX:       # %bb.0:
   2083 ; SKX-NEXT:    #APP
   2084 ; SKX-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
   2085 ; SKX-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
   2086 ; SKX-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
   2087 ; SKX-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
   2088 ; SKX-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
   2089 ; SKX-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
   2090 ; SKX-NEXT:    #NO_APP
   2091 ; SKX-NEXT:    retq # sched: [7:1.00]
   2092 ;
   2093 ; ZNVER1-LABEL: test_vfnmaddps_128:
   2094 ; ZNVER1:       # %bb.0:
   2095 ; ZNVER1-NEXT:    #APP
   2096 ; ZNVER1-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   2097 ; ZNVER1-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   2098 ; ZNVER1-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   2099 ; ZNVER1-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50]
   2100 ; ZNVER1-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50]
   2101 ; ZNVER1-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50]
   2102 ; ZNVER1-NEXT:    #NO_APP
   2103 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; $0, $1 and $2 are the "x"-constrained values %a0, %a1 and %a2 (they
        ; appear as xmm0, xmm1 and xmm2 in the assertions above); $3 is the "*m"
        ; operand built from the pointer %a3, which is why the last three
        ; instructions are the ones matched with "mem".
   2104   tail call void asm "vfnmadd132ps $2, $1, $0 \0A\09 vfnmadd213ps $2, $1, $0 \0A\09 vfnmadd231ps $2, $1, $0 \0A\09 vfnmadd132ps $3, $1, $0 \0A\09 vfnmadd213ps $3, $1, $0 \0A\09 vfnmadd231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
   2105   ret void
   2106 }
   2107 
   2108 define void @test_vfnmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
        ; Scheduling checks for the 256-bit packed-single vfnmadd instructions
        ; (132, 213 and 231 operand orders). The inline asm at the end of this
        ; function issues each order twice: first with three register operands,
        ; then with the third source taken from memory. Every per-cpu block of
        ; assertions below expects those six instructions in that order, each
        ; followed by its "sched" annotation. All blocks except the knl one also
        ; expect a vzeroupper between the asm region and the return.
   2109 ; GENERIC-LABEL: test_vfnmaddps_256:
   2110 ; GENERIC:       # %bb.0:
   2111 ; GENERIC-NEXT:    #APP
   2112 ; GENERIC-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
   2113 ; GENERIC-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
   2114 ; GENERIC-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
   2115 ; GENERIC-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50]
   2116 ; GENERIC-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50]
   2117 ; GENERIC-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50]
   2118 ; GENERIC-NEXT:    #NO_APP
   2119 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   2120 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2121 ;
   2122 ; HASWELL-LABEL: test_vfnmaddps_256:
   2123 ; HASWELL:       # %bb.0:
   2124 ; HASWELL-NEXT:    #APP
   2125 ; HASWELL-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
   2126 ; HASWELL-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
   2127 ; HASWELL-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
   2128 ; HASWELL-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
   2129 ; HASWELL-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
   2130 ; HASWELL-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
   2131 ; HASWELL-NEXT:    #NO_APP
   2132 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
   2133 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2134 ;
   2135 ; BROADWELL-LABEL: test_vfnmaddps_256:
   2136 ; BROADWELL:       # %bb.0:
   2137 ; BROADWELL-NEXT:    #APP
   2138 ; BROADWELL-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
   2139 ; BROADWELL-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
   2140 ; BROADWELL-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
   2141 ; BROADWELL-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
   2142 ; BROADWELL-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
   2143 ; BROADWELL-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
   2144 ; BROADWELL-NEXT:    #NO_APP
   2145 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
   2146 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2147 ;
   2148 ; SKYLAKE-LABEL: test_vfnmaddps_256:
   2149 ; SKYLAKE:       # %bb.0:
   2150 ; SKYLAKE-NEXT:    #APP
   2151 ; SKYLAKE-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
   2152 ; SKYLAKE-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
   2153 ; SKYLAKE-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
   2154 ; SKYLAKE-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
   2155 ; SKYLAKE-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
   2156 ; SKYLAKE-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
   2157 ; SKYLAKE-NEXT:    #NO_APP
   2158 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
   2159 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2160 ;
   2161 ; KNL-LABEL: test_vfnmaddps_256:
   2162 ; KNL:       # %bb.0:
   2163 ; KNL-NEXT:    #APP
   2164 ; KNL-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
   2165 ; KNL-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
   2166 ; KNL-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
   2167 ; KNL-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
   2168 ; KNL-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
   2169 ; KNL-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
   2170 ; KNL-NEXT:    #NO_APP
   2171 ; KNL-NEXT:    retq # sched: [7:1.00]
   2172 ;
   2173 ; SKX-LABEL: test_vfnmaddps_256:
   2174 ; SKX:       # %bb.0:
   2175 ; SKX-NEXT:    #APP
   2176 ; SKX-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
   2177 ; SKX-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
   2178 ; SKX-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
   2179 ; SKX-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
   2180 ; SKX-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
   2181 ; SKX-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
   2182 ; SKX-NEXT:    #NO_APP
   2183 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   2184 ; SKX-NEXT:    retq # sched: [7:1.00]
   2185 ;
   2186 ; ZNVER1-LABEL: test_vfnmaddps_256:
   2187 ; ZNVER1:       # %bb.0:
   2188 ; ZNVER1-NEXT:    #APP
   2189 ; ZNVER1-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
   2190 ; ZNVER1-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
   2191 ; ZNVER1-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
   2192 ; ZNVER1-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
   2193 ; ZNVER1-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
   2194 ; ZNVER1-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
   2195 ; ZNVER1-NEXT:    #NO_APP
   2196 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
   2197 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; $0, $1 and $2 are the "x"-constrained values %a0, %a1 and %a2 (they
        ; appear as ymm0, ymm1 and ymm2 in the assertions above); $3 is the "*m"
        ; operand built from the pointer %a3, which is why the last three
        ; instructions are the ones matched with "mem".
   2198   tail call void asm "vfnmadd132ps $2, $1, $0 \0A\09 vfnmadd213ps $2, $1, $0 \0A\09 vfnmadd231ps $2, $1, $0 \0A\09 vfnmadd132ps $3, $1, $0 \0A\09 vfnmadd213ps $3, $1, $0 \0A\09 vfnmadd231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
   2199   ret void
   2200 }
   2201 
   2202 define void @test_vfnmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
        ; Scheduling checks for the scalar-double vfnmadd instructions (132, 213
        ; and 231 operand orders) on xmm registers. The inline asm at the end of
        ; this function issues each order twice: first with three register
        ; operands, then with the third source taken from memory. Every per-cpu
        ; block of assertions below expects those six instructions in that
        ; order, each followed by its "sched" annotation, and then the return.
   2203 ; GENERIC-LABEL: test_vfnmaddsd_128:
   2204 ; GENERIC:       # %bb.0:
   2205 ; GENERIC-NEXT:    #APP
   2206 ; GENERIC-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   2207 ; GENERIC-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   2208 ; GENERIC-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   2209 ; GENERIC-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
   2210 ; GENERIC-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
   2211 ; GENERIC-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
   2212 ; GENERIC-NEXT:    #NO_APP
   2213 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2214 ;
   2215 ; HASWELL-LABEL: test_vfnmaddsd_128:
   2216 ; HASWELL:       # %bb.0:
   2217 ; HASWELL-NEXT:    #APP
   2218 ; HASWELL-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   2219 ; HASWELL-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   2220 ; HASWELL-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   2221 ; HASWELL-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
   2222 ; HASWELL-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
   2223 ; HASWELL-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
   2224 ; HASWELL-NEXT:    #NO_APP
   2225 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2226 ;
   2227 ; BROADWELL-LABEL: test_vfnmaddsd_128:
   2228 ; BROADWELL:       # %bb.0:
   2229 ; BROADWELL-NEXT:    #APP
   2230 ; BROADWELL-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   2231 ; BROADWELL-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   2232 ; BROADWELL-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   2233 ; BROADWELL-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
   2234 ; BROADWELL-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
   2235 ; BROADWELL-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
   2236 ; BROADWELL-NEXT:    #NO_APP
   2237 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2238 ;
   2239 ; SKYLAKE-LABEL: test_vfnmaddsd_128:
   2240 ; SKYLAKE:       # %bb.0:
   2241 ; SKYLAKE-NEXT:    #APP
   2242 ; SKYLAKE-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
   2243 ; SKYLAKE-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
   2244 ; SKYLAKE-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
   2245 ; SKYLAKE-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
   2246 ; SKYLAKE-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
   2247 ; SKYLAKE-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
   2248 ; SKYLAKE-NEXT:    #NO_APP
   2249 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2250 ;
   2251 ; KNL-LABEL: test_vfnmaddsd_128:
   2252 ; KNL:       # %bb.0:
   2253 ; KNL-NEXT:    #APP
   2254 ; KNL-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   2255 ; KNL-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   2256 ; KNL-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   2257 ; KNL-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
   2258 ; KNL-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
   2259 ; KNL-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
   2260 ; KNL-NEXT:    #NO_APP
   2261 ; KNL-NEXT:    retq # sched: [7:1.00]
   2262 ;
   2263 ; SKX-LABEL: test_vfnmaddsd_128:
   2264 ; SKX:       # %bb.0:
   2265 ; SKX-NEXT:    #APP
   2266 ; SKX-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
   2267 ; SKX-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
   2268 ; SKX-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
   2269 ; SKX-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
   2270 ; SKX-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
   2271 ; SKX-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
   2272 ; SKX-NEXT:    #NO_APP
   2273 ; SKX-NEXT:    retq # sched: [7:1.00]
   2274 ;
   2275 ; ZNVER1-LABEL: test_vfnmaddsd_128:
   2276 ; ZNVER1:       # %bb.0:
   2277 ; ZNVER1-NEXT:    #APP
   2278 ; ZNVER1-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   2279 ; ZNVER1-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   2280 ; ZNVER1-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   2281 ; ZNVER1-NEXT:    vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50]
   2282 ; ZNVER1-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50]
   2283 ; ZNVER1-NEXT:    vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50]
   2284 ; ZNVER1-NEXT:    #NO_APP
   2285 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; $0, $1 and $2 are the "x"-constrained values %a0, %a1 and %a2 (they
        ; appear as xmm0, xmm1 and xmm2 in the assertions above); $3 is the "*m"
        ; operand built from the pointer %a3, which is why the last three
        ; instructions are the ones matched with "mem".
   2286   tail call void asm "vfnmadd132sd $2, $1, $0 \0A\09 vfnmadd213sd $2, $1, $0 \0A\09 vfnmadd231sd $2, $1, $0 \0A\09 vfnmadd132sd $3, $1, $0 \0A\09 vfnmadd213sd $3, $1, $0 \0A\09 vfnmadd231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
   2287   ret void
   2288 }
   2289 
   2290 define void @test_vfnmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
        ; Scheduling checks for the scalar-single vfnmadd instructions (132, 213
        ; and 231 operand orders) on xmm registers. The inline asm at the end of
        ; this function issues each order twice: first with three register
        ; operands, then with the third source taken from memory. Every per-cpu
        ; block of assertions below expects those six instructions in that
        ; order, each followed by its "sched" annotation, and then the return.
   2291 ; GENERIC-LABEL: test_vfnmaddss_128:
   2292 ; GENERIC:       # %bb.0:
   2293 ; GENERIC-NEXT:    #APP
   2294 ; GENERIC-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   2295 ; GENERIC-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   2296 ; GENERIC-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   2297 ; GENERIC-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
   2298 ; GENERIC-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
   2299 ; GENERIC-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
   2300 ; GENERIC-NEXT:    #NO_APP
   2301 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2302 ;
   2303 ; HASWELL-LABEL: test_vfnmaddss_128:
   2304 ; HASWELL:       # %bb.0:
   2305 ; HASWELL-NEXT:    #APP
   2306 ; HASWELL-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   2307 ; HASWELL-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   2308 ; HASWELL-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   2309 ; HASWELL-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
   2310 ; HASWELL-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
   2311 ; HASWELL-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
   2312 ; HASWELL-NEXT:    #NO_APP
   2313 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2314 ;
   2315 ; BROADWELL-LABEL: test_vfnmaddss_128:
   2316 ; BROADWELL:       # %bb.0:
   2317 ; BROADWELL-NEXT:    #APP
   2318 ; BROADWELL-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   2319 ; BROADWELL-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   2320 ; BROADWELL-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   2321 ; BROADWELL-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
   2322 ; BROADWELL-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
   2323 ; BROADWELL-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
   2324 ; BROADWELL-NEXT:    #NO_APP
   2325 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2326 ;
   2327 ; SKYLAKE-LABEL: test_vfnmaddss_128:
   2328 ; SKYLAKE:       # %bb.0:
   2329 ; SKYLAKE-NEXT:    #APP
   2330 ; SKYLAKE-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
   2331 ; SKYLAKE-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
   2332 ; SKYLAKE-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
   2333 ; SKYLAKE-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
   2334 ; SKYLAKE-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
   2335 ; SKYLAKE-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
   2336 ; SKYLAKE-NEXT:    #NO_APP
   2337 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2338 ;
   2339 ; KNL-LABEL: test_vfnmaddss_128:
   2340 ; KNL:       # %bb.0:
   2341 ; KNL-NEXT:    #APP
   2342 ; KNL-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   2343 ; KNL-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   2344 ; KNL-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   2345 ; KNL-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
   2346 ; KNL-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
   2347 ; KNL-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
   2348 ; KNL-NEXT:    #NO_APP
   2349 ; KNL-NEXT:    retq # sched: [7:1.00]
   2350 ;
   2351 ; SKX-LABEL: test_vfnmaddss_128:
   2352 ; SKX:       # %bb.0:
   2353 ; SKX-NEXT:    #APP
   2354 ; SKX-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
   2355 ; SKX-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
   2356 ; SKX-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
   2357 ; SKX-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
   2358 ; SKX-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
   2359 ; SKX-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
   2360 ; SKX-NEXT:    #NO_APP
   2361 ; SKX-NEXT:    retq # sched: [7:1.00]
   2362 ;
   2363 ; ZNVER1-LABEL: test_vfnmaddss_128:
   2364 ; ZNVER1:       # %bb.0:
   2365 ; ZNVER1-NEXT:    #APP
   2366 ; ZNVER1-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
   2367 ; ZNVER1-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
   2368 ; ZNVER1-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
   2369 ; ZNVER1-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50]
   2370 ; ZNVER1-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50]
   2371 ; ZNVER1-NEXT:    vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50]
   2372 ; ZNVER1-NEXT:    #NO_APP
   2373 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; $0, $1 and $2 are the "x"-constrained values %a0, %a1 and %a2 (they
        ; appear as xmm0, xmm1 and xmm2 in the assertions above); $3 is the "*m"
        ; operand built from the pointer %a3, which is why the last three
        ; instructions are the ones matched with "mem".
   2374   tail call void asm "vfnmadd132ss $2, $1, $0 \0A\09 vfnmadd213ss $2, $1, $0 \0A\09 vfnmadd231ss $2, $1, $0 \0A\09 vfnmadd132ss $3, $1, $0 \0A\09 vfnmadd213ss $3, $1, $0 \0A\09 vfnmadd231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
   2375   ret void
   2376 }
   2377 
   2378 ;
   2379 ; VFNMSUB
   2380 ;
   2381 
   2382 define void @test_vfnmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
        ; Scheduling checks for the 128-bit packed-double vfnmsub instructions
        ; (132, 213 and 231 operand orders). The inline asm at the end of this
        ; function issues each order twice: first with three register operands,
        ; then with the third source taken from memory. Every per-cpu block of
        ; assertions below expects those six instructions in that order, each
        ; followed by its "sched" annotation, and then the return.
   2383 ; GENERIC-LABEL: test_vfnmsubpd_128:
   2384 ; GENERIC:       # %bb.0:
   2385 ; GENERIC-NEXT:    #APP
   2386 ; GENERIC-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2387 ; GENERIC-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2388 ; GENERIC-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2389 ; GENERIC-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
   2390 ; GENERIC-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
   2391 ; GENERIC-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
   2392 ; GENERIC-NEXT:    #NO_APP
   2393 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2394 ;
   2395 ; HASWELL-LABEL: test_vfnmsubpd_128:
   2396 ; HASWELL:       # %bb.0:
   2397 ; HASWELL-NEXT:    #APP
   2398 ; HASWELL-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2399 ; HASWELL-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2400 ; HASWELL-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2401 ; HASWELL-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50]
   2402 ; HASWELL-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50]
   2403 ; HASWELL-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50]
   2404 ; HASWELL-NEXT:    #NO_APP
   2405 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2406 ;
   2407 ; BROADWELL-LABEL: test_vfnmsubpd_128:
   2408 ; BROADWELL:       # %bb.0:
   2409 ; BROADWELL-NEXT:    #APP
   2410 ; BROADWELL-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2411 ; BROADWELL-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2412 ; BROADWELL-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2413 ; BROADWELL-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
   2414 ; BROADWELL-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
   2415 ; BROADWELL-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
   2416 ; BROADWELL-NEXT:    #NO_APP
   2417 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2418 ;
   2419 ; SKYLAKE-LABEL: test_vfnmsubpd_128:
   2420 ; SKYLAKE:       # %bb.0:
   2421 ; SKYLAKE-NEXT:    #APP
   2422 ; SKYLAKE-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
   2423 ; SKYLAKE-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
   2424 ; SKYLAKE-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
   2425 ; SKYLAKE-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
   2426 ; SKYLAKE-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
   2427 ; SKYLAKE-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
   2428 ; SKYLAKE-NEXT:    #NO_APP
   2429 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2430 ;
   2431 ; KNL-LABEL: test_vfnmsubpd_128:
   2432 ; KNL:       # %bb.0:
   2433 ; KNL-NEXT:    #APP
   2434 ; KNL-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2435 ; KNL-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2436 ; KNL-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2437 ; KNL-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50]
   2438 ; KNL-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50]
   2439 ; KNL-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50]
   2440 ; KNL-NEXT:    #NO_APP
   2441 ; KNL-NEXT:    retq # sched: [7:1.00]
   2442 ;
   2443 ; SKX-LABEL: test_vfnmsubpd_128:
   2444 ; SKX:       # %bb.0:
   2445 ; SKX-NEXT:    #APP
   2446 ; SKX-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
   2447 ; SKX-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
   2448 ; SKX-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
   2449 ; SKX-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
   2450 ; SKX-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
   2451 ; SKX-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
   2452 ; SKX-NEXT:    #NO_APP
   2453 ; SKX-NEXT:    retq # sched: [7:1.00]
   2454 ;
   2455 ; ZNVER1-LABEL: test_vfnmsubpd_128:
   2456 ; ZNVER1:       # %bb.0:
   2457 ; ZNVER1-NEXT:    #APP
   2458 ; ZNVER1-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2459 ; ZNVER1-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2460 ; ZNVER1-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2461 ; ZNVER1-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50]
   2462 ; ZNVER1-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50]
   2463 ; ZNVER1-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50]
   2464 ; ZNVER1-NEXT:    #NO_APP
   2465 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; $0, $1 and $2 are the "x"-constrained values %a0, %a1 and %a2 (they
        ; appear as xmm0, xmm1 and xmm2 in the assertions above); $3 is the "*m"
        ; operand built from the pointer %a3, which is why the last three
        ; instructions are the ones matched with "mem".
   2466   tail call void asm "vfnmsub132pd $2, $1, $0 \0A\09 vfnmsub213pd $2, $1, $0 \0A\09 vfnmsub231pd $2, $1, $0 \0A\09 vfnmsub132pd $3, $1, $0 \0A\09 vfnmsub213pd $3, $1, $0 \0A\09 vfnmsub231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
   2467   ret void
   2468 }
   2469 
   2470 define void @test_vfnmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
        ; Scheduling-annotation test for the vfnmsub{132,213,231}pd instructions on
        ; ymm operands (<4 x double> arguments). The inline asm at the bottom emits
        ; the three forms twice: first with a register last operand, then with a
        ; memory last operand. For each RUN-line prefix the lines below pin the
        ; decoded operand comment and the "sched: [..]" annotation that llc prints
        ; under -print-schedule.
        ; These assertions are autogenerated by utils/update_llc_test_checks.py;
        ; regenerate them with that script instead of editing them by hand.
        ; Every prefix except KNL also expects a vzeroupper between #NO_APP and retq.
   2471 ; GENERIC-LABEL: test_vfnmsubpd_256:
   2472 ; GENERIC:       # %bb.0:
   2473 ; GENERIC-NEXT:    #APP
   2474 ; GENERIC-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
   2475 ; GENERIC-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
   2476 ; GENERIC-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
   2477 ; GENERIC-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50]
   2478 ; GENERIC-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50]
   2479 ; GENERIC-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50]
   2480 ; GENERIC-NEXT:    #NO_APP
   2481 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   2482 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2483 ;
   2484 ; HASWELL-LABEL: test_vfnmsubpd_256:
   2485 ; HASWELL:       # %bb.0:
   2486 ; HASWELL-NEXT:    #APP
   2487 ; HASWELL-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
   2488 ; HASWELL-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
   2489 ; HASWELL-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
   2490 ; HASWELL-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
   2491 ; HASWELL-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
   2492 ; HASWELL-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
   2493 ; HASWELL-NEXT:    #NO_APP
   2494 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
   2495 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2496 ;
   2497 ; BROADWELL-LABEL: test_vfnmsubpd_256:
   2498 ; BROADWELL:       # %bb.0:
   2499 ; BROADWELL-NEXT:    #APP
   2500 ; BROADWELL-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
   2501 ; BROADWELL-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
   2502 ; BROADWELL-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
   2503 ; BROADWELL-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
   2504 ; BROADWELL-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
   2505 ; BROADWELL-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
   2506 ; BROADWELL-NEXT:    #NO_APP
   2507 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
   2508 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2509 ;
   2510 ; SKYLAKE-LABEL: test_vfnmsubpd_256:
   2511 ; SKYLAKE:       # %bb.0:
   2512 ; SKYLAKE-NEXT:    #APP
   2513 ; SKYLAKE-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
   2514 ; SKYLAKE-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
   2515 ; SKYLAKE-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
   2516 ; SKYLAKE-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
   2517 ; SKYLAKE-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
   2518 ; SKYLAKE-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
   2519 ; SKYLAKE-NEXT:    #NO_APP
   2520 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
   2521 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2522 ;
   2523 ; KNL-LABEL: test_vfnmsubpd_256:
   2524 ; KNL:       # %bb.0:
   2525 ; KNL-NEXT:    #APP
   2526 ; KNL-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
   2527 ; KNL-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
   2528 ; KNL-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
   2529 ; KNL-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
   2530 ; KNL-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
   2531 ; KNL-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
   2532 ; KNL-NEXT:    #NO_APP
   2533 ; KNL-NEXT:    retq # sched: [7:1.00]
   2534 ;
   2535 ; SKX-LABEL: test_vfnmsubpd_256:
   2536 ; SKX:       # %bb.0:
   2537 ; SKX-NEXT:    #APP
   2538 ; SKX-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
   2539 ; SKX-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
   2540 ; SKX-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
   2541 ; SKX-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
   2542 ; SKX-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
   2543 ; SKX-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
   2544 ; SKX-NEXT:    #NO_APP
   2545 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   2546 ; SKX-NEXT:    retq # sched: [7:1.00]
   2547 ;
   2548 ; ZNVER1-LABEL: test_vfnmsubpd_256:
   2549 ; ZNVER1:       # %bb.0:
   2550 ; ZNVER1-NEXT:    #APP
   2551 ; ZNVER1-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
   2552 ; ZNVER1-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
   2553 ; ZNVER1-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
   2554 ; ZNVER1-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
   2555 ; ZNVER1-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
   2556 ; ZNVER1-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
   2557 ; ZNVER1-NEXT:    #NO_APP
   2558 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
   2559 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Operand map for the asm template: $0, $1 and $2 are the three "x"
          ; (register) inputs %a0, %a1 and %a2; $3 is the "*m" (indirect memory)
          ; input %a3. The first three instructions use $2 as their first listed
          ; operand, the last three use $3. "\0A\09" (newline, tab) separates the
          ; six instructions inside the single asm string.
   2560   tail call void asm "vfnmsub132pd $2, $1, $0 \0A\09 vfnmsub213pd $2, $1, $0 \0A\09 vfnmsub231pd $2, $1, $0 \0A\09 vfnmsub132pd $3, $1, $0 \0A\09 vfnmsub213pd $3, $1, $0 \0A\09 vfnmsub231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
   2561   ret void
   2562 }
   2563 
   2564 define void @test_vfnmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
        ; Scheduling-annotation test for the vfnmsub{132,213,231}ps instructions on
        ; xmm operands (<4 x float> arguments). The inline asm at the bottom emits
        ; the three forms twice: first with a register last operand, then with a
        ; memory last operand. For each RUN-line prefix the lines below pin the
        ; decoded operand comment and the "sched: [..]" annotation that llc prints
        ; under -print-schedule.
        ; These assertions are autogenerated by utils/update_llc_test_checks.py;
        ; regenerate them with that script instead of editing them by hand.
        ; No prefix expects a vzeroupper here: retq directly follows #NO_APP.
   2565 ; GENERIC-LABEL: test_vfnmsubps_128:
   2566 ; GENERIC:       # %bb.0:
   2567 ; GENERIC-NEXT:    #APP
   2568 ; GENERIC-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2569 ; GENERIC-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2570 ; GENERIC-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2571 ; GENERIC-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
   2572 ; GENERIC-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
   2573 ; GENERIC-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
   2574 ; GENERIC-NEXT:    #NO_APP
   2575 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2576 ;
   2577 ; HASWELL-LABEL: test_vfnmsubps_128:
   2578 ; HASWELL:       # %bb.0:
   2579 ; HASWELL-NEXT:    #APP
   2580 ; HASWELL-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2581 ; HASWELL-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2582 ; HASWELL-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2583 ; HASWELL-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50]
   2584 ; HASWELL-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50]
   2585 ; HASWELL-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50]
   2586 ; HASWELL-NEXT:    #NO_APP
   2587 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2588 ;
   2589 ; BROADWELL-LABEL: test_vfnmsubps_128:
   2590 ; BROADWELL:       # %bb.0:
   2591 ; BROADWELL-NEXT:    #APP
   2592 ; BROADWELL-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2593 ; BROADWELL-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2594 ; BROADWELL-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2595 ; BROADWELL-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
   2596 ; BROADWELL-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
   2597 ; BROADWELL-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
   2598 ; BROADWELL-NEXT:    #NO_APP
   2599 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2600 ;
   2601 ; SKYLAKE-LABEL: test_vfnmsubps_128:
   2602 ; SKYLAKE:       # %bb.0:
   2603 ; SKYLAKE-NEXT:    #APP
   2604 ; SKYLAKE-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
   2605 ; SKYLAKE-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
   2606 ; SKYLAKE-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
   2607 ; SKYLAKE-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
   2608 ; SKYLAKE-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
   2609 ; SKYLAKE-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
   2610 ; SKYLAKE-NEXT:    #NO_APP
   2611 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2612 ;
   2613 ; KNL-LABEL: test_vfnmsubps_128:
   2614 ; KNL:       # %bb.0:
   2615 ; KNL-NEXT:    #APP
   2616 ; KNL-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2617 ; KNL-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2618 ; KNL-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2619 ; KNL-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50]
   2620 ; KNL-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50]
   2621 ; KNL-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50]
   2622 ; KNL-NEXT:    #NO_APP
   2623 ; KNL-NEXT:    retq # sched: [7:1.00]
   2624 ;
   2625 ; SKX-LABEL: test_vfnmsubps_128:
   2626 ; SKX:       # %bb.0:
   2627 ; SKX-NEXT:    #APP
   2628 ; SKX-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
   2629 ; SKX-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
   2630 ; SKX-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
   2631 ; SKX-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
   2632 ; SKX-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
   2633 ; SKX-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
   2634 ; SKX-NEXT:    #NO_APP
   2635 ; SKX-NEXT:    retq # sched: [7:1.00]
   2636 ;
   2637 ; ZNVER1-LABEL: test_vfnmsubps_128:
   2638 ; ZNVER1:       # %bb.0:
   2639 ; ZNVER1-NEXT:    #APP
   2640 ; ZNVER1-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2641 ; ZNVER1-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2642 ; ZNVER1-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2643 ; ZNVER1-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50]
   2644 ; ZNVER1-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50]
   2645 ; ZNVER1-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50]
   2646 ; ZNVER1-NEXT:    #NO_APP
   2647 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Operand map for the asm template: $0, $1 and $2 are the three "x"
          ; (register) inputs %a0, %a1 and %a2; $3 is the "*m" (indirect memory)
          ; input %a3. The first three instructions use $2 as their first listed
          ; operand, the last three use $3. "\0A\09" (newline, tab) separates the
          ; six instructions inside the single asm string.
   2648   tail call void asm "vfnmsub132ps $2, $1, $0 \0A\09 vfnmsub213ps $2, $1, $0 \0A\09 vfnmsub231ps $2, $1, $0 \0A\09 vfnmsub132ps $3, $1, $0 \0A\09 vfnmsub213ps $3, $1, $0 \0A\09 vfnmsub231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
   2649   ret void
   2650 }
   2651 
   2652 define void @test_vfnmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
        ; Scheduling-annotation test for the vfnmsub{132,213,231}ps instructions on
        ; ymm operands (<8 x float> arguments). The inline asm at the bottom emits
        ; the three forms twice: first with a register last operand, then with a
        ; memory last operand. For each RUN-line prefix the lines below pin the
        ; decoded operand comment and the "sched: [..]" annotation that llc prints
        ; under -print-schedule.
        ; These assertions are autogenerated by utils/update_llc_test_checks.py;
        ; regenerate them with that script instead of editing them by hand.
        ; Every prefix except KNL also expects a vzeroupper between #NO_APP and retq.
   2653 ; GENERIC-LABEL: test_vfnmsubps_256:
   2654 ; GENERIC:       # %bb.0:
   2655 ; GENERIC-NEXT:    #APP
   2656 ; GENERIC-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
   2657 ; GENERIC-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
   2658 ; GENERIC-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
   2659 ; GENERIC-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50]
   2660 ; GENERIC-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50]
   2661 ; GENERIC-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50]
   2662 ; GENERIC-NEXT:    #NO_APP
   2663 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   2664 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2665 ;
   2666 ; HASWELL-LABEL: test_vfnmsubps_256:
   2667 ; HASWELL:       # %bb.0:
   2668 ; HASWELL-NEXT:    #APP
   2669 ; HASWELL-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
   2670 ; HASWELL-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
   2671 ; HASWELL-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
   2672 ; HASWELL-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
   2673 ; HASWELL-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
   2674 ; HASWELL-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
   2675 ; HASWELL-NEXT:    #NO_APP
   2676 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
   2677 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2678 ;
   2679 ; BROADWELL-LABEL: test_vfnmsubps_256:
   2680 ; BROADWELL:       # %bb.0:
   2681 ; BROADWELL-NEXT:    #APP
   2682 ; BROADWELL-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
   2683 ; BROADWELL-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
   2684 ; BROADWELL-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
   2685 ; BROADWELL-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
   2686 ; BROADWELL-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
   2687 ; BROADWELL-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
   2688 ; BROADWELL-NEXT:    #NO_APP
   2689 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
   2690 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2691 ;
   2692 ; SKYLAKE-LABEL: test_vfnmsubps_256:
   2693 ; SKYLAKE:       # %bb.0:
   2694 ; SKYLAKE-NEXT:    #APP
   2695 ; SKYLAKE-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
   2696 ; SKYLAKE-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
   2697 ; SKYLAKE-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
   2698 ; SKYLAKE-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
   2699 ; SKYLAKE-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
   2700 ; SKYLAKE-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
   2701 ; SKYLAKE-NEXT:    #NO_APP
   2702 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
   2703 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2704 ;
   2705 ; KNL-LABEL: test_vfnmsubps_256:
   2706 ; KNL:       # %bb.0:
   2707 ; KNL-NEXT:    #APP
   2708 ; KNL-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
   2709 ; KNL-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
   2710 ; KNL-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
   2711 ; KNL-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
   2712 ; KNL-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
   2713 ; KNL-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
   2714 ; KNL-NEXT:    #NO_APP
   2715 ; KNL-NEXT:    retq # sched: [7:1.00]
   2716 ;
   2717 ; SKX-LABEL: test_vfnmsubps_256:
   2718 ; SKX:       # %bb.0:
   2719 ; SKX-NEXT:    #APP
   2720 ; SKX-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
   2721 ; SKX-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
   2722 ; SKX-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
   2723 ; SKX-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
   2724 ; SKX-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
   2725 ; SKX-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
   2726 ; SKX-NEXT:    #NO_APP
   2727 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   2728 ; SKX-NEXT:    retq # sched: [7:1.00]
   2729 ;
   2730 ; ZNVER1-LABEL: test_vfnmsubps_256:
   2731 ; ZNVER1:       # %bb.0:
   2732 ; ZNVER1-NEXT:    #APP
   2733 ; ZNVER1-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
   2734 ; ZNVER1-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
   2735 ; ZNVER1-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
   2736 ; ZNVER1-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
   2737 ; ZNVER1-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
   2738 ; ZNVER1-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
   2739 ; ZNVER1-NEXT:    #NO_APP
   2740 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
   2741 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Operand map for the asm template: $0, $1 and $2 are the three "x"
          ; (register) inputs %a0, %a1 and %a2; $3 is the "*m" (indirect memory)
          ; input %a3. The first three instructions use $2 as their first listed
          ; operand, the last three use $3. "\0A\09" (newline, tab) separates the
          ; six instructions inside the single asm string.
   2742   tail call void asm "vfnmsub132ps $2, $1, $0 \0A\09 vfnmsub213ps $2, $1, $0 \0A\09 vfnmsub231ps $2, $1, $0 \0A\09 vfnmsub132ps $3, $1, $0 \0A\09 vfnmsub213ps $3, $1, $0 \0A\09 vfnmsub231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
   2743   ret void
   2744 }
   2745 
   2746 define void @test_vfnmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
        ; Scheduling-annotation test for the vfnmsub{132,213,231}sd instructions on
        ; xmm operands (the arguments are passed as <2 x double>). The inline asm at
        ; the bottom emits the three forms twice: first with a register last
        ; operand, then with a memory last operand. For each RUN-line prefix the
        ; lines below pin the decoded operand comment and the "sched: [..]"
        ; annotation that llc prints under -print-schedule.
        ; These assertions are autogenerated by utils/update_llc_test_checks.py;
        ; regenerate them with that script instead of editing them by hand.
        ; No prefix expects a vzeroupper here: retq directly follows #NO_APP.
   2747 ; GENERIC-LABEL: test_vfnmsubsd_128:
   2748 ; GENERIC:       # %bb.0:
   2749 ; GENERIC-NEXT:    #APP
   2750 ; GENERIC-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2751 ; GENERIC-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2752 ; GENERIC-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2753 ; GENERIC-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
   2754 ; GENERIC-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
   2755 ; GENERIC-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
   2756 ; GENERIC-NEXT:    #NO_APP
   2757 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2758 ;
   2759 ; HASWELL-LABEL: test_vfnmsubsd_128:
   2760 ; HASWELL:       # %bb.0:
   2761 ; HASWELL-NEXT:    #APP
   2762 ; HASWELL-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2763 ; HASWELL-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2764 ; HASWELL-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2765 ; HASWELL-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
   2766 ; HASWELL-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
   2767 ; HASWELL-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
   2768 ; HASWELL-NEXT:    #NO_APP
   2769 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2770 ;
   2771 ; BROADWELL-LABEL: test_vfnmsubsd_128:
   2772 ; BROADWELL:       # %bb.0:
   2773 ; BROADWELL-NEXT:    #APP
   2774 ; BROADWELL-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2775 ; BROADWELL-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2776 ; BROADWELL-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2777 ; BROADWELL-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
   2778 ; BROADWELL-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
   2779 ; BROADWELL-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
   2780 ; BROADWELL-NEXT:    #NO_APP
   2781 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2782 ;
   2783 ; SKYLAKE-LABEL: test_vfnmsubsd_128:
   2784 ; SKYLAKE:       # %bb.0:
   2785 ; SKYLAKE-NEXT:    #APP
   2786 ; SKYLAKE-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
   2787 ; SKYLAKE-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
   2788 ; SKYLAKE-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
   2789 ; SKYLAKE-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
   2790 ; SKYLAKE-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
   2791 ; SKYLAKE-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
   2792 ; SKYLAKE-NEXT:    #NO_APP
   2793 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2794 ;
   2795 ; KNL-LABEL: test_vfnmsubsd_128:
   2796 ; KNL:       # %bb.0:
   2797 ; KNL-NEXT:    #APP
   2798 ; KNL-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2799 ; KNL-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2800 ; KNL-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2801 ; KNL-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
   2802 ; KNL-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
   2803 ; KNL-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
   2804 ; KNL-NEXT:    #NO_APP
   2805 ; KNL-NEXT:    retq # sched: [7:1.00]
   2806 ;
   2807 ; SKX-LABEL: test_vfnmsubsd_128:
   2808 ; SKX:       # %bb.0:
   2809 ; SKX-NEXT:    #APP
   2810 ; SKX-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
   2811 ; SKX-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
   2812 ; SKX-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
   2813 ; SKX-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
   2814 ; SKX-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
   2815 ; SKX-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
   2816 ; SKX-NEXT:    #NO_APP
   2817 ; SKX-NEXT:    retq # sched: [7:1.00]
   2818 ;
   2819 ; ZNVER1-LABEL: test_vfnmsubsd_128:
   2820 ; ZNVER1:       # %bb.0:
   2821 ; ZNVER1-NEXT:    #APP
   2822 ; ZNVER1-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2823 ; ZNVER1-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2824 ; ZNVER1-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2825 ; ZNVER1-NEXT:    vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50]
   2826 ; ZNVER1-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50]
   2827 ; ZNVER1-NEXT:    vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50]
   2828 ; ZNVER1-NEXT:    #NO_APP
   2829 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Operand map for the asm template: $0, $1 and $2 are the three "x"
          ; (register) inputs %a0, %a1 and %a2; $3 is the "*m" (indirect memory)
          ; input %a3. The first three instructions use $2 as their first listed
          ; operand, the last three use $3. "\0A\09" (newline, tab) separates the
          ; six instructions inside the single asm string.
   2830   tail call void asm "vfnmsub132sd $2, $1, $0 \0A\09 vfnmsub213sd $2, $1, $0 \0A\09 vfnmsub231sd $2, $1, $0 \0A\09 vfnmsub132sd $3, $1, $0 \0A\09 vfnmsub213sd $3, $1, $0 \0A\09 vfnmsub231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
   2831   ret void
   2832 }
   2833 
   2834 define void @test_vfnmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
        ; Scheduling-annotation test for the vfnmsub{132,213,231}ss instructions on
        ; xmm operands (the arguments are passed as <4 x float>). The inline asm at
        ; the bottom emits the three forms twice: first with a register last
        ; operand, then with a memory last operand. For each RUN-line prefix the
        ; lines below pin the decoded operand comment and the "sched: [..]"
        ; annotation that llc prints under -print-schedule.
        ; These assertions are autogenerated by utils/update_llc_test_checks.py;
        ; regenerate them with that script instead of editing them by hand.
        ; No prefix expects a vzeroupper here: retq directly follows #NO_APP.
   2835 ; GENERIC-LABEL: test_vfnmsubss_128:
   2836 ; GENERIC:       # %bb.0:
   2837 ; GENERIC-NEXT:    #APP
   2838 ; GENERIC-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2839 ; GENERIC-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2840 ; GENERIC-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2841 ; GENERIC-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
   2842 ; GENERIC-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
   2843 ; GENERIC-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
   2844 ; GENERIC-NEXT:    #NO_APP
   2845 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2846 ;
   2847 ; HASWELL-LABEL: test_vfnmsubss_128:
   2848 ; HASWELL:       # %bb.0:
   2849 ; HASWELL-NEXT:    #APP
   2850 ; HASWELL-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2851 ; HASWELL-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2852 ; HASWELL-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2853 ; HASWELL-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
   2854 ; HASWELL-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
   2855 ; HASWELL-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
   2856 ; HASWELL-NEXT:    #NO_APP
   2857 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2858 ;
   2859 ; BROADWELL-LABEL: test_vfnmsubss_128:
   2860 ; BROADWELL:       # %bb.0:
   2861 ; BROADWELL-NEXT:    #APP
   2862 ; BROADWELL-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2863 ; BROADWELL-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2864 ; BROADWELL-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2865 ; BROADWELL-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
   2866 ; BROADWELL-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
   2867 ; BROADWELL-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
   2868 ; BROADWELL-NEXT:    #NO_APP
   2869 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2870 ;
   2871 ; SKYLAKE-LABEL: test_vfnmsubss_128:
   2872 ; SKYLAKE:       # %bb.0:
   2873 ; SKYLAKE-NEXT:    #APP
   2874 ; SKYLAKE-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
   2875 ; SKYLAKE-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
   2876 ; SKYLAKE-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
   2877 ; SKYLAKE-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
   2878 ; SKYLAKE-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
   2879 ; SKYLAKE-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
   2880 ; SKYLAKE-NEXT:    #NO_APP
   2881 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2882 ;
   2883 ; KNL-LABEL: test_vfnmsubss_128:
   2884 ; KNL:       # %bb.0:
   2885 ; KNL-NEXT:    #APP
   2886 ; KNL-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2887 ; KNL-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2888 ; KNL-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2889 ; KNL-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
   2890 ; KNL-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
   2891 ; KNL-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
   2892 ; KNL-NEXT:    #NO_APP
   2893 ; KNL-NEXT:    retq # sched: [7:1.00]
   2894 ;
   2895 ; SKX-LABEL: test_vfnmsubss_128:
   2896 ; SKX:       # %bb.0:
   2897 ; SKX-NEXT:    #APP
   2898 ; SKX-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
   2899 ; SKX-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
   2900 ; SKX-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
   2901 ; SKX-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
   2902 ; SKX-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
   2903 ; SKX-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
   2904 ; SKX-NEXT:    #NO_APP
   2905 ; SKX-NEXT:    retq # sched: [7:1.00]
   2906 ;
   2907 ; ZNVER1-LABEL: test_vfnmsubss_128:
   2908 ; ZNVER1:       # %bb.0:
   2909 ; ZNVER1-NEXT:    #APP
   2910 ; ZNVER1-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
   2911 ; ZNVER1-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
   2912 ; ZNVER1-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
   2913 ; ZNVER1-NEXT:    vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50]
   2914 ; ZNVER1-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50]
   2915 ; ZNVER1-NEXT:    vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50]
   2916 ; ZNVER1-NEXT:    #NO_APP
   2917 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Operand map for the asm template: $0, $1 and $2 are the three "x"
          ; (register) inputs %a0, %a1 and %a2; $3 is the "*m" (indirect memory)
          ; input %a3. The first three instructions use $2 as their first listed
          ; operand, the last three use $3. "\0A\09" (newline, tab) separates the
          ; six instructions inside the single asm string.
   2918   tail call void asm "vfnmsub132ss $2, $1, $0 \0A\09 vfnmsub213ss $2, $1, $0 \0A\09 vfnmsub231ss $2, $1, $0 \0A\09 vfnmsub132ss $3, $1, $0 \0A\09 vfnmsub213ss $3, $1, $0 \0A\09 vfnmsub231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
   2919   ret void
   2920 }
   2921