Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER --check-prefix=BDVER1
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER --check-prefix=BDVER1
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER --check-prefix=BDVER1
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER --check-prefix=BDVER1
      7 
      8 ;
      9 ; VFMADD
     10 ;
     11 
     12 define void @test_vfmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
        ; Prints vfmaddpd (xmm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do not.
     13 ; GENERIC-LABEL: test_vfmaddpd_128:
     14 ; GENERIC:       # %bb.0:
     15 ; GENERIC-NEXT:    #APP
     16 ; GENERIC-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
     17 ; GENERIC-NEXT:    vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
     18 ; GENERIC-NEXT:    vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
     19 ; GENERIC-NEXT:    #NO_APP
     20 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     21 ;
     22 ; BDVER-LABEL: test_vfmaddpd_128:
     23 ; BDVER:       # %bb.0:
     24 ; BDVER-NEXT:    #APP
     25 ; BDVER-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
     26 ; BDVER-NEXT:    vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0
     27 ; BDVER-NEXT:    vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0
     28 ; BDVER-NEXT:    #NO_APP
     29 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
     30   tail call void asm "vfmaddpd $2, $1, $0, $0 \0A\09 vfmaddpd $3, $1, $0, $0 \0A\09 vfmaddpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
     31   ret void
     32 }
     33 
     34 define void @test_vfmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
        ; Prints vfmaddpd (ymm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do
        ; not. Both sets of expectations include a vzeroupper before the return.
     35 ; GENERIC-LABEL: test_vfmaddpd_256:
     36 ; GENERIC:       # %bb.0:
     37 ; GENERIC-NEXT:    #APP
     38 ; GENERIC-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
     39 ; GENERIC-NEXT:    vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
     40 ; GENERIC-NEXT:    vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
     41 ; GENERIC-NEXT:    #NO_APP
     42 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
     43 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     44 ;
     45 ; BDVER-LABEL: test_vfmaddpd_256:
     46 ; BDVER:       # %bb.0:
     47 ; BDVER-NEXT:    #APP
     48 ; BDVER-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
     49 ; BDVER-NEXT:    vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0
     50 ; BDVER-NEXT:    vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0
     51 ; BDVER-NEXT:    #NO_APP
     52 ; BDVER-NEXT:    vzeroupper
     53 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
     54   tail call void asm "vfmaddpd $2, $1, $0, $0 \0A\09 vfmaddpd $3, $1, $0, $0 \0A\09 vfmaddpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
     55   ret void
     56 }
     57 
     58 define void @test_vfmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
        ; Prints vfmaddps (xmm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do not.
     59 ; GENERIC-LABEL: test_vfmaddps_128:
     60 ; GENERIC:       # %bb.0:
     61 ; GENERIC-NEXT:    #APP
     62 ; GENERIC-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
     63 ; GENERIC-NEXT:    vfmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
     64 ; GENERIC-NEXT:    vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
     65 ; GENERIC-NEXT:    #NO_APP
     66 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     67 ;
     68 ; BDVER-LABEL: test_vfmaddps_128:
     69 ; BDVER:       # %bb.0:
     70 ; BDVER-NEXT:    #APP
     71 ; BDVER-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
     72 ; BDVER-NEXT:    vfmaddps (%rdi), %xmm1, %xmm0, %xmm0
     73 ; BDVER-NEXT:    vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
     74 ; BDVER-NEXT:    #NO_APP
     75 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
     76   tail call void asm "vfmaddps $2, $1, $0, $0 \0A\09 vfmaddps $3, $1, $0, $0 \0A\09 vfmaddps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
     77   ret void
     78 }
     79 
     80 define void @test_vfmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
        ; Prints vfmaddps (ymm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do
        ; not. Both sets of expectations include a vzeroupper before the return.
     81 ; GENERIC-LABEL: test_vfmaddps_256:
     82 ; GENERIC:       # %bb.0:
     83 ; GENERIC-NEXT:    #APP
     84 ; GENERIC-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
     85 ; GENERIC-NEXT:    vfmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
     86 ; GENERIC-NEXT:    vfmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
     87 ; GENERIC-NEXT:    #NO_APP
     88 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
     89 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     90 ;
     91 ; BDVER-LABEL: test_vfmaddps_256:
     92 ; BDVER:       # %bb.0:
     93 ; BDVER-NEXT:    #APP
     94 ; BDVER-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
     95 ; BDVER-NEXT:    vfmaddps (%rdi), %ymm1, %ymm0, %ymm0
     96 ; BDVER-NEXT:    vfmaddps %ymm1, (%rdi), %ymm0, %ymm0
     97 ; BDVER-NEXT:    #NO_APP
     98 ; BDVER-NEXT:    vzeroupper
     99 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    100   tail call void asm "vfmaddps $2, $1, $0, $0 \0A\09 vfmaddps $3, $1, $0, $0 \0A\09 vfmaddps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
    101   ret void
    102 }
    103 
    104 define void @test_vfmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
        ; Prints vfmaddsd (xmm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do not.
    105 ; GENERIC-LABEL: test_vfmaddsd_128:
    106 ; GENERIC:       # %bb.0:
    107 ; GENERIC-NEXT:    #APP
    108 ; GENERIC-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
    109 ; GENERIC-NEXT:    vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
    110 ; GENERIC-NEXT:    vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    111 ; GENERIC-NEXT:    #NO_APP
    112 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    113 ;
    114 ; BDVER-LABEL: test_vfmaddsd_128:
    115 ; BDVER:       # %bb.0:
    116 ; BDVER-NEXT:    #APP
    117 ; BDVER-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
    118 ; BDVER-NEXT:    vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0
    119 ; BDVER-NEXT:    vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0
    120 ; BDVER-NEXT:    #NO_APP
    121 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    122   tail call void asm "vfmaddsd $2, $1, $0, $0 \0A\09 vfmaddsd $3, $1, $0, $0 \0A\09 vfmaddsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
    123   ret void
    124 }
    125 
    126 define void @test_vfmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
        ; Prints vfmaddss (xmm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do not.
    127 ; GENERIC-LABEL: test_vfmaddss_128:
    128 ; GENERIC:       # %bb.0:
    129 ; GENERIC-NEXT:    #APP
    130 ; GENERIC-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
    131 ; GENERIC-NEXT:    vfmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
    132 ; GENERIC-NEXT:    vfmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    133 ; GENERIC-NEXT:    #NO_APP
    134 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    135 ;
    136 ; BDVER-LABEL: test_vfmaddss_128:
    137 ; BDVER:       # %bb.0:
    138 ; BDVER-NEXT:    #APP
    139 ; BDVER-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
    140 ; BDVER-NEXT:    vfmaddss (%rdi), %xmm1, %xmm0, %xmm0
    141 ; BDVER-NEXT:    vfmaddss %xmm1, (%rdi), %xmm0, %xmm0
    142 ; BDVER-NEXT:    #NO_APP
    143 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    144   tail call void asm "vfmaddss $2, $1, $0, $0 \0A\09 vfmaddss $3, $1, $0, $0 \0A\09 vfmaddss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
    145   ret void
    146 }
    147 
    148 ;
    149 ; VFMADDSUB
    150 ;
    151 
    152 define void @test_vfmaddsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
        ; Prints vfmaddsubpd (xmm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do not.
    153 ; GENERIC-LABEL: test_vfmaddsubpd_128:
    154 ; GENERIC:       # %bb.0:
    155 ; GENERIC-NEXT:    #APP
    156 ; GENERIC-NEXT:    vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
    157 ; GENERIC-NEXT:    vfmaddsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
    158 ; GENERIC-NEXT:    vfmaddsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    159 ; GENERIC-NEXT:    #NO_APP
    160 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    161 ;
    162 ; BDVER-LABEL: test_vfmaddsubpd_128:
    163 ; BDVER:       # %bb.0:
    164 ; BDVER-NEXT:    #APP
    165 ; BDVER-NEXT:    vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
    166 ; BDVER-NEXT:    vfmaddsubpd (%rdi), %xmm1, %xmm0, %xmm0
    167 ; BDVER-NEXT:    vfmaddsubpd %xmm1, (%rdi), %xmm0, %xmm0
    168 ; BDVER-NEXT:    #NO_APP
    169 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    170   tail call void asm "vfmaddsubpd $2, $1, $0, $0 \0A\09 vfmaddsubpd $3, $1, $0, $0 \0A\09 vfmaddsubpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
    171   ret void
    172 }
    173 
    174 define void @test_vfmaddsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
        ; Prints vfmaddsubpd (ymm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do
        ; not. Both sets of expectations include a vzeroupper before the return.
    175 ; GENERIC-LABEL: test_vfmaddsubpd_256:
    176 ; GENERIC:       # %bb.0:
    177 ; GENERIC-NEXT:    #APP
    178 ; GENERIC-NEXT:    vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
    179 ; GENERIC-NEXT:    vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
    180 ; GENERIC-NEXT:    vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
    181 ; GENERIC-NEXT:    #NO_APP
    182 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
    183 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    184 ;
    185 ; BDVER-LABEL: test_vfmaddsubpd_256:
    186 ; BDVER:       # %bb.0:
    187 ; BDVER-NEXT:    #APP
    188 ; BDVER-NEXT:    vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
    189 ; BDVER-NEXT:    vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0
    190 ; BDVER-NEXT:    vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0
    191 ; BDVER-NEXT:    #NO_APP
    192 ; BDVER-NEXT:    vzeroupper
    193 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    194   tail call void asm "vfmaddsubpd $2, $1, $0, $0 \0A\09 vfmaddsubpd $3, $1, $0, $0 \0A\09 vfmaddsubpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
    195   ret void
    196 }
    197 
    198 define void @test_vfmaddsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
        ; Prints vfmaddsubps (xmm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do not.
    199 ; GENERIC-LABEL: test_vfmaddsubps_128:
    200 ; GENERIC:       # %bb.0:
    201 ; GENERIC-NEXT:    #APP
    202 ; GENERIC-NEXT:    vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
    203 ; GENERIC-NEXT:    vfmaddsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
    204 ; GENERIC-NEXT:    vfmaddsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    205 ; GENERIC-NEXT:    #NO_APP
    206 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    207 ;
    208 ; BDVER-LABEL: test_vfmaddsubps_128:
    209 ; BDVER:       # %bb.0:
    210 ; BDVER-NEXT:    #APP
    211 ; BDVER-NEXT:    vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
    212 ; BDVER-NEXT:    vfmaddsubps (%rdi), %xmm1, %xmm0, %xmm0
    213 ; BDVER-NEXT:    vfmaddsubps %xmm1, (%rdi), %xmm0, %xmm0
    214 ; BDVER-NEXT:    #NO_APP
    215 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    216   tail call void asm "vfmaddsubps $2, $1, $0, $0 \0A\09 vfmaddsubps $3, $1, $0, $0 \0A\09 vfmaddsubps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
    217   ret void
    218 }
    219 
    220 define void @test_vfmaddsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
        ; Prints vfmaddsubps (ymm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do
        ; not. Both sets of expectations include a vzeroupper before the return.
    221 ; GENERIC-LABEL: test_vfmaddsubps_256:
    222 ; GENERIC:       # %bb.0:
    223 ; GENERIC-NEXT:    #APP
    224 ; GENERIC-NEXT:    vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
    225 ; GENERIC-NEXT:    vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
    226 ; GENERIC-NEXT:    vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
    227 ; GENERIC-NEXT:    #NO_APP
    228 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
    229 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    230 ;
    231 ; BDVER-LABEL: test_vfmaddsubps_256:
    232 ; BDVER:       # %bb.0:
    233 ; BDVER-NEXT:    #APP
    234 ; BDVER-NEXT:    vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
    235 ; BDVER-NEXT:    vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0
    236 ; BDVER-NEXT:    vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0
    237 ; BDVER-NEXT:    #NO_APP
    238 ; BDVER-NEXT:    vzeroupper
    239 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    240   tail call void asm "vfmaddsubps $2, $1, $0, $0 \0A\09 vfmaddsubps $3, $1, $0, $0 \0A\09 vfmaddsubps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
    241   ret void
    242 }
    243 
    244 ;
    245 ; VFMSUBADD
    246 ;
    247 
    248 define void @test_vfmsubaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
        ; Prints vfmsubaddpd (xmm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do not.
    249 ; GENERIC-LABEL: test_vfmsubaddpd_128:
    250 ; GENERIC:       # %bb.0:
    251 ; GENERIC-NEXT:    #APP
    252 ; GENERIC-NEXT:    vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
    253 ; GENERIC-NEXT:    vfmsubaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
    254 ; GENERIC-NEXT:    vfmsubaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    255 ; GENERIC-NEXT:    #NO_APP
    256 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    257 ;
    258 ; BDVER-LABEL: test_vfmsubaddpd_128:
    259 ; BDVER:       # %bb.0:
    260 ; BDVER-NEXT:    #APP
    261 ; BDVER-NEXT:    vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
    262 ; BDVER-NEXT:    vfmsubaddpd (%rdi), %xmm1, %xmm0, %xmm0
    263 ; BDVER-NEXT:    vfmsubaddpd %xmm1, (%rdi), %xmm0, %xmm0
    264 ; BDVER-NEXT:    #NO_APP
    265 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    266   tail call void asm "vfmsubaddpd $2, $1, $0, $0 \0A\09 vfmsubaddpd $3, $1, $0, $0 \0A\09 vfmsubaddpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
    267   ret void
    268 }
    269 
    270 define void @test_vfmsubaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
        ; Prints vfmsubaddpd (ymm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do
        ; not. Both sets of expectations include a vzeroupper before the return.
    271 ; GENERIC-LABEL: test_vfmsubaddpd_256:
    272 ; GENERIC:       # %bb.0:
    273 ; GENERIC-NEXT:    #APP
    274 ; GENERIC-NEXT:    vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
    275 ; GENERIC-NEXT:    vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
    276 ; GENERIC-NEXT:    vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
    277 ; GENERIC-NEXT:    #NO_APP
    278 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
    279 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    280 ;
    281 ; BDVER-LABEL: test_vfmsubaddpd_256:
    282 ; BDVER:       # %bb.0:
    283 ; BDVER-NEXT:    #APP
    284 ; BDVER-NEXT:    vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
    285 ; BDVER-NEXT:    vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0
    286 ; BDVER-NEXT:    vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0
    287 ; BDVER-NEXT:    #NO_APP
    288 ; BDVER-NEXT:    vzeroupper
    289 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    290   tail call void asm "vfmsubaddpd $2, $1, $0, $0 \0A\09 vfmsubaddpd $3, $1, $0, $0 \0A\09 vfmsubaddpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
    291   ret void
    292 }
    293 
    294 define void @test_vfmsubaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
        ; Prints vfmsubaddps (xmm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do not.
    295 ; GENERIC-LABEL: test_vfmsubaddps_128:
    296 ; GENERIC:       # %bb.0:
    297 ; GENERIC-NEXT:    #APP
    298 ; GENERIC-NEXT:    vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
    299 ; GENERIC-NEXT:    vfmsubaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
    300 ; GENERIC-NEXT:    vfmsubaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    301 ; GENERIC-NEXT:    #NO_APP
    302 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    303 ;
    304 ; BDVER-LABEL: test_vfmsubaddps_128:
    305 ; BDVER:       # %bb.0:
    306 ; BDVER-NEXT:    #APP
    307 ; BDVER-NEXT:    vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
    308 ; BDVER-NEXT:    vfmsubaddps (%rdi), %xmm1, %xmm0, %xmm0
    309 ; BDVER-NEXT:    vfmsubaddps %xmm1, (%rdi), %xmm0, %xmm0
    310 ; BDVER-NEXT:    #NO_APP
    311 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    312   tail call void asm "vfmsubaddps $2, $1, $0, $0 \0A\09 vfmsubaddps $3, $1, $0, $0 \0A\09 vfmsubaddps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
    313   ret void
    314 }
    315 
    316 define void @test_vfmsubaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
        ; Prints vfmsubaddps (ymm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do
        ; not. Both sets of expectations include a vzeroupper before the return.
    317 ; GENERIC-LABEL: test_vfmsubaddps_256:
    318 ; GENERIC:       # %bb.0:
    319 ; GENERIC-NEXT:    #APP
    320 ; GENERIC-NEXT:    vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
    321 ; GENERIC-NEXT:    vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
    322 ; GENERIC-NEXT:    vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
    323 ; GENERIC-NEXT:    #NO_APP
    324 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
    325 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    326 ;
    327 ; BDVER-LABEL: test_vfmsubaddps_256:
    328 ; BDVER:       # %bb.0:
    329 ; BDVER-NEXT:    #APP
    330 ; BDVER-NEXT:    vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
    331 ; BDVER-NEXT:    vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0
    332 ; BDVER-NEXT:    vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0
    333 ; BDVER-NEXT:    #NO_APP
    334 ; BDVER-NEXT:    vzeroupper
    335 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    336   tail call void asm "vfmsubaddps $2, $1, $0, $0 \0A\09 vfmsubaddps $3, $1, $0, $0 \0A\09 vfmsubaddps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
    337   ret void
    338 }
    339 
    340 ;
    341 ; VFMSUB
    342 ;
    343 
    344 define void @test_vfmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
        ; Prints vfmsubpd (xmm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do not.
    345 ; GENERIC-LABEL: test_vfmsubpd_128:
    346 ; GENERIC:       # %bb.0:
    347 ; GENERIC-NEXT:    #APP
    348 ; GENERIC-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
    349 ; GENERIC-NEXT:    vfmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
    350 ; GENERIC-NEXT:    vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    351 ; GENERIC-NEXT:    #NO_APP
    352 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    353 ;
    354 ; BDVER-LABEL: test_vfmsubpd_128:
    355 ; BDVER:       # %bb.0:
    356 ; BDVER-NEXT:    #APP
    357 ; BDVER-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
    358 ; BDVER-NEXT:    vfmsubpd (%rdi), %xmm1, %xmm0, %xmm0
    359 ; BDVER-NEXT:    vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0
    360 ; BDVER-NEXT:    #NO_APP
    361 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    362   tail call void asm "vfmsubpd $2, $1, $0, $0 \0A\09 vfmsubpd $3, $1, $0, $0 \0A\09 vfmsubpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
    363   ret void
    364 }
    365 
    366 define void @test_vfmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
        ; Prints vfmsubpd (ymm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do
        ; not. Both sets of expectations include a vzeroupper before the return.
    367 ; GENERIC-LABEL: test_vfmsubpd_256:
    368 ; GENERIC:       # %bb.0:
    369 ; GENERIC-NEXT:    #APP
    370 ; GENERIC-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
    371 ; GENERIC-NEXT:    vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
    372 ; GENERIC-NEXT:    vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
    373 ; GENERIC-NEXT:    #NO_APP
    374 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
    375 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    376 ;
    377 ; BDVER-LABEL: test_vfmsubpd_256:
    378 ; BDVER:       # %bb.0:
    379 ; BDVER-NEXT:    #APP
    380 ; BDVER-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
    381 ; BDVER-NEXT:    vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0
    382 ; BDVER-NEXT:    vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0
    383 ; BDVER-NEXT:    #NO_APP
    384 ; BDVER-NEXT:    vzeroupper
    385 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    386   tail call void asm "vfmsubpd $2, $1, $0, $0 \0A\09 vfmsubpd $3, $1, $0, $0 \0A\09 vfmsubpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
    387   ret void
    388 }
    389 
    390 define void @test_vfmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
        ; Prints vfmsubps (xmm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do not.
    391 ; GENERIC-LABEL: test_vfmsubps_128:
    392 ; GENERIC:       # %bb.0:
    393 ; GENERIC-NEXT:    #APP
    394 ; GENERIC-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
    395 ; GENERIC-NEXT:    vfmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
    396 ; GENERIC-NEXT:    vfmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    397 ; GENERIC-NEXT:    #NO_APP
    398 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    399 ;
    400 ; BDVER-LABEL: test_vfmsubps_128:
    401 ; BDVER:       # %bb.0:
    402 ; BDVER-NEXT:    #APP
    403 ; BDVER-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
    404 ; BDVER-NEXT:    vfmsubps (%rdi), %xmm1, %xmm0, %xmm0
    405 ; BDVER-NEXT:    vfmsubps %xmm1, (%rdi), %xmm0, %xmm0
    406 ; BDVER-NEXT:    #NO_APP
    407 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    408   tail call void asm "vfmsubps $2, $1, $0, $0 \0A\09 vfmsubps $3, $1, $0, $0 \0A\09 vfmsubps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
    409   ret void
    410 }
    411 
    412 define void @test_vfmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
        ; Prints vfmsubps (ymm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do
        ; not. Both sets of expectations include a vzeroupper before the return.
    413 ; GENERIC-LABEL: test_vfmsubps_256:
    414 ; GENERIC:       # %bb.0:
    415 ; GENERIC-NEXT:    #APP
    416 ; GENERIC-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
    417 ; GENERIC-NEXT:    vfmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
    418 ; GENERIC-NEXT:    vfmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
    419 ; GENERIC-NEXT:    #NO_APP
    420 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
    421 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    422 ;
    423 ; BDVER-LABEL: test_vfmsubps_256:
    424 ; BDVER:       # %bb.0:
    425 ; BDVER-NEXT:    #APP
    426 ; BDVER-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
    427 ; BDVER-NEXT:    vfmsubps (%rdi), %ymm1, %ymm0, %ymm0
    428 ; BDVER-NEXT:    vfmsubps %ymm1, (%rdi), %ymm0, %ymm0
    429 ; BDVER-NEXT:    #NO_APP
    430 ; BDVER-NEXT:    vzeroupper
    431 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    432   tail call void asm "vfmsubps $2, $1, $0, $0 \0A\09 vfmsubps $3, $1, $0, $0 \0A\09 vfmsubps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
    433   ret void
    434 }
    435 
    436 define void @test_vfmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
        ; Prints vfmsubsd (xmm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do not.
    437 ; GENERIC-LABEL: test_vfmsubsd_128:
    438 ; GENERIC:       # %bb.0:
    439 ; GENERIC-NEXT:    #APP
    440 ; GENERIC-NEXT:    vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
    441 ; GENERIC-NEXT:    vfmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
    442 ; GENERIC-NEXT:    vfmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    443 ; GENERIC-NEXT:    #NO_APP
    444 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    445 ;
    446 ; BDVER-LABEL: test_vfmsubsd_128:
    447 ; BDVER:       # %bb.0:
    448 ; BDVER-NEXT:    #APP
    449 ; BDVER-NEXT:    vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
    450 ; BDVER-NEXT:    vfmsubsd (%rdi), %xmm1, %xmm0, %xmm0
    451 ; BDVER-NEXT:    vfmsubsd %xmm1, (%rdi), %xmm0, %xmm0
    452 ; BDVER-NEXT:    #NO_APP
    453 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    454   tail call void asm "vfmsubsd $2, $1, $0, $0 \0A\09 vfmsubsd $3, $1, $0, $0 \0A\09 vfmsubsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
    455   ret void
    456 }
    457 
    458 define void @test_vfmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
        ; Prints vfmsubss (xmm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do not.
    459 ; GENERIC-LABEL: test_vfmsubss_128:
    460 ; GENERIC:       # %bb.0:
    461 ; GENERIC-NEXT:    #APP
    462 ; GENERIC-NEXT:    vfmsubss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
    463 ; GENERIC-NEXT:    vfmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
    464 ; GENERIC-NEXT:    vfmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    465 ; GENERIC-NEXT:    #NO_APP
    466 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    467 ;
    468 ; BDVER-LABEL: test_vfmsubss_128:
    469 ; BDVER:       # %bb.0:
    470 ; BDVER-NEXT:    #APP
    471 ; BDVER-NEXT:    vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
    472 ; BDVER-NEXT:    vfmsubss (%rdi), %xmm1, %xmm0, %xmm0
    473 ; BDVER-NEXT:    vfmsubss %xmm1, (%rdi), %xmm0, %xmm0
    474 ; BDVER-NEXT:    #NO_APP
    475 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    476   tail call void asm "vfmsubss $2, $1, $0, $0 \0A\09 vfmsubss $3, $1, $0, $0 \0A\09 vfmsubss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
    477   ret void
    478 }
    479 
    480 ;
    481 ; VFNMADD
    482 ;
    483 
    484 define void @test_vfnmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
        ; Prints vfnmaddpd (xmm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do not.
    485 ; GENERIC-LABEL: test_vfnmaddpd_128:
    486 ; GENERIC:       # %bb.0:
    487 ; GENERIC-NEXT:    #APP
    488 ; GENERIC-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
    489 ; GENERIC-NEXT:    vfnmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
    490 ; GENERIC-NEXT:    vfnmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    491 ; GENERIC-NEXT:    #NO_APP
    492 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    493 ;
    494 ; BDVER-LABEL: test_vfnmaddpd_128:
    495 ; BDVER:       # %bb.0:
    496 ; BDVER-NEXT:    #APP
    497 ; BDVER-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
    498 ; BDVER-NEXT:    vfnmaddpd (%rdi), %xmm1, %xmm0, %xmm0
    499 ; BDVER-NEXT:    vfnmaddpd %xmm1, (%rdi), %xmm0, %xmm0
    500 ; BDVER-NEXT:    #NO_APP
    501 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    502   tail call void asm "vfnmaddpd $2, $1, $0, $0 \0A\09 vfnmaddpd $3, $1, $0, $0 \0A\09 vfnmaddpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
    503   ret void
    504 }
    505 
    506 define void @test_vfnmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
        ; Prints vfnmaddpd (ymm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do
        ; not. Both sets of expectations include a vzeroupper before the return.
    507 ; GENERIC-LABEL: test_vfnmaddpd_256:
    508 ; GENERIC:       # %bb.0:
    509 ; GENERIC-NEXT:    #APP
    510 ; GENERIC-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
    511 ; GENERIC-NEXT:    vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
    512 ; GENERIC-NEXT:    vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
    513 ; GENERIC-NEXT:    #NO_APP
    514 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
    515 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    516 ;
    517 ; BDVER-LABEL: test_vfnmaddpd_256:
    518 ; BDVER:       # %bb.0:
    519 ; BDVER-NEXT:    #APP
    520 ; BDVER-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
    521 ; BDVER-NEXT:    vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0
    522 ; BDVER-NEXT:    vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0
    523 ; BDVER-NEXT:    #NO_APP
    524 ; BDVER-NEXT:    vzeroupper
    525 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    526   tail call void asm "vfnmaddpd $2, $1, $0, $0 \0A\09 vfnmaddpd $3, $1, $0, $0 \0A\09 vfnmaddpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
    527   ret void
    528 }
    529 
    530 define void @test_vfnmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
        ; Prints vfnmaddps (xmm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do not.
    531 ; GENERIC-LABEL: test_vfnmaddps_128:
    532 ; GENERIC:       # %bb.0:
    533 ; GENERIC-NEXT:    #APP
    534 ; GENERIC-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
    535 ; GENERIC-NEXT:    vfnmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
    536 ; GENERIC-NEXT:    vfnmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    537 ; GENERIC-NEXT:    #NO_APP
    538 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    539 ;
    540 ; BDVER-LABEL: test_vfnmaddps_128:
    541 ; BDVER:       # %bb.0:
    542 ; BDVER-NEXT:    #APP
    543 ; BDVER-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
    544 ; BDVER-NEXT:    vfnmaddps (%rdi), %xmm1, %xmm0, %xmm0
    545 ; BDVER-NEXT:    vfnmaddps %xmm1, (%rdi), %xmm0, %xmm0
    546 ; BDVER-NEXT:    #NO_APP
    547 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    548   tail call void asm "vfnmaddps $2, $1, $0, $0 \0A\09 vfnmaddps $3, $1, $0, $0 \0A\09 vfnmaddps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
    549   ret void
    550 }
    551 
    552 define void @test_vfnmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
        ; Prints vfnmaddps (ymm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do
        ; not. Both sets of expectations include a vzeroupper before the return.
    553 ; GENERIC-LABEL: test_vfnmaddps_256:
    554 ; GENERIC:       # %bb.0:
    555 ; GENERIC-NEXT:    #APP
    556 ; GENERIC-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
    557 ; GENERIC-NEXT:    vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
    558 ; GENERIC-NEXT:    vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
    559 ; GENERIC-NEXT:    #NO_APP
    560 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
    561 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    562 ;
    563 ; BDVER-LABEL: test_vfnmaddps_256:
    564 ; BDVER:       # %bb.0:
    565 ; BDVER-NEXT:    #APP
    566 ; BDVER-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
    567 ; BDVER-NEXT:    vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0
    568 ; BDVER-NEXT:    vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0
    569 ; BDVER-NEXT:    #NO_APP
    570 ; BDVER-NEXT:    vzeroupper
    571 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    572   tail call void asm "vfnmaddps $2, $1, $0, $0 \0A\09 vfnmaddps $3, $1, $0, $0 \0A\09 vfnmaddps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
    573   ret void
    574 }
    575 
    576 define void @test_vfnmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
        ; Prints vfnmaddsd (xmm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do not.
    577 ; GENERIC-LABEL: test_vfnmaddsd_128:
    578 ; GENERIC:       # %bb.0:
    579 ; GENERIC-NEXT:    #APP
    580 ; GENERIC-NEXT:    vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
    581 ; GENERIC-NEXT:    vfnmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
    582 ; GENERIC-NEXT:    vfnmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    583 ; GENERIC-NEXT:    #NO_APP
    584 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    585 ;
    586 ; BDVER-LABEL: test_vfnmaddsd_128:
    587 ; BDVER:       # %bb.0:
    588 ; BDVER-NEXT:    #APP
    589 ; BDVER-NEXT:    vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
    590 ; BDVER-NEXT:    vfnmaddsd (%rdi), %xmm1, %xmm0, %xmm0
    591 ; BDVER-NEXT:    vfnmaddsd %xmm1, (%rdi), %xmm0, %xmm0
    592 ; BDVER-NEXT:    #NO_APP
    593 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    594   tail call void asm "vfnmaddsd $2, $1, $0, $0 \0A\09 vfnmaddsd $3, $1, $0, $0 \0A\09 vfnmaddsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
    595   ret void
    596 }
    597 
    598 define void @test_vfnmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
        ; Prints vfnmaddss (xmm) in three operand shapes: all registers, memory in the
        ; first operand slot, and memory in the second operand slot. Only the generic
        ; (-mcpu=x86-64) expectations carry "# sched" annotations; the bdver ones do not.
    599 ; GENERIC-LABEL: test_vfnmaddss_128:
    600 ; GENERIC:       # %bb.0:
    601 ; GENERIC-NEXT:    #APP
    602 ; GENERIC-NEXT:    vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
    603 ; GENERIC-NEXT:    vfnmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
    604 ; GENERIC-NEXT:    vfnmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    605 ; GENERIC-NEXT:    #NO_APP
    606 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    607 ;
    608 ; BDVER-LABEL: test_vfnmaddss_128:
    609 ; BDVER:       # %bb.0:
    610 ; BDVER-NEXT:    #APP
    611 ; BDVER-NEXT:    vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
    612 ; BDVER-NEXT:    vfnmaddss (%rdi), %xmm1, %xmm0, %xmm0
    613 ; BDVER-NEXT:    vfnmaddss %xmm1, (%rdi), %xmm0, %xmm0
    614 ; BDVER-NEXT:    #NO_APP
    615 ; BDVER-NEXT:    retq
        ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 ("x" constraints); $3 is the memory behind %a3 ("*m").
    616   tail call void asm "vfnmaddss $2, $1, $0, $0 \0A\09 vfnmaddss $3, $1, $0, $0 \0A\09 vfnmaddss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
    617   ret void
    618 }
    619 
    620 ;
    621 ; VFNMSUB
    622 ;
    623 
    624 define void @test_vfnmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
        ; Scheduling test for vfnmsubpd on xmm operands. The inline asm in this
        ; function emits one register-only form and two forms that read (%rdi).
        ; The generic-CPU assertions expect a "# sched" note of [5:0.50] on the
        ; register form and [10:0.50] on both memory forms; the bdver assertions
        ; expect the same three instructions with no "# sched" note.
    625 ; GENERIC-LABEL: test_vfnmsubpd_128:
    626 ; GENERIC:       # %bb.0:
    627 ; GENERIC-NEXT:    #APP
    628 ; GENERIC-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
    629 ; GENERIC-NEXT:    vfnmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
    630 ; GENERIC-NEXT:    vfnmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    631 ; GENERIC-NEXT:    #NO_APP
    632 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    633 ;
    634 ; BDVER-LABEL: test_vfnmsubpd_128:
    635 ; BDVER:       # %bb.0:
    636 ; BDVER-NEXT:    #APP
    637 ; BDVER-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
    638 ; BDVER-NEXT:    vfnmsubpd (%rdi), %xmm1, %xmm0, %xmm0
    639 ; BDVER-NEXT:    vfnmsubpd %xmm1, (%rdi), %xmm0, %xmm0
    640 ; BDVER-NEXT:    #NO_APP
    641 ; BDVER-NEXT:    retq
          ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 under the "x" constraint
          ; (xmm0, xmm1, xmm2 in the expected output); $3 is %a3 passed as an
          ; indirect memory operand ("*m"), printed as (%rdi). The three asm
          ; lines place the memory operand in no slot, the first source slot,
          ; and the second source slot respectively.
    642   tail call void asm "vfnmsubpd $2, $1, $0, $0 \0A\09 vfnmsubpd $3, $1, $0, $0 \0A\09 vfnmsubpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
    643   ret void
    644 }
    645 
    646 define void @test_vfnmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
        ; Scheduling test for vfnmsubpd on ymm operands. The inline asm in this
        ; function emits one register-only form and two forms that read (%rdi).
        ; The generic-CPU assertions expect a "# sched" note of [5:0.50] on the
        ; register form and [10:0.50] on both memory forms; the bdver assertions
        ; expect the same three instructions with no "# sched" note. Both sets
        ; also expect a vzeroupper between the asm block and retq (annotated
        ; [100:0.33] in the generic-CPU output).
    647 ; GENERIC-LABEL: test_vfnmsubpd_256:
    648 ; GENERIC:       # %bb.0:
    649 ; GENERIC-NEXT:    #APP
    650 ; GENERIC-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
    651 ; GENERIC-NEXT:    vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
    652 ; GENERIC-NEXT:    vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
    653 ; GENERIC-NEXT:    #NO_APP
    654 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
    655 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    656 ;
    657 ; BDVER-LABEL: test_vfnmsubpd_256:
    658 ; BDVER:       # %bb.0:
    659 ; BDVER-NEXT:    #APP
    660 ; BDVER-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
    661 ; BDVER-NEXT:    vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0
    662 ; BDVER-NEXT:    vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0
    663 ; BDVER-NEXT:    #NO_APP
    664 ; BDVER-NEXT:    vzeroupper
    665 ; BDVER-NEXT:    retq
          ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 under the "x" constraint
          ; (ymm0, ymm1, ymm2 in the expected output); $3 is %a3 passed as an
          ; indirect memory operand ("*m"), printed as (%rdi). The three asm
          ; lines place the memory operand in no slot, the first source slot,
          ; and the second source slot respectively.
    666   tail call void asm "vfnmsubpd $2, $1, $0, $0 \0A\09 vfnmsubpd $3, $1, $0, $0 \0A\09 vfnmsubpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
    667   ret void
    668 }
    669 
    670 define void @test_vfnmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
        ; Scheduling test for vfnmsubps on xmm operands. The inline asm in this
        ; function emits one register-only form and two forms that read (%rdi).
        ; The generic-CPU assertions expect a "# sched" note of [5:0.50] on the
        ; register form and [10:0.50] on both memory forms; the bdver assertions
        ; expect the same three instructions with no "# sched" note.
    671 ; GENERIC-LABEL: test_vfnmsubps_128:
    672 ; GENERIC:       # %bb.0:
    673 ; GENERIC-NEXT:    #APP
    674 ; GENERIC-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
    675 ; GENERIC-NEXT:    vfnmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
    676 ; GENERIC-NEXT:    vfnmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    677 ; GENERIC-NEXT:    #NO_APP
    678 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    679 ;
    680 ; BDVER-LABEL: test_vfnmsubps_128:
    681 ; BDVER:       # %bb.0:
    682 ; BDVER-NEXT:    #APP
    683 ; BDVER-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
    684 ; BDVER-NEXT:    vfnmsubps (%rdi), %xmm1, %xmm0, %xmm0
    685 ; BDVER-NEXT:    vfnmsubps %xmm1, (%rdi), %xmm0, %xmm0
    686 ; BDVER-NEXT:    #NO_APP
    687 ; BDVER-NEXT:    retq
          ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 under the "x" constraint
          ; (xmm0, xmm1, xmm2 in the expected output); $3 is %a3 passed as an
          ; indirect memory operand ("*m"), printed as (%rdi). The three asm
          ; lines place the memory operand in no slot, the first source slot,
          ; and the second source slot respectively.
    688   tail call void asm "vfnmsubps $2, $1, $0, $0 \0A\09 vfnmsubps $3, $1, $0, $0 \0A\09 vfnmsubps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
    689   ret void
    690 }
    691 
    692 define void @test_vfnmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
        ; Scheduling test for vfnmsubps on ymm operands. The inline asm in this
        ; function emits one register-only form and two forms that read (%rdi).
        ; The generic-CPU assertions expect a "# sched" note of [5:0.50] on the
        ; register form and [10:0.50] on both memory forms; the bdver assertions
        ; expect the same three instructions with no "# sched" note. Both sets
        ; also expect a vzeroupper between the asm block and retq (annotated
        ; [100:0.33] in the generic-CPU output).
    693 ; GENERIC-LABEL: test_vfnmsubps_256:
    694 ; GENERIC:       # %bb.0:
    695 ; GENERIC-NEXT:    #APP
    696 ; GENERIC-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
    697 ; GENERIC-NEXT:    vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
    698 ; GENERIC-NEXT:    vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
    699 ; GENERIC-NEXT:    #NO_APP
    700 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
    701 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    702 ;
    703 ; BDVER-LABEL: test_vfnmsubps_256:
    704 ; BDVER:       # %bb.0:
    705 ; BDVER-NEXT:    #APP
    706 ; BDVER-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
    707 ; BDVER-NEXT:    vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0
    708 ; BDVER-NEXT:    vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0
    709 ; BDVER-NEXT:    #NO_APP
    710 ; BDVER-NEXT:    vzeroupper
    711 ; BDVER-NEXT:    retq
          ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 under the "x" constraint
          ; (ymm0, ymm1, ymm2 in the expected output); $3 is %a3 passed as an
          ; indirect memory operand ("*m"), printed as (%rdi). The three asm
          ; lines place the memory operand in no slot, the first source slot,
          ; and the second source slot respectively.
    712   tail call void asm "vfnmsubps $2, $1, $0, $0 \0A\09 vfnmsubps $3, $1, $0, $0 \0A\09 vfnmsubps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
    713   ret void
    714 }
    715 
    716 define void @test_vfnmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
        ; Scheduling test for vfnmsubsd on xmm operands. The inline asm in this
        ; function emits one register-only form and two forms that read (%rdi).
        ; The generic-CPU assertions expect a "# sched" note of [5:0.50] on the
        ; register form and [10:0.50] on both memory forms; the bdver assertions
        ; expect the same three instructions with no "# sched" note.
    717 ; GENERIC-LABEL: test_vfnmsubsd_128:
    718 ; GENERIC:       # %bb.0:
    719 ; GENERIC-NEXT:    #APP
    720 ; GENERIC-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
    721 ; GENERIC-NEXT:    vfnmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
    722 ; GENERIC-NEXT:    vfnmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    723 ; GENERIC-NEXT:    #NO_APP
    724 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    725 ;
    726 ; BDVER-LABEL: test_vfnmsubsd_128:
    727 ; BDVER:       # %bb.0:
    728 ; BDVER-NEXT:    #APP
    729 ; BDVER-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
    730 ; BDVER-NEXT:    vfnmsubsd (%rdi), %xmm1, %xmm0, %xmm0
    731 ; BDVER-NEXT:    vfnmsubsd %xmm1, (%rdi), %xmm0, %xmm0
    732 ; BDVER-NEXT:    #NO_APP
    733 ; BDVER-NEXT:    retq
          ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 under the "x" constraint
          ; (xmm0, xmm1, xmm2 in the expected output); $3 is %a3 passed as an
          ; indirect memory operand ("*m"), printed as (%rdi). The three asm
          ; lines place the memory operand in no slot, the first source slot,
          ; and the second source slot respectively.
    734   tail call void asm "vfnmsubsd $2, $1, $0, $0 \0A\09 vfnmsubsd $3, $1, $0, $0 \0A\09 vfnmsubsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
    735   ret void
    736 }
    737 
    738 define void @test_vfnmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
        ; Scheduling test for vfnmsubss on xmm operands. The inline asm in this
        ; function emits one register-only form and two forms that read (%rdi).
        ; The generic-CPU assertions expect a "# sched" note of [5:0.50] on the
        ; register form and [10:0.50] on both memory forms; the bdver assertions
        ; expect the same three instructions with no "# sched" note.
    739 ; GENERIC-LABEL: test_vfnmsubss_128:
    740 ; GENERIC:       # %bb.0:
    741 ; GENERIC-NEXT:    #APP
    742 ; GENERIC-NEXT:    vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
    743 ; GENERIC-NEXT:    vfnmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
    744 ; GENERIC-NEXT:    vfnmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    745 ; GENERIC-NEXT:    #NO_APP
    746 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    747 ;
    748 ; BDVER-LABEL: test_vfnmsubss_128:
    749 ; BDVER:       # %bb.0:
    750 ; BDVER-NEXT:    #APP
    751 ; BDVER-NEXT:    vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
    752 ; BDVER-NEXT:    vfnmsubss (%rdi), %xmm1, %xmm0, %xmm0
    753 ; BDVER-NEXT:    vfnmsubss %xmm1, (%rdi), %xmm0, %xmm0
    754 ; BDVER-NEXT:    #NO_APP
    755 ; BDVER-NEXT:    retq
          ; Asm operands: $0, $1, $2 are %a0, %a1, %a2 under the "x" constraint
          ; (xmm0, xmm1, xmm2 in the expected output); $3 is %a3 passed as an
          ; indirect memory operand ("*m"), printed as (%rdi). The three asm
          ; lines place the memory operand in no slot, the first source slot,
          ; and the second source slot respectively.
    756   tail call void asm "vfnmsubss $2, $1, $0, $0 \0A\09 vfnmsubss $3, $1, $0, $0 \0A\09 vfnmsubss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
    757   ret void
    758 }
    759