Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s --check-prefix=FMA
      3 ; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma | FileCheck %s --check-prefix=FMA
      4 ; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s --check-prefix=FMA
      5 
      6 attributes #0 = { nounwind }
      7 
      8 declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
      9 define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfmadd.ss is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a vmovss load into xmm1, then register-only vfmadd213ss, xmm0 = (xmm1 * xmm0) + xmm1.
     10 ; FMA-LABEL: test_x86_fmadd_baa_ss:
     11 ; FMA:       # %bb.0:
     12 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
     13 ; FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
     14 ; FMA-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
     15 ; FMA-NEXT:    retq
     16   %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
     17   ret <4 x float> %res
     18 }
     19 
     20 define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfmadd.ss is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfmadd132ss with (%rdx) as its memory operand.
     21 ; FMA-LABEL: test_x86_fmadd_aba_ss:
     22 ; FMA:       # %bb.0:
     23 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
     24 ; FMA-NEXT:    vfmadd132ss (%rdx), %xmm0, %xmm0
     25 ; FMA-NEXT:    retq
     26   %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
     27   ret <4 x float> %res
     28 }
     29 
     30 define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfmadd.ss is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into xmm0, then vfmadd213ss with (%rcx) as its memory operand.
     31 ; FMA-LABEL: test_x86_fmadd_bba_ss:
     32 ; FMA:       # %bb.0:
     33 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
     34 ; FMA-NEXT:    vfmadd213ss (%rcx), %xmm0, %xmm0
     35 ; FMA-NEXT:    retq
     36   %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
     37   ret <4 x float> %res
     38 }
     39 
     40 declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
     41 define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfmadd.ps is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfmadd132ps, xmm0 = (xmm0 * mem) + xmm0.
     42 ; FMA-LABEL: test_x86_fmadd_baa_ps:
     43 ; FMA:       # %bb.0:
     44 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
     45 ; FMA-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
     46 ; FMA-NEXT:    retq
     47   %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
     48   ret <4 x float> %res
     49 }
     50 
     51 define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfmadd.ps is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfmadd231ps, xmm0 = (xmm0 * mem) + xmm0.
     52 ; FMA-LABEL: test_x86_fmadd_aba_ps:
     53 ; FMA:       # %bb.0:
     54 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
     55 ; FMA-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
     56 ; FMA-NEXT:    retq
     57   %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
     58   ret <4 x float> %res
     59 }
     60 
     61 define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfmadd.ps is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into xmm0, then vfmadd213ps, xmm0 = (xmm0 * xmm0) + mem.
     62 ; FMA-LABEL: test_x86_fmadd_bba_ps:
     63 ; FMA:       # %bb.0:
     64 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
     65 ; FMA-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
     66 ; FMA-NEXT:    retq
     67   %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
     68   ret <4 x float> %res
     69 }
     70 
     71 declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
     72 define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfmadd.ps.256 is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into ymm0, then vfmadd132ps, ymm0 = (ymm0 * mem) + ymm0.
     73 ; FMA-LABEL: test_x86_fmadd_baa_ps_y:
     74 ; FMA:       # %bb.0:
     75 ; FMA-NEXT:    vmovaps (%rcx), %ymm0
     76 ; FMA-NEXT:    vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
     77 ; FMA-NEXT:    retq
     78   %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
     79   ret <8 x float> %res
     80 }
     81 
     82 define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfmadd.ps.256 is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into ymm0, then vfmadd231ps, ymm0 = (ymm0 * mem) + ymm0.
     83 ; FMA-LABEL: test_x86_fmadd_aba_ps_y:
     84 ; FMA:       # %bb.0:
     85 ; FMA-NEXT:    vmovaps (%rcx), %ymm0
     86 ; FMA-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
     87 ; FMA-NEXT:    retq
     88   %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
     89   ret <8 x float> %res
     90 }
     91 
     92 define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfmadd.ps.256 is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into ymm0, then vfmadd213ps, ymm0 = (ymm0 * ymm0) + mem.
     93 ; FMA-LABEL: test_x86_fmadd_bba_ps_y:
     94 ; FMA:       # %bb.0:
     95 ; FMA-NEXT:    vmovaps (%rdx), %ymm0
     96 ; FMA-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
     97 ; FMA-NEXT:    retq
     98   %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
     99   ret <8 x float> %res
    100 }
    101 
    102 declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
    103 define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfmadd.sd is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a vmovsd load into xmm1, then register-only vfmadd213sd, xmm0 = (xmm1 * xmm0) + xmm1.
    104 ; FMA-LABEL: test_x86_fmadd_baa_sd:
    105 ; FMA:       # %bb.0:
    106 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
    107 ; FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    108 ; FMA-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
    109 ; FMA-NEXT:    retq
    110   %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
    111   ret <2 x double> %res
    112 }
    113 
    114 define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfmadd.sd is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfmadd132sd with (%rdx) as its memory operand.
    115 ; FMA-LABEL: test_x86_fmadd_aba_sd:
    116 ; FMA:       # %bb.0:
    117 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
    118 ; FMA-NEXT:    vfmadd132sd (%rdx), %xmm0, %xmm0
    119 ; FMA-NEXT:    retq
    120   %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
    121   ret <2 x double> %res
    122 }
    123 
    124 define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfmadd.sd is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into xmm0, then vfmadd213sd with (%rcx) as its memory operand.
    125 ; FMA-LABEL: test_x86_fmadd_bba_sd:
    126 ; FMA:       # %bb.0:
    127 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
    128 ; FMA-NEXT:    vfmadd213sd (%rcx), %xmm0, %xmm0
    129 ; FMA-NEXT:    retq
    130   %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
    131   ret <2 x double> %res
    132 }
    133 
    134 declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
    135 define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfmadd.pd is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfmadd132pd, xmm0 = (xmm0 * mem) + xmm0.
    136 ; FMA-LABEL: test_x86_fmadd_baa_pd:
    137 ; FMA:       # %bb.0:
    138 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
    139 ; FMA-NEXT:    vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
    140 ; FMA-NEXT:    retq
    141   %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
    142   ret <2 x double> %res
    143 }
    144 
    145 define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfmadd.pd is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfmadd231pd, xmm0 = (xmm0 * mem) + xmm0.
    146 ; FMA-LABEL: test_x86_fmadd_aba_pd:
    147 ; FMA:       # %bb.0:
    148 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
    149 ; FMA-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
    150 ; FMA-NEXT:    retq
    151   %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
    152   ret <2 x double> %res
    153 }
    154 
    155 define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfmadd.pd is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into xmm0, then vfmadd213pd, xmm0 = (xmm0 * xmm0) + mem.
    156 ; FMA-LABEL: test_x86_fmadd_bba_pd:
    157 ; FMA:       # %bb.0:
    158 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
    159 ; FMA-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
    160 ; FMA-NEXT:    retq
    161   %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
    162   ret <2 x double> %res
    163 }
    164 
    165 declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
    166 define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfmadd.pd.256 is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into ymm0, then vfmadd132pd, ymm0 = (ymm0 * mem) + ymm0.
    167 ; FMA-LABEL: test_x86_fmadd_baa_pd_y:
    168 ; FMA:       # %bb.0:
    169 ; FMA-NEXT:    vmovapd (%rcx), %ymm0
    170 ; FMA-NEXT:    vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
    171 ; FMA-NEXT:    retq
    172   %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
    173   ret <4 x double> %res
    174 }
    175 
    176 define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfmadd.pd.256 is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into ymm0, then vfmadd231pd, ymm0 = (ymm0 * mem) + ymm0.
    177 ; FMA-LABEL: test_x86_fmadd_aba_pd_y:
    178 ; FMA:       # %bb.0:
    179 ; FMA-NEXT:    vmovapd (%rcx), %ymm0
    180 ; FMA-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm0
    181 ; FMA-NEXT:    retq
    182   %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
    183   ret <4 x double> %res
    184 }
    185 
    186 define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfmadd.pd.256 is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into ymm0, then vfmadd213pd, ymm0 = (ymm0 * ymm0) + mem.
    187 ; FMA-LABEL: test_x86_fmadd_bba_pd_y:
    188 ; FMA:       # %bb.0:
    189 ; FMA-NEXT:    vmovapd (%rdx), %ymm0
    190 ; FMA-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm0 * ymm0) + mem
    191 ; FMA-NEXT:    retq
    192   %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
    193   ret <4 x double> %res
    194 }
    195 
    196 
    197 declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
    198 define <4 x float> @test_x86_fnmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfnmadd.ss is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a vmovss load into xmm1, then register-only vfnmadd213ss, xmm0 = -(xmm1 * xmm0) + xmm1.
    199 ; FMA-LABEL: test_x86_fnmadd_baa_ss:
    200 ; FMA:       # %bb.0:
    201 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
    202 ; FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    203 ; FMA-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
    204 ; FMA-NEXT:    retq
    205   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
    206   ret <4 x float> %res
    207 }
    208 
    209 define <4 x float> @test_x86_fnmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfnmadd.ss is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfnmadd132ss with (%rdx) as its memory operand.
    210 ; FMA-LABEL: test_x86_fnmadd_aba_ss:
    211 ; FMA:       # %bb.0:
    212 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
    213 ; FMA-NEXT:    vfnmadd132ss (%rdx), %xmm0, %xmm0
    214 ; FMA-NEXT:    retq
    215   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
    216   ret <4 x float> %res
    217 }
    218 
    219 define <4 x float> @test_x86_fnmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfnmadd.ss is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into xmm0, then vfnmadd213ss with (%rcx) as its memory operand.
    220 ; FMA-LABEL: test_x86_fnmadd_bba_ss:
    221 ; FMA:       # %bb.0:
    222 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
    223 ; FMA-NEXT:    vfnmadd213ss (%rcx), %xmm0, %xmm0
    224 ; FMA-NEXT:    retq
    225   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
    226   ret <4 x float> %res
    227 }
    228 
    229 declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
    230 define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfnmadd.ps is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfnmadd132ps, xmm0 = -(xmm0 * mem) + xmm0.
    231 ; FMA-LABEL: test_x86_fnmadd_baa_ps:
    232 ; FMA:       # %bb.0:
    233 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
    234 ; FMA-NEXT:    vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
    235 ; FMA-NEXT:    retq
    236   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
    237   ret <4 x float> %res
    238 }
    239 
    240 define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfnmadd.ps is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfnmadd231ps, xmm0 = -(xmm0 * mem) + xmm0.
    241 ; FMA-LABEL: test_x86_fnmadd_aba_ps:
    242 ; FMA:       # %bb.0:
    243 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
    244 ; FMA-NEXT:    vfnmadd231ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
    245 ; FMA-NEXT:    retq
    246   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
    247   ret <4 x float> %res
    248 }
    249 
    250 define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfnmadd.ps is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into xmm0, then vfnmadd213ps, xmm0 = -(xmm0 * xmm0) + mem.
    251 ; FMA-LABEL: test_x86_fnmadd_bba_ps:
    252 ; FMA:       # %bb.0:
    253 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
    254 ; FMA-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
    255 ; FMA-NEXT:    retq
    256   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
    257   ret <4 x float> %res
    258 }
    259 
    260 declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
    261 define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfnmadd.ps.256 is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into ymm0, then vfnmadd132ps, ymm0 = -(ymm0 * mem) + ymm0.
    262 ; FMA-LABEL: test_x86_fnmadd_baa_ps_y:
    263 ; FMA:       # %bb.0:
    264 ; FMA-NEXT:    vmovaps (%rcx), %ymm0
    265 ; FMA-NEXT:    vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
    266 ; FMA-NEXT:    retq
    267   %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
    268   ret <8 x float> %res
    269 }
    270 
    271 define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfnmadd.ps.256 is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into ymm0, then vfnmadd231ps, ymm0 = -(ymm0 * mem) + ymm0.
    272 ; FMA-LABEL: test_x86_fnmadd_aba_ps_y:
    273 ; FMA:       # %bb.0:
    274 ; FMA-NEXT:    vmovaps (%rcx), %ymm0
    275 ; FMA-NEXT:    vfnmadd231ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
    276 ; FMA-NEXT:    retq
    277   %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
    278   ret <8 x float> %res
    279 }
    280 
    281 define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfnmadd.ps.256 is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into ymm0, then vfnmadd213ps, ymm0 = -(ymm0 * ymm0) + mem.
    282 ; FMA-LABEL: test_x86_fnmadd_bba_ps_y:
    283 ; FMA:       # %bb.0:
    284 ; FMA-NEXT:    vmovaps (%rdx), %ymm0
    285 ; FMA-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
    286 ; FMA-NEXT:    retq
    287   %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
    288   ret <8 x float> %res
    289 }
    290 
    291 declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
    292 define <2 x double> @test_x86_fnmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfnmadd.sd is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a vmovsd load into xmm1, then register-only vfnmadd213sd, xmm0 = -(xmm1 * xmm0) + xmm1.
    293 ; FMA-LABEL: test_x86_fnmadd_baa_sd:
    294 ; FMA:       # %bb.0:
    295 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
    296 ; FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    297 ; FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
    298 ; FMA-NEXT:    retq
    299   %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
    300   ret <2 x double> %res
    301 }
    302 
    303 define <2 x double> @test_x86_fnmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfnmadd.sd is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfnmadd132sd with (%rdx) as its memory operand.
    304 ; FMA-LABEL: test_x86_fnmadd_aba_sd:
    305 ; FMA:       # %bb.0:
    306 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
    307 ; FMA-NEXT:    vfnmadd132sd (%rdx), %xmm0, %xmm0
    308 ; FMA-NEXT:    retq
    309   %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
    310   ret <2 x double> %res
    311 }
    312 
    313 define <2 x double> @test_x86_fnmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfnmadd.sd is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into xmm0, then vfnmadd213sd with (%rcx) as its memory operand.
    314 ; FMA-LABEL: test_x86_fnmadd_bba_sd:
    315 ; FMA:       # %bb.0:
    316 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
    317 ; FMA-NEXT:    vfnmadd213sd (%rcx), %xmm0, %xmm0
    318 ; FMA-NEXT:    retq
    319   %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
    320   ret <2 x double> %res
    321 }
    322 
    323 declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
    324 define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfnmadd.pd is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfnmadd132pd, xmm0 = -(xmm0 * mem) + xmm0.
    325 ; FMA-LABEL: test_x86_fnmadd_baa_pd:
    326 ; FMA:       # %bb.0:
    327 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
    328 ; FMA-NEXT:    vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
    329 ; FMA-NEXT:    retq
    330   %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
    331   ret <2 x double> %res
    332 }
    333 
    334 define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfnmadd.pd is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfnmadd231pd, xmm0 = -(xmm0 * mem) + xmm0.
    335 ; FMA-LABEL: test_x86_fnmadd_aba_pd:
    336 ; FMA:       # %bb.0:
    337 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
    338 ; FMA-NEXT:    vfnmadd231pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
    339 ; FMA-NEXT:    retq
    340   %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
    341   ret <2 x double> %res
    342 }
    343 
    344 define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfnmadd.pd is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into xmm0, then vfnmadd213pd, xmm0 = -(xmm0 * xmm0) + mem.
    345 ; FMA-LABEL: test_x86_fnmadd_bba_pd:
    346 ; FMA:       # %bb.0:
    347 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
    348 ; FMA-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
    349 ; FMA-NEXT:    retq
    350   %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
    351   ret <2 x double> %res
    352 }
    353 
    354 declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
    355 define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfnmadd.pd.256 is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into ymm0, then vfnmadd132pd, ymm0 = -(ymm0 * mem) + ymm0.
    356 ; FMA-LABEL: test_x86_fnmadd_baa_pd_y:
    357 ; FMA:       # %bb.0:
    358 ; FMA-NEXT:    vmovapd (%rcx), %ymm0
    359 ; FMA-NEXT:    vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
    360 ; FMA-NEXT:    retq
    361   %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
    362   ret <4 x double> %res
    363 }
    364 
    365 define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfnmadd.pd.256 is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into ymm0, then vfnmadd231pd, ymm0 = -(ymm0 * mem) + ymm0.
    366 ; FMA-LABEL: test_x86_fnmadd_aba_pd_y:
    367 ; FMA:       # %bb.0:
    368 ; FMA-NEXT:    vmovapd (%rcx), %ymm0
    369 ; FMA-NEXT:    vfnmadd231pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm0
    370 ; FMA-NEXT:    retq
    371   %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
    372   ret <4 x double> %res
    373 }
    374 
    375 define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfnmadd.pd.256 is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into ymm0, then vfnmadd213pd, ymm0 = -(ymm0 * ymm0) + mem.
    376 ; FMA-LABEL: test_x86_fnmadd_bba_pd_y:
    377 ; FMA:       # %bb.0:
    378 ; FMA-NEXT:    vmovapd (%rdx), %ymm0
    379 ; FMA-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm0 * ymm0) + mem
    380 ; FMA-NEXT:    retq
    381   %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
    382   ret <4 x double> %res
    383 }
    384 
    385 declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
    386 define <4 x float> @test_x86_fmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfmsub.ss is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a vmovss load into xmm1, then register-only vfmsub213ss, xmm0 = (xmm1 * xmm0) - xmm1.
    387 ; FMA-LABEL: test_x86_fmsub_baa_ss:
    388 ; FMA:       # %bb.0:
    389 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
    390 ; FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    391 ; FMA-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
    392 ; FMA-NEXT:    retq
    393   %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
    394   ret <4 x float> %res
    395 }
    396 
    397 define <4 x float> @test_x86_fmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfmsub.ss is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfmsub132ss with (%rdx) as its memory operand.
    398 ; FMA-LABEL: test_x86_fmsub_aba_ss:
    399 ; FMA:       # %bb.0:
    400 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
    401 ; FMA-NEXT:    vfmsub132ss (%rdx), %xmm0, %xmm0
    402 ; FMA-NEXT:    retq
    403   %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
    404   ret <4 x float> %res
    405 }
    406 
    407 define <4 x float> @test_x86_fmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfmsub.ss is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into xmm0, then vfmsub213ss with (%rcx) as its memory operand.
    408 ; FMA-LABEL: test_x86_fmsub_bba_ss:
    409 ; FMA:       # %bb.0:
    410 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
    411 ; FMA-NEXT:    vfmsub213ss (%rcx), %xmm0, %xmm0
    412 ; FMA-NEXT:    retq
    413   %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
    414   ret <4 x float> %res
    415 }
    416 
    417 declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
    418 define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfmsub.ps is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfmsub132ps, xmm0 = (xmm0 * mem) - xmm0.
    419 ; FMA-LABEL: test_x86_fmsub_baa_ps:
    420 ; FMA:       # %bb.0:
    421 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
    422 ; FMA-NEXT:    vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
    423 ; FMA-NEXT:    retq
    424   %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
    425   ret <4 x float> %res
    426 }
    427 
    428 define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfmsub.ps is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfmsub231ps, xmm0 = (xmm0 * mem) - xmm0.
    429 ; FMA-LABEL: test_x86_fmsub_aba_ps:
    430 ; FMA:       # %bb.0:
    431 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
    432 ; FMA-NEXT:    vfmsub231ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
    433 ; FMA-NEXT:    retq
    434   %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
    435   ret <4 x float> %res
    436 }
    437 
    438 define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfmsub.ps is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into xmm0, then vfmsub213ps, xmm0 = (xmm0 * xmm0) - mem.
    439 ; FMA-LABEL: test_x86_fmsub_bba_ps:
    440 ; FMA:       # %bb.0:
    441 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
    442 ; FMA-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
    443 ; FMA-NEXT:    retq
    444   %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
    445   ret <4 x float> %res
    446 }
    447 
    448 declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
    449 define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfmsub.ps.256 is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into ymm0, then vfmsub132ps, ymm0 = (ymm0 * mem) - ymm0.
    450 ; FMA-LABEL: test_x86_fmsub_baa_ps_y:
    451 ; FMA:       # %bb.0:
    452 ; FMA-NEXT:    vmovaps (%rcx), %ymm0
    453 ; FMA-NEXT:    vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
    454 ; FMA-NEXT:    retq
    455   %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
    456   ret <8 x float> %res
    457 }
    458 
    459 define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfmsub.ps.256 is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into ymm0, then vfmsub231ps, ymm0 = (ymm0 * mem) - ymm0.
    460 ; FMA-LABEL: test_x86_fmsub_aba_ps_y:
    461 ; FMA:       # %bb.0:
    462 ; FMA-NEXT:    vmovaps (%rcx), %ymm0
    463 ; FMA-NEXT:    vfmsub231ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
    464 ; FMA-NEXT:    retq
    465   %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
    466   ret <8 x float> %res
    467 }
    468 
    469 define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfmsub.ps.256 is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into ymm0, then vfmsub213ps, ymm0 = (ymm0 * ymm0) - mem.
    470 ; FMA-LABEL: test_x86_fmsub_bba_ps_y:
    471 ; FMA:       # %bb.0:
    472 ; FMA-NEXT:    vmovaps (%rdx), %ymm0
    473 ; FMA-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
    474 ; FMA-NEXT:    retq
    475   %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
    476   ret <8 x float> %res
    477 }
    478 
    479 declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
    480 define <2 x double> @test_x86_fmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfmsub.sd is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a vmovsd load into xmm1, then register-only vfmsub213sd, xmm0 = (xmm1 * xmm0) - xmm1.
    481 ; FMA-LABEL: test_x86_fmsub_baa_sd:
    482 ; FMA:       # %bb.0:
    483 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
    484 ; FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    485 ; FMA-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
    486 ; FMA-NEXT:    retq
    487   %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
    488   ret <2 x double> %res
    489 }
    490 
    491 define <2 x double> @test_x86_fmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfmsub.sd is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfmsub132sd with (%rdx) as its memory operand.
    492 ; FMA-LABEL: test_x86_fmsub_aba_sd:
    493 ; FMA:       # %bb.0:
    494 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
    495 ; FMA-NEXT:    vfmsub132sd (%rdx), %xmm0, %xmm0
    496 ; FMA-NEXT:    retq
    497   %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
    498   ret <2 x double> %res
    499 }
    500 
    501 define <2 x double> @test_x86_fmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfmsub.sd is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into xmm0, then vfmsub213sd with (%rcx) as its memory operand.
    502 ; FMA-LABEL: test_x86_fmsub_bba_sd:
    503 ; FMA:       # %bb.0:
    504 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
    505 ; FMA-NEXT:    vfmsub213sd (%rcx), %xmm0, %xmm0
    506 ; FMA-NEXT:    retq
    507   %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
    508   ret <2 x double> %res
    509 }
    510 
    511 declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
    512 define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfmsub.pd is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfmsub132pd, xmm0 = (xmm0 * mem) - xmm0.
    513 ; FMA-LABEL: test_x86_fmsub_baa_pd:
    514 ; FMA:       # %bb.0:
    515 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
    516 ; FMA-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
    517 ; FMA-NEXT:    retq
    518   %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
    519   ret <2 x double> %res
    520 }
    521 
    522 define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfmsub.pd is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfmsub231pd, xmm0 = (xmm0 * mem) - xmm0.
    523 ; FMA-LABEL: test_x86_fmsub_aba_pd:
    524 ; FMA:       # %bb.0:
    525 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
    526 ; FMA-NEXT:    vfmsub231pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
    527 ; FMA-NEXT:    retq
    528   %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
    529   ret <2 x double> %res
    530 }
    531 
    532 define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfmsub.pd is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into xmm0, then vfmsub213pd, xmm0 = (xmm0 * xmm0) - mem.
    533 ; FMA-LABEL: test_x86_fmsub_bba_pd:
    534 ; FMA:       # %bb.0:
    535 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
    536 ; FMA-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
    537 ; FMA-NEXT:    retq
    538   %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
    539   ret <2 x double> %res
    540 }
    541 
    542 declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
    543 define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfmsub.pd.256 is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into ymm0, then vfmsub132pd, ymm0 = (ymm0 * mem) - ymm0.
    544 ; FMA-LABEL: test_x86_fmsub_baa_pd_y:
    545 ; FMA:       # %bb.0:
    546 ; FMA-NEXT:    vmovapd (%rcx), %ymm0
    547 ; FMA-NEXT:    vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
    548 ; FMA-NEXT:    retq
    549   %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
    550   ret <4 x double> %res
    551 }
    552 
    553 define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfmsub.pd.256 is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into ymm0, then vfmsub231pd, ymm0 = (ymm0 * mem) - ymm0.
    554 ; FMA-LABEL: test_x86_fmsub_aba_pd_y:
    555 ; FMA:       # %bb.0:
    556 ; FMA-NEXT:    vmovapd (%rcx), %ymm0
    557 ; FMA-NEXT:    vfmsub231pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm0
    558 ; FMA-NEXT:    retq
    559   %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
    560   ret <4 x double> %res
    561 }
    562 
    563 define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Test: llvm.x86.fma.vfmsub.pd.256 is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into ymm0, then vfmsub213pd, ymm0 = (ymm0 * ymm0) - mem.
    564 ; FMA-LABEL: test_x86_fmsub_bba_pd_y:
    565 ; FMA:       # %bb.0:
    566 ; FMA-NEXT:    vmovapd (%rdx), %ymm0
    567 ; FMA-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm0 * ymm0) - mem
    568 ; FMA-NEXT:    retq
    569   %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
    570   ret <4 x double> %res
    571 }
    572 
    573 
    574 declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
    575 define <4 x float> @test_x86_fnmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfnmsub.ss is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a vmovss load into xmm1, then register-only vfnmsub213ss, xmm0 = -(xmm1 * xmm0) - xmm1.
    576 ; FMA-LABEL: test_x86_fnmsub_baa_ss:
    577 ; FMA:       # %bb.0:
    578 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
    579 ; FMA-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    580 ; FMA-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
    581 ; FMA-NEXT:    retq
    582   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
    583   ret <4 x float> %res
    584 }
    585 
    586 define <4 x float> @test_x86_fnmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfnmsub.ss is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfnmsub132ss with (%rdx) as its memory operand.
    587 ; FMA-LABEL: test_x86_fnmsub_aba_ss:
    588 ; FMA:       # %bb.0:
    589 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
    590 ; FMA-NEXT:    vfnmsub132ss (%rdx), %xmm0, %xmm0
    591 ; FMA-NEXT:    retq
    592   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
    593   ret <4 x float> %res
    594 }
    595 
    596 define <4 x float> @test_x86_fnmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfnmsub.ss is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into xmm0, then vfnmsub213ss with (%rcx) as its memory operand.
    597 ; FMA-LABEL: test_x86_fnmsub_bba_ss:
    598 ; FMA:       # %bb.0:
    599 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
    600 ; FMA-NEXT:    vfnmsub213ss (%rcx), %xmm0, %xmm0
    601 ; FMA-NEXT:    retq
    602   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
    603   ret <4 x float> %res
    604 }
    605 
    606 declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
    607 define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfnmsub.ps is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfnmsub132ps, xmm0 = -(xmm0 * mem) - xmm0.
    608 ; FMA-LABEL: test_x86_fnmsub_baa_ps:
    609 ; FMA:       # %bb.0:
    610 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
    611 ; FMA-NEXT:    vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
    612 ; FMA-NEXT:    retq
    613   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
    614   ret <4 x float> %res
    615 }
    616 
    617 define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfnmsub.ps is called with (%a, %b, %a), so %a fills the 1st and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into xmm0, then vfnmsub231ps, xmm0 = -(xmm0 * mem) - xmm0.
    618 ; FMA-LABEL: test_x86_fnmsub_aba_ps:
    619 ; FMA:       # %bb.0:
    620 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
    621 ; FMA-NEXT:    vfnmsub231ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
    622 ; FMA-NEXT:    retq
    623   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
    624   ret <4 x float> %res
    625 }
    626 
    627 define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfnmsub.ps is called with (%b, %b, %a), so %b fills the 1st and 2nd operand slots.
        ; Expected codegen per the check lines below: a load from (%rdx) into xmm0, then vfnmsub213ps, xmm0 = -(xmm0 * xmm0) - mem.
    628 ; FMA-LABEL: test_x86_fnmsub_bba_ps:
    629 ; FMA:       # %bb.0:
    630 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
    631 ; FMA-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
    632 ; FMA-NEXT:    retq
    633   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
    634   ret <4 x float> %res
    635 }
    636 
    637 declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
    638 define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Test: llvm.x86.fma.vfnmsub.ps.256 is called with (%b, %a, %a), so %a fills the 2nd and 3rd operand slots.
        ; Expected codegen per the check lines below: a load from (%rcx) into ymm0, then vfnmsub132ps, ymm0 = -(ymm0 * mem) - ymm0.
    639 ; FMA-LABEL: test_x86_fnmsub_baa_ps_y:
    640 ; FMA:       # %bb.0:
    641 ; FMA-NEXT:    vmovaps (%rcx), %ymm0
    642 ; FMA-NEXT:    vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
    643 ; FMA-NEXT:    retq
    644   %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
    645   ret <8 x float> %res
    646 }
    647 
    ; 256-bit (<8 x float>) operand-order variant (%a, %b, %a): %a is passed as
    ; both the first and the third operand of @llvm.x86.fma.vfnmsub.ps.256.
    ; Expected code per the check lines: one vmovaps load from (%rcx) into ymm0,
    ; then vfnmsub231ps with the remaining multiplicand folded from memory,
    ; ymm0 = -(ymm0 * mem) - ymm0, then retq.
    ; NOTE(review): presumably (%rcx) addresses %a and the folded memory operand
    ; is %b (vector arguments passed indirectly under the win32 triple) -- confirm.
    648 define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
    649 ; FMA-LABEL: test_x86_fnmsub_aba_ps_y:
    650 ; FMA:       # %bb.0:
    651 ; FMA-NEXT:    vmovaps (%rcx), %ymm0
    652 ; FMA-NEXT:    vfnmsub231ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
    653 ; FMA-NEXT:    retq
    654   %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
    655   ret <8 x float> %res
    656 }
    657 
    ; 256-bit (<8 x float>) operand-order variant (%b, %b, %a): %b is passed as
    ; both the first and the second operand of @llvm.x86.fma.vfnmsub.ps.256.
    ; Expected code per the check lines: one vmovaps load from (%rdx) into ymm0,
    ; then vfnmsub213ps multiplying ymm0 by itself with the last term folded
    ; from memory, ymm0 = -(ymm0 * ymm0) - mem, then retq.
    ; NOTE(review): presumably (%rdx) addresses %b and the folded memory operand
    ; is %a (vector arguments passed indirectly under the win32 triple) -- confirm.
    658 define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
    659 ; FMA-LABEL: test_x86_fnmsub_bba_ps_y:
    660 ; FMA:       # %bb.0:
    661 ; FMA-NEXT:    vmovaps (%rdx), %ymm0
    662 ; FMA-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
    663 ; FMA-NEXT:    retq
    664   %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
    665   ret <8 x float> %res
    666 }
    667 
    668 declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
    ; Scalar-double operand-order variant (%b, %a, %a): %a is passed as both the
    ; second and the third operand of @llvm.x86.fma.vfnmsub.sd.
    ; Expected code per the check lines: vmovapd loads xmm0 from (%rdx), vmovsd
    ; loads only element 0 of a memory operand into xmm1 (upper element zeroed),
    ; and vfnmsub213sd then operates on registers only:
    ; xmm0 = -(xmm1 * xmm0) - xmm1, followed by retq.
    ; NOTE(review): presumably (%rdx) addresses %b and the vmovsd source is %a
    ; (vector arguments passed indirectly under the win32 triple) -- confirm.
    669 define <2 x double> @test_x86_fnmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
    670 ; FMA-LABEL: test_x86_fnmsub_baa_sd:
    671 ; FMA:       # %bb.0:
    672 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
    673 ; FMA-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    674 ; FMA-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
    675 ; FMA-NEXT:    retq
    676   %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
    677   ret <2 x double> %res
    678 }
    679 
    ; Scalar-double operand-order variant (%a, %b, %a): %a is passed as both the
    ; first and the third operand of @llvm.x86.fma.vfnmsub.sd.
    ; Expected code per the check lines: vmovapd loads xmm0 from (%rcx), then
    ; vfnmsub132sd takes (%rdx) as its memory operand with xmm0 as both register
    ; operands, then retq. The instruction is matched by its literal operand
    ; list here rather than by a decoded-comment pattern.
    ; NOTE(review): presumably (%rcx) addresses %a and (%rdx) addresses %b
    ; (vector arguments passed indirectly under the win32 triple) -- confirm.
    680 define <2 x double> @test_x86_fnmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
    681 ; FMA-LABEL: test_x86_fnmsub_aba_sd:
    682 ; FMA:       # %bb.0:
    683 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
    684 ; FMA-NEXT:    vfnmsub132sd (%rdx), %xmm0, %xmm0
    685 ; FMA-NEXT:    retq
    686   %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
    687   ret <2 x double> %res
    688 }
    689 
    ; Scalar-double operand-order variant (%b, %b, %a): %b is passed as both the
    ; first and the second operand of @llvm.x86.fma.vfnmsub.sd; %a is the third.
    ; Expected code per the check lines: vmovapd loads xmm0 from (%rdx), then
    ; vfnmsub213sd takes (%rcx) as its memory operand with xmm0 as both register
    ; operands, then retq. The instruction is matched by its literal operand
    ; list here rather than by a decoded-comment pattern.
    ; NOTE(review): presumably (%rdx) addresses %b and (%rcx) addresses %a
    ; (vector arguments passed indirectly under the win32 triple) -- confirm.
    690 define <2 x double> @test_x86_fnmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
    691 ; FMA-LABEL: test_x86_fnmsub_bba_sd:
    692 ; FMA:       # %bb.0:
    693 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
    694 ; FMA-NEXT:    vfnmsub213sd (%rcx), %xmm0, %xmm0
    695 ; FMA-NEXT:    retq
    696   %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
    697   ret <2 x double> %res
    698 }
    699 
    700 declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
    ; Packed-double operand-order variant (%b, %a, %a): %a is passed as both the
    ; second and the third operand of @llvm.x86.fma.vfnmsub.pd.
    ; Expected code per the check lines: one vmovapd load from (%rcx) into xmm0,
    ; then vfnmsub132pd with the remaining multiplicand folded from memory,
    ; xmm0 = -(xmm0 * mem) - xmm0, then retq.
    ; NOTE(review): presumably (%rcx) addresses %a and the folded memory operand
    ; is %b (vector arguments passed indirectly under the win32 triple) -- confirm.
    701 define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
    702 ; FMA-LABEL: test_x86_fnmsub_baa_pd:
    703 ; FMA:       # %bb.0:
    704 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
    705 ; FMA-NEXT:    vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
    706 ; FMA-NEXT:    retq
    707   %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
    708   ret <2 x double> %res
    709 }
    710 
    ; Packed-double operand-order variant (%a, %b, %a): %a is passed as both the
    ; first and the third operand of @llvm.x86.fma.vfnmsub.pd.
    ; Expected code per the check lines: one vmovapd load from (%rcx) into xmm0,
    ; then vfnmsub231pd with the remaining multiplicand folded from memory,
    ; xmm0 = -(xmm0 * mem) - xmm0, then retq.
    ; NOTE(review): presumably (%rcx) addresses %a and the folded memory operand
    ; is %b (vector arguments passed indirectly under the win32 triple) -- confirm.
    711 define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
    712 ; FMA-LABEL: test_x86_fnmsub_aba_pd:
    713 ; FMA:       # %bb.0:
    714 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
    715 ; FMA-NEXT:    vfnmsub231pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
    716 ; FMA-NEXT:    retq
    717   %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
    718   ret <2 x double> %res
    719 }
    720 
    ; Packed-double operand-order variant (%b, %b, %a): %b is passed as both the
    ; first and the second operand of @llvm.x86.fma.vfnmsub.pd; %a is the third.
    ; Expected code per the check lines: one vmovapd load from (%rdx) into xmm0,
    ; then vfnmsub213pd multiplying xmm0 by itself with the last term folded
    ; from memory, xmm0 = -(xmm0 * xmm0) - mem, then retq.
    ; NOTE(review): presumably (%rdx) addresses %b and the folded memory operand
    ; is %a (vector arguments passed indirectly under the win32 triple) -- confirm.
    721 define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
    722 ; FMA-LABEL: test_x86_fnmsub_bba_pd:
    723 ; FMA:       # %bb.0:
    724 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
    725 ; FMA-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
    726 ; FMA-NEXT:    retq
    727   %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
    728   ret <2 x double> %res
    729 }
    730 
    731 declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
    ; 256-bit (<4 x double>) operand-order variant (%b, %a, %a): %a is passed as
    ; both the second and the third operand of @llvm.x86.fma.vfnmsub.pd.256.
    ; Expected code per the check lines: one vmovapd load from (%rcx) into ymm0,
    ; then vfnmsub132pd with the remaining multiplicand folded from memory,
    ; ymm0 = -(ymm0 * mem) - ymm0, then retq.
    ; NOTE(review): presumably (%rcx) addresses %a and the folded memory operand
    ; is %b (vector arguments passed indirectly under the win32 triple) -- confirm.
    732 define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
    733 ; FMA-LABEL: test_x86_fnmsub_baa_pd_y:
    734 ; FMA:       # %bb.0:
    735 ; FMA-NEXT:    vmovapd (%rcx), %ymm0
    736 ; FMA-NEXT:    vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
    737 ; FMA-NEXT:    retq
    738   %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
    739   ret <4 x double> %res
    740 }
    741 
    ; 256-bit (<4 x double>) operand-order variant (%a, %b, %a): %a is passed as
    ; both the first and the third operand of @llvm.x86.fma.vfnmsub.pd.256.
    ; Expected code per the check lines: one vmovapd load from (%rcx) into ymm0,
    ; then vfnmsub231pd with the remaining multiplicand folded from memory,
    ; ymm0 = -(ymm0 * mem) - ymm0, then retq.
    ; NOTE(review): presumably (%rcx) addresses %a and the folded memory operand
    ; is %b (vector arguments passed indirectly under the win32 triple) -- confirm.
    742 define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
    743 ; FMA-LABEL: test_x86_fnmsub_aba_pd_y:
    744 ; FMA:       # %bb.0:
    745 ; FMA-NEXT:    vmovapd (%rcx), %ymm0
    746 ; FMA-NEXT:    vfnmsub231pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm0
    747 ; FMA-NEXT:    retq
    748   %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
    749   ret <4 x double> %res
    750 }
    751 
    ; 256-bit (<4 x double>) operand-order variant (%b, %b, %a): %b is passed as
    ; both the first and the second operand of @llvm.x86.fma.vfnmsub.pd.256.
    ; Expected code per the check lines: one vmovapd load from (%rdx) into ymm0,
    ; then vfnmsub213pd multiplying ymm0 by itself with the last term folded
    ; from memory, ymm0 = -(ymm0 * ymm0) - mem, then retq.
    ; NOTE(review): presumably (%rdx) addresses %b and the folded memory operand
    ; is %a (vector arguments passed indirectly under the win32 triple) -- confirm.
    752 define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
    753 ; FMA-LABEL: test_x86_fnmsub_bba_pd_y:
    754 ; FMA:       # %bb.0:
    755 ; FMA-NEXT:    vmovapd (%rdx), %ymm0
    756 ; FMA-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm0 * ymm0) - mem
    757 ; FMA-NEXT:    retq
    758   %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
    759   ret <4 x double> %res
    760 }
    761 
    762