Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA
      3 
      4 attributes #0 = { nounwind }
      5 
      6 declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
      7 define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
        ; Calls llvm.x86.fma4.vfmadd.ss on (%b, %a, %a) and returns the result.
        ; Expected asm: vmovss loads element 0 into xmm0 (rest zeroed), then one
        ; vfmaddss takes (%rdx) directly as a memory operand.
      8 ; FMA4-LABEL: test_x86_fmadd_baa_ss:
      9 ; FMA4:       # %bb.0:
     10 ; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     11 ; FMA4-NEXT:    vfmaddss %xmm0, (%rdx), %xmm0, %xmm0
     12 ; FMA4-NEXT:    retq
     13   %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
     14   ret <4 x float> %res
     15 }
     16 
     17 define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
        ; Calls llvm.x86.fma4.vfmadd.ss on (%a, %b, %a) and returns the result.
        ; Expected asm: vmovss loads element 0 into xmm0 (rest zeroed), then one
        ; vfmaddss takes (%rdx) directly as a memory operand.
     18 ; FMA4-LABEL: test_x86_fmadd_aba_ss:
     19 ; FMA4:       # %bb.0:
     20 ; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     21 ; FMA4-NEXT:    vfmaddss %xmm0, (%rdx), %xmm0, %xmm0
     22 ; FMA4-NEXT:    retq
     23   %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
     24   ret <4 x float> %res
     25 }
     26 
     27 define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
        ; Calls llvm.x86.fma4.vfmadd.ss on (%b, %b, %a) and returns the result.
        ; Expected asm: vmovss loads element 0 into xmm0 (rest zeroed), then one
        ; vfmaddss takes (%rcx) directly as a memory operand.
     28 ; FMA4-LABEL: test_x86_fmadd_bba_ss:
     29 ; FMA4:       # %bb.0:
     30 ; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     31 ; FMA4-NEXT:    vfmaddss (%rcx), %xmm0, %xmm0, %xmm0
     32 ; FMA4-NEXT:    retq
     33   %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
     34   ret <4 x float> %res
     35 }
     36 
     37 declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
     38 define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfmadd.ps on (%b, %a, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rcx) into xmm0, then one vfmaddps takes
        ; (%rdx) directly as a memory operand.
     39 ; FMA4-LABEL: test_x86_fmadd_baa_ps:
     40 ; FMA4:       # %bb.0:
     41 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
     42 ; FMA4-NEXT:    vfmaddps %xmm0, (%rdx), %xmm0, %xmm0
     43 ; FMA4-NEXT:    retq
     44   %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
     45   ret <4 x float> %res
     46 }
     47 
     48 define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfmadd.ps on (%a, %b, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rcx) into xmm0, then one vfmaddps takes
        ; (%rdx) directly as a memory operand.
     49 ; FMA4-LABEL: test_x86_fmadd_aba_ps:
     50 ; FMA4:       # %bb.0:
     51 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
     52 ; FMA4-NEXT:    vfmaddps %xmm0, (%rdx), %xmm0, %xmm0
     53 ; FMA4-NEXT:    retq
     54   %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
     55   ret <4 x float> %res
     56 }
     57 
     58 define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfmadd.ps on (%b, %b, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rdx) into xmm0, then one vfmaddps takes
        ; (%rcx) directly as a memory operand.
     59 ; FMA4-LABEL: test_x86_fmadd_bba_ps:
     60 ; FMA4:       # %bb.0:
     61 ; FMA4-NEXT:    vmovaps (%rdx), %xmm0
     62 ; FMA4-NEXT:    vfmaddps (%rcx), %xmm0, %xmm0, %xmm0
     63 ; FMA4-NEXT:    retq
     64   %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
     65   ret <4 x float> %res
     66 }
     67 
     68 declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
     69 define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfmadd.ps.256 on (%b, %a, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rcx) into ymm0, then one vfmaddps takes
        ; (%rdx) directly as a memory operand.
     70 ; FMA4-LABEL: test_x86_fmadd_baa_ps_y:
     71 ; FMA4:       # %bb.0:
     72 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
     73 ; FMA4-NEXT:    vfmaddps %ymm0, (%rdx), %ymm0, %ymm0
     74 ; FMA4-NEXT:    retq
     75   %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
     76   ret <8 x float> %res
     77 }
     78 
     79 define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfmadd.ps.256 on (%a, %b, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rcx) into ymm0, then one vfmaddps takes
        ; (%rdx) directly as a memory operand.
     80 ; FMA4-LABEL: test_x86_fmadd_aba_ps_y:
     81 ; FMA4:       # %bb.0:
     82 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
     83 ; FMA4-NEXT:    vfmaddps %ymm0, (%rdx), %ymm0, %ymm0
     84 ; FMA4-NEXT:    retq
     85   %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
     86   ret <8 x float> %res
     87 }
     88 
     89 define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfmadd.ps.256 on (%b, %b, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rdx) into ymm0, then one vfmaddps takes
        ; (%rcx) directly as a memory operand.
     90 ; FMA4-LABEL: test_x86_fmadd_bba_ps_y:
     91 ; FMA4:       # %bb.0:
     92 ; FMA4-NEXT:    vmovaps (%rdx), %ymm0
     93 ; FMA4-NEXT:    vfmaddps (%rcx), %ymm0, %ymm0, %ymm0
     94 ; FMA4-NEXT:    retq
     95   %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
     96   ret <8 x float> %res
     97 }
     98 
     99 declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
    100 define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
        ; Calls llvm.x86.fma4.vfmadd.sd on (%b, %a, %a) and returns the result.
        ; Expected asm: vmovsd loads element 0 into xmm0 (rest zeroed), then one
        ; vfmaddsd takes (%rdx) directly as a memory operand.
    101 ; FMA4-LABEL: test_x86_fmadd_baa_sd:
    102 ; FMA4:       # %bb.0:
    103 ; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    104 ; FMA4-NEXT:    vfmaddsd %xmm0, (%rdx), %xmm0, %xmm0
    105 ; FMA4-NEXT:    retq
    106   %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
    107   ret <2 x double> %res
    108 }
    109 
    110 define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
        ; Calls llvm.x86.fma4.vfmadd.sd on (%a, %b, %a) and returns the result.
        ; Expected asm: vmovsd loads element 0 into xmm0 (rest zeroed), then one
        ; vfmaddsd takes (%rdx) directly as a memory operand.
    111 ; FMA4-LABEL: test_x86_fmadd_aba_sd:
    112 ; FMA4:       # %bb.0:
    113 ; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    114 ; FMA4-NEXT:    vfmaddsd %xmm0, (%rdx), %xmm0, %xmm0
    115 ; FMA4-NEXT:    retq
    116   %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
    117   ret <2 x double> %res
    118 }
    119 
    120 define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
        ; Calls llvm.x86.fma4.vfmadd.sd on (%b, %b, %a) and returns the result.
        ; Expected asm: vmovsd loads element 0 into xmm0 (rest zeroed), then one
        ; vfmaddsd takes (%rcx) directly as a memory operand.
    121 ; FMA4-LABEL: test_x86_fmadd_bba_sd:
    122 ; FMA4:       # %bb.0:
    123 ; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    124 ; FMA4-NEXT:    vfmaddsd (%rcx), %xmm0, %xmm0, %xmm0
    125 ; FMA4-NEXT:    retq
    126   %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
    127   ret <2 x double> %res
    128 }
    129 
    130 declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
    131 define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfmadd.pd on (%b, %a, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rcx) into xmm0, then one vfmaddpd takes
        ; (%rdx) directly as a memory operand.
    132 ; FMA4-LABEL: test_x86_fmadd_baa_pd:
    133 ; FMA4:       # %bb.0:
    134 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
    135 ; FMA4-NEXT:    vfmaddpd %xmm0, (%rdx), %xmm0, %xmm0
    136 ; FMA4-NEXT:    retq
    137   %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
    138   ret <2 x double> %res
    139 }
    140 
    141 define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfmadd.pd on (%a, %b, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rcx) into xmm0, then one vfmaddpd takes
        ; (%rdx) directly as a memory operand.
    142 ; FMA4-LABEL: test_x86_fmadd_aba_pd:
    143 ; FMA4:       # %bb.0:
    144 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
    145 ; FMA4-NEXT:    vfmaddpd %xmm0, (%rdx), %xmm0, %xmm0
    146 ; FMA4-NEXT:    retq
    147   %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
    148   ret <2 x double> %res
    149 }
    150 
    151 define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfmadd.pd on (%b, %b, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rdx) into xmm0, then one vfmaddpd takes
        ; (%rcx) directly as a memory operand.
    152 ; FMA4-LABEL: test_x86_fmadd_bba_pd:
    153 ; FMA4:       # %bb.0:
    154 ; FMA4-NEXT:    vmovapd (%rdx), %xmm0
    155 ; FMA4-NEXT:    vfmaddpd (%rcx), %xmm0, %xmm0, %xmm0
    156 ; FMA4-NEXT:    retq
    157   %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
    158   ret <2 x double> %res
    159 }
    160 
    161 declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
    162 define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfmadd.pd.256 on (%b, %a, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rcx) into ymm0, then one vfmaddpd takes
        ; (%rdx) directly as a memory operand.
    163 ; FMA4-LABEL: test_x86_fmadd_baa_pd_y:
    164 ; FMA4:       # %bb.0:
    165 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
    166 ; FMA4-NEXT:    vfmaddpd %ymm0, (%rdx), %ymm0, %ymm0
    167 ; FMA4-NEXT:    retq
    168   %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
    169   ret <4 x double> %res
    170 }
    171 
    172 define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfmadd.pd.256 on (%a, %b, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rcx) into ymm0, then one vfmaddpd takes
        ; (%rdx) directly as a memory operand.
    173 ; FMA4-LABEL: test_x86_fmadd_aba_pd_y:
    174 ; FMA4:       # %bb.0:
    175 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
    176 ; FMA4-NEXT:    vfmaddpd %ymm0, (%rdx), %ymm0, %ymm0
    177 ; FMA4-NEXT:    retq
    178   %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
    179   ret <4 x double> %res
    180 }
    181 
    182 define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfmadd.pd.256 on (%b, %b, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rdx) into ymm0, then one vfmaddpd takes
        ; (%rcx) directly as a memory operand.
    183 ; FMA4-LABEL: test_x86_fmadd_bba_pd_y:
    184 ; FMA4:       # %bb.0:
    185 ; FMA4-NEXT:    vmovapd (%rdx), %ymm0
    186 ; FMA4-NEXT:    vfmaddpd (%rcx), %ymm0, %ymm0, %ymm0
    187 ; FMA4-NEXT:    retq
    188   %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
    189   ret <4 x double> %res
    190 }
    191 
    192 declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
    193 define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfnmadd.ps on (%b, %a, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rcx) into xmm0, then one vfnmaddps takes
        ; (%rdx) directly as a memory operand.
    194 ; FMA4-LABEL: test_x86_fnmadd_baa_ps:
    195 ; FMA4:       # %bb.0:
    196 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
    197 ; FMA4-NEXT:    vfnmaddps %xmm0, (%rdx), %xmm0, %xmm0
    198 ; FMA4-NEXT:    retq
    199   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
    200   ret <4 x float> %res
    201 }
    202 
    203 define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfnmadd.ps on (%a, %b, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rcx) into xmm0, then one vfnmaddps takes
        ; (%rdx) directly as a memory operand.
    204 ; FMA4-LABEL: test_x86_fnmadd_aba_ps:
    205 ; FMA4:       # %bb.0:
    206 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
    207 ; FMA4-NEXT:    vfnmaddps %xmm0, (%rdx), %xmm0, %xmm0
    208 ; FMA4-NEXT:    retq
    209   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
    210   ret <4 x float> %res
    211 }
    212 
    213 define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfnmadd.ps on (%b, %b, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rdx) into xmm0, then one vfnmaddps takes
        ; (%rcx) directly as a memory operand.
    214 ; FMA4-LABEL: test_x86_fnmadd_bba_ps:
    215 ; FMA4:       # %bb.0:
    216 ; FMA4-NEXT:    vmovaps (%rdx), %xmm0
    217 ; FMA4-NEXT:    vfnmaddps (%rcx), %xmm0, %xmm0, %xmm0
    218 ; FMA4-NEXT:    retq
    219   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
    220   ret <4 x float> %res
    221 }
    222 
    223 declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
    224 define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfnmadd.ps.256 on (%b, %a, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rcx) into ymm0, then one vfnmaddps takes
        ; (%rdx) directly as a memory operand.
    225 ; FMA4-LABEL: test_x86_fnmadd_baa_ps_y:
    226 ; FMA4:       # %bb.0:
    227 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
    228 ; FMA4-NEXT:    vfnmaddps %ymm0, (%rdx), %ymm0, %ymm0
    229 ; FMA4-NEXT:    retq
    230   %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
    231   ret <8 x float> %res
    232 }
    233 
    234 define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfnmadd.ps.256 on (%a, %b, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rcx) into ymm0, then one vfnmaddps takes
        ; (%rdx) directly as a memory operand.
    235 ; FMA4-LABEL: test_x86_fnmadd_aba_ps_y:
    236 ; FMA4:       # %bb.0:
    237 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
    238 ; FMA4-NEXT:    vfnmaddps %ymm0, (%rdx), %ymm0, %ymm0
    239 ; FMA4-NEXT:    retq
    240   %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
    241   ret <8 x float> %res
    242 }
    243 
    244 define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfnmadd.ps.256 on (%b, %b, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rdx) into ymm0, then one vfnmaddps takes
        ; (%rcx) directly as a memory operand.
    245 ; FMA4-LABEL: test_x86_fnmadd_bba_ps_y:
    246 ; FMA4:       # %bb.0:
    247 ; FMA4-NEXT:    vmovaps (%rdx), %ymm0
    248 ; FMA4-NEXT:    vfnmaddps (%rcx), %ymm0, %ymm0, %ymm0
    249 ; FMA4-NEXT:    retq
    250   %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
    251   ret <8 x float> %res
    252 }
    253 
    254 declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
    255 define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfnmadd.pd on (%b, %a, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rcx) into xmm0, then one vfnmaddpd takes
        ; (%rdx) directly as a memory operand.
    256 ; FMA4-LABEL: test_x86_fnmadd_baa_pd:
    257 ; FMA4:       # %bb.0:
    258 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
    259 ; FMA4-NEXT:    vfnmaddpd %xmm0, (%rdx), %xmm0, %xmm0
    260 ; FMA4-NEXT:    retq
    261   %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
    262   ret <2 x double> %res
    263 }
    264 
    265 define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfnmadd.pd on (%a, %b, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rcx) into xmm0, then one vfnmaddpd takes
        ; (%rdx) directly as a memory operand.
    266 ; FMA4-LABEL: test_x86_fnmadd_aba_pd:
    267 ; FMA4:       # %bb.0:
    268 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
    269 ; FMA4-NEXT:    vfnmaddpd %xmm0, (%rdx), %xmm0, %xmm0
    270 ; FMA4-NEXT:    retq
    271   %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
    272   ret <2 x double> %res
    273 }
    274 
    275 define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfnmadd.pd on (%b, %b, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rdx) into xmm0, then one vfnmaddpd takes
        ; (%rcx) directly as a memory operand.
    276 ; FMA4-LABEL: test_x86_fnmadd_bba_pd:
    277 ; FMA4:       # %bb.0:
    278 ; FMA4-NEXT:    vmovapd (%rdx), %xmm0
    279 ; FMA4-NEXT:    vfnmaddpd (%rcx), %xmm0, %xmm0, %xmm0
    280 ; FMA4-NEXT:    retq
    281   %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
    282   ret <2 x double> %res
    283 }
    284 
    285 declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
    286 define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfnmadd.pd.256 on (%b, %a, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rcx) into ymm0, then one vfnmaddpd takes
        ; (%rdx) directly as a memory operand.
    287 ; FMA4-LABEL: test_x86_fnmadd_baa_pd_y:
    288 ; FMA4:       # %bb.0:
    289 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
    290 ; FMA4-NEXT:    vfnmaddpd %ymm0, (%rdx), %ymm0, %ymm0
    291 ; FMA4-NEXT:    retq
    292   %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
    293   ret <4 x double> %res
    294 }
    295 
    296 define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfnmadd.pd.256 on (%a, %b, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rcx) into ymm0, then one vfnmaddpd takes
        ; (%rdx) directly as a memory operand.
    297 ; FMA4-LABEL: test_x86_fnmadd_aba_pd_y:
    298 ; FMA4:       # %bb.0:
    299 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
    300 ; FMA4-NEXT:    vfnmaddpd %ymm0, (%rdx), %ymm0, %ymm0
    301 ; FMA4-NEXT:    retq
    302   %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
    303   ret <4 x double> %res
    304 }
    305 
    306 define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfnmadd.pd.256 on (%b, %b, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rdx) into ymm0, then one vfnmaddpd takes
        ; (%rcx) directly as a memory operand.
    307 ; FMA4-LABEL: test_x86_fnmadd_bba_pd_y:
    308 ; FMA4:       # %bb.0:
    309 ; FMA4-NEXT:    vmovapd (%rdx), %ymm0
    310 ; FMA4-NEXT:    vfnmaddpd (%rcx), %ymm0, %ymm0, %ymm0
    311 ; FMA4-NEXT:    retq
    312   %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
    313   ret <4 x double> %res
    314 }
    315 
    316 declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
    317 define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfmsub.ps on (%b, %a, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rcx) into xmm0, then one vfmsubps takes
        ; (%rdx) directly as a memory operand.
    318 ; FMA4-LABEL: test_x86_fmsub_baa_ps:
    319 ; FMA4:       # %bb.0:
    320 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
    321 ; FMA4-NEXT:    vfmsubps %xmm0, (%rdx), %xmm0, %xmm0
    322 ; FMA4-NEXT:    retq
    323   %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
    324   ret <4 x float> %res
    325 }
    326 
    327 define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfmsub.ps on (%a, %b, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rcx) into xmm0, then one vfmsubps takes
        ; (%rdx) directly as a memory operand.
    328 ; FMA4-LABEL: test_x86_fmsub_aba_ps:
    329 ; FMA4:       # %bb.0:
    330 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
    331 ; FMA4-NEXT:    vfmsubps %xmm0, (%rdx), %xmm0, %xmm0
    332 ; FMA4-NEXT:    retq
    333   %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
    334   ret <4 x float> %res
    335 }
    336 
    337 define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfmsub.ps on (%b, %b, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rdx) into xmm0, then one vfmsubps takes
        ; (%rcx) directly as a memory operand.
    338 ; FMA4-LABEL: test_x86_fmsub_bba_ps:
    339 ; FMA4:       # %bb.0:
    340 ; FMA4-NEXT:    vmovaps (%rdx), %xmm0
    341 ; FMA4-NEXT:    vfmsubps (%rcx), %xmm0, %xmm0, %xmm0
    342 ; FMA4-NEXT:    retq
    343   %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
    344   ret <4 x float> %res
    345 }
    346 
    347 declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
    348 define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfmsub.ps.256 on (%b, %a, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rcx) into ymm0, then one vfmsubps takes
        ; (%rdx) directly as a memory operand.
    349 ; FMA4-LABEL: test_x86_fmsub_baa_ps_y:
    350 ; FMA4:       # %bb.0:
    351 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
    352 ; FMA4-NEXT:    vfmsubps %ymm0, (%rdx), %ymm0, %ymm0
    353 ; FMA4-NEXT:    retq
    354   %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
    355   ret <8 x float> %res
    356 }
    357 
    358 define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfmsub.ps.256 on (%a, %b, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rcx) into ymm0, then one vfmsubps takes
        ; (%rdx) directly as a memory operand.
    359 ; FMA4-LABEL: test_x86_fmsub_aba_ps_y:
    360 ; FMA4:       # %bb.0:
    361 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
    362 ; FMA4-NEXT:    vfmsubps %ymm0, (%rdx), %ymm0, %ymm0
    363 ; FMA4-NEXT:    retq
    364   %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
    365   ret <8 x float> %res
    366 }
    367 
    368 define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfmsub.ps.256 on (%b, %b, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rdx) into ymm0, then one vfmsubps takes
        ; (%rcx) directly as a memory operand.
    369 ; FMA4-LABEL: test_x86_fmsub_bba_ps_y:
    370 ; FMA4:       # %bb.0:
    371 ; FMA4-NEXT:    vmovaps (%rdx), %ymm0
    372 ; FMA4-NEXT:    vfmsubps (%rcx), %ymm0, %ymm0, %ymm0
    373 ; FMA4-NEXT:    retq
    374   %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
    375   ret <8 x float> %res
    376 }
    377 
    378 declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
    379 define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfmsub.pd on (%b, %a, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rcx) into xmm0, then one vfmsubpd takes
        ; (%rdx) directly as a memory operand.
    380 ; FMA4-LABEL: test_x86_fmsub_baa_pd:
    381 ; FMA4:       # %bb.0:
    382 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
    383 ; FMA4-NEXT:    vfmsubpd %xmm0, (%rdx), %xmm0, %xmm0
    384 ; FMA4-NEXT:    retq
    385   %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
    386   ret <2 x double> %res
    387 }
    388 
    389 define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfmsub.pd on (%a, %b, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rcx) into xmm0, then one vfmsubpd takes
        ; (%rdx) directly as a memory operand.
    390 ; FMA4-LABEL: test_x86_fmsub_aba_pd:
    391 ; FMA4:       # %bb.0:
    392 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
    393 ; FMA4-NEXT:    vfmsubpd %xmm0, (%rdx), %xmm0, %xmm0
    394 ; FMA4-NEXT:    retq
    395   %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
    396   ret <2 x double> %res
    397 }
    398 
    399 define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfmsub.pd on (%b, %b, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rdx) into xmm0, then one vfmsubpd takes
        ; (%rcx) directly as a memory operand.
    400 ; FMA4-LABEL: test_x86_fmsub_bba_pd:
    401 ; FMA4:       # %bb.0:
    402 ; FMA4-NEXT:    vmovapd (%rdx), %xmm0
    403 ; FMA4-NEXT:    vfmsubpd (%rcx), %xmm0, %xmm0, %xmm0
    404 ; FMA4-NEXT:    retq
    405   %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
    406   ret <2 x double> %res
    407 }
    408 
    409 declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
    410 define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfmsub.pd.256 on (%b, %a, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rcx) into ymm0, then one vfmsubpd takes
        ; (%rdx) directly as a memory operand.
    411 ; FMA4-LABEL: test_x86_fmsub_baa_pd_y:
    412 ; FMA4:       # %bb.0:
    413 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
    414 ; FMA4-NEXT:    vfmsubpd %ymm0, (%rdx), %ymm0, %ymm0
    415 ; FMA4-NEXT:    retq
    416   %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
    417   ret <4 x double> %res
    418 }
    419 
    420 define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfmsub.pd.256 on (%a, %b, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rcx) into ymm0, then one vfmsubpd takes
        ; (%rdx) directly as a memory operand.
    421 ; FMA4-LABEL: test_x86_fmsub_aba_pd_y:
    422 ; FMA4:       # %bb.0:
    423 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
    424 ; FMA4-NEXT:    vfmsubpd %ymm0, (%rdx), %ymm0, %ymm0
    425 ; FMA4-NEXT:    retq
    426   %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
    427   ret <4 x double> %res
    428 }
    429 
    430 define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfmsub.pd.256 on (%b, %b, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rdx) into ymm0, then one vfmsubpd takes
        ; (%rcx) directly as a memory operand.
    431 ; FMA4-LABEL: test_x86_fmsub_bba_pd_y:
    432 ; FMA4:       # %bb.0:
    433 ; FMA4-NEXT:    vmovapd (%rdx), %ymm0
    434 ; FMA4-NEXT:    vfmsubpd (%rcx), %ymm0, %ymm0, %ymm0
    435 ; FMA4-NEXT:    retq
    436   %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
    437   ret <4 x double> %res
    438 }
    439 
    440 declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
    441 define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfnmsub.ps on (%b, %a, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rcx) into xmm0, then one vfnmsubps takes
        ; (%rdx) directly as a memory operand.
    442 ; FMA4-LABEL: test_x86_fnmsub_baa_ps:
    443 ; FMA4:       # %bb.0:
    444 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
    445 ; FMA4-NEXT:    vfnmsubps %xmm0, (%rdx), %xmm0, %xmm0
    446 ; FMA4-NEXT:    retq
    447   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
    448   ret <4 x float> %res
    449 }
    450 
    451 define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfnmsub.ps on (%a, %b, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rcx) into xmm0, then one vfnmsubps takes
        ; (%rdx) directly as a memory operand.
    452 ; FMA4-LABEL: test_x86_fnmsub_aba_ps:
    453 ; FMA4:       # %bb.0:
    454 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
    455 ; FMA4-NEXT:    vfnmsubps %xmm0, (%rdx), %xmm0, %xmm0
    456 ; FMA4-NEXT:    retq
    457   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
    458   ret <4 x float> %res
    459 }
    460 
    461 define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfnmsub.ps on (%b, %b, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rdx) into xmm0, then one vfnmsubps takes
        ; (%rcx) directly as a memory operand.
    462 ; FMA4-LABEL: test_x86_fnmsub_bba_ps:
    463 ; FMA4:       # %bb.0:
    464 ; FMA4-NEXT:    vmovaps (%rdx), %xmm0
    465 ; FMA4-NEXT:    vfnmsubps (%rcx), %xmm0, %xmm0, %xmm0
    466 ; FMA4-NEXT:    retq
    467   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
    468   ret <4 x float> %res
    469 }
    470 
    471 declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
    472 define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfnmsub.ps.256 on (%b, %a, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rcx) into ymm0, then one vfnmsubps takes
        ; (%rdx) directly as a memory operand.
    473 ; FMA4-LABEL: test_x86_fnmsub_baa_ps_y:
    474 ; FMA4:       # %bb.0:
    475 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
    476 ; FMA4-NEXT:    vfnmsubps %ymm0, (%rdx), %ymm0, %ymm0
    477 ; FMA4-NEXT:    retq
    478   %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
    479   ret <8 x float> %res
    480 }
    481 
    482 define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfnmsub.ps.256 on (%a, %b, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rcx) into ymm0, then one vfnmsubps takes
        ; (%rdx) directly as a memory operand.
    483 ; FMA4-LABEL: test_x86_fnmsub_aba_ps_y:
    484 ; FMA4:       # %bb.0:
    485 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
    486 ; FMA4-NEXT:    vfnmsubps %ymm0, (%rdx), %ymm0, %ymm0
    487 ; FMA4-NEXT:    retq
    488   %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
    489   ret <8 x float> %res
    490 }
    491 
    492 define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
        ; Calls llvm.x86.fma.vfnmsub.ps.256 on (%b, %b, %a) and returns the result.
        ; Expected asm: vmovaps loads (%rdx) into ymm0, then one vfnmsubps takes
        ; (%rcx) directly as a memory operand.
    493 ; FMA4-LABEL: test_x86_fnmsub_bba_ps_y:
    494 ; FMA4:       # %bb.0:
    495 ; FMA4-NEXT:    vmovaps (%rdx), %ymm0
    496 ; FMA4-NEXT:    vfnmsubps (%rcx), %ymm0, %ymm0, %ymm0
    497 ; FMA4-NEXT:    retq
    498   %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
    499   ret <8 x float> %res
    500 }
    501 
    502 declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
    503 define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfnmsub.pd on (%b, %a, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rcx) into xmm0, then one vfnmsubpd takes
        ; (%rdx) directly as a memory operand.
    504 ; FMA4-LABEL: test_x86_fnmsub_baa_pd:
    505 ; FMA4:       # %bb.0:
    506 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
    507 ; FMA4-NEXT:    vfnmsubpd %xmm0, (%rdx), %xmm0, %xmm0
    508 ; FMA4-NEXT:    retq
    509   %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
    510   ret <2 x double> %res
    511 }
    512 
    513 define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfnmsub.pd on (%a, %b, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rcx) into xmm0, then one vfnmsubpd takes
        ; (%rdx) directly as a memory operand.
    514 ; FMA4-LABEL: test_x86_fnmsub_aba_pd:
    515 ; FMA4:       # %bb.0:
    516 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
    517 ; FMA4-NEXT:    vfnmsubpd %xmm0, (%rdx), %xmm0, %xmm0
    518 ; FMA4-NEXT:    retq
    519   %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
    520   ret <2 x double> %res
    521 }
    522 
    523 define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfnmsub.pd on (%b, %b, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rdx) into xmm0, then one vfnmsubpd takes
        ; (%rcx) directly as a memory operand.
    524 ; FMA4-LABEL: test_x86_fnmsub_bba_pd:
    525 ; FMA4:       # %bb.0:
    526 ; FMA4-NEXT:    vmovapd (%rdx), %xmm0
    527 ; FMA4-NEXT:    vfnmsubpd (%rcx), %xmm0, %xmm0, %xmm0
    528 ; FMA4-NEXT:    retq
    529   %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
    530   ret <2 x double> %res
    531 }
    532 
    533 declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
    534 define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfnmsub.pd.256 on (%b, %a, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rcx) into ymm0, then one vfnmsubpd takes
        ; (%rdx) directly as a memory operand.
    535 ; FMA4-LABEL: test_x86_fnmsub_baa_pd_y:
    536 ; FMA4:       # %bb.0:
    537 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
    538 ; FMA4-NEXT:    vfnmsubpd %ymm0, (%rdx), %ymm0, %ymm0
    539 ; FMA4-NEXT:    retq
    540   %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
    541   ret <4 x double> %res
    542 }
    543 
    544 define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfnmsub.pd.256 on (%a, %b, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rcx) into ymm0, then one vfnmsubpd takes
        ; (%rdx) directly as a memory operand.
    545 ; FMA4-LABEL: test_x86_fnmsub_aba_pd_y:
    546 ; FMA4:       # %bb.0:
    547 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
    548 ; FMA4-NEXT:    vfnmsubpd %ymm0, (%rdx), %ymm0, %ymm0
    549 ; FMA4-NEXT:    retq
    550   %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
    551   ret <4 x double> %res
    552 }
    553 
    554 define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
        ; Calls llvm.x86.fma.vfnmsub.pd.256 on (%b, %b, %a) and returns the result.
        ; Expected asm: vmovapd loads (%rdx) into ymm0, then one vfnmsubpd takes
        ; (%rcx) directly as a memory operand.
    555 ; FMA4-LABEL: test_x86_fnmsub_bba_pd_y:
    556 ; FMA4:       # %bb.0:
    557 ; FMA4-NEXT:    vmovapd (%rdx), %ymm0
    558 ; FMA4-NEXT:    vfnmsubpd (%rcx), %ymm0, %ymm0, %ymm0
    559 ; FMA4-NEXT:    retq
    560   %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
    561   ret <4 x double> %res
    562 }
    563 
    564