Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,-fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,+fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK
      4 
      5 ; VFMADD
      6 define <4 x float> @test_x86_fma4_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { ; llvm.x86.fma4.vfmadd.ss with arguments in (a0, a1, a2) order; the assertions below expect one vfmaddss then retq
      7 ; CHECK-LABEL: test_x86_fma4_vfmadd_ss:
      8 ; CHECK:       # %bb.0:
      9 ; CHECK-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6a,0xc2,0x10]
     10 ; CHECK-NEXT:    retq # encoding: [0xc3]
     11   %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; all three arguments forwarded unchanged
     12   ret <4 x float> %res
     13 }
     14 
     15 define <4 x float> @test_x86_fma4_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { ; same intrinsic as the plain vfmadd_ss test but with the first two arguments swapped; the expected vfmaddss swaps %xmm0 and %xmm1 as sources
     16 ; CHECK-LABEL: test_x86_fma4_vfmadd_bac_ss:
     17 ; CHECK:       # %bb.0:
     18 ; CHECK-NEXT:    vfmaddss %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6a,0xc2,0x00]
     19 ; CHECK-NEXT:    retq # encoding: [0xc3]
     20   %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2) ; note the (a1, a0, a2) argument order
     21   ret <4 x float> %res
     22 }
     23 declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) ; intrinsic called by the two vfmadd_ss tests above
     24 
     25 define <2 x double> @test_x86_fma4_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { ; llvm.x86.fma4.vfmadd.sd with arguments in (a0, a1, a2) order; the assertions below expect one vfmaddsd then retq
     26 ; CHECK-LABEL: test_x86_fma4_vfmadd_sd:
     27 ; CHECK:       # %bb.0:
     28 ; CHECK-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
     29 ; CHECK-NEXT:    retq # encoding: [0xc3]
     30   %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; all three arguments forwarded unchanged
     31   ret <2 x double> %res
     32 }
     33 
     34 define <2 x double> @test_x86_fma4_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { ; same intrinsic as the plain vfmadd_sd test but with the first two arguments swapped; the expected vfmaddsd swaps %xmm0 and %xmm1 as sources
     35 ; CHECK-LABEL: test_x86_fma4_vfmadd_bac_sd:
     36 ; CHECK:       # %bb.0:
     37 ; CHECK-NEXT:    vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6b,0xc2,0x00]
     38 ; CHECK-NEXT:    retq # encoding: [0xc3]
     39   %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2) ; note the (a1, a0, a2) argument order
     40   ret <2 x double> %res
     41 }
     42 declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) ; intrinsic called by the two vfmadd_sd tests above
     43 
     44 define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { ; generic llvm.fma on <4 x float>; the assertions below expect one xmm vfmaddps then retq
     45 ; CHECK-LABEL: test_x86_fma_vfmadd_ps:
     46 ; CHECK:       # %bb.0:
     47 ; CHECK-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
     48 ; CHECK-NEXT:    retq # encoding: [0xc3]
     49   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; fma(a0, a1, a2)
     50   ret <4 x float> %1
     51 }
     52 
     53 define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { ; generic llvm.fma on <2 x double>; the assertions below expect one xmm vfmaddpd then retq
     54 ; CHECK-LABEL: test_x86_fma_vfmadd_pd:
     55 ; CHECK:       # %bb.0:
     56 ; CHECK-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
     57 ; CHECK-NEXT:    retq # encoding: [0xc3]
     58   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; fma(a0, a1, a2)
     59   ret <2 x double> %1
     60 }
     61 
     62 define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { ; generic llvm.fma on <8 x float>; the assertions below expect one ymm vfmaddps then retq
     63 ; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
     64 ; CHECK:       # %bb.0:
     65 ; CHECK-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
     66 ; CHECK-NEXT:    retq # encoding: [0xc3]
     67   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; fma(a0, a1, a2)
     68   ret <8 x float> %1
     69 }
     70 
     71 define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { ; generic llvm.fma on <4 x double>; the assertions below expect one ymm vfmaddpd then retq
     72 ; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
     73 ; CHECK:       # %bb.0:
     74 ; CHECK-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
     75 ; CHECK-NEXT:    retq # encoding: [0xc3]
     76   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; fma(a0, a1, a2)
     77   ret <4 x double> %1
     78 }
     79 
     80 ; VFMSUB
     81 define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { ; llvm.fma with a negated addend on <4 x float>; the assertions below expect the negation to fold into one xmm vfmsubps
     82 ; CHECK-LABEL: test_x86_fma_vfmsub_ps:
     83 ; CHECK:       # %bb.0:
     84 ; CHECK-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
     85 ; CHECK-NEXT:    retq # encoding: [0xc3]
     86   %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 ; -0.0 - %a2, i.e. the negation of %a2
     87   %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %1) ; fma(a0, a1, -a2)
     88   ret <4 x float> %2
     89 }
     90 
     91 define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { ; llvm.fma with a negated addend on <2 x double>; the assertions below expect the negation to fold into one xmm vfmsubpd
     92 ; CHECK-LABEL: test_x86_fma_vfmsub_pd:
     93 ; CHECK:       # %bb.0:
     94 ; CHECK-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
     95 ; CHECK-NEXT:    retq # encoding: [0xc3]
     96   %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2 ; -0.0 - %a2, i.e. the negation of %a2
     97   %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %1) ; fma(a0, a1, -a2)
     98   ret <2 x double> %2
     99 }
    100 
    101 define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { ; llvm.fma with a negated addend on <8 x float>; the assertions below expect the negation to fold into one ymm vfmsubps
    102 ; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
    103 ; CHECK:       # %bb.0:
    104 ; CHECK-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
    105 ; CHECK-NEXT:    retq # encoding: [0xc3]
    106   %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 ; -0.0 - %a2, i.e. the negation of %a2
    107   %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %1) ; fma(a0, a1, -a2)
    108   ret <8 x float> %2
    109 }
    110 
    111 define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { ; llvm.fma with a negated addend on <4 x double>; the assertions below expect the negation to fold into one ymm vfmsubpd
    112 ; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
    113 ; CHECK:       # %bb.0:
    114 ; CHECK-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
    115 ; CHECK-NEXT:    retq # encoding: [0xc3]
    116   %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2 ; -0.0 - %a2, i.e. the negation of %a2
    117   %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %1) ; fma(a0, a1, -a2)
    118   ret <4 x double> %2
    119 }
    120 
    121 ; VFNMADD
    122 define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { ; llvm.fma with a negated first multiplicand on <4 x float>; the assertions below expect the negation to fold into one xmm vfnmaddps
    123 ; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
    124 ; CHECK:       # %bb.0:
    125 ; CHECK-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
    126 ; CHECK-NEXT:    retq # encoding: [0xc3]
    127   %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0 ; -0.0 - %a0, i.e. the negation of %a0
    128   %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %a2) ; fma(-a0, a1, a2)
    129   ret <4 x float> %2
    130 }
    131 
    132 define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { ; llvm.fma with a negated first multiplicand on <2 x double>; the assertions below expect the negation to fold into one xmm vfnmaddpd
    133 ; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
    134 ; CHECK:       # %bb.0:
    135 ; CHECK-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
    136 ; CHECK-NEXT:    retq # encoding: [0xc3]
    137   %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0 ; -0.0 - %a0, i.e. the negation of %a0
    138   %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %a2) ; fma(-a0, a1, a2)
    139   ret <2 x double> %2
    140 }
    141 
    142 define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { ; llvm.fma with a negated first multiplicand on <8 x float>; the assertions below expect the negation to fold into one ymm vfnmaddps
    143 ; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
    144 ; CHECK:       # %bb.0:
    145 ; CHECK-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
    146 ; CHECK-NEXT:    retq # encoding: [0xc3]
    147   %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0 ; -0.0 - %a0, i.e. the negation of %a0
    148   %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %a2) ; fma(-a0, a1, a2)
    149   ret <8 x float> %2
    150 }
    151 
    152 define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { ; llvm.fma with a negated first multiplicand on <4 x double>; the assertions below expect the negation to fold into one ymm vfnmaddpd
    153 ; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
    154 ; CHECK:       # %bb.0:
    155 ; CHECK-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
    156 ; CHECK-NEXT:    retq # encoding: [0xc3]
    157   %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0 ; -0.0 - %a0, i.e. the negation of %a0
    158   %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %a2) ; fma(-a0, a1, a2)
    159   ret <4 x double> %2
    160 }
    161 
    162 ; VFNMSUB
    163 define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { ; llvm.fma with both the first multiplicand and the addend negated on <4 x float>; the assertions below expect one xmm vfnmsubps
    164 ; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
    165 ; CHECK:       # %bb.0:
    166 ; CHECK-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
    167 ; CHECK-NEXT:    retq # encoding: [0xc3]
    168   %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0 ; -0.0 - %a0, i.e. the negation of %a0
    169   %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 ; -0.0 - %a2, i.e. the negation of %a2
    170   %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %2) ; fma(-a0, a1, -a2)
    171   ret <4 x float> %3
    172 }
    173 
    174 define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { ; llvm.fma with both the first multiplicand and the addend negated on <2 x double>; the assertions below expect one xmm vfnmsubpd
    175 ; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
    176 ; CHECK:       # %bb.0:
    177 ; CHECK-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
    178 ; CHECK-NEXT:    retq # encoding: [0xc3]
    179   %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0 ; -0.0 - %a0, i.e. the negation of %a0
    180   %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2 ; -0.0 - %a2, i.e. the negation of %a2
    181   %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %2) ; fma(-a0, a1, -a2)
    182   ret <2 x double> %3
    183 }
    184 
    185 define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { ; llvm.fma with both the first multiplicand and the addend negated on <8 x float>; the assertions below expect one ymm vfnmsubps
    186 ; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
    187 ; CHECK:       # %bb.0:
    188 ; CHECK-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
    189 ; CHECK-NEXT:    retq # encoding: [0xc3]
    190   %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0 ; -0.0 - %a0, i.e. the negation of %a0
    191   %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 ; -0.0 - %a2, i.e. the negation of %a2
    192   %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %2) ; fma(-a0, a1, -a2)
    193   ret <8 x float> %3
    194 }
    195 
    196 define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { ; llvm.fma with both the first multiplicand and the addend negated on <4 x double>; the assertions below expect one ymm vfnmsubpd
    197 ; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
    198 ; CHECK:       # %bb.0:
    199 ; CHECK-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
    200 ; CHECK-NEXT:    retq # encoding: [0xc3]
    201   %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0 ; -0.0 - %a0, i.e. the negation of %a0
    202   %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2 ; -0.0 - %a2, i.e. the negation of %a2
    203   %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %2) ; fma(-a0, a1, -a2)
    204   ret <4 x double> %3
    205 }
    206 
    207 ; VFMADDSUB
    208 define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { ; blends an add-form and a subtract-form llvm.fma lane by lane on <4 x float>; the assertions below expect the whole pattern to become one xmm vfmaddsubps
    209 ; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
    210 ; CHECK:       # %bb.0:
    211 ; CHECK-NEXT:    vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
    212 ; CHECK-NEXT:    retq # encoding: [0xc3]
    213   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; add form: fma(a0, a1, a2)
    214   %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 ; -0.0 - %a2, i.e. the negation of %a2
    215   %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2) ; subtract form: fma(a0, a1, -a2)
    216   %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ; even result lanes come from %3 (subtract), odd lanes from %1 (add)
    217   ret <4 x float> %4
    218 }
    219 
    220 define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { ; blends an add-form and a subtract-form llvm.fma lane by lane on <2 x double>; the assertions below expect the whole pattern to become one xmm vfmaddsubpd
    221 ; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
    222 ; CHECK:       # %bb.0:
    223 ; CHECK-NEXT:    vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
    224 ; CHECK-NEXT:    retq # encoding: [0xc3]
    225   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; add form: fma(a0, a1, a2)
    226   %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2 ; -0.0 - %a2, i.e. the negation of %a2
    227   %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2) ; subtract form: fma(a0, a1, -a2)
    228   %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3> ; lane 0 comes from %3 (subtract), lane 1 from %1 (add)
    229   ret <2 x double> %4
    230 }
    231 
    232 define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { ; blends an add-form and a subtract-form llvm.fma lane by lane on <8 x float>; the assertions below expect the whole pattern to become one ymm vfmaddsubps
    233 ; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
    234 ; CHECK:       # %bb.0:
    235 ; CHECK-NEXT:    vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
    236 ; CHECK-NEXT:    retq # encoding: [0xc3]
    237   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; add form: fma(a0, a1, a2)
    238   %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 ; -0.0 - %a2, i.e. the negation of %a2
    239   %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2) ; subtract form: fma(a0, a1, -a2)
    240   %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> ; even result lanes come from %3 (subtract), odd lanes from %1 (add)
    241   ret <8 x float> %4
    242 }
    243 
    244 define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { ; blends an add-form and a subtract-form llvm.fma lane by lane on <4 x double>; the assertions below expect the whole pattern to become one ymm vfmaddsubpd
    245 ; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
    246 ; CHECK:       # %bb.0:
    247 ; CHECK-NEXT:    vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
    248 ; CHECK-NEXT:    retq # encoding: [0xc3]
    249   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; add form: fma(a0, a1, a2)
    250   %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2 ; -0.0 - %a2, i.e. the negation of %a2
    251   %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2) ; subtract form: fma(a0, a1, -a2)
    252   %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ; even result lanes come from %3 (subtract), odd lanes from %1 (add)
    253   ret <4 x double> %4
    254 }
    255 
    256 ; VFMSUBADD
    257 define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { ; same two llvm.fma results as the vfmaddsub_ps test but with the shuffle sources swapped; the assertions below expect one xmm vfmsubaddps
    258 ; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
    259 ; CHECK:       # %bb.0:
    260 ; CHECK-NEXT:    vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
    261 ; CHECK-NEXT:    retq # encoding: [0xc3]
    262   %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; add form: fma(a0, a1, a2)
    263   %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 ; -0.0 - %a2, i.e. the negation of %a2
    264   %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2) ; subtract form: fma(a0, a1, -a2)
    265   %4 = shufflevector <4 x float> %1, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ; even result lanes come from %1 (add), odd lanes from %3 (subtract)
    266   ret <4 x float> %4
    267 }
    268 
    269 define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { ; same two llvm.fma results as the vfmaddsub_pd test but with the shuffle sources swapped; the assertions below expect one xmm vfmsubaddpd
    270 ; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
    271 ; CHECK:       # %bb.0:
    272 ; CHECK-NEXT:    vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
    273 ; CHECK-NEXT:    retq # encoding: [0xc3]
    274   %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; add form: fma(a0, a1, a2)
    275   %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2 ; -0.0 - %a2, i.e. the negation of %a2
    276   %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2) ; subtract form: fma(a0, a1, -a2)
    277   %4 = shufflevector <2 x double> %1, <2 x double> %3, <2 x i32> <i32 0, i32 3> ; lane 0 comes from %1 (add), lane 1 from %3 (subtract)
    278   ret <2 x double> %4
    279 }
    280 
    281 define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { ; same two llvm.fma results as the vfmaddsub_ps_256 test but with the shuffle sources swapped; the assertions below expect one ymm vfmsubaddps
    282 ; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
    283 ; CHECK:       # %bb.0:
    284 ; CHECK-NEXT:    vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
    285 ; CHECK-NEXT:    retq # encoding: [0xc3]
    286   %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; add form: fma(a0, a1, a2)
    287   %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 ; -0.0 - %a2, i.e. the negation of %a2
    288   %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2) ; subtract form: fma(a0, a1, -a2)
    289   %4 = shufflevector <8 x float> %1, <8 x float> %3, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> ; even result lanes come from %1 (add), odd lanes from %3 (subtract)
    290   ret <8 x float> %4
    291 }
    292 
    293 define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { ; same two llvm.fma results as the vfmaddsub_pd_256 test but with the shuffle sources swapped; the assertions below expect one ymm vfmsubaddpd
    294 ; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
    295 ; CHECK:       # %bb.0:
    296 ; CHECK-NEXT:    vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
    297 ; CHECK-NEXT:    retq # encoding: [0xc3]
    298   %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; add form: fma(a0, a1, a2)
    299   %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2 ; -0.0 - %a2, i.e. the negation of %a2
    300   %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2) ; subtract form: fma(a0, a1, -a2)
    301   %4 = shufflevector <4 x double> %1, <4 x double> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ; even result lanes come from %1 (add), odd lanes from %3 (subtract)
    302   ret <4 x double> %4
    303 }
    304 
    305 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) ; generic llvm.fma declarations, one per vector type used by the tests above; no attribute group is attached because this file defines only #0
    306 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
    307 declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
    308 declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
    309 
    310 attributes #0 = { nounwind } ; attribute group referenced by the #0 suffix on every test function's define line
    311