Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+fma,-fma4 | FileCheck %s --check-prefix=CHECK
      3 
      ; Packed <4 x float> fused multiply-add: a single llvm.fma.v4f32 call on
      ; %a, %b, %c whose result is returned directly. The assertions expect one
      ; vfmadd213ps on xmm registers followed by retq.
      4 define <4 x float> @test_mm_fmadd_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
      5 ; CHECK-LABEL: test_mm_fmadd_ps:
      6 ; CHECK:       # %bb.0: # %entry
      7 ; CHECK-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
      8 ; CHECK-NEXT:    retq
      9 entry:
     10   %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
     11   ret <4 x float> %0
     12 }
     13 
     ; Packed <2 x double> fused multiply-add: a single llvm.fma.v2f64 call on
     ; %a, %b, %c whose result is returned directly. The assertions expect one
     ; vfmadd213pd on xmm registers followed by retq.
     14 define <2 x double> @test_mm_fmadd_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
     15 ; CHECK-LABEL: test_mm_fmadd_pd:
     16 ; CHECK:       # %bb.0: # %entry
     17 ; CHECK-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
     18 ; CHECK-NEXT:    retq
     19 entry:
     20   %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
     21   ret <2 x double> %0
     22 }
     23 
     ; Scalar float fused multiply-add on lane 0: element 0 of %a, %b and %c is
     ; extracted, fed to llvm.fma.f32, and the result is inserted back into lane 0
     ; of %a (the other lanes of %a pass through). The assertions expect a single
     ; vfmadd213ss followed by retq.
     24 define <4 x float> @test_mm_fmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
     25 ; CHECK-LABEL: test_mm_fmadd_ss:
     26 ; CHECK:       # %bb.0: # %entry
     27 ; CHECK-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
     28 ; CHECK-NEXT:    retq
     29 entry:
     30   %0 = extractelement <4 x float> %a, i64 0
     31   %1 = extractelement <4 x float> %b, i64 0
     32   %2 = extractelement <4 x float> %c, i64 0
     33   %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2
     34   %4 = insertelement <4 x float> %a, float %3, i64 0
     35   ret <4 x float> %4
     36 }
     37 
     ; Scalar double fused multiply-add on lane 0: element 0 of %a, %b and %c is
     ; extracted, fed to llvm.fma.f64, and the result is inserted back into lane 0
     ; of %a (the other lane of %a passes through). The assertions expect a single
     ; vfmadd213sd followed by retq.
     38 define <2 x double> @test_mm_fmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
     39 ; CHECK-LABEL: test_mm_fmadd_sd:
     40 ; CHECK:       # %bb.0: # %entry
     41 ; CHECK-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
     42 ; CHECK-NEXT:    retq
     43 entry:
     44   %0 = extractelement <2 x double> %a, i64 0
     45   %1 = extractelement <2 x double> %b, i64 0
     46   %2 = extractelement <2 x double> %c, i64 0
     47   %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2
     48   %4 = insertelement <2 x double> %a, double %3, i64 0
     49   ret <2 x double> %4
     50 }
     51 
     ; Packed <4 x float> multiply-subtract written as fma(a, b, -c). The
     ; assertions expect the negation to stay a separate instruction: a vxorps of
     ; xmm2 with a RIP-relative memory operand, then a plain vfmadd213ps.
     52 define <4 x float> @test_mm_fmsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
     53 ; CHECK-LABEL: test_mm_fmsub_ps:
     54 ; CHECK:       # %bb.0: # %entry
     55 ; CHECK-NEXT:    vxorps {{.*}}(%rip), %xmm2, %xmm2
     56 ; CHECK-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
     57 ; CHECK-NEXT:    retq
     58 entry:
     ; -0.0 - %c in every lane, i.e. %c negated.
     59   %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
     60   %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i) #2
     61   ret <4 x float> %0
     62 }
     63 
     ; Packed <2 x double> multiply-subtract written as fma(a, b, -c). The
     ; assertions expect the negation to stay a separate instruction: a vxorpd of
     ; xmm2 with a RIP-relative memory operand, then a plain vfmadd213pd.
     64 define <2 x double> @test_mm_fmsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
     65 ; CHECK-LABEL: test_mm_fmsub_pd:
     66 ; CHECK:       # %bb.0: # %entry
     67 ; CHECK-NEXT:    vxorpd {{.*}}(%rip), %xmm2, %xmm2
     68 ; CHECK-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
     69 ; CHECK-NEXT:    retq
     70 entry:
     ; -0.0 - %c in every lane, i.e. %c negated.
     71   %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
     72   %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %sub.i) #2
     73   ret <2 x double> %0
     74 }
     75 
     ; Scalar float multiply-subtract on lane 0: fma(a[0], b[0], -c[0]) inserted
     ; back into lane 0 of %a. Here the assertions expect the negation to be
     ; absorbed into a single vfmsub213ss.
     76 define <4 x float> @test_mm_fmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
     77 ; CHECK-LABEL: test_mm_fmsub_ss:
     78 ; CHECK:       # %bb.0: # %entry
     79 ; CHECK-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
     80 ; CHECK-NEXT:    retq
     81 entry:
     82   %0 = extractelement <4 x float> %a, i64 0
     83   %1 = extractelement <4 x float> %b, i64 0
     84   %.rhs.i = extractelement <4 x float> %c, i64 0
     ; -0.0 - c[0], i.e. the addend negated.
     85   %2 = fsub float -0.000000e+00, %.rhs.i
     86   %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2
     87   %4 = insertelement <4 x float> %a, float %3, i64 0
     88   ret <4 x float> %4
     89 }
     90 
     ; Scalar double multiply-subtract on lane 0: fma(a[0], b[0], -c[0]) inserted
     ; back into lane 0 of %a. The assertions expect the negation to be absorbed
     ; into a single vfmsub213sd.
     91 define <2 x double> @test_mm_fmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
     92 ; CHECK-LABEL: test_mm_fmsub_sd:
     93 ; CHECK:       # %bb.0: # %entry
     94 ; CHECK-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
     95 ; CHECK-NEXT:    retq
     96 entry:
     97   %0 = extractelement <2 x double> %a, i64 0
     98   %1 = extractelement <2 x double> %b, i64 0
     99   %.rhs.i = extractelement <2 x double> %c, i64 0
     ; -0.0 - c[0], i.e. the addend negated.
    100   %2 = fsub double -0.000000e+00, %.rhs.i
    101   %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2
    102   %4 = insertelement <2 x double> %a, double %3, i64 0
    103   ret <2 x double> %4
    104 }
    105 
    ; Packed <4 x float> negated multiply-add written as fma(-a, b, c). The
    ; assertions expect a separate vxorps of xmm0 with a RIP-relative memory
    ; operand, then a plain vfmadd213ps.
    106 define <4 x float> @test_mm_fnmadd_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
    107 ; CHECK-LABEL: test_mm_fnmadd_ps:
    108 ; CHECK:       # %bb.0: # %entry
    109 ; CHECK-NEXT:    vxorps {{.*}}(%rip), %xmm0, %xmm0
    110 ; CHECK-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
    111 ; CHECK-NEXT:    retq
    112 entry:
    ; -0.0 - %a in every lane, i.e. the first multiplicand negated.
    113   %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
    114   %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i, <4 x float> %b, <4 x float> %c) #2
    115   ret <4 x float> %0
    116 }
    117 
    ; Packed <2 x double> negated multiply-add written as fma(-a, b, c). The
    ; assertions expect a separate vxorpd of xmm0 with a RIP-relative memory
    ; operand, then a plain vfmadd213pd.
    118 define <2 x double> @test_mm_fnmadd_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
    119 ; CHECK-LABEL: test_mm_fnmadd_pd:
    120 ; CHECK:       # %bb.0: # %entry
    121 ; CHECK-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
    122 ; CHECK-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
    123 ; CHECK-NEXT:    retq
    124 entry:
    ; -0.0 - %a in every lane, i.e. the first multiplicand negated.
    125   %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
    126   %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c) #2
    127   ret <2 x double> %0
    128 }
    129 
    ; Scalar float negated multiply-add on lane 0: fma(a[0], -b[0], c[0])
    ; inserted back into lane 0 of %a. Note that the negation is applied to the
    ; element taken from %b, not %a. The assertions expect a single vfnmadd213ss.
    130 define <4 x float> @test_mm_fnmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
    131 ; CHECK-LABEL: test_mm_fnmadd_ss:
    132 ; CHECK:       # %bb.0: # %entry
    133 ; CHECK-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
    134 ; CHECK-NEXT:    retq
    135 entry:
    136   %0 = extractelement <4 x float> %a, i64 0
    137   %.rhs.i = extractelement <4 x float> %b, i64 0
    ; -0.0 - b[0], i.e. the second multiplicand negated.
    138   %1 = fsub float -0.000000e+00, %.rhs.i
    139   %2 = extractelement <4 x float> %c, i64 0
    140   %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2
    141   %4 = insertelement <4 x float> %a, float %3, i64 0
    142   ret <4 x float> %4
    143 }
    144 
    ; Scalar double negated multiply-add on lane 0: fma(a[0], -b[0], c[0])
    ; inserted back into lane 0 of %a. Note that the negation is applied to the
    ; element taken from %b, not %a. The assertions expect a single vfnmadd213sd.
    145 define <2 x double> @test_mm_fnmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
    146 ; CHECK-LABEL: test_mm_fnmadd_sd:
    147 ; CHECK:       # %bb.0: # %entry
    148 ; CHECK-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
    149 ; CHECK-NEXT:    retq
    150 entry:
    151   %0 = extractelement <2 x double> %a, i64 0
    152   %.rhs.i = extractelement <2 x double> %b, i64 0
    ; -0.0 - b[0], i.e. the second multiplicand negated.
    153   %1 = fsub double -0.000000e+00, %.rhs.i
    154   %2 = extractelement <2 x double> %c, i64 0
    155   %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2
    156   %4 = insertelement <2 x double> %a, double %3, i64 0
    157   ret <2 x double> %4
    158 }
    159 
    ; Packed <4 x float> negated multiply-subtract written as fma(-a, b, -c).
    ; The assertions expect both negations to remain explicit: the -0.0 splat is
    ; loaded into xmm3 with vmovaps, xmm0 and xmm2 are each vxorps'd with it, and
    ; a plain vfmadd231ps combines the results.
    160 define <4 x float> @test_mm_fnmsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
    161 ; CHECK-LABEL: test_mm_fnmsub_ps:
    162 ; CHECK:       # %bb.0: # %entry
    163 ; CHECK-NEXT:    vmovaps {{.*#+}} xmm3 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
    164 ; CHECK-NEXT:    vxorps %xmm3, %xmm0, %xmm4
    165 ; CHECK-NEXT:    vxorps %xmm3, %xmm2, %xmm0
    166 ; CHECK-NEXT:    vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm4) + xmm0
    167 ; CHECK-NEXT:    retq
    168 entry:
    ; -0.0 - %a and -0.0 - %c: multiplicand and addend are both negated.
    169   %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
    170   %sub1.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
    171   %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i, <4 x float> %b, <4 x float> %sub1.i) #2
    172   ret <4 x float> %0
    173 }
    174 
    ; Packed <2 x double> negated multiply-subtract written as fma(-a, b, -c).
    ; The assertions expect both negations to remain explicit: the -0.0 splat is
    ; loaded into xmm3 with vmovapd, xmm0 and xmm2 are each vxorpd'd with it, and
    ; a plain vfmadd231pd combines the results.
    175 define <2 x double> @test_mm_fnmsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
    176 ; CHECK-LABEL: test_mm_fnmsub_pd:
    177 ; CHECK:       # %bb.0: # %entry
    178 ; CHECK-NEXT:    vmovapd {{.*#+}} xmm3 = [-0.000000e+00,-0.000000e+00]
    179 ; CHECK-NEXT:    vxorpd %xmm3, %xmm0, %xmm4
    180 ; CHECK-NEXT:    vxorpd %xmm3, %xmm2, %xmm0
    181 ; CHECK-NEXT:    vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm4) + xmm0
    182 ; CHECK-NEXT:    retq
    183 entry:
    ; -0.0 - %a and -0.0 - %c: multiplicand and addend are both negated.
    184   %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
    185   %sub1.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
    186   %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %sub.i, <2 x double> %b, <2 x double> %sub1.i) #2
    187   ret <2 x double> %0
    188 }
    189 
    ; Scalar float negated multiply-subtract on lane 0: fma(a[0], -b[0], -c[0])
    ; inserted back into lane 0 of %a. The assertions expect both negations to be
    ; absorbed into a single vfnmsub213ss.
    190 define <4 x float> @test_mm_fnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
    191 ; CHECK-LABEL: test_mm_fnmsub_ss:
    192 ; CHECK:       # %bb.0: # %entry
    193 ; CHECK-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
    194 ; CHECK-NEXT:    retq
    195 entry:
    196   %0 = extractelement <4 x float> %a, i64 0
    197   %.rhs.i = extractelement <4 x float> %b, i64 0
    ; -0.0 - b[0], i.e. the second multiplicand negated.
    198   %1 = fsub float -0.000000e+00, %.rhs.i
    199   %.rhs2.i = extractelement <4 x float> %c, i64 0
    ; -0.0 - c[0], i.e. the addend negated.
    200   %2 = fsub float -0.000000e+00, %.rhs2.i
    201   %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2
    202   %4 = insertelement <4 x float> %a, float %3, i64 0
    203   ret <4 x float> %4
    204 }
    205 
    ; Scalar double negated multiply-subtract on lane 0: fma(a[0], -b[0], -c[0])
    ; inserted back into lane 0 of %a. The assertions expect both negations to be
    ; absorbed into a single vfnmsub213sd.
    206 define <2 x double> @test_mm_fnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
    207 ; CHECK-LABEL: test_mm_fnmsub_sd:
    208 ; CHECK:       # %bb.0: # %entry
    209 ; CHECK-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
    210 ; CHECK-NEXT:    retq
    211 entry:
    212   %0 = extractelement <2 x double> %a, i64 0
    213   %.rhs.i = extractelement <2 x double> %b, i64 0
    ; -0.0 - b[0], i.e. the second multiplicand negated.
    214   %1 = fsub double -0.000000e+00, %.rhs.i
    215   %.rhs2.i = extractelement <2 x double> %c, i64 0
    ; -0.0 - c[0], i.e. the addend negated.
    216   %2 = fsub double -0.000000e+00, %.rhs2.i
    217   %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2
    218   %4 = insertelement <2 x double> %a, double %3, i64 0
    219   ret <2 x double> %4
    220 }
    221 
    ; Packed <4 x float> alternating multiply-add/subtract built from two fma
    ; calls and a lane-interleaving shuffle. The assertions expect the whole
    ; pattern to become a single vfmaddsub213ps.
    222 define <4 x float> @test_mm_fmaddsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
    223 ; CHECK-LABEL: test_mm_fmaddsub_ps:
    224 ; CHECK:       # %bb.0: # %entry
    225 ; CHECK-NEXT:    vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2
    226 ; CHECK-NEXT:    retq
    227 entry:
    ; %0 = a*b + c; %2 = a*b + (-c), with %1 being %c negated.
    228   %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
    229   %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
    230   %2 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %1) #2
    ; Mask <0,5,2,7>: lanes 0 and 2 come from %2 (subtract), lanes 1 and 3 from
    ; %0 (add).
    231   %3 = shufflevector <4 x float> %2, <4 x float> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
    232   ret <4 x float> %3
    233 }
    234 
    ; Packed <2 x double> alternating multiply-add/subtract built from two fma
    ; calls and a lane-interleaving shuffle. The assertions expect the whole
    ; pattern to become a single vfmaddsub213pd.
    235 define <2 x double> @test_mm_fmaddsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
    236 ; CHECK-LABEL: test_mm_fmaddsub_pd:
    237 ; CHECK:       # %bb.0: # %entry
    238 ; CHECK-NEXT:    vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2
    239 ; CHECK-NEXT:    retq
    240 entry:
    ; %0 = a*b + c; %2 = a*b + (-c), with %1 being %c negated.
    241   %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
    242   %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
    243   %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %1) #2
    ; Mask <0,3>: lane 0 comes from %2 (subtract), lane 1 from %0 (add).
    244   %3 = shufflevector <2 x double> %2, <2 x double> %0, <2 x i32> <i32 0, i32 3>
    245   ret <2 x double> %3
    246 }
    247 
    ; Packed <4 x float> alternating multiply-subtract/add built from two fma
    ; calls and a lane-interleaving shuffle. The assertions expect the whole
    ; pattern to become a single vfmsubadd213ps.
    248 define <4 x float> @test_mm_fmsubadd_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
    249 ; CHECK-LABEL: test_mm_fmsubadd_ps:
    250 ; CHECK:       # %bb.0: # %entry
    251 ; CHECK-NEXT:    vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2
    252 ; CHECK-NEXT:    retq
    253 entry:
    ; %sub.i is %c negated; %0 = a*b + (-c); %1 = a*b + c.
    254   %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
    255   %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i) #2
    256   %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
    ; Mask <0,5,2,7>: lanes 0 and 2 come from %1 (add), lanes 1 and 3 from %0
    ; (subtract).
    257   %2 = shufflevector <4 x float> %1, <4 x float> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
    258   ret <4 x float> %2
    259 }
    260 
    ; Packed <2 x double> alternating multiply-subtract/add built from two fma
    ; calls and a lane-interleaving shuffle. The assertions expect the whole
    ; pattern to become a single vfmsubadd213pd.
    261 define <2 x double> @test_mm_fmsubadd_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
    262 ; CHECK-LABEL: test_mm_fmsubadd_pd:
    263 ; CHECK:       # %bb.0: # %entry
    264 ; CHECK-NEXT:    vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2
    265 ; CHECK-NEXT:    retq
    266 entry:
    ; %sub.i is %c negated; %0 = a*b + (-c); %1 = a*b + c.
    267   %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
    268   %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %sub.i) #2
    269   %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
    ; Mask <0,3>: lane 0 comes from %1 (add), lane 1 from %0 (subtract).
    270   %2 = shufflevector <2 x double> %1, <2 x double> %0, <2 x i32> <i32 0, i32 3>
    271   ret <2 x double> %2
    272 }
    273 
    ; Packed <8 x float> fused multiply-add: a single llvm.fma.v8f32 call on
    ; %a, %b, %c whose result is returned directly. The assertions expect one
    ; vfmadd213ps on ymm registers followed by retq.
    274 define <8 x float> @test_mm256_fmadd_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
    275 ; CHECK-LABEL: test_mm256_fmadd_ps:
    276 ; CHECK:       # %bb.0: # %entry
    277 ; CHECK-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
    278 ; CHECK-NEXT:    retq
    279 entry:
    280   %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
    281   ret <8 x float> %0
    282 }
    283 
    ; Packed <4 x double> fused multiply-add: a single llvm.fma.v4f64 call on
    ; %a, %b, %c whose result is returned directly. The assertions expect one
    ; vfmadd213pd on ymm registers followed by retq.
    284 define <4 x double> @test_mm256_fmadd_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
    285 ; CHECK-LABEL: test_mm256_fmadd_pd:
    286 ; CHECK:       # %bb.0: # %entry
    287 ; CHECK-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
    288 ; CHECK-NEXT:    retq
    289 entry:
    290   %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #2
    291   ret <4 x double> %0
    292 }
    293 
    ; Packed <8 x float> multiply-subtract written as fma(a, b, -c). The
    ; assertions expect the negation to stay a separate instruction: a vxorps of
    ; ymm2 with a RIP-relative memory operand, then a plain vfmadd213ps.
    294 define <8 x float> @test_mm256_fmsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
    295 ; CHECK-LABEL: test_mm256_fmsub_ps:
    296 ; CHECK:       # %bb.0: # %entry
    297 ; CHECK-NEXT:    vxorps {{.*}}(%rip), %ymm2, %ymm2
    298 ; CHECK-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
    299 ; CHECK-NEXT:    retq
    300 entry:
    ; -0.0 - %c in every lane, i.e. %c negated.
    301   %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
    302   %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2
    303   ret <8 x float> %0
    304 }
    305 
    ; Packed <4 x double> multiply-subtract written as fma(a, b, -c). The
    ; assertions expect the negation to stay a separate instruction: a vxorpd of
    ; ymm2 with a RIP-relative memory operand, then a plain vfmadd213pd.
    306 define <4 x double> @test_mm256_fmsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
    307 ; CHECK-LABEL: test_mm256_fmsub_pd:
    308 ; CHECK:       # %bb.0: # %entry
    309 ; CHECK-NEXT:    vxorpd {{.*}}(%rip), %ymm2, %ymm2
    310 ; CHECK-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
    311 ; CHECK-NEXT:    retq
    312 entry:
    ; -0.0 - %c in every lane, i.e. %c negated.
    313   %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
    314   %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %sub.i) #2
    315   ret <4 x double> %0
    316 }
    317 
    ; Packed <8 x float> negated multiply-add written as fma(-a, b, c). The
    ; assertions expect a separate vxorps of ymm0 with a RIP-relative memory
    ; operand, then a plain vfmadd213ps.
    318 define <8 x float> @test_mm256_fnmadd_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
    319 ; CHECK-LABEL: test_mm256_fnmadd_ps:
    320 ; CHECK:       # %bb.0: # %entry
    321 ; CHECK-NEXT:    vxorps {{.*}}(%rip), %ymm0, %ymm0
    322 ; CHECK-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
    323 ; CHECK-NEXT:    retq
    324 entry:
    ; -0.0 - %a in every lane, i.e. the first multiplicand negated.
    325   %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
    326   %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %sub.i, <8 x float> %b, <8 x float> %c) #2
    327   ret <8 x float> %0
    328 }
    329 
    ; Packed <4 x double> negated multiply-add written as fma(-a, b, c). The
    ; assertions expect a separate vxorpd of ymm0 with a RIP-relative memory
    ; operand, then a plain vfmadd213pd.
    330 define <4 x double> @test_mm256_fnmadd_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
    331 ; CHECK-LABEL: test_mm256_fnmadd_pd:
    332 ; CHECK:       # %bb.0: # %entry
    333 ; CHECK-NEXT:    vxorpd {{.*}}(%rip), %ymm0, %ymm0
    334 ; CHECK-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
    335 ; CHECK-NEXT:    retq
    336 entry:
    ; -0.0 - %a in every lane, i.e. the first multiplicand negated.
    337   %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a
    338   %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %sub.i, <4 x double> %b, <4 x double> %c) #2
    339   ret <4 x double> %0
    340 }
    341 
    ; Packed <8 x float> negated multiply-subtract written as fma(-a, b, -c).
    ; The assertions expect both negations to remain explicit: the -0.0 splat is
    ; loaded into ymm3 with vmovaps, ymm0 and ymm2 are each vxorps'd with it, and
    ; a plain vfmadd231ps combines the results.
    342 define <8 x float> @test_mm256_fnmsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
    343 ; CHECK-LABEL: test_mm256_fnmsub_ps:
    344 ; CHECK:       # %bb.0: # %entry
    345 ; CHECK-NEXT:    vmovaps {{.*#+}} ymm3 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
    346 ; CHECK-NEXT:    vxorps %ymm3, %ymm0, %ymm4
    347 ; CHECK-NEXT:    vxorps %ymm3, %ymm2, %ymm0
    348 ; CHECK-NEXT:    vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm4) + ymm0
    349 ; CHECK-NEXT:    retq
    350 entry:
    ; -0.0 - %a and -0.0 - %c: multiplicand and addend are both negated.
    351   %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
    352   %sub1.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
    353   %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %sub.i, <8 x float> %b, <8 x float> %sub1.i) #2
    354   ret <8 x float> %0
    355 }
    356 
    ; Packed <4 x double> negated multiply-subtract written as fma(-a, b, -c).
    ; The assertions expect both negations to remain explicit: the -0.0 splat is
    ; loaded into ymm3 with vmovapd, ymm0 and ymm2 are each vxorpd'd with it, and
    ; a plain vfmadd231pd combines the results.
    357 define <4 x double> @test_mm256_fnmsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
    358 ; CHECK-LABEL: test_mm256_fnmsub_pd:
    359 ; CHECK:       # %bb.0: # %entry
    360 ; CHECK-NEXT:    vmovapd {{.*#+}} ymm3 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
    361 ; CHECK-NEXT:    vxorpd %ymm3, %ymm0, %ymm4
    362 ; CHECK-NEXT:    vxorpd %ymm3, %ymm2, %ymm0
    363 ; CHECK-NEXT:    vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm4) + ymm0
    364 ; CHECK-NEXT:    retq
    365 entry:
    ; -0.0 - %a and -0.0 - %c: multiplicand and addend are both negated.
    366   %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a
    367   %sub1.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
    368   %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %sub.i, <4 x double> %b, <4 x double> %sub1.i) #2
    369   ret <4 x double> %0
    370 }
    371 
    ; Packed <8 x float> alternating multiply-add/subtract built from two fma
    ; calls and a lane-interleaving shuffle. The assertions expect the whole
    ; pattern to become a single vfmaddsub213ps on ymm registers.
    372 define <8 x float> @test_mm256_fmaddsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
    373 ; CHECK-LABEL: test_mm256_fmaddsub_ps:
    374 ; CHECK:       # %bb.0: # %entry
    375 ; CHECK-NEXT:    vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2
    376 ; CHECK-NEXT:    retq
    377 entry:
    ; %0 = a*b + c; %2 = a*b + (-c), with %1 being %c negated.
    378   %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
    379   %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
    380   %2 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %1) #2
    ; Mask <0,9,2,11,4,13,6,15>: even lanes come from %2 (subtract), odd lanes
    ; from %0 (add).
    381   %3 = shufflevector <8 x float> %2, <8 x float> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
    382   ret <8 x float> %3
    383 }
    384 
    ; Packed <4 x double> alternating multiply-add/subtract built from two fma
    ; calls and a lane-interleaving shuffle. The assertions expect the whole
    ; pattern to become a single vfmaddsub213pd on ymm registers.
    385 define <4 x double> @test_mm256_fmaddsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
    386 ; CHECK-LABEL: test_mm256_fmaddsub_pd:
    387 ; CHECK:       # %bb.0: # %entry
    388 ; CHECK-NEXT:    vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2
    389 ; CHECK-NEXT:    retq
    390 entry:
    ; %0 = a*b + c; %2 = a*b + (-c), with %1 being %c negated.
    391   %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #2
    392   %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
    393   %2 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %1) #2
    ; Mask <0,5,2,7>: lanes 0 and 2 come from %2 (subtract), lanes 1 and 3 from
    ; %0 (add).
    394   %3 = shufflevector <4 x double> %2, <4 x double> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
    395   ret <4 x double> %3
    396 }
    397 
    ; Packed <8 x float> alternating multiply-subtract/add built from two fma
    ; calls and a lane-interleaving shuffle. The assertions expect the whole
    ; pattern to become a single vfmsubadd213ps on ymm registers.
    398 define <8 x float> @test_mm256_fmsubadd_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
    399 ; CHECK-LABEL: test_mm256_fmsubadd_ps:
    400 ; CHECK:       # %bb.0: # %entry
    401 ; CHECK-NEXT:    vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2
    402 ; CHECK-NEXT:    retq
    403 entry:
    ; %sub.i is %c negated; %0 = a*b + (-c); %1 = a*b + c.
    404   %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
    405   %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2
    406   %1 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
    ; Mask <0,9,2,11,4,13,6,15>: even lanes come from %1 (add), odd lanes from
    ; %0 (subtract).
    407   %2 = shufflevector <8 x float> %1, <8 x float> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
    408   ret <8 x float> %2
    409 }
    410 
    ; Packed <4 x double> alternating multiply-subtract/add built from two fma
    ; calls and a lane-interleaving shuffle. The assertions expect the whole
    ; pattern to become a single vfmsubadd213pd on ymm registers.
    411 define <4 x double> @test_mm256_fmsubadd_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
    412 ; CHECK-LABEL: test_mm256_fmsubadd_pd:
    413 ; CHECK:       # %bb.0: # %entry
    414 ; CHECK-NEXT:    vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2
    415 ; CHECK-NEXT:    retq
    416 entry:
    ; %sub.i is %c negated; %0 = a*b + (-c); %1 = a*b + c.
    417   %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
    418   %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %sub.i) #2
    419   %1 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #2
    ; Mask <0,5,2,7>: lanes 0 and 2 come from %1 (add), lanes 1 and 3 from %0
    ; (subtract).
    420   %2 = shufflevector <4 x double> %1, <4 x double> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
    421   ret <4 x double> %2
    422 }
    423 
    ; Declarations of the llvm.fma intrinsic overloads called by the test
    ; functions in this file: 128-bit vectors (v4f32, v2f64), scalars (f32, f64)
    ; and 256-bit vectors (v8f32, v4f64).
    ; NOTE(review): attribute group #1 is referenced here (and #2 at the call
    ; sites), but no matching "attributes" definition is visible in this excerpt
    ; -- confirm whether one exists elsewhere in the file.
    424 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
    425 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #1
    426 declare float @llvm.fma.f32(float, float, float) #1
    427 declare double @llvm.fma.f64(double, double, double) #1
    428 declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) #1
    429 declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) #1
    430