Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512dq  | FileCheck %s  --check-prefix=CHECK --check-prefix=SKX
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
      4 
      5 ; This test checks combinations of FNEG and FMA intrinsics on AVX-512 targets.
      6 ; PR28892
      7 
      8 define <16 x float> @test1(<16 x float> %a, <16 x float> %b, <16 x float> %c)  {
      9 ; CHECK-LABEL: test1:
     10 ; CHECK:       # %bb.0: # %entry
     11 ; CHECK-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0
     12 ; CHECK-NEXT:    retq
        ; Negated addend: %c is negated (fsub -0.0, %c) and passed as the third operand of the 512-bit vfmadd intrinsic; the expected asm is a single vfmsub213ps with no separate sign-flip instruction.
     13 entry:
     14   %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
     15   %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 4) #2
     16   ret <16 x float> %0
     17 }
     18 
     19 declare <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i32)
     20 declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
     21 declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
     22 
     23 
     24 define <16 x float> @test2(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
     25 ; CHECK-LABEL: test2:
     26 ; CHECK:       # %bb.0: # %entry
     27 ; CHECK-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
     28 ; CHECK-NEXT:    retq
        ; Negated result: the output of the 512-bit vfmadd intrinsic is negated (fsub -0.0, %0); the expected asm folds this into a single vfnmsub213ps.
     29 entry:
     30   %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i32 4) #2
     31   %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
     32   ret <16 x float> %sub.i
     33 }
     34 
     35 define <16 x float> @test3(<16 x float> %a, <16 x float> %b, <16 x float> %c)  {
     36 ; CHECK-LABEL: test3:
     37 ; CHECK:       # %bb.0: # %entry
     38 ; CHECK-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0
     39 ; CHECK-NEXT:    retq
        ; Negated result of mask.vfnmadd called with an all-ones mask (i16 -1); the expected asm is a single unmasked vfmsub213ps.
     40 entry:
     41   %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2
     42   %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
     43   ret <16 x float> %sub.i
     44 }
     45 
     46 define <16 x float> @test4(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
     47 ; CHECK-LABEL: test4:
     48 ; CHECK:       # %bb.0: # %entry
     49 ; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0
     50 ; CHECK-NEXT:    retq
        ; Negated result of mask.vfnmsub called with an all-ones mask (i16 -1); the expected asm is a plain vfmadd213ps.
     51 entry:
     52   %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 4) #2
     53   %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
     54   ret <16 x float> %sub.i
     55 }
     56 
     57 define <16 x float> @test5(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
     58 ; CHECK-LABEL: test5:
     59 ; CHECK:       # %bb.0: # %entry
     60 ; CHECK-NEXT:    vfmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0
     61 ; CHECK-NEXT:    retq
        ; Negated addend with an explicit rounding operand: the i32 2 argument shows up as {ru-sae} in the expected asm, and the fold to vfmsub213ps must keep it.
     62 entry:
     63   %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
     64   %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 2) #2
     65   ret <16 x float> %0
     66 }
     67 
     68 define <16 x float> @test6(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
     69 ; CHECK-LABEL: test6:
     70 ; CHECK:       # %bb.0: # %entry
     71 ; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
     72 ; CHECK-NEXT:    retq
        ; Negated result of mask.vfnmsub (all-ones mask, rounding operand i32 2); the expected asm is vfmadd213ps carrying the same {ru-sae} modifier.
     73 entry:
     74   %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 -1, i32 2) #2
     75   %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
     76   ret <16 x float> %sub.i
     77 }
     78 
     79 
     80 define <8 x float> @test7(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
     81 ; CHECK-LABEL: test7:
     82 ; CHECK:       # %bb.0: # %entry
     83 ; CHECK-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
     84 ; CHECK-NEXT:    retq
        ; 256-bit case: the result of the fma.vfmsub intrinsic is negated; the expected asm is a single vfnmadd213ps computing -(b * a) + c.
     85 entry:
     86   %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
     87   %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
     88   ret <8 x float> %sub.i
     89 }
     90 
     91 define <8 x float> @test8(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
     92 ; CHECK-LABEL: test8:
     93 ; CHECK:       # %bb.0: # %entry
     94 ; CHECK-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
     95 ; CHECK-NEXT:    retq
        ; 256-bit case: a negated %c is fed to the fma.vfmsub intrinsic; the two negations cancel and the expected asm is a plain vfmadd213ps.
     96 entry:
     97   %sub.c = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
     98   %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %sub.c) #2
     99   ret <8 x float> %0
    100 }
    101 
    102 declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
    103 
    104 
    105 define <8 x double> @test9(<8 x double> %a, <8 x double> %b, <8 x double> %c) {
    106 ; CHECK-LABEL: test9:
    107 ; CHECK:       # %bb.0: # %entry
    108 ; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0
    109 ; CHECK-NEXT:    retq
        ; Double-precision variant: the result of the 512-bit vfmadd.pd intrinsic is negated; the expected asm is a single vfnmsub213pd.
    110 entry:
    111   %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32 4) #2
    112   %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %0
    113   ret <8 x double> %sub.i
    114 }
    115 
    116 declare <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32)
    117 
    118 define <2 x double> @test10(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
    119 ; CHECK-LABEL: test10:
    120 ; CHECK:       # %bb.0: # %entry
    121 ; CHECK-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0
    122 ; CHECK-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
    123 ; CHECK-NEXT:    retq
        ; Scalar case where the negation is NOT folded: the whole <2 x double> result of mask.vfmadd.sd is negated and the expected asm keeps vfmadd213sd followed by a vxorpd against a constant loaded from memory.
        ; NOTE(review): presumably the fold is skipped because the scalar FMA only computes element 0 while the fsub negates both elements - confirm against the combine.
    124 entry:
    125   %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 -1, i32 4) #2
    126   %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %0
    127   ret <2 x double> %sub.i
    128 }
    129 
    130 declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8, i32)
    131 
    132 define <4 x float> @test11(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
    133 ; SKX-LABEL: test11:
    134 ; SKX:       # %bb.0: # %entry
    135 ; SKX-NEXT:    vxorps {{.*}}(%rip){1to4}, %xmm2, %xmm3
    136 ; SKX-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
    137 ; SKX-NEXT:    kmovd %edi, %k1
    138 ; SKX-NEXT:    vmovss %xmm0, %xmm3, %xmm3 {%k1}
    139 ; SKX-NEXT:    vmovaps %xmm3, %xmm0
    140 ; SKX-NEXT:    retq
    141 ;
    142 ; KNL-LABEL: test11:
    143 ; KNL:       # %bb.0: # %entry
    144 ; KNL-NEXT:    vbroadcastss {{.*#+}} xmm3 = [-0,-0,-0,-0]
    145 ; KNL-NEXT:    vxorps %xmm3, %xmm2, %xmm3
    146 ; KNL-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
    147 ; KNL-NEXT:    kmovw %edi, %k1
    148 ; KNL-NEXT:    vmovss %xmm0, %xmm3, %xmm3 {%k1}
    149 ; KNL-NEXT:    vmovaps %xmm3, %xmm0
    150 ; KNL-NEXT:    retq
        ; mask3 scalar form with a negated addend and a variable mask: the expected asm computes the product-minus-%c with vfmsub213ss on the un-negated %c, but still materialises the negated %c (vxorps) and merges the FMA result into it with a masked vmovss.
    151 entry:
    152   %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
    153   %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 4) #10
    154   ret <4 x float> %0
    155 }
    156 
    157 declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
    158 
    159 define <4 x float> @test11b(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
    160 ; SKX-LABEL: test11b:
    161 ; SKX:       # %bb.0: # %entry
    162 ; SKX-NEXT:    kmovd %edi, %k1
    163 ; SKX-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1}
    164 ; SKX-NEXT:    retq
    165 ;
    166 ; KNL-LABEL: test11b:
    167 ; KNL:       # %bb.0: # %entry
    168 ; KNL-NEXT:    kmovw %edi, %k1
    169 ; KNL-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1}
    170 ; KNL-NEXT:    retq
        ; Same IR shape as test11 but using the mask (not mask3) scalar intrinsic: here the negated addend folds completely into one masked vfmsub213ss; the two targets differ only in the kmov width.
    171 entry:
    172   %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
    173   %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 4) #10
    174   ret <4 x float> %0
    175 }
    176 
    177 declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
    178 
    179 define <8 x double> @test12(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
    180 ; SKX-LABEL: test12:
    181 ; SKX:       # %bb.0: # %entry
    182 ; SKX-NEXT:    kmovd %edi, %k1
    183 ; SKX-NEXT:    vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1}
    184 ; SKX-NEXT:    vxorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
    185 ; SKX-NEXT:    retq
    186 ;
    187 ; KNL-LABEL: test12:
    188 ; KNL:       # %bb.0: # %entry
    189 ; KNL-NEXT:    kmovw %edi, %k1
    190 ; KNL-NEXT:    vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1}
    191 ; KNL-NEXT:    vpxorq {{.*}}(%rip){1to8}, %zmm0, %zmm0
    192 ; KNL-NEXT:    retq
        ; Negation applied after a mask select (FMA result vs. pass-through %a): the expected asm keeps a masked vfmadd132pd followed by an explicit sign-flip xor (vxorpd on the first run line, vpxorq on the second), i.e. the negation is not folded into the FMA.
    193 entry:
    194   %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32 4) #2
    195   %bc = bitcast i8 %mask to <8 x i1>
    196   %sel = select <8 x i1> %bc, <8 x double> %0, <8 x double> %a
    197   %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %sel
    198   ret <8 x double> %sub.i
    199 }
    200 
    201 define <2 x double> @test13(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
    202 ; SKX-LABEL: test13:
    203 ; SKX:       # %bb.0: # %entry
    204 ; SKX-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm3
    205 ; SKX-NEXT:    vfnmadd213sd {{.*#+}} xmm1 = -(xmm0 * xmm1) + xmm2
    206 ; SKX-NEXT:    kmovd %edi, %k1
    207 ; SKX-NEXT:    vmovsd %xmm1, %xmm3, %xmm3 {%k1}
    208 ; SKX-NEXT:    vmovapd %xmm3, %xmm0
    209 ; SKX-NEXT:    retq
    210 ;
    211 ; KNL-LABEL: test13:
    212 ; KNL:       # %bb.0: # %entry
    213 ; KNL-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm3
    214 ; KNL-NEXT:    vfnmadd213sd {{.*#+}} xmm1 = -(xmm0 * xmm1) + xmm2
    215 ; KNL-NEXT:    kmovw %edi, %k1
    216 ; KNL-NEXT:    vmovsd %xmm1, %xmm3, %xmm3 {%k1}
    217 ; KNL-NEXT:    vmovapd %xmm3, %xmm0
    218 ; KNL-NEXT:    retq
    219 
        ; Negated first multiplicand in the masked scalar intrinsic: the expected asm uses vfnmadd213sd on the un-negated %a for the arithmetic, but the negated %a is still materialised (vxorpd) because it is the value the masked vmovsd merges the result into.
    220 entry:
    221   %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
    222   %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
    223   ret <2 x double> %0
    224 }
    225 
    226 define <16 x float> @test14(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
    227 ; SKX-LABEL: test14:
    228 ; SKX:       # %bb.0: # %entry
    229 ; SKX-NEXT:    kmovd %edi, %k1
    230 ; SKX-NEXT:    vfnmsub132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
    231 ; SKX-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
    232 ; SKX-NEXT:    retq
    233 ;
    234 ; KNL-LABEL: test14:
    235 ; KNL:       # %bb.0: # %entry
    236 ; KNL-NEXT:    kmovw %edi, %k1
    237 ; KNL-NEXT:    vfnmsub132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
    238 ; KNL-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
    239 ; KNL-NEXT:    retq
        ; Negated result of mask.vfnmsub with a variable mask and rounding operand i32 2: unlike the all-ones-mask variant (test6), the expected asm keeps the masked vfnmsub132ps {ru-sae} and applies the negation as a separate xor.
    240 entry:
    241   %0 = tail call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 2) #2
    242   %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
    243   ret <16 x float> %sub.i
    244 }
    245 
    246 define <16 x float> @test15(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask)  {
    247 ; SKX-LABEL: test15:
    248 ; SKX:       # %bb.0: # %entry
    249 ; SKX-NEXT:    kmovd %edi, %k1
    250 ; SKX-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm3
    251 ; SKX-NEXT:    vfnmadd213ps {ru-sae}, %zmm2, %zmm0, %zmm1
    252 ; SKX-NEXT:    vmovaps %zmm1, %zmm3 {%k1}
    253 ; SKX-NEXT:    vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm3 {%k1}
    254 ; SKX-NEXT:    vmovaps %zmm3, %zmm0
    255 ; SKX-NEXT:    retq
    256 ;
    257 ; KNL-LABEL: test15:
    258 ; KNL:       # %bb.0: # %entry
    259 ; KNL-NEXT:    kmovw %edi, %k1
    260 ; KNL-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm3
    261 ; KNL-NEXT:    vfnmadd213ps {ru-sae}, %zmm2, %zmm0, %zmm1
    262 ; KNL-NEXT:    vmovaps %zmm1, %zmm3 {%k1}
    263 ; KNL-NEXT:    vfnmadd132ps {rd-sae}, %zmm0, %zmm2, %zmm3 {%k1}
    264 ; KNL-NEXT:    vmovaps %zmm3, %zmm0
    265 ; KNL-NEXT:    retq
        ; Two chained FMAs sharing one negated operand (-%a): the first (rounding operand i32 2) is selected against -%a, the second (rounding operand i32 1) multiplies that select by -%a again and is selected against the first select. The expected asm uses vfnmadd forms with {ru-sae} and {rd-sae} respectively, while -%a is still materialised once as the pass-through value.
    266 entry:
    267   %bc = bitcast i16 %mask to <16 x i1>
    268   %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
    269   %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub.i, <16 x float> %b, <16 x float> %c, i32 2)
    270   %sel = select <16 x i1> %bc, <16 x float> %0, <16 x float> %sub.i
    271   %1 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sel, <16 x float> %sub.i, <16 x float> %c, i32 1)
    272   %sel2 = select <16 x i1> %bc, <16 x float> %1, <16 x float> %sel
    273   ret <16 x float> %sel2
    274 }
    275 
    276 define <16 x float> @test16(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
    277 ; SKX-LABEL: test16:
    278 ; SKX:       # %bb.0:
    279 ; SKX-NEXT:    kmovd %edi, %k1
    280 ; SKX-NEXT:    vfmsubadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
    281 ; SKX-NEXT:    retq
    282 ;
    283 ; KNL-LABEL: test16:
    284 ; KNL:       # %bb.0:
    285 ; KNL-NEXT:    kmovw %edi, %k1
    286 ; KNL-NEXT:    vfmsubadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
    287 ; KNL-NEXT:    retq
        ; vfmaddsub with a negated addend (rounding operand i32 1) followed by a mask select against %a: the expected asm is one masked vfmsubadd132ps carrying {rd-sae}. This function has no explicit entry label, so the expected block comment is just "# %bb.0:".
    288   %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
    289   %res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %sub.i, i32 1)
    290   %bc = bitcast i16 %mask to <16 x i1>
    291   %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a
    292   ret <16 x float> %sel
    293 }
    294 declare <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i32)
    295 
    296 define <8 x double> @test17(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
    297 ; SKX-LABEL: test17:
    298 ; SKX:       # %bb.0:
    299 ; SKX-NEXT:    kmovd %edi, %k1
    300 ; SKX-NEXT:    vfmsubadd132pd %zmm1, %zmm2, %zmm0 {%k1}
    301 ; SKX-NEXT:    retq
    302 ;
    303 ; KNL-LABEL: test17:
    304 ; KNL:       # %bb.0:
    305 ; KNL-NEXT:    kmovw %edi, %k1
    306 ; KNL-NEXT:    vfmsubadd132pd %zmm1, %zmm2, %zmm0 {%k1}
    307 ; KNL-NEXT:    retq
        ; Double-precision vfmaddsub with a negated addend (rounding operand i32 4, no rounding modifier in the expected asm) followed by a mask select against %a: expected to become one masked vfmsubadd132pd.
    308   %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
    309   %res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %sub.i, i32 4)
    310   %bc = bitcast i8 %mask to <8 x i1>
    311   %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a
    312   ret <8 x double> %sel
    313 }
    314 declare <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i32)
    315 
    316 define <4 x float> @test18(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
    317 ; SKX-LABEL: test18:
    318 ; SKX:       # %bb.0: # %entry
    319 ; SKX-NEXT:    kmovd %edi, %k1
    320 ; SKX-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1}
    321 ; SKX-NEXT:    retq
    322 ;
    323 ; KNL-LABEL: test18:
    324 ; KNL:       # %bb.0: # %entry
    325 ; KNL-NEXT:    kmovw %edi, %k1
    326 ; KNL-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1}
    327 ; KNL-NEXT:    retq
        ; Masked scalar FMA with a negated second multiplicand (-%b): expected to fold into one masked vfnmadd213ss.
    328 entry:
    329   %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
    330   %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 4) #10
    331   ret <4 x float> %0
    332 }
    333 
    334 define <4 x float> @test19(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
    335 ; SKX-LABEL: test19:
    336 ; SKX:       # %bb.0: # %entry
    337 ; SKX-NEXT:    kmovd %edi, %k1
    338 ; SKX-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1}
    339 ; SKX-NEXT:    retq
    340 ;
    341 ; KNL-LABEL: test19:
    342 ; KNL:       # %bb.0: # %entry
    343 ; KNL-NEXT:    kmovw %edi, %k1
    344 ; KNL-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1}
    345 ; KNL-NEXT:    retq
        ; Masked scalar FMA with both the second multiplicand and the addend negated (-%b, -%c): expected to fold into one masked vfnmsub213ss.
    346 entry:
    347   %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
    348   %sub.i.2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
    349   %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %sub.i.2, i8 %mask, i32 4) #10
    350   ret <4 x float> %0
    351 }
    352 
    353 define <4 x float> @test20(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
    354 ; SKX-LABEL: test20:
    355 ; SKX:       # %bb.0: # %entry
    356 ; SKX-NEXT:    kmovd %edi, %k1
    357 ; SKX-NEXT:    vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1}
    358 ; SKX-NEXT:    vmovaps %xmm2, %xmm0
    359 ; SKX-NEXT:    retq
    360 ;
    361 ; KNL-LABEL: test20:
    362 ; KNL:       # %bb.0: # %entry
    363 ; KNL-NEXT:    kmovw %edi, %k1
    364 ; KNL-NEXT:    vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1}
    365 ; KNL-NEXT:    vmovaps %xmm2, %xmm0
    366 ; KNL-NEXT:    retq
        ; mask3 scalar FMA with a negated second multiplicand (-%b): expected to fold into a masked vfnmadd231ss that accumulates into the register holding %c (xmm2), followed by a copy into the return register.
    367 entry:
    368   %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
    369   %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 4) #10
    370   ret <4 x float> %0
    371 }
    372 
    373 define <4 x float> @test21(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
    374 ; SKX-LABEL: test21:
    375 ; SKX:       # %bb.0: # %entry
    376 ; SKX-NEXT:    kmovd %edi, %k1
    377 ; SKX-NEXT:    vfnmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
    378 ; SKX-NEXT:    retq
    379 ;
    380 ; KNL-LABEL: test21:
    381 ; KNL:       # %bb.0: # %entry
    382 ; KNL-NEXT:    kmovw %edi, %k1
    383 ; KNL-NEXT:    vfnmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
    384 ; KNL-NEXT:    retq
        ; Masked scalar FMA with -%b and an explicit rounding operand (i32 8, shown as {rn-sae} in the expected asm): expected to fold into one masked vfnmadd213ss that keeps the rounding modifier.
    385 entry:
    386   %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
    387   %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 8) #10
    388   ret <4 x float> %0
    389 }
    390 
    391 define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
    392 ; SKX-LABEL: test22:
    393 ; SKX:       # %bb.0: # %entry
    394 ; SKX-NEXT:    kmovd %edi, %k1
    395 ; SKX-NEXT:    vfnmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
    396 ; SKX-NEXT:    retq
    397 ;
    398 ; KNL-LABEL: test22:
    399 ; KNL:       # %bb.0: # %entry
    400 ; KNL-NEXT:    kmovw %edi, %k1
    401 ; KNL-NEXT:    vfnmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
    402 ; KNL-NEXT:    retq
        ; Masked scalar FMA with -%b and -%c plus an explicit rounding operand (i32 8, shown as {rn-sae}): expected to fold into one masked vfnmsub213ss that keeps the rounding modifier.
    403 entry:
    404   %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
    405   %sub.i.2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
    406   %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %sub.i.2, i8 %mask, i32 8) #10
    407   ret <4 x float> %0
    408 }
    409 
    410 define <4 x float> @test23(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
    411 ; SKX-LABEL: test23:
    412 ; SKX:       # %bb.0: # %entry
    413 ; SKX-NEXT:    kmovd %edi, %k1
    414 ; SKX-NEXT:    vfnmadd231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
    415 ; SKX-NEXT:    vmovaps %xmm2, %xmm0
    416 ; SKX-NEXT:    retq
    417 ;
    418 ; KNL-LABEL: test23:
    419 ; KNL:       # %bb.0: # %entry
    420 ; KNL-NEXT:    kmovw %edi, %k1
    421 ; KNL-NEXT:    vfnmadd231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
    422 ; KNL-NEXT:    vmovaps %xmm2, %xmm0
    423 ; KNL-NEXT:    retq
        ; mask3 scalar FMA with -%b and an explicit rounding operand (i32 8, shown as {rn-sae}): expected to fold into a masked vfnmadd231ss accumulating into xmm2, followed by a copy into the return register.
    424 entry:
    425   %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
    426   %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c, i8 %mask, i32 8) #10
    427   ret <4 x float> %0
    428 }
    429 
    430 define <4 x float> @test24(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 zeroext %mask) local_unnamed_addr #0 {
    431 ; SKX-LABEL: test24:
    432 ; SKX:       # %bb.0: # %entry
    433 ; SKX-NEXT:    kmovd %edi, %k1
    434 ; SKX-NEXT:    vfmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
    435 ; SKX-NEXT:    retq
    436 ;
    437 ; KNL-LABEL: test24:
    438 ; KNL:       # %bb.0: # %entry
    439 ; KNL-NEXT:    kmovw %edi, %k1
    440 ; KNL-NEXT:    vfmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
    441 ; KNL-NEXT:    retq
        ; Masked scalar FMA with a negated addend (-%c) and an explicit rounding operand (i32 8, shown as {rn-sae}): expected to fold into one masked vfmsub213ss that keeps the rounding modifier.
    442 entry:
    443   %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
    444   %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i, i8 %mask, i32 8) #10
    445   ret <4 x float> %0
    446 }
    447 
    448 define <16 x float> @test25(<16 x float> %a, <16 x float> %b, <16 x float> %c)  {
    449 ; CHECK-LABEL: test25:
    450 ; CHECK:       # %bb.0: # %entry
    451 ; CHECK-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
    452 ; CHECK-NEXT:    retq
        ; Unmasked 512-bit FMA with both -%b and -%c and an explicit rounding operand (i32 8, shown as {rn-sae}): expected to fold into a single vfnmsub213ps that keeps the rounding modifier.
    453 entry:
    454   %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
    455   %sub.i.2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
    456   %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %sub.i, <16 x float> %sub.i.2, i32 8) #2
    457   ret <16 x float> %0
    458 }
    459