; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s

declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
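; NOTE: As the CHECK lines below show, the *.mask.* intrinsics merge masked-off
; lanes from the first source operand, the *.mask3.* variants merge them from
; the third operand, and the *.maskz.* variants zero them. The trailing i32
; operand selects the rounding mode (see the note before the rounding tests).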

define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_ps_z:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfnmadd_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_pd_z:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmsubps_z:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfnmsub_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmsubpd_z:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfmaddsubps_z:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; CHECK-LABEL: test_mask_fmaddsub_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfmaddsubpd_z:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmaddsub_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmaddsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmaddsub231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsubadd231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsubadd231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

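; NOTE: The rounding-mode tests below pass each value of the trailing i32
; operand: 0 = {rn-sae}, 1 = {rd-sae}, 2 = {ru-sae}, 3 = {rz-sae}, and
; 4 = the current rounding mode (no embedded rounding).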
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmadd231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmadd231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}


define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfnmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfnmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}