; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s

      4 declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
      5 declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
      6 
      7 define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
      8 ; CHECK-LABEL: test_x86_vfnmadd_ps_z:
      9 ; CHECK:       ## %bb.0:
     10 ; CHECK-NEXT:    vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
     11 ; CHECK-NEXT:    retq
     12   %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
     13   ret <16 x float> %res
     14 }
     15 declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
     16 
     17 define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
     18 ; CHECK-LABEL: test_mask_vfnmadd_ps:
     19 ; CHECK:       ## %bb.0:
     20 ; CHECK-NEXT:    kmovw %edi, %k1
     21 ; CHECK-NEXT:    vfnmadd132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2
     22 ; CHECK-NEXT:    retq
     23   %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
     24   ret <16 x float> %res
     25 }
     26 
     27 define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
     28 ; CHECK-LABEL: test_x86_vfnmadd_pd_z:
     29 ; CHECK:       ## %bb.0:
     30 ; CHECK-NEXT:    vfnmadd213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
     31 ; CHECK-NEXT:    retq
     32   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
     33   ret <8 x double> %res
     34 }
     35 declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
     36 
     37 define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
     38 ; CHECK-LABEL: test_mask_vfnmadd_pd:
     39 ; CHECK:       ## %bb.0:
     40 ; CHECK-NEXT:    kmovw %edi, %k1
     41 ; CHECK-NEXT:    vfnmadd132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2
     42 ; CHECK-NEXT:    retq
     43   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
     44   ret <8 x double> %res
     45 }
     46 
     47 define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
     48 ; CHECK-LABEL: test_x86_vfnmsubps_z:
     49 ; CHECK:       ## %bb.0:
     50 ; CHECK-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
     51 ; CHECK-NEXT:    retq
     52   %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
     53   ret <16 x float> %res
     54 }
     55 declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
     56 
     57 define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
     58 ; CHECK-LABEL: test_mask_vfnmsub_ps:
     59 ; CHECK:       ## %bb.0:
     60 ; CHECK-NEXT:    kmovw %edi, %k1
     61 ; CHECK-NEXT:    vfnmsub132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2
     62 ; CHECK-NEXT:    retq
     63   %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
     64   ret <16 x float> %res
     65 }
     66 
     67 define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
     68 ; CHECK-LABEL: test_x86_vfnmsubpd_z:
     69 ; CHECK:       ## %bb.0:
     70 ; CHECK-NEXT:    vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
     71 ; CHECK-NEXT:    retq
     72   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
     73   ret <8 x double> %res
     74 }
     75 declare <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
     76 
     77 define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
     78 ; CHECK-LABEL: test_mask_vfnmsub_pd:
     79 ; CHECK:       ## %bb.0:
     80 ; CHECK-NEXT:    kmovw %edi, %k1
     81 ; CHECK-NEXT:    vfnmsub132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2
     82 ; CHECK-NEXT:    retq
     83   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
     84   ret <8 x double> %res
     85 }
     86 
     87 define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
     88 ; CHECK-LABEL: test_x86_vfmaddsubps_z:
     89 ; CHECK:       ## %bb.0:
     90 ; CHECK-NEXT:    vfmaddsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
     91 ; CHECK-NEXT:    retq
     92   %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
     93   ret <16 x float> %res
     94 }
     95 
     96 define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
     97 ; CHECK-LABEL: test_mask_fmaddsub_ps:
     98 ; CHECK:       ## %bb.0:
     99 ; CHECK-NEXT:    kmovw %edi, %k1
    100 ; CHECK-NEXT:    vfmaddsub132ps {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2
    101 ; CHECK-NEXT:    retq
    102   %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
    103   ret <16 x float> %res
    104 }
    105 
    106 declare <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
    107 
    108 define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
    109 ; CHECK-LABEL: test_x86_vfmaddsubpd_z:
    110 ; CHECK:       ## %bb.0:
    111 ; CHECK-NEXT:    vfmaddsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
    112 ; CHECK-NEXT:    retq
    113   %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
    114   ret <8 x double> %res
    115 }
    116 declare <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
    117 
    118 define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
    119 ; CHECK-LABEL: test_mask_vfmaddsub_pd:
    120 ; CHECK:       ## %bb.0:
    121 ; CHECK-NEXT:    kmovw %edi, %k1
    122 ; CHECK-NEXT:    vfmaddsub132pd {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2
    123 ; CHECK-NEXT:    retq
    124   %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
    125   ret <8 x double> %res
    126 }
    127 
    128 define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
    129 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
    130 ; CHECK:       ## %bb.0:
    131 ; CHECK-NEXT:    kmovw %edi, %k1
    132 ; CHECK-NEXT:    vfmaddsub132pd {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2
    133 ; CHECK-NEXT:    retq
    134   %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
    135   ret <8 x double> %res
    136 }
    137 
    138 declare <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
    139 
    140 define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
    141 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
    142 ; CHECK:       ## %bb.0:
    143 ; CHECK-NEXT:    kmovw %edi, %k1
    144 ; CHECK-NEXT:    vfmaddsub231pd {{.*#+}} zmm2 = (zmm0 * zmm1) +/- zmm2
    145 ; CHECK-NEXT:    vmovapd %zmm2, %zmm0
    146 ; CHECK-NEXT:    retq
    147   %res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
    148   ret <8 x double> %res
    149 }
    150 
    151 declare <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
    152 
    153 define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
    154 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
    155 ; CHECK:       ## %bb.0:
    156 ; CHECK-NEXT:    kmovw %edi, %k1
    157 ; CHECK-NEXT:    vfmaddsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
    158 ; CHECK-NEXT:    retq
    159   %res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
    160   ret <8 x double> %res
    161 }
    162 
    163 define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
    164 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
    165 ; CHECK:       ## %bb.0:
    166 ; CHECK-NEXT:    kmovw %edi, %k1
    167 ; CHECK-NEXT:    vfmaddsub132ps {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2
    168 ; CHECK-NEXT:    retq
    169   %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
    170   ret <16 x float> %res
    171 }
    172 
    173 declare <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
    174 
    175 define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
    176 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
    177 ; CHECK:       ## %bb.0:
    178 ; CHECK-NEXT:    kmovw %edi, %k1
    179 ; CHECK-NEXT:    vfmaddsub231ps {{.*#+}} zmm2 = (zmm0 * zmm1) +/- zmm2
    180 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0
    181 ; CHECK-NEXT:    retq
    182   %res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
    183   ret <16 x float> %res
    184 }
    185 
    186 declare <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
    187 
    188 define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
    189 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
    190 ; CHECK:       ## %bb.0:
    191 ; CHECK-NEXT:    kmovw %edi, %k1
    192 ; CHECK-NEXT:    vfmaddsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
    193 ; CHECK-NEXT:    retq
    194   %res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
    195   ret <16 x float> %res
    196 }
    197 
    198 declare <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
    199 
    200 define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
    201 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
    202 ; CHECK:       ## %bb.0:
    203 ; CHECK-NEXT:    kmovw %edi, %k1
    204 ; CHECK-NEXT:    vfmsubadd231pd {{.*#+}} zmm2 = (zmm0 * zmm1) -/+ zmm2
    205 ; CHECK-NEXT:    vmovapd %zmm2, %zmm0
    206 ; CHECK-NEXT:    retq
    207   %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
    208   ret <8 x double> %res
    209 }
    210 
    211 declare <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
    212 
    213 define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
    214 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
    215 ; CHECK:       ## %bb.0:
    216 ; CHECK-NEXT:    kmovw %edi, %k1
    217 ; CHECK-NEXT:    vfmsubadd231ps {{.*#+}} zmm2 = (zmm0 * zmm1) -/+ zmm2
    218 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0
    219 ; CHECK-NEXT:    retq
    220   %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
    221   ret <16 x float> %res
    222 }
    223 
    224 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
    225 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne:
    226 ; CHECK:       ## %bb.0:
    227 ; CHECK-NEXT:    kmovw %edi, %k1
    228 ; CHECK-NEXT:    vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
    229 ; CHECK-NEXT:    retq
    230   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
    231   ret <16 x float> %res
    232 }
    233 
    234 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
    235 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn:
    236 ; CHECK:       ## %bb.0:
    237 ; CHECK-NEXT:    kmovw %edi, %k1
    238 ; CHECK-NEXT:    vfmadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
    239 ; CHECK-NEXT:    retq
    240   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
    241   ret <16 x float> %res
    242 }
    243 
    244 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
    245 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp:
    246 ; CHECK:       ## %bb.0:
    247 ; CHECK-NEXT:    kmovw %edi, %k1
    248 ; CHECK-NEXT:    vfmadd132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
    249 ; CHECK-NEXT:    retq
    250   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
    251   ret <16 x float> %res
    252 }
    253 
    254 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
    255 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz:
    256 ; CHECK:       ## %bb.0:
    257 ; CHECK-NEXT:    kmovw %edi, %k1
    258 ; CHECK-NEXT:    vfmadd132ps {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
    259 ; CHECK-NEXT:    retq
    260   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
    261   ret <16 x float> %res
    262 }
    263 
    264 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
    265 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current:
    266 ; CHECK:       ## %bb.0:
    267 ; CHECK-NEXT:    kmovw %edi, %k1
    268 ; CHECK-NEXT:    vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm2
    269 ; CHECK-NEXT:    retq
    270   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
    271   ret <16 x float> %res
    272 }
    273 
    274 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
    275 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne:
    276 ; CHECK:       ## %bb.0:
    277 ; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
    278 ; CHECK-NEXT:    retq
    279   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
    280   ret <16 x float> %res
    281 }
    282 
    283 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
    284 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn:
    285 ; CHECK:       ## %bb.0:
    286 ; CHECK-NEXT:    vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0
    287 ; CHECK-NEXT:    retq
    288   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
    289   ret <16 x float> %res
    290 }
    291 
    292 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
    293 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp:
    294 ; CHECK:       ## %bb.0:
    295 ; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
    296 ; CHECK-NEXT:    retq
    297   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
    298   ret <16 x float> %res
    299 }
    300 
    301 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
    302 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz:
    303 ; CHECK:       ## %bb.0:
    304 ; CHECK-NEXT:    vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0
    305 ; CHECK-NEXT:    retq
    306   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
    307   ret <16 x float> %res
    308 }
    309 
    310 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
    311 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current:
    312 ; CHECK:       ## %bb.0:
    313 ; CHECK-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
    314 ; CHECK-NEXT:    retq
    315   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
    316   ret <16 x float> %res
    317 }
    318 
    319 declare <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
    320 
    321 define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
    322 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
    323 ; CHECK:       ## %bb.0:
    324 ; CHECK-NEXT:    kmovw %edi, %k1
    325 ; CHECK-NEXT:    vfmsub231pd {{.*#+}} zmm2 = (zmm0 * zmm1) - zmm2
    326 ; CHECK-NEXT:    vmovapd %zmm2, %zmm0
    327 ; CHECK-NEXT:    retq
    328   %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
    329   ret <8 x double> %res
    330 }
    331 
    332 declare <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
    333 
    334 define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
    335 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
    336 ; CHECK:       ## %bb.0:
    337 ; CHECK-NEXT:    kmovw %edi, %k1
    338 ; CHECK-NEXT:    vfmsub231ps {{.*#+}} zmm2 = (zmm0 * zmm1) - zmm2
    339 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0
    340 ; CHECK-NEXT:    retq
    341   %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
    342   ret <16 x float> %res
    343 }
    344 
    345 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
    346 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne:
    347 ; CHECK:       ## %bb.0:
    348 ; CHECK-NEXT:    kmovw %edi, %k1
    349 ; CHECK-NEXT:    vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
    350 ; CHECK-NEXT:    retq
    351   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
    352   ret <8 x double> %res
    353 }
    354 
    355 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
    356 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn:
    357 ; CHECK:       ## %bb.0:
    358 ; CHECK-NEXT:    kmovw %edi, %k1
    359 ; CHECK-NEXT:    vfmadd132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
    360 ; CHECK-NEXT:    retq
    361   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
    362   ret <8 x double> %res
    363 }
    364 
    365 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
    366 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp:
    367 ; CHECK:       ## %bb.0:
    368 ; CHECK-NEXT:    kmovw %edi, %k1
    369 ; CHECK-NEXT:    vfmadd132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
    370 ; CHECK-NEXT:    retq
    371   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
    372   ret <8 x double> %res
    373 }
    374 
    375 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
    376 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz:
    377 ; CHECK:       ## %bb.0:
    378 ; CHECK-NEXT:    kmovw %edi, %k1
    379 ; CHECK-NEXT:    vfmadd132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
    380 ; CHECK-NEXT:    retq
    381   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
    382   ret <8 x double> %res
    383 }
    384 
    385 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
    386 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current:
    387 ; CHECK:       ## %bb.0:
    388 ; CHECK-NEXT:    kmovw %edi, %k1
    389 ; CHECK-NEXT:    vfmadd132pd {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm2
    390 ; CHECK-NEXT:    retq
    391   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
    392   ret <8 x double> %res
    393 }
    394 
    395 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
    396 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne:
    397 ; CHECK:       ## %bb.0:
    398 ; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
    399 ; CHECK-NEXT:    retq
    400   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
    401   ret <8 x double> %res
    402 }
    403 
    404 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
    405 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn:
    406 ; CHECK:       ## %bb.0:
    407 ; CHECK-NEXT:    vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0
    408 ; CHECK-NEXT:    retq
    409   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
    410   ret <8 x double> %res
    411 }
    412 
    413 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
    414 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp:
    415 ; CHECK:       ## %bb.0:
    416 ; CHECK-NEXT:    vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0
    417 ; CHECK-NEXT:    retq
    418   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
    419   ret <8 x double> %res
    420 }
    421 
    422 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
    423 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz:
    424 ; CHECK:       ## %bb.0:
    425 ; CHECK-NEXT:    vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0
    426 ; CHECK-NEXT:    retq
    427   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
    428   ret <8 x double> %res
    429 }
    430 
    431 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
    432 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current:
    433 ; CHECK:       ## %bb.0:
    434 ; CHECK-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
    435 ; CHECK-NEXT:    retq
    436   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
    437   ret <8 x double> %res
    438 }
    439 
    440 define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
    441 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
    442 ; CHECK:       ## %bb.0:
    443 ; CHECK-NEXT:    kmovw %edi, %k1
    444 ; CHECK-NEXT:    vfmadd132pd {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm2
    445 ; CHECK-NEXT:    retq
    446   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
    447   ret <8 x double> %res
    448 }
    449 
    450 declare <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
    451 
    452 define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
    453 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
    454 ; CHECK:       ## %bb.0:
    455 ; CHECK-NEXT:    kmovw %edi, %k1
    456 ; CHECK-NEXT:    vfmadd231pd {{.*#+}} zmm2 = (zmm0 * zmm1) + zmm2
    457 ; CHECK-NEXT:    vmovapd %zmm2, %zmm0
    458 ; CHECK-NEXT:    retq
    459   %res = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
    460   ret <8 x double> %res
    461 }
    462 
    463 declare <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
    464 
    465 define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
    466 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
    467 ; CHECK:       ## %bb.0:
    468 ; CHECK-NEXT:    kmovw %edi, %k1
    469 ; CHECK-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
    470 ; CHECK-NEXT:    retq
    471   %res = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
    472   ret <8 x double> %res
    473 }
    474 
    475 define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
    476 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
    477 ; CHECK:       ## %bb.0:
    478 ; CHECK-NEXT:    kmovw %edi, %k1
    479 ; CHECK-NEXT:    vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm2
    480 ; CHECK-NEXT:    retq
    481   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
    482   ret <16 x float> %res
    483 }
    484 
    485 declare <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
    486 
    487 define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
    488 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
    489 ; CHECK:       ## %bb.0:
    490 ; CHECK-NEXT:    kmovw %edi, %k1
    491 ; CHECK-NEXT:    vfmadd231ps {{.*#+}} zmm2 = (zmm0 * zmm1) + zmm2
    492 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0
    493 ; CHECK-NEXT:    retq
    494   %res = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
    495   ret <16 x float> %res
    496 }
    497 
    498 declare <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
    499 
    500 define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
    501 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
    502 ; CHECK:       ## %bb.0:
    503 ; CHECK-NEXT:    kmovw %edi, %k1
    504 ; CHECK-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
    505 ; CHECK-NEXT:    retq
    506   %res = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
    507   ret <16 x float> %res
    508 }
    509 
    510 
    511 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
    512 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
    513 ; CHECK:       ## %bb.0:
    514 ; CHECK-NEXT:    kmovw %edi, %k1
    515 ; CHECK-NEXT:    vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
    516 ; CHECK-NEXT:    retq
    517   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
    518   ret <8 x double> %res
    519 }
    520 
    521 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
    522 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn:
    523 ; CHECK:       ## %bb.0:
    524 ; CHECK-NEXT:    kmovw %edi, %k1
    525 ; CHECK-NEXT:    vfnmsub132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1}
    526 ; CHECK-NEXT:    retq
    527   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
    528   ret <8 x double> %res
    529 }
    530 
    531 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
    532 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp:
    533 ; CHECK:       ## %bb.0:
    534 ; CHECK-NEXT:    kmovw %edi, %k1
    535 ; CHECK-NEXT:    vfnmsub132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1}
    536 ; CHECK-NEXT:    retq
    537   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
    538   ret <8 x double> %res
    539 }
    540 
    541 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
    542 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz:
    543 ; CHECK:       ## %bb.0:
    544 ; CHECK-NEXT:    kmovw %edi, %k1
    545 ; CHECK-NEXT:    vfnmsub132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1}
    546 ; CHECK-NEXT:    retq
    547   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
    548   ret <8 x double> %res
    549 }
    550 
; Masked FNMSUB, packed f64, rounding immediate 4 -> CUR_DIRECTION: no
; embedded-rounding suffix is emitted, so the asm comment shows the plain
; flattened form -(zmm0 * zmm1) - zmm2. Merge-masking into zmm0 via %k1.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}
    560 
; Unmasked (mask = -1, all lanes) FNMSUB, packed f64, rounding immediate 0
; -> embedded {rn-sae} (round to nearest even). No kmovw / {%k1} expected.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
  ret <8 x double> %res
}
    569 
; Unmasked (mask = -1) FNMSUB, packed f64, rounding immediate 1 ->
; embedded {rd-sae} (round toward negative infinity).
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
  ret <8 x double> %res
}
    578 
; Unmasked (mask = -1) FNMSUB, packed f64, rounding immediate 2 ->
; embedded {ru-sae} (round toward positive infinity).
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
  ret <8 x double> %res
}
    587 
; Unmasked (mask = -1) FNMSUB, packed f64, rounding immediate 3 ->
; embedded {rz-sae} (round toward zero / truncate).
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
  ret <8 x double> %res
}
    596 
; Unmasked (mask = -1) FNMSUB, packed f64, rounding immediate 4 ->
; CUR_DIRECTION: no embedded-rounding suffix in the selected instruction.
define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
    605 
; "mask" flavor of the vfnmsub.pd.512 intrinsic: the result merges into the
; first operand's register (zmm0 = %x0) under %k1, selecting the 132 form.
define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}
    615 
    616 declare <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
    617 
; "mask3" flavor: the result merges into the THIRD operand's register
; (zmm2 = %x2) under %k1, selecting the 231 form; the extra vmovapd copies
; the merged result into the return register zmm0.
define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub231pd {{.*#+}} zmm2 = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    vmovapd %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}
    628 
; Packed f32 counterpart of the masked vfnmsub test above: merge into
; zmm0 (%x0) under %k1, 132 form, current rounding (immediate 4).
define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}
    638 
    639 declare <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
    640 
; Packed f32 "mask3" flavor: merge into the third operand's register
; (zmm2 = %x2) under %k1, 231 form, then vmovaps the result into zmm0.
define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmsub231ps {{.*#+}} zmm2 = -(zmm0 * zmm1) - zmm2
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}
    651 
; Masked FNMADD (note "+ zmm2": negated product, added addend), packed f64,
; current rounding (immediate 4); merge into zmm0 (%x0) under %k1, 132 form.
define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  ret <8 x double> %res
}
    661 
; Packed f32 counterpart of the masked vfnmadd test above: merge into
; zmm0 (%x0) under %k1, 132 form, current rounding (immediate 4).
define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfnmadd132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  ret <16 x float> %res
}
    671