Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s
      2 
      3 declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
      4 declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
      5 
      ; Unmasked vfnmadd, single precision: calls
      ; @llvm.x86.avx512.mask.vfnmadd.ps.512 with an all-ones mask (i16 -1) and a
      ; trailing i32 operand of 4. The expected pattern only requires a
      ; vfnmadd213ps whose first printed operand is a zmm register.
      6 define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
      7   ; CHECK-LABEL: test_x86_vfnmadd_ps_z
      8   ; CHECK: vfnmadd213ps %zmm
      9   %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
     10   ret <16 x float> %res
     11 }
     12 declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
     13 
     ; Masked vfnmadd, single precision: calls
     ; @llvm.x86.avx512.mask.vfnmadd.ps.512 with the mask taken from the i16 %mask
     ; argument and a trailing i32 operand of 4.
     ; NOTE(review): the expected pattern is only "vfnmadd213ps %zmm", so it does
     ; not verify that a {%k} write-mask operand is emitted - consider tightening.
     14 define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
     15   ; CHECK-LABEL: test_mask_vfnmadd_ps
     16   ; CHECK: vfnmadd213ps %zmm
     17   %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
     18   ret <16 x float> %res
     19 }
     20 
     ; Unmasked vfnmadd, double precision: calls
     ; @llvm.x86.avx512.mask.vfnmadd.pd.512 with an all-ones mask (i8 -1) and a
     ; trailing i32 operand of 4. The expected pattern only requires a
     ; vfnmadd213pd whose first printed operand is a zmm register.
     21 define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
     22   ; CHECK-LABEL: test_x86_vfnmadd_pd_z
     23   ; CHECK: vfnmadd213pd %zmm
     24   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
     25   ret <8 x double> %res
     26 }
     27 declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
     28 
     ; Masked vfnmadd, double precision: calls
     ; @llvm.x86.avx512.mask.vfnmadd.pd.512 with the mask taken from the i8 %mask
     ; argument and a trailing i32 operand of 4.
     ; NOTE(review): the expected pattern is only "vfnmadd213pd %zmm", so it does
     ; not verify that a {%k} write-mask operand is emitted - consider tightening.
     29 define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
     30   ; CHECK-LABEL: test_mask_vfnmadd_pd
     31   ; CHECK: vfnmadd213pd %zmm
     32   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
     33   ret <8 x double> %res
     34 }
     35 
     ; Unmasked vfnmsub, single precision: calls
     ; @llvm.x86.avx512.mask.vfnmsub.ps.512 with an all-ones mask (i16 -1) and a
     ; trailing i32 operand of 4. The expected pattern only requires a
     ; vfnmsub213ps whose first printed operand is a zmm register.
     36 define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
     37   ; CHECK-LABEL: test_x86_vfnmsubps_z
     38   ; CHECK: vfnmsub213ps %zmm
     39   %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
     40   ret <16 x float> %res
     41 }
     42 declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
     43 
     ; Masked vfnmsub, single precision: calls
     ; @llvm.x86.avx512.mask.vfnmsub.ps.512 with the mask taken from the i16 %mask
     ; argument and a trailing i32 operand of 4.
     ; NOTE(review): the expected pattern is only "vfnmsub213ps %zmm", so it does
     ; not verify that a {%k} write-mask operand is emitted - consider tightening.
     44 define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
     45   ; CHECK-LABEL: test_mask_vfnmsub_ps
     46   ; CHECK: vfnmsub213ps %zmm
     47   %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
     48   ret <16 x float> %res
     49 }
     50 
     ; Unmasked vfnmsub, double precision: calls
     ; @llvm.x86.avx512.mask.vfnmsub.pd.512 with an all-ones mask (i8 -1) and a
     ; trailing i32 operand of 4. The expected pattern only requires a
     ; vfnmsub213pd whose first printed operand is a zmm register.
     51 define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
     52   ; CHECK-LABEL: test_x86_vfnmsubpd_z
     53   ; CHECK: vfnmsub213pd %zmm
     54   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
     55   ret <8 x double> %res
     56 }
     57 declare <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
     58 
     ; Masked vfnmsub, double precision: calls
     ; @llvm.x86.avx512.mask.vfnmsub.pd.512 with the mask taken from the i8 %mask
     ; argument and a trailing i32 operand of 4.
     ; NOTE(review): the expected pattern is only "vfnmsub213pd %zmm", so it does
     ; not verify that a {%k} write-mask operand is emitted - consider tightening.
     59 define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
     60   ; CHECK-LABEL: test_mask_vfnmsub_pd
     61   ; CHECK: vfnmsub213pd %zmm
     62   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
     63   ret <8 x double> %res
     64 }
     65 
     ; Unmasked vfmaddsub, single precision: calls
     ; @llvm.x86.avx512.mask.vfmaddsub.ps.512 with an all-ones mask (i16 -1) and a
     ; trailing i32 operand of 4. The expected pattern only requires a
     ; vfmaddsub213ps whose first printed operand is a zmm register.
     66 define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
     67   ; CHECK-LABEL: test_x86_vfmaddsubps_z
     68   ; CHECK: vfmaddsub213ps %zmm
     69   %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
     70   ret <16 x float> %res
     71 }
     72 
     ; Masked vfmaddsub, single precision: calls
     ; @llvm.x86.avx512.mask.vfmaddsub.ps.512 with the mask taken from the i16
     ; %mask argument and a trailing i32 operand of 4. Unlike the looser tests
     ; nearby, the expected line pins the full operand list, the {%k1} mask
     ; operand and the six encoding bytes printed by --show-mc-encoding.
     73 define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
     74 ; CHECK-LABEL: test_mask_fmaddsub_ps:
     75 ; CHECK: vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa6,0xc2]
     76   %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
     77   ret <16 x float> %res
     78 }
     79 
     80 declare <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
     81 
     ; Unmasked vfmaddsub, double precision: calls
     ; @llvm.x86.avx512.mask.vfmaddsub.pd.512 with an all-ones mask (i8 -1) and a
     ; trailing i32 operand of 4. The expected pattern only requires a
     ; vfmaddsub213pd whose first printed operand is a zmm register.
     82 define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
     83   ; CHECK-LABEL: test_x86_vfmaddsubpd_z
     84   ; CHECK: vfmaddsub213pd %zmm
     85   %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
     86   ret <8 x double> %res
     87 }
     88 declare <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
     89 
     ; Masked vfmaddsub, double precision: calls
     ; @llvm.x86.avx512.mask.vfmaddsub.pd.512 with the mask taken from the i8 %mask
     ; argument and a trailing i32 operand of 4.
     ; NOTE(review): the expected pattern is only "vfmaddsub213pd %zmm", so it does
     ; not verify that a {%k} write-mask operand is emitted - consider tightening.
     90 define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
     91   ; CHECK-LABEL: test_mask_vfmaddsub_pd
     92   ; CHECK: vfmaddsub213pd %zmm
     93   %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
     94   ret <8 x double> %res
     95 }
     96 
     ; mask form of vfmaddsub.pd.512, called twice on the same inputs and summed:
     ; once under the %x3 mask with trailing i32 operand 4, once with an all-ones
     ; mask and trailing i32 operand 0. The expected sequence is exact: the mask is
     ; moved from %dil through %eax into %k1, vmovaps %zmm0, %zmm3, the {%k1}
     ; vfmaddsub213pd on zmm3, the {rn-sae} vfmaddsub213pd on zmm0, then vaddpd.
     97 define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
     98 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
     99 ; CHECK:       ## BB#0:
    100 ; CHECK-NEXT:    movzbl %dil, %eax
    101 ; CHECK-NEXT:    kmovw %eax, %k1
    102 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
    103 ; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1}
    104 ; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
    105 ; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
    106 ; CHECK-NEXT:    retq
    107   %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
    108   %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
    109   %res2 = fadd <8 x double> %res, %res1
    110   ret <8 x double> %res2
    111 }
    112 
    113 declare <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
    114 
    ; mask3 form of vfmaddsub.pd.512, called twice on the same inputs and summed:
    ; once under the %x3 mask with trailing i32 operand 4, once with an all-ones
    ; mask and trailing i32 operand 0. The expected sequence is exact: mask into
    ; %k1, vmovaps %zmm2, %zmm3, the {%k1} call printed as the 231 variant on
    ; zmm3, the all-ones call as vfmaddsub213pd {rn-sae} on zmm0, then vaddpd.
    115 define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
    116 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
    117 ; CHECK:       ## BB#0:
    118 ; CHECK-NEXT:    movzbl %dil, %eax
    119 ; CHECK-NEXT:    kmovw %eax, %k1
    120 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3
    121 ; CHECK-NEXT:    vfmaddsub231pd %zmm1, %zmm0, %zmm3 {%k1}
    122 ; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
    123 ; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
    124 ; CHECK-NEXT:    retq
    125   %res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
    126   %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
    127   %res2 = fadd <8 x double> %res, %res1
    128   ret <8 x double> %res2
    129 }
    130 
    131 declare <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
    132 
    ; maskz form of vfmaddsub.pd.512, called twice on the same inputs and summed:
    ; once under the %x3 mask with trailing i32 operand 4, once with an all-ones
    ; mask and trailing i32 operand 0. The expected sequence is exact: mask into
    ; %k1, vmovaps %zmm0, %zmm3, vfmaddsub213pd on zmm3 with {%k1} {z}, the
    ; {rn-sae} vfmaddsub213pd on zmm0, then vaddpd.
    133 define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
    134 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
    135 ; CHECK:       ## BB#0:
    136 ; CHECK-NEXT:    movzbl %dil, %eax
    137 ; CHECK-NEXT:    kmovw %eax, %k1
    138 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
    139 ; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
    140 ; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
    141 ; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
    142 ; CHECK-NEXT:    retq
    143   %res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
    144   %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
    145   %res2 = fadd <8 x double> %res, %res1
    146   ret <8 x double> %res2
    147 }
    148 
    ; mask form of vfmaddsub.ps.512, called twice on the same inputs and summed:
    ; once under the i16 %x3 mask with trailing i32 operand 4, once with an
    ; all-ones mask and trailing i32 operand 0. The expected sequence is exact:
    ; kmovw %edi, %k1, vmovaps %zmm0, %zmm3, the {%k1} vfmaddsub213ps on zmm3,
    ; the {rn-sae} vfmaddsub213ps on zmm0, then vaddps.
    149 define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
    150 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
    151 ; CHECK:       ## BB#0:
    152 ; CHECK-NEXT:    kmovw %edi, %k1
    153 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
    154 ; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1}
    155 ; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
    156 ; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
    157 ; CHECK-NEXT:    retq
    158   %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
    159   %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
    160   %res2 = fadd <16 x float> %res, %res1
    161   ret <16 x float> %res2
    162 }
    163 
    164 declare <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
    165 
    ; mask3 form of vfmaddsub.ps.512, called twice on the same inputs and summed:
    ; once under the i16 %x3 mask with trailing i32 operand 4, once with an
    ; all-ones mask and trailing i32 operand 0. The expected sequence is exact:
    ; kmovw %edi, %k1, vmovaps %zmm2, %zmm3, the {%k1} call printed as the 231
    ; variant on zmm3, the all-ones call as vfmaddsub213ps {rn-sae} on zmm0,
    ; then vaddps.
    166 define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
    167 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
    168 ; CHECK:       ## BB#0:
    169 ; CHECK-NEXT:    kmovw %edi, %k1
    170 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3
    171 ; CHECK-NEXT:    vfmaddsub231ps %zmm1, %zmm0, %zmm3 {%k1}
    172 ; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
    173 ; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
    174 ; CHECK-NEXT:    retq
    175   %res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
    176   %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
    177   %res2 = fadd <16 x float> %res, %res1
    178   ret <16 x float> %res2
    179 }
    180 
    181 declare <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
    182 
    ; maskz form of vfmaddsub.ps.512, called twice on the same inputs and summed:
    ; once under the i16 %x3 mask with trailing i32 operand 4, once with an
    ; all-ones mask and trailing i32 operand 0. The expected sequence is exact:
    ; kmovw %edi, %k1, vmovaps %zmm0, %zmm3, vfmaddsub213ps on zmm3 with
    ; {%k1} {z}, the {rn-sae} vfmaddsub213ps on zmm0, then vaddps.
    183 define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
    184 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
    185 ; CHECK:       ## BB#0:
    186 ; CHECK-NEXT:    kmovw %edi, %k1
    187 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
    188 ; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
    189 ; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
    190 ; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
    191 ; CHECK-NEXT:    retq
    192   %res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
    193   %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
    194   %res2 = fadd <16 x float> %res, %res1
    195   ret <16 x float> %res2
    196 }
    197 
    198 declare <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
    199 
    ; mask3 form of vfmsubadd.pd.512, called twice on the same inputs and summed:
    ; once under the i8 %x3 mask with trailing i32 operand 4, once with an
    ; all-ones mask and trailing i32 operand 0. The expected sequence is exact:
    ; mask moved from %dil through %eax into %k1, vmovaps %zmm2, %zmm3, the {%k1}
    ; call printed as vfmsubadd231pd on zmm3, the all-ones call as
    ; vfmsubadd213pd {rn-sae} on zmm0, then vaddpd.
    200 define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
    201 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
    202 ; CHECK:       ## BB#0:
    203 ; CHECK-NEXT:    movzbl %dil, %eax
    204 ; CHECK-NEXT:    kmovw %eax, %k1
    205 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3
    206 ; CHECK-NEXT:    vfmsubadd231pd %zmm1, %zmm0, %zmm3 {%k1}
    207 ; CHECK-NEXT:    vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
    208 ; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
    209 ; CHECK-NEXT:    retq
    210   %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
    211   %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
    212   %res2 = fadd <8 x double> %res, %res1
    213   ret <8 x double> %res2
    214 }
    215 
    216 declare <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
    217 
    ; mask3 form of vfmsubadd.ps.512, called twice on the same inputs and summed:
    ; once under the i16 %x3 mask with trailing i32 operand 4, once with an
    ; all-ones mask and trailing i32 operand 0. The expected sequence is exact:
    ; kmovw %edi, %k1, vmovaps %zmm2, %zmm3, the {%k1} call printed as
    ; vfmsubadd231ps on zmm3, the all-ones call as vfmsubadd213ps {rn-sae} on
    ; zmm0, then vaddps.
    218 define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
    219 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
    220 ; CHECK:       ## BB#0:
    221 ; CHECK-NEXT:    kmovw %edi, %k1
    222 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3
    223 ; CHECK-NEXT:    vfmsubadd231ps %zmm1, %zmm0, %zmm3 {%k1}
    224 ; CHECK-NEXT:    vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
    225 ; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
    226 ; CHECK-NEXT:    retq
    227   %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
    228   %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
    229   %res2 = fadd <16 x float> %res, %res1
    230   ret <16 x float> %res2
    231 }
    232 
    ; Masked vfmadd.ps.512 (mask from %mask) with trailing i32 operand 0: expects
    ; vfmadd213ps printed with the {rn-sae} modifier and the {%k1} mask operand,
    ; plus the exact encoding bytes.
    233 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
    234   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne
    235   ; CHECK: vfmadd213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0xc2]
    236   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
    237   ret <16 x float> %res
    238 }
    239 
    ; Masked vfmadd.ps.512 (mask from %mask) with trailing i32 operand 1: expects
    ; vfmadd213ps printed with the {rd-sae} modifier and the {%k1} mask operand,
    ; plus the exact encoding bytes.
    240 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
    241   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn
    242   ; CHECK: vfmadd213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x39,0xa8,0xc2]
    243   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
    244   ret <16 x float> %res
    245 }
    246 
    ; Masked vfmadd.ps.512 (mask from %mask) with trailing i32 operand 2: expects
    ; vfmadd213ps printed with the {ru-sae} modifier and the {%k1} mask operand,
    ; plus the exact encoding bytes.
    247 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
    248   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp
    249   ; CHECK: vfmadd213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xa8,0xc2]
    250   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
    251   ret <16 x float> %res
    252 }
    253 
    ; Masked vfmadd.ps.512 (mask from %mask) with trailing i32 operand 3: expects
    ; vfmadd213ps printed with the {rz-sae} modifier and the {%k1} mask operand,
    ; plus the exact encoding bytes.
    254 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
    255   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz
    256   ; CHECK: vfmadd213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xa8,0xc2]
    257   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
    258   ret <16 x float> %res
    259 }
    260 
    ; Masked vfmadd.ps.512 (mask from %mask) with trailing i32 operand 4: expects
    ; vfmadd213ps with the {%k1} mask operand and no {..-sae} modifier, plus the
    ; exact encoding bytes.
    261 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
    262   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current
    263   ; CHECK: vfmadd213ps  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa8,0xc2]
    264   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
    265   ret <16 x float> %res
    266 }
    267 
    ; vfmadd.ps.512 with an all-ones mask (i16 -1) and trailing i32 operand 0:
    ; expects vfmadd213ps printed with the {rn-sae} modifier and no mask operand,
    ; plus the exact encoding bytes.
    268 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
    269   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne
    270   ; CHECK: vfmadd213ps  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0xc2]
    271   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
    272   ret <16 x float> %res
    273 }
    274 
    ; vfmadd.ps.512 with an all-ones mask (i16 -1) and trailing i32 operand 1:
    ; expects vfmadd213ps printed with the {rd-sae} modifier and no mask operand,
    ; plus the exact encoding bytes.
    275 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
    276   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn
    277   ; CHECK: vfmadd213ps  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x38,0xa8,0xc2]
    278   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
    279   ret <16 x float> %res
    280 }
    281 
    ; vfmadd.ps.512 with an all-ones mask (i16 -1) and trailing i32 operand 2:
    ; expects vfmadd213ps printed with the {ru-sae} modifier and no mask operand,
    ; plus the exact encoding bytes.
    282 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
    283   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp
    284   ; CHECK: vfmadd213ps  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x58,0xa8,0xc2]
    285   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
    286   ret <16 x float> %res
    287 }
    288 
    ; vfmadd.ps.512 with an all-ones mask (i16 -1) and trailing i32 operand 3:
    ; expects vfmadd213ps printed with the {rz-sae} modifier and no mask operand,
    ; plus the exact encoding bytes.
    289 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
    290   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz
    291   ; CHECK: vfmadd213ps  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x78,0xa8,0xc2]
    292   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
    293   ret <16 x float> %res
    294 }
    295 
    ; vfmadd.ps.512 with an all-ones mask (i16 -1) and trailing i32 operand 4:
    ; expects a plain vfmadd213ps with neither a {..-sae} modifier nor a mask
    ; operand, plus the exact encoding bytes.
    296 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
    297   ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current
    298   ; CHECK: vfmadd213ps  %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
    299   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
    300   ret <16 x float> %res
    301 }
    302 
    303 declare <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
    304 
    ; mask3 form of vfmsub.pd.512, called twice on the same inputs and summed:
    ; once under the i8 %x3 mask with trailing i32 operand 4, once with an
    ; all-ones mask and trailing i32 operand 0. The expected sequence is exact:
    ; mask moved from %dil through %eax into %k1, vmovaps %zmm2, %zmm3, the {%k1}
    ; call printed as vfmsub231pd on zmm3, the all-ones call as
    ; vfmsub213pd {rn-sae} on zmm0, then vaddpd.
    305 define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
    306 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
    307 ; CHECK:       ## BB#0:
    308 ; CHECK-NEXT:    movzbl %dil, %eax
    309 ; CHECK-NEXT:    kmovw %eax, %k1
    310 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3
    311 ; CHECK-NEXT:    vfmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
    312 ; CHECK-NEXT:    vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
    313 ; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
    314 ; CHECK-NEXT:    retq
    315   %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
    316   %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
    317   %res2 = fadd <8 x double> %res, %res1
    318   ret <8 x double> %res2
    319 }
    320 
    321 declare <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
    322 
    ; mask3 form of vfmsub.ps.512, called twice on the same inputs and summed:
    ; once under the i16 %x3 mask with trailing i32 operand 4, once with an
    ; all-ones mask and trailing i32 operand 0. The expected sequence is exact:
    ; kmovw %edi, %k1, vmovaps %zmm2, %zmm3, the {%k1} call printed as
    ; vfmsub231ps on zmm3, the all-ones call as vfmsub213ps {rn-sae} on zmm0,
    ; then vaddps.
    323 define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
    324 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
    325 ; CHECK:       ## BB#0:
    326 ; CHECK-NEXT:    kmovw %edi, %k1
    327 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3
    328 ; CHECK-NEXT:    vfmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
    329 ; CHECK-NEXT:    vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
    330 ; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
    331 ; CHECK-NEXT:    retq
    332   %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
    333   %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
    334   %res2 = fadd <16 x float> %res, %res1
    335   ret <16 x float> %res2
    336 }
    337 
    ; Masked vfmadd.pd.512 (mask from %mask) with trailing i32 operand 0: expects
    ; vfmadd213pd printed with the {rn-sae} modifier and the {%k1} mask operand,
    ; plus the exact encoding bytes.
    338 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
    339   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne
    340   ; CHECK: vfmadd213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xa8,0xc2]
    341   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
    342   ret <8 x double> %res
    343 }
    344 
    ; Masked vfmadd.pd.512 (mask from %mask) with trailing i32 operand 1: expects
    ; vfmadd213pd printed with the {rd-sae} modifier and the {%k1} mask operand,
    ; plus the exact encoding bytes.
    345 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
    346   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn
    347   ; CHECK: vfmadd213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xa8,0xc2]
    348   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
    349   ret <8 x double> %res
    350 }
    351 
    ; Masked vfmadd.pd.512 (mask from %mask) with trailing i32 operand 2: expects
    ; vfmadd213pd printed with the {ru-sae} modifier and the {%k1} mask operand,
    ; plus the exact encoding bytes.
    352 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
    353   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp
    354   ; CHECK: vfmadd213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xa8,0xc2]
    355   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
    356   ret <8 x double> %res
    357 }
    358 
    ; Masked vfmadd.pd.512 (mask from %mask) with trailing i32 operand 3: expects
    ; vfmadd213pd printed with the {rz-sae} modifier and the {%k1} mask operand,
    ; plus the exact encoding bytes.
    359 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
    360   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz
    361   ; CHECK: vfmadd213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xa8,0xc2]
    362   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
    363   ret <8 x double> %res
    364 }
    365 
    ; Masked vfmadd.pd.512 (mask from %mask) with trailing i32 operand 4: expects
    ; vfmadd213pd with the {%k1} mask operand and no {..-sae} modifier, plus the
    ; exact encoding bytes.
    366 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
    367   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current
    368   ; CHECK: vfmadd213pd  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa8,0xc2]
    369   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
    370   ret <8 x double> %res
    371 }
    372 
    ; vfmadd.pd.512 with an all-ones mask (i8 -1) and trailing i32 operand 0:
    ; expects vfmadd213pd printed with the {rn-sae} modifier and no mask operand,
    ; plus the exact encoding bytes.
    373 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
    374   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne
    375   ; CHECK: vfmadd213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xa8,0xc2]
    376   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
    377   ret <8 x double> %res
    378 }
    379 
    ; vfmadd.pd.512 with an all-ones mask (i8 -1) and trailing i32 operand 1:
    ; expects vfmadd213pd printed with the {rd-sae} modifier and no mask operand,
    ; plus the exact encoding bytes.
    380 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
    381   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn
    382   ; CHECK: vfmadd213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xa8,0xc2]
    383   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
    384   ret <8 x double> %res
    385 }
    386 
    ; vfmadd.pd.512 with an all-ones mask (i8 -1) and trailing i32 operand 2:
    ; expects vfmadd213pd printed with the {ru-sae} modifier and no mask operand,
    ; plus the exact encoding bytes.
    387 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
    388   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp
    389   ; CHECK: vfmadd213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xa8,0xc2]
    390   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
    391   ret <8 x double> %res
    392 }
    393 
    ; vfmadd.pd.512 with an all-ones mask (i8 -1) and trailing i32 operand 3:
    ; expects vfmadd213pd printed with the {rz-sae} modifier and no mask operand,
    ; plus the exact encoding bytes.
    394 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
    395   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz
    396   ; CHECK: vfmadd213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xa8,0xc2]
    397   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
    398   ret <8 x double> %res
    399 }
    400 
    ; vfmadd.pd.512 with an all-ones mask (i8 -1) and trailing i32 operand 4:
    ; expects a plain vfmadd213pd with neither a {..-sae} modifier nor a mask
    ; operand, plus the exact encoding bytes.
    401 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
    402   ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current
    403   ; CHECK: vfmadd213pd  %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
    404   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
    405   ret <8 x double> %res
    406 }
    407 
    ; mask form of vfmadd.pd.512, called twice on the same inputs and summed:
    ; once under the i8 %x3 mask with trailing i32 operand 4, once with an
    ; all-ones mask and trailing i32 operand 0. The expected sequence is exact:
    ; mask moved from %dil through %eax into %k1, vmovaps %zmm0, %zmm3, the {%k1}
    ; vfmadd213pd on zmm3, the {rn-sae} vfmadd213pd on zmm0, then vaddpd.
    408 define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
    409 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
    410 ; CHECK:       ## BB#0:
    411 ; CHECK-NEXT:    movzbl %dil, %eax
    412 ; CHECK-NEXT:    kmovw %eax, %k1
    413 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
    414 ; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
    415 ; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
    416 ; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
    417 ; CHECK-NEXT:    retq
    418   %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
    419   %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
    420   %res2 = fadd <8 x double> %res, %res1
    421   ret <8 x double> %res2
    422 }
    423 
    424 declare <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
    425 
    ; mask3 form of vfmadd.pd.512, called twice on the same inputs and summed:
    ; once under the i8 %x3 mask with trailing i32 operand 4, once with an
    ; all-ones mask and trailing i32 operand 0. The expected sequence is exact:
    ; mask moved from %dil through %eax into %k1, vmovaps %zmm2, %zmm3, the {%k1}
    ; call printed as vfmadd231pd on zmm3, the all-ones call as
    ; vfmadd213pd {rn-sae} on zmm0, then vaddpd.
    426 define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
    427 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
    428 ; CHECK:       ## BB#0:
    429 ; CHECK-NEXT:    movzbl %dil, %eax
    430 ; CHECK-NEXT:    kmovw %eax, %k1
    431 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3
    432 ; CHECK-NEXT:    vfmadd231pd %zmm1, %zmm0, %zmm3 {%k1}
    433 ; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
    434 ; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
    435 ; CHECK-NEXT:    retq
    436   %res = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
    437   %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
    438   %res2 = fadd <8 x double> %res, %res1
    439   ret <8 x double> %res2
    440 }
    441 
    442 declare <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
    443 
    ; maskz form of vfmadd.pd.512, called twice on the same inputs and summed:
    ; once under the i8 %x3 mask with trailing i32 operand 4, once with an
    ; all-ones mask and trailing i32 operand 0. The expected sequence is exact:
    ; mask moved from %dil through %eax into %k1, vmovaps %zmm0, %zmm3,
    ; vfmadd213pd on zmm3 with {%k1} {z}, the {rn-sae} vfmadd213pd on zmm0,
    ; then vaddpd.
    444 define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
    445 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
    446 ; CHECK:       ## BB#0:
    447 ; CHECK-NEXT:    movzbl %dil, %eax
    448 ; CHECK-NEXT:    kmovw %eax, %k1
    449 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
    450 ; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
    451 ; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
    452 ; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
    453 ; CHECK-NEXT:    retq
    454   %res = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
    455   %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
    456   %res2 = fadd <8 x double> %res, %res1
    457   ret <8 x double> %res2
    458 }
    459 
    ; mask form of vfmadd.ps.512, called twice on the same inputs and summed:
    ; once under the i16 %x3 mask with trailing i32 operand 4, once with an
    ; all-ones mask and trailing i32 operand 0. The expected sequence is exact:
    ; kmovw %edi, %k1, vmovaps %zmm0, %zmm3, the {%k1} vfmadd213ps on zmm3, the
    ; {rn-sae} vfmadd213ps on zmm0, then vaddps.
    460 define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
    461 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
    462 ; CHECK:       ## BB#0:
    463 ; CHECK-NEXT:    kmovw %edi, %k1
    464 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
    465 ; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
    466 ; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
    467 ; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
    468 ; CHECK-NEXT:    retq
    469   %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
    470   %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
    471   %res2 = fadd <16 x float> %res, %res1
    472   ret <16 x float> %res2
    473 }
    474 
    475 declare <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
    476 
    ; mask3 form of vfmadd.ps.512, called twice on the same inputs and summed:
    ; once under the i16 %x3 mask with trailing i32 operand 4, once with an
    ; all-ones mask and trailing i32 operand 0. The expected sequence is exact:
    ; kmovw %edi, %k1, vmovaps %zmm2, %zmm3, the {%k1} call printed as
    ; vfmadd231ps on zmm3, the all-ones call as vfmadd213ps {rn-sae} on zmm0,
    ; then vaddps.
    477 define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
    478 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
    479 ; CHECK:       ## BB#0:
    480 ; CHECK-NEXT:    kmovw %edi, %k1
    481 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3
    482 ; CHECK-NEXT:    vfmadd231ps %zmm1, %zmm0, %zmm3 {%k1}
    483 ; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
    484 ; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
    485 ; CHECK-NEXT:    retq
    486   %res = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
    487   %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
    488   %res2 = fadd <16 x float> %res, %res1
    489   ret <16 x float> %res2
    490 }
    491 
    492 declare <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
    493 
    ; maskz form of vfmadd.ps.512, called twice on the same inputs and summed:
    ; once under the i16 %x3 mask with trailing i32 operand 4, once with an
    ; all-ones mask and trailing i32 operand 0. The expected sequence is exact:
    ; kmovw %edi, %k1, vmovaps %zmm0, %zmm3, vfmadd213ps on zmm3 with
    ; {%k1} {z}, the {rn-sae} vfmadd213ps on zmm0, then vaddps.
    494 define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
    495 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
    496 ; CHECK:       ## BB#0:
    497 ; CHECK-NEXT:    kmovw %edi, %k1
    498 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
    499 ; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
    500 ; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
    501 ; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
    502 ; CHECK-NEXT:    retq
    503   %res = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
    504   %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
    505   %res2 = fadd <16 x float> %res, %res1
    506   ret <16 x float> %res2
    507 }
    508 
    509 
    510 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
          ; Masked vfnmsub.pd.512 with last operand i32 0: the expected instruction
          ; carries {rn-sae} and {%k1}. Only the FMA instruction and its encoding
          ; bytes (printed because llc runs with --show-mc-encoding) are matched;
          ; the mask setup is not checked.
    511   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne
    512   ; CHECK: vfnmsub213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xae,0xc2]
    513   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
    514   ret <8 x double> %res
    515 }
    516 
    517 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
          ; Masked vfnmsub.pd.512 with last operand i32 1: the expected instruction
          ; carries {rd-sae} and {%k1}. Only the FMA instruction and its encoding
          ; bytes are matched; the mask setup is not checked.
    518   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn
    519   ; CHECK: vfnmsub213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xae,0xc2]
    520   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
    521   ret <8 x double> %res
    522 }
    523 
    524 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
          ; Masked vfnmsub.pd.512 with last operand i32 2: the expected instruction
          ; carries {ru-sae} and {%k1}. Only the FMA instruction and its encoding
          ; bytes are matched; the mask setup is not checked.
    525   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp
    526   ; CHECK: vfnmsub213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xae,0xc2]
    527   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
    528   ret <8 x double> %res
    529 }
    530 
    531 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
          ; Masked vfnmsub.pd.512 with last operand i32 3: the expected instruction
          ; carries {rz-sae} and {%k1}. Only the FMA instruction and its encoding
          ; bytes are matched; the mask setup is not checked.
    532   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz
    533   ; CHECK: vfnmsub213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xae,0xc2]
    534   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
    535   ret <8 x double> %res
    536 }
    537 
    538 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
          ; Masked vfnmsub.pd.512 with last operand i32 4: the expected instruction
          ; has no rounding annotation, only {%k1}. Only the FMA instruction and
          ; its encoding bytes are matched; the mask setup is not checked.
    539   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current
    540   ; CHECK: vfnmsub213pd  %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xae,0xc2]
    541   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
    542   ret <8 x double> %res
    543 }
    544 
    545 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
          ; vfnmsub.pd.512 with the constant mask i8 -1 and last operand i32 0:
          ; the expected instruction carries {rn-sae} and no mask register.
          ; The exact encoding bytes are matched as well.
    546   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne
    547   ; CHECK: vfnmsub213pd  {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xae,0xc2]
    548   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
    549   ret <8 x double> %res
    550 }
    551 
    552 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
          ; vfnmsub.pd.512 with the constant mask i8 -1 and last operand i32 1:
          ; the expected instruction carries {rd-sae} and no mask register.
          ; The exact encoding bytes are matched as well.
    553   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn
    554   ; CHECK: vfnmsub213pd  {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xae,0xc2]
    555   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
    556   ret <8 x double> %res
    557 }
    558 
    559 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
          ; vfnmsub.pd.512 with the constant mask i8 -1 and last operand i32 2:
          ; the expected instruction carries {ru-sae} and no mask register.
          ; The exact encoding bytes are matched as well.
    560   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp
    561   ; CHECK: vfnmsub213pd  {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xae,0xc2]
    562   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
    563   ret <8 x double> %res
    564 }
    565 
    566 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
          ; vfnmsub.pd.512 with the constant mask i8 -1 and last operand i32 3:
          ; the expected instruction carries {rz-sae} and no mask register.
          ; The exact encoding bytes are matched as well.
    567   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz
    568   ; CHECK: vfnmsub213pd  {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xae,0xc2]
    569   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
    570   ret <8 x double> %res
    571 }
    572 
    573 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
          ; vfnmsub.pd.512 with the constant mask i8 -1 and last operand i32 4:
          ; the expected instruction has neither a rounding annotation nor a
          ; mask register. The exact encoding bytes are matched as well.
    574   ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current
    575   ; CHECK: vfnmsub213pd  %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
    576   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
    577   ret <8 x double> %res
    578 }
    579 
    580 define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
        ; Calls the mask vfnmsub.pd.512 intrinsic twice on the same three vectors
        ; and returns the fadd of the two results:
        ;   %res  - mask %x3, last operand i32 4; the expected instruction has no
        ;           rounding annotation and is predicated on {%k1}.
        ;   %res1 - mask -1,  last operand i32 0; the expected instruction has
        ;           {rn-sae} and no mask register.
        ; The i8 mask is expected to be zero-extended (movzbl) into eax before
        ; kmovw loads it into k1. The expected register copy is vmovaps even
        ; though the element type is double.
    581 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
    582 ; CHECK:       ## BB#0:
    583 ; CHECK-NEXT:    movzbl %dil, %eax
    584 ; CHECK-NEXT:    kmovw %eax, %k1
    585 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
    586 ; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm3 {%k1}
    587 ; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
    588 ; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
    589 ; CHECK-NEXT:    retq
    590   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
    591   %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
    592   %res2 = fadd <8 x double> %res, %res1
    593   ret <8 x double> %res2
    594 }
    595 
    596 declare <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
    597 
    598 define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
        ; Calls the mask3 vfnmsub.pd.512 intrinsic twice on the same three vectors
        ; and returns the fadd of the two results:
        ;   %res  - mask %x3, last operand i32 4; the expected instruction has no
        ;           rounding annotation and is predicated on {%k1}.
        ;   %res1 - mask -1,  last operand i32 0; the expected instruction has
        ;           {rn-sae} and no mask register.
        ; The i8 mask is expected to be zero-extended (movzbl) before kmovw.
        ; The expected output copies zmm2 into zmm3 and applies the masked
        ; 231 form to that copy; the unmasked call uses the 213 form on zmm0.
    599 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
    600 ; CHECK:       ## BB#0:
    601 ; CHECK-NEXT:    movzbl %dil, %eax
    602 ; CHECK-NEXT:    kmovw %eax, %k1
    603 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3
    604 ; CHECK-NEXT:    vfnmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
    605 ; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
    606 ; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
    607 ; CHECK-NEXT:    retq
    608   %res = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
    609   %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
    610   %res2 = fadd <8 x double> %res, %res1
    611   ret <8 x double> %res2
    612 }
    613 
    614 define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
        ; Calls the mask vfnmsub.ps.512 intrinsic twice on the same three vectors
        ; and returns the fadd of the two results:
        ;   %res  - mask %x3, last operand i32 4; the expected instruction has no
        ;           rounding annotation and is predicated on {%k1}.
        ;   %res1 - mask -1,  last operand i32 0; the expected instruction has
        ;           {rn-sae} and no mask register.
        ; The i16 mask is expected to reach k1 with a single kmovw from edi.
        ; The masked op is expected to run on a zmm3 copy of zmm0.
    615 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
    616 ; CHECK:       ## BB#0:
    617 ; CHECK-NEXT:    kmovw %edi, %k1
    618 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
    619 ; CHECK-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm3 {%k1}
    620 ; CHECK-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
    621 ; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
    622 ; CHECK-NEXT:    retq
    623   %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
    624   %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
    625   %res2 = fadd <16 x float> %res, %res1
    626   ret <16 x float> %res2
    627 }
    628 
    629 declare <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
    630 
    631 define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
        ; Calls the mask3 vfnmsub.ps.512 intrinsic twice on the same three vectors
        ; and returns the fadd of the two results:
        ;   %res  - mask %x3, last operand i32 4; the expected instruction has no
        ;           rounding annotation and is predicated on {%k1}.
        ;   %res1 - mask -1,  last operand i32 0; the expected instruction has
        ;           {rn-sae} and no mask register.
        ; The expected output copies zmm2 into zmm3 and applies the masked
        ; 231 form to that copy; the unmasked call uses the 213 form on zmm0.
    632 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
    633 ; CHECK:       ## BB#0:
    634 ; CHECK-NEXT:    kmovw %edi, %k1
    635 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3
    636 ; CHECK-NEXT:    vfnmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
    637 ; CHECK-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
    638 ; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
    639 ; CHECK-NEXT:    retq
    640   %res = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
    641   %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
    642   %res2 = fadd <16 x float> %res, %res1
    643   ret <16 x float> %res2
    644 }
    645 
    646 define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
        ; Calls the mask vfnmadd.pd.512 intrinsic twice on the same three vectors
        ; and returns the fadd of the two results:
        ;   %res  - mask %x3, last operand i32 4; the expected instruction has no
        ;           rounding annotation and is predicated on {%k1}.
        ;   %res1 - mask -1,  last operand i32 0; the expected instruction has
        ;           {rn-sae} and no mask register.
        ; The i8 mask is expected to be zero-extended (movzbl) into eax before
        ; kmovw loads it into k1. The masked op is expected to run on a zmm3
        ; copy of zmm0.
    647 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
    648 ; CHECK:       ## BB#0:
    649 ; CHECK-NEXT:    movzbl %dil, %eax
    650 ; CHECK-NEXT:    kmovw %eax, %k1
    651 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
    652 ; CHECK-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
    653 ; CHECK-NEXT:    vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
    654 ; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
    655 ; CHECK-NEXT:    retq
    656   %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
    657   %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
    658   %res2 = fadd <8 x double> %res, %res1
    659   ret <8 x double> %res2
    660 }
    661 
    662 define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
        ; Calls the mask vfnmadd.ps.512 intrinsic twice on the same three vectors
        ; and returns the fadd of the two results:
        ;   %res  - mask %x3, last operand i32 4; the expected instruction has no
        ;           rounding annotation and is predicated on {%k1}.
        ;   %res1 - mask -1,  last operand i32 0; the expected instruction has
        ;           {rn-sae} and no mask register.
        ; The i16 mask is expected to reach k1 with a single kmovw from edi.
        ; The masked op is expected to run on a zmm3 copy of zmm0.
    663 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
    664 ; CHECK:       ## BB#0:
    665 ; CHECK-NEXT:    kmovw %edi, %k1
    666 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3
    667 ; CHECK-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
    668 ; CHECK-NEXT:    vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
    669 ; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
    670 ; CHECK-NEXT:    retq
    671   %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
    672   %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
    673   %res2 = fadd <16 x float> %res, %res1
    674   ret <16 x float> %res2
    675 }
    676