; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s

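; This file exercises the AVX-512DQ masked intrinsics. For the conversion
; intrinsics, the trailing i32 operand selects the embedded rounding mode:
; 0 = {rn-sae}, 2 = {ru-sae}, 4 = current rounding mode (no suffix), and
; 8 = {sae} (suppress exceptions only). Each test issues one masked and one
; unmasked call and combines the results so both forms are checked.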
declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtpd2qq {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtpd2qq {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtpd2uqq {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtpd2uqq {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtps2qq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtps2qq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtps2uqq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtps2uqq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

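; Signed i64 -> fp conversions. With i32 4 (current rounding mode) no rounding
; suffix is emitted on the masked form; i32 0 selects {rn-sae} on the unmasked form.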
declare <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64>, <8 x double>, i8, i32)

define <8 x double> @test_int_x86_avx512_mask_cvt_qq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtqq2pd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtqq2pd {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64>, <8 x float>, i8, i32)

define <8 x float> @test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtqq2ps %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vcvtqq2ps {rn-sae}, %zmm0, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

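; Truncating fp -> i64 conversions always round toward zero, so the only
; rounding override that applies is i32 8, which emits {sae} to suppress exceptions.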
declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvtt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttpd2qq %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttpd2qq {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvtt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttpd2uqq %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttpd2uqq {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttps2qq %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttps2qq {sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttps2uqq %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttps2uqq {sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

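; Unsigned i64 -> fp conversions, mirroring the signed cvtqq2pd/cvtqq2ps tests above.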
declare <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64>, <8 x double>, i8, i32)

define <8 x double> @test_int_x86_avx512_mask_cvt_uqq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtuqq2pd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtuqq2pd {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64>, <8 x float>, i8, i32)

define <8 x float> @test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtuqq2ps %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vcvtuqq2ps {rn-sae}, %zmm0, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

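; VREDUCEPD/VREDUCEPS: the immediate byte parameterizes the reduction performed;
; these tests only check that it and the optional {sae} override are propagated
; to the emitted instruction.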
declare <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double>, i32, <8 x double>, i8, i32)

define <8 x double> @test_int_x86_avx512_mask_reduce_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vreducepd $8, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vreducepd $4, {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 8, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 4, <8 x double> %x2, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float>, i32, <16 x float>, i16, i32)

define <16 x float> @test_int_x86_avx512_mask_reduce_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vreduceps $44, {sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vreduceps $11, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 44, <16 x float> %x2, i16 %x3, i32 8)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 4)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

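; VRANGEPD/VRANGEPS: the immediate selects the range operation (min/max and
; sign-control variants); again the tests check immediate and {sae} propagation.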
declare <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8, i32)

define <8 x double> @test_int_x86_avx512_mask_range_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vrangepd $8, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vrangepd $4, {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 8, <8 x double> %x3, i8 %x4, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 4, <8 x double> %x3, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16, i32)

define <16 x float> @test_int_x86_avx512_mask_range_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vrangeps $88, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vrangeps $4, {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 88, <16 x float> %x3, i16 %x4, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 4, <16 x float> %x3, i16 -1, i32 8)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

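; Scalar reduce/range variants. Only bit 0 of the mask is meaningful for scalar
; operations, hence the 'andl $1, %edi' before moving the mask into a k-register.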
declare <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32, i32)

define <4 x float> @test_int_x86_avx512_mask_reduce_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vreducess $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vreducess $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32, i32)

define <4 x float> @test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vrangess $4, {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vrangess $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 8)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32, i32)

define <2 x double> @test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vreducesd $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vreducesd $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32, i32)

define <2 x double> @test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vrangesd $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

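; Subvector extracts, covering merge-masked, zero-masked, and unmasked forms.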
declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double>, i32, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0, i32 1, <2 x double> %x2, i8 %x3)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 -1)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float>, i32, <8 x float>, i8)

define <8 x float> @test_int_x86_avx512_mask_vextractf32x8(<16 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0, i32 1, <8 x float> %x2, i8 %x3)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0, i32 1, <8 x float> zeroinitializer, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0, i32 1, <8 x float> zeroinitializer, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

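; Subvector inserts, likewise checked with merge masking, zero masking, and no mask.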
declare <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float>, <8 x float>, i32, <16 x float>, i16)

define <16 x float> @test_int_x86_avx512_mask_insertf32x8_512(<16 x float> %x0, <8 x float> %x1, <16 x float> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x8_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double>, <2 x double>, i32, <8 x double>, i8)

define <8 x double> @test_int_x86_avx512_mask_insertf64x2_512(<8 x double> %x0, <2 x double> %x1, <8 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32>, <8 x i32>, i32, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_mask_inserti32x8_512(<16 x i32> %x0, <8 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x8_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64>, <2 x i64>, i32, <8 x i64>, i8)

define <8 x i64> @test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}

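; VFPCLASS tests: the immediate is a bitmask selecting which floating-point
; classes to test for; the result is a k-register mask, one bit per element.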
declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vfpclasspd $2, %zmm0, %k0 {%k1}
; CHECK-NEXT:    kmovb %k0, %ecx
; CHECK-NEXT:    vfpclasspd $4, %zmm0, %k0
; CHECK-NEXT:    kmovb %k0, %eax
; CHECK-NEXT:    addb %cl, %al
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32, i16)

define i16 @test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0, i16 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0 {%k1}
; CHECK-NEXT:    kmovw %k0, %ecx
; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT:    retq
  %res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 %x1)
  %res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1)
  %res2 = add i16 %res, %res1
  ret i16 %res2
}

declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfpclasssd $2, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    testb %al, %al
; CHECK-NEXT:    je LBB28_2
; CHECK-NEXT:  ## BB#1:
; CHECK-NEXT:    movb $-1, %al
; CHECK-NEXT:  LBB28_2:
; CHECK-NEXT:    vfpclasssd $4, %xmm0, %k0
; CHECK-NEXT:    kmovw %k0, %ecx
; CHECK-NEXT:    testb %cl, %cl
; CHECK-NEXT:    je LBB28_4
; CHECK-NEXT:  ## BB#3:
; CHECK-NEXT:    movb $-1, %cl
; CHECK-NEXT:  LBB28_4:
; CHECK-NEXT:    addb %cl, %al
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %AX<kill>
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 2, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfpclassss $4, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    testb %al, %al
; CHECK-NEXT:    je LBB29_2
; CHECK-NEXT:  ## BB#1:
; CHECK-NEXT:    movb $-1, %al
; CHECK-NEXT:  LBB29_2:
; CHECK-NEXT:    vfpclassss $4, %xmm0, %k0
; CHECK-NEXT:    kmovw %k0, %ecx
; CHECK-NEXT:    testb %cl, %cl
; CHECK-NEXT:    je LBB29_4
; CHECK-NEXT:  ## BB#3:
; CHECK-NEXT:    movb $-1, %cl
; CHECK-NEXT:  LBB29_4:
; CHECK-NEXT:    addb %cl, %al
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %AX<kill>
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

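; 32x2 broadcasts: the low two 32-bit elements of the XMM source are repeated
; across every 64-bit lane of the destination.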
declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float>, <16 x float>, i16)

define <16 x float> @test_int_x86_avx512_mask_broadcastf32x2_512(<4 x float> %x0, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 %x3)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %x3)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res3, %res2
  ret <16 x float> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32>, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_mask_broadcasti32x2_512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %x3)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

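; Mask <-> vector conversions: vpmovd2m/vpmovq2m copy each element's sign bit
; into a mask register, and vpmovm2d/vpmovm2q expand a mask to all-ones/all-zeros
; elements.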
declare i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32>)

define i16 @test_int_x86_avx512_cvtd2mask_512(<16 x i32> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovd2m %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT:    retq
  %res = call i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32> %x0)
  ret i16 %res
}

declare i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64>)

define i8 @test_int_x86_avx512_cvtq2mask_512(<8 x i64> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovq2m %zmm0, %k0
; CHECK-NEXT:    kmovb %k0, %eax
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0)
  ret i8 %res
}

declare <16 x i32> @llvm.x86.avx512.cvtmask2d.512(i16)

define <16 x i32> @test_int_x86_avx512_cvtmask2d_512(i16 %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtmask2d_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    vpmovm2d %k0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.cvtmask2d.512(i16 %x0)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.cvtmask2q.512(i8)

define <8 x i64> @test_int_x86_avx512_cvtmask2q_512(i8 %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtmask2q_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k0
; CHECK-NEXT:    vpmovm2q %k0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.cvtmask2q.512(i8 %x0)
  ret <8 x i64> %res
}

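; 128/256-bit subvector broadcasts; these are lowered to vshuff/vshufi shuffles
; that repeat the source subvector across the destination.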
declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float>, <16 x float>, i16)

define <16 x float> @test_int_x86_avx512_mask_broadcastf32x8_512(<8 x float> %x0, <16 x float> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x8_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> %x2, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> %x2, i16 %mask)
  %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res1, %res2
  %res5 = fadd <16 x float> %res3, %res4
  ret <16 x float> %res5
}

declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double>, <8 x double>, i8)

define <8 x double> @test_int_x86_avx512_mask_broadcastf64x2_512(<2 x double> %x0, <8 x double> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vshuff64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> %x2, i8 -1)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> %x2, i8 %mask)
  %res3 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res1, %res2
  %res5 = fadd <8 x double> %res3, %res4
  ret <8 x double> %res5
}

declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32>, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_mask_broadcasti32x8_512(<8 x i32> %x0, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x8_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vshufi32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> %x2, i16 %mask)
  %res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res1, %res2
  %res5 = add <16 x i32> %res3, %res4
  ret <16 x i32> %res5
}

declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64>, <8 x i64>, i8)

define <8 x i64> @test_int_x86_avx512_mask_broadcasti64x2_512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vshufi64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> %x2, i8 -1)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask)
  %res3 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res1, %res2
  %res5 = add <8 x i64> %res3, %res4
  ret <8 x i64> %res5
}