; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s
      3 
      4 declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32)
      5 
      6 define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
      7 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_512:
      8 ; CHECK:       ## BB#0:
      9 ; CHECK-NEXT:    kmovb %edi, %k1
     10 ; CHECK-NEXT:    vcvtpd2qq {ru-sae}, %zmm0, %zmm1 {%k1}
     11 ; CHECK-NEXT:    vcvtpd2qq {rn-sae}, %zmm0, %zmm0
     12 ; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
     13 ; CHECK-NEXT:    retq
     14   %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
     15   %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
     16   %res2 = add <8 x i64> %res, %res1
     17   ret <8 x i64> %res2
     18 }
     19 
     20 declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)
     21 
     22 define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
     23 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_512:
     24 ; CHECK:       ## BB#0:
     25 ; CHECK-NEXT:    kmovb %edi, %k1
     26 ; CHECK-NEXT:    vcvtpd2uqq {ru-sae}, %zmm0, %zmm1 {%k1}
     27 ; CHECK-NEXT:    vcvtpd2uqq {rn-sae}, %zmm0, %zmm0
     28 ; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
     29 ; CHECK-NEXT:    retq
     30   %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
     31   %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
     32   %res2 = add <8 x i64> %res, %res1
     33   ret <8 x i64> %res2
     34 }
     35 
     36 declare <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float>, <8 x i64>, i8, i32)
     37 
     38 define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
     39 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_512:
     40 ; CHECK:       ## BB#0:
     41 ; CHECK-NEXT:    kmovb %edi, %k1
     42 ; CHECK-NEXT:    vcvtps2qq {ru-sae}, %ymm0, %zmm1 {%k1}
     43 ; CHECK-NEXT:    vcvtps2qq {rn-sae}, %ymm0, %zmm0
     44 ; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
     45 ; CHECK-NEXT:    retq
     46   %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
     47   %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
     48   %res2 = add <8 x i64> %res, %res1
     49   ret <8 x i64> %res2
     50 }
     51 
     52 declare <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float>, <8 x i64>, i8, i32)
     53 
     54 define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
     55 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_512:
     56 ; CHECK:       ## BB#0:
     57 ; CHECK-NEXT:    kmovb %edi, %k1
     58 ; CHECK-NEXT:    vcvtps2uqq {ru-sae}, %ymm0, %zmm1 {%k1}
     59 ; CHECK-NEXT:    vcvtps2uqq {rn-sae}, %ymm0, %zmm0
     60 ; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
     61 ; CHECK-NEXT:    retq
     62   %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
     63   %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
     64   %res2 = add <8 x i64> %res, %res1
     65   ret <8 x i64> %res2
     66 }
     67 
     68 declare <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64>, <8 x double>, i8, i32)
     69 
     70 define <8 x double>@test_int_x86_avx512_mask_cvt_qq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
     71 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512:
     72 ; CHECK:       ## BB#0:
     73 ; CHECK-NEXT:    kmovb %edi, %k1
     74 ; CHECK-NEXT:    vcvtqq2pd %zmm0, %zmm1 {%k1}
     75 ; CHECK-NEXT:    vcvtqq2pd {rn-sae}, %zmm0, %zmm0
     76 ; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
     77 ; CHECK-NEXT:    retq
     78   %res = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
     79   %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
     80   %res2 = fadd <8 x double> %res, %res1
     81   ret <8 x double> %res2
     82 }
     83 
     84 declare <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64>, <8 x float>, i8, i32)
     85 
     86 define <8 x float>@test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
     87 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
     88 ; CHECK:       ## BB#0:
     89 ; CHECK-NEXT:    kmovb %edi, %k1
     90 ; CHECK-NEXT:    vcvtqq2ps %zmm0, %ymm1 {%k1}
     91 ; CHECK-NEXT:    vcvtqq2ps {rn-sae}, %zmm0, %ymm0
     92 ; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
     93 ; CHECK-NEXT:    retq
     94   %res = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
     95   %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
     96   %res2 = fadd <8 x float> %res, %res1
     97   ret <8 x float> %res2
     98 }
     99 
    100 declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double>, <8 x i64>, i8, i32)
    101 
    102 define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
    103 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512:
    104 ; CHECK:       ## BB#0:
    105 ; CHECK-NEXT:    kmovb %edi, %k1
    106 ; CHECK-NEXT:    vcvttpd2qq %zmm0, %zmm1 {%k1}
    107 ; CHECK-NEXT:    vcvttpd2qq {sae}, %zmm0, %zmm0
    108 ; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
    109 ; CHECK-NEXT:    retq
    110   %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
    111   %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
    112   %res2 = add <8 x i64> %res, %res1
    113   ret <8 x i64> %res2
    114 }
    115 
    116 declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)
    117 
    118 define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
    119 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512:
    120 ; CHECK:       ## BB#0:
    121 ; CHECK-NEXT:    kmovb %edi, %k1
    122 ; CHECK-NEXT:    vcvttpd2uqq %zmm0, %zmm1 {%k1}
    123 ; CHECK-NEXT:    vcvttpd2uqq {sae}, %zmm0, %zmm0
    124 ; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
    125 ; CHECK-NEXT:    retq
    126   %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
    127   %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
    128   %res2 = add <8 x i64> %res, %res1
    129   ret <8 x i64> %res2
    130 }
    131 
    132 declare <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float>, <8 x i64>, i8, i32)
    133 
    134 define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
    135 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512:
    136 ; CHECK:       ## BB#0:
    137 ; CHECK-NEXT:    kmovb %edi, %k1
    138 ; CHECK-NEXT:    vcvttps2qq %ymm0, %zmm1 {%k1}
    139 ; CHECK-NEXT:    vcvttps2qq {sae}, %ymm0, %zmm0
    140 ; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
    141 ; CHECK-NEXT:    retq
    142   %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
    143   %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
    144   %res2 = add <8 x i64> %res, %res1
    145   ret <8 x i64> %res2
    146 }
    147 
    148 declare <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float>, <8 x i64>, i8, i32)
    149 
    150 define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
    151 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512:
    152 ; CHECK:       ## BB#0:
    153 ; CHECK-NEXT:    kmovb %edi, %k1
    154 ; CHECK-NEXT:    vcvttps2uqq %ymm0, %zmm1 {%k1}
    155 ; CHECK-NEXT:    vcvttps2uqq {sae}, %ymm0, %zmm0
    156 ; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
    157 ; CHECK-NEXT:    retq
    158   %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
    159   %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
    160   %res2 = add <8 x i64> %res, %res1
    161   ret <8 x i64> %res2
    162 }
    163 
    164 declare <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64>, <8 x double>, i8, i32)
    165 
    166 define <8 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
    167 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512:
    168 ; CHECK:       ## BB#0:
    169 ; CHECK-NEXT:    kmovb %edi, %k1
    170 ; CHECK-NEXT:    vcvtuqq2pd %zmm0, %zmm1 {%k1}
    171 ; CHECK-NEXT:    vcvtuqq2pd {rn-sae}, %zmm0, %zmm0
    172 ; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
    173 ; CHECK-NEXT:    retq
    174   %res = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
    175   %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
    176   %res2 = fadd <8 x double> %res, %res1
    177   ret <8 x double> %res2
    178 }
    179 
    180 declare <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64>, <8 x float>, i8, i32)
    181 
    182 define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
    183 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
    184 ; CHECK:       ## BB#0:
    185 ; CHECK-NEXT:    kmovb %edi, %k1
    186 ; CHECK-NEXT:    vcvtuqq2ps %zmm0, %ymm1 {%k1}
    187 ; CHECK-NEXT:    vcvtuqq2ps {rn-sae}, %zmm0, %ymm0
    188 ; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
    189 ; CHECK-NEXT:    retq
    190   %res = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
    191   %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
    192   %res2 = fadd <8 x float> %res, %res1
    193   ret <8 x float> %res2
    194 }
    195 
    196 declare <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
    197 ; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_pd_512
    198 ; CHECK-NOT: call
    199 ; CHECK: kmov
    200 ; CHECK: vreducepd {{.*}}{%k1}
    201 ; CHECK: vreducepd
    202 ; CHECK: {sae}
    203 define <8 x double>@test_int_x86_avx512_mask_reduce_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
    204   %res = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 8, <8 x double> %x2, i8 %x3, i32 4)
    205   %res1 = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 4, <8 x double> %x2, i8 -1, i32 8)
    206   %res2 = fadd <8 x double> %res, %res1
    207   ret <8 x double> %res2
    208 }
    209 
    210 declare <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
    211 ; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ps_512
    212 ; CHECK-NOT: call 
    213 ; CHECK: kmov 
    214 ; CHECK: vreduceps
    215 ; CHECK: {sae}
    216 ; CKECK: {%k1}
    217 ; CHECK: vreduceps
    218 define <16 x float>@test_int_x86_avx512_mask_reduce_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
    219   %res = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 44, <16 x float> %x2, i16 %x3, i32 8)
    220   %res1 = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 4)
    221   %res2 = fadd <16 x float> %res, %res1
    222   ret <16 x float> %res2
    223 }
    224 
    225 declare <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8, i32)
    226 ; CHECK-LABEL: @test_int_x86_avx512_mask_range_pd_512
    227 ; CHECK-NOT: call 
    228 ; CHECK: kmov 
    229 ; CHECK: vrangepd
    230 ; CKECK: {%k1}
    231 ; CHECK: vrangepd
    232 ; CHECK: {sae}
    233 define <8 x double>@test_int_x86_avx512_mask_range_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
    234   %res = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 8, <8 x double> %x3, i8 %x4, i32 4)
    235   %res1 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 4, <8 x double> %x3, i8 -1, i32 8)
    236   %res2 = fadd <8 x double> %res, %res1
    237   ret <8 x double> %res2
    238 }
    239 
    240 declare <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16, i32)
    241 
    242 ; CHECK-LABEL: @test_int_x86_avx512_mask_range_ps_512
    243 ; CHECK-NOT: call 
    244 ; CHECK: kmov 
    245 ; CHECK: vrangeps
    246 ; CKECK: {%k1}
    247 ; CHECK: vrangeps
    248 ; CHECK: {sae}
    249 define <16 x float>@test_int_x86_avx512_mask_range_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
    250   %res = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 88, <16 x float> %x3, i16 %x4, i32 4)
    251   %res1 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 4, <16 x float> %x3, i16 -1, i32 8)
    252   %res2 = fadd <16 x float> %res, %res1
    253   ret <16 x float> %res2
    254 }
    255 
    256 declare <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)
    257 
    258 ; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ss
    259 ; CHECK-NOT: call 
    260 ; CHECK: kmov 
    261 ; CHECK: vreducess
    262 ; CKECK: {%k1}
    263 ; CHECK: vreducess
    264 ; CHECK: {sae}
    265 define <4 x float>@test_int_x86_avx512_mask_reduce_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
    266   %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 4)
    267   %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
    268   %res2 = fadd <4 x float> %res, %res1
    269   ret <4 x float> %res2
    270 }
    271 
    272 declare <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)
    273 ; CHECK-LABEL: @test_int_x86_avx512_mask_range_ss
    274 ; CHECK-NOT: call 
    275 ; CHECK: kmov 
    276 ; CHECK: vrangess
    277 ; CHECK: {sae}
    278 ; CKECK: {%k1}
    279 ; CHECK: vrangess
    280 ; CHECK: {sae} 
    281 define <4 x float>@test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
    282   %res = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 8)
    283   %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
    284   %res2 = fadd <4 x float> %res, %res1
    285   ret <4 x float> %res2
    286 }
    287 
    288 declare <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)
    289 
    290 ; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_sd
    291 ; CHECK-NOT: call 
    292 ; CHECK: kmov 
    293 ; CHECK: vreducesd
    294 ; CKECK: {%k1}
    295 ; CHECK: vreducesd
    296 ; CHECK: {sae} 
    297 define <2 x double>@test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
    298   %res = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
    299   %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
    300   %res2 = fadd <2 x double> %res, %res1
    301   ret <2 x double> %res2
    302 }
    303 
    304 declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)
    305 ; CHECK-LABEL: @test_int_x86_avx512_mask_range_sd
    306 ; CHECK-NOT: call 
    307 ; CHECK: kmov 
    308 ; CHECK: vrangesd
    309 ; CKECK: {%k1}
    310 ; CHECK: vrangesd
    311 ; CHECK: {sae} 
    312 define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
    313   %res = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
    314   %res1 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
    315   %res2 = fadd <2 x double> %res, %res1
    316   ret <2 x double> %res2
    317 }
    318 
    319 
    320 declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double>, i32, <2 x double>, i8)
    321 
    322 define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0, <2 x double> %x2, i8 %x3) {
    323 ; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512:
    324 ; CHECK:       ## BB#0:
    325 ; CHECK-NEXT:    kmovb %edi, %k1
    326 ; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm1 {%k1}
    327 ; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm2 {%k1} {z}
    328 ; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm0
    329 ; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
    330 ; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
    331 ; CHECK-NEXT:    retq
    332   %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
    333   %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
    334   %res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
    335   %res3 = fadd <2 x double> %res, %res1
    336   %res4 = fadd <2 x double> %res2, %res3
    337   ret <2 x double> %res4
    338 }
    339 
    340 declare <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float>, i32, <8 x float>, i8)
    341 
    342 define <8 x float>@test_int_x86_avx512_mask_vextractf32x8(<16 x float> %x0, <8 x float> %x2, i8 %x3) {
    343 ; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x8:
    344 ; CHECK:       ## BB#0:
    345 ; CHECK-NEXT:    kmovb %edi, %k1
    346 ; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm1 {%k1}
    347 ; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm2 {%k1} {z}
    348 ; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm0
    349 ; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
    350 ; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0
    351 ; CHECK-NEXT:    retq
    352   %res  = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> %x2, i8 %x3)
    353   %res2 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 %x3)
    354   %res1 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 -1)
    355   %res3 = fadd <8 x float> %res, %res1
    356   %res4 = fadd <8 x float> %res2, %res3
    357   ret <8 x float> %res4
    358 }
    359 
    360 declare <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float>, <8 x float>, i32, <16 x float>, i16)
    361 
    362 define <16 x float>@test_int_x86_avx512_mask_insertf32x8_512(<16 x float> %x0, <8 x float> %x1, <16 x float> %x3, i16 %x4) {
    363 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x8_512:
    364 ; CHECK:       ## BB#0:
    365 ; CHECK-NEXT:    kmovw %edi, %k1
    366 ; CHECK-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
    367 ; CHECK-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
    368 ; CHECK-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm0
    369 ; CHECK-NEXT:    vaddps %zmm3, %zmm2, %zmm1
    370 ; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0
    371 ; CHECK-NEXT:    retq
    372   %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
    373   %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
    374   %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
    375   %res3 = fadd <16 x float> %res, %res1
    376   %res4 = fadd <16 x float> %res2, %res3
    377   ret <16 x float> %res4
    378 }
    379 
    380 declare <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double>, <2 x double>, i32, <8 x double>, i8)
    381 
    382 define <8 x double>@test_int_x86_avx512_mask_insertf64x2_512(<8 x double> %x0, <2 x double> %x1,<8 x double> %x3, i8 %x4) {
    383 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_512:
    384 ; CHECK:       ## BB#0:
    385 ; CHECK-NEXT:    kmovb %edi, %k1
    386 ; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
    387 ; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
    388 ; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm0
    389 ; CHECK-NEXT:    vaddpd %zmm3, %zmm2, %zmm1
    390 ; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
    391 ; CHECK-NEXT:    retq
    392   %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
    393   %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
    394   %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
    395   %res3 = fadd <8 x double> %res, %res1
    396   %res4 = fadd <8 x double> %res3, %res2
    397   ret <8 x double> %res4
    398 }
    399 
    400 declare <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32>, <8 x i32>, i32, <16 x i32>, i16)
    401 
    402 define <16 x i32>@test_int_x86_avx512_mask_inserti32x8_512(<16 x i32> %x0, <8 x i32> %x1, <16 x i32> %x3, i16 %x4) {
    403 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x8_512:
    404 ; CHECK:       ## BB#0:
    405 ; CHECK-NEXT:    kmovw %edi, %k1
    406 ; CHECK-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
    407 ; CHECK-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
    408 ; CHECK-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm0
    409 ; CHECK-NEXT:    vpaddd %zmm3, %zmm2, %zmm1
    410 ; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
    411 ; CHECK-NEXT:    retq
    412   %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
    413   %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
    414   %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
    415   %res3 = add <16 x i32> %res, %res1
    416   %res4 = add <16 x i32> %res3, %res2
    417   ret <16 x i32> %res4
    418 }
    419 
    420 declare <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64>, <2 x i64>, i32, <8 x i64>, i8)
    421 
    422 define <8 x i64>@test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i64> %x1, <8 x i64> %x3, i8 %x4) {
    423 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_512:
    424 ; CHECK:       ## BB#0:
    425 ; CHECK-NEXT:    kmovb %edi, %k1
    426 ; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
    427 ; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
    428 ; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm0
    429 ; CHECK-NEXT:    vpaddq %zmm3, %zmm2, %zmm1
    430 ; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
    431 ; CHECK-NEXT:    retq
    432   %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
    433   %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
    434   %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
    435   %res3 = add <8 x i64> %res, %res1
    436   %res4 = add <8 x i64> %res2, %res3
    437   ret <8 x i64> %res4
    438 }
    439 
    440 declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8)
    441 
    442 ; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_pd_512
    443 ; CHECK-NOT: call 
    444 ; CHECK: kmov 
    445 ; CHECK: vfpclasspd
    446 ; CHECK: {%k1} 
    447 ; CHECK: vfpclasspd
    448 ; CHECK: kmovb   %k0
    449 define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0, i8 %x1) {
    450 	%res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %x1)
    451 	%res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1)
    452 	%res2 = add i8 %res, %res1
    453 	ret i8 %res2
    454 }
    455 declare i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32, i16)
    456 
    457 ; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_ps_512
    458 ; CHECK-NOT: call 
    459 ; CHECK: kmov 
    460 ; CHECK: vfpclassps
    461 ; CHECK: vfpclassps
    462 ; CHECK: {%k1} 
    463 ; CHECK: kmov
    464 define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0, i16 %x1) {
    465 	%res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 %x1)
    466 	%res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1)
    467 	%res2 = add i16 %res, %res1
    468 	ret i16 %res2
    469 }
    470 
    471 declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8)
    472 
    473 ; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_sd
    474 ; CHECK-NOT: call 
    475 ; CHECK: kmov 
    476 ; CHECK: vfpclasssd
    477 ; CHECK: %k0 {%k1}
    478 ; CHECK: vfpclasssd
    479 ; CHECK: %k0
    480 define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) {
    481   %res = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 2, i8 %x1)
    482   %res1 = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 4, i8 -1)
    483   %res2 = add i8 %res, %res1
    484   ret i8 %res2
    485 }
    486 
    487 declare i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float>, i32, i8)
    488 
    489 ; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_ss
    490 ; CHECK-NOT: call 
    491 ; CHECK: kmovw 
    492 ; CHECK: vfpclassss
    493 ; CHECK: %k0
    494 ; CHECK: {%k1}
    495 ; CHECK: kmovw
    496 ; CHECK: vfpclassss
    497 ; CHECK: %k0
    498 define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) {
    499   %res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 %x1)
    500   %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1)
    501   %res2 = add i8 %res, %res1
    502   ret i8 %res2
    503 }
    504 
    505 declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float>, <16 x float>, i16)
    506 
    507 define <16 x float>@test_int_x86_avx512_mask_broadcastf32x2_512(<4 x float> %x0, <16 x float> %x2, i16 %x3) {
    508 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x2_512:
    509 ; CHECK:       ## BB#0:
    510 ; CHECK-NEXT:    kmovw %edi, %k1
    511 ; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm1 {%k1}
    512 ; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm2 {%k1} {z}
    513 ; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm0
    514 ; CHECK-NEXT:    vaddps %zmm2, %zmm1, %zmm1
    515 ; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
    516 ; CHECK-NEXT:    retq
    517   %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float>  %x0, <16 x float> %x2, i16 %x3)
    518   %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %x3)
    519   %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
    520   %res3 = fadd <16 x float> %res, %res1
    521   %res4 = fadd <16 x float> %res3, %res2
    522   ret <16 x float> %res4
    523 }
    524 
    525 declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32>, <16 x i32>, i16)
    526 
    527 define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x2_512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3) {
    528 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_512:
    529 ; CHECK:       ## BB#0:
    530 ; CHECK-NEXT:    kmovw %edi, %k1
    531 ; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm1 {%k1}
    532 ; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm2 {%k1} {z}
    533 ; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm0
    534 ; CHECK-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
    535 ; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
    536 ; CHECK-NEXT:    retq
    537   %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32>  %x0, <16 x i32> %x2, i16 %x3)
    538   %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %x3)
    539   %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
    540   %res3 = add <16 x i32> %res, %res1
    541   %res4 = add <16 x i32> %res3, %res2
    542   ret <16 x i32> %res4
    543 }
    544