Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq -mattr=+avx512vl --show-mc-encoding| FileCheck %s
      3 
; Codegen tests for llvm.x86.avx512.mask.pmull.q.512 (VPMULLQ on zmm, AVX512DQ).
; Test-name suffixes: rr = reg/reg, rm = full-vector load, rmb = scalar load
; broadcast to all lanes via insertelement+shufflevector ({1to8} memory form);
; trailing k = merge-mask into %passThru, kz = zero-mask ({z}); a mask of
; i8 -1 (all bits set) is the unmasked form with a zeroinitializer pass-through.
; CHECK lines are autogenerated (see file header) -- regenerate, don't hand-edit.
      4 define <8 x i64> @test_mask_mullo_epi64_rr_512(<8 x i64> %a, <8 x i64> %b) {
      5 ; CHECK-LABEL: test_mask_mullo_epi64_rr_512:
      6 ; CHECK:       ## BB#0:
      7 ; CHECK-NEXT:    vpmullq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x40,0xc1]
      8 ; CHECK-NEXT:    retq ## encoding: [0xc3]
      9   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
     10   ret <8 x i64> %res
     11 }
     12 
     13 define <8 x i64> @test_mask_mullo_epi64_rrk_512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
     14 ; CHECK-LABEL: test_mask_mullo_epi64_rrk_512:
     15 ; CHECK:       ## BB#0:
     16 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
     17 ; CHECK-NEXT:    vpmullq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x40,0xd1]
     18 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
     19 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     20   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
     21   ret <8 x i64> %res
     22 }
     23 
     24 define <8 x i64> @test_mask_mullo_epi64_rrkz_512(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
     25 ; CHECK-LABEL: test_mask_mullo_epi64_rrkz_512:
     26 ; CHECK:       ## BB#0:
     27 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
     28 ; CHECK-NEXT:    vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1]
     29 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     30   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
     31   ret <8 x i64> %res
     32 }
     33 
     34 define <8 x i64> @test_mask_mullo_epi64_rm_512(<8 x i64> %a, <8 x i64>* %ptr_b) {
     35 ; CHECK-LABEL: test_mask_mullo_epi64_rm_512:
     36 ; CHECK:       ## BB#0:
     37 ; CHECK-NEXT:    vpmullq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x40,0x07]
     38 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     39   %b = load <8 x i64>, <8 x i64>* %ptr_b
     40   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
     41   ret <8 x i64> %res
     42 }
     43 
     44 define <8 x i64> @test_mask_mullo_epi64_rmk_512(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
     45 ; CHECK-LABEL: test_mask_mullo_epi64_rmk_512:
     46 ; CHECK:       ## BB#0:
     47 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
     48 ; CHECK-NEXT:    vpmullq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x40,0x0f]
     49 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
     50 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     51   %b = load <8 x i64>, <8 x i64>* %ptr_b
     52   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
     53   ret <8 x i64> %res
     54 }
     55 
     56 define <8 x i64> @test_mask_mullo_epi64_rmkz_512(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
     57 ; CHECK-LABEL: test_mask_mullo_epi64_rmkz_512:
     58 ; CHECK:       ## BB#0:
     59 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
     60 ; CHECK-NEXT:    vpmullq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x40,0x07]
     61 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     62   %b = load <8 x i64>, <8 x i64>* %ptr_b
     63   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
     64   ret <8 x i64> %res
     65 }
     66 
     67 define <8 x i64> @test_mask_mullo_epi64_rmb_512(<8 x i64> %a, i64* %ptr_b) {
     68 ; CHECK-LABEL: test_mask_mullo_epi64_rmb_512:
     69 ; CHECK:       ## BB#0:
     70 ; CHECK-NEXT:    vpmullq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x40,0x07]
     71 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     72   %q = load i64, i64* %ptr_b
     73   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
     74   %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
     75   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
     76   ret <8 x i64> %res
     77 }
     78 
     79 define <8 x i64> @test_mask_mullo_epi64_rmbk_512(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
     80 ; CHECK-LABEL: test_mask_mullo_epi64_rmbk_512:
     81 ; CHECK:       ## BB#0:
     82 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
     83 ; CHECK-NEXT:    vpmullq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x40,0x0f]
     84 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
     85 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     86   %q = load i64, i64* %ptr_b
     87   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
     88   %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
     89   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
     90   ret <8 x i64> %res
     91 }
     92 
     93 define <8 x i64> @test_mask_mullo_epi64_rmbkz_512(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
     94 ; CHECK-LABEL: test_mask_mullo_epi64_rmbkz_512:
     95 ; CHECK:       ## BB#0:
     96 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
     97 ; CHECK-NEXT:    vpmullq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x40,0x07]
     98 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     99   %q = load i64, i64* %ptr_b
    100   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
    101   %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
    102   %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
    103   ret <8 x i64> %res
    104 }
    105 declare <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
    106 
; Codegen tests for llvm.x86.avx512.mask.pmull.q.256 (VPMULLQ on ymm, requires
; AVX512VL+DQ). Suffixes: rr = reg/reg, rm = full-vector load, rmb = scalar
; load broadcast via insertelement+shufflevector ({1to4} memory form);
; k = merge-mask into %passThru, kz = zero-mask ({z}); i8 -1 = unmasked.
; CHECK lines are autogenerated (see file header) -- regenerate, don't hand-edit.
    107 define <4 x i64> @test_mask_mullo_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
    108 ; CHECK-LABEL: test_mask_mullo_epi64_rr_256:
    109 ; CHECK:       ## BB#0:
    110 ; CHECK-NEXT:    vpmullq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x40,0xc1]
    111 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    112   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
    113   ret <4 x i64> %res
    114 }
    115 
    116 define <4 x i64> @test_mask_mullo_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
    117 ; CHECK-LABEL: test_mask_mullo_epi64_rrk_256:
    118 ; CHECK:       ## BB#0:
    119 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
    120 ; CHECK-NEXT:    vpmullq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x40,0xd1]
    121 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
    122 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    123   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
    124   ret <4 x i64> %res
    125 }
    126 
    127 define <4 x i64> @test_mask_mullo_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
    128 ; CHECK-LABEL: test_mask_mullo_epi64_rrkz_256:
    129 ; CHECK:       ## BB#0:
    130 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
    131 ; CHECK-NEXT:    vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1]
    132 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    133   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
    134   ret <4 x i64> %res
    135 }
    136 
    137 define <4 x i64> @test_mask_mullo_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
    138 ; CHECK-LABEL: test_mask_mullo_epi64_rm_256:
    139 ; CHECK:       ## BB#0:
    140 ; CHECK-NEXT:    vpmullq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x40,0x07]
    141 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    142   %b = load <4 x i64>, <4 x i64>* %ptr_b
    143   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
    144   ret <4 x i64> %res
    145 }
    146 
    147 define <4 x i64> @test_mask_mullo_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
    148 ; CHECK-LABEL: test_mask_mullo_epi64_rmk_256:
    149 ; CHECK:       ## BB#0:
    150 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
    151 ; CHECK-NEXT:    vpmullq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x40,0x0f]
    152 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
    153 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    154   %b = load <4 x i64>, <4 x i64>* %ptr_b
    155   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
    156   ret <4 x i64> %res
    157 }
    158 
    159 define <4 x i64> @test_mask_mullo_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
    160 ; CHECK-LABEL: test_mask_mullo_epi64_rmkz_256:
    161 ; CHECK:       ## BB#0:
    162 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
    163 ; CHECK-NEXT:    vpmullq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x40,0x07]
    164 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    165   %b = load <4 x i64>, <4 x i64>* %ptr_b
    166   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
    167   ret <4 x i64> %res
    168 }
    169 
    170 define <4 x i64> @test_mask_mullo_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
    171 ; CHECK-LABEL: test_mask_mullo_epi64_rmb_256:
    172 ; CHECK:       ## BB#0:
    173 ; CHECK-NEXT:    vpmullq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x38,0x40,0x07]
    174 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    175   %q = load i64, i64* %ptr_b
    176   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
    177   %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
    178   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
    179   ret <4 x i64> %res
    180 }
    181 
    182 define <4 x i64> @test_mask_mullo_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
    183 ; CHECK-LABEL: test_mask_mullo_epi64_rmbk_256:
    184 ; CHECK:       ## BB#0:
    185 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
    186 ; CHECK-NEXT:    vpmullq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x39,0x40,0x0f]
    187 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
    188 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    189   %q = load i64, i64* %ptr_b
    190   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
    191   %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
    192   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
    193   ret <4 x i64> %res
    194 }
    195 
    196 define <4 x i64> @test_mask_mullo_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
    197 ; CHECK-LABEL: test_mask_mullo_epi64_rmbkz_256:
    198 ; CHECK:       ## BB#0:
    199 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
    200 ; CHECK-NEXT:    vpmullq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xb9,0x40,0x07]
    201 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    202   %q = load i64, i64* %ptr_b
    203   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
    204   %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
    205   %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
    206   ret <4 x i64> %res
    207 }
    208 
    209 declare <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
    210 
; Codegen tests for llvm.x86.avx512.mask.pmull.q.128 (VPMULLQ on xmm, requires
; AVX512VL+DQ). Suffixes: rr = reg/reg, rm = full-vector load, rmb = scalar
; load broadcast via insertelement+shufflevector ({1to2} memory form);
; k = merge-mask into %passThru, kz = zero-mask ({z}); i8 -1 = unmasked.
; CHECK lines are autogenerated (see file header) -- regenerate, don't hand-edit.
    211 define <2 x i64> @test_mask_mullo_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
    212 ; CHECK-LABEL: test_mask_mullo_epi64_rr_128:
    213 ; CHECK:       ## BB#0:
    214 ; CHECK-NEXT:    vpmullq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
    215 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    216   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
    217   ret <2 x i64> %res
    218 }
    219 
    220 define <2 x i64> @test_mask_mullo_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
    221 ; CHECK-LABEL: test_mask_mullo_epi64_rrk_128:
    222 ; CHECK:       ## BB#0:
    223 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
    224 ; CHECK-NEXT:    vpmullq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x40,0xd1]
    225 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
    226 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    227   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
    228   ret <2 x i64> %res
    229 }
    230 
    231 define <2 x i64> @test_mask_mullo_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
    232 ; CHECK-LABEL: test_mask_mullo_epi64_rrkz_128:
    233 ; CHECK:       ## BB#0:
    234 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
    235 ; CHECK-NEXT:    vpmullq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x40,0xc1]
    236 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    237   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
    238   ret <2 x i64> %res
    239 }
    240 
    241 define <2 x i64> @test_mask_mullo_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
    242 ; CHECK-LABEL: test_mask_mullo_epi64_rm_128:
    243 ; CHECK:       ## BB#0:
    244 ; CHECK-NEXT:    vpmullq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x40,0x07]
    245 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    246   %b = load <2 x i64>, <2 x i64>* %ptr_b
    247   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
    248   ret <2 x i64> %res
    249 }
    250 
    251 define <2 x i64> @test_mask_mullo_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
    252 ; CHECK-LABEL: test_mask_mullo_epi64_rmk_128:
    253 ; CHECK:       ## BB#0:
    254 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
    255 ; CHECK-NEXT:    vpmullq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x40,0x0f]
    256 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
    257 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    258   %b = load <2 x i64>, <2 x i64>* %ptr_b
    259   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
    260   ret <2 x i64> %res
    261 }
    262 
    263 define <2 x i64> @test_mask_mullo_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
    264 ; CHECK-LABEL: test_mask_mullo_epi64_rmkz_128:
    265 ; CHECK:       ## BB#0:
    266 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
    267 ; CHECK-NEXT:    vpmullq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x40,0x07]
    268 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    269   %b = load <2 x i64>, <2 x i64>* %ptr_b
    270   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
    271   ret <2 x i64> %res
    272 }
    273 
    274 define <2 x i64> @test_mask_mullo_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
    275 ; CHECK-LABEL: test_mask_mullo_epi64_rmb_128:
    276 ; CHECK:       ## BB#0:
    277 ; CHECK-NEXT:    vpmullq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x18,0x40,0x07]
    278 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    279   %q = load i64, i64* %ptr_b
    280   %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
    281   %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
    282   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
    283   ret <2 x i64> %res
    284 }
    285 
    286 define <2 x i64> @test_mask_mullo_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
    287 ; CHECK-LABEL: test_mask_mullo_epi64_rmbk_128:
    288 ; CHECK:       ## BB#0:
    289 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
    290 ; CHECK-NEXT:    vpmullq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0x40,0x0f]
    291 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
    292 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    293   %q = load i64, i64* %ptr_b
    294   %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
    295   %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
    296   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
    297   ret <2 x i64> %res
    298 }
    299 
    300 define <2 x i64> @test_mask_mullo_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
    301 ; CHECK-LABEL: test_mask_mullo_epi64_rmbkz_128:
    302 ; CHECK:       ## BB#0:
    303 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
    304 ; CHECK-NEXT:    vpmullq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x99,0x40,0x07]
    305 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    306   %q = load i64, i64* %ptr_b
    307   %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
    308   %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
    309   %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
    310   ret <2 x i64> %res
    311 }
    312 
    313 declare <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
    314 
; Codegen tests for llvm.x86.avx512.mask.andn.ps.128 (masked VANDNPS on xmm,
; requires AVX512VL+DQ). Suffixes: rr = reg/reg, rm = full-vector load,
; rmb = scalar float load broadcast via insertelement+shufflevector ({1to4});
; k = merge-mask into %passThru, kz = zero-mask ({z}); i8 -1 = unmasked.
; CHECK lines are autogenerated (see file header) -- regenerate, don't hand-edit.
    315 define <4 x float> @test_mask_andnot_ps_rr_128(<4 x float> %a, <4 x float> %b) {
    316 ; CHECK-LABEL: test_mask_andnot_ps_rr_128:
    317 ; CHECK:       ## BB#0:
    318 ; CHECK-NEXT:    vandnps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x55,0xc1]
    319 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    320   %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
    321   ret <4 x float> %res
    322 }
    323 
    324 define <4 x float> @test_mask_andnot_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
    325 ; CHECK-LABEL: test_mask_andnot_ps_rrk_128:
    326 ; CHECK:       ## BB#0:
    327 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
    328 ; CHECK-NEXT:    vandnps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x55,0xd1]
    329 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
    330 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    331   %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
    332   ret <4 x float> %res
    333 }
    334 
    335 define <4 x float> @test_mask_andnot_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
    336 ; CHECK-LABEL: test_mask_andnot_ps_rrkz_128:
    337 ; CHECK:       ## BB#0:
    338 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
    339 ; CHECK-NEXT:    vandnps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x55,0xc1]
    340 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    341   %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
    342   ret <4 x float> %res
    343 }
    344 
    345 define <4 x float> @test_mask_andnot_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
    346 ; CHECK-LABEL: test_mask_andnot_ps_rm_128:
    347 ; CHECK:       ## BB#0:
    348 ; CHECK-NEXT:    vandnps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x55,0x07]
    349 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    350   %b = load <4 x float>, <4 x float>* %ptr_b
    351   %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
    352   ret <4 x float> %res
    353 }
    354 
    355 define <4 x float> @test_mask_andnot_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
    356 ; CHECK-LABEL: test_mask_andnot_ps_rmk_128:
    357 ; CHECK:       ## BB#0:
    358 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
    359 ; CHECK-NEXT:    vandnps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x55,0x0f]
    360 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
    361 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    362   %b = load <4 x float>, <4 x float>* %ptr_b
    363   %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
    364   ret <4 x float> %res
    365 }
    366 
    367 define <4 x float> @test_mask_andnot_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
    368 ; CHECK-LABEL: test_mask_andnot_ps_rmkz_128:
    369 ; CHECK:       ## BB#0:
    370 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
    371 ; CHECK-NEXT:    vandnps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x55,0x07]
    372 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    373   %b = load <4 x float>, <4 x float>* %ptr_b
    374   %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
    375   ret <4 x float> %res
    376 }
    377 
    378 define <4 x float> @test_mask_andnot_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
    379 ; CHECK-LABEL: test_mask_andnot_ps_rmb_128:
    380 ; CHECK:       ## BB#0:
    381 ; CHECK-NEXT:    vandnps (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x55,0x07]
    382 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    383   %q = load float, float* %ptr_b
    384   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
    385   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
    386   %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
    387   ret <4 x float> %res
    388 }
    389 
    390 define <4 x float> @test_mask_andnot_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
    391 ; CHECK-LABEL: test_mask_andnot_ps_rmbk_128:
    392 ; CHECK:       ## BB#0:
    393 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
    394 ; CHECK-NEXT:    vandnps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x55,0x0f]
    395 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
    396 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    397   %q = load float, float* %ptr_b
    398   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
    399   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
    400   %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
    401   ret <4 x float> %res
    402 }
    403 
    404 define <4 x float> @test_mask_andnot_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
    405 ; CHECK-LABEL: test_mask_andnot_ps_rmbkz_128:
    406 ; CHECK:       ## BB#0:
    407 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
    408 ; CHECK-NEXT:    vandnps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x55,0x07]
    409 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    410   %q = load float, float* %ptr_b
    411   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
    412   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
    413   %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
    414   ret <4 x float> %res
    415 }
    416 
    417 declare <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
    418 
; Codegen tests for llvm.x86.avx512.mask.andn.ps.256 (masked VANDNPS on ymm,
; requires AVX512VL+DQ). Suffixes: rr = reg/reg, rm = full-vector load,
; rmb = scalar float load broadcast via insertelement+shufflevector ({1to8});
; k = merge-mask into %passThru, kz = zero-mask ({z}); i8 -1 = unmasked.
; CHECK lines are autogenerated (see file header) -- regenerate, don't hand-edit.
    419 define <8 x float> @test_mask_andnot_ps_rr_256(<8 x float> %a, <8 x float> %b) {
    420 ; CHECK-LABEL: test_mask_andnot_ps_rr_256:
    421 ; CHECK:       ## BB#0:
    422 ; CHECK-NEXT:    vandnps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x55,0xc1]
    423 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    424   %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
    425   ret <8 x float> %res
    426 }
    427 
    428 define <8 x float> @test_mask_andnot_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
    429 ; CHECK-LABEL: test_mask_andnot_ps_rrk_256:
    430 ; CHECK:       ## BB#0:
    431 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
    432 ; CHECK-NEXT:    vandnps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x55,0xd1]
    433 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
    434 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    435   %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
    436   ret <8 x float> %res
    437 }
    438 
    439 define <8 x float> @test_mask_andnot_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
    440 ; CHECK-LABEL: test_mask_andnot_ps_rrkz_256:
    441 ; CHECK:       ## BB#0:
    442 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
    443 ; CHECK-NEXT:    vandnps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x55,0xc1]
    444 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    445   %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
    446   ret <8 x float> %res
    447 }
    448 
    449 define <8 x float> @test_mask_andnot_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
    450 ; CHECK-LABEL: test_mask_andnot_ps_rm_256:
    451 ; CHECK:       ## BB#0:
    452 ; CHECK-NEXT:    vandnps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x55,0x07]
    453 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    454   %b = load <8 x float>, <8 x float>* %ptr_b
    455   %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
    456   ret <8 x float> %res
    457 }
    458 
    459 define <8 x float> @test_mask_andnot_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
    460 ; CHECK-LABEL: test_mask_andnot_ps_rmk_256:
    461 ; CHECK:       ## BB#0:
    462 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
    463 ; CHECK-NEXT:    vandnps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x55,0x0f]
    464 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
    465 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    466   %b = load <8 x float>, <8 x float>* %ptr_b
    467   %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
    468   ret <8 x float> %res
    469 }
    470 
    471 define <8 x float> @test_mask_andnot_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
    472 ; CHECK-LABEL: test_mask_andnot_ps_rmkz_256:
    473 ; CHECK:       ## BB#0:
    474 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
    475 ; CHECK-NEXT:    vandnps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x55,0x07]
    476 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    477   %b = load <8 x float>, <8 x float>* %ptr_b
    478   %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
    479   ret <8 x float> %res
    480 }
    481 
    482 define <8 x float> @test_mask_andnot_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
    483 ; CHECK-LABEL: test_mask_andnot_ps_rmb_256:
    484 ; CHECK:       ## BB#0:
    485 ; CHECK-NEXT:    vandnps (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x55,0x07]
    486 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    487   %q = load float, float* %ptr_b
    488   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
    489   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
    490   %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
    491   ret <8 x float> %res
    492 }
    493 
    494 define <8 x float> @test_mask_andnot_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
    495 ; CHECK-LABEL: test_mask_andnot_ps_rmbk_256:
    496 ; CHECK:       ## BB#0:
    497 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
    498 ; CHECK-NEXT:    vandnps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x55,0x0f]
    499 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
    500 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    501   %q = load float, float* %ptr_b
    502   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
    503   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
    504   %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
    505   ret <8 x float> %res
    506 }
    507 
    508 define <8 x float> @test_mask_andnot_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
    509 ; CHECK-LABEL: test_mask_andnot_ps_rmbkz_256:
    510 ; CHECK:       ## BB#0:
    511 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
    512 ; CHECK-NEXT:    vandnps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x55,0x07]
    513 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    514   %q = load float, float* %ptr_b
    515   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
    516   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
    517   %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
    518   ret <8 x float> %res
    519 }
    520 
    521 declare <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
    522 
; Codegen tests for llvm.x86.avx512.mask.andn.ps.512 (masked VANDNPS on zmm).
; Same suffix scheme as the smaller widths: rr = reg/reg, k = merge-mask into
; %passThru, kz = zero-mask ({z}); the 16-lane mask is i16 (loaded with kmovw),
; and i16 -1 is the unmasked form with a zeroinitializer pass-through.
; CHECK lines are autogenerated (see file header) -- regenerate, don't hand-edit.
    523 define <16 x float> @test_mask_andnot_ps_rr_512(<16 x float> %a, <16 x float> %b) {
    524 ; CHECK-LABEL: test_mask_andnot_ps_rr_512:
    525 ; CHECK:       ## BB#0:
    526 ; CHECK-NEXT:    vandnps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x55,0xc1]
    527 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    528   %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
    529   ret <16 x float> %res
    530 }
    531 
    532 define <16 x float> @test_mask_andnot_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
    533 ; CHECK-LABEL: test_mask_andnot_ps_rrk_512:
    534 ; CHECK:       ## BB#0:
    535 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    536 ; CHECK-NEXT:    vandnps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x55,0xd1]
    537 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
    538 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    539   %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
    540   ret <16 x float> %res
    541 }
    542 
    543 define <16 x float> @test_mask_andnot_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
    544 ; CHECK-LABEL: test_mask_andnot_ps_rrkz_512:
    545 ; CHECK:       ## BB#0:
    546 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    547 ; CHECK-NEXT:    vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x55,0xc1]
    548 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    549   %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
    550   ret <16 x float> %res
    551 }
    552 
    553 define <16 x float> @test_mask_andnot_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
    554 ; CHECK-LABEL: test_mask_andnot_ps_rm_512:
    555 ; CHECK:       ## BB#0:
    556 ; CHECK-NEXT:    vandnps (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x55,0x07]
    557 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    558   %b = load <16 x float>, <16 x float>* %ptr_b
    559   %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
    560   ret <16 x float> %res
    561 }
    562 
    563 define <16 x float> @test_mask_andnot_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
    564 ; CHECK-LABEL: test_mask_andnot_ps_rmk_512:
    565 ; CHECK:       ## BB#0:
    566 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
    567 ; CHECK-NEXT:    vandnps (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x55,0x0f]
    568 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
    569 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    570   %b = load <16 x float>, <16 x float>* %ptr_b
    571   %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
    572   ret <16 x float> %res
    573 }
    574 
    575 define <16 x float> @test_mask_andnot_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
    576 ; CHECK-LABEL: test_mask_andnot_ps_rmkz_512:
    577 ; CHECK:       ## BB#0:
    578 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
    579 ; CHECK-NEXT:    vandnps (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x55,0x07]
    580 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    581   %b = load <16 x float>, <16 x float>* %ptr_b
    582   %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
    583   ret <16 x float> %res
    584 }
    585 
    586 define <16 x float> @test_mask_andnot_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
    587 ; CHECK-LABEL: test_mask_andnot_ps_rmb_512:
    588 ; CHECK:       ## BB#0:
    589 ; CHECK-NEXT:    vandnps (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x55,0x07]
    590 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    591   %q = load float, float* %ptr_b
    592   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
    593   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
    594   %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
    595   ret <16 x float> %res
    596 }
    597 
    598 define <16 x float> @test_mask_andnot_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
    599 ; CHECK-LABEL: test_mask_andnot_ps_rmbk_512:
    600 ; CHECK:       ## BB#0:
    601 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
    602 ; CHECK-NEXT:    vandnps (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x55,0x0f]
    603 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
    604 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    605   %q = load float, float* %ptr_b
    606   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
    607   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
    608   %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
    609   ret <16 x float> %res
    610 }
    611 
    612 define <16 x float> @test_mask_andnot_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
    613 ; CHECK-LABEL: test_mask_andnot_ps_rmbkz_512:
    614 ; CHECK:       ## BB#0:
    615 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
    616 ; CHECK-NEXT:    vandnps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x55,0x07]
    617 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    618   %q = load float, float* %ptr_b
    619   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
    620   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
    621   %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
    622   ret <16 x float> %res
    623 }
    624 
    625 declare <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
    626 
; --- 128-bit masked AND ps intrinsic (@llvm.x86.avx512.mask.and.ps.128) ---
; Same rr/rm/rmb x unmasked/k/kz matrix as above, on xmm with i8 masks
; and {1to4} embedded broadcast.

; Unmasked reg/reg form.
define <4 x float> @test_mask_and_ps_rr_128(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_mask_and_ps_rr_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vandps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x54,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

; Merge-masked reg/reg form.
define <4 x float> @test_mask_and_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_ps_rrk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT:    vandps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x54,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

; Zero-masked reg/reg form.
define <4 x float> @test_mask_and_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
; CHECK-LABEL: test_mask_and_ps_rrkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT:    vandps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x54,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

; Unmasked form with folded full-vector load.
define <4 x float> @test_mask_and_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
; CHECK-LABEL: test_mask_and_ps_rm_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vandps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x54,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

; Merge-masked memory form.
define <4 x float> @test_mask_and_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_ps_rmk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT:    vandps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x54,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

; Zero-masked memory form.
define <4 x float> @test_mask_and_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_and_ps_rmkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT:    vandps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x54,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

; Unmasked scalar-broadcast form ({1to4}).
define <4 x float> @test_mask_and_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
; CHECK-LABEL: test_mask_and_ps_rmb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vandps (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x54,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

; Merge-masked scalar-broadcast form.
define <4 x float> @test_mask_and_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_ps_rmbk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT:    vandps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x54,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

; Zero-masked scalar-broadcast form.
define <4 x float> @test_mask_and_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_and_ps_rmbkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT:    vandps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x54,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
    730 
; --- 256-bit masked AND ps intrinsic (@llvm.x86.avx512.mask.and.ps.256) ---
; Same operand-form matrix on ymm with i8 masks and {1to8} broadcast.

; Unmasked reg/reg form.
define <8 x float> @test_mask_and_ps_rr_256(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: test_mask_and_ps_rr_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vandps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x54,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

; Merge-masked reg/reg form.
define <8 x float> @test_mask_and_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_ps_rrk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT:    vandps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x54,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

; Zero-masked reg/reg form.
define <8 x float> @test_mask_and_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
; CHECK-LABEL: test_mask_and_ps_rrkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT:    vandps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x54,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

; Unmasked form with folded full-vector load.
define <8 x float> @test_mask_and_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
; CHECK-LABEL: test_mask_and_ps_rm_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vandps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x54,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x float>, <8 x float>* %ptr_b
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

; Merge-masked memory form.
define <8 x float> @test_mask_and_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_ps_rmk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT:    vandps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x54,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x float>, <8 x float>* %ptr_b
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

; Zero-masked memory form.
define <8 x float> @test_mask_and_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_and_ps_rmkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT:    vandps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x54,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x float>, <8 x float>* %ptr_b
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

; Unmasked scalar-broadcast form ({1to8}).
define <8 x float> @test_mask_and_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
; CHECK-LABEL: test_mask_and_ps_rmb_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vandps (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x54,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

; Merge-masked scalar-broadcast form.
define <8 x float> @test_mask_and_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_ps_rmbk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT:    vandps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x54,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

; Zero-masked scalar-broadcast form.
define <8 x float> @test_mask_and_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_and_ps_rmbkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT:    vandps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x54,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
  %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
    834 
; --- 512-bit masked AND ps intrinsic (@llvm.x86.avx512.mask.and.ps.512) ---
; Same operand-form matrix on zmm with i16 masks and {1to16} broadcast.

; Unmasked reg/reg form.
define <16 x float> @test_mask_and_ps_rr_512(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: test_mask_and_ps_rr_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vandps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x54,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
  ret <16 x float> %res
}

; Merge-masked reg/reg form.
define <16 x float> @test_mask_and_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_and_ps_rrk_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x54,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
  ret <16 x float> %res
}

; Zero-masked reg/reg form.
define <16 x float> @test_mask_and_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
; CHECK-LABEL: test_mask_and_ps_rrkz_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vandps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x54,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

; Unmasked form with folded full-vector load.
define <16 x float> @test_mask_and_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
; CHECK-LABEL: test_mask_and_ps_rm_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vandps (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x54,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x float>, <16 x float>* %ptr_b
  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
  ret <16 x float> %res
}

; Merge-masked memory form.
define <16 x float> @test_mask_and_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_and_ps_rmk_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vandps (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x54,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x float>, <16 x float>* %ptr_b
  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
  ret <16 x float> %res
}

; Zero-masked memory form.
define <16 x float> @test_mask_and_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_and_ps_rmkz_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vandps (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x54,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x float>, <16 x float>* %ptr_b
  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

; Unmasked scalar-broadcast form ({1to16}).
define <16 x float> @test_mask_and_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
; CHECK-LABEL: test_mask_and_ps_rmb_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vandps (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x54,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
  ret <16 x float> %res
}

; Merge-masked scalar-broadcast form.
define <16 x float> @test_mask_and_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
; CHECK-LABEL: test_mask_and_ps_rmbk_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vandps (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x54,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
  ret <16 x float> %res
}

; Zero-masked scalar-broadcast form.
define <16 x float> @test_mask_and_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
; CHECK-LABEL: test_mask_and_ps_rmbkz_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vandps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x54,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
  %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
  %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
    938 
; --- 128-bit masked OR ps intrinsic (@llvm.x86.avx512.mask.or.ps.128) ---
; Same operand-form matrix as the AND tests above, selecting vorps.

; Unmasked reg/reg form.
define <4 x float> @test_mask_or_ps_rr_128(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_mask_or_ps_rr_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x56,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

; Merge-masked reg/reg form.
define <4 x float> @test_mask_or_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_ps_rrk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x56,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

; Zero-masked reg/reg form.
define <4 x float> @test_mask_or_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
; CHECK-LABEL: test_mask_or_ps_rrkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x56,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

; Unmasked form with folded full-vector load.
define <4 x float> @test_mask_or_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
; CHECK-LABEL: test_mask_or_ps_rm_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vorps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x56,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

; Merge-masked memory form.
define <4 x float> @test_mask_or_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_ps_rmk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT:    vorps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x56,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

; Zero-masked memory form.
define <4 x float> @test_mask_or_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_or_ps_rmkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT:    vorps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x56,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x float>, <4 x float>* %ptr_b
  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

; Unmasked scalar-broadcast form ({1to4}).
define <4 x float> @test_mask_or_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
; CHECK-LABEL: test_mask_or_ps_rmb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vorps (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x56,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

; Merge-masked scalar-broadcast form.
define <4 x float> @test_mask_or_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_ps_rmbk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT:    vorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x56,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
  ret <4 x float> %res
}

; Zero-masked scalar-broadcast form.
define <4 x float> @test_mask_or_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_or_ps_rmbkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
; CHECK-NEXT:    vorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x56,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
  %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
   1042 
; --- 256-bit masked OR ps intrinsic (@llvm.x86.avx512.mask.or.ps.256) ---
; Register-operand variants; memory/broadcast variants follow below.

; Unmasked reg/reg form.
define <8 x float> @test_mask_or_ps_rr_256(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: test_mask_or_ps_rr_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vorps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x56,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

; Merge-masked reg/reg form.
define <8 x float> @test_mask_or_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_ps_rrk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT:    vorps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x56,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
  ret <8 x float> %res
}

; Zero-masked reg/reg form.
define <8 x float> @test_mask_or_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
; CHECK-LABEL: test_mask_or_ps_rrkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT:    vorps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x56,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}
   1072 
   1073 define <8 x float> @test_mask_or_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
   1074 ; CHECK-LABEL: test_mask_or_ps_rm_256:
   1075 ; CHECK:       ## BB#0:
   1076 ; CHECK-NEXT:    vorps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x56,0x07]
   1077 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1078   %b = load <8 x float>, <8 x float>* %ptr_b
   1079   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
   1080   ret <8 x float> %res
   1081 }
   1082 
   1083 define <8 x float> @test_mask_or_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
   1084 ; CHECK-LABEL: test_mask_or_ps_rmk_256:
   1085 ; CHECK:       ## BB#0:
   1086 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
   1087 ; CHECK-NEXT:    vorps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x56,0x0f]
   1088 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   1089 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1090   %b = load <8 x float>, <8 x float>* %ptr_b
   1091   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
   1092   ret <8 x float> %res
   1093 }
   1094 
   1095 define <8 x float> @test_mask_or_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
   1096 ; CHECK-LABEL: test_mask_or_ps_rmkz_256:
   1097 ; CHECK:       ## BB#0:
   1098 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
   1099 ; CHECK-NEXT:    vorps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x56,0x07]
   1100 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1101   %b = load <8 x float>, <8 x float>* %ptr_b
   1102   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
   1103   ret <8 x float> %res
   1104 }
   1105 
   1106 define <8 x float> @test_mask_or_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
   1107 ; CHECK-LABEL: test_mask_or_ps_rmb_256:
   1108 ; CHECK:       ## BB#0:
   1109 ; CHECK-NEXT:    vorps (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x56,0x07]
   1110 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1111   %q = load float, float* %ptr_b
   1112   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
   1113   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
   1114   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
   1115   ret <8 x float> %res
   1116 }
   1117 
   1118 define <8 x float> @test_mask_or_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
   1119 ; CHECK-LABEL: test_mask_or_ps_rmbk_256:
   1120 ; CHECK:       ## BB#0:
   1121 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
   1122 ; CHECK-NEXT:    vorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x56,0x0f]
   1123 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   1124 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1125   %q = load float, float* %ptr_b
   1126   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
   1127   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
   1128   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
   1129   ret <8 x float> %res
   1130 }
   1131 
   1132 define <8 x float> @test_mask_or_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
   1133 ; CHECK-LABEL: test_mask_or_ps_rmbkz_256:
   1134 ; CHECK:       ## BB#0:
   1135 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
   1136 ; CHECK-NEXT:    vorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x56,0x07]
   1137 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1138   %q = load float, float* %ptr_b
   1139   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
   1140   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
   1141   %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
   1142   ret <8 x float> %res
   1143 }
   1144 
   1145 declare <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
   1146 
        ; --- or.ps.512 test family: same rr/rrk/rrkz/rm/rmk/rmkz/rmb/rmbk/rmbkz variants as
        ; the 256-bit group above, but on <16 x float> with an i16 mask (kmovw, not kmovb)
        ; and a {1to16} broadcast for the splatted-scalar cases.
   1147 define <16 x float> @test_mask_or_ps_rr_512(<16 x float> %a, <16 x float> %b) {
   1148 ; CHECK-LABEL: test_mask_or_ps_rr_512:
   1149 ; CHECK:       ## BB#0:
   1150 ; CHECK-NEXT:    vorps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x56,0xc1]
   1151 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1152   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
   1153   ret <16 x float> %res
   1154 }
   1155 
   1156 define <16 x float> @test_mask_or_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
   1157 ; CHECK-LABEL: test_mask_or_ps_rrk_512:
   1158 ; CHECK:       ## BB#0:
   1159 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1160 ; CHECK-NEXT:    vorps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x56,0xd1]
   1161 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   1162 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1163   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
   1164   ret <16 x float> %res
   1165 }
   1166 
   1167 define <16 x float> @test_mask_or_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
   1168 ; CHECK-LABEL: test_mask_or_ps_rrkz_512:
   1169 ; CHECK:       ## BB#0:
   1170 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1171 ; CHECK-NEXT:    vorps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x56,0xc1]
   1172 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1173   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
   1174   ret <16 x float> %res
   1175 }
   1176 
   1177 define <16 x float> @test_mask_or_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
   1178 ; CHECK-LABEL: test_mask_or_ps_rm_512:
   1179 ; CHECK:       ## BB#0:
   1180 ; CHECK-NEXT:    vorps (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x56,0x07]
   1181 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1182   %b = load <16 x float>, <16 x float>* %ptr_b
   1183   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
   1184   ret <16 x float> %res
   1185 }
   1186 
   1187 define <16 x float> @test_mask_or_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
   1188 ; CHECK-LABEL: test_mask_or_ps_rmk_512:
   1189 ; CHECK:       ## BB#0:
   1190 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   1191 ; CHECK-NEXT:    vorps (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x56,0x0f]
   1192 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   1193 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1194   %b = load <16 x float>, <16 x float>* %ptr_b
   1195   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
   1196   ret <16 x float> %res
   1197 }
   1198 
   1199 define <16 x float> @test_mask_or_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
   1200 ; CHECK-LABEL: test_mask_or_ps_rmkz_512:
   1201 ; CHECK:       ## BB#0:
   1202 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   1203 ; CHECK-NEXT:    vorps (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x56,0x07]
   1204 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1205   %b = load <16 x float>, <16 x float>* %ptr_b
   1206   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
   1207   ret <16 x float> %res
   1208 }
   1209 
   1210 define <16 x float> @test_mask_or_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
   1211 ; CHECK-LABEL: test_mask_or_ps_rmb_512:
   1212 ; CHECK:       ## BB#0:
   1213 ; CHECK-NEXT:    vorps (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x56,0x07]
   1214 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1215   %q = load float, float* %ptr_b
   1216   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
   1217   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
   1218   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
   1219   ret <16 x float> %res
   1220 }
   1221 
   1222 define <16 x float> @test_mask_or_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
   1223 ; CHECK-LABEL: test_mask_or_ps_rmbk_512:
   1224 ; CHECK:       ## BB#0:
   1225 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   1226 ; CHECK-NEXT:    vorps (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x56,0x0f]
   1227 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   1228 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1229   %q = load float, float* %ptr_b
   1230   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
   1231   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
   1232   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
   1233   ret <16 x float> %res
   1234 }
   1235 
   1236 define <16 x float> @test_mask_or_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
   1237 ; CHECK-LABEL: test_mask_or_ps_rmbkz_512:
   1238 ; CHECK:       ## BB#0:
   1239 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   1240 ; CHECK-NEXT:    vorps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x56,0x07]
   1241 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1242   %q = load float, float* %ptr_b
   1243   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
   1244   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
   1245   %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
   1246   ret <16 x float> %res
   1247 }
   1248 
   1249 declare <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
   1250 
        ; --- xor.ps.128 test family: mirrors the or.ps variant matrix (rr/rrk/rrkz/rm/rmk/
        ; rmkz/rmb/rmbk/rmbkz) for vxorps on <4 x float> with an i8 mask and {1to4} broadcasts.
   1251 define <4 x float> @test_mask_xor_ps_rr_128(<4 x float> %a, <4 x float> %b) {
   1252 ; CHECK-LABEL: test_mask_xor_ps_rr_128:
   1253 ; CHECK:       ## BB#0:
   1254 ; CHECK-NEXT:    vxorps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x57,0xc1]
   1255 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1256   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
   1257   ret <4 x float> %res
   1258 }
   1259 
   1260 define <4 x float> @test_mask_xor_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
   1261 ; CHECK-LABEL: test_mask_xor_ps_rrk_128:
   1262 ; CHECK:       ## BB#0:
   1263 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1264 ; CHECK-NEXT:    vxorps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x57,0xd1]
   1265 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   1266 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1267   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
   1268   ret <4 x float> %res
   1269 }
   1270 
   1271 define <4 x float> @test_mask_xor_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
   1272 ; CHECK-LABEL: test_mask_xor_ps_rrkz_128:
   1273 ; CHECK:       ## BB#0:
   1274 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1275 ; CHECK-NEXT:    vxorps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x57,0xc1]
   1276 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1277   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
   1278   ret <4 x float> %res
   1279 }
   1280 
   1281 define <4 x float> @test_mask_xor_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
   1282 ; CHECK-LABEL: test_mask_xor_ps_rm_128:
   1283 ; CHECK:       ## BB#0:
   1284 ; CHECK-NEXT:    vxorps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x57,0x07]
   1285 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1286   %b = load <4 x float>, <4 x float>* %ptr_b
   1287   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
   1288   ret <4 x float> %res
   1289 }
   1290 
   1291 define <4 x float> @test_mask_xor_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
   1292 ; CHECK-LABEL: test_mask_xor_ps_rmk_128:
   1293 ; CHECK:       ## BB#0:
   1294 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
   1295 ; CHECK-NEXT:    vxorps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x57,0x0f]
   1296 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   1297 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1298   %b = load <4 x float>, <4 x float>* %ptr_b
   1299   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
   1300   ret <4 x float> %res
   1301 }
   1302 
   1303 define <4 x float> @test_mask_xor_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
   1304 ; CHECK-LABEL: test_mask_xor_ps_rmkz_128:
   1305 ; CHECK:       ## BB#0:
   1306 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
   1307 ; CHECK-NEXT:    vxorps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x57,0x07]
   1308 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1309   %b = load <4 x float>, <4 x float>* %ptr_b
   1310   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
   1311   ret <4 x float> %res
   1312 }
   1313 
   1314 define <4 x float> @test_mask_xor_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
   1315 ; CHECK-LABEL: test_mask_xor_ps_rmb_128:
   1316 ; CHECK:       ## BB#0:
   1317 ; CHECK-NEXT:    vxorps (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x57,0x07]
   1318 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1319   %q = load float, float* %ptr_b
   1320   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   1321   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
   1322   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
   1323   ret <4 x float> %res
   1324 }
   1325 
   1326 define <4 x float> @test_mask_xor_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
   1327 ; CHECK-LABEL: test_mask_xor_ps_rmbk_128:
   1328 ; CHECK:       ## BB#0:
   1329 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
   1330 ; CHECK-NEXT:    vxorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x57,0x0f]
   1331 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   1332 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1333   %q = load float, float* %ptr_b
   1334   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   1335   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
   1336   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
   1337   ret <4 x float> %res
   1338 }
   1339 
   1340 define <4 x float> @test_mask_xor_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
   1341 ; CHECK-LABEL: test_mask_xor_ps_rmbkz_128:
   1342 ; CHECK:       ## BB#0:
   1343 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
   1344 ; CHECK-NEXT:    vxorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x57,0x07]
   1345 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1346   %q = load float, float* %ptr_b
   1347   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   1348   %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
   1349   %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
   1350   ret <4 x float> %res
   1351 }
   1352 
   1353 declare <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
   1354 
        ; --- xor.ps.256 test family: the same variant matrix on <8 x float> (ymm registers,
        ; i8 mask, {1to8} broadcast for the splatted-scalar cases).
   1355 define <8 x float> @test_mask_xor_ps_rr_256(<8 x float> %a, <8 x float> %b) {
   1356 ; CHECK-LABEL: test_mask_xor_ps_rr_256:
   1357 ; CHECK:       ## BB#0:
   1358 ; CHECK-NEXT:    vxorps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x57,0xc1]
   1359 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1360   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
   1361   ret <8 x float> %res
   1362 }
   1363 
   1364 define <8 x float> @test_mask_xor_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
   1365 ; CHECK-LABEL: test_mask_xor_ps_rrk_256:
   1366 ; CHECK:       ## BB#0:
   1367 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1368 ; CHECK-NEXT:    vxorps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x57,0xd1]
   1369 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   1370 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1371   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
   1372   ret <8 x float> %res
   1373 }
   1374 
   1375 define <8 x float> @test_mask_xor_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
   1376 ; CHECK-LABEL: test_mask_xor_ps_rrkz_256:
   1377 ; CHECK:       ## BB#0:
   1378 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1379 ; CHECK-NEXT:    vxorps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x57,0xc1]
   1380 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1381   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
   1382   ret <8 x float> %res
   1383 }
   1384 
   1385 define <8 x float> @test_mask_xor_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
   1386 ; CHECK-LABEL: test_mask_xor_ps_rm_256:
   1387 ; CHECK:       ## BB#0:
   1388 ; CHECK-NEXT:    vxorps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x57,0x07]
   1389 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1390   %b = load <8 x float>, <8 x float>* %ptr_b
   1391   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
   1392   ret <8 x float> %res
   1393 }
   1394 
   1395 define <8 x float> @test_mask_xor_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
   1396 ; CHECK-LABEL: test_mask_xor_ps_rmk_256:
   1397 ; CHECK:       ## BB#0:
   1398 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
   1399 ; CHECK-NEXT:    vxorps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x57,0x0f]
   1400 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   1401 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1402   %b = load <8 x float>, <8 x float>* %ptr_b
   1403   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
   1404   ret <8 x float> %res
   1405 }
   1406 
   1407 define <8 x float> @test_mask_xor_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
   1408 ; CHECK-LABEL: test_mask_xor_ps_rmkz_256:
   1409 ; CHECK:       ## BB#0:
   1410 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
   1411 ; CHECK-NEXT:    vxorps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x57,0x07]
   1412 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1413   %b = load <8 x float>, <8 x float>* %ptr_b
   1414   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
   1415   ret <8 x float> %res
   1416 }
   1417 
   1418 define <8 x float> @test_mask_xor_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
   1419 ; CHECK-LABEL: test_mask_xor_ps_rmb_256:
   1420 ; CHECK:       ## BB#0:
   1421 ; CHECK-NEXT:    vxorps (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x57,0x07]
   1422 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1423   %q = load float, float* %ptr_b
   1424   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
   1425   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
   1426   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
   1427   ret <8 x float> %res
   1428 }
   1429 
   1430 define <8 x float> @test_mask_xor_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
   1431 ; CHECK-LABEL: test_mask_xor_ps_rmbk_256:
   1432 ; CHECK:       ## BB#0:
   1433 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
   1434 ; CHECK-NEXT:    vxorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x57,0x0f]
   1435 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   1436 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1437   %q = load float, float* %ptr_b
   1438   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
   1439   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
   1440   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
   1441   ret <8 x float> %res
   1442 }
   1443 
   1444 define <8 x float> @test_mask_xor_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
   1445 ; CHECK-LABEL: test_mask_xor_ps_rmbkz_256:
   1446 ; CHECK:       ## BB#0:
   1447 ; CHECK-NEXT:    kmovb %esi, %k1 ## encoding: [0xc5,0xf9,0x92,0xce]
   1448 ; CHECK-NEXT:    vxorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x57,0x07]
   1449 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1450   %q = load float, float* %ptr_b
   1451   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
   1452   %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
   1453   %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
   1454   ret <8 x float> %res
   1455 }
   1456 
   1457 declare <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
   1458 
        ; --- xor.ps.512 test family: the same variant matrix on <16 x float> (zmm registers,
        ; i16 mask via kmovw, {1to16} broadcast for the splatted-scalar cases).
   1459 define <16 x float> @test_mask_xor_ps_rr_512(<16 x float> %a, <16 x float> %b) {
   1460 ; CHECK-LABEL: test_mask_xor_ps_rr_512:
   1461 ; CHECK:       ## BB#0:
   1462 ; CHECK-NEXT:    vxorps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x57,0xc1]
   1463 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1464   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
   1465   ret <16 x float> %res
   1466 }
   1467 
   1468 define <16 x float> @test_mask_xor_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
   1469 ; CHECK-LABEL: test_mask_xor_ps_rrk_512:
   1470 ; CHECK:       ## BB#0:
   1471 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1472 ; CHECK-NEXT:    vxorps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x57,0xd1]
   1473 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   1474 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1475   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
   1476   ret <16 x float> %res
   1477 }
   1478 
   1479 define <16 x float> @test_mask_xor_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
   1480 ; CHECK-LABEL: test_mask_xor_ps_rrkz_512:
   1481 ; CHECK:       ## BB#0:
   1482 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1483 ; CHECK-NEXT:    vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x57,0xc1]
   1484 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1485   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
   1486   ret <16 x float> %res
   1487 }
   1488 
   1489 define <16 x float> @test_mask_xor_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
   1490 ; CHECK-LABEL: test_mask_xor_ps_rm_512:
   1491 ; CHECK:       ## BB#0:
   1492 ; CHECK-NEXT:    vxorps (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x57,0x07]
   1493 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1494   %b = load <16 x float>, <16 x float>* %ptr_b
   1495   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
   1496   ret <16 x float> %res
   1497 }
   1498 
   1499 define <16 x float> @test_mask_xor_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
   1500 ; CHECK-LABEL: test_mask_xor_ps_rmk_512:
   1501 ; CHECK:       ## BB#0:
   1502 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   1503 ; CHECK-NEXT:    vxorps (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x57,0x0f]
   1504 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   1505 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1506   %b = load <16 x float>, <16 x float>* %ptr_b
   1507   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
   1508   ret <16 x float> %res
   1509 }
   1510 
   1511 define <16 x float> @test_mask_xor_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
   1512 ; CHECK-LABEL: test_mask_xor_ps_rmkz_512:
   1513 ; CHECK:       ## BB#0:
   1514 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   1515 ; CHECK-NEXT:    vxorps (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x57,0x07]
   1516 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1517   %b = load <16 x float>, <16 x float>* %ptr_b
   1518   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
   1519   ret <16 x float> %res
   1520 }
   1521 
   1522 define <16 x float> @test_mask_xor_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
   1523 ; CHECK-LABEL: test_mask_xor_ps_rmb_512:
   1524 ; CHECK:       ## BB#0:
   1525 ; CHECK-NEXT:    vxorps (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x57,0x07]
   1526 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1527   %q = load float, float* %ptr_b
   1528   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
   1529   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
   1530   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
   1531   ret <16 x float> %res
   1532 }
   1533 
   1534 define <16 x float> @test_mask_xor_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
   1535 ; CHECK-LABEL: test_mask_xor_ps_rmbk_512:
   1536 ; CHECK:       ## BB#0:
   1537 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   1538 ; CHECK-NEXT:    vxorps (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x57,0x0f]
   1539 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   1540 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1541   %q = load float, float* %ptr_b
   1542   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
   1543   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
   1544   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
   1545   ret <16 x float> %res
   1546 }
   1547 
   1548 define <16 x float> @test_mask_xor_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
   1549 ; CHECK-LABEL: test_mask_xor_ps_rmbkz_512:
   1550 ; CHECK:       ## BB#0:
   1551 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   1552 ; CHECK-NEXT:    vxorps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x57,0x07]
   1553 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1554   %q = load float, float* %ptr_b
   1555   %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
   1556   %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
   1557   %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
   1558   ret <16 x float> %res
   1559 }
   1560 
   1561 declare <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
   1562 
   1563 declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double>, <2 x i64>, i8)
   1564 
; Calls the cvtpd2qq.128 intrinsic masked (%x2, merging into %x1) and unmasked
; (mask -1); the final add keeps both calls live so both encodings are CHECKed.
   1565 define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
   1566 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_128:
   1567 ; CHECK:       ## BB#0:
   1568 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1569 ; CHECK-NEXT:    vcvtpd2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x7b,0xc8]
   1570 ; CHECK-NEXT:    vcvtpd2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x7b,0xc0]
   1571 ; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
   1572 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1573   %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
   1574   %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
   1575   %res2 = add <2 x i64> %res, %res1
   1576   ret <2 x i64> %res2
   1577 }
   1578 
   1579 declare <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double>, <4 x i64>, i8)
   1580 
; 256-bit variant: cvtpd2qq.256 masked (%x2 into %x1) and unmasked (-1);
; the add keeps both calls live so both encodings are CHECKed.
   1581 define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
   1582 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_256:
   1583 ; CHECK:       ## BB#0:
   1584 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1585 ; CHECK-NEXT:    vcvtpd2qq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x7b,0xc8]
   1586 ; CHECK-NEXT:    vcvtpd2qq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x7b,0xc0]
   1587 ; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
   1588 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1589   %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
   1590   %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
   1591   %res2 = add <4 x i64> %res, %res1
   1592   ret <4 x i64> %res2
   1593 }
   1594 
   1595 declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double>, <2 x i64>, i8)
   1596 
; Unsigned conversion: cvtpd2uqq.128 masked (%x2 into %x1) and unmasked (-1);
; the add keeps both calls live so both encodings are CHECKed.
   1597 define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
   1598 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_128:
   1599 ; CHECK:       ## BB#0:
   1600 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1601 ; CHECK-NEXT:    vcvtpd2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x79,0xc8]
   1602 ; CHECK-NEXT:    vcvtpd2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x79,0xc0]
   1603 ; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
   1604 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1605   %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
   1606   %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
   1607   %res2 = add <2 x i64> %res, %res1
   1608   ret <2 x i64> %res2
   1609 }
   1610 
   1611 declare <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double>, <4 x i64>, i8)
   1612 
; 256-bit unsigned variant: cvtpd2uqq.256 masked (%x2 into %x1) and unmasked
; (-1); the add keeps both calls live so both encodings are CHECKed.
   1613 define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
   1614 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_256:
   1615 ; CHECK:       ## BB#0:
   1616 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1617 ; CHECK-NEXT:    vcvtpd2uqq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x79,0xc8]
   1618 ; CHECK-NEXT:    vcvtpd2uqq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x79,0xc0]
   1619 ; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
   1620 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1621   %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
   1622   %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
   1623   %res2 = add <4 x i64> %res, %res1
   1624   ret <4 x i64> %res2
   1625 }
   1626 
   1627 declare <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float>, <2 x i64>, i8)
   1628 
; cvtps2qq.128: <4 x float> source narrows to <2 x i64>; masked (%x2 into %x1)
; and unmasked (-1), with the add keeping both calls live for the CHECKs.
   1629 define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
   1630 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128:
   1631 ; CHECK:       ## BB#0:
   1632 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1633 ; CHECK-NEXT:    vcvtps2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc8]
   1634 ; CHECK-NEXT:    vcvtps2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0]
   1635 ; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
   1636 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1637   %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
   1638   %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
   1639   %res2 = add <2 x i64> %res, %res1
   1640   ret <2 x i64> %res2
   1641 }
   1642 
   1643 declare <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float>, <4 x i64>, i8)
   1644 
; cvtps2qq.256: xmm float source widens to ymm i64; masked (%x2 into %x1) and
; unmasked (-1), with the add keeping both calls live for the CHECKs.
   1645 define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
   1646 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_256:
   1647 ; CHECK:       ## BB#0:
   1648 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1649 ; CHECK-NEXT:    vcvtps2qq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x7b,0xc8]
   1650 ; CHECK-NEXT:    vcvtps2qq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x7b,0xc0]
   1651 ; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
   1652 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1653   %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
   1654   %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
   1655   %res2 = add <4 x i64> %res, %res1
   1656   ret <4 x i64> %res2
   1657 }
   1658 
   1659 declare <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float>, <2 x i64>, i8)
   1660 
; cvtps2uqq.128 (unsigned): masked (%x2 into %x1) and unmasked (-1);
; the add keeps both calls live so both encodings are CHECKed.
   1661 define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
   1662 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128:
   1663 ; CHECK:       ## BB#0:
   1664 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1665 ; CHECK-NEXT:    vcvtps2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc8]
   1666 ; CHECK-NEXT:    vcvtps2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0]
   1667 ; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
   1668 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1669   %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
   1670   %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
   1671   %res2 = add <2 x i64> %res, %res1
   1672   ret <2 x i64> %res2
   1673 }
   1674 
   1675 declare <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float>, <4 x i64>, i8)
   1676 
; cvtps2uqq.256 (unsigned, widening): masked (%x2 into %x1) and unmasked (-1);
; the add keeps both calls live so both encodings are CHECKed.
   1677 define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
   1678 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_256:
   1679 ; CHECK:       ## BB#0:
   1680 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1681 ; CHECK-NEXT:    vcvtps2uqq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x79,0xc8]
   1682 ; CHECK-NEXT:    vcvtps2uqq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x79,0xc0]
   1683 ; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
   1684 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1685   %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
   1686   %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
   1687   %res2 = add <4 x i64> %res, %res1
   1688   ret <4 x i64> %res2
   1689 }
   1690 
   1691 declare <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64>, <2 x double>, i8)
   1692 
; Reverse direction: cvtqq2pd.128 (i64 -> double) masked (%x2 into %x1) and
; unmasked (-1); fadd keeps both calls live so both encodings are CHECKed.
   1693 define <2 x double>@test_int_x86_avx512_mask_cvt_qq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) {
   1694 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_128:
   1695 ; CHECK:       ## BB#0:
   1696 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1697 ; CHECK-NEXT:    vcvtqq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0xe6,0xc8]
   1698 ; CHECK-NEXT:    vcvtqq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0xe6,0xc0]
   1699 ; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
   1700 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1701   %res = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2)
   1702   %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1)
   1703   %res2 = fadd <2 x double> %res, %res1
   1704   ret <2 x double> %res2
   1705 }
   1706 
   1707 declare <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64>, <4 x double>, i8)
   1708 
; cvtqq2pd.256: masked (%x2 into %x1) and unmasked (-1);
; fadd keeps both calls live so both encodings are CHECKed.
   1709 define <4 x double>@test_int_x86_avx512_mask_cvt_qq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) {
   1710 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_256:
   1711 ; CHECK:       ## BB#0:
   1712 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1713 ; CHECK-NEXT:    vcvtqq2pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0xe6,0xc8]
   1714 ; CHECK-NEXT:    vcvtqq2pd %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0xe6,0xc0]
   1715 ; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
   1716 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1717   %res = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2)
   1718   %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1)
   1719   %res2 = fadd <4 x double> %res, %res1
   1720   ret <4 x double> %res2
   1721 }
   1722 
   1723 declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64>, <4 x float>, i8)
   1724 
; cvtqq2ps.128 (i64 -> float): masked (%x2 into %x1) and unmasked (-1);
; fadd keeps both calls live so both encodings are CHECKed.
   1725 define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
   1726 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128:
   1727 ; CHECK:       ## BB#0:
   1728 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1729 ; CHECK-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
   1730 ; CHECK-NEXT:    vcvtqq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
   1731 ; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
   1732 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1733   %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
   1734   %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
   1735   %res2 = fadd <4 x float> %res, %res1
   1736   ret <4 x float> %res2
   1737 }
   1738 
   1739 declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64>, <4 x float>, i8)
   1740 
; cvtqq2ps.256 (ymm i64 narrows to xmm float): masked (%x2 into %x1) and
; unmasked (-1); fadd keeps both calls live so both encodings are CHECKed.
   1741 define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
   1742 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
   1743 ; CHECK:       ## BB#0:
   1744 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1745 ; CHECK-NEXT:    vcvtqq2ps %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
   1746 ; CHECK-NEXT:    vcvtqq2ps %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xc0]
   1747 ; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
   1748 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1749   %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2)
   1750   %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1)
   1751   %res2 = fadd <4 x float> %res, %res1
   1752   ret <4 x float> %res2
   1753 }
   1754 
   1755 declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double>, <2 x i64>, i8)
   1756 
; Truncating form: cvttpd2qq.128 masked (%x2 into %x1) and unmasked (-1);
; the add keeps both calls live so both encodings are CHECKed.
   1757 define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
   1758 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128:
   1759 ; CHECK:       ## BB#0:
   1760 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1761 ; CHECK-NEXT:    vcvttpd2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8]
   1762 ; CHECK-NEXT:    vcvttpd2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0]
   1763 ; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
   1764 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1765   %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
   1766   %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
   1767   %res2 = add <2 x i64> %res, %res1
   1768   ret <2 x i64> %res2
   1769 }
   1770 
   1771 declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double>, <4 x i64>, i8)
   1772 
; Truncating 256-bit form: cvttpd2qq.256 masked (%x2 into %x1) and unmasked
; (-1); the add keeps both calls live so both encodings are CHECKed.
   1773 define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
   1774 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256:
   1775 ; CHECK:       ## BB#0:
   1776 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1777 ; CHECK-NEXT:    vcvttpd2qq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8]
   1778 ; CHECK-NEXT:    vcvttpd2qq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0]
   1779 ; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
   1780 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1781   %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
   1782   %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
   1783   %res2 = add <4 x i64> %res, %res1
   1784   ret <4 x i64> %res2
   1785 }
   1786 
   1787 declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double>, <2 x i64>, i8)
   1788 
; Truncating unsigned form: cvttpd2uqq.128 masked (%x2 into %x1) and unmasked
; (-1); the add keeps both calls live so both encodings are CHECKed.
   1789 define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
   1790 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128:
   1791 ; CHECK:       ## BB#0:
   1792 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1793 ; CHECK-NEXT:    vcvttpd2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8]
   1794 ; CHECK-NEXT:    vcvttpd2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0]
   1795 ; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
   1796 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1797   %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
   1798   %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
   1799   %res2 = add <2 x i64> %res, %res1
   1800   ret <2 x i64> %res2
   1801 }
   1802 
   1803 declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double>, <4 x i64>, i8)
   1804 
; Truncating unsigned 256-bit form: cvttpd2uqq.256 masked (%x2 into %x1) and
; unmasked (-1); the add keeps both calls live so both encodings are CHECKed.
   1805 define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
   1806 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256:
   1807 ; CHECK:       ## BB#0:
   1808 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1809 ; CHECK-NEXT:    vcvttpd2uqq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8]
   1810 ; CHECK-NEXT:    vcvttpd2uqq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0]
   1811 ; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
   1812 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1813   %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
   1814   %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
   1815   %res2 = add <4 x i64> %res, %res1
   1816   ret <4 x i64> %res2
   1817 }
   1818 
   1819 declare <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float>, <2 x i64>, i8)
   1820 
; Truncating float->i64: cvttps2qq.128 masked (%x2 into %x1) and unmasked (-1);
; the add keeps both calls live so both encodings are CHECKed.
   1821 define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
   1822 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128:
   1823 ; CHECK:       ## BB#0:
   1824 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1825 ; CHECK-NEXT:    vcvttps2qq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8]
   1826 ; CHECK-NEXT:    vcvttps2qq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
   1827 ; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
   1828 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1829   %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
   1830   %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
   1831   %res2 = add <2 x i64> %res, %res1
   1832   ret <2 x i64> %res2
   1833 }
   1834 
   1835 declare <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float>, <4 x i64>, i8)
   1836 
; Truncating widening form: cvttps2qq.256 (xmm -> ymm) masked (%x2 into %x1)
; and unmasked (-1); the add keeps both calls live so both encodings are CHECKed.
   1837 define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
   1838 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
   1839 ; CHECK:       ## BB#0:
   1840 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1841 ; CHECK-NEXT:    vcvttps2qq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
   1842 ; CHECK-NEXT:    vcvttps2qq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
   1843 ; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
   1844 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1845   %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
   1846   %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
   1847   %res2 = add <4 x i64> %res, %res1
   1848   ret <4 x i64> %res2
   1849 }
   1850 
   1851 declare <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64>, <2 x double>, i8)
   1852 
; Unsigned i64 -> double: cvtuqq2pd.128 masked (%x2 into %x1) and unmasked
; (-1); fadd keeps both calls live so both encodings are CHECKed.
   1853 define <2 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) {
   1854 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_128:
   1855 ; CHECK:       ## BB#0:
   1856 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1857 ; CHECK-NEXT:    vcvtuqq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x7a,0xc8]
   1858 ; CHECK-NEXT:    vcvtuqq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x7a,0xc0]
   1859 ; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
   1860 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1861   %res = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2)
   1862   %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1)
   1863   %res2 = fadd <2 x double> %res, %res1
   1864   ret <2 x double> %res2
   1865 }
   1866 
   1867 declare <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64>, <4 x double>, i8)
   1868 
; Unsigned 256-bit i64 -> double: cvtuqq2pd.256 masked (%x2 into %x1) and
; unmasked (-1); fadd keeps both calls live so both encodings are CHECKed.
   1869 define <4 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) {
   1870 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_256:
   1871 ; CHECK:       ## BB#0:
   1872 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1873 ; CHECK-NEXT:    vcvtuqq2pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x7a,0xc8]
   1874 ; CHECK-NEXT:    vcvtuqq2pd %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x7a,0xc0]
   1875 ; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
   1876 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1877   %res = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2)
   1878   %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1)
   1879   %res2 = fadd <4 x double> %res, %res1
   1880   ret <4 x double> %res2
   1881 }
   1882 
   1883 declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64>, <4 x float>, i8)
   1884 
; Unsigned i64 -> float: cvtuqq2ps.128 masked (%x2 into %x1) and unmasked
; (-1); fadd keeps both calls live so both encodings are CHECKed.
   1885 define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
   1886 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128:
   1887 ; CHECK:       ## BB#0:
   1888 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1889 ; CHECK-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
   1890 ; CHECK-NEXT:    vcvtuqq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
   1891 ; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
   1892 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1893   %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
   1894   %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
   1895   %res2 = fadd <4 x float> %res, %res1
   1896   ret <4 x float> %res2
   1897 }
   1898 
   1899 declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64>, <4 x float>, i8)
   1900 
; Unsigned narrowing form: cvtuqq2ps.256 (ymm i64 -> xmm float) masked (%x2
; into %x1) and unmasked (-1); fadd keeps both calls live for the CHECKs.
   1901 define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
   1902 ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
   1903 ; CHECK:       ## BB#0:
   1904 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1905 ; CHECK-NEXT:    vcvtuqq2ps %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
   1906 ; CHECK-NEXT:    vcvtuqq2ps %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x28,0x7a,0xc0]
   1907 ; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
   1908 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1909   %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2)
   1910   %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1)
   1911   %res2 = fadd <4 x float> %res, %res1
   1912   ret <4 x float> %res2
   1913 }
   1914 
   1915 declare <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float>, <2 x i64>, i8)
   1916 
; Truncating unsigned float->i64: cvttps2uqq.128 masked (%x2 into %x1) and
; unmasked (-1); the add keeps both calls live so both encodings are CHECKed.
   1917 define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
   1918 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
   1919 ; CHECK:       ## BB#0:
   1920 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1921 ; CHECK-NEXT:    vcvttps2uqq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
   1922 ; CHECK-NEXT:    vcvttps2uqq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
   1923 ; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
   1924 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1925   %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
   1926   %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
   1927   %res2 = add <2 x i64> %res, %res1
   1928   ret <2 x i64> %res2
   1929 }
   1930 
   1931 declare <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float>, <4 x i64>, i8)
   1932 
; Truncating unsigned widening form: cvttps2uqq.256 (xmm -> ymm) masked (%x2
; into %x1) and unmasked (-1); the add keeps both calls live for the CHECKs.
   1933 define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
   1934 ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
   1935 ; CHECK:       ## BB#0:
   1936 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1937 ; CHECK-NEXT:    vcvttps2uqq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
   1938 ; CHECK-NEXT:    vcvttps2uqq %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
   1939 ; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
   1940 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1941   %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
   1942   %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
   1943   %res2 = add <4 x i64> %res, %res1
   1944   ret <4 x i64> %res2
   1945 }
   1946 
   1947 declare <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double>, i32, <2 x double>, i8)
   1948 
; vreducepd (128-bit) with two different control immediates: imm 4 masked
; (%x3 into %x2) and imm 8 unmasked (-1); fadd keeps both calls live.
   1949 define <2 x double>@test_int_x86_avx512_mask_reduce_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
   1950 ; CHECK-LABEL: test_int_x86_avx512_mask_reduce_pd_128:
   1951 ; CHECK:       ## BB#0:
   1952 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1953 ; CHECK-NEXT:    vreducepd $4, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x56,0xc8,0x04]
   1954 ; CHECK-NEXT:    vreducepd $8, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x56,0xc0,0x08]
   1955 ; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
   1956 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1957   %res = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)
   1958   %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 8, <2 x double> %x2, i8 -1)
   1959   %res2 = fadd <2 x double> %res, %res1
   1960   ret <2 x double> %res2
   1961 }
   1962 
   1963 declare <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double>, i32, <4 x double>, i8)
   1964 
; vreducepd (256-bit): imm 4 masked (%x3 into %x2) and imm 0 unmasked (-1);
; fadd keeps both calls live so both immediate encodings are CHECKed.
   1965 define <4 x double>@test_int_x86_avx512_mask_reduce_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
   1966 ; CHECK-LABEL: test_int_x86_avx512_mask_reduce_pd_256:
   1967 ; CHECK:       ## BB#0:
   1968 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1969 ; CHECK-NEXT:    vreducepd $4, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x56,0xc8,0x04]
   1970 ; CHECK-NEXT:    vreducepd $0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x56,0xc0,0x00]
   1971 ; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
   1972 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1973   %res = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)
   1974   %res1 = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 0, <4 x double> %x2, i8 -1)
   1975   %res2 = fadd <4 x double> %res, %res1
   1976   ret <4 x double> %res2
   1977 }
   1978 
   1979 declare <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float>, i32, <4 x float>, i8)
   1980 
; vreduceps (128-bit): imm 4 masked (%x3 into %x2) and imm 88 unmasked (-1);
; fadd keeps both calls live so both immediate encodings are CHECKed.
   1981 define <4 x float>@test_int_x86_avx512_mask_reduce_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
   1982 ; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ps_128:
   1983 ; CHECK:       ## BB#0:
   1984 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   1985 ; CHECK-NEXT:    vreduceps $4, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x56,0xc8,0x04]
   1986 ; CHECK-NEXT:    vreduceps $88, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x56,0xc0,0x58]
   1987 ; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
   1988 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1989   %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 %x3)
   1990   %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 -1)
   1991   %res2 = fadd <4 x float> %res, %res1
   1992   ret <4 x float> %res2
   1993 }
   1994 
   1995 declare <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float>, i32, <8 x float>, i8)
   1996 
; vreduceps (256-bit): same imm 11 for the masked (%x3 into %x2) and unmasked
; (-1) calls; fadd keeps both calls live so both encodings are CHECKed.
   1997 define <8 x float>@test_int_x86_avx512_mask_reduce_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
   1998 ; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ps_256:
   1999 ; CHECK:       ## BB#0:
   2000 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   2001 ; CHECK-NEXT:    vreduceps $11, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x56,0xc8,0x0b]
   2002 ; CHECK-NEXT:    vreduceps $11, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x56,0xc0,0x0b]
   2003 ; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
   2004 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2005   %res = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3)
   2006   %res1 = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 -1)
   2007   %res2 = fadd <8 x float> %res, %res1
   2008   ret <8 x float> %res2
   2009 }
   2010 
   2011 declare <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)
   2012 
; Two-source vrangepd (128-bit): imm 4 masked (%x4 into %x3) and imm 8
; unmasked (-1); fadd keeps both calls live so both encodings are CHECKed.
   2013 define <2 x double>@test_int_x86_avx512_mask_range_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
   2014 ; CHECK-LABEL: test_int_x86_avx512_mask_range_pd_128:
   2015 ; CHECK:       ## BB#0:
   2016 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   2017 ; CHECK-NEXT:    vrangepd $4, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x50,0xd1,0x04]
   2018 ; CHECK-NEXT:    vrangepd $8, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x50,0xc1,0x08]
   2019 ; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0]
   2020 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2021   %res = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 4, <2 x double> %x3, i8 %x4)
   2022   %res1 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 8, <2 x double> %x3, i8 -1)
   2023   %res2 = fadd <2 x double> %res, %res1
   2024   ret <2 x double> %res2
   2025 }
   2026 
   2027 declare <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)
   2028 
; Two-source vrangepd (256-bit): imm 4 masked (%x4 into %x3) and imm 88
; unmasked (-1); fadd keeps both calls live so both encodings are CHECKed.
   2029 define <4 x double>@test_int_x86_avx512_mask_range_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
   2030 ; CHECK-LABEL: test_int_x86_avx512_mask_range_pd_256:
   2031 ; CHECK:       ## BB#0:
   2032 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   2033 ; CHECK-NEXT:    vrangepd $4, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x50,0xd1,0x04]
   2034 ; CHECK-NEXT:    vrangepd $88, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x50,0xc1,0x58]
   2035 ; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
   2036 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2037   %res = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 4, <4 x double> %x3, i8 %x4)
   2038   %res1 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 88, <4 x double> %x3, i8 -1)
   2039   %res2 = fadd <4 x double> %res, %res1
   2040   ret <4 x double> %res2
   2041 }
   2042 
   2043 declare <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)
   2044 
; Two-source vrangeps (128-bit): imm 4 masked (%x4 into %x3) and imm 88
; unmasked (-1); fadd keeps both calls live so both encodings are CHECKed.
   2045 define <4 x float>@test_int_x86_avx512_mask_range_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
   2046 ; CHECK-LABEL: test_int_x86_avx512_mask_range_ps_128:
   2047 ; CHECK:       ## BB#0:
   2048 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   2049 ; CHECK-NEXT:    vrangeps $4, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x50,0xd1,0x04]
   2050 ; CHECK-NEXT:    vrangeps $88, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x50,0xc1,0x58]
   2051 ; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0]
   2052 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2053   %res = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 4, <4 x float> %x3, i8 %x4)
   2054   %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 88, <4 x float> %x3, i8 -1)
   2055   %res2 = fadd <4 x float> %res, %res1
   2056   ret <4 x float> %res2
   2057 }
   2058 
   2059 declare <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
   2060 
   2061 define <8 x float>@test_int_x86_avx512_mask_range_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
   2062 ; CHECK-LABEL: test_int_x86_avx512_mask_range_ps_256:
   2063 ; CHECK:       ## BB#0:
   2064 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   2065 ; CHECK-NEXT:    vrangeps $4, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x50,0xd1,0x04]
   2066 ; CHECK-NEXT:    vrangeps $88, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x50,0xc1,0x58]
   2067 ; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0]
   2068 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2069   %res = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 4, <8 x float> %x3, i8 %x4)
   2070   %res1 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 88, <8 x float> %x3, i8 -1)
   2071   %res2 = fadd <8 x float> %res, %res1
   2072   ret <8 x float> %res2
   2073 }
   2074 
   2075 declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double>, i32, <2 x double>, i8)
   2076 
   2077 define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_256(<4 x double> %x0, <2 x double> %x2, i8 %x3) {
   2078 ; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_256:
   2079 ; CHECK:       ## BB#0:
   2080 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   2081 ; CHECK-NEXT:    vextractf64x2 $1, %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x19,0xc1,0x01]
   2082 ; CHECK-NEXT:    vextractf64x2 $1, %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x19,0xc2,0x01]
   2083 ; CHECK-NEXT:    vextractf64x2 $1, %ymm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x19,0xc0,0x01]
   2084 ; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
   2085 ; CHECK-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc2]
   2086 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2087   %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
   2088   %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
   2089   %res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
   2090   %res3 = fadd <2 x double> %res, %res1
   2091   %res4 = fadd <2 x double> %res3, %res2
   2092   ret <2 x double> %res4
   2093 }
   2094 
   2095 declare <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double>, <2 x double>, i32, <4 x double>, i8)
   2096 
   2097 define <4 x double>@test_int_x86_avx512_mask_insertf64x2_256(<4 x double> %x0, <2 x double> %x1, <4 x double> %x3, i8 %x4) {
   2098 ; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_256:
   2099 ; CHECK:       ## BB#0:
   2100 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   2101 ; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x18,0xd1,0x01]
   2102 ; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x18,0xd9,0x01]
   2103 ; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x18,0xc1,0x01]
   2104 ; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
   2105 ; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0]
   2106 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2107   %res = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 %x4)
   2108   %res1 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 -1)
   2109   %res2 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> zeroinitializer, i8 %x4)
   2110   %res3 = fadd <4 x double> %res, %res1
   2111   %res4 = fadd <4 x double> %res2, %res3
   2112   ret <4 x double> %res4
   2113 }
   2114 
   2115 declare <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64>, <2 x i64>, i32, <4 x i64>, i8)
   2116 
   2117 define <4 x i64>@test_int_x86_avx512_mask_inserti64x2_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x3, i8 %x4) {
   2118 ; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_256:
   2119 ; CHECK:       ## BB#0:
   2120 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   2121 ; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x38,0xd1,0x01]
   2122 ; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x38,0xd9,0x01]
   2123 ; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x38,0xc1,0x01]
   2124 ; CHECK-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0]
   2125 ; CHECK-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc3]
   2126 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2127   %res = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 %x4)
   2128   %res1 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 -1)
   2129   %res2 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> zeroinitializer, i8 %x4)
   2130   %res3 = add <4 x i64> %res, %res1
   2131   %res4 = add <4 x i64> %res3, %res2
   2132   ret <4 x i64> %res4
   2133 }
   2134 
   2135 declare i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32, i8)
   2136 
   2137 define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0, i8 %x1) {
   2138 ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128:
   2139 ; CHECK:       ## BB#0:
   2140 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   2141 ; CHECK-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
   2142 ; CHECK-NEXT:    kmovb %k0, %ecx ## encoding: [0xc5,0xf9,0x93,0xc8]
   2143 ; CHECK-NEXT:    vfpclassps $4, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc0,0x04]
   2144 ; CHECK-NEXT:    kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
   2145 ; CHECK-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
   2146 ; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
   2147 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2148   %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2, i8 %x1)
   2149   %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4, i8 -1)
   2150   %res2 = add i8 %res, %res1
   2151   ret i8 %res2
   2152 }
   2153 
   2154 declare i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32, i8)
   2155 
   2156 define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0, i8 %x1) {
   2157 ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256:
   2158 ; CHECK:       ## BB#0:
   2159 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   2160 ; CHECK-NEXT:    vfpclassps $2, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]
   2161 ; CHECK-NEXT:    kmovb %k0, %ecx ## encoding: [0xc5,0xf9,0x93,0xc8]
   2162 ; CHECK-NEXT:    vfpclassps $4, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc0,0x04]
   2163 ; CHECK-NEXT:    kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
   2164 ; CHECK-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
   2165 ; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
   2166 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2167   %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2, i8 %x1)
   2168   %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4, i8 -1)
   2169   %res2 = add i8 %res, %res1
   2170   ret i8 %res2
   2171 }
   2172 
   2173 declare i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32, i8)
   2174 
   2175 define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0, i8 %x1) {
   2176 ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128:
   2177 ; CHECK:       ## BB#0:
   2178 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   2179 ; CHECK-NEXT:    vfpclasspd $4, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
   2180 ; CHECK-NEXT:    kmovb %k0, %ecx ## encoding: [0xc5,0xf9,0x93,0xc8]
   2181 ; CHECK-NEXT:    vfpclasspd $2, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc0,0x02]
   2182 ; CHECK-NEXT:    kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
   2183 ; CHECK-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
   2184 ; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
   2185 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2186   %res =  call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4, i8 %x1)
   2187   %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2, i8 -1)
   2188   %res2 = add i8 %res, %res1
   2189   ret i8 %res2
   2190 }
   2191 
   2192 declare i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32, i8)
   2193 
   2194 define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0, i8 %x1) {
   2195 ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256:
   2196 ; CHECK:       ## BB#0:
   2197 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   2198 ; CHECK-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]
   2199 ; CHECK-NEXT:    kmovb %k0, %ecx ## encoding: [0xc5,0xf9,0x93,0xc8]
   2200 ; CHECK-NEXT:    vfpclasspd $4, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc0,0x04]
   2201 ; CHECK-NEXT:    kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
   2202 ; CHECK-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
   2203 ; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
   2204 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2205   %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2, i8 %x1)
   2206   %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4, i8 -1)
   2207   %res2 = add i8 %res, %res1
   2208   ret i8 %res2
   2209 }
   2210 
   2211 declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float>, <8 x float>, i8)
   2212 
   2213 define <8 x float>@test_int_x86_avx512_mask_broadcastf32x2_256(<4 x float> %x0, <8 x float> %x2, i8 %x3) {
   2214 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x2_256:
   2215 ; CHECK:       ## BB#0:
   2216 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   2217 ; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x19,0xc8]
   2218 ; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x19,0xd0]
   2219 ; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x19,0xc0]
   2220 ; CHECK-NEXT:    vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca]
   2221 ; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
   2222 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2223   %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float>  %x0, <8 x float> %x2, i8 %x3)
   2224   %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %x3)
   2225   %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> %x2, i8 -1)
   2226   %res3 = fadd <8 x float> %res, %res1
   2227   %res4 = fadd <8 x float> %res3, %res2
   2228   ret <8 x float> %res4
   2229 }
   2230 
   2231 declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32>, <8 x i32>, i8)
   2232 
   2233 define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x2_256(<4 x i32> %x0, <8 x i32> %x2, i8 %x3, i64 * %y_ptr) {
   2234 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_256:
   2235 ; CHECK:       ## BB#0:
   2236 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   2237 ; CHECK-NEXT:    vbroadcasti32x2 (%rsi), %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x59,0x0e]
   2238 ; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x59,0xd0]
   2239 ; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x59,0xc0]
   2240 ; CHECK-NEXT:    vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca]
   2241 ; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
   2242 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2243   %y_64  = load i64, i64 * %y_ptr
   2244   %y_v2i64 = insertelement <2 x i64> undef, i64 %y_64, i32 0
   2245   %y = bitcast <2 x i64> %y_v2i64 to <4 x i32>
   2246   %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32>  %y, <8 x i32> %x2, i8 %x3)
   2247   %res1 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %x3)
   2248   %res2 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1)
   2249   %res3 = add <8 x i32> %res, %res1
   2250   %res4 = add <8 x i32> %res3, %res2
   2251   ret <8 x i32> %res4
   2252 }
   2253 
   2254 declare <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32>, <4 x i32>, i8)
   2255 
   2256 define <4 x i32>@test_int_x86_avx512_mask_broadcasti32x2_128(<4 x i32> %x0, <4 x i32> %x2, i8 %x3) {
   2257 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_128:
   2258 ; CHECK:       ## BB#0:
   2259 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   2260 ; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x59,0xc8]
   2261 ; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x59,0xd0]
   2262 ; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x59,0xc0]
   2263 ; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca]
   2264 ; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
   2265 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2266   %res = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32>  %x0, <4 x i32> %x2, i8 %x3)
   2267   %res1 = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x3)
   2268   %res2 = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> %x2, i8 -1)
   2269   %res3 = add <4 x i32> %res, %res1
   2270   %res4 = add <4 x i32> %res3, %res2
   2271   ret <4 x i32> %res4
   2272 }
   2273 
   2274 declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>)
   2275 
   2276 define i8@test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) {
   2277 ; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128:
   2278 ; CHECK:       ## BB#0:
   2279 ; CHECK-NEXT:    vpmovd2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0]
   2280 ; CHECK-NEXT:    kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
   2281 ; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
   2282 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2283     %res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0)
   2284     ret i8 %res
   2285 }
   2286 
   2287 declare i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32>)
   2288 
   2289 define i8@test_int_x86_avx512_cvtd2mask_256(<8 x i32> %x0) {
   2290 ; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_256:
   2291 ; CHECK:       ## BB#0:
   2292 ; CHECK-NEXT:    vpmovd2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x39,0xc0]
   2293 ; CHECK-NEXT:    kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
   2294 ; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
   2295 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2296     %res = call i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32> %x0)
   2297     ret i8 %res
   2298 }
   2299 
   2300 declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>)
   2301 
   2302 define i8@test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) {
   2303 ; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128:
   2304 ; CHECK:       ## BB#0:
   2305 ; CHECK-NEXT:    vpmovq2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0]
   2306 ; CHECK-NEXT:    kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
   2307 ; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
   2308 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2309     %res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0)
   2310     ret i8 %res
   2311 }
   2312 
   2313 declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>)
   2314 
   2315 define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) {
   2316 ; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256:
   2317 ; CHECK:       ## BB#0:
   2318 ; CHECK-NEXT:    vpmovq2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0]
   2319 ; CHECK-NEXT:    kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
   2320 ; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
   2321 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2322     %res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0)
   2323     ret i8 %res
   2324 }
   2325 
   2326 declare <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8)
   2327 
   2328 define <4 x i32>@test_int_x86_avx512_cvtmask2d_128(i8 %x0) {
   2329 ; CHECK-LABEL: test_int_x86_avx512_cvtmask2d_128:
   2330 ; CHECK:       ## BB#0:
   2331 ; CHECK-NEXT:    kmovb %edi, %k0 ## encoding: [0xc5,0xf9,0x92,0xc7]
   2332 ; CHECK-NEXT:    vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
   2333 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2334   %res = call <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8 %x0)
   2335   ret <4 x i32> %res
   2336 }
   2337 
   2338 declare <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8)
   2339 
   2340 define <8 x i32>@test_int_x86_avx512_cvtmask2d_256(i8 %x0) {
   2341 ; CHECK-LABEL: test_int_x86_avx512_cvtmask2d_256:
   2342 ; CHECK:       ## BB#0:
   2343 ; CHECK-NEXT:    kmovb %edi, %k0 ## encoding: [0xc5,0xf9,0x92,0xc7]
   2344 ; CHECK-NEXT:    vpmovm2d %k0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
   2345 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2346   %res = call <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8 %x0)
   2347   ret <8 x i32> %res
   2348 }
   2349 
   2350 declare <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8)
   2351 
   2352 define <2 x i64>@test_int_x86_avx512_cvtmask2q_128(i8 %x0) {
   2353 ; CHECK-LABEL: test_int_x86_avx512_cvtmask2q_128:
   2354 ; CHECK:       ## BB#0:
   2355 ; CHECK-NEXT:    kmovb %edi, %k0 ## encoding: [0xc5,0xf9,0x92,0xc7]
   2356 ; CHECK-NEXT:    vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
   2357 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2358   %res = call <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8 %x0)
   2359   ret <2 x i64> %res
   2360 }
   2361 
   2362 declare <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8)
   2363 
   2364 define <4 x i64>@test_int_x86_avx512_cvtmask2q_256(i8 %x0) {
   2365 ; CHECK-LABEL: test_int_x86_avx512_cvtmask2q_256:
   2366 ; CHECK:       ## BB#0:
   2367 ; CHECK-NEXT:    kmovb %edi, %k0 ## encoding: [0xc5,0xf9,0x92,0xc7]
   2368 ; CHECK-NEXT:    vpmovm2q %k0, %ymm0 ## encoding: [0x62,0xf2,0xfe,0x28,0x38,0xc0]
   2369 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2370   %res = call <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8 %x0)
   2371   ret <4 x i64> %res
   2372 }
   2373 declare <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double>, <4 x double>, i8)
   2374 
   2375 define <4 x double>@test_int_x86_avx512_mask_broadcastf64x2_256(<2 x double> %x0, <4 x double> %x2, i8 %mask) {
   2376 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256:
   2377 ; CHECK:       ## BB#0:
   2378 ; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
   2379 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   2380 ; CHECK-NEXT:    vshuff64x2 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x23,0xd0,0x00]
   2381 ; CHECK-NEXT:    ## ymm2 {%k1} {z} = ymm0[0,1,0,1]
   2382 ; CHECK-NEXT:    vshuff64x2 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x23,0xc8,0x00]
   2383 ; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[0,1,0,1]
   2384 ; CHECK-NEXT:    vshuff64x2 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x23,0xc0,0x00]
   2385 ; CHECK-NEXT:    ## ymm0 = ymm0[0,1,0,1]
   2386 ; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1]
   2387 ; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
   2388 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2389 
   2390   %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 -1)
   2391   %res2 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 %mask)
   2392   %res3 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> zeroinitializer, i8 %mask)
   2393   %res4 = fadd <4 x double> %res1, %res2
   2394   %res5 = fadd <4 x double> %res3, %res4
   2395   ret <4 x double> %res5
   2396 }
   2397 
   2398 declare <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64>, <4 x i64>, i8)
   2399 
   2400 define <4 x i64>@test_int_x86_avx512_mask_broadcasti64x2_256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask) {
   2401 ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256:
   2402 ; CHECK:       ## BB#0:
   2403 ; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
   2404 ; CHECK-NEXT:    kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
   2405 ; CHECK-NEXT:    vshufi64x2 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x43,0xd0,0x00]
   2406 ; CHECK-NEXT:    ## ymm2 {%k1} {z} = ymm0[0,1,0,1]
   2407 ; CHECK-NEXT:    vshufi64x2 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x43,0xc8,0x00]
   2408 ; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[0,1,0,1]
   2409 ; CHECK-NEXT:    vshufi64x2 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x43,0xc0,0x00]
   2410 ; CHECK-NEXT:    ## ymm0 = ymm0[0,1,0,1]
   2411 ; CHECK-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc1]
   2412 ; CHECK-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0]
   2413 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2414 
   2415   %res1 = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 -1)
   2416   %res2 = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask)
   2417   %res3 = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> zeroinitializer, i8 %mask)
   2418   %res4 = add <4 x i64> %res1, %res2
   2419   %res5 = add <4 x i64> %res3, %res4
   2420   ret <4 x i64> %res5
   2421 }
   2422