; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw  | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq  | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=X86


      9 define i16 @mask16(i16 %x) {
     10 ; CHECK-LABEL: mask16:
     11 ; CHECK:       ## %bb.0:
     12 ; CHECK-NEXT:    notl %edi
     13 ; CHECK-NEXT:    movl %edi, %eax
     14 ; CHECK-NEXT:    retq
     15 ;
     16 ; X86-LABEL: mask16:
     17 ; X86:       ## %bb.0:
     18 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
     19 ; X86-NEXT:    notl %eax
     20 ; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
     21 ; X86-NEXT:    retl
     22   %m0 = bitcast i16 %x to <16 x i1>
     23   %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
     24   %ret = bitcast <16 x i1> %m1 to i16
     25   ret i16 %ret
     26 }
     27 
     28 define i32 @mask16_zext(i16 %x) {
     29 ; CHECK-LABEL: mask16_zext:
     30 ; CHECK:       ## %bb.0:
     31 ; CHECK-NEXT:    notl %edi
     32 ; CHECK-NEXT:    movzwl %di, %eax
     33 ; CHECK-NEXT:    retq
     34 ;
     35 ; X86-LABEL: mask16_zext:
     36 ; X86:       ## %bb.0:
     37 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
     38 ; X86-NEXT:    xorl $65535, %eax ## imm = 0xFFFF
     39 ; X86-NEXT:    retl
     40   %m0 = bitcast i16 %x to <16 x i1>
     41   %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
     42   %m2 = bitcast <16 x i1> %m1 to i16
     43   %ret = zext i16 %m2 to i32
     44   ret i32 %ret
     45 }
     46 
     47 define i8 @mask8(i8 %x) {
     48 ; CHECK-LABEL: mask8:
     49 ; CHECK:       ## %bb.0:
     50 ; CHECK-NEXT:    notb %dil
     51 ; CHECK-NEXT:    movl %edi, %eax
     52 ; CHECK-NEXT:    retq
     53 ;
     54 ; X86-LABEL: mask8:
     55 ; X86:       ## %bb.0:
     56 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
     57 ; X86-NEXT:    notb %al
     58 ; X86-NEXT:    retl
     59   %m0 = bitcast i8 %x to <8 x i1>
     60   %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
     61   %ret = bitcast <8 x i1> %m1 to i8
     62   ret i8 %ret
     63 }
     64 
     65 define i32 @mask8_zext(i8 %x) {
     66 ; CHECK-LABEL: mask8_zext:
     67 ; CHECK:       ## %bb.0:
     68 ; CHECK-NEXT:    notb %dil
     69 ; CHECK-NEXT:    movzbl %dil, %eax
     70 ; CHECK-NEXT:    retq
     71 ;
     72 ; X86-LABEL: mask8_zext:
     73 ; X86:       ## %bb.0:
     74 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
     75 ; X86-NEXT:    notb %al
     76 ; X86-NEXT:    movzbl %al, %eax
     77 ; X86-NEXT:    retl
     78   %m0 = bitcast i8 %x to <8 x i1>
     79   %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
     80   %m2 = bitcast <8 x i1> %m1 to i8
     81   %ret = zext i8 %m2 to i32
     82   ret i32 %ret
     83 }
     84 
     85 define void @mask16_mem(i16* %ptr) {
     86 ; CHECK-LABEL: mask16_mem:
     87 ; CHECK:       ## %bb.0:
     88 ; CHECK-NEXT:    kmovw (%rdi), %k0
     89 ; CHECK-NEXT:    knotw %k0, %k0
     90 ; CHECK-NEXT:    kmovw %k0, (%rdi)
     91 ; CHECK-NEXT:    retq
     92 ;
     93 ; X86-LABEL: mask16_mem:
     94 ; X86:       ## %bb.0:
     95 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
     96 ; X86-NEXT:    kmovw (%eax), %k0
     97 ; X86-NEXT:    knotw %k0, %k0
     98 ; X86-NEXT:    kmovw %k0, (%eax)
     99 ; X86-NEXT:    retl
    100   %x = load i16, i16* %ptr, align 4
    101   %m0 = bitcast i16 %x to <16 x i1>
    102   %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
    103   %ret = bitcast <16 x i1> %m1 to i16
    104   store i16 %ret, i16* %ptr, align 4
    105   ret void
    106 }
    107 
    108 define void @mask8_mem(i8* %ptr) {
    109 ; KNL-LABEL: mask8_mem:
    110 ; KNL:       ## %bb.0:
    111 ; KNL-NEXT:    notb (%rdi)
    112 ; KNL-NEXT:    retq
    113 ;
    114 ; SKX-LABEL: mask8_mem:
    115 ; SKX:       ## %bb.0:
    116 ; SKX-NEXT:    kmovb (%rdi), %k0
    117 ; SKX-NEXT:    knotb %k0, %k0
    118 ; SKX-NEXT:    kmovb %k0, (%rdi)
    119 ; SKX-NEXT:    retq
    120 ;
    121 ; AVX512BW-LABEL: mask8_mem:
    122 ; AVX512BW:       ## %bb.0:
    123 ; AVX512BW-NEXT:    notb (%rdi)
    124 ; AVX512BW-NEXT:    retq
    125 ;
    126 ; AVX512DQ-LABEL: mask8_mem:
    127 ; AVX512DQ:       ## %bb.0:
    128 ; AVX512DQ-NEXT:    kmovb (%rdi), %k0
    129 ; AVX512DQ-NEXT:    knotb %k0, %k0
    130 ; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
    131 ; AVX512DQ-NEXT:    retq
    132 ;
    133 ; X86-LABEL: mask8_mem:
    134 ; X86:       ## %bb.0:
    135 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    136 ; X86-NEXT:    kmovb (%eax), %k0
    137 ; X86-NEXT:    knotb %k0, %k0
    138 ; X86-NEXT:    kmovb %k0, (%eax)
    139 ; X86-NEXT:    retl
    140   %x = load i8, i8* %ptr, align 4
    141   %m0 = bitcast i8 %x to <8 x i1>
    142   %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
    143   %ret = bitcast <8 x i1> %m1 to i8
    144   store i8 %ret, i8* %ptr, align 4
    145   ret void
    146 }
    147 
    148 define i16 @mand16(i16 %x, i16 %y) {
    149 ; CHECK-LABEL: mand16:
    150 ; CHECK:       ## %bb.0:
    151 ; CHECK-NEXT:    movl %edi, %eax
    152 ; CHECK-NEXT:    xorl %esi, %eax
    153 ; CHECK-NEXT:    andl %esi, %edi
    154 ; CHECK-NEXT:    orl %eax, %edi
    155 ; CHECK-NEXT:    movl %edi, %eax
    156 ; CHECK-NEXT:    retq
    157 ;
    158 ; X86-LABEL: mand16:
    159 ; X86:       ## %bb.0:
    160 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    161 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    162 ; X86-NEXT:    movl %eax, %edx
    163 ; X86-NEXT:    andl %ecx, %edx
    164 ; X86-NEXT:    xorl %ecx, %eax
    165 ; X86-NEXT:    orl %edx, %eax
    166 ; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
    167 ; X86-NEXT:    retl
    168   %ma = bitcast i16 %x to <16 x i1>
    169   %mb = bitcast i16 %y to <16 x i1>
    170   %mc = and <16 x i1> %ma, %mb
    171   %md = xor <16 x i1> %ma, %mb
    172   %me = or <16 x i1> %mc, %md
    173   %ret = bitcast <16 x i1> %me to i16
    174   ret i16 %ret
    175 }
    176 
    177 define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
    178 ; KNL-LABEL: mand16_mem:
    179 ; KNL:       ## %bb.0:
    180 ; KNL-NEXT:    kmovw (%rdi), %k0
    181 ; KNL-NEXT:    kmovw (%rsi), %k1
    182 ; KNL-NEXT:    kandw %k1, %k0, %k2
    183 ; KNL-NEXT:    kxorw %k1, %k0, %k0
    184 ; KNL-NEXT:    korw %k0, %k2, %k0
    185 ; KNL-NEXT:    kmovw %k0, %eax
    186 ; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
    187 ; KNL-NEXT:    retq
    188 ;
    189 ; SKX-LABEL: mand16_mem:
    190 ; SKX:       ## %bb.0:
    191 ; SKX-NEXT:    kmovw (%rdi), %k0
    192 ; SKX-NEXT:    kmovw (%rsi), %k1
    193 ; SKX-NEXT:    kandw %k1, %k0, %k2
    194 ; SKX-NEXT:    kxorw %k1, %k0, %k0
    195 ; SKX-NEXT:    korw %k0, %k2, %k0
    196 ; SKX-NEXT:    kmovd %k0, %eax
    197 ; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
    198 ; SKX-NEXT:    retq
    199 ;
    200 ; AVX512BW-LABEL: mand16_mem:
    201 ; AVX512BW:       ## %bb.0:
    202 ; AVX512BW-NEXT:    kmovw (%rdi), %k0
    203 ; AVX512BW-NEXT:    kmovw (%rsi), %k1
    204 ; AVX512BW-NEXT:    kandw %k1, %k0, %k2
    205 ; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
    206 ; AVX512BW-NEXT:    korw %k0, %k2, %k0
    207 ; AVX512BW-NEXT:    kmovd %k0, %eax
    208 ; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
    209 ; AVX512BW-NEXT:    retq
    210 ;
    211 ; AVX512DQ-LABEL: mand16_mem:
    212 ; AVX512DQ:       ## %bb.0:
    213 ; AVX512DQ-NEXT:    kmovw (%rdi), %k0
    214 ; AVX512DQ-NEXT:    kmovw (%rsi), %k1
    215 ; AVX512DQ-NEXT:    kandw %k1, %k0, %k2
    216 ; AVX512DQ-NEXT:    kxorw %k1, %k0, %k0
    217 ; AVX512DQ-NEXT:    korw %k0, %k2, %k0
    218 ; AVX512DQ-NEXT:    kmovw %k0, %eax
    219 ; AVX512DQ-NEXT:    ## kill: def $ax killed $ax killed $eax
    220 ; AVX512DQ-NEXT:    retq
    221 ;
    222 ; X86-LABEL: mand16_mem:
    223 ; X86:       ## %bb.0:
    224 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    225 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
    226 ; X86-NEXT:    kmovw (%ecx), %k0
    227 ; X86-NEXT:    kmovw (%eax), %k1
    228 ; X86-NEXT:    kandw %k1, %k0, %k2
    229 ; X86-NEXT:    kxorw %k1, %k0, %k0
    230 ; X86-NEXT:    korw %k0, %k2, %k0
    231 ; X86-NEXT:    kmovd %k0, %eax
    232 ; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
    233 ; X86-NEXT:    retl
    234   %ma = load <16 x i1>, <16 x i1>* %x
    235   %mb = load <16 x i1>, <16 x i1>* %y
    236   %mc = and <16 x i1> %ma, %mb
    237   %md = xor <16 x i1> %ma, %mb
    238   %me = or <16 x i1> %mc, %md
    239   %ret = bitcast <16 x i1> %me to i16
    240   ret i16 %ret
    241 }
    242 
    243 define i8 @shuf_test1(i16 %v) nounwind {
    244 ; KNL-LABEL: shuf_test1:
    245 ; KNL:       ## %bb.0:
    246 ; KNL-NEXT:    kmovw %edi, %k0
    247 ; KNL-NEXT:    kshiftrw $8, %k0, %k0
    248 ; KNL-NEXT:    kmovw %k0, %eax
    249 ; KNL-NEXT:    ## kill: def $al killed $al killed $eax
    250 ; KNL-NEXT:    retq
    251 ;
    252 ; SKX-LABEL: shuf_test1:
    253 ; SKX:       ## %bb.0:
    254 ; SKX-NEXT:    kmovd %edi, %k0
    255 ; SKX-NEXT:    kshiftrw $8, %k0, %k0
    256 ; SKX-NEXT:    kmovd %k0, %eax
    257 ; SKX-NEXT:    ## kill: def $al killed $al killed $eax
    258 ; SKX-NEXT:    retq
    259 ;
    260 ; AVX512BW-LABEL: shuf_test1:
    261 ; AVX512BW:       ## %bb.0:
    262 ; AVX512BW-NEXT:    kmovd %edi, %k0
    263 ; AVX512BW-NEXT:    kshiftrw $8, %k0, %k0
    264 ; AVX512BW-NEXT:    kmovd %k0, %eax
    265 ; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
    266 ; AVX512BW-NEXT:    retq
    267 ;
    268 ; AVX512DQ-LABEL: shuf_test1:
    269 ; AVX512DQ:       ## %bb.0:
    270 ; AVX512DQ-NEXT:    kmovw %edi, %k0
    271 ; AVX512DQ-NEXT:    kshiftrw $8, %k0, %k0
    272 ; AVX512DQ-NEXT:    kmovw %k0, %eax
    273 ; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
    274 ; AVX512DQ-NEXT:    retq
    275 ;
    276 ; X86-LABEL: shuf_test1:
    277 ; X86:       ## %bb.0:
    278 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
    279 ; X86-NEXT:    retl
    280    %v1 = bitcast i16 %v to <16 x i1>
    281    %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    282    %mask1 = bitcast <8 x i1> %mask to i8
    283    ret i8 %mask1
    284 }
    285 
    286 define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
    287 ; KNL-LABEL: zext_test1:
    288 ; KNL:       ## %bb.0:
    289 ; KNL-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
    290 ; KNL-NEXT:    kshiftrw $5, %k0, %k0
    291 ; KNL-NEXT:    kmovw %k0, %eax
    292 ; KNL-NEXT:    andl $1, %eax
    293 ; KNL-NEXT:    vzeroupper
    294 ; KNL-NEXT:    retq
    295 ;
    296 ; SKX-LABEL: zext_test1:
    297 ; SKX:       ## %bb.0:
    298 ; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
    299 ; SKX-NEXT:    kshiftrw $5, %k0, %k0
    300 ; SKX-NEXT:    kmovd %k0, %eax
    301 ; SKX-NEXT:    andl $1, %eax
    302 ; SKX-NEXT:    vzeroupper
    303 ; SKX-NEXT:    retq
    304 ;
    305 ; AVX512BW-LABEL: zext_test1:
    306 ; AVX512BW:       ## %bb.0:
    307 ; AVX512BW-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
    308 ; AVX512BW-NEXT:    kshiftrw $5, %k0, %k0
    309 ; AVX512BW-NEXT:    kmovd %k0, %eax
    310 ; AVX512BW-NEXT:    andl $1, %eax
    311 ; AVX512BW-NEXT:    vzeroupper
    312 ; AVX512BW-NEXT:    retq
    313 ;
    314 ; AVX512DQ-LABEL: zext_test1:
    315 ; AVX512DQ:       ## %bb.0:
    316 ; AVX512DQ-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
    317 ; AVX512DQ-NEXT:    kshiftrw $5, %k0, %k0
    318 ; AVX512DQ-NEXT:    kmovw %k0, %eax
    319 ; AVX512DQ-NEXT:    andl $1, %eax
    320 ; AVX512DQ-NEXT:    vzeroupper
    321 ; AVX512DQ-NEXT:    retq
    322 ;
    323 ; X86-LABEL: zext_test1:
    324 ; X86:       ## %bb.0:
    325 ; X86-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
    326 ; X86-NEXT:    kshiftrw $5, %k0, %k0
    327 ; X86-NEXT:    kmovd %k0, %eax
    328 ; X86-NEXT:    andl $1, %eax
    329 ; X86-NEXT:    vzeroupper
    330 ; X86-NEXT:    retl
    331   %cmp_res = icmp ugt <16 x i32> %a, %b
    332   %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
    333   %res = zext i1 %cmp_res.i1 to i32
    334   ret i32 %res
    335 }
    336 
    337 define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
    338 ; KNL-LABEL: zext_test2:
    339 ; KNL:       ## %bb.0:
    340 ; KNL-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
    341 ; KNL-NEXT:    kshiftrw $5, %k0, %k0
    342 ; KNL-NEXT:    kmovw %k0, %eax
    343 ; KNL-NEXT:    andl $1, %eax
    344 ; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
    345 ; KNL-NEXT:    vzeroupper
    346 ; KNL-NEXT:    retq
    347 ;
    348 ; SKX-LABEL: zext_test2:
    349 ; SKX:       ## %bb.0:
    350 ; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
    351 ; SKX-NEXT:    kshiftrw $5, %k0, %k0
    352 ; SKX-NEXT:    kmovd %k0, %eax
    353 ; SKX-NEXT:    andl $1, %eax
    354 ; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
    355 ; SKX-NEXT:    vzeroupper
    356 ; SKX-NEXT:    retq
    357 ;
    358 ; AVX512BW-LABEL: zext_test2:
    359 ; AVX512BW:       ## %bb.0:
    360 ; AVX512BW-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
    361 ; AVX512BW-NEXT:    kshiftrw $5, %k0, %k0
    362 ; AVX512BW-NEXT:    kmovd %k0, %eax
    363 ; AVX512BW-NEXT:    andl $1, %eax
    364 ; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
    365 ; AVX512BW-NEXT:    vzeroupper
    366 ; AVX512BW-NEXT:    retq
    367 ;
    368 ; AVX512DQ-LABEL: zext_test2:
    369 ; AVX512DQ:       ## %bb.0:
    370 ; AVX512DQ-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
    371 ; AVX512DQ-NEXT:    kshiftrw $5, %k0, %k0
    372 ; AVX512DQ-NEXT:    kmovw %k0, %eax
    373 ; AVX512DQ-NEXT:    andl $1, %eax
    374 ; AVX512DQ-NEXT:    ## kill: def $ax killed $ax killed $eax
    375 ; AVX512DQ-NEXT:    vzeroupper
    376 ; AVX512DQ-NEXT:    retq
    377 ;
    378 ; X86-LABEL: zext_test2:
    379 ; X86:       ## %bb.0:
    380 ; X86-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
    381 ; X86-NEXT:    kshiftrw $5, %k0, %k0
    382 ; X86-NEXT:    kmovd %k0, %eax
    383 ; X86-NEXT:    andl $1, %eax
    384 ; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
    385 ; X86-NEXT:    vzeroupper
    386 ; X86-NEXT:    retl
    387   %cmp_res = icmp ugt <16 x i32> %a, %b
    388   %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
    389   %res = zext i1 %cmp_res.i1 to i16
    390   ret i16 %res
    391 }
    392 
    393 define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
    394 ; KNL-LABEL: zext_test3:
    395 ; KNL:       ## %bb.0:
    396 ; KNL-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
    397 ; KNL-NEXT:    kshiftrw $5, %k0, %k0
    398 ; KNL-NEXT:    kmovw %k0, %eax
    399 ; KNL-NEXT:    andb $1, %al
    400 ; KNL-NEXT:    ## kill: def $al killed $al killed $eax
    401 ; KNL-NEXT:    vzeroupper
    402 ; KNL-NEXT:    retq
    403 ;
    404 ; SKX-LABEL: zext_test3:
    405 ; SKX:       ## %bb.0:
    406 ; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
    407 ; SKX-NEXT:    kshiftrw $5, %k0, %k0
    408 ; SKX-NEXT:    kmovd %k0, %eax
    409 ; SKX-NEXT:    andb $1, %al
    410 ; SKX-NEXT:    ## kill: def $al killed $al killed $eax
    411 ; SKX-NEXT:    vzeroupper
    412 ; SKX-NEXT:    retq
    413 ;
    414 ; AVX512BW-LABEL: zext_test3:
    415 ; AVX512BW:       ## %bb.0:
    416 ; AVX512BW-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
    417 ; AVX512BW-NEXT:    kshiftrw $5, %k0, %k0
    418 ; AVX512BW-NEXT:    kmovd %k0, %eax
    419 ; AVX512BW-NEXT:    andb $1, %al
    420 ; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
    421 ; AVX512BW-NEXT:    vzeroupper
    422 ; AVX512BW-NEXT:    retq
    423 ;
    424 ; AVX512DQ-LABEL: zext_test3:
    425 ; AVX512DQ:       ## %bb.0:
    426 ; AVX512DQ-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
    427 ; AVX512DQ-NEXT:    kshiftrw $5, %k0, %k0
    428 ; AVX512DQ-NEXT:    kmovw %k0, %eax
    429 ; AVX512DQ-NEXT:    andb $1, %al
    430 ; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
    431 ; AVX512DQ-NEXT:    vzeroupper
    432 ; AVX512DQ-NEXT:    retq
    433 ;
    434 ; X86-LABEL: zext_test3:
    435 ; X86:       ## %bb.0:
    436 ; X86-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
    437 ; X86-NEXT:    kshiftrw $5, %k0, %k0
    438 ; X86-NEXT:    kmovd %k0, %eax
    439 ; X86-NEXT:    andb $1, %al
    440 ; X86-NEXT:    ## kill: def $al killed $al killed $eax
    441 ; X86-NEXT:    vzeroupper
    442 ; X86-NEXT:    retl
    443   %cmp_res = icmp ugt <16 x i32> %a, %b
    444   %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
    445   %res = zext i1 %cmp_res.i1 to i8
    446   ret i8 %res
    447 }
    448 
    449 define i8 @conv1(<8 x i1>* %R) {
    450 ; CHECK-LABEL: conv1:
    451 ; CHECK:       ## %bb.0: ## %entry
    452 ; CHECK-NEXT:    movb $-1, (%rdi)
    453 ; CHECK-NEXT:    movb $-2, -{{[0-9]+}}(%rsp)
    454 ; CHECK-NEXT:    movb $-2, %al
    455 ; CHECK-NEXT:    retq
    456 ;
    457 ; X86-LABEL: conv1:
    458 ; X86:       ## %bb.0: ## %entry
    459 ; X86-NEXT:    subl $12, %esp
    460 ; X86-NEXT:    .cfi_def_cfa_offset 16
    461 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    462 ; X86-NEXT:    movb $-1, (%eax)
    463 ; X86-NEXT:    movb $-2, (%esp)
    464 ; X86-NEXT:    movb $-2, %al
    465 ; X86-NEXT:    addl $12, %esp
    466 ; X86-NEXT:    retl
    467 entry:
    468   store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R
    469 
    470   %maskPtr = alloca <8 x i1>
    471   store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
    472   %mask = load <8 x i1>, <8 x i1>* %maskPtr
    473   %mask_convert = bitcast <8 x i1> %mask to i8
    474   ret i8 %mask_convert
    475 }
    476 
    477 define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
    478 ; KNL-LABEL: test4:
    479 ; KNL:       ## %bb.0:
    480 ; KNL-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
    481 ; KNL-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
    482 ; KNL-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
    483 ; KNL-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
    484 ; KNL-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
    485 ; KNL-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
    486 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
    487 ; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
    488 ; KNL-NEXT:    vzeroupper
    489 ; KNL-NEXT:    retq
    490 ;
    491 ; SKX-LABEL: test4:
    492 ; SKX:       ## %bb.0:
    493 ; SKX-NEXT:    vpcmpleq %ymm1, %ymm0, %k1
    494 ; SKX-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
    495 ; SKX-NEXT:    vpmovm2d %k0, %xmm0
    496 ; SKX-NEXT:    vzeroupper
    497 ; SKX-NEXT:    retq
    498 ;
    499 ; AVX512BW-LABEL: test4:
    500 ; AVX512BW:       ## %bb.0:
    501 ; AVX512BW-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
    502 ; AVX512BW-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
    503 ; AVX512BW-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
    504 ; AVX512BW-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
    505 ; AVX512BW-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
    506 ; AVX512BW-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1 {%k1}
    507 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
    508 ; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
    509 ; AVX512BW-NEXT:    vzeroupper
    510 ; AVX512BW-NEXT:    retq
    511 ;
    512 ; AVX512DQ-LABEL: test4:
    513 ; AVX512DQ:       ## %bb.0:
    514 ; AVX512DQ-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
    515 ; AVX512DQ-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
    516 ; AVX512DQ-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
    517 ; AVX512DQ-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
    518 ; AVX512DQ-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
    519 ; AVX512DQ-NEXT:    vpcmpgtq %zmm3, %zmm2, %k0 {%k1}
    520 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
    521 ; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
    522 ; AVX512DQ-NEXT:    vzeroupper
    523 ; AVX512DQ-NEXT:    retq
    524 ;
    525 ; X86-LABEL: test4:
    526 ; X86:       ## %bb.0:
    527 ; X86-NEXT:    vpcmpleq %ymm1, %ymm0, %k1
    528 ; X86-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0 {%k1}
    529 ; X86-NEXT:    vpmovm2d %k0, %xmm0
    530 ; X86-NEXT:    vzeroupper
    531 ; X86-NEXT:    retl
    532   %x_gt_y = icmp sgt <4 x i64> %x, %y
    533   %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
    534   %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
    535   %resse = sext <4 x i1>%res to <4 x i32>
    536   ret <4 x i32> %resse
    537 }
    538 
    539 define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
    540 ; KNL-LABEL: test5:
    541 ; KNL:       ## %bb.0:
    542 ; KNL-NEXT:    ## kill: def $xmm3 killed $xmm3 def $zmm3
    543 ; KNL-NEXT:    ## kill: def $xmm2 killed $xmm2 def $zmm2
    544 ; KNL-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
    545 ; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
    546 ; KNL-NEXT:    vpcmpleq %zmm3, %zmm2, %k1
    547 ; KNL-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
    548 ; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
    549 ; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
    550 ; KNL-NEXT:    vzeroupper
    551 ; KNL-NEXT:    retq
    552 ;
    553 ; SKX-LABEL: test5:
    554 ; SKX:       ## %bb.0:
    555 ; SKX-NEXT:    vpcmpleq %xmm3, %xmm2, %k1
    556 ; SKX-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0 {%k1}
    557 ; SKX-NEXT:    vpmovm2q %k0, %xmm0
    558 ; SKX-NEXT:    retq
    559 ;
    560 ; AVX512BW-LABEL: test5:
    561 ; AVX512BW:       ## %bb.0:
    562 ; AVX512BW-NEXT:    ## kill: def $xmm3 killed $xmm3 def $zmm3
    563 ; AVX512BW-NEXT:    ## kill: def $xmm2 killed $xmm2 def $zmm2
    564 ; AVX512BW-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
    565 ; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
    566 ; AVX512BW-NEXT:    vpcmpleq %zmm3, %zmm2, %k1
    567 ; AVX512BW-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1 {%k1}
    568 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
    569 ; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
    570 ; AVX512BW-NEXT:    vzeroupper
    571 ; AVX512BW-NEXT:    retq
    572 ;
    573 ; AVX512DQ-LABEL: test5:
    574 ; AVX512DQ:       ## %bb.0:
    575 ; AVX512DQ-NEXT:    ## kill: def $xmm3 killed $xmm3 def $zmm3
    576 ; AVX512DQ-NEXT:    ## kill: def $xmm2 killed $xmm2 def $zmm2
    577 ; AVX512DQ-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
    578 ; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
    579 ; AVX512DQ-NEXT:    vpcmpleq %zmm3, %zmm2, %k1
    580 ; AVX512DQ-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0 {%k1}
    581 ; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
    582 ; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
    583 ; AVX512DQ-NEXT:    vzeroupper
    584 ; AVX512DQ-NEXT:    retq
    585 ;
    586 ; X86-LABEL: test5:
    587 ; X86:       ## %bb.0:
    588 ; X86-NEXT:    vpcmpleq %xmm3, %xmm2, %k1
    589 ; X86-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0 {%k1}
    590 ; X86-NEXT:    vpmovm2q %k0, %xmm0
    591 ; X86-NEXT:    retl
    592   %x_gt_y = icmp slt <2 x i64> %x, %y
    593   %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
    594   %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
    595   %resse = sext <2 x i1>%res to <2 x i64>
    596   ret <2 x i64> %resse
    597 }define void @test6(<16 x i1> %mask)  {
    598 allocas:
    599   %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
    600   %b = bitcast <16 x i1> %a to i16
    601   %c = icmp eq i16 %b, 0
    602   br i1 %c, label %true, label %false
    603 
    604 true:
    605   ret void
    606 
    607 false:
    608   ret void
    609 }
    610 define void @test7(<8 x i1> %mask)  {
    611 ; KNL-LABEL: test7:
    612 ; KNL:       ## %bb.0: ## %allocas
    613 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
    614 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
    615 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
    616 ; KNL-NEXT:    kmovw %k0, %eax
    617 ; KNL-NEXT:    orb $85, %al
    618 ; KNL-NEXT:    vzeroupper
    619 ; KNL-NEXT:    retq
    620 ;
    621 ; SKX-LABEL: test7:
    622 ; SKX:       ## %bb.0: ## %allocas
    623 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
    624 ; SKX-NEXT:    vpmovw2m %xmm0, %k0
    625 ; SKX-NEXT:    kmovd %k0, %eax
    626 ; SKX-NEXT:    orb $85, %al
    627 ; SKX-NEXT:    retq
    628 ;
    629 ; AVX512BW-LABEL: test7:
    630 ; AVX512BW:       ## %bb.0: ## %allocas
    631 ; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
    632 ; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
    633 ; AVX512BW-NEXT:    kmovd %k0, %eax
    634 ; AVX512BW-NEXT:    orb $85, %al
    635 ; AVX512BW-NEXT:    vzeroupper
    636 ; AVX512BW-NEXT:    retq
    637 ;
    638 ; AVX512DQ-LABEL: test7:
    639 ; AVX512DQ:       ## %bb.0: ## %allocas
    640 ; AVX512DQ-NEXT:    vpmovsxwq %xmm0, %zmm0
    641 ; AVX512DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
    642 ; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
    643 ; AVX512DQ-NEXT:    kmovw %k0, %eax
    644 ; AVX512DQ-NEXT:    orb $85, %al
    645 ; AVX512DQ-NEXT:    vzeroupper
    646 ; AVX512DQ-NEXT:    retq
    647 ;
    648 ; X86-LABEL: test7:
    649 ; X86:       ## %bb.0: ## %allocas
    650 ; X86-NEXT:    vpsllw $15, %xmm0, %xmm0
    651 ; X86-NEXT:    vpmovw2m %xmm0, %k0
    652 ; X86-NEXT:    kmovd %k0, %eax
    653 ; X86-NEXT:    orb $85, %al
    654 ; X86-NEXT:    retl
    655 allocas:
    656   %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
    657   %b = bitcast <8 x i1> %a to i8
    658   %c = icmp eq i8 %b, 0
    659   br i1 %c, label %true, label %false
    660 
    661 true:
    662   ret void
    663 
    664 false:
    665   ret void
    666 }
    667 define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
    668 ; KNL-LABEL: test8:
    669 ; KNL:       ## %bb.0:
    670 ; KNL-NEXT:    cmpl %esi, %edi
    671 ; KNL-NEXT:    jg LBB17_1
    672 ; KNL-NEXT:  ## %bb.2:
    673 ; KNL-NEXT:    kxorw %k0, %k0, %k1
    674 ; KNL-NEXT:    jmp LBB17_3
    675 ; KNL-NEXT:  LBB17_1:
    676 ; KNL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    677 ; KNL-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
    678 ; KNL-NEXT:  LBB17_3:
    679 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
    680 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
    681 ; KNL-NEXT:    vzeroupper
    682 ; KNL-NEXT:    retq
    683 ;
    684 ; SKX-LABEL: test8:
    685 ; SKX:       ## %bb.0:
    686 ; SKX-NEXT:    cmpl %esi, %edi
    687 ; SKX-NEXT:    jg LBB17_1
    688 ; SKX-NEXT:  ## %bb.2:
    689 ; SKX-NEXT:    kxorw %k0, %k0, %k0
    690 ; SKX-NEXT:    vpmovm2b %k0, %xmm0
    691 ; SKX-NEXT:    vzeroupper
    692 ; SKX-NEXT:    retq
    693 ; SKX-NEXT:  LBB17_1:
    694 ; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    695 ; SKX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
    696 ; SKX-NEXT:    vpmovm2b %k0, %xmm0
    697 ; SKX-NEXT:    vzeroupper
    698 ; SKX-NEXT:    retq
    699 ;
    700 ; AVX512BW-LABEL: test8:
    701 ; AVX512BW:       ## %bb.0:
    702 ; AVX512BW-NEXT:    cmpl %esi, %edi
    703 ; AVX512BW-NEXT:    jg LBB17_1
    704 ; AVX512BW-NEXT:  ## %bb.2:
    705 ; AVX512BW-NEXT:    kxorw %k0, %k0, %k0
    706 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
    707 ; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
    708 ; AVX512BW-NEXT:    vzeroupper
    709 ; AVX512BW-NEXT:    retq
    710 ; AVX512BW-NEXT:  LBB17_1:
    711 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    712 ; AVX512BW-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
    713 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
    714 ; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
    715 ; AVX512BW-NEXT:    vzeroupper
    716 ; AVX512BW-NEXT:    retq
    717 ;
    718 ; AVX512DQ-LABEL: test8:
    719 ; AVX512DQ:       ## %bb.0:
    720 ; AVX512DQ-NEXT:    cmpl %esi, %edi
    721 ; AVX512DQ-NEXT:    jg LBB17_1
    722 ; AVX512DQ-NEXT:  ## %bb.2:
    723 ; AVX512DQ-NEXT:    kxorw %k0, %k0, %k0
    724 ; AVX512DQ-NEXT:    jmp LBB17_3
    725 ; AVX512DQ-NEXT:  LBB17_1:
    726 ; AVX512DQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    727 ; AVX512DQ-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
    728 ; AVX512DQ-NEXT:  LBB17_3:
    729 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
    730 ; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
    731 ; AVX512DQ-NEXT:    vzeroupper
    732 ; AVX512DQ-NEXT:    retq
    733 ;
    734 ; X86-LABEL: test8:
    735 ; X86:       ## %bb.0:
    736 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    737 ; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
    738 ; X86-NEXT:    jg LBB17_1
    739 ; X86-NEXT:  ## %bb.2:
    740 ; X86-NEXT:    kxorw %k0, %k0, %k0
    741 ; X86-NEXT:    vpmovm2b %k0, %xmm0
    742 ; X86-NEXT:    vzeroupper
    743 ; X86-NEXT:    retl
    744 ; X86-NEXT:  LBB17_1:
    745 ; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    746 ; X86-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
    747 ; X86-NEXT:    vpmovm2b %k0, %xmm0
    748 ; X86-NEXT:    vzeroupper
    749 ; X86-NEXT:    retl
    750   %cond = icmp sgt i32 %a1, %b1
    751   %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
    752   %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
    753   %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
    754   %res = sext <16 x i1> %mix to <16 x i8>
    755   ret <16 x i8> %res
    756 }
    757 define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
    758 ; KNL-LABEL: test9:
    759 ; KNL:       ## %bb.0:
    760 ; KNL-NEXT:    cmpl %esi, %edi
    761 ; KNL-NEXT:    jg LBB18_1
    762 ; KNL-NEXT:  ## %bb.2:
    763 ; KNL-NEXT:    vpmovsxbd %xmm1, %zmm0
    764 ; KNL-NEXT:    jmp LBB18_3
    765 ; KNL-NEXT:  LBB18_1:
    766 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
    767 ; KNL-NEXT:  LBB18_3:
    768 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
    769 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
    770 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
    771 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
    772 ; KNL-NEXT:    vzeroupper
    773 ; KNL-NEXT:    retq
    774 ;
    775 ; SKX-LABEL: test9:
    776 ; SKX:       ## %bb.0:
    777 ; SKX-NEXT:    cmpl %esi, %edi
    778 ; SKX-NEXT:    jg LBB18_1
    779 ; SKX-NEXT:  ## %bb.2:
    780 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm0
    781 ; SKX-NEXT:    jmp LBB18_3
    782 ; SKX-NEXT:  LBB18_1:
    783 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
    784 ; SKX-NEXT:  LBB18_3:
    785 ; SKX-NEXT:    vpmovb2m %xmm0, %k0
    786 ; SKX-NEXT:    vpmovm2b %k0, %xmm0
    787 ; SKX-NEXT:    retq
    788 ;
    789 ; AVX512BW-LABEL: test9:
    790 ; AVX512BW:       ## %bb.0:
    791 ; AVX512BW-NEXT:    cmpl %esi, %edi
    792 ; AVX512BW-NEXT:    jg LBB18_1
    793 ; AVX512BW-NEXT:  ## %bb.2:
    794 ; AVX512BW-NEXT:    vpsllw $7, %xmm1, %xmm0
    795 ; AVX512BW-NEXT:    jmp LBB18_3
    796 ; AVX512BW-NEXT:  LBB18_1:
    797 ; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
    798 ; AVX512BW-NEXT:  LBB18_3:
    799 ; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
    800 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
    801 ; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
    802 ; AVX512BW-NEXT:    vzeroupper
    803 ; AVX512BW-NEXT:    retq
    804 ;
    805 ; AVX512DQ-LABEL: test9:
    806 ; AVX512DQ:       ## %bb.0:
    807 ; AVX512DQ-NEXT:    cmpl %esi, %edi
    808 ; AVX512DQ-NEXT:    jg LBB18_1
    809 ; AVX512DQ-NEXT:  ## %bb.2:
    810 ; AVX512DQ-NEXT:    vpmovsxbd %xmm1, %zmm0
    811 ; AVX512DQ-NEXT:    jmp LBB18_3
    812 ; AVX512DQ-NEXT:  LBB18_1:
    813 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
    814 ; AVX512DQ-NEXT:  LBB18_3:
    815 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
    816 ; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
    817 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
    818 ; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
    819 ; AVX512DQ-NEXT:    vzeroupper
    820 ; AVX512DQ-NEXT:    retq
    821 ;
    822 ; X86-LABEL: test9:
    823 ; X86:       ## %bb.0:
    824 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    825 ; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
    826 ; X86-NEXT:    jg LBB18_1
    827 ; X86-NEXT:  ## %bb.2:
    828 ; X86-NEXT:    vpsllw $7, %xmm1, %xmm0
    829 ; X86-NEXT:    jmp LBB18_3
    830 ; X86-NEXT:  LBB18_1:
    831 ; X86-NEXT:    vpsllw $7, %xmm0, %xmm0
    832 ; X86-NEXT:  LBB18_3:
    833 ; X86-NEXT:    vpmovb2m %xmm0, %k0
    834 ; X86-NEXT:    vpmovm2b %k0, %xmm0
    835 ; X86-NEXT:    retl
    836   %mask = icmp sgt i32 %a1, %b1
    837   %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
    838   ret <16 x i1>%c
    839 }define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
    840   %mask = icmp sgt i32 %a1, %b1
    841   %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
    842   ret <8 x i1>%c
    843 }
    844 
     845 define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
         ; Picks one of the two <4 x i1> arguments with a scalar i1 condition
         ; (icmp sgt i32 %a1, %b1). Every expectation below branches on the compare
         ; (cmpl + jg), shifts the chosen vector left by 31 and rebuilds it through a
         ; mask register.
     846 ; KNL-LABEL: test11:
     847 ; KNL:       ## %bb.0:
     848 ; KNL-NEXT:    cmpl %esi, %edi
     849 ; KNL-NEXT:    jg LBB20_1
     850 ; KNL-NEXT:  ## %bb.2:
     851 ; KNL-NEXT:    vpslld $31, %xmm1, %xmm0
     852 ; KNL-NEXT:    jmp LBB20_3
     853 ; KNL-NEXT:  LBB20_1:
     854 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
     855 ; KNL-NEXT:  LBB20_3:
     856 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
     857 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
     858 ; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
     859 ; KNL-NEXT:    vzeroupper
     860 ; KNL-NEXT:    retq
     861 ;
     862 ; SKX-LABEL: test11:
     863 ; SKX:       ## %bb.0:
     864 ; SKX-NEXT:    cmpl %esi, %edi
     865 ; SKX-NEXT:    jg LBB20_1
     866 ; SKX-NEXT:  ## %bb.2:
     867 ; SKX-NEXT:    vpslld $31, %xmm1, %xmm0
     868 ; SKX-NEXT:    jmp LBB20_3
     869 ; SKX-NEXT:  LBB20_1:
     870 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
     871 ; SKX-NEXT:  LBB20_3:
     872 ; SKX-NEXT:    vpmovd2m %xmm0, %k0
     873 ; SKX-NEXT:    vpmovm2d %k0, %xmm0
     874 ; SKX-NEXT:    retq
     875 ;
     876 ; AVX512BW-LABEL: test11:
     877 ; AVX512BW:       ## %bb.0:
     878 ; AVX512BW-NEXT:    cmpl %esi, %edi
     879 ; AVX512BW-NEXT:    jg LBB20_1
     880 ; AVX512BW-NEXT:  ## %bb.2:
     881 ; AVX512BW-NEXT:    vpslld $31, %xmm1, %xmm0
     882 ; AVX512BW-NEXT:    jmp LBB20_3
     883 ; AVX512BW-NEXT:  LBB20_1:
     884 ; AVX512BW-NEXT:    vpslld $31, %xmm0, %xmm0
     885 ; AVX512BW-NEXT:  LBB20_3:
     886 ; AVX512BW-NEXT:    vptestmd %zmm0, %zmm0, %k1
     887 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
     888 ; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
     889 ; AVX512BW-NEXT:    vzeroupper
     890 ; AVX512BW-NEXT:    retq
     891 ;
     892 ; AVX512DQ-LABEL: test11:
     893 ; AVX512DQ:       ## %bb.0:
     894 ; AVX512DQ-NEXT:    cmpl %esi, %edi
     895 ; AVX512DQ-NEXT:    jg LBB20_1
     896 ; AVX512DQ-NEXT:  ## %bb.2:
     897 ; AVX512DQ-NEXT:    vpslld $31, %xmm1, %xmm0
     898 ; AVX512DQ-NEXT:    jmp LBB20_3
     899 ; AVX512DQ-NEXT:  LBB20_1:
     900 ; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
     901 ; AVX512DQ-NEXT:  LBB20_3:
     902 ; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
     903 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
     904 ; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
     905 ; AVX512DQ-NEXT:    vzeroupper
     906 ; AVX512DQ-NEXT:    retq
     907 ;
     908 ; X86-LABEL: test11:
     909 ; X86:       ## %bb.0:
     910 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
     911 ; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
     912 ; X86-NEXT:    jg LBB20_1
     913 ; X86-NEXT:  ## %bb.2:
     914 ; X86-NEXT:    vpslld $31, %xmm1, %xmm0
     915 ; X86-NEXT:    jmp LBB20_3
     916 ; X86-NEXT:  LBB20_1:
     917 ; X86-NEXT:    vpslld $31, %xmm0, %xmm0
     918 ; X86-NEXT:  LBB20_3:
     919 ; X86-NEXT:    vpmovd2m %xmm0, %k0
     920 ; X86-NEXT:    vpmovm2d %k0, %xmm0
     921 ; X86-NEXT:    retl
     922   %mask = icmp sgt i32 %a1, %b1
     923   %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
     924   ret <4 x i1>%c
     925 }
    926 
     927 define i32 @test12(i32 %x, i32 %y)  {
         ; Uses element 0 of the constant i16 21845 (0x5555), bitcast to <16 x i1>, as
         ; the condition of a select between %x and %y. The expectations show the
         ; select folded away; the x86-64 output simply returns %edi (the %x argument).
     928 ; CHECK-LABEL: test12:
     929 ; CHECK:       ## %bb.0:
     930 ; CHECK-NEXT:    movl %edi, %eax
     931 ; CHECK-NEXT:    retq
     932 ;
     933 ; X86-LABEL: test12:
     934 ; X86:       ## %bb.0:
     935 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
     936 ; X86-NEXT:    retl
     937   %a = bitcast i16 21845 to <16 x i1>
     938   %b = extractelement <16 x i1> %a, i32 0
     939   %c = select i1 %b, i32 %x, i32 %y
     940   ret i32 %c
     941 }
    942 
     943 define i32 @test13(i32 %x, i32 %y)  {
         ; Uses element 3 of the constant i16 21845 (0x5555), bitcast to <16 x i1>, as
         ; the condition of a select between %x and %y. The expectations show the
         ; select folded away; the x86-64 output simply returns %esi (the %y argument).
     944 ; CHECK-LABEL: test13:
     945 ; CHECK:       ## %bb.0:
     946 ; CHECK-NEXT:    movl %esi, %eax
     947 ; CHECK-NEXT:    retq
     948 ;
     949 ; X86-LABEL: test13:
     950 ; X86:       ## %bb.0:
     951 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
     952 ; X86-NEXT:    retl
     953   %a = bitcast i16 21845 to <16 x i1>
     954   %b = extractelement <16 x i1> %a, i32 3
     955   %c = select i1 %b, i32 %x, i32 %y
     956   ret i32 %c
     957 }
    958 
     959 ; Make sure we don't crash on a large vector.
     960 define i32 @test13_crash(i32 %x, i32 %y)  {
         ; Bitcasts an i128 constant to <128 x i1> and uses element 3 as the condition
         ; of a select between %x and %y. The expectations show the select folded
         ; away; the x86-64 output simply returns %edi (the %x argument).
     961 ; CHECK-LABEL: test13_crash:
     962 ; CHECK:       ## %bb.0:
     963 ; CHECK-NEXT:    movl %edi, %eax
     964 ; CHECK-NEXT:    retq
     965 ;
     966 ; X86-LABEL: test13_crash:
     967 ; X86:       ## %bb.0:
     968 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
     969 ; X86-NEXT:    retl
     970   %a = bitcast i128 2184568686868686868686868686 to <128 x i1>
     971   %b = extractelement <128 x i1> %a, i32 3
     972   %c = select i1 %b, i32 %x, i32 %y
     973   ret i32 %c
     974 }
    975 
     976 define <4 x i1> @test14()  {
         ; Extracts element 2 of the constant i16 21845 (0x5555), bitcast to
         ; <16 x i1>, and inserts it at index 1 of <true, false, false, true>. The
         ; expectations show the whole function folded to the constant [1,1,0,1].
     977 ; CHECK-LABEL: test14:
     978 ; CHECK:       ## %bb.0:
     979 ; CHECK-NEXT:    vmovaps {{.*#+}} xmm0 = [1,1,0,1]
     980 ; CHECK-NEXT:    retq
     981 ;
     982 ; X86-LABEL: test14:
     983 ; X86:       ## %bb.0:
     984 ; X86-NEXT:    vmovaps {{.*#+}} xmm0 = [1,1,0,1]
     985 ; X86-NEXT:    retl
     986   %a = bitcast i16 21845 to <16 x i1>
     987   %b = extractelement <16 x i1> %a, i32 2
     988   %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
     989   ret <4 x i1> %c
     990 }
    991 
     992 define <16 x i1> @test15(i32 %x, i32 %y)  {
         ; Selects between two constant <16 x i1> masks (i16 21845 and i16 1, each
         ; bitcast to a vector) on icmp sgt i32 %x, %y. Every expectation makes the
         ; choice on the integer constants with cmovgl and only then moves the result
         ; into a mask register.
     993 ; KNL-LABEL: test15:
     994 ; KNL:       ## %bb.0:
     995 ; KNL-NEXT:    cmpl %esi, %edi
     996 ; KNL-NEXT:    movl $21845, %eax ## imm = 0x5555
     997 ; KNL-NEXT:    movl $1, %ecx
     998 ; KNL-NEXT:    cmovgl %eax, %ecx
     999 ; KNL-NEXT:    kmovw %ecx, %k1
    1000 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
    1001 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
    1002 ; KNL-NEXT:    vzeroupper
    1003 ; KNL-NEXT:    retq
    1004 ;
    1005 ; SKX-LABEL: test15:
    1006 ; SKX:       ## %bb.0:
    1007 ; SKX-NEXT:    cmpl %esi, %edi
    1008 ; SKX-NEXT:    movl $21845, %eax ## imm = 0x5555
    1009 ; SKX-NEXT:    movl $1, %ecx
    1010 ; SKX-NEXT:    cmovgl %eax, %ecx
    1011 ; SKX-NEXT:    kmovd %ecx, %k0
    1012 ; SKX-NEXT:    vpmovm2b %k0, %xmm0
    1013 ; SKX-NEXT:    retq
    1014 ;
    1015 ; AVX512BW-LABEL: test15:
    1016 ; AVX512BW:       ## %bb.0:
    1017 ; AVX512BW-NEXT:    cmpl %esi, %edi
    1018 ; AVX512BW-NEXT:    movl $21845, %eax ## imm = 0x5555
    1019 ; AVX512BW-NEXT:    movl $1, %ecx
    1020 ; AVX512BW-NEXT:    cmovgl %eax, %ecx
    1021 ; AVX512BW-NEXT:    kmovd %ecx, %k0
    1022 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
    1023 ; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
    1024 ; AVX512BW-NEXT:    vzeroupper
    1025 ; AVX512BW-NEXT:    retq
    1026 ;
    1027 ; AVX512DQ-LABEL: test15:
    1028 ; AVX512DQ:       ## %bb.0:
    1029 ; AVX512DQ-NEXT:    cmpl %esi, %edi
    1030 ; AVX512DQ-NEXT:    movl $21845, %eax ## imm = 0x5555
    1031 ; AVX512DQ-NEXT:    movl $1, %ecx
    1032 ; AVX512DQ-NEXT:    cmovgl %eax, %ecx
    1033 ; AVX512DQ-NEXT:    kmovw %ecx, %k0
    1034 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
    1035 ; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
    1036 ; AVX512DQ-NEXT:    vzeroupper
    1037 ; AVX512DQ-NEXT:    retq
    1038 ;
    1039 ; X86-LABEL: test15:
    1040 ; X86:       ## %bb.0:
    1041 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    1042 ; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
    1043 ; X86-NEXT:    movl $21845, %eax ## imm = 0x5555
    1044 ; X86-NEXT:    movl $1, %ecx
    1045 ; X86-NEXT:    cmovgl %eax, %ecx
    1046 ; X86-NEXT:    kmovd %ecx, %k0
    1047 ; X86-NEXT:    vpmovm2b %k0, %xmm0
    1048 ; X86-NEXT:    retl
    1049   %a = bitcast i16 21845 to <16 x i1>
    1050   %b = bitcast i16 1 to <16 x i1>
    1051   %mask = icmp sgt i32 %x, %y
    1052   %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
    1053   ret <16 x i1> %c
    1054 }
   1055 
    1056 define <64 x i8> @test16(i64 %x) {
         ; Bitcasts the i64 argument to <64 x i1>, sets element 5 to true with
         ; insertelement, and sign-extends the mask to <64 x i8>. The SKX, AVX512BW
         ; and X86 expectations keep the mask in one 64-bit mask register (kmovq,
         ; kshiftrq, kxorq); the KNL and AVX512DQ expectations split the i64 into
         ; 16-bit pieces (kmovw after shifting by 16, 32 and 48) and expand each one.
    1057 ;
    1058 ; KNL-LABEL: test16:
    1059 ; KNL:       ## %bb.0:
    1060 ; KNL-NEXT:    movq %rdi, %rax
    1061 ; KNL-NEXT:    movl %edi, %ecx
    1062 ; KNL-NEXT:    kmovw %edi, %k0
    1063 ; KNL-NEXT:    shrq $32, %rdi
    1064 ; KNL-NEXT:    shrq $48, %rax
    1065 ; KNL-NEXT:    shrl $16, %ecx
    1066 ; KNL-NEXT:    kmovw %ecx, %k1
    1067 ; KNL-NEXT:    kmovw %eax, %k2
    1068 ; KNL-NEXT:    kmovw %edi, %k3
    1069 ; KNL-NEXT:    movb $1, %al
    1070 ; KNL-NEXT:    kmovw %eax, %k4
    1071 ; KNL-NEXT:    kshiftrw $5, %k0, %k5
    1072 ; KNL-NEXT:    kxorw %k4, %k5, %k4
    1073 ; KNL-NEXT:    kshiftlw $15, %k4, %k4
    1074 ; KNL-NEXT:    kshiftrw $10, %k4, %k4
    1075 ; KNL-NEXT:    kxorw %k4, %k0, %k4
    1076 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
    1077 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
    1078 ; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
    1079 ; KNL-NEXT:    vpmovdb %zmm1, %xmm1
    1080 ; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
    1081 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z}
    1082 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
    1083 ; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
    1084 ; KNL-NEXT:    vpmovdb %zmm2, %xmm2
    1085 ; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
    1086 ; KNL-NEXT:    retq
    1087 ;
    1088 ; SKX-LABEL: test16:
    1089 ; SKX:       ## %bb.0:
    1090 ; SKX-NEXT:    kmovq %rdi, %k0
    1091 ; SKX-NEXT:    movb $1, %al
    1092 ; SKX-NEXT:    kmovd %eax, %k1
    1093 ; SKX-NEXT:    kshiftrq $5, %k0, %k2
    1094 ; SKX-NEXT:    kxorq %k1, %k2, %k1
    1095 ; SKX-NEXT:    kshiftlq $63, %k1, %k1
    1096 ; SKX-NEXT:    kshiftrq $58, %k1, %k1
    1097 ; SKX-NEXT:    kxorq %k1, %k0, %k0
    1098 ; SKX-NEXT:    vpmovm2b %k0, %zmm0
    1099 ; SKX-NEXT:    retq
    1100 ;
    1101 ; AVX512BW-LABEL: test16:
    1102 ; AVX512BW:       ## %bb.0:
    1103 ; AVX512BW-NEXT:    kmovq %rdi, %k0
    1104 ; AVX512BW-NEXT:    movb $1, %al
    1105 ; AVX512BW-NEXT:    kmovd %eax, %k1
    1106 ; AVX512BW-NEXT:    kshiftrq $5, %k0, %k2
    1107 ; AVX512BW-NEXT:    kxorq %k1, %k2, %k1
    1108 ; AVX512BW-NEXT:    kshiftlq $63, %k1, %k1
    1109 ; AVX512BW-NEXT:    kshiftrq $58, %k1, %k1
    1110 ; AVX512BW-NEXT:    kxorq %k1, %k0, %k0
    1111 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
    1112 ; AVX512BW-NEXT:    retq
    1113 ;
    1114 ; AVX512DQ-LABEL: test16:
    1115 ; AVX512DQ:       ## %bb.0:
    1116 ; AVX512DQ-NEXT:    movq %rdi, %rax
    1117 ; AVX512DQ-NEXT:    movl %edi, %ecx
    1118 ; AVX512DQ-NEXT:    kmovw %edi, %k0
    1119 ; AVX512DQ-NEXT:    shrq $32, %rdi
    1120 ; AVX512DQ-NEXT:    shrq $48, %rax
    1121 ; AVX512DQ-NEXT:    shrl $16, %ecx
    1122 ; AVX512DQ-NEXT:    kmovw %ecx, %k1
    1123 ; AVX512DQ-NEXT:    kmovw %eax, %k2
    1124 ; AVX512DQ-NEXT:    kmovw %edi, %k3
    1125 ; AVX512DQ-NEXT:    movb $1, %al
    1126 ; AVX512DQ-NEXT:    kmovw %eax, %k4
    1127 ; AVX512DQ-NEXT:    kshiftrw $5, %k0, %k5
    1128 ; AVX512DQ-NEXT:    kxorw %k4, %k5, %k4
    1129 ; AVX512DQ-NEXT:    kshiftlw $15, %k4, %k4
    1130 ; AVX512DQ-NEXT:    kshiftrw $10, %k4, %k4
    1131 ; AVX512DQ-NEXT:    kxorw %k4, %k0, %k0
    1132 ; AVX512DQ-NEXT:    vpmovm2d %k3, %zmm0
    1133 ; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
    1134 ; AVX512DQ-NEXT:    vpmovm2d %k2, %zmm1
    1135 ; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
    1136 ; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
    1137 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
    1138 ; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
    1139 ; AVX512DQ-NEXT:    vpmovm2d %k1, %zmm2
    1140 ; AVX512DQ-NEXT:    vpmovdb %zmm2, %xmm2
    1141 ; AVX512DQ-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
    1142 ; AVX512DQ-NEXT:    retq
    1143 ;
    1144 ; X86-LABEL: test16:
    1145 ; X86:       ## %bb.0:
    1146 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k0
    1147 ; X86-NEXT:    movb $1, %al
    1148 ; X86-NEXT:    kmovd %eax, %k1
    1149 ; X86-NEXT:    kshiftrq $5, %k0, %k2
    1150 ; X86-NEXT:    kxorq %k1, %k2, %k1
    1151 ; X86-NEXT:    kshiftlq $63, %k1, %k1
    1152 ; X86-NEXT:    kshiftrq $58, %k1, %k1
    1153 ; X86-NEXT:    kxorq %k1, %k0, %k0
    1154 ; X86-NEXT:    vpmovm2b %k0, %zmm0
    1155 ; X86-NEXT:    retl
    1156   %a = bitcast i64 %x to <64 x i1>
    1157   %b = insertelement <64 x i1>%a, i1 true, i32 5
    1158   %c = sext <64 x i1>%b to <64 x i8>
    1159   ret <64 x i8>%c
    1160 }
   1161 
    1162 define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
         ; Bitcasts the i64 argument to <64 x i1>, replaces element 5 with the result
         ; of icmp sgt i32 %y, %z, and sign-extends the mask to <64 x i8>. Every
         ; expectation materializes the compare with cmpl + setg and moves it into a
         ; mask register before merging it into bit 5 with a shift/xor sequence.
    1163 ;
    1164 ; KNL-LABEL: test17:
    1165 ; KNL:       ## %bb.0:
    1166 ; KNL-NEXT:    movq %rdi, %rax
    1167 ; KNL-NEXT:    movl %edi, %ecx
    1168 ; KNL-NEXT:    kmovw %edi, %k0
    1169 ; KNL-NEXT:    shrq $32, %rdi
    1170 ; KNL-NEXT:    shrq $48, %rax
    1171 ; KNL-NEXT:    shrl $16, %ecx
    1172 ; KNL-NEXT:    kmovw %ecx, %k1
    1173 ; KNL-NEXT:    kmovw %eax, %k2
    1174 ; KNL-NEXT:    kmovw %edi, %k3
    1175 ; KNL-NEXT:    cmpl %edx, %esi
    1176 ; KNL-NEXT:    setg %al
    1177 ; KNL-NEXT:    kshiftrw $5, %k0, %k4
    1178 ; KNL-NEXT:    kmovw %eax, %k5
    1179 ; KNL-NEXT:    kxorw %k5, %k4, %k4
    1180 ; KNL-NEXT:    kshiftlw $15, %k4, %k4
    1181 ; KNL-NEXT:    kshiftrw $10, %k4, %k4
    1182 ; KNL-NEXT:    kxorw %k4, %k0, %k4
    1183 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
    1184 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
    1185 ; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
    1186 ; KNL-NEXT:    vpmovdb %zmm1, %xmm1
    1187 ; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
    1188 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z}
    1189 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
    1190 ; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
    1191 ; KNL-NEXT:    vpmovdb %zmm2, %xmm2
    1192 ; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
    1193 ; KNL-NEXT:    retq
    1194 ;
    1195 ; SKX-LABEL: test17:
    1196 ; SKX:       ## %bb.0:
    1197 ; SKX-NEXT:    kmovq %rdi, %k0
    1198 ; SKX-NEXT:    cmpl %edx, %esi
    1199 ; SKX-NEXT:    setg %al
    1200 ; SKX-NEXT:    kmovd %eax, %k1
    1201 ; SKX-NEXT:    kshiftrq $5, %k0, %k2
    1202 ; SKX-NEXT:    kxorq %k1, %k2, %k1
    1203 ; SKX-NEXT:    kshiftlq $63, %k1, %k1
    1204 ; SKX-NEXT:    kshiftrq $58, %k1, %k1
    1205 ; SKX-NEXT:    kxorq %k1, %k0, %k0
    1206 ; SKX-NEXT:    vpmovm2b %k0, %zmm0
    1207 ; SKX-NEXT:    retq
    1208 ;
    1209 ; AVX512BW-LABEL: test17:
    1210 ; AVX512BW:       ## %bb.0:
    1211 ; AVX512BW-NEXT:    kmovq %rdi, %k0
    1212 ; AVX512BW-NEXT:    cmpl %edx, %esi
    1213 ; AVX512BW-NEXT:    setg %al
    1214 ; AVX512BW-NEXT:    kmovd %eax, %k1
    1215 ; AVX512BW-NEXT:    kshiftrq $5, %k0, %k2
    1216 ; AVX512BW-NEXT:    kxorq %k1, %k2, %k1
    1217 ; AVX512BW-NEXT:    kshiftlq $63, %k1, %k1
    1218 ; AVX512BW-NEXT:    kshiftrq $58, %k1, %k1
    1219 ; AVX512BW-NEXT:    kxorq %k1, %k0, %k0
    1220 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
    1221 ; AVX512BW-NEXT:    retq
    1222 ;
    1223 ; AVX512DQ-LABEL: test17:
    1224 ; AVX512DQ:       ## %bb.0:
    1225 ; AVX512DQ-NEXT:    movq %rdi, %rax
    1226 ; AVX512DQ-NEXT:    movl %edi, %ecx
    1227 ; AVX512DQ-NEXT:    kmovw %edi, %k0
    1228 ; AVX512DQ-NEXT:    shrq $32, %rdi
    1229 ; AVX512DQ-NEXT:    shrq $48, %rax
    1230 ; AVX512DQ-NEXT:    shrl $16, %ecx
    1231 ; AVX512DQ-NEXT:    kmovw %ecx, %k1
    1232 ; AVX512DQ-NEXT:    kmovw %eax, %k2
    1233 ; AVX512DQ-NEXT:    kmovw %edi, %k3
    1234 ; AVX512DQ-NEXT:    cmpl %edx, %esi
    1235 ; AVX512DQ-NEXT:    setg %al
    1236 ; AVX512DQ-NEXT:    kshiftrw $5, %k0, %k4
    1237 ; AVX512DQ-NEXT:    kmovw %eax, %k5
    1238 ; AVX512DQ-NEXT:    kxorw %k5, %k4, %k4
    1239 ; AVX512DQ-NEXT:    kshiftlw $15, %k4, %k4
    1240 ; AVX512DQ-NEXT:    kshiftrw $10, %k4, %k4
    1241 ; AVX512DQ-NEXT:    kxorw %k4, %k0, %k0
    1242 ; AVX512DQ-NEXT:    vpmovm2d %k3, %zmm0
    1243 ; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
    1244 ; AVX512DQ-NEXT:    vpmovm2d %k2, %zmm1
    1245 ; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
    1246 ; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
    1247 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
    1248 ; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
    1249 ; AVX512DQ-NEXT:    vpmovm2d %k1, %zmm2
    1250 ; AVX512DQ-NEXT:    vpmovdb %zmm2, %xmm2
    1251 ; AVX512DQ-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
    1252 ; AVX512DQ-NEXT:    retq
    1253 ;
    1254 ; X86-LABEL: test17:
    1255 ; X86:       ## %bb.0:
    1256 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    1257 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k0
    1258 ; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
    1259 ; X86-NEXT:    setg %al
    1260 ; X86-NEXT:    kmovd %eax, %k1
    1261 ; X86-NEXT:    kshiftrq $5, %k0, %k2
    1262 ; X86-NEXT:    kxorq %k1, %k2, %k1
    1263 ; X86-NEXT:    kshiftlq $63, %k1, %k1
    1264 ; X86-NEXT:    kshiftrq $58, %k1, %k1
    1265 ; X86-NEXT:    kxorq %k1, %k0, %k0
    1266 ; X86-NEXT:    vpmovm2b %k0, %zmm0
    1267 ; X86-NEXT:    retl
    1268   %a = bitcast i64 %x to <64 x i1>
    1269   %b = icmp sgt i32 %y, %z
    1270   %c = insertelement <64 x i1>%a, i1 %b, i32 5
    1271   %d = sext <64 x i1>%c to <64 x i8>
    1272   ret <64 x i8>%d
    1273 }
   1274 
    1275 define <8 x i1> @test18(i8 %a, i16 %y) {
         ; Bitcasts i8 %a to <8 x i1> and i16 %y to <16 x i1>, then overwrites element
         ; 7 of the former with element 8 of the latter and element 6 with element 9.
         ; The expectations do all of the element moves with mask-register shifts,
         ; xors and ors before expanding the mask to the returned vector.
    1276 ; KNL-LABEL: test18:
    1277 ; KNL:       ## %bb.0:
    1278 ; KNL-NEXT:    kmovw %edi, %k1
    1279 ; KNL-NEXT:    kmovw %esi, %k2
    1280 ; KNL-NEXT:    kshiftrw $8, %k2, %k0
    1281 ; KNL-NEXT:    kshiftrw $9, %k2, %k2
    1282 ; KNL-NEXT:    kshiftrw $6, %k1, %k3
    1283 ; KNL-NEXT:    kxorw %k2, %k3, %k2
    1284 ; KNL-NEXT:    kshiftlw $15, %k2, %k2
    1285 ; KNL-NEXT:    kshiftrw $9, %k2, %k2
    1286 ; KNL-NEXT:    kxorw %k2, %k1, %k1
    1287 ; KNL-NEXT:    kshiftlw $9, %k1, %k1
    1288 ; KNL-NEXT:    kshiftrw $9, %k1, %k1
    1289 ; KNL-NEXT:    kshiftlw $7, %k0, %k0
    1290 ; KNL-NEXT:    korw %k0, %k1, %k1
    1291 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
    1292 ; KNL-NEXT:    vpmovdw %zmm0, %ymm0
    1293 ; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
    1294 ; KNL-NEXT:    vzeroupper
    1295 ; KNL-NEXT:    retq
    1296 ;
    1297 ; SKX-LABEL: test18:
    1298 ; SKX:       ## %bb.0:
    1299 ; SKX-NEXT:    kmovd %edi, %k1
    1300 ; SKX-NEXT:    kmovd %esi, %k2
    1301 ; SKX-NEXT:    kshiftrw $8, %k2, %k0
    1302 ; SKX-NEXT:    kshiftrw $9, %k2, %k2
    1303 ; SKX-NEXT:    kshiftrb $6, %k1, %k3
    1304 ; SKX-NEXT:    kxorb %k2, %k3, %k2
    1305 ; SKX-NEXT:    kshiftlb $7, %k2, %k2
    1306 ; SKX-NEXT:    kshiftrb $1, %k2, %k2
    1307 ; SKX-NEXT:    kxorb %k2, %k1, %k1
    1308 ; SKX-NEXT:    kshiftlb $1, %k1, %k1
    1309 ; SKX-NEXT:    kshiftrb $1, %k1, %k1
    1310 ; SKX-NEXT:    kshiftlb $7, %k0, %k0
    1311 ; SKX-NEXT:    korb %k0, %k1, %k0
    1312 ; SKX-NEXT:    vpmovm2w %k0, %xmm0
    1313 ; SKX-NEXT:    retq
    1314 ;
    1315 ; AVX512BW-LABEL: test18:
    1316 ; AVX512BW:       ## %bb.0:
    1317 ; AVX512BW-NEXT:    kmovd %edi, %k1
    1318 ; AVX512BW-NEXT:    kmovd %esi, %k2
    1319 ; AVX512BW-NEXT:    kshiftrw $8, %k2, %k0
    1320 ; AVX512BW-NEXT:    kshiftrw $9, %k2, %k2
    1321 ; AVX512BW-NEXT:    kshiftrw $6, %k1, %k3
    1322 ; AVX512BW-NEXT:    kxorw %k2, %k3, %k2
    1323 ; AVX512BW-NEXT:    kshiftlw $15, %k2, %k2
    1324 ; AVX512BW-NEXT:    kshiftrw $9, %k2, %k2
    1325 ; AVX512BW-NEXT:    kxorw %k2, %k1, %k1
    1326 ; AVX512BW-NEXT:    kshiftlw $9, %k1, %k1
    1327 ; AVX512BW-NEXT:    kshiftrw $9, %k1, %k1
    1328 ; AVX512BW-NEXT:    kshiftlw $7, %k0, %k0
    1329 ; AVX512BW-NEXT:    korw %k0, %k1, %k0
    1330 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
    1331 ; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
    1332 ; AVX512BW-NEXT:    vzeroupper
    1333 ; AVX512BW-NEXT:    retq
    1334 ;
    1335 ; AVX512DQ-LABEL: test18:
    1336 ; AVX512DQ:       ## %bb.0:
    1337 ; AVX512DQ-NEXT:    kmovw %edi, %k1
    1338 ; AVX512DQ-NEXT:    kmovw %esi, %k2
    1339 ; AVX512DQ-NEXT:    kshiftrw $8, %k2, %k0
    1340 ; AVX512DQ-NEXT:    kshiftrw $9, %k2, %k2
    1341 ; AVX512DQ-NEXT:    kshiftrb $6, %k1, %k3
    1342 ; AVX512DQ-NEXT:    kxorb %k2, %k3, %k2
    1343 ; AVX512DQ-NEXT:    kshiftlb $7, %k2, %k2
    1344 ; AVX512DQ-NEXT:    kshiftrb $1, %k2, %k2
    1345 ; AVX512DQ-NEXT:    kxorb %k2, %k1, %k1
    1346 ; AVX512DQ-NEXT:    kshiftlb $1, %k1, %k1
    1347 ; AVX512DQ-NEXT:    kshiftrb $1, %k1, %k1
    1348 ; AVX512DQ-NEXT:    kshiftlb $7, %k0, %k0
    1349 ; AVX512DQ-NEXT:    korb %k0, %k1, %k0
    1350 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
    1351 ; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
    1352 ; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
    1353 ; AVX512DQ-NEXT:    vzeroupper
    1354 ; AVX512DQ-NEXT:    retq
    1355 ;
    1356 ; X86-LABEL: test18:
    1357 ; X86:       ## %bb.0:
    1358 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0
    1359 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
    1360 ; X86-NEXT:    kshiftrw $9, %k1, %k2
    1361 ; X86-NEXT:    kshiftrw $8, %k1, %k1
    1362 ; X86-NEXT:    kshiftlb $7, %k1, %k1
    1363 ; X86-NEXT:    kshiftrb $6, %k0, %k3
    1364 ; X86-NEXT:    kxorb %k2, %k3, %k2
    1365 ; X86-NEXT:    kshiftlb $7, %k2, %k2
    1366 ; X86-NEXT:    kshiftrb $1, %k2, %k2
    1367 ; X86-NEXT:    kxorb %k2, %k0, %k0
    1368 ; X86-NEXT:    kshiftlb $1, %k0, %k0
    1369 ; X86-NEXT:    kshiftrb $1, %k0, %k0
    1370 ; X86-NEXT:    korb %k1, %k0, %k0
    1371 ; X86-NEXT:    vpmovm2w %k0, %xmm0
    1372 ; X86-NEXT:    retl
    1373   %b = bitcast i8 %a to <8 x i1>
    1374   %b1 = bitcast i16 %y to <16 x i1>
    1375   %el1 = extractelement <16 x i1>%b1, i32 8
    1376   %el2 = extractelement <16 x i1>%b1, i32 9
    1377   %c = insertelement <8 x i1>%b, i1 %el1, i32 7
    1378   %d = insertelement <8 x i1>%c, i1 %el2, i32 6
    1379   ret <8 x i1>%d
    1380 }
    1381 define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
         ; Vector select driven by a <32 x i1> mask argument; lanes of %x are kept
         ; where the mask is set and replaced by zero elsewhere. The SKX, AVX512BW and
         ; X86 expectations use a zero-masked vmovdqu16; the KNL and AVX512DQ
         ; expectations widen the mask bytes to words and AND each 256-bit half.
    1382 ; KNL-LABEL: test21:
    1383 ; KNL:       ## %bb.0:
    1384 ; KNL-NEXT:    vextracti128 $1, %ymm2, %xmm3
    1385 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
    1386 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
    1387 ; KNL-NEXT:    vpsllw $15, %ymm2, %ymm2
    1388 ; KNL-NEXT:    vpsraw $15, %ymm2, %ymm2
    1389 ; KNL-NEXT:    vpand %ymm0, %ymm2, %ymm0
    1390 ; KNL-NEXT:    vpsllw $15, %ymm3, %ymm2
    1391 ; KNL-NEXT:    vpsraw $15, %ymm2, %ymm2
    1392 ; KNL-NEXT:    vpand %ymm1, %ymm2, %ymm1
    1393 ; KNL-NEXT:    retq
    1394 ;
    1395 ; SKX-LABEL: test21:
    1396 ; SKX:       ## %bb.0:
    1397 ; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
    1398 ; SKX-NEXT:    vpmovb2m %ymm1, %k1
    1399 ; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
    1400 ; SKX-NEXT:    retq
    1401 ;
    1402 ; AVX512BW-LABEL: test21:
    1403 ; AVX512BW:       ## %bb.0:
    1404 ; AVX512BW-NEXT:    vpsllw $7, %ymm1, %ymm1
    1405 ; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
    1406 ; AVX512BW-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
    1407 ; AVX512BW-NEXT:    retq
    1408 ;
    1409 ; AVX512DQ-LABEL: test21:
    1410 ; AVX512DQ:       ## %bb.0:
    1411 ; AVX512DQ-NEXT:    vextracti128 $1, %ymm2, %xmm3
    1412 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
    1413 ; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
    1414 ; AVX512DQ-NEXT:    vpsllw $15, %ymm2, %ymm2
    1415 ; AVX512DQ-NEXT:    vpsraw $15, %ymm2, %ymm2
    1416 ; AVX512DQ-NEXT:    vpand %ymm0, %ymm2, %ymm0
    1417 ; AVX512DQ-NEXT:    vpsllw $15, %ymm3, %ymm2
    1418 ; AVX512DQ-NEXT:    vpsraw $15, %ymm2, %ymm2
    1419 ; AVX512DQ-NEXT:    vpand %ymm1, %ymm2, %ymm1
    1420 ; AVX512DQ-NEXT:    retq
    1421 ;
    1422 ; X86-LABEL: test21:
    1423 ; X86:       ## %bb.0:
    1424 ; X86-NEXT:    vpsllw $7, %ymm1, %ymm1
    1425 ; X86-NEXT:    vpmovb2m %ymm1, %k1
    1426 ; X86-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
    1427 ; X86-NEXT:    retl
    1428   %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
    1429   ret <32 x i16> %ret
    1430 }
   1431 
    1432 define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
         ; Stores a <4 x i1> argument through %addr. Every expectation converts the
         ; vector to a mask register and ends in a one-byte store, either movb from a
         ; general-purpose register or kmovb straight from the mask register.
    1433 ; KNL-LABEL: test22:
    1434 ; KNL:       ## %bb.0:
    1435 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
    1436 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
    1437 ; KNL-NEXT:    kmovw %k0, %eax
    1438 ; KNL-NEXT:    movb %al, (%rdi)
    1439 ; KNL-NEXT:    vzeroupper
    1440 ; KNL-NEXT:    retq
    1441 ;
    1442 ; SKX-LABEL: test22:
    1443 ; SKX:       ## %bb.0:
    1444 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    1445 ; SKX-NEXT:    vpmovd2m %xmm0, %k0
    1446 ; SKX-NEXT:    kmovb %k0, (%rdi)
    1447 ; SKX-NEXT:    retq
    1448 ;
    1449 ; AVX512BW-LABEL: test22:
    1450 ; AVX512BW:       ## %bb.0:
    1451 ; AVX512BW-NEXT:    vpslld $31, %xmm0, %xmm0
    1452 ; AVX512BW-NEXT:    vptestmd %zmm0, %zmm0, %k0
    1453 ; AVX512BW-NEXT:    kmovd %k0, %eax
    1454 ; AVX512BW-NEXT:    movb %al, (%rdi)
    1455 ; AVX512BW-NEXT:    vzeroupper
    1456 ; AVX512BW-NEXT:    retq
    1457 ;
    1458 ; AVX512DQ-LABEL: test22:
    1459 ; AVX512DQ:       ## %bb.0:
    1460 ; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
    1461 ; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
    1462 ; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
    1463 ; AVX512DQ-NEXT:    vzeroupper
    1464 ; AVX512DQ-NEXT:    retq
    1465 ;
    1466 ; X86-LABEL: test22:
    1467 ; X86:       ## %bb.0:
    1468 ; X86-NEXT:    vpslld $31, %xmm0, %xmm0
    1469 ; X86-NEXT:    vpmovd2m %xmm0, %k0
    1470 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    1471 ; X86-NEXT:    kmovb %k0, (%eax)
    1472 ; X86-NEXT:    retl
    1473   store <4 x i1> %a, <4 x i1>* %addr
    1474   ret void
    1475 }
   1476 
    1477 define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
         ; Stores a <2 x i1> argument through %addr. Every expectation converts the
         ; vector to a mask register (after vpsllq by 63) and ends in a one-byte
         ; store, either movb from a general-purpose register or kmovb.
    1478 ; KNL-LABEL: test23:
    1479 ; KNL:       ## %bb.0:
    1480 ; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
    1481 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
    1482 ; KNL-NEXT:    kmovw %k0, %eax
    1483 ; KNL-NEXT:    movb %al, (%rdi)
    1484 ; KNL-NEXT:    vzeroupper
    1485 ; KNL-NEXT:    retq
    1486 ;
    1487 ; SKX-LABEL: test23:
    1488 ; SKX:       ## %bb.0:
    1489 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
    1490 ; SKX-NEXT:    vpmovq2m %xmm0, %k0
    1491 ; SKX-NEXT:    kmovb %k0, (%rdi)
    1492 ; SKX-NEXT:    retq
    1493 ;
    1494 ; AVX512BW-LABEL: test23:
    1495 ; AVX512BW:       ## %bb.0:
    1496 ; AVX512BW-NEXT:    vpsllq $63, %xmm0, %xmm0
    1497 ; AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
    1498 ; AVX512BW-NEXT:    kmovd %k0, %eax
    1499 ; AVX512BW-NEXT:    movb %al, (%rdi)
    1500 ; AVX512BW-NEXT:    vzeroupper
    1501 ; AVX512BW-NEXT:    retq
    1502 ;
    1503 ; AVX512DQ-LABEL: test23:
    1504 ; AVX512DQ:       ## %bb.0:
    1505 ; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
    1506 ; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
    1507 ; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
    1508 ; AVX512DQ-NEXT:    vzeroupper
    1509 ; AVX512DQ-NEXT:    retq
    1510 ;
    1511 ; X86-LABEL: test23:
    1512 ; X86:       ## %bb.0:
    1513 ; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
    1514 ; X86-NEXT:    vpmovq2m %xmm0, %k0
    1515 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    1516 ; X86-NEXT:    kmovb %k0, (%eax)
    1517 ; X86-NEXT:    retl
    1518   store <2 x i1> %a, <2 x i1>* %addr
    1519   ret void
    1520 }
   1521 
    1522 define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
         ; Inverts a <1 x i1> argument (xor with true) and stores the result through
         ; %ptr with align 4. Every expectation performs the inversion in mask
         ; registers as kxnorw (building all-ones) followed by kxorw, then stores one
         ; byte.
    1523 ; KNL-LABEL: store_v1i1:
    1524 ; KNL:       ## %bb.0:
    1525 ; KNL-NEXT:    kmovw %edi, %k0
    1526 ; KNL-NEXT:    kxnorw %k0, %k0, %k1
    1527 ; KNL-NEXT:    kxorw %k1, %k0, %k0
    1528 ; KNL-NEXT:    kmovw %k0, %eax
    1529 ; KNL-NEXT:    movb %al, (%rsi)
    1530 ; KNL-NEXT:    retq
    1531 ;
    1532 ; SKX-LABEL: store_v1i1:
    1533 ; SKX:       ## %bb.0:
    1534 ; SKX-NEXT:    kmovd %edi, %k0
    1535 ; SKX-NEXT:    kxnorw %k0, %k0, %k1
    1536 ; SKX-NEXT:    kxorw %k1, %k0, %k0
    1537 ; SKX-NEXT:    kmovb %k0, (%rsi)
    1538 ; SKX-NEXT:    retq
    1539 ;
    1540 ; AVX512BW-LABEL: store_v1i1:
    1541 ; AVX512BW:       ## %bb.0:
    1542 ; AVX512BW-NEXT:    kmovd %edi, %k0
    1543 ; AVX512BW-NEXT:    kxnorw %k0, %k0, %k1
    1544 ; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
    1545 ; AVX512BW-NEXT:    kmovd %k0, %eax
    1546 ; AVX512BW-NEXT:    movb %al, (%rsi)
    1547 ; AVX512BW-NEXT:    retq
    1548 ;
    1549 ; AVX512DQ-LABEL: store_v1i1:
    1550 ; AVX512DQ:       ## %bb.0:
    1551 ; AVX512DQ-NEXT:    kmovw %edi, %k0
    1552 ; AVX512DQ-NEXT:    kxnorw %k0, %k0, %k1
    1553 ; AVX512DQ-NEXT:    kxorw %k1, %k0, %k0
    1554 ; AVX512DQ-NEXT:    kmovb %k0, (%rsi)
    1555 ; AVX512DQ-NEXT:    retq
    1556 ;
    1557 ; X86-LABEL: store_v1i1:
    1558 ; X86:       ## %bb.0:
    1559 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
    1560 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    1561 ; X86-NEXT:    kxnorw %k0, %k0, %k1
    1562 ; X86-NEXT:    kxorw %k1, %k0, %k0
    1563 ; X86-NEXT:    kmovb %k0, (%eax)
    1564 ; X86-NEXT:    retl
    1565   %x = xor <1 x i1> %c, <i1 1>
    1566   store <1 x i1> %x, <1 x i1>*  %ptr, align 4
    1567   ret void
    1568 }
   1569 
    1570 define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
         ; Inverts a <2 x i1> argument (xor with all-ones) and stores the result
         ; through %ptr with align 4. The KNL and AVX512BW expectations fold the
         ; inversion into vptestnmq; the SKX, AVX512DQ and X86 expectations use
         ; vpmovq2m followed by knotw. All of them end in a one-byte store.
    1571 ; KNL-LABEL: store_v2i1:
    1572 ; KNL:       ## %bb.0:
    1573 ; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
    1574 ; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
    1575 ; KNL-NEXT:    kmovw %k0, %eax
    1576 ; KNL-NEXT:    movb %al, (%rdi)
    1577 ; KNL-NEXT:    vzeroupper
    1578 ; KNL-NEXT:    retq
    1579 ;
    1580 ; SKX-LABEL: store_v2i1:
    1581 ; SKX:       ## %bb.0:
    1582 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
    1583 ; SKX-NEXT:    vpmovq2m %xmm0, %k0
    1584 ; SKX-NEXT:    knotw %k0, %k0
    1585 ; SKX-NEXT:    kmovb %k0, (%rdi)
    1586 ; SKX-NEXT:    retq
    1587 ;
    1588 ; AVX512BW-LABEL: store_v2i1:
    1589 ; AVX512BW:       ## %bb.0:
    1590 ; AVX512BW-NEXT:    vpsllq $63, %xmm0, %xmm0
    1591 ; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
    1592 ; AVX512BW-NEXT:    kmovd %k0, %eax
    1593 ; AVX512BW-NEXT:    movb %al, (%rdi)
    1594 ; AVX512BW-NEXT:    vzeroupper
    1595 ; AVX512BW-NEXT:    retq
    1596 ;
    1597 ; AVX512DQ-LABEL: store_v2i1:
    1598 ; AVX512DQ:       ## %bb.0:
    1599 ; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
    1600 ; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
    1601 ; AVX512DQ-NEXT:    knotw %k0, %k0
    1602 ; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
    1603 ; AVX512DQ-NEXT:    vzeroupper
    1604 ; AVX512DQ-NEXT:    retq
    1605 ;
    1606 ; X86-LABEL: store_v2i1:
    1607 ; X86:       ## %bb.0:
    1608 ; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
    1609 ; X86-NEXT:    vpmovq2m %xmm0, %k0
    1610 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    1611 ; X86-NEXT:    knotw %k0, %k0
    1612 ; X86-NEXT:    kmovb %k0, (%eax)
    1613 ; X86-NEXT:    retl
    1614   %x = xor <2 x i1> %c, <i1 1, i1 1>
    1615   store <2 x i1> %x, <2 x i1>*  %ptr, align 4
    1616   ret void
    1617 }
   1618 
   1619 define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
   1620 ; KNL-LABEL: store_v4i1:
   1621 ; KNL:       ## %bb.0:
   1622 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
   1623 ; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
   1624 ; KNL-NEXT:    kmovw %k0, %eax
   1625 ; KNL-NEXT:    movb %al, (%rdi)
   1626 ; KNL-NEXT:    vzeroupper
   1627 ; KNL-NEXT:    retq
   1628 ;
   1629 ; SKX-LABEL: store_v4i1:
   1630 ; SKX:       ## %bb.0:
   1631 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
   1632 ; SKX-NEXT:    vpmovd2m %xmm0, %k0
   1633 ; SKX-NEXT:    knotw %k0, %k0
   1634 ; SKX-NEXT:    kmovb %k0, (%rdi)
   1635 ; SKX-NEXT:    retq
   1636 ;
   1637 ; AVX512BW-LABEL: store_v4i1:
   1638 ; AVX512BW:       ## %bb.0:
   1639 ; AVX512BW-NEXT:    vpslld $31, %xmm0, %xmm0
   1640 ; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
   1641 ; AVX512BW-NEXT:    kmovd %k0, %eax
   1642 ; AVX512BW-NEXT:    movb %al, (%rdi)
   1643 ; AVX512BW-NEXT:    vzeroupper
   1644 ; AVX512BW-NEXT:    retq
   1645 ;
   1646 ; AVX512DQ-LABEL: store_v4i1:
   1647 ; AVX512DQ:       ## %bb.0:
   1648 ; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
   1649 ; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
   1650 ; AVX512DQ-NEXT:    knotw %k0, %k0
   1651 ; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
   1652 ; AVX512DQ-NEXT:    vzeroupper
   1653 ; AVX512DQ-NEXT:    retq
   1654 ;
   1655 ; X86-LABEL: store_v4i1:
   1656 ; X86:       ## %bb.0:
   1657 ; X86-NEXT:    vpslld $31, %xmm0, %xmm0
   1658 ; X86-NEXT:    vpmovd2m %xmm0, %k0
   1659 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1660 ; X86-NEXT:    knotw %k0, %k0
   1661 ; X86-NEXT:    kmovb %k0, (%eax)
   1662 ; X86-NEXT:    retl
        ; Codegen test for inverting a <4 x i1> mask argument and storing it via %ptr.
        ; Per the assertions above, the KNL and AVX512BW runs fold the inversion
        ; into vptestnmd and store the low byte through a GPR; the SKX, AVX512DQ
        ; and 32-bit runs invert with knotw and store with kmovb.
   1663   %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
   1664   store <4 x i1> %x, <4 x i1>*  %ptr, align 4
   1665   ret void
   1666 }
   1667 
   1668 define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
   1669 ; KNL-LABEL: store_v8i1:
   1670 ; KNL:       ## %bb.0:
   1671 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
   1672 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
   1673 ; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
   1674 ; KNL-NEXT:    kmovw %k0, %eax
   1675 ; KNL-NEXT:    movb %al, (%rdi)
   1676 ; KNL-NEXT:    vzeroupper
   1677 ; KNL-NEXT:    retq
   1678 ;
   1679 ; SKX-LABEL: store_v8i1:
   1680 ; SKX:       ## %bb.0:
   1681 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
   1682 ; SKX-NEXT:    vpmovw2m %xmm0, %k0
   1683 ; SKX-NEXT:    knotb %k0, %k0
   1684 ; SKX-NEXT:    kmovb %k0, (%rdi)
   1685 ; SKX-NEXT:    retq
   1686 ;
   1687 ; AVX512BW-LABEL: store_v8i1:
   1688 ; AVX512BW:       ## %bb.0:
   1689 ; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
   1690 ; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
   1691 ; AVX512BW-NEXT:    knotw %k0, %k0
   1692 ; AVX512BW-NEXT:    kmovd %k0, %eax
   1693 ; AVX512BW-NEXT:    movb %al, (%rdi)
   1694 ; AVX512BW-NEXT:    vzeroupper
   1695 ; AVX512BW-NEXT:    retq
   1696 ;
   1697 ; AVX512DQ-LABEL: store_v8i1:
   1698 ; AVX512DQ:       ## %bb.0:
   1699 ; AVX512DQ-NEXT:    vpmovsxwq %xmm0, %zmm0
   1700 ; AVX512DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
   1701 ; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
   1702 ; AVX512DQ-NEXT:    knotb %k0, %k0
   1703 ; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
   1704 ; AVX512DQ-NEXT:    vzeroupper
   1705 ; AVX512DQ-NEXT:    retq
   1706 ;
   1707 ; X86-LABEL: store_v8i1:
   1708 ; X86:       ## %bb.0:
   1709 ; X86-NEXT:    vpsllw $15, %xmm0, %xmm0
   1710 ; X86-NEXT:    vpmovw2m %xmm0, %k0
   1711 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1712 ; X86-NEXT:    knotb %k0, %k0
   1713 ; X86-NEXT:    kmovb %k0, (%eax)
   1714 ; X86-NEXT:    retl
        ; Codegen test for inverting an <8 x i1> mask argument and storing it via %ptr.
        ; Per the assertions above, only the KNL run folds the inversion into
        ; vptestnmq; every other run inverts in the mask register (knotb, or knotw
        ; in the AVX512BW run). Runs that enable AVX512DQ store with kmovb, the
        ; remaining ones move the mask to a GPR and store its low byte.
   1715   %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
   1716   store <8 x i1> %x, <8 x i1>*  %ptr, align 4
   1717   ret void
   1718 }
   1719 
   1720 define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
   1721 ; KNL-LABEL: store_v16i1:
   1722 ; KNL:       ## %bb.0:
   1723 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
   1724 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
   1725 ; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
   1726 ; KNL-NEXT:    kmovw %k0, (%rdi)
   1727 ; KNL-NEXT:    vzeroupper
   1728 ; KNL-NEXT:    retq
   1729 ;
   1730 ; SKX-LABEL: store_v16i1:
   1731 ; SKX:       ## %bb.0:
   1732 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
   1733 ; SKX-NEXT:    vpmovb2m %xmm0, %k0
   1734 ; SKX-NEXT:    knotw %k0, %k0
   1735 ; SKX-NEXT:    kmovw %k0, (%rdi)
   1736 ; SKX-NEXT:    retq
   1737 ;
   1738 ; AVX512BW-LABEL: store_v16i1:
   1739 ; AVX512BW:       ## %bb.0:
   1740 ; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
   1741 ; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
   1742 ; AVX512BW-NEXT:    knotw %k0, %k0
   1743 ; AVX512BW-NEXT:    kmovw %k0, (%rdi)
   1744 ; AVX512BW-NEXT:    vzeroupper
   1745 ; AVX512BW-NEXT:    retq
   1746 ;
   1747 ; AVX512DQ-LABEL: store_v16i1:
   1748 ; AVX512DQ:       ## %bb.0:
   1749 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
   1750 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
   1751 ; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
   1752 ; AVX512DQ-NEXT:    knotw %k0, %k0
   1753 ; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
   1754 ; AVX512DQ-NEXT:    vzeroupper
   1755 ; AVX512DQ-NEXT:    retq
   1756 ;
   1757 ; X86-LABEL: store_v16i1:
   1758 ; X86:       ## %bb.0:
   1759 ; X86-NEXT:    vpsllw $7, %xmm0, %xmm0
   1760 ; X86-NEXT:    vpmovb2m %xmm0, %k0
   1761 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1762 ; X86-NEXT:    knotw %k0, %k0
   1763 ; X86-NEXT:    kmovw %k0, (%eax)
   1764 ; X86-NEXT:    retl
        ; Codegen test for inverting a <16 x i1> mask argument and storing it via %ptr.
        ; Per the assertions above, every run stores the 16-bit mask directly with
        ; kmovw; only the KNL run folds the inversion into vptestnmd, the others
        ; use an explicit knotw.
   1765   %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
   1766   store <16 x i1> %x, <16 x i1>*  %ptr, align 4
   1767   ret void
   1768 }
   1769 
   1770 ;void f2(int);
   1771 ;void f1(int c)
   1772 ;{
   1773 ;  static int v = 0;
   1774 ;  if (v == 0)
   1775 ;    v = 1;
   1776 ;  else
   1777 ;    v = 0;
   1778 ;  f2(v);
   1779 ;}
   1780 
   1781 @f1.v = internal unnamed_addr global i1 false, align 4 ; flag toggled by @f1; matches `static int v` in the C sketch above, held as an i1
   1782 
   1783 define void @f1(i32 %c) {
   1784 ; CHECK-LABEL: f1:
   1785 ; CHECK:       ## %bb.0: ## %entry
   1786 ; CHECK-NEXT:    movzbl {{.*}}(%rip), %edi
   1787 ; CHECK-NEXT:    xorl $1, %edi
   1788 ; CHECK-NEXT:    movb %dil, {{.*}}(%rip)
   1789 ; CHECK-NEXT:    jmp _f2 ## TAILCALL
   1790 ;
   1791 ; X86-LABEL: f1:
   1792 ; X86:       ## %bb.0: ## %entry
   1793 ; X86-NEXT:    subl $12, %esp
   1794 ; X86-NEXT:    .cfi_def_cfa_offset 16
   1795 ; X86-NEXT:    movzbl _f1.v, %eax
   1796 ; X86-NEXT:    xorl $1, %eax
   1797 ; X86-NEXT:    movb %al, _f1.v
   1798 ; X86-NEXT:    movl %eax, (%esp)
   1799 ; X86-NEXT:    calll _f2
   1800 ; X86-NEXT:    addl $12, %esp
   1801 ; X86-NEXT:    retl
        ; Toggles the i1 flag @f1.v (load, xor with true, store back) and passes
        ; the new value, zero-extended to i32, to @f2 in tail position. %c is
        ; never read. Per the assertions above, the 64-bit runs do the toggle with
        ; scalar GPR code and emit the call as a tail jump; the 32-bit run passes
        ; the argument on the stack and uses an ordinary calll.
   1802 entry:
   1803   %.b1 = load i1, i1* @f1.v, align 4
   1804   %not..b1 = xor i1 %.b1, true
   1805   store i1 %not..b1, i1* @f1.v, align 4
   1806   %0 = zext i1 %not..b1 to i32
   1807   tail call void @f2(i32 %0) #2
   1808   ret void
   1809 }
   1810 
   1811 declare void @f2(i32) #1 ; callee of @f1; declaration only
   1812 
   1813 define void @store_i16_i1(i16 %x, i1 *%y) {
   1814 ; CHECK-LABEL: store_i16_i1:
   1815 ; CHECK:       ## %bb.0:
   1816 ; CHECK-NEXT:    andl $1, %edi
   1817 ; CHECK-NEXT:    movb %dil, (%rsi)
   1818 ; CHECK-NEXT:    retq
   1819 ;
   1820 ; X86-LABEL: store_i16_i1:
   1821 ; X86:       ## %bb.0:
   1822 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1823 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
   1824 ; X86-NEXT:    andl $1, %ecx
   1825 ; X86-NEXT:    movb %cl, (%eax)
   1826 ; X86-NEXT:    retl
        ; Truncates an i16 to i1 and stores it through %y. The assertions above
        ; expect the value to be masked down to bit 0 and written as one byte,
        ; with no mask-register instructions involved.
   1827   %c = trunc i16 %x to i1
   1828   store i1 %c, i1* %y
   1829   ret void
   1830 }
   1831 
   1832 define void @store_i8_i1(i8 %x, i1 *%y) {
   1833 ; CHECK-LABEL: store_i8_i1:
   1834 ; CHECK:       ## %bb.0:
   1835 ; CHECK-NEXT:    andl $1, %edi
   1836 ; CHECK-NEXT:    movb %dil, (%rsi)
   1837 ; CHECK-NEXT:    retq
   1838 ;
   1839 ; X86-LABEL: store_i8_i1:
   1840 ; X86:       ## %bb.0:
   1841 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1842 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
   1843 ; X86-NEXT:    andb $1, %cl
   1844 ; X86-NEXT:    movb %cl, (%eax)
   1845 ; X86-NEXT:    retl
        ; Truncates an i8 to i1 and stores it through %y. The assertions above
        ; expect the value to be masked down to bit 0 (andl in the 64-bit runs,
        ; andb in the 32-bit run) and written as one byte.
   1846   %c = trunc i8 %x to i1
   1847   store i1 %c, i1* %y
   1848   ret void
   1849 }
   1850 
   1851 define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
   1852 ; KNL-LABEL: test_build_vec_v32i1:
   1853 ; KNL:       ## %bb.0:
   1854 ; KNL-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
   1855 ; KNL-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
   1856 ; KNL-NEXT:    retq
   1857 ;
   1858 ; SKX-LABEL: test_build_vec_v32i1:
   1859 ; SKX:       ## %bb.0:
   1860 ; SKX-NEXT:    movl $1497715861, %eax ## imm = 0x59455495
   1861 ; SKX-NEXT:    kmovd %eax, %k1
   1862 ; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
   1863 ; SKX-NEXT:    retq
   1864 ;
   1865 ; AVX512BW-LABEL: test_build_vec_v32i1:
   1866 ; AVX512BW:       ## %bb.0:
   1867 ; AVX512BW-NEXT:    movl $1497715861, %eax ## imm = 0x59455495
   1868 ; AVX512BW-NEXT:    kmovd %eax, %k1
   1869 ; AVX512BW-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
   1870 ; AVX512BW-NEXT:    retq
   1871 ;
   1872 ; AVX512DQ-LABEL: test_build_vec_v32i1:
   1873 ; AVX512DQ:       ## %bb.0:
   1874 ; AVX512DQ-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
   1875 ; AVX512DQ-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
   1876 ; AVX512DQ-NEXT:    retq
   1877 ;
   1878 ; X86-LABEL: test_build_vec_v32i1:
   1879 ; X86:       ## %bb.0:
   1880 ; X86-NEXT:    movl $1497715861, %eax ## imm = 0x59455495
   1881 ; X86-NEXT:    kmovd %eax, %k1
   1882 ; X86-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
   1883 ; X86-NEXT:    retl
        ; Selects between %x and zero with a constant <32 x i1> mask. In the runs
        ; that enable AVX512BW the mask is expected as the 32-bit immediate
        ; 0x59455495 (bit i set exactly when lane i of the constant is true),
        ; moved into a mask register for a zero-masking vmovdqu16. The KNL and
        ; AVX512DQ runs instead AND each ymm half with a constant-pool operand.
   1884   %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
   1885   ret <32 x i16> %ret
   1886 }
   1887 
   1888 define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
   1889 ; KNL-LABEL: test_build_vec_v64i1:
   1890 ; KNL:       ## %bb.0:
   1891 ; KNL-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
   1892 ; KNL-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
   1893 ; KNL-NEXT:    retq
   1894 ;
   1895 ; SKX-LABEL: test_build_vec_v64i1:
   1896 ; SKX:       ## %bb.0:
   1897 ; SKX-NEXT:    vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
   1898 ; SKX-NEXT:    retq
   1899 ;
   1900 ; AVX512BW-LABEL: test_build_vec_v64i1:
   1901 ; AVX512BW:       ## %bb.0:
   1902 ; AVX512BW-NEXT:    vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
   1903 ; AVX512BW-NEXT:    retq
   1904 ;
   1905 ; AVX512DQ-LABEL: test_build_vec_v64i1:
   1906 ; AVX512DQ:       ## %bb.0:
   1907 ; AVX512DQ-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
   1908 ; AVX512DQ-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
   1909 ; AVX512DQ-NEXT:    retq
   1910 ;
   1911 ; X86-LABEL: test_build_vec_v64i1:
   1912 ; X86:       ## %bb.0:
   1913 ; X86-NEXT:    vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
   1914 ; X86-NEXT:    retl
        ; Selects between %x and zero with a constant <64 x i1> mask. In the runs
        ; that enable AVX512BW the expected code is a single vpshufb that keeps
        ; the selected bytes in place and zeroes the rest, with no mask register.
        ; The KNL and AVX512DQ runs instead AND each ymm half with a
        ; constant-pool operand.
   1915   %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer
   1916   ret <64 x i8> %ret
   1917 }
   1918 
   1919 define void @ktest_1(<8 x double> %in, double * %base) {
   1920 ; KNL-LABEL: ktest_1:
   1921 ; KNL:       ## %bb.0:
   1922 ; KNL-NEXT:    vmovupd (%rdi), %zmm1
   1923 ; KNL-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
   1924 ; KNL-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
   1925 ; KNL-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
   1926 ; KNL-NEXT:    kmovw %k0, %eax
   1927 ; KNL-NEXT:    testb %al, %al
   1928 ; KNL-NEXT:    je LBB42_2
   1929 ; KNL-NEXT:  ## %bb.1: ## %L1
   1930 ; KNL-NEXT:    vmovapd %zmm0, (%rdi)
   1931 ; KNL-NEXT:    vzeroupper
   1932 ; KNL-NEXT:    retq
   1933 ; KNL-NEXT:  LBB42_2: ## %L2
   1934 ; KNL-NEXT:    vmovapd %zmm0, 8(%rdi)
   1935 ; KNL-NEXT:    vzeroupper
   1936 ; KNL-NEXT:    retq
   1937 ;
   1938 ; SKX-LABEL: ktest_1:
   1939 ; SKX:       ## %bb.0:
   1940 ; SKX-NEXT:    vmovupd (%rdi), %zmm1
   1941 ; SKX-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
   1942 ; SKX-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
   1943 ; SKX-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
   1944 ; SKX-NEXT:    kortestb %k0, %k0
   1945 ; SKX-NEXT:    je LBB42_2
   1946 ; SKX-NEXT:  ## %bb.1: ## %L1
   1947 ; SKX-NEXT:    vmovapd %zmm0, (%rdi)
   1948 ; SKX-NEXT:    vzeroupper
   1949 ; SKX-NEXT:    retq
   1950 ; SKX-NEXT:  LBB42_2: ## %L2
   1951 ; SKX-NEXT:    vmovapd %zmm0, 8(%rdi)
   1952 ; SKX-NEXT:    vzeroupper
   1953 ; SKX-NEXT:    retq
   1954 ;
   1955 ; AVX512BW-LABEL: ktest_1:
   1956 ; AVX512BW:       ## %bb.0:
   1957 ; AVX512BW-NEXT:    vmovupd (%rdi), %zmm1
   1958 ; AVX512BW-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
   1959 ; AVX512BW-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
   1960 ; AVX512BW-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
   1961 ; AVX512BW-NEXT:    kmovd %k0, %eax
   1962 ; AVX512BW-NEXT:    testb %al, %al
   1963 ; AVX512BW-NEXT:    je LBB42_2
   1964 ; AVX512BW-NEXT:  ## %bb.1: ## %L1
   1965 ; AVX512BW-NEXT:    vmovapd %zmm0, (%rdi)
   1966 ; AVX512BW-NEXT:    vzeroupper
   1967 ; AVX512BW-NEXT:    retq
   1968 ; AVX512BW-NEXT:  LBB42_2: ## %L2
   1969 ; AVX512BW-NEXT:    vmovapd %zmm0, 8(%rdi)
   1970 ; AVX512BW-NEXT:    vzeroupper
   1971 ; AVX512BW-NEXT:    retq
   1972 ;
   1973 ; AVX512DQ-LABEL: ktest_1:
   1974 ; AVX512DQ:       ## %bb.0:
   1975 ; AVX512DQ-NEXT:    vmovupd (%rdi), %zmm1
   1976 ; AVX512DQ-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
   1977 ; AVX512DQ-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
   1978 ; AVX512DQ-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
   1979 ; AVX512DQ-NEXT:    kortestb %k0, %k0
   1980 ; AVX512DQ-NEXT:    je LBB42_2
   1981 ; AVX512DQ-NEXT:  ## %bb.1: ## %L1
   1982 ; AVX512DQ-NEXT:    vmovapd %zmm0, (%rdi)
   1983 ; AVX512DQ-NEXT:    vzeroupper
   1984 ; AVX512DQ-NEXT:    retq
   1985 ; AVX512DQ-NEXT:  LBB42_2: ## %L2
   1986 ; AVX512DQ-NEXT:    vmovapd %zmm0, 8(%rdi)
   1987 ; AVX512DQ-NEXT:    vzeroupper
   1988 ; AVX512DQ-NEXT:    retq
   1989 ;
   1990 ; X86-LABEL: ktest_1:
   1991 ; X86:       ## %bb.0:
   1992 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1993 ; X86-NEXT:    vmovupd (%eax), %zmm1
   1994 ; X86-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
   1995 ; X86-NEXT:    vmovupd 8(%eax), %zmm1 {%k1} {z}
   1996 ; X86-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
   1997 ; X86-NEXT:    kortestb %k0, %k0
   1998 ; X86-NEXT:    je LBB42_2
   1999 ; X86-NEXT:  ## %bb.1: ## %L1
   2000 ; X86-NEXT:    vmovapd %zmm0, (%eax)
   2001 ; X86-NEXT:    vzeroupper
   2002 ; X86-NEXT:    retl
   2003 ; X86-NEXT:  LBB42_2: ## %L2
   2004 ; X86-NEXT:    vmovapd %zmm0, 8(%eax)
   2005 ; X86-NEXT:    vzeroupper
   2006 ; X86-NEXT:    retl
        ; Branches on whether an 8-lane compare mask is all zero. Two overlapping
        ; <8 x double> vectors are loaded from %base and from one double past it,
        ; both with align 1. %sel3 is the AND of two compares against %in; it is
        ; bitcast to i8 and tested against zero. Per the assertions above, the AND
        ; is folded into a masked compare, the runs that enable AVX512DQ test the
        ; mask with kortestb, and the KNL and AVX512BW runs copy it to a GPR and
        ; use testb.
   2007   %addr1 = getelementptr double, double * %base, i64 0
   2008   %addr2 = getelementptr double, double * %base, i64 1
   2009 
   2010   %vaddr1 = bitcast double* %addr1 to <8 x double>*
   2011   %vaddr2 = bitcast double* %addr2 to <8 x double>*
   2012 
   2013   %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1
   2014   %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1
   2015 
   2016   %sel1 = fcmp ogt <8 x double>%in, %val1
   2017   %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer
   2018   %sel2 = fcmp olt <8 x double> %in, %val3
   2019   %sel3 = and <8 x i1> %sel1, %sel2
   2020 
   2021   %int_sel3 = bitcast <8 x i1> %sel3 to i8
   2022   %res = icmp eq i8 %int_sel3, zeroinitializer
   2023   br i1 %res, label %L2, label %L1
        ; L1 is taken when at least one lane of %sel3 is set; it stores %in at %vaddr1.
   2024 L1:
   2025   store <8 x double> %in, <8 x double>* %vaddr1
   2026   br label %End
        ; L2 is taken when %sel3 is all zero; it stores %in at %vaddr2.
   2027 L2:
   2028   store <8 x double> %in, <8 x double>* %vaddr2
   2029   br label %End
   2030 End:
   2031   ret void
   2032 }
   2033 
   2034 define void @ktest_2(<32 x float> %in, float * %base) {
   2035 ;
   2036 ; KNL-LABEL: ktest_2:
   2037 ; KNL:       ## %bb.0:
   2038 ; KNL-NEXT:    vmovups (%rdi), %zmm2
   2039 ; KNL-NEXT:    vmovups 64(%rdi), %zmm3
   2040 ; KNL-NEXT:    vcmpltps %zmm1, %zmm3, %k1
   2041 ; KNL-NEXT:    vcmpltps %zmm0, %zmm2, %k2
   2042 ; KNL-NEXT:    vmovups 4(%rdi), %zmm2 {%k2} {z}
   2043 ; KNL-NEXT:    vmovups 68(%rdi), %zmm3 {%k1} {z}
   2044 ; KNL-NEXT:    vcmpltps %zmm3, %zmm1, %k0
   2045 ; KNL-NEXT:    vcmpltps %zmm2, %zmm0, %k3
   2046 ; KNL-NEXT:    korw %k3, %k2, %k2
   2047 ; KNL-NEXT:    kmovw %k2, %eax
   2048 ; KNL-NEXT:    korw %k0, %k1, %k0
   2049 ; KNL-NEXT:    kmovw %k0, %ecx
   2050 ; KNL-NEXT:    shll $16, %ecx
   2051 ; KNL-NEXT:    orl %eax, %ecx
   2052 ; KNL-NEXT:    je LBB43_2
   2053 ; KNL-NEXT:  ## %bb.1: ## %L1
   2054 ; KNL-NEXT:    vmovaps %zmm0, (%rdi)
   2055 ; KNL-NEXT:    vmovaps %zmm1, 64(%rdi)
   2056 ; KNL-NEXT:    vzeroupper
   2057 ; KNL-NEXT:    retq
   2058 ; KNL-NEXT:  LBB43_2: ## %L2
   2059 ; KNL-NEXT:    vmovaps %zmm0, 4(%rdi)
   2060 ; KNL-NEXT:    vmovaps %zmm1, 68(%rdi)
   2061 ; KNL-NEXT:    vzeroupper
   2062 ; KNL-NEXT:    retq
   2063 ;
   2064 ; SKX-LABEL: ktest_2:
   2065 ; SKX:       ## %bb.0:
   2066 ; SKX-NEXT:    vmovups (%rdi), %zmm2
   2067 ; SKX-NEXT:    vmovups 64(%rdi), %zmm3
   2068 ; SKX-NEXT:    vcmpltps %zmm0, %zmm2, %k1
   2069 ; SKX-NEXT:    vcmpltps %zmm1, %zmm3, %k2
   2070 ; SKX-NEXT:    kunpckwd %k1, %k2, %k0
   2071 ; SKX-NEXT:    vmovups 68(%rdi), %zmm2 {%k2} {z}
   2072 ; SKX-NEXT:    vmovups 4(%rdi), %zmm3 {%k1} {z}
   2073 ; SKX-NEXT:    vcmpltps %zmm3, %zmm0, %k1
   2074 ; SKX-NEXT:    vcmpltps %zmm2, %zmm1, %k2
   2075 ; SKX-NEXT:    kunpckwd %k1, %k2, %k1
   2076 ; SKX-NEXT:    kortestd %k1, %k0
   2077 ; SKX-NEXT:    je LBB43_2
   2078 ; SKX-NEXT:  ## %bb.1: ## %L1
   2079 ; SKX-NEXT:    vmovaps %zmm0, (%rdi)
   2080 ; SKX-NEXT:    vmovaps %zmm1, 64(%rdi)
   2081 ; SKX-NEXT:    vzeroupper
   2082 ; SKX-NEXT:    retq
   2083 ; SKX-NEXT:  LBB43_2: ## %L2
   2084 ; SKX-NEXT:    vmovaps %zmm0, 4(%rdi)
   2085 ; SKX-NEXT:    vmovaps %zmm1, 68(%rdi)
   2086 ; SKX-NEXT:    vzeroupper
   2087 ; SKX-NEXT:    retq
   2088 ;
   2089 ; AVX512BW-LABEL: ktest_2:
   2090 ; AVX512BW:       ## %bb.0:
   2091 ; AVX512BW-NEXT:    vmovups (%rdi), %zmm2
   2092 ; AVX512BW-NEXT:    vmovups 64(%rdi), %zmm3
   2093 ; AVX512BW-NEXT:    vcmpltps %zmm0, %zmm2, %k1
   2094 ; AVX512BW-NEXT:    vcmpltps %zmm1, %zmm3, %k2
   2095 ; AVX512BW-NEXT:    kunpckwd %k1, %k2, %k0
   2096 ; AVX512BW-NEXT:    vmovups 68(%rdi), %zmm2 {%k2} {z}
   2097 ; AVX512BW-NEXT:    vmovups 4(%rdi), %zmm3 {%k1} {z}
   2098 ; AVX512BW-NEXT:    vcmpltps %zmm3, %zmm0, %k1
   2099 ; AVX512BW-NEXT:    vcmpltps %zmm2, %zmm1, %k2
   2100 ; AVX512BW-NEXT:    kunpckwd %k1, %k2, %k1
   2101 ; AVX512BW-NEXT:    kortestd %k1, %k0
   2102 ; AVX512BW-NEXT:    je LBB43_2
   2103 ; AVX512BW-NEXT:  ## %bb.1: ## %L1
   2104 ; AVX512BW-NEXT:    vmovaps %zmm0, (%rdi)
   2105 ; AVX512BW-NEXT:    vmovaps %zmm1, 64(%rdi)
   2106 ; AVX512BW-NEXT:    vzeroupper
   2107 ; AVX512BW-NEXT:    retq
   2108 ; AVX512BW-NEXT:  LBB43_2: ## %L2
   2109 ; AVX512BW-NEXT:    vmovaps %zmm0, 4(%rdi)
   2110 ; AVX512BW-NEXT:    vmovaps %zmm1, 68(%rdi)
   2111 ; AVX512BW-NEXT:    vzeroupper
   2112 ; AVX512BW-NEXT:    retq
   2113 ;
   2114 ; AVX512DQ-LABEL: ktest_2:
   2115 ; AVX512DQ:       ## %bb.0:
   2116 ; AVX512DQ-NEXT:    vmovups (%rdi), %zmm2
   2117 ; AVX512DQ-NEXT:    vmovups 64(%rdi), %zmm3
   2118 ; AVX512DQ-NEXT:    vcmpltps %zmm1, %zmm3, %k1
   2119 ; AVX512DQ-NEXT:    vcmpltps %zmm0, %zmm2, %k2
   2120 ; AVX512DQ-NEXT:    vmovups 4(%rdi), %zmm2 {%k2} {z}
   2121 ; AVX512DQ-NEXT:    vmovups 68(%rdi), %zmm3 {%k1} {z}
   2122 ; AVX512DQ-NEXT:    vcmpltps %zmm3, %zmm1, %k0
   2123 ; AVX512DQ-NEXT:    vcmpltps %zmm2, %zmm0, %k3
   2124 ; AVX512DQ-NEXT:    korw %k3, %k2, %k2
   2125 ; AVX512DQ-NEXT:    kmovw %k2, %eax
   2126 ; AVX512DQ-NEXT:    korw %k0, %k1, %k0
   2127 ; AVX512DQ-NEXT:    kmovw %k0, %ecx
   2128 ; AVX512DQ-NEXT:    shll $16, %ecx
   2129 ; AVX512DQ-NEXT:    orl %eax, %ecx
   2130 ; AVX512DQ-NEXT:    je LBB43_2
   2131 ; AVX512DQ-NEXT:  ## %bb.1: ## %L1
   2132 ; AVX512DQ-NEXT:    vmovaps %zmm0, (%rdi)
   2133 ; AVX512DQ-NEXT:    vmovaps %zmm1, 64(%rdi)
   2134 ; AVX512DQ-NEXT:    vzeroupper
   2135 ; AVX512DQ-NEXT:    retq
   2136 ; AVX512DQ-NEXT:  LBB43_2: ## %L2
   2137 ; AVX512DQ-NEXT:    vmovaps %zmm0, 4(%rdi)
   2138 ; AVX512DQ-NEXT:    vmovaps %zmm1, 68(%rdi)
   2139 ; AVX512DQ-NEXT:    vzeroupper
   2140 ; AVX512DQ-NEXT:    retq
   2141 ;
   2142 ; X86-LABEL: ktest_2:
   2143 ; X86:       ## %bb.0:
   2144 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2145 ; X86-NEXT:    vmovups (%eax), %zmm2
   2146 ; X86-NEXT:    vmovups 64(%eax), %zmm3
   2147 ; X86-NEXT:    vcmpltps %zmm0, %zmm2, %k1
   2148 ; X86-NEXT:    vcmpltps %zmm1, %zmm3, %k2
   2149 ; X86-NEXT:    kunpckwd %k1, %k2, %k0
   2150 ; X86-NEXT:    vmovups 68(%eax), %zmm2 {%k2} {z}
   2151 ; X86-NEXT:    vmovups 4(%eax), %zmm3 {%k1} {z}
   2152 ; X86-NEXT:    vcmpltps %zmm3, %zmm0, %k1
   2153 ; X86-NEXT:    vcmpltps %zmm2, %zmm1, %k2
   2154 ; X86-NEXT:    kunpckwd %k1, %k2, %k1
   2155 ; X86-NEXT:    kortestd %k1, %k0
   2156 ; X86-NEXT:    je LBB43_2
   2157 ; X86-NEXT:  ## %bb.1: ## %L1
   2158 ; X86-NEXT:    vmovaps %zmm0, (%eax)
   2159 ; X86-NEXT:    vmovaps %zmm1, 64(%eax)
   2160 ; X86-NEXT:    vzeroupper
   2161 ; X86-NEXT:    retl
   2162 ; X86-NEXT:  LBB43_2: ## %L2
   2163 ; X86-NEXT:    vmovaps %zmm0, 4(%eax)
   2164 ; X86-NEXT:    vmovaps %zmm1, 68(%eax)
   2165 ; X86-NEXT:    vzeroupper
   2166 ; X86-NEXT:    retl
        ; 32-lane variant of the zero-mask branch test. Two overlapping
        ; <32 x float> vectors are loaded from %base and from one float past it,
        ; both with align 1. %sel3 is the OR of two compares against %in; it is
        ; bitcast to i32 and tested against zero. Per the assertions above, the
        ; runs that enable AVX512BW build the two 32-bit masks with kunpckwd and
        ; test them with a single kortestd. The KNL and AVX512DQ runs OR the
        ; 16-bit halves with korw and combine them in GPRs (shll $16 / orl).
   2167   %addr1 = getelementptr float, float * %base, i64 0
   2168   %addr2 = getelementptr float, float * %base, i64 1
   2169 
   2170   %vaddr1 = bitcast float* %addr1 to <32 x float>*
   2171   %vaddr2 = bitcast float* %addr2 to <32 x float>*
   2172 
   2173   %val1 = load <32 x float>, <32 x float> *%vaddr1, align 1
   2174   %val2 = load <32 x float>, <32 x float> *%vaddr2, align 1
   2175 
   2176   %sel1 = fcmp ogt <32 x float>%in, %val1
   2177   %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer
   2178   %sel2 = fcmp olt <32 x float> %in, %val3
   2179   %sel3 = or <32 x i1> %sel1, %sel2
   2180 
   2181   %int_sel3 = bitcast <32 x i1> %sel3 to i32
   2182   %res = icmp eq i32 %int_sel3, zeroinitializer
   2183   br i1 %res, label %L2, label %L1
        ; L1 is taken when at least one lane of %sel3 is set; it stores %in at %vaddr1.
   2184 L1:
   2185   store <32 x float> %in, <32 x float>* %vaddr1
   2186   br label %End
        ; L2 is taken when %sel3 is all zero; it stores %in at %vaddr2.
   2187 L2:
   2188   store <32 x float> %in, <32 x float>* %vaddr2
   2189   br label %End
   2190 End:
   2191   ret void
   2192 }
   2193 
   2194 define <8 x i64> @load_8i1(<8 x i1>* %a) {
   2195 ; KNL-LABEL: load_8i1:
   2196 ; KNL:       ## %bb.0:
   2197 ; KNL-NEXT:    movzbl (%rdi), %eax
   2198 ; KNL-NEXT:    kmovw %eax, %k1
   2199 ; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
   2200 ; KNL-NEXT:    retq
   2201 ;
   2202 ; SKX-LABEL: load_8i1:
   2203 ; SKX:       ## %bb.0:
   2204 ; SKX-NEXT:    kmovb (%rdi), %k0
   2205 ; SKX-NEXT:    vpmovm2q %k0, %zmm0
   2206 ; SKX-NEXT:    retq
   2207 ;
   2208 ; AVX512BW-LABEL: load_8i1:
   2209 ; AVX512BW:       ## %bb.0:
   2210 ; AVX512BW-NEXT:    movzbl (%rdi), %eax
   2211 ; AVX512BW-NEXT:    kmovd %eax, %k1
   2212 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
   2213 ; AVX512BW-NEXT:    retq
   2214 ;
   2215 ; AVX512DQ-LABEL: load_8i1:
   2216 ; AVX512DQ:       ## %bb.0:
   2217 ; AVX512DQ-NEXT:    kmovb (%rdi), %k0
   2218 ; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
   2219 ; AVX512DQ-NEXT:    retq
   2220 ;
   2221 ; X86-LABEL: load_8i1:
   2222 ; X86:       ## %bb.0:
   2223 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2224 ; X86-NEXT:    kmovb (%eax), %k0
   2225 ; X86-NEXT:    vpmovm2q %k0, %zmm0
   2226 ; X86-NEXT:    retl
        ; Loads an <8 x i1> mask from %a and sign-extends each lane to i64. Per the
        ; assertions above, the runs that enable AVX512DQ load the mask with kmovb
        ; and expand it with vpmovm2q. The KNL and AVX512BW runs load the byte
        ; through a GPR and build the all-ones lanes with a zero-masked
        ; vpternlogq.
   2227   %b = load <8 x i1>, <8 x i1>* %a
   2228   %c = sext <8 x i1> %b to <8 x i64>
   2229   ret <8 x i64> %c
   2230 }
   2231 
   2232 define <16 x i32> @load_16i1(<16 x i1>* %a) {
   2233 ; KNL-LABEL: load_16i1:
   2234 ; KNL:       ## %bb.0:
   2235 ; KNL-NEXT:    kmovw (%rdi), %k1
   2236 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
   2237 ; KNL-NEXT:    retq
   2238 ;
   2239 ; SKX-LABEL: load_16i1:
   2240 ; SKX:       ## %bb.0:
   2241 ; SKX-NEXT:    kmovw (%rdi), %k0
   2242 ; SKX-NEXT:    vpmovm2d %k0, %zmm0
   2243 ; SKX-NEXT:    retq
   2244 ;
   2245 ; AVX512BW-LABEL: load_16i1:
   2246 ; AVX512BW:       ## %bb.0:
   2247 ; AVX512BW-NEXT:    kmovw (%rdi), %k1
   2248 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
   2249 ; AVX512BW-NEXT:    retq
   2250 ;
   2251 ; AVX512DQ-LABEL: load_16i1:
   2252 ; AVX512DQ:       ## %bb.0:
   2253 ; AVX512DQ-NEXT:    kmovw (%rdi), %k0
   2254 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
   2255 ; AVX512DQ-NEXT:    retq
   2256 ;
   2257 ; X86-LABEL: load_16i1:
   2258 ; X86:       ## %bb.0:
   2259 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2260 ; X86-NEXT:    kmovw (%eax), %k0
   2261 ; X86-NEXT:    vpmovm2d %k0, %zmm0
   2262 ; X86-NEXT:    retl
        ; Loads a <16 x i1> mask from %a and sign-extends each lane to i32. Per the
        ; assertions above, every run loads the mask directly with kmovw. The runs
        ; that enable AVX512DQ expand it with vpmovm2d; the KNL and AVX512BW runs
        ; use a zero-masked vpternlogd.
   2263   %b = load <16 x i1>, <16 x i1>* %a
   2264   %c = sext <16 x i1> %b to <16 x i32>
   2265   ret <16 x i32> %c
   2266 }
   2267 
   2268 define <2 x i16> @load_2i1(<2 x i1>* %a) {
   2269 ; KNL-LABEL: load_2i1:
   2270 ; KNL:       ## %bb.0:
   2271 ; KNL-NEXT:    movzbl (%rdi), %eax
   2272 ; KNL-NEXT:    kmovw %eax, %k1
   2273 ; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
   2274 ; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
   2275 ; KNL-NEXT:    vzeroupper
   2276 ; KNL-NEXT:    retq
   2277 ;
   2278 ; SKX-LABEL: load_2i1:
   2279 ; SKX:       ## %bb.0:
   2280 ; SKX-NEXT:    kmovb (%rdi), %k0
   2281 ; SKX-NEXT:    vpmovm2q %k0, %xmm0
   2282 ; SKX-NEXT:    retq
   2283 ;
   2284 ; AVX512BW-LABEL: load_2i1:
   2285 ; AVX512BW:       ## %bb.0:
   2286 ; AVX512BW-NEXT:    movzbl (%rdi), %eax
   2287 ; AVX512BW-NEXT:    kmovd %eax, %k1
   2288 ; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
   2289 ; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
   2290 ; AVX512BW-NEXT:    vzeroupper
   2291 ; AVX512BW-NEXT:    retq
   2292 ;
   2293 ; AVX512DQ-LABEL: load_2i1:
   2294 ; AVX512DQ:       ## %bb.0:
   2295 ; AVX512DQ-NEXT:    kmovb (%rdi), %k0
   2296 ; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
   2297 ; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
   2298 ; AVX512DQ-NEXT:    vzeroupper
   2299 ; AVX512DQ-NEXT:    retq
   2300 ;
   2301 ; X86-LABEL: load_2i1:
   2302 ; X86:       ## %bb.0:
   2303 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2304 ; X86-NEXT:    kmovb (%eax), %k0
   2305 ; X86-NEXT:    vpmovm2q %k0, %xmm0
   2306 ; X86-NEXT:    retl
        ; Loads a <2 x i1> mask from %a and sign-extends each lane to i16. Per the
        ; assertions above, the expected code expands the mask into 64-bit lanes
        ; of xmm0. The SKX and 32-bit runs use the xmm form of vpmovm2q. The
        ; AVX512DQ run uses the zmm form and keeps only xmm0. The KNL and
        ; AVX512BW runs load the byte through a GPR and use a zero-masked
        ; vpternlogq on zmm0.
   2307   %b = load <2 x i1>, <2 x i1>* %a
   2308   %c = sext <2 x i1> %b to <2 x i16>
   2309   ret <2 x i16> %c
   2310 }
   2311 
   2312 define <4 x i16> @load_4i1(<4 x i1>* %a) {
   2313 ; KNL-LABEL: load_4i1:
   2314 ; KNL:       ## %bb.0:
   2315 ; KNL-NEXT:    movzbl (%rdi), %eax
   2316 ; KNL-NEXT:    kmovw %eax, %k1
   2317 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
   2318 ; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
   2319 ; KNL-NEXT:    vzeroupper
   2320 ; KNL-NEXT:    retq
   2321 ;
   2322 ; SKX-LABEL: load_4i1:
   2323 ; SKX:       ## %bb.0:
   2324 ; SKX-NEXT:    kmovb (%rdi), %k0
   2325 ; SKX-NEXT:    vpmovm2d %k0, %xmm0
   2326 ; SKX-NEXT:    retq
   2327 ;
   2328 ; AVX512BW-LABEL: load_4i1:
   2329 ; AVX512BW:       ## %bb.0:
   2330 ; AVX512BW-NEXT:    movzbl (%rdi), %eax
   2331 ; AVX512BW-NEXT:    kmovd %eax, %k1
   2332 ; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
   2333 ; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
   2334 ; AVX512BW-NEXT:    vzeroupper
   2335 ; AVX512BW-NEXT:    retq
   2336 ;
   2337 ; AVX512DQ-LABEL: load_4i1:
   2338 ; AVX512DQ:       ## %bb.0:
   2339 ; AVX512DQ-NEXT:    kmovb (%rdi), %k0
   2340 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
   2341 ; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
   2342 ; AVX512DQ-NEXT:    vzeroupper
   2343 ; AVX512DQ-NEXT:    retq
   2344 ;
   2345 ; X86-LABEL: load_4i1:
   2346 ; X86:       ## %bb.0:
   2347 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2348 ; X86-NEXT:    kmovb (%eax), %k0
   2349 ; X86-NEXT:    vpmovm2d %k0, %xmm0
   2350 ; X86-NEXT:    retl
        ; Loads a <4 x i1> mask from %a and sign-extends each lane to i16. Per the
        ; assertions above, the expected code expands the mask into 32-bit lanes
        ; of xmm0. The SKX and 32-bit runs use the xmm form of vpmovm2d. The
        ; AVX512DQ run uses the zmm form and keeps only xmm0. The KNL and
        ; AVX512BW runs load the byte through a GPR and use a zero-masked
        ; vpternlogd on zmm0.
   2351   %b = load <4 x i1>, <4 x i1>* %a
   2352   %c = sext <4 x i1> %b to <4 x i16>
   2353   ret <4 x i16> %c
   2354 }
   2355 
   2356 define <32 x i16> @load_32i1(<32 x i1>* %a) {
   2357 ; KNL-LABEL: load_32i1:
   2358 ; KNL:       ## %bb.0:
   2359 ; KNL-NEXT:    kmovw (%rdi), %k1
   2360 ; KNL-NEXT:    kmovw 2(%rdi), %k2
   2361 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
   2362 ; KNL-NEXT:    vpmovdw %zmm0, %ymm0
   2363 ; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
   2364 ; KNL-NEXT:    vpmovdw %zmm1, %ymm1
   2365 ; KNL-NEXT:    retq
   2366 ;
   2367 ; SKX-LABEL: load_32i1:
   2368 ; SKX:       ## %bb.0:
   2369 ; SKX-NEXT:    kmovd (%rdi), %k0
   2370 ; SKX-NEXT:    vpmovm2w %k0, %zmm0
   2371 ; SKX-NEXT:    retq
   2372 ;
   2373 ; AVX512BW-LABEL: load_32i1:
   2374 ; AVX512BW:       ## %bb.0:
   2375 ; AVX512BW-NEXT:    kmovd (%rdi), %k0
   2376 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
   2377 ; AVX512BW-NEXT:    retq
   2378 ;
   2379 ; AVX512DQ-LABEL: load_32i1:
   2380 ; AVX512DQ:       ## %bb.0:
   2381 ; AVX512DQ-NEXT:    kmovw (%rdi), %k0
   2382 ; AVX512DQ-NEXT:    kmovw 2(%rdi), %k1
   2383 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
   2384 ; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
   2385 ; AVX512DQ-NEXT:    vpmovm2d %k1, %zmm1
   2386 ; AVX512DQ-NEXT:    vpmovdw %zmm1, %ymm1
   2387 ; AVX512DQ-NEXT:    retq
   2388 ;
   2389 ; X86-LABEL: load_32i1:
   2390 ; X86:       ## %bb.0:
   2391 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2392 ; X86-NEXT:    kmovd (%eax), %k0
   2393 ; X86-NEXT:    vpmovm2w %k0, %zmm0
   2394 ; X86-NEXT:    retl
        ; Loads a <32 x i1> mask from %a and sign-extends each lane to i16. Per the
        ; assertions above, the runs that enable AVX512BW load all 32 bits with
        ; kmovd and expand them with one vpmovm2w into zmm0. The KNL and AVX512DQ
        ; runs split the mask into two 16-bit kmovw loads, expand each half to
        ; 32-bit lanes, and narrow with vpmovdw into ymm0 and ymm1.
   2395   %b = load <32 x i1>, <32 x i1>* %a
   2396   %c = sext <32 x i1> %b to <32 x i16>
   2397   ret <32 x i16> %c
   2398 }
   2399 
   2400 define <64 x i8> @load_64i1(<64 x i1>* %a) {
        ; Loads a <64 x i1> mask from %a and sign-extends every bit to an i8 lane.
        ; The SKX, AVX512BW and i686 expectations use one 64-bit mask load (kmovq)
        ; followed by vpmovm2b. The KNL and AVX512DQ expectations load four 16-bit
        ; pieces (offsets 0, 2, 4, 6), narrow each to bytes with vpmovdb, and pair
        ; them into two ymm registers with vinserti128.
   2401 ; KNL-LABEL: load_64i1:
   2402 ; KNL:       ## %bb.0:
   2403 ; KNL-NEXT:    kmovw (%rdi), %k1
   2404 ; KNL-NEXT:    kmovw 2(%rdi), %k2
   2405 ; KNL-NEXT:    kmovw 4(%rdi), %k3
   2406 ; KNL-NEXT:    kmovw 6(%rdi), %k4
   2407 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
   2408 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
   2409 ; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
   2410 ; KNL-NEXT:    vpmovdb %zmm1, %xmm1
   2411 ; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2412 ; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k3} {z}
   2413 ; KNL-NEXT:    vpmovdb %zmm1, %xmm1
   2414 ; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k4} {z}
   2415 ; KNL-NEXT:    vpmovdb %zmm2, %xmm2
   2416 ; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
   2417 ; KNL-NEXT:    retq
   2418 ;
   2419 ; SKX-LABEL: load_64i1:
   2420 ; SKX:       ## %bb.0:
   2421 ; SKX-NEXT:    kmovq (%rdi), %k0
   2422 ; SKX-NEXT:    vpmovm2b %k0, %zmm0
   2423 ; SKX-NEXT:    retq
   2424 ;
   2425 ; AVX512BW-LABEL: load_64i1:
   2426 ; AVX512BW:       ## %bb.0:
   2427 ; AVX512BW-NEXT:    kmovq (%rdi), %k0
   2428 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
   2429 ; AVX512BW-NEXT:    retq
   2430 ;
   2431 ; AVX512DQ-LABEL: load_64i1:
   2432 ; AVX512DQ:       ## %bb.0:
   2433 ; AVX512DQ-NEXT:    kmovw (%rdi), %k0
   2434 ; AVX512DQ-NEXT:    kmovw 2(%rdi), %k1
   2435 ; AVX512DQ-NEXT:    kmovw 4(%rdi), %k2
   2436 ; AVX512DQ-NEXT:    kmovw 6(%rdi), %k3
   2437 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
   2438 ; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
   2439 ; AVX512DQ-NEXT:    vpmovm2d %k1, %zmm1
   2440 ; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
   2441 ; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   2442 ; AVX512DQ-NEXT:    vpmovm2d %k2, %zmm1
   2443 ; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
   2444 ; AVX512DQ-NEXT:    vpmovm2d %k3, %zmm2
   2445 ; AVX512DQ-NEXT:    vpmovdb %zmm2, %xmm2
   2446 ; AVX512DQ-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
   2447 ; AVX512DQ-NEXT:    retq
   2448 ;
   2449 ; X86-LABEL: load_64i1:
   2450 ; X86:       ## %bb.0:
   2451 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2452 ; X86-NEXT:    kmovq (%eax), %k0
   2453 ; X86-NEXT:    vpmovm2b %k0, %zmm0
   2454 ; X86-NEXT:    retl
   2455   %b = load <64 x i1>, <64 x i1>* %a
   2456   %c = sext <64 x i1> %b to <64 x i8>
   2457   ret <64 x i8> %c
   2458 }
   2459 
   2460 define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) {
        ; Stores the <8 x i1> argument %v to %a.
        ; In every expectation the low bit of each lane is first shifted into the
        ; sign position and the vector is then converted to a mask register. The
        ; SKX, AVX512DQ and i686 expectations write the mask with kmovb directly to
        ; memory; the KNL and AVX512BW expectations move it to a general-purpose
        ; register and store the low byte (%al).
   2461 ; KNL-LABEL: store_8i1:
   2462 ; KNL:       ## %bb.0:
   2463 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
   2464 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
   2465 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
   2466 ; KNL-NEXT:    kmovw %k0, %eax
   2467 ; KNL-NEXT:    movb %al, (%rdi)
   2468 ; KNL-NEXT:    vzeroupper
   2469 ; KNL-NEXT:    retq
   2470 ;
   2471 ; SKX-LABEL: store_8i1:
   2472 ; SKX:       ## %bb.0:
   2473 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
   2474 ; SKX-NEXT:    vpmovw2m %xmm0, %k0
   2475 ; SKX-NEXT:    kmovb %k0, (%rdi)
   2476 ; SKX-NEXT:    retq
   2477 ;
   2478 ; AVX512BW-LABEL: store_8i1:
   2479 ; AVX512BW:       ## %bb.0:
   2480 ; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
   2481 ; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
   2482 ; AVX512BW-NEXT:    kmovd %k0, %eax
   2483 ; AVX512BW-NEXT:    movb %al, (%rdi)
   2484 ; AVX512BW-NEXT:    vzeroupper
   2485 ; AVX512BW-NEXT:    retq
   2486 ;
   2487 ; AVX512DQ-LABEL: store_8i1:
   2488 ; AVX512DQ:       ## %bb.0:
   2489 ; AVX512DQ-NEXT:    vpmovsxwq %xmm0, %zmm0
   2490 ; AVX512DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
   2491 ; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
   2492 ; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
   2493 ; AVX512DQ-NEXT:    vzeroupper
   2494 ; AVX512DQ-NEXT:    retq
   2495 ;
   2496 ; X86-LABEL: store_8i1:
   2497 ; X86:       ## %bb.0:
   2498 ; X86-NEXT:    vpsllw $15, %xmm0, %xmm0
   2499 ; X86-NEXT:    vpmovw2m %xmm0, %k0
   2500 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2501 ; X86-NEXT:    kmovb %k0, (%eax)
   2502 ; X86-NEXT:    retl
   2503   store <8 x i1> %v, <8 x i1>* %a
   2504   ret void
   2505 }
   2506 
   2507 define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) {
        ; Truncates the <8 x i16> argument %v to <8 x i1> (keeping bit 0 of each
        ; lane) and stores the resulting mask to %a.
        ; Every expectation shifts bit 0 into the sign position before converting
        ; the vector to a mask register. The SKX, AVX512DQ and i686 expectations
        ; store with kmovb to memory; the KNL and AVX512BW expectations go through
        ; a general-purpose register and store %al.
   2508 ; KNL-LABEL: store_8i1_1:
   2509 ; KNL:       ## %bb.0:
   2510 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
   2511 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
   2512 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
   2513 ; KNL-NEXT:    kmovw %k0, %eax
   2514 ; KNL-NEXT:    movb %al, (%rdi)
   2515 ; KNL-NEXT:    vzeroupper
   2516 ; KNL-NEXT:    retq
   2517 ;
   2518 ; SKX-LABEL: store_8i1_1:
   2519 ; SKX:       ## %bb.0:
   2520 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
   2521 ; SKX-NEXT:    vpmovw2m %xmm0, %k0
   2522 ; SKX-NEXT:    kmovb %k0, (%rdi)
   2523 ; SKX-NEXT:    retq
   2524 ;
   2525 ; AVX512BW-LABEL: store_8i1_1:
   2526 ; AVX512BW:       ## %bb.0:
   2527 ; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
   2528 ; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
   2529 ; AVX512BW-NEXT:    kmovd %k0, %eax
   2530 ; AVX512BW-NEXT:    movb %al, (%rdi)
   2531 ; AVX512BW-NEXT:    vzeroupper
   2532 ; AVX512BW-NEXT:    retq
   2533 ;
   2534 ; AVX512DQ-LABEL: store_8i1_1:
   2535 ; AVX512DQ:       ## %bb.0:
   2536 ; AVX512DQ-NEXT:    vpmovsxwq %xmm0, %zmm0
   2537 ; AVX512DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
   2538 ; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
   2539 ; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
   2540 ; AVX512DQ-NEXT:    vzeroupper
   2541 ; AVX512DQ-NEXT:    retq
   2542 ;
   2543 ; X86-LABEL: store_8i1_1:
   2544 ; X86:       ## %bb.0:
   2545 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2546 ; X86-NEXT:    vpsllw $15, %xmm0, %xmm0
   2547 ; X86-NEXT:    vpmovw2m %xmm0, %k0
   2548 ; X86-NEXT:    kmovb %k0, (%eax)
   2549 ; X86-NEXT:    retl
   2550   %v1 = trunc <8 x i16> %v to <8 x i1>
   2551   store <8 x i1> %v1, <8 x i1>* %a
   2552   ret void
   2553 }
   2554 
   2555 define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) {
        ; Stores the <16 x i1> argument %v to %a.
        ; All expectations end with a single 16-bit mask store (kmovw to memory).
        ; They differ only in how the byte-lane argument becomes a mask: the KNL
        ; and AVX512DQ expectations widen to 32-bit lanes first, while the SKX,
        ; AVX512BW and i686 expectations convert the bytes directly with vpmovb2m.
   2556 ; KNL-LABEL: store_16i1:
   2557 ; KNL:       ## %bb.0:
   2558 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
   2559 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
   2560 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
   2561 ; KNL-NEXT:    kmovw %k0, (%rdi)
   2562 ; KNL-NEXT:    vzeroupper
   2563 ; KNL-NEXT:    retq
   2564 ;
   2565 ; SKX-LABEL: store_16i1:
   2566 ; SKX:       ## %bb.0:
   2567 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
   2568 ; SKX-NEXT:    vpmovb2m %xmm0, %k0
   2569 ; SKX-NEXT:    kmovw %k0, (%rdi)
   2570 ; SKX-NEXT:    retq
   2571 ;
   2572 ; AVX512BW-LABEL: store_16i1:
   2573 ; AVX512BW:       ## %bb.0:
   2574 ; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
   2575 ; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
   2576 ; AVX512BW-NEXT:    kmovw %k0, (%rdi)
   2577 ; AVX512BW-NEXT:    vzeroupper
   2578 ; AVX512BW-NEXT:    retq
   2579 ;
   2580 ; AVX512DQ-LABEL: store_16i1:
   2581 ; AVX512DQ:       ## %bb.0:
   2582 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
   2583 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
   2584 ; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
   2585 ; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
   2586 ; AVX512DQ-NEXT:    vzeroupper
   2587 ; AVX512DQ-NEXT:    retq
   2588 ;
   2589 ; X86-LABEL: store_16i1:
   2590 ; X86:       ## %bb.0:
   2591 ; X86-NEXT:    vpsllw $7, %xmm0, %xmm0
   2592 ; X86-NEXT:    vpmovb2m %xmm0, %k0
   2593 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2594 ; X86-NEXT:    kmovw %k0, (%eax)
   2595 ; X86-NEXT:    retl
   2596   store <16 x i1> %v, <16 x i1>* %a
   2597   ret void
   2598 }
   2599 
   2600 define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) {
        ; Stores the <32 x i1> argument %v to %a.
        ; The SKX, AVX512BW and i686 expectations build one 32-bit mask with
        ; vpmovb2m and write it with a single kmovd. The KNL and AVX512DQ
        ; expectations split the argument into two 16-lane halves (the upper half
        ; taken with vextracti128) and write two 16-bit masks at offsets 0 and 2.
   2601 ; KNL-LABEL: store_32i1:
   2602 ; KNL:       ## %bb.0:
   2603 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm1
   2604 ; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
   2605 ; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0
   2606 ; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
   2607 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
   2608 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
   2609 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
   2610 ; KNL-NEXT:    kmovw %k1, 2(%rdi)
   2611 ; KNL-NEXT:    kmovw %k0, (%rdi)
   2612 ; KNL-NEXT:    vzeroupper
   2613 ; KNL-NEXT:    retq
   2614 ;
   2615 ; SKX-LABEL: store_32i1:
   2616 ; SKX:       ## %bb.0:
   2617 ; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
   2618 ; SKX-NEXT:    vpmovb2m %ymm0, %k0
   2619 ; SKX-NEXT:    kmovd %k0, (%rdi)
   2620 ; SKX-NEXT:    vzeroupper
   2621 ; SKX-NEXT:    retq
   2622 ;
   2623 ; AVX512BW-LABEL: store_32i1:
   2624 ; AVX512BW:       ## %bb.0:
   2625 ; AVX512BW-NEXT:    vpsllw $7, %ymm0, %ymm0
   2626 ; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
   2627 ; AVX512BW-NEXT:    kmovd %k0, (%rdi)
   2628 ; AVX512BW-NEXT:    vzeroupper
   2629 ; AVX512BW-NEXT:    retq
   2630 ;
   2631 ; AVX512DQ-LABEL: store_32i1:
   2632 ; AVX512DQ:       ## %bb.0:
   2633 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm1
   2634 ; AVX512DQ-NEXT:    vpslld $31, %zmm1, %zmm1
   2635 ; AVX512DQ-NEXT:    vpmovd2m %zmm1, %k0
   2636 ; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
   2637 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
   2638 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
   2639 ; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k1
   2640 ; AVX512DQ-NEXT:    kmovw %k1, 2(%rdi)
   2641 ; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
   2642 ; AVX512DQ-NEXT:    vzeroupper
   2643 ; AVX512DQ-NEXT:    retq
   2644 ;
   2645 ; X86-LABEL: store_32i1:
   2646 ; X86:       ## %bb.0:
   2647 ; X86-NEXT:    vpsllw $7, %ymm0, %ymm0
   2648 ; X86-NEXT:    vpmovb2m %ymm0, %k0
   2649 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2650 ; X86-NEXT:    kmovd %k0, (%eax)
   2651 ; X86-NEXT:    vzeroupper
   2652 ; X86-NEXT:    retl
   2653   store <32 x i1> %v, <32 x i1>* %a
   2654   ret void
   2655 }
   2656 
   2657 define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) {
        ; Truncates the <32 x i16> argument %v to <32 x i1> (keeping bit 0 of each
        ; lane) and stores the resulting mask to %a.
        ; The SKX, AVX512BW and i686 expectations convert the whole zmm register
        ; with vpmovw2m and store once with kmovd. The KNL and AVX512DQ
        ; expectations process the two ymm halves separately and store two 16-bit
        ; masks at offsets 0 and 2.
   2658 ; KNL-LABEL: store_32i1_1:
   2659 ; KNL:       ## %bb.0:
   2660 ; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
   2661 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
   2662 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
   2663 ; KNL-NEXT:    vpmovsxwd %ymm1, %zmm0
   2664 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
   2665 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
   2666 ; KNL-NEXT:    kmovw %k1, 2(%rdi)
   2667 ; KNL-NEXT:    kmovw %k0, (%rdi)
   2668 ; KNL-NEXT:    vzeroupper
   2669 ; KNL-NEXT:    retq
   2670 ;
   2671 ; SKX-LABEL: store_32i1_1:
   2672 ; SKX:       ## %bb.0:
   2673 ; SKX-NEXT:    vpsllw $15, %zmm0, %zmm0
   2674 ; SKX-NEXT:    vpmovw2m %zmm0, %k0
   2675 ; SKX-NEXT:    kmovd %k0, (%rdi)
   2676 ; SKX-NEXT:    vzeroupper
   2677 ; SKX-NEXT:    retq
   2678 ;
   2679 ; AVX512BW-LABEL: store_32i1_1:
   2680 ; AVX512BW:       ## %bb.0:
   2681 ; AVX512BW-NEXT:    vpsllw $15, %zmm0, %zmm0
   2682 ; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
   2683 ; AVX512BW-NEXT:    kmovd %k0, (%rdi)
   2684 ; AVX512BW-NEXT:    vzeroupper
   2685 ; AVX512BW-NEXT:    retq
   2686 ;
   2687 ; AVX512DQ-LABEL: store_32i1_1:
   2688 ; AVX512DQ:       ## %bb.0:
   2689 ; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm0
   2690 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
   2691 ; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
   2692 ; AVX512DQ-NEXT:    vpmovsxwd %ymm1, %zmm0
   2693 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
   2694 ; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k1
   2695 ; AVX512DQ-NEXT:    kmovw %k1, 2(%rdi)
   2696 ; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
   2697 ; AVX512DQ-NEXT:    vzeroupper
   2698 ; AVX512DQ-NEXT:    retq
   2699 ;
   2700 ; X86-LABEL: store_32i1_1:
   2701 ; X86:       ## %bb.0:
   2702 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2703 ; X86-NEXT:    vpsllw $15, %zmm0, %zmm0
   2704 ; X86-NEXT:    vpmovw2m %zmm0, %k0
   2705 ; X86-NEXT:    kmovd %k0, (%eax)
   2706 ; X86-NEXT:    vzeroupper
   2707 ; X86-NEXT:    retl
   2708   %v1 = trunc <32 x i16> %v to <32 x i1>
   2709   store <32 x i1> %v1, <32 x i1>* %a
   2710   ret void
   2711 }
   2712 
   2713 
   2714 define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
        ; Stores the <64 x i1> argument %v to %a.
        ; The SKX, AVX512BW and i686 expectations convert one zmm register with
        ; vpmovb2m and store the 64-bit mask with a single kmovq. The KNL and
        ; AVX512DQ expectations receive the argument in four xmm registers, turn
        ; each into a 16-bit mask, and store them at offsets 0, 2, 4 and 6.
   2715 ;
   2716 ; KNL-LABEL: store_64i1:
   2717 ; KNL:       ## %bb.0:
   2718 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
   2719 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
   2720 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
   2721 ; KNL-NEXT:    vpmovsxbd %xmm1, %zmm0
   2722 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
   2723 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
   2724 ; KNL-NEXT:    vpmovsxbd %xmm2, %zmm0
   2725 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
   2726 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k2
   2727 ; KNL-NEXT:    vpmovsxbd %xmm3, %zmm0
   2728 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
   2729 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k3
   2730 ; KNL-NEXT:    kmovw %k3, 6(%rdi)
   2731 ; KNL-NEXT:    kmovw %k2, 4(%rdi)
   2732 ; KNL-NEXT:    kmovw %k1, 2(%rdi)
   2733 ; KNL-NEXT:    kmovw %k0, (%rdi)
   2734 ; KNL-NEXT:    vzeroupper
   2735 ; KNL-NEXT:    retq
   2736 ;
   2737 ; SKX-LABEL: store_64i1:
   2738 ; SKX:       ## %bb.0:
   2739 ; SKX-NEXT:    vpsllw $7, %zmm0, %zmm0
   2740 ; SKX-NEXT:    vpmovb2m %zmm0, %k0
   2741 ; SKX-NEXT:    kmovq %k0, (%rdi)
   2742 ; SKX-NEXT:    vzeroupper
   2743 ; SKX-NEXT:    retq
   2744 ;
   2745 ; AVX512BW-LABEL: store_64i1:
   2746 ; AVX512BW:       ## %bb.0:
   2747 ; AVX512BW-NEXT:    vpsllw $7, %zmm0, %zmm0
   2748 ; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
   2749 ; AVX512BW-NEXT:    kmovq %k0, (%rdi)
   2750 ; AVX512BW-NEXT:    vzeroupper
   2751 ; AVX512BW-NEXT:    retq
   2752 ;
   2753 ; AVX512DQ-LABEL: store_64i1:
   2754 ; AVX512DQ:       ## %bb.0:
   2755 ; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
   2756 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
   2757 ; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
   2758 ; AVX512DQ-NEXT:    vpmovsxbd %xmm1, %zmm0
   2759 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
   2760 ; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k1
   2761 ; AVX512DQ-NEXT:    vpmovsxbd %xmm2, %zmm0
   2762 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
   2763 ; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k2
   2764 ; AVX512DQ-NEXT:    vpmovsxbd %xmm3, %zmm0
   2765 ; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
   2766 ; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k3
   2767 ; AVX512DQ-NEXT:    kmovw %k3, 6(%rdi)
   2768 ; AVX512DQ-NEXT:    kmovw %k2, 4(%rdi)
   2769 ; AVX512DQ-NEXT:    kmovw %k1, 2(%rdi)
   2770 ; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
   2771 ; AVX512DQ-NEXT:    vzeroupper
   2772 ; AVX512DQ-NEXT:    retq
   2773 ;
   2774 ; X86-LABEL: store_64i1:
   2775 ; X86:       ## %bb.0:
   2776 ; X86-NEXT:    vpsllw $7, %zmm0, %zmm0
   2777 ; X86-NEXT:    vpmovb2m %zmm0, %k0
   2778 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2779 ; X86-NEXT:    kmovq %k0, (%eax)
   2780 ; X86-NEXT:    vzeroupper
   2781 ; X86-NEXT:    retl
   2782   store <64 x i1> %v, <64 x i1>* %a
   2783   ret void
   2784 }
   2785 
   2786 define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
        ; Compares all 16 lanes of %a against zero, keeps the low 8 result bits
        ; (shufflevector indices 0-7), bitcasts them to i8, zero-extends to i32 and
        ; returns the value added to itself.
        ; The SKX, AVX512DQ and i686 expectations read the mask with kmovb and need
        ; no further extension; the KNL and AVX512BW expectations read a wider mask
        ; and add an explicit movzbl to clear the upper bits.
   2787 ; KNL-LABEL: test_bitcast_v8i1_zext:
   2788 ; KNL:       ## %bb.0:
   2789 ; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
   2790 ; KNL-NEXT:    kmovw %k0, %eax
   2791 ; KNL-NEXT:    movzbl %al, %eax
   2792 ; KNL-NEXT:    addl %eax, %eax
   2793 ; KNL-NEXT:    vzeroupper
   2794 ; KNL-NEXT:    retq
   2795 ;
   2796 ; SKX-LABEL: test_bitcast_v8i1_zext:
   2797 ; SKX:       ## %bb.0:
   2798 ; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0
   2799 ; SKX-NEXT:    kmovb %k0, %eax
   2800 ; SKX-NEXT:    addl %eax, %eax
   2801 ; SKX-NEXT:    vzeroupper
   2802 ; SKX-NEXT:    retq
   2803 ;
   2804 ; AVX512BW-LABEL: test_bitcast_v8i1_zext:
   2805 ; AVX512BW:       ## %bb.0:
   2806 ; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
   2807 ; AVX512BW-NEXT:    kmovd %k0, %eax
   2808 ; AVX512BW-NEXT:    movzbl %al, %eax
   2809 ; AVX512BW-NEXT:    addl %eax, %eax
   2810 ; AVX512BW-NEXT:    vzeroupper
   2811 ; AVX512BW-NEXT:    retq
   2812 ;
   2813 ; AVX512DQ-LABEL: test_bitcast_v8i1_zext:
   2814 ; AVX512DQ:       ## %bb.0:
   2815 ; AVX512DQ-NEXT:    vptestnmd %zmm0, %zmm0, %k0
   2816 ; AVX512DQ-NEXT:    kmovb %k0, %eax
   2817 ; AVX512DQ-NEXT:    addl %eax, %eax
   2818 ; AVX512DQ-NEXT:    vzeroupper
   2819 ; AVX512DQ-NEXT:    retq
   2820 ;
   2821 ; X86-LABEL: test_bitcast_v8i1_zext:
   2822 ; X86:       ## %bb.0:
   2823 ; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
   2824 ; X86-NEXT:    kmovb %k0, %eax
   2825 ; X86-NEXT:    addl %eax, %eax
   2826 ; X86-NEXT:    vzeroupper
   2827 ; X86-NEXT:    retl
   2828    %v1 = icmp eq <16 x i32> %a, zeroinitializer
   2829    %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   2830    %mask1 = bitcast <8 x i1> %mask to i8
   2831    %val = zext i8 %mask1 to i32
   2832    %val1 = add i32 %val, %val
   2833    ret i32 %val1
   2834 }
   2835 
   2836 define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
        ; Compares all 16 lanes of %a against zero, bitcasts the <16 x i1> result to
        ; i16, zero-extends to i32 and returns the value added to itself.
        ; Both expectation sets read the mask with kmovw into a 32-bit register and
        ; contain no separate zero-extension instruction before the add.
   2837 ; CHECK-LABEL: test_bitcast_v16i1_zext:
   2838 ; CHECK:       ## %bb.0:
   2839 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k0
   2840 ; CHECK-NEXT:    kmovw %k0, %eax
   2841 ; CHECK-NEXT:    addl %eax, %eax
   2842 ; CHECK-NEXT:    vzeroupper
   2843 ; CHECK-NEXT:    retq
   2844 ;
   2845 ; X86-LABEL: test_bitcast_v16i1_zext:
   2846 ; X86:       ## %bb.0:
   2847 ; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
   2848 ; X86-NEXT:    kmovw %k0, %eax
   2849 ; X86-NEXT:    addl %eax, %eax
   2850 ; X86-NEXT:    vzeroupper
   2851 ; X86-NEXT:    retl
   2852    %v1 = icmp eq <16 x i32> %a, zeroinitializer
   2853    %mask1 = bitcast <16 x i1> %v1 to i16
   2854    %val = zext i16 %mask1 to i32
   2855    %val1 = add i32 %val, %val
   2856    ret i32 %val1
   2857 }
   2858 
   2859 define i16 @test_v16i1_add(i16 %x, i16 %y) {
        ; Reinterprets %x and %y as <16 x i1> masks, adds them lane-wise and returns
        ; the result as i16. Addition of 1-bit lanes wraps modulo 2, and every
        ; expectation below implements it with a mask-register XOR (kxorw).
   2860 ; KNL-LABEL: test_v16i1_add:
   2861 ; KNL:       ## %bb.0:
   2862 ; KNL-NEXT:    kmovw %edi, %k0
   2863 ; KNL-NEXT:    kmovw %esi, %k1
   2864 ; KNL-NEXT:    kxorw %k1, %k0, %k0
   2865 ; KNL-NEXT:    kmovw %k0, %eax
   2866 ; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
   2867 ; KNL-NEXT:    retq
   2868 ;
   2869 ; SKX-LABEL: test_v16i1_add:
   2870 ; SKX:       ## %bb.0:
   2871 ; SKX-NEXT:    kmovd %edi, %k0
   2872 ; SKX-NEXT:    kmovd %esi, %k1
   2873 ; SKX-NEXT:    kxorw %k1, %k0, %k0
   2874 ; SKX-NEXT:    kmovd %k0, %eax
   2875 ; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
   2876 ; SKX-NEXT:    retq
   2877 ;
   2878 ; AVX512BW-LABEL: test_v16i1_add:
   2879 ; AVX512BW:       ## %bb.0:
   2880 ; AVX512BW-NEXT:    kmovd %edi, %k0
   2881 ; AVX512BW-NEXT:    kmovd %esi, %k1
   2882 ; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
   2883 ; AVX512BW-NEXT:    kmovd %k0, %eax
   2884 ; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
   2885 ; AVX512BW-NEXT:    retq
   2886 ;
   2887 ; AVX512DQ-LABEL: test_v16i1_add:
   2888 ; AVX512DQ:       ## %bb.0:
   2889 ; AVX512DQ-NEXT:    kmovw %edi, %k0
   2890 ; AVX512DQ-NEXT:    kmovw %esi, %k1
   2891 ; AVX512DQ-NEXT:    kxorw %k1, %k0, %k0
   2892 ; AVX512DQ-NEXT:    kmovw %k0, %eax
   2893 ; AVX512DQ-NEXT:    ## kill: def $ax killed $ax killed $eax
   2894 ; AVX512DQ-NEXT:    retq
   2895 ;
   2896 ; X86-LABEL: test_v16i1_add:
   2897 ; X86:       ## %bb.0:
   2898 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0
   2899 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
   2900 ; X86-NEXT:    kxorw %k1, %k0, %k0
   2901 ; X86-NEXT:    kmovd %k0, %eax
   2902 ; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
   2903 ; X86-NEXT:    retl
   2904   %m0 = bitcast i16 %x to <16 x i1>
   2905   %m1 = bitcast i16 %y to <16 x i1>
   2906   %m2 = add <16 x i1> %m0,  %m1
   2907   %ret = bitcast <16 x i1> %m2 to i16
   2908   ret i16 %ret
   2909 }
   2910 
   2911 define i16 @test_v16i1_sub(i16 %x, i16 %y) {
        ; Reinterprets %x and %y as <16 x i1> masks, subtracts them lane-wise and
        ; returns the result as i16. Subtraction of 1-bit lanes wraps modulo 2, and
        ; every expectation below implements it with a mask-register XOR (kxorw).
   2912 ; KNL-LABEL: test_v16i1_sub:
   2913 ; KNL:       ## %bb.0:
   2914 ; KNL-NEXT:    kmovw %edi, %k0
   2915 ; KNL-NEXT:    kmovw %esi, %k1
   2916 ; KNL-NEXT:    kxorw %k1, %k0, %k0
   2917 ; KNL-NEXT:    kmovw %k0, %eax
   2918 ; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
   2919 ; KNL-NEXT:    retq
   2920 ;
   2921 ; SKX-LABEL: test_v16i1_sub:
   2922 ; SKX:       ## %bb.0:
   2923 ; SKX-NEXT:    kmovd %edi, %k0
   2924 ; SKX-NEXT:    kmovd %esi, %k1
   2925 ; SKX-NEXT:    kxorw %k1, %k0, %k0
   2926 ; SKX-NEXT:    kmovd %k0, %eax
   2927 ; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
   2928 ; SKX-NEXT:    retq
   2929 ;
   2930 ; AVX512BW-LABEL: test_v16i1_sub:
   2931 ; AVX512BW:       ## %bb.0:
   2932 ; AVX512BW-NEXT:    kmovd %edi, %k0
   2933 ; AVX512BW-NEXT:    kmovd %esi, %k1
   2934 ; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
   2935 ; AVX512BW-NEXT:    kmovd %k0, %eax
   2936 ; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
   2937 ; AVX512BW-NEXT:    retq
   2938 ;
   2939 ; AVX512DQ-LABEL: test_v16i1_sub:
   2940 ; AVX512DQ:       ## %bb.0:
   2941 ; AVX512DQ-NEXT:    kmovw %edi, %k0
   2942 ; AVX512DQ-NEXT:    kmovw %esi, %k1
   2943 ; AVX512DQ-NEXT:    kxorw %k1, %k0, %k0
   2944 ; AVX512DQ-NEXT:    kmovw %k0, %eax
   2945 ; AVX512DQ-NEXT:    ## kill: def $ax killed $ax killed $eax
   2946 ; AVX512DQ-NEXT:    retq
   2947 ;
   2948 ; X86-LABEL: test_v16i1_sub:
   2949 ; X86:       ## %bb.0:
   2950 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0
   2951 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
   2952 ; X86-NEXT:    kxorw %k1, %k0, %k0
   2953 ; X86-NEXT:    kmovd %k0, %eax
   2954 ; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
   2955 ; X86-NEXT:    retl
   2956   %m0 = bitcast i16 %x to <16 x i1>
   2957   %m1 = bitcast i16 %y to <16 x i1>
   2958   %m2 = sub <16 x i1> %m0,  %m1
   2959   %ret = bitcast <16 x i1> %m2 to i16
   2960   ret i16 %ret
   2961 }
   2962 
   2963 define i16 @test_v16i1_mul(i16 %x, i16 %y) {
        ; Reinterprets %x and %y as <16 x i1> masks, multiplies them lane-wise and
        ; returns the result as i16. The product of two 1-bit values is their
        ; logical AND, and every expectation below uses a mask-register AND (kandw).
   2964 ; KNL-LABEL: test_v16i1_mul:
   2965 ; KNL:       ## %bb.0:
   2966 ; KNL-NEXT:    kmovw %edi, %k0
   2967 ; KNL-NEXT:    kmovw %esi, %k1
   2968 ; KNL-NEXT:    kandw %k1, %k0, %k0
   2969 ; KNL-NEXT:    kmovw %k0, %eax
   2970 ; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
   2971 ; KNL-NEXT:    retq
   2972 ;
   2973 ; SKX-LABEL: test_v16i1_mul:
   2974 ; SKX:       ## %bb.0:
   2975 ; SKX-NEXT:    kmovd %edi, %k0
   2976 ; SKX-NEXT:    kmovd %esi, %k1
   2977 ; SKX-NEXT:    kandw %k1, %k0, %k0
   2978 ; SKX-NEXT:    kmovd %k0, %eax
   2979 ; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
   2980 ; SKX-NEXT:    retq
   2981 ;
   2982 ; AVX512BW-LABEL: test_v16i1_mul:
   2983 ; AVX512BW:       ## %bb.0:
   2984 ; AVX512BW-NEXT:    kmovd %edi, %k0
   2985 ; AVX512BW-NEXT:    kmovd %esi, %k1
   2986 ; AVX512BW-NEXT:    kandw %k1, %k0, %k0
   2987 ; AVX512BW-NEXT:    kmovd %k0, %eax
   2988 ; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
   2989 ; AVX512BW-NEXT:    retq
   2990 ;
   2991 ; AVX512DQ-LABEL: test_v16i1_mul:
   2992 ; AVX512DQ:       ## %bb.0:
   2993 ; AVX512DQ-NEXT:    kmovw %edi, %k0
   2994 ; AVX512DQ-NEXT:    kmovw %esi, %k1
   2995 ; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
   2996 ; AVX512DQ-NEXT:    kmovw %k0, %eax
   2997 ; AVX512DQ-NEXT:    ## kill: def $ax killed $ax killed $eax
   2998 ; AVX512DQ-NEXT:    retq
   2999 ;
   3000 ; X86-LABEL: test_v16i1_mul:
   3001 ; X86:       ## %bb.0:
   3002 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0
   3003 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
   3004 ; X86-NEXT:    kandw %k1, %k0, %k0
   3005 ; X86-NEXT:    kmovd %k0, %eax
   3006 ; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
   3007 ; X86-NEXT:    retl
   3008   %m0 = bitcast i16 %x to <16 x i1>
   3009   %m1 = bitcast i16 %y to <16 x i1>
   3010   %m2 = mul <16 x i1> %m0,  %m1
   3011   %ret = bitcast <16 x i1> %m2 to i16
   3012   ret i16 %ret
   3013 }
   3014 
   3015 define i8 @test_v8i1_add(i8 %x, i8 %y) {
        ; Reinterprets %x and %y as <8 x i1> masks, adds them lane-wise and returns
        ; the result as i8. Addition of 1-bit lanes wraps modulo 2, so it is
        ; expected to become a mask-register XOR: the byte form (kxorb) in the SKX,
        ; AVX512DQ and i686 expectations, and the 16-bit form (kxorw) in the KNL
        ; and AVX512BW expectations, where only the low byte of the result is used.
   3016 ; KNL-LABEL: test_v8i1_add:
   3017 ; KNL:       ## %bb.0:
   3018 ; KNL-NEXT:    kmovw %edi, %k0
   3019 ; KNL-NEXT:    kmovw %esi, %k1
   3020 ; KNL-NEXT:    kxorw %k1, %k0, %k0
   3021 ; KNL-NEXT:    kmovw %k0, %eax
   3022 ; KNL-NEXT:    ## kill: def $al killed $al killed $eax
   3023 ; KNL-NEXT:    retq
   3024 ;
   3025 ; SKX-LABEL: test_v8i1_add:
   3026 ; SKX:       ## %bb.0:
   3027 ; SKX-NEXT:    kmovd %edi, %k0
   3028 ; SKX-NEXT:    kmovd %esi, %k1
   3029 ; SKX-NEXT:    kxorb %k1, %k0, %k0
   3030 ; SKX-NEXT:    kmovd %k0, %eax
   3031 ; SKX-NEXT:    ## kill: def $al killed $al killed $eax
   3032 ; SKX-NEXT:    retq
   3033 ;
   3034 ; AVX512BW-LABEL: test_v8i1_add:
   3035 ; AVX512BW:       ## %bb.0:
   3036 ; AVX512BW-NEXT:    kmovd %edi, %k0
   3037 ; AVX512BW-NEXT:    kmovd %esi, %k1
   3038 ; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
   3039 ; AVX512BW-NEXT:    kmovd %k0, %eax
   3040 ; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
   3041 ; AVX512BW-NEXT:    retq
   3042 ;
   3043 ; AVX512DQ-LABEL: test_v8i1_add:
   3044 ; AVX512DQ:       ## %bb.0:
   3045 ; AVX512DQ-NEXT:    kmovw %edi, %k0
   3046 ; AVX512DQ-NEXT:    kmovw %esi, %k1
   3047 ; AVX512DQ-NEXT:    kxorb %k1, %k0, %k0
   3048 ; AVX512DQ-NEXT:    kmovw %k0, %eax
   3049 ; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
   3050 ; AVX512DQ-NEXT:    retq
   3051 ;
   3052 ; X86-LABEL: test_v8i1_add:
   3053 ; X86:       ## %bb.0:
   3054 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0
   3055 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
   3056 ; X86-NEXT:    kxorb %k1, %k0, %k0
   3057 ; X86-NEXT:    kmovd %k0, %eax
   3058 ; X86-NEXT:    ## kill: def $al killed $al killed $eax
   3059 ; X86-NEXT:    retl
   3060   %m0 = bitcast i8 %x to <8 x i1>
   3061   %m1 = bitcast i8 %y to <8 x i1>
   3062   %m2 = add <8 x i1> %m0,  %m1
   3063   %ret = bitcast <8 x i1> %m2 to i8
   3064   ret i8 %ret
   3065 }
   3066 
   3067 define i8 @test_v8i1_sub(i8 %x, i8 %y) {
        ; Reinterprets %x and %y as <8 x i1> masks, subtracts them lane-wise and
        ; returns the result as i8. Subtraction of 1-bit lanes wraps modulo 2, so it
        ; is expected to become a mask-register XOR: the byte form (kxorb) in the
        ; SKX, AVX512DQ and i686 expectations, and the 16-bit form (kxorw) in the
        ; KNL and AVX512BW expectations, where only the low byte is used.
   3068 ; KNL-LABEL: test_v8i1_sub:
   3069 ; KNL:       ## %bb.0:
   3070 ; KNL-NEXT:    kmovw %edi, %k0
   3071 ; KNL-NEXT:    kmovw %esi, %k1
   3072 ; KNL-NEXT:    kxorw %k1, %k0, %k0
   3073 ; KNL-NEXT:    kmovw %k0, %eax
   3074 ; KNL-NEXT:    ## kill: def $al killed $al killed $eax
   3075 ; KNL-NEXT:    retq
   3076 ;
   3077 ; SKX-LABEL: test_v8i1_sub:
   3078 ; SKX:       ## %bb.0:
   3079 ; SKX-NEXT:    kmovd %edi, %k0
   3080 ; SKX-NEXT:    kmovd %esi, %k1
   3081 ; SKX-NEXT:    kxorb %k1, %k0, %k0
   3082 ; SKX-NEXT:    kmovd %k0, %eax
   3083 ; SKX-NEXT:    ## kill: def $al killed $al killed $eax
   3084 ; SKX-NEXT:    retq
   3085 ;
   3086 ; AVX512BW-LABEL: test_v8i1_sub:
   3087 ; AVX512BW:       ## %bb.0:
   3088 ; AVX512BW-NEXT:    kmovd %edi, %k0
   3089 ; AVX512BW-NEXT:    kmovd %esi, %k1
   3090 ; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
   3091 ; AVX512BW-NEXT:    kmovd %k0, %eax
   3092 ; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
   3093 ; AVX512BW-NEXT:    retq
   3094 ;
   3095 ; AVX512DQ-LABEL: test_v8i1_sub:
   3096 ; AVX512DQ:       ## %bb.0:
   3097 ; AVX512DQ-NEXT:    kmovw %edi, %k0
   3098 ; AVX512DQ-NEXT:    kmovw %esi, %k1
   3099 ; AVX512DQ-NEXT:    kxorb %k1, %k0, %k0
   3100 ; AVX512DQ-NEXT:    kmovw %k0, %eax
   3101 ; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
   3102 ; AVX512DQ-NEXT:    retq
   3103 ;
   3104 ; X86-LABEL: test_v8i1_sub:
   3105 ; X86:       ## %bb.0:
   3106 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0
   3107 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
   3108 ; X86-NEXT:    kxorb %k1, %k0, %k0
   3109 ; X86-NEXT:    kmovd %k0, %eax
   3110 ; X86-NEXT:    ## kill: def $al killed $al killed $eax
   3111 ; X86-NEXT:    retl
   3112   %m0 = bitcast i8 %x to <8 x i1>
   3113   %m1 = bitcast i8 %y to <8 x i1>
   3114   %m2 = sub <8 x i1> %m0,  %m1
   3115   %ret = bitcast <8 x i1> %m2 to i8
   3116   ret i8 %ret
   3117 }
   3118 
   3119 define i8 @test_v8i1_mul(i8 %x, i8 %y) {
        ; Reinterprets %x and %y as <8 x i1> masks, multiplies them lane-wise and
        ; returns the result as i8. The product of two 1-bit values is their
        ; logical AND, so it is expected to become a mask-register AND: the byte
        ; form (kandb) in the SKX, AVX512DQ and i686 expectations, and the 16-bit
        ; form (kandw) in the KNL and AVX512BW expectations.
   3120 ; KNL-LABEL: test_v8i1_mul:
   3121 ; KNL:       ## %bb.0:
   3122 ; KNL-NEXT:    kmovw %edi, %k0
   3123 ; KNL-NEXT:    kmovw %esi, %k1
   3124 ; KNL-NEXT:    kandw %k1, %k0, %k0
   3125 ; KNL-NEXT:    kmovw %k0, %eax
   3126 ; KNL-NEXT:    ## kill: def $al killed $al killed $eax
   3127 ; KNL-NEXT:    retq
   3128 ;
   3129 ; SKX-LABEL: test_v8i1_mul:
   3130 ; SKX:       ## %bb.0:
   3131 ; SKX-NEXT:    kmovd %edi, %k0
   3132 ; SKX-NEXT:    kmovd %esi, %k1
   3133 ; SKX-NEXT:    kandb %k1, %k0, %k0
   3134 ; SKX-NEXT:    kmovd %k0, %eax
   3135 ; SKX-NEXT:    ## kill: def $al killed $al killed $eax
   3136 ; SKX-NEXT:    retq
   3137 ;
   3138 ; AVX512BW-LABEL: test_v8i1_mul:
   3139 ; AVX512BW:       ## %bb.0:
   3140 ; AVX512BW-NEXT:    kmovd %edi, %k0
   3141 ; AVX512BW-NEXT:    kmovd %esi, %k1
   3142 ; AVX512BW-NEXT:    kandw %k1, %k0, %k0
   3143 ; AVX512BW-NEXT:    kmovd %k0, %eax
   3144 ; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
   3145 ; AVX512BW-NEXT:    retq
   3146 ;
   3147 ; AVX512DQ-LABEL: test_v8i1_mul:
   3148 ; AVX512DQ:       ## %bb.0:
   3149 ; AVX512DQ-NEXT:    kmovw %edi, %k0
   3150 ; AVX512DQ-NEXT:    kmovw %esi, %k1
   3151 ; AVX512DQ-NEXT:    kandb %k1, %k0, %k0
   3152 ; AVX512DQ-NEXT:    kmovw %k0, %eax
   3153 ; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
   3154 ; AVX512DQ-NEXT:    retq
   3155 ;
   3156 ; X86-LABEL: test_v8i1_mul:
   3157 ; X86:       ## %bb.0:
   3158 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0
   3159 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
   3160 ; X86-NEXT:    kandb %k1, %k0, %k0
   3161 ; X86-NEXT:    kmovd %k0, %eax
   3162 ; X86-NEXT:    ## kill: def $al killed $al killed $eax
   3163 ; X86-NEXT:    retl
   3164   %m0 = bitcast i8 %x to <8 x i1>
   3165   %m1 = bitcast i8 %y to <8 x i1>
   3166   %m2 = mul <8 x i1> %m0,  %m1
   3167   %ret = bitcast <8 x i1> %m2 to i8
   3168   ret i8 %ret
   3169 }
   3170 
   3171 ; Make sure we don't emit a ktest for signed comparisons.
        ; The 16-lane mask is bitcast to i16 and compared with a signed predicate
        ; (sgt 0). Every configuration below is expected to copy the mask register
        ; into a GPR and branch with testw + jle instead of testing the mask
        ; register directly.
   3172 define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
   3173 ; KNL-LABEL: ktest_signed:
   3174 ; KNL:       ## %bb.0:
   3175 ; KNL-NEXT:    pushq %rax
   3176 ; KNL-NEXT:    .cfi_def_cfa_offset 16
   3177 ; KNL-NEXT:    vporq %zmm1, %zmm0, %zmm0
   3178 ; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
   3179 ; KNL-NEXT:    kmovw %k0, %eax
   3180 ; KNL-NEXT:    testw %ax, %ax
   3181 ; KNL-NEXT:    jle LBB64_1
   3182 ; KNL-NEXT:  ## %bb.2: ## %bb.2
   3183 ; KNL-NEXT:    popq %rax
   3184 ; KNL-NEXT:    vzeroupper
   3185 ; KNL-NEXT:    retq
   3186 ; KNL-NEXT:  LBB64_1: ## %bb.1
   3187 ; KNL-NEXT:    vzeroupper
   3188 ; KNL-NEXT:    callq _foo
   3189 ; KNL-NEXT:    popq %rax
   3190 ; KNL-NEXT:    retq
   3191 ;
   3192 ; SKX-LABEL: ktest_signed:
   3193 ; SKX:       ## %bb.0:
   3194 ; SKX-NEXT:    pushq %rax
   3195 ; SKX-NEXT:    .cfi_def_cfa_offset 16
   3196 ; SKX-NEXT:    vporq %zmm1, %zmm0, %zmm0
   3197 ; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0
   3198 ; SKX-NEXT:    kmovd %k0, %eax
   3199 ; SKX-NEXT:    testw %ax, %ax
   3200 ; SKX-NEXT:    jle LBB64_1
   3201 ; SKX-NEXT:  ## %bb.2: ## %bb.2
   3202 ; SKX-NEXT:    popq %rax
   3203 ; SKX-NEXT:    vzeroupper
   3204 ; SKX-NEXT:    retq
   3205 ; SKX-NEXT:  LBB64_1: ## %bb.1
   3206 ; SKX-NEXT:    vzeroupper
   3207 ; SKX-NEXT:    callq _foo
   3208 ; SKX-NEXT:    popq %rax
   3209 ; SKX-NEXT:    retq
   3210 ;
   3211 ; AVX512BW-LABEL: ktest_signed:
   3212 ; AVX512BW:       ## %bb.0:
   3213 ; AVX512BW-NEXT:    pushq %rax
   3214 ; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
   3215 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
   3216 ; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
   3217 ; AVX512BW-NEXT:    kmovd %k0, %eax
   3218 ; AVX512BW-NEXT:    testw %ax, %ax
   3219 ; AVX512BW-NEXT:    jle LBB64_1
   3220 ; AVX512BW-NEXT:  ## %bb.2: ## %bb.2
   3221 ; AVX512BW-NEXT:    popq %rax
   3222 ; AVX512BW-NEXT:    vzeroupper
   3223 ; AVX512BW-NEXT:    retq
   3224 ; AVX512BW-NEXT:  LBB64_1: ## %bb.1
   3225 ; AVX512BW-NEXT:    vzeroupper
   3226 ; AVX512BW-NEXT:    callq _foo
   3227 ; AVX512BW-NEXT:    popq %rax
   3228 ; AVX512BW-NEXT:    retq
   3229 ;
   3230 ; AVX512DQ-LABEL: ktest_signed:
   3231 ; AVX512DQ:       ## %bb.0:
   3232 ; AVX512DQ-NEXT:    pushq %rax
   3233 ; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
   3234 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
   3235 ; AVX512DQ-NEXT:    vptestnmd %zmm0, %zmm0, %k0
   3236 ; AVX512DQ-NEXT:    kmovw %k0, %eax
   3237 ; AVX512DQ-NEXT:    testw %ax, %ax
   3238 ; AVX512DQ-NEXT:    jle LBB64_1
   3239 ; AVX512DQ-NEXT:  ## %bb.2: ## %bb.2
   3240 ; AVX512DQ-NEXT:    popq %rax
   3241 ; AVX512DQ-NEXT:    vzeroupper
   3242 ; AVX512DQ-NEXT:    retq
   3243 ; AVX512DQ-NEXT:  LBB64_1: ## %bb.1
   3244 ; AVX512DQ-NEXT:    vzeroupper
   3245 ; AVX512DQ-NEXT:    callq _foo
   3246 ; AVX512DQ-NEXT:    popq %rax
   3247 ; AVX512DQ-NEXT:    retq
   3248 ;
   3249 ; X86-LABEL: ktest_signed:
   3250 ; X86:       ## %bb.0:
   3251 ; X86-NEXT:    subl $12, %esp
   3252 ; X86-NEXT:    .cfi_def_cfa_offset 16
   3253 ; X86-NEXT:    vporq %zmm1, %zmm0, %zmm0
   3254 ; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
   3255 ; X86-NEXT:    kmovd %k0, %eax
   3256 ; X86-NEXT:    testw %ax, %ax
   3257 ; X86-NEXT:    jle LBB64_1
   3258 ; X86-NEXT:  ## %bb.2: ## %bb.2
   3259 ; X86-NEXT:    addl $12, %esp
   3260 ; X86-NEXT:    vzeroupper
   3261 ; X86-NEXT:    retl
   3262 ; X86-NEXT:  LBB64_1: ## %bb.1
   3263 ; X86-NEXT:    vzeroupper
   3264 ; X86-NEXT:    calll _foo
   3265 ; X86-NEXT:    addl $12, %esp
   3266 ; X86-NEXT:    retl
          ; Per-lane "x == 0 and y == 0"; the expected output computes this as
          ; vporq followed by vptestnmd on the combined value.
   3267   %a = icmp eq <16 x i32> %x, zeroinitializer
   3268   %b = icmp eq <16 x i32> %y, zeroinitializer
   3269   %c = and <16 x i1> %a, %b
          ; Reinterpret the 16 mask bits as one integer so it can be compared.
   3270   %d = bitcast <16 x i1> %c to i16
          ; Signed compare: go straight to %bb.2 only when the i16 is > 0,
          ; otherwise call @foo first.
   3271   %e = icmp sgt i16 %d, 0
   3272   br i1 %e, label %bb.2, label %bb.1
   3273 bb.1:
   3274   call void @foo()
   3275   br label %bb.2
   3276 bb.2:
   3277   ret void
   3278 }
        ; External callee used as the observable side effect of the branch tests.
   3279 declare void @foo()
   3280 
   3281 ; Make sure we can use the C flag from kortest to check for all ones.
        ; The mask is compared against -1 (all sixteen bits set); the expected code
        ; keeps the value in a mask register and branches with kortestw + jb, with
        ; no copy to a general-purpose register.
   3282 define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
   3283 ; CHECK-LABEL: ktest_allones:
   3284 ; CHECK:       ## %bb.0:
   3285 ; CHECK-NEXT:    pushq %rax
   3286 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
   3287 ; CHECK-NEXT:    vporq %zmm1, %zmm0, %zmm0
   3288 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k0
   3289 ; CHECK-NEXT:    kortestw %k0, %k0
   3290 ; CHECK-NEXT:    jb LBB65_2
   3291 ; CHECK-NEXT:  ## %bb.1: ## %bb.1
   3292 ; CHECK-NEXT:    vzeroupper
   3293 ; CHECK-NEXT:    callq _foo
   3294 ; CHECK-NEXT:  LBB65_2: ## %bb.2
   3295 ; CHECK-NEXT:    popq %rax
   3296 ; CHECK-NEXT:    vzeroupper
   3297 ; CHECK-NEXT:    retq
   3298 ;
   3299 ; X86-LABEL: ktest_allones:
   3300 ; X86:       ## %bb.0:
   3301 ; X86-NEXT:    subl $12, %esp
   3302 ; X86-NEXT:    .cfi_def_cfa_offset 16
   3303 ; X86-NEXT:    vporq %zmm1, %zmm0, %zmm0
   3304 ; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
   3305 ; X86-NEXT:    kortestw %k0, %k0
   3306 ; X86-NEXT:    jb LBB65_2
   3307 ; X86-NEXT:  ## %bb.1: ## %bb.1
   3308 ; X86-NEXT:    vzeroupper
   3309 ; X86-NEXT:    calll _foo
   3310 ; X86-NEXT:  LBB65_2: ## %bb.2
   3311 ; X86-NEXT:    addl $12, %esp
   3312 ; X86-NEXT:    vzeroupper
   3313 ; X86-NEXT:    retl
          ; Per-lane "x == 0 and y == 0", then view the 16 mask bits as an i16.
   3314   %a = icmp eq <16 x i32> %x, zeroinitializer
   3315   %b = icmp eq <16 x i32> %y, zeroinitializer
   3316   %c = and <16 x i1> %a, %b
   3317   %d = bitcast <16 x i1> %c to i16
          ; All-ones check: skip the call to @foo only when every lane matched.
   3318   %e = icmp eq i16 %d, -1
   3319   br i1 %e, label %bb.2, label %bb.1
   3320 bb.1:
   3321   call void @foo()
   3322   br label %bb.2
   3323 bb.2:
   3324   ret void
   3325 }
   3326 
   3327 ; This is derived from an intrinsic test where v4i1 mask was created by _mm_cmp_epi32_mask, then it was passed to _mm512_mask_blend_epi32 which uses a v16i1 mask.
   3328 ; The widening happens in the scalar domain between the intrinsics. The middle end optimized it to this.
   3329 define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d, <8 x i64> %e, <8 x i64> %f) {
   3330 ; KNL-LABEL: mask_widening:
   3331 ; KNL:       ## %bb.0: ## %entry
   3332 ; KNL-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
   3333 ; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
   3334 ; KNL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
   3335 ; KNL-NEXT:    kshiftlw $12, %k0, %k0
   3336 ; KNL-NEXT:    kshiftrw $12, %k0, %k1
   3337 ; KNL-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
   3338 ; KNL-NEXT:    retq
   3339 ;
   3340 ; SKX-LABEL: mask_widening:
   3341 ; SKX:       ## %bb.0: ## %entry
   3342 ; SKX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1
   3343 ; SKX-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
   3344 ; SKX-NEXT:    retq
   3345 ;
   3346 ; AVX512BW-LABEL: mask_widening:
   3347 ; AVX512BW:       ## %bb.0: ## %entry
   3348 ; AVX512BW-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
   3349 ; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
   3350 ; AVX512BW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
   3351 ; AVX512BW-NEXT:    kshiftlw $12, %k0, %k0
   3352 ; AVX512BW-NEXT:    kshiftrw $12, %k0, %k1
   3353 ; AVX512BW-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
   3354 ; AVX512BW-NEXT:    retq
   3355 ;
   3356 ; AVX512DQ-LABEL: mask_widening:
   3357 ; AVX512DQ:       ## %bb.0: ## %entry
   3358 ; AVX512DQ-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
   3359 ; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
   3360 ; AVX512DQ-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
   3361 ; AVX512DQ-NEXT:    kshiftlw $12, %k0, %k0
   3362 ; AVX512DQ-NEXT:    kshiftrw $12, %k0, %k1
   3363 ; AVX512DQ-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
   3364 ; AVX512DQ-NEXT:    retq
   3365 ;
   3366 ; X86-LABEL: mask_widening:
   3367 ; X86:       ## %bb.0: ## %entry
   3368 ; X86-NEXT:    pushl %ebp
   3369 ; X86-NEXT:    .cfi_def_cfa_offset 8
   3370 ; X86-NEXT:    .cfi_offset %ebp, -8
   3371 ; X86-NEXT:    movl %esp, %ebp
   3372 ; X86-NEXT:    .cfi_def_cfa_register %ebp
   3373 ; X86-NEXT:    andl $-64, %esp
   3374 ; X86-NEXT:    subl $64, %esp
   3375 ; X86-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1
   3376 ; X86-NEXT:    vmovdqa64 8(%ebp), %zmm0
   3377 ; X86-NEXT:    vmovdqa32 72(%ebp), %zmm0 {%k1}
   3378 ; X86-NEXT:    movl %ebp, %esp
   3379 ; X86-NEXT:    popl %ebp
   3380 ; X86-NEXT:    retl
   3381 entry:
          ; In the expected output above, the configurations built with avx512vl
          ; compare directly on xmm registers; the others compare on zmm registers
          ; and then clear mask bits 4-15 with a kshiftlw/kshiftrw pair.
          ;
          ; Compare the first two arguments as four i32 lanes -> <4 x i1> mask.
          ; Arguments %c and %d are not referenced by the body.
   3382   %0 = bitcast <2 x i64> %a to <4 x i32>
   3383   %1 = bitcast <2 x i64> %b to <4 x i32>
   3384   %2 = icmp eq <4 x i32> %0, %1
          ; Widen 4 -> 8 lanes; lanes 4-7 come from the zeroinitializer operand.
   3385   %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   3386   %4 = bitcast <8 x i64> %f to <16 x i32>
   3387   %5 = bitcast <8 x i64> %e to <16 x i32>
          ; Widen 8 -> 16 lanes; indices 8-15 all pick element 0 of the second
          ; operand, which is false, so the upper eight lanes are cleared.
   3388   %6 = shufflevector <8 x i1> %3, <8 x i1> <i1 false, i1 undef, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
          ; Blend: lanes whose mask bit is set take %f, all other lanes take %e.
   3389   %7 = select <16 x i1> %6, <16 x i32> %4, <16 x i32> %5
   3390   %8 = bitcast <16 x i32> %7 to <8 x i64>
   3391   ret <8 x i64> %8
   3392 }
   3393 
   3394 define void @store_v64i1_constant(<64 x i1>* %R) {
   3395 ; CHECK-LABEL: store_v64i1_constant:
   3396 ; CHECK:       ## %bb.0: ## %entry
   3397 ; CHECK-NEXT:    movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
   3398 ; CHECK-NEXT:    movq %rax, (%rdi)
   3399 ; CHECK-NEXT:    retq
   3400 ;
   3401 ; X86-LABEL: store_v64i1_constant:
   3402 ; X86:       ## %bb.0: ## %entry
   3403 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3404 ; X86-NEXT:    movl $-536871045, 4(%eax) ## imm = 0xDFFFFF7B
   3405 ; X86-NEXT:    movl $-4099, (%eax) ## imm = 0xEFFD
   3406 ; X86-NEXT:    retl
   3407 entry:
          ; Store of a constant 64-lane mask. The expected output materializes it
          ; as plain integer immediates: one 64-bit store on the x86-64 runs and
          ; two 32-bit stores (high half at offset 4) on the i686 run.
   3408   store <64 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1>, <64 x i1>* %R
   3409   ret void
   3410 }
   3411 
   3412 define void @store_v2i1_constant(<2 x i1>* %R) {
   3413 ; CHECK-LABEL: store_v2i1_constant:
   3414 ; CHECK:       ## %bb.0: ## %entry
   3415 ; CHECK-NEXT:    movb $1, (%rdi)
   3416 ; CHECK-NEXT:    retq
   3417 ;
   3418 ; X86-LABEL: store_v2i1_constant:
   3419 ; X86:       ## %bb.0: ## %entry
   3420 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3421 ; X86-NEXT:    movb $1, (%eax)
   3422 ; X86-NEXT:    retl
   3423 entry:
          ; Constant 2-lane mask <1, 0>: expected to be written as the single
          ; byte 1, i.e. element 0 ends up in bit 0 of the stored byte.
   3424   store <2 x i1> <i1 1, i1 0>, <2 x i1>* %R
   3425   ret void
   3426 }
   3427 
   3428 define void @store_v4i1_constant(<4 x i1>* %R) {
   3429 ; CHECK-LABEL: store_v4i1_constant:
   3430 ; CHECK:       ## %bb.0: ## %entry
   3431 ; CHECK-NEXT:    movb $5, (%rdi)
   3432 ; CHECK-NEXT:    retq
   3433 ;
   3434 ; X86-LABEL: store_v4i1_constant:
   3435 ; X86:       ## %bb.0: ## %entry
   3436 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3437 ; X86-NEXT:    movb $5, (%eax)
   3438 ; X86-NEXT:    retl
   3439 entry:
          ; Constant 4-lane mask <1, 0, 1, 0>: expected to be written as the
          ; single byte 5 (bits 0 and 2 set, matching elements 0 and 2).
   3440   store <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x i1>* %R
   3441   ret void
   3442 }
   3443 
   3444 ; Make sure we bring the -1 constant into the mask domain.
        ; In the IR the inversion (xor with -1) and the combining AND are done on
        ; i16 bitcasts of the masks. The expected output contains neither a mask
        ; to GPR copy nor an explicit NOT: the inverted compare appears as
        ; vpcmpnleud and feeds a masked vptestmd.
   3445 define void @mask_not_cast(i8*, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) {
   3446 ; CHECK-LABEL: mask_not_cast:
   3447 ; CHECK:       ## %bb.0:
   3448 ; CHECK-NEXT:    vpcmpnleud %zmm3, %zmm2, %k1
   3449 ; CHECK-NEXT:    vptestmd %zmm0, %zmm1, %k1 {%k1}
   3450 ; CHECK-NEXT:    vmovdqu32 %zmm0, (%rdi) {%k1}
   3451 ; CHECK-NEXT:    vzeroupper
   3452 ; CHECK-NEXT:    retq
   3453 ;
   3454 ; X86-LABEL: mask_not_cast:
   3455 ; X86:       ## %bb.0:
   3456 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3457 ; X86-NEXT:    vpcmpnleud %zmm3, %zmm2, %k1
   3458 ; X86-NEXT:    vptestmd %zmm0, %zmm1, %k1 {%k1}
   3459 ; X86-NEXT:    vmovdqu32 %zmm0, (%eax) {%k1}
   3460 ; X86-NEXT:    vzeroupper
   3461 ; X86-NEXT:    retl
          ; Arguments are unnamed (%0 is the pointer, %1-%4 the vectors), so the
          ; first instruction is numbered %6.
          ; %9: bit set for every i32 lane where (%2 & %1) is non-zero.
   3462   %6 = and <8 x i64> %2, %1
   3463   %7 = bitcast <8 x i64> %6 to <16 x i32>
   3464   %8 = icmp ne <16 x i32> %7, zeroinitializer
   3465   %9 = bitcast <16 x i1> %8 to i16
          ; %13: bit set for every i32 lane where %3 <= %4 (unsigned).
   3466   %10 = bitcast <8 x i64> %3 to <16 x i32>
   3467   %11 = bitcast <8 x i64> %4 to <16 x i32>
   3468   %12 = icmp ule <16 x i32> %10, %11
   3469   %13 = bitcast <16 x i1> %12 to i16
          ; Invert the second mask with a scalar -1 and combine it with the first.
   3470   %14 = xor i16 %13, -1
   3471   %15 = and i16 %14, %9
          ; Store the lanes of %1 selected by the combined mask through pointer %0
          ; (alignment operand is 1).
   3472   %16 = bitcast <8 x i64> %1 to <16 x i32>
   3473   %17 = bitcast i8* %0 to <16 x i32>*
   3474   %18 = bitcast i16 %15 to <16 x i1>
   3475   tail call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %16, <16 x i32>* %17, i32 1, <16 x i1> %18) #2
   3476   ret void
   3477 }
   3478 declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
   3479