Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -stack-symbol-ordering=0 -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
      3 ; RUN: llc < %s -stack-symbol-ordering=0 -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
      4 
      5 define i16 @mask16(i16 %x) {
      6 ; CHECK-LABEL: mask16:
      7 ; CHECK:       ## BB#0:
      8 ; CHECK-NEXT:    kmovw %edi, %k0
      9 ; CHECK-NEXT:    knotw %k0, %k0
     10 ; CHECK-NEXT:    kmovw %k0, %eax
     11 ; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
     12 ; CHECK-NEXT:    retq
        ; Bitwise NOT of a 16-bit mask: the i16 argument is reinterpreted as
        ; <16 x i1>, xor'ed with all-ones and cast back to i16. The shared CHECK
        ; prefix means both RUN lines expect the k-register knotw sequence above.
     13   %m0 = bitcast i16 %x to <16 x i1>
     14   %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
     15   %ret = bitcast <16 x i1> %m1 to i16
     16   ret i16 %ret
     17 }
     18 
     19 define i32 @mask16_zext(i16 %x) {
     20 ; CHECK-LABEL: mask16_zext:
     21 ; CHECK:       ## BB#0:
     22 ; CHECK-NEXT:    kmovw %edi, %k0
     23 ; CHECK-NEXT:    knotw %k0, %k0
     24 ; CHECK-NEXT:    kmovw %k0, %eax
     25 ; CHECK-NEXT:    retq
        ; Same 16-bit mask NOT as mask16, but the result is zero-extended to i32.
        ; The expected output has no separate extension instruction after the
        ; final kmovw into %eax, and no kill note because the full register is
        ; returned.
     26   %m0 = bitcast i16 %x to <16 x i1>
     27   %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
     28   %m2 = bitcast <16 x i1> %m1 to i16
     29   %ret = zext i16 %m2 to i32
     30   ret i32 %ret
     31 }
     32 
     33 define i8 @mask8(i8 %x) {
     34 ; KNL-LABEL: mask8:
     35 ; KNL:       ## BB#0:
     36 ; KNL-NEXT:    kmovw %edi, %k0
     37 ; KNL-NEXT:    knotw %k0, %k0
     38 ; KNL-NEXT:    kmovw %k0, %eax
     39 ; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
     40 ; KNL-NEXT:    retq
     41 ;
     42 ; SKX-LABEL: mask8:
     43 ; SKX:       ## BB#0:
     44 ; SKX-NEXT:    kmovb %edi, %k0
     45 ; SKX-NEXT:    knotb %k0, %k0
     46 ; SKX-NEXT:    kmovb %k0, %eax
     47 ; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
     48 ; SKX-NEXT:    retq
     49   %m0 = bitcast i8 %x to <8 x i1>
     50   %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
     51   %ret = bitcast <8 x i1> %m1 to i8
     52   ret i8 %ret
     53 }
     54 
     55 define i32 @mask8_zext(i8 %x) {
     56 ; KNL-LABEL: mask8_zext:
     57 ; KNL:       ## BB#0:
     58 ; KNL-NEXT:    kmovw %edi, %k0
     59 ; KNL-NEXT:    knotw %k0, %k0
     60 ; KNL-NEXT:    kmovw %k0, %eax
        ; KNL-NEXT:    movzbl %al, %eax
     61 ; KNL-NEXT:    retq
     62 ;
     63 ; SKX-LABEL: mask8_zext:
     64 ; SKX:       ## BB#0:
     65 ; SKX-NEXT:    kmovb %edi, %k0
     66 ; SKX-NEXT:    knotb %k0, %k0
     67 ; SKX-NEXT:    kmovb %k0, %eax
     68 ; SKX-NEXT:    retq
        ; Inverts an 8-bit mask and zero-extends the result to i32.
        ; The KNL sequence performs the NOT with the 16-bit knotw, which also
        ; flips bits 8-15, so the value read back with kmovw has to be narrowed
        ; with movzbl before it is a valid zext of the i8 result. The SKX
        ; sequence uses the byte-wide kmovb/knotb forms and needs no extra
        ; extension.
     69   %m0 = bitcast i8 %x to <8 x i1>
     70   %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
     71   %m2 = bitcast <8 x i1> %m1 to i8
     72   %ret = zext i8 %m2 to i32
     73   ret i32 %ret
     74 }
     75 
     76 define void @mask16_mem(i16* %ptr) {
     77 ; CHECK-LABEL: mask16_mem:
     78 ; CHECK:       ## BB#0:
     79 ; CHECK-NEXT:    kmovw (%rdi), %k0
     80 ; CHECK-NEXT:    knotw %k0, %k0
     81 ; CHECK-NEXT:    kmovw %k0, (%rdi)
     82 ; CHECK-NEXT:    retq
     83   %x = load i16, i16* %ptr, align 4
     84   %m0 = bitcast i16 %x to <16 x i1>
     85   %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
     86   %ret = bitcast <16 x i1> %m1 to i16
     87   store i16 %ret, i16* %ptr, align 4
     88   ret void
     89 }
     90 
     91 define void @mask8_mem(i8* %ptr) {
     92 ; KNL-LABEL: mask8_mem:
     93 ; KNL:       ## BB#0:
     94 ; KNL-NEXT:    movzbl (%rdi), %eax
     95 ; KNL-NEXT:    kmovw %eax, %k0
     96 ; KNL-NEXT:    knotw %k0, %k0
     97 ; KNL-NEXT:    kmovw %k0, %eax
     98 ; KNL-NEXT:    movb %al, (%rdi)
     99 ; KNL-NEXT:    retq
    100 ;
    101 ; SKX-LABEL: mask8_mem:
    102 ; SKX:       ## BB#0:
    103 ; SKX-NEXT:    kmovb (%rdi), %k0
    104 ; SKX-NEXT:    knotb %k0, %k0
    105 ; SKX-NEXT:    kmovb %k0, (%rdi)
    106 ; SKX-NEXT:    retq
    107   %x = load i8, i8* %ptr, align 4
    108   %m0 = bitcast i8 %x to <8 x i1>
    109   %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
    110   %ret = bitcast <8 x i1> %m1 to i8
    111   store i8 %ret, i8* %ptr, align 4
    112   ret void
    113 }
    114 
    115 define i16 @mand16(i16 %x, i16 %y) {
    116 ; CHECK-LABEL: mand16:
    117 ; CHECK:       ## BB#0:
    118 ; CHECK-NEXT:    movl %edi, %eax
    119 ; CHECK-NEXT:    xorl %esi, %eax
    120 ; CHECK-NEXT:    andl %esi, %edi
    121 ; CHECK-NEXT:    orl %eax, %edi
    122 ; CHECK-NEXT:    movl %edi, %eax
    123 ; CHECK-NEXT:    retq
    124   %ma = bitcast i16 %x to <16 x i1>
    125   %mb = bitcast i16 %y to <16 x i1>
    126   %mc = and <16 x i1> %ma, %mb
    127   %md = xor <16 x i1> %ma, %mb
    128   %me = or <16 x i1> %mc, %md
    129   %ret = bitcast <16 x i1> %me to i16
    130   ret i16 %ret
    131 }
    132 
    133 define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
    134 ; CHECK-LABEL: mand16_mem:
    135 ; CHECK:       ## BB#0:
    136 ; CHECK-NEXT:    kmovw (%rdi), %k0
    137 ; CHECK-NEXT:    kmovw (%rsi), %k1
    138 ; CHECK-NEXT:    kandw %k1, %k0, %k2
    139 ; CHECK-NEXT:    kxorw %k1, %k0, %k0
    140 ; CHECK-NEXT:    korw %k0, %k2, %k0
    141 ; CHECK-NEXT:    kmovw %k0, %eax
    142 ; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
    143 ; CHECK-NEXT:    retq
    144   %ma = load <16 x i1>, <16 x i1>* %x
    145   %mb = load <16 x i1>, <16 x i1>* %y
    146   %mc = and <16 x i1> %ma, %mb
    147   %md = xor <16 x i1> %ma, %mb
    148   %me = or <16 x i1> %mc, %md
    149   %ret = bitcast <16 x i1> %me to i16
    150   ret i16 %ret
    151 }
    152 
    153 define i8 @shuf_test1(i16 %v) nounwind {
    154 ; KNL-LABEL: shuf_test1:
    155 ; KNL:       ## BB#0:
    156 ; KNL-NEXT:    kmovw %edi, %k0
    157 ; KNL-NEXT:    kshiftrw $8, %k0, %k0
    158 ; KNL-NEXT:    kmovw %k0, %eax
    159 ; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
    160 ; KNL-NEXT:    retq
    161 ;
    162 ; SKX-LABEL: shuf_test1:
    163 ; SKX:       ## BB#0:
    164 ; SKX-NEXT:    kmovw %edi, %k0
    165 ; SKX-NEXT:    kshiftrw $8, %k0, %k0
    166 ; SKX-NEXT:    kmovb %k0, %eax
    167 ; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
    168 ; SKX-NEXT:    retq
    169    %v1 = bitcast i16 %v to <16 x i1>
    170    %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    171    %mask1 = bitcast <8 x i1> %mask to i8
    172    ret i8 %mask1
    173 }
    174 
    175 define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
    176 ; CHECK-LABEL: zext_test1:
    177 ; CHECK:       ## BB#0:
    178 ; CHECK-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
    179 ; CHECK-NEXT:    kshiftlw $10, %k0, %k0
    180 ; CHECK-NEXT:    kshiftrw $15, %k0, %k0
    181 ; CHECK-NEXT:    kmovw %k0, %eax
    182 ; CHECK-NEXT:    retq
        ; Extracts lane 5 of an unsigned-greater-than compare mask and
        ; zero-extends that single bit to i32. The expected code isolates the bit
        ; inside the mask register: shift left by 10 (15 - 5) to move lane 5 to
        ; the top, then shift right by 15 to bring it down to bit 0.
    183   %cmp_res = icmp ugt <16 x i32> %a, %b
    184   %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
    185   %res = zext i1 %cmp_res.i1 to i32
    186   ret i32 %res
    187 }
    188 
    189 define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
    190 ; CHECK-LABEL: zext_test2:
    191 ; CHECK:       ## BB#0:
    192 ; CHECK-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
    193 ; CHECK-NEXT:    kshiftlw $10, %k0, %k0
    194 ; CHECK-NEXT:    kshiftrw $15, %k0, %k0
    195 ; CHECK-NEXT:    kmovw %k0, %eax
        ; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
    196 ; CHECK-NEXT:    retq
        ; Same lane-5 extraction as zext_test1, returned as i16. The final kmovw
        ; defines all of %eax while only %ax is returned, so the printed assembly
        ; is expected to carry the same sub-register kill note as the other
        ; i16-returning functions in this file (mask16, mand16_mem).
        ; NOTE(review): kill line restored by analogy; confirm against llc output.
    197   %cmp_res = icmp ugt <16 x i32> %a, %b
    198   %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
    199   %res = zext i1 %cmp_res.i1 to i16
    200   ret i16 %res
    201 }
    202 
    203 define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
    204 ; CHECK-LABEL: zext_test3:
    205 ; CHECK:       ## BB#0:
    206 ; CHECK-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
    207 ; CHECK-NEXT:    kshiftlw $10, %k0, %k0
    208 ; CHECK-NEXT:    kshiftrw $15, %k0, %k0
    209 ; CHECK-NEXT:    kmovw %k0, %eax
    210 ; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
    211 ; CHECK-NEXT:    retq
        ; Same lane-5 extraction as zext_test1, returned as i8. The register
        ; written by the preceding kmovw is %eax, so the kill note names %EAX as
        ; the killed super-register, matching the other i8-returning functions in
        ; this file (mask8, shuf_test1).
    212   %cmp_res = icmp ugt <16 x i32> %a, %b
    213   %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
    214   %res = zext i1 %cmp_res.i1 to i8
    215   ret i8 %res
    216 }
    217 
    218 define i8 @conv1(<8 x i1>* %R) {
    219 ; KNL-LABEL: conv1:
    220 ; KNL:       ## BB#0: ## %entry
    221 ; KNL-NEXT:    kxnorw %k0, %k0, %k0
    222 ; KNL-NEXT:    kmovw %k0, %eax
    223 ; KNL-NEXT:    movb %al, (%rdi)
    224 ; KNL-NEXT:    movb $-2, -{{[0-9]+}}(%rsp)
    225 ; KNL-NEXT:    movb $-2, %al
    226 ; KNL-NEXT:    retq
    227 ;
    228 ; SKX-LABEL: conv1:
    229 ; SKX:       ## BB#0: ## %entry
    230 ; SKX-NEXT:    kxnorw %k0, %k0, %k0
    231 ; SKX-NEXT:    kmovb %k0, (%rdi)
    232 ; SKX-NEXT:    movb $-2, -{{[0-9]+}}(%rsp)
    233 ; SKX-NEXT:    movb $-2, %al
    234 ; SKX-NEXT:    retq
    235 entry:
    236   store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R
    237 
    238   %maskPtr = alloca <8 x i1>
    239   store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
    240   %mask = load <8 x i1>, <8 x i1>* %maskPtr
    241   %mask_convert = bitcast <8 x i1> %mask to i8
    242   ret i8 %mask_convert
    243 }
    244 
    245 define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
    246 ; KNL-LABEL: test4:
    247 ; KNL:       ## BB#0:
    248 ; KNL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
    249 ; KNL-NEXT:    vpmovqd %zmm0, %ymm0
    250 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
    251 ; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
    252 ; KNL-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm1
    253 ; KNL-NEXT:    vpmovqd %zmm1, %ymm1
    254 ; KNL-NEXT:    vpslld $31, %xmm1, %xmm1
    255 ; KNL-NEXT:    vpsrad $31, %xmm1, %xmm1
    256 ; KNL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
    257 ; KNL-NEXT:    retq
    258 ;
    259 ; SKX-LABEL: test4:
    260 ; SKX:       ## BB#0:
    261 ; SKX-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0
    262 ; SKX-NEXT:    knotw %k0, %k1
    263 ; SKX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
    264 ; SKX-NEXT:    vpmovm2d %k0, %xmm0
    265 ; SKX-NEXT:    retq
    266   %x_gt_y = icmp sgt <4 x i64> %x, %y
    267   %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
    268   %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
    269   %resse = sext <4 x i1>%res to <4 x i32>
    270   ret <4 x i32> %resse
    271 }
    272 
    273 define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
    274 ; KNL-LABEL: test5:
    275 ; KNL:       ## BB#0:
    276 ; KNL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
    277 ; KNL-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm1
    278 ; KNL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
    279 ; KNL-NEXT:    retq
    280 ;
    281 ; SKX-LABEL: test5:
    282 ; SKX:       ## BB#0:
    283 ; SKX-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0
    284 ; SKX-NEXT:    knotw %k0, %k1
    285 ; SKX-NEXT:    vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
    286 ; SKX-NEXT:    vpmovm2q %k0, %xmm0
    287 ; SKX-NEXT:    retq
    288   %x_gt_y = icmp slt <2 x i64> %x, %y
    289   %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
    290   %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
    291   %resse = sext <2 x i1>%res to <2 x i64>
    292   ret <2 x i64> %resse
    293 }

        ; Masks a <16 x i1> argument with an alternating true/false pattern and
        ; branches on whether the resulting 16-bit value is zero.
        ; NOTE(review): this function carries no FileCheck assertions, so its
        ; codegen is not verified - TODO confirm whether that is intentional.
        define void @test6(<16 x i1> %mask)  {
    294 allocas:
    295   %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
    296   %b = bitcast <16 x i1> %a to i16
    297   %c = icmp eq i16 %b, 0
    298   br i1 %c, label %true, label %false
    299 
    300 true:
    301   ret void
    302 
    303 false:
    304   ret void
    305 }
    306 define void @test7(<8 x i1> %mask)  {
    307 ; KNL-LABEL: test7:
    308 ; KNL:       ## BB#0: ## %allocas
    309 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
    310 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
    311 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
    312 ; KNL-NEXT:    movb $85, %al
    313 ; KNL-NEXT:    kmovw %eax, %k1
    314 ; KNL-NEXT:    korw %k1, %k0, %k0
    315 ; KNL-NEXT:    kmovw %k0, %eax
    316 ; KNL-NEXT:    testb %al, %al
    317 ; KNL-NEXT:    retq
    318 ;
    319 ; SKX-LABEL: test7:
    320 ; SKX:       ## BB#0: ## %allocas
    321 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
    322 ; SKX-NEXT:    vpmovw2m %xmm0, %k0
    323 ; SKX-NEXT:    movb $85, %al
    324 ; SKX-NEXT:    kmovb %eax, %k1
    325 ; SKX-NEXT:    korb %k1, %k0, %k0
    326 ; SKX-NEXT:    ktestb %k0, %k0
    327 ; SKX-NEXT:    retq
    328 allocas:
    329   %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
    330   %b = bitcast <8 x i1> %a to i8
    331   %c = icmp eq i8 %b, 0
    332   br i1 %c, label %true, label %false
    333 
    334 true:
    335   ret void
    336 
    337 false:
    338   ret void
    339 }
    340 define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
    341 ; KNL-LABEL: test8:
    342 ; KNL:       ## BB#0:
    343 ; KNL-NEXT:    vpxord %zmm2, %zmm2, %zmm2
    344 ; KNL-NEXT:    cmpl %esi, %edi
    345 ; KNL-NEXT:    jg LBB17_1
    346 ; KNL-NEXT:  ## BB#2:
    347 ; KNL-NEXT:    vpcmpltud %zmm2, %zmm1, %k1
    348 ; KNL-NEXT:    jmp LBB17_3
    349 ; KNL-NEXT:  LBB17_1:
    350 ; KNL-NEXT:    vpcmpgtd %zmm2, %zmm0, %k1
    351 ; KNL-NEXT:  LBB17_3:
    352 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
    353 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
    354 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
    355 ; KNL-NEXT:    retq
    356 ;
    357 ; SKX-LABEL: test8:
    358 ; SKX:       ## BB#0:
    359 ; SKX-NEXT:    vpxord %zmm2, %zmm2, %zmm2
    360 ; SKX-NEXT:    cmpl %esi, %edi
    361 ; SKX-NEXT:    jg LBB17_1
    362 ; SKX-NEXT:  ## BB#2:
    363 ; SKX-NEXT:    vpcmpltud %zmm2, %zmm1, %k0
    364 ; SKX-NEXT:    vpmovm2b %k0, %xmm0
    365 ; SKX-NEXT:    retq
    366 ; SKX-NEXT:  LBB17_1:
    367 ; SKX-NEXT:    vpcmpgtd %zmm2, %zmm0, %k0
    368 ; SKX-NEXT:    vpmovm2b %k0, %xmm0
    369 ; SKX-NEXT:    retq
    370   %cond = icmp sgt i32 %a1, %b1
    371   %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
    372   %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
    373   %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
    374   %res = sext <16 x i1> %mix to <16 x i8>
    375   ret <16 x i8> %res
    376 }
    377 define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
    378 ; KNL-LABEL: test9:
    379 ; KNL:       ## BB#0:
    380 ; KNL-NEXT:    cmpl %esi, %edi
    381 ; KNL-NEXT:    jg LBB18_1
    382 ; KNL-NEXT:  ## BB#2:
    383 ; KNL-NEXT:    vpmovsxbd %xmm1, %zmm0
    384 ; KNL-NEXT:    jmp LBB18_3
    385 ; KNL-NEXT:  LBB18_1:
    386 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
    387 ; KNL-NEXT:  LBB18_3:
    388 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
    389 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
    390 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
    391 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
    392 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
    393 ; KNL-NEXT:    retq
    394 ;
    395 ; SKX-LABEL: test9:
    396 ; SKX:       ## BB#0:
    397 ; SKX-NEXT:    cmpl %esi, %edi
    398 ; SKX-NEXT:    jg LBB18_1
    399 ; SKX-NEXT:  ## BB#2:
    400 ; SKX-NEXT:    vpsllw $7, %xmm1, %xmm0
    401 ; SKX-NEXT:    jmp LBB18_3
    402 ; SKX-NEXT:  LBB18_1:
    403 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
    404 ; SKX-NEXT:  LBB18_3:
    405 ; SKX-NEXT:    vpmovb2m %xmm0, %k0
    406 ; SKX-NEXT:    vpmovm2b %k0, %xmm0
    407 ; SKX-NEXT:    retq
    408   %mask = icmp sgt i32 %a1, %b1
    409   %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
    410   ret <16 x i1>%c
    411 }

        ; Scalar-condition select between two <8 x i1> arguments (the 8-lane
        ; sibling of test9 and test11).
        ; NOTE(review): this function carries no FileCheck assertions, so its
        ; codegen is not verified - TODO confirm whether that is intentional.
        define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
    412   %mask = icmp sgt i32 %a1, %b1
    413   %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
    414   ret <8 x i1>%c
    415 }
    416 
    417 define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
    418 ; KNL-LABEL: test11:
    419 ; KNL:       ## BB#0:
    420 ; KNL-NEXT:    cmpl %esi, %edi
    421 ; KNL-NEXT:    jg LBB20_2
    422 ; KNL-NEXT:  ## BB#1:
    423 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
    424 ; KNL-NEXT:  LBB20_2:
    425 ; KNL-NEXT:    retq
    426 ;
    427 ; SKX-LABEL: test11:
    428 ; SKX:       ## BB#0:
    429 ; SKX-NEXT:    cmpl %esi, %edi
    430 ; SKX-NEXT:    jg LBB20_1
    431 ; SKX-NEXT:  ## BB#2:
    432 ; SKX-NEXT:    vpslld $31, %xmm1, %xmm0
    433 ; SKX-NEXT:    jmp LBB20_3
    434 ; SKX-NEXT:  LBB20_1:
    435 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    436 ; SKX-NEXT:  LBB20_3:
    437 ; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0
    438 ; SKX-NEXT:    vpmovm2d %k0, %xmm0
    439 ; SKX-NEXT:    retq
    440   %mask = icmp sgt i32 %a1, %b1
    441   %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
    442   ret <4 x i1>%c
    443 }
    444 
    445 define i32 @test12(i32 %x, i32 %y)  {
    446 ; CHECK-LABEL: test12:
    447 ; CHECK:       ## BB#0:
    448 ; CHECK-NEXT:    movl %edi, %eax
    449 ; CHECK-NEXT:    retq
        ; Constant-folding check: 21845 is 0x5555, whose bit 0 is set, so lane 0
        ; of the constant mask is true and the select collapses to %x. The
        ; expected code is therefore just a copy of the first argument (%edi),
        ; with no mask-register instructions.
    450   %a = bitcast i16 21845 to <16 x i1>
    451   %b = extractelement <16 x i1> %a, i32 0
    452   %c = select i1 %b, i32 %x, i32 %y
    453   ret i32 %c
    454 }
    455 
    456 define i32 @test13(i32 %x, i32 %y)  {
    457 ; CHECK-LABEL: test13:
    458 ; CHECK:       ## BB#0:
    459 ; CHECK-NEXT:    movl %esi, %eax
    460 ; CHECK-NEXT:    retq
    461   %a = bitcast i16 21845 to <16 x i1>
    462   %b = extractelement <16 x i1> %a, i32 3
    463   %c = select i1 %b, i32 %x, i32 %y
    464   ret i32 %c
    465 }

        ; Extracts lane 2 of the constant mask 21845 (0x5555) and inserts it into
        ; lane 1 of a constant <4 x i1> vector.
        ; NOTE(review): this function carries no FileCheck assertions, so its
        ; codegen is not verified - TODO confirm whether that is intentional.
        define <4 x i1> @test14()  {
    466   %a = bitcast i16 21845 to <16 x i1>
    467   %b = extractelement <16 x i1> %a, i32 2
    468   %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
    469   ret <4 x i1> %c
    470 }
    471 
    472 define <16 x i1> @test15(i32 %x, i32 %y)  {
    473 ; KNL-LABEL: test15:
    474 ; KNL:       ## BB#0:
    475 ; KNL-NEXT:    cmpl %esi, %edi
    476 ; KNL-NEXT:    movw $21845, %ax ## imm = 0x5555
    477 ; KNL-NEXT:    movw $1, %cx
    478 ; KNL-NEXT:    cmovgw %ax, %cx
    479 ; KNL-NEXT:    kmovw %ecx, %k1
    480 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
    481 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
    482 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
    483 ; KNL-NEXT:    retq
    484 ;
    485 ; SKX-LABEL: test15:
    486 ; SKX:       ## BB#0:
    487 ; SKX-NEXT:    cmpl %esi, %edi
    488 ; SKX-NEXT:    movw $21845, %ax ## imm = 0x5555
    489 ; SKX-NEXT:    movw $1, %cx
    490 ; SKX-NEXT:    cmovgw %ax, %cx
    491 ; SKX-NEXT:    kmovw %ecx, %k0
    492 ; SKX-NEXT:    vpmovm2b %k0, %xmm0
    493 ; SKX-NEXT:    retq
    494   %a = bitcast i16 21845 to <16 x i1>
    495   %b = bitcast i16 1 to <16 x i1>
    496   %mask = icmp sgt i32 %x, %y
    497   %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
    498   ret <16 x i1> %c
    499 }
    500 
    501 define <64 x i8> @test16(i64 %x) {
    502 ;
    503 ; KNL-LABEL: test16:
    504 ; KNL:       ## BB#0:
    505 ; KNL-NEXT:    pushq %rbp
    506 ; KNL-NEXT:  Ltmp0:
    507 ; KNL-NEXT:    .cfi_def_cfa_offset 16
    508 ; KNL-NEXT:  Ltmp1:
    509 ; KNL-NEXT:    .cfi_offset %rbp, -16
    510 ; KNL-NEXT:    movq %rsp, %rbp
    511 ; KNL-NEXT:  Ltmp2:
    512 ; KNL-NEXT:    .cfi_def_cfa_register %rbp
    513 ; KNL-NEXT:    andq $-32, %rsp
    514 ; KNL-NEXT:    subq $64, %rsp
    515 ; KNL-NEXT:    movl %edi, (%rsp)
    516 ; KNL-NEXT:    shrq $32, %rdi
    517 ; KNL-NEXT:    movl %edi, {{[0-9]+}}(%rsp)
    518 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
    519 ; KNL-NEXT:    kmovw (%rsp), %k1
    520 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1} {z}
    521 ; KNL-NEXT:    vpmovdb %zmm1, %xmm1
    522 ; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
    523 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k1} {z}
    524 ; KNL-NEXT:    vpmovdb %zmm2, %xmm2
    525 ; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm2
    526 ; KNL-NEXT:    movl $1, %eax
    527 ; KNL-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
    528 ; KNL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm2[4,5,6,7]
    529 ; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
    530 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1} {z}
    531 ; KNL-NEXT:    vpmovdb %zmm1, %xmm1
    532 ; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
    533 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
    534 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
    535 ; KNL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
    536 ; KNL-NEXT:    vpsllw $7, %ymm2, %ymm0
    537 ; KNL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
    538 ; KNL-NEXT:    vpxor %ymm2, %ymm2, %ymm2
    539 ; KNL-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
    540 ; KNL-NEXT:    movq %rbp, %rsp
    541 ; KNL-NEXT:    popq %rbp
    542 ; KNL-NEXT:    retq
    543 ;
    544 ; SKX-LABEL: test16:
    545 ; SKX:       ## BB#0:
    546 ; SKX-NEXT:    kmovq %rdi, %k0
    547 ; SKX-NEXT:    kxnorw %k0, %k0, %k1
    548 ; SKX-NEXT:    kshiftrw $15, %k1, %k1
    549 ; SKX-NEXT:    kshiftlq $5, %k1, %k1
    550 ; SKX-NEXT:    korq %k1, %k0, %k0
    551 ; SKX-NEXT:    vpmovm2b %k0, %zmm0
    552 ; SKX-NEXT:    retq
    553   %a = bitcast i64 %x to <64 x i1>
    554   %b = insertelement <64 x i1>%a, i1 true, i32 5
    555   %c = sext <64 x i1>%b to <64 x i8>
    556   ret <64 x i8>%c
    557 }
    558 
    559 define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
    560 ;
    561 ; KNL-LABEL: test17:
    562 ; KNL:       ## BB#0:
    563 ; KNL-NEXT:    pushq %rbp
    564 ; KNL-NEXT:  Ltmp3:
    565 ; KNL-NEXT:    .cfi_def_cfa_offset 16
    566 ; KNL-NEXT:  Ltmp4:
    567 ; KNL-NEXT:    .cfi_offset %rbp, -16
    568 ; KNL-NEXT:    movq %rsp, %rbp
    569 ; KNL-NEXT:  Ltmp5:
    570 ; KNL-NEXT:    .cfi_def_cfa_register %rbp
    571 ; KNL-NEXT:    andq $-32, %rsp
    572 ; KNL-NEXT:    subq $64, %rsp
    573 ; KNL-NEXT:    movl %edi, (%rsp)
    574 ; KNL-NEXT:    shrq $32, %rdi
    575 ; KNL-NEXT:    movl %edi, {{[0-9]+}}(%rsp)
    576 ; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
    577 ; KNL-NEXT:    kmovw (%rsp), %k1
    578 ; KNL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
    579 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
    580 ; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
    581 ; KNL-NEXT:    vmovdqa32 %zmm1, %zmm2 {%k1} {z}
    582 ; KNL-NEXT:    vpmovdb %zmm2, %xmm2
    583 ; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
    584 ; KNL-NEXT:    xorl %eax, %eax
    585 ; KNL-NEXT:    cmpl %edx, %esi
    586 ; KNL-NEXT:    setg %al
    587 ; KNL-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
    588 ; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
    589 ; KNL-NEXT:    vpsllw $7, %ymm0, %ymm0
    590 ; KNL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
    591 ; KNL-NEXT:    vpxor %ymm2, %ymm2, %ymm2
    592 ; KNL-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
    593 ; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
    594 ; KNL-NEXT:    vmovdqa32 %zmm1, %zmm2 {%k1} {z}
    595 ; KNL-NEXT:    vpmovdb %zmm2, %xmm2
    596 ; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
    597 ; KNL-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
    598 ; KNL-NEXT:    vpmovdb %zmm1, %xmm1
    599 ; KNL-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
    600 ; KNL-NEXT:    movq %rbp, %rsp
    601 ; KNL-NEXT:    popq %rbp
    602 ; KNL-NEXT:    retq
    603 ;
    604 ; SKX-LABEL: test17:
    605 ; SKX:       ## BB#0:
    606 ; SKX-NEXT:    kmovq %rdi, %k0
    607 ; SKX-NEXT:    cmpl %edx, %esi
    608 ; SKX-NEXT:    setg %al
    609 ; SKX-NEXT:    kmovw %eax, %k1
    610 ; SKX-NEXT:    kshiftlq $5, %k1, %k1
    611 ; SKX-NEXT:    korq %k1, %k0, %k0
    612 ; SKX-NEXT:    vpmovm2b %k0, %zmm0
    613 ; SKX-NEXT:    retq
    614   %a = bitcast i64 %x to <64 x i1>
    615   %b = icmp sgt i32 %y, %z
    616   %c = insertelement <64 x i1>%a, i1 %b, i32 5
    617   %d = sext <64 x i1>%c to <64 x i8>
    618   ret <64 x i8>%d
    619 }
    620 
    621 define <8 x i1> @test18(i8 %a, i16 %y) {
    622 ; KNL-LABEL: test18:
    623 ; KNL:       ## BB#0:
    624 ; KNL-NEXT:    kmovw %edi, %k0
    625 ; KNL-NEXT:    kmovw %esi, %k1
    626 ; KNL-NEXT:    kshiftlw $7, %k1, %k2
    627 ; KNL-NEXT:    kshiftrw $15, %k2, %k2
    628 ; KNL-NEXT:    kshiftlw $6, %k1, %k1
    629 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    630 ; KNL-NEXT:    kshiftlw $6, %k1, %k1
    631 ; KNL-NEXT:    korw %k1, %k0, %k0
    632 ; KNL-NEXT:    kshiftlw $7, %k2, %k1
    633 ; KNL-NEXT:    korw %k1, %k0, %k1
    634 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
    635 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
    636 ; KNL-NEXT:    vpmovqw %zmm0, %xmm0
    637 ; KNL-NEXT:    retq
    638 ;
    639 ; SKX-LABEL: test18:
    640 ; SKX:       ## BB#0:
    641 ; SKX-NEXT:    kmovb %edi, %k0
    642 ; SKX-NEXT:    kmovw %esi, %k1
    643 ; SKX-NEXT:    kshiftlw $6, %k1, %k2
    644 ; SKX-NEXT:    kshiftrw $15, %k2, %k2
    645 ; SKX-NEXT:    kshiftlw $7, %k1, %k1
    646 ; SKX-NEXT:    kshiftrw $15, %k1, %k1
    647 ; SKX-NEXT:    kshiftlb $7, %k1, %k1
    648 ; SKX-NEXT:    kshiftlb $6, %k2, %k2
    649 ; SKX-NEXT:    korb %k2, %k0, %k0
    650 ; SKX-NEXT:    korb %k1, %k0, %k0
    651 ; SKX-NEXT:    vpmovm2w %k0, %xmm0
    652 ; SKX-NEXT:    retq
    653   %b = bitcast i8 %a to <8 x i1>
    654   %b1 = bitcast i16 %y to <16 x i1>
    655   %el1 = extractelement <16 x i1>%b1, i32 8
    656   %el2 = extractelement <16 x i1>%b1, i32 9
    657   %c = insertelement <8 x i1>%b, i1 %el1, i32 7
    658   %d = insertelement <8 x i1>%c, i1 %el2, i32 6
    659   ret <8 x i1>%d
    660 }
    661 define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
    662 ; KNL-LABEL: test21:
    663 ; KNL:       ## BB#0:
    664 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
    665 ; KNL-NEXT:    vpsllw $15, %ymm3, %ymm3
    666 ; KNL-NEXT:    vpsraw $15, %ymm3, %ymm3
    667 ; KNL-NEXT:    vpand %ymm0, %ymm3, %ymm0
    668 ; KNL-NEXT:    vextracti128 $1, %ymm2, %xmm2
    669 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
    670 ; KNL-NEXT:    vpsllw $15, %ymm2, %ymm2
    671 ; KNL-NEXT:    vpsraw $15, %ymm2, %ymm2
    672 ; KNL-NEXT:    vpand %ymm1, %ymm2, %ymm1
    673 ; KNL-NEXT:    retq
    674 ;
    675 ; SKX-LABEL: test21:
    676 ; SKX:       ## BB#0:
    677 ; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
    678 ; SKX-NEXT:    vpmovb2m %ymm1, %k1
    679 ; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
    680 ; SKX-NEXT:    retq
    681   %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
    682   ret <32 x i16> %ret
    683 }
    684 
    685 define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
    686 ; KNL-LABEL: test22:
    687 ; KNL:       ## BB#0:
    688 ; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
    689 ; KNL-NEXT:    vpslld $31, %ymm0, %ymm0
    690 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
    691 ; KNL-NEXT:    kmovw %k0, %eax
    692 ; KNL-NEXT:    movb %al, (%rdi)
    693 ; KNL-NEXT:    retq
    694 ;
    695 ; SKX-LABEL: test22:
    696 ; SKX:       ## BB#0:
    697 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    698 ; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0
    699 ; SKX-NEXT:    kmovb %k0, (%rdi)
    700 ; SKX-NEXT:    retq
    701   store <4 x i1> %a, <4 x i1>* %addr
    702   ret void
    703 }
    704 
    705 define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
    706 ; KNL-LABEL: test23:
    707 ; KNL:       ## BB#0:
    708 ; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
    709 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
    710 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
    711 ; KNL-NEXT:    kmovw %k0, %eax
    712 ; KNL-NEXT:    movb %al, (%rdi)
    713 ; KNL-NEXT:    retq
    714 ;
    715 ; SKX-LABEL: test23:
    716 ; SKX:       ## BB#0:
    717 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
    718 ; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k0
    719 ; SKX-NEXT:    kmovb %k0, (%rdi)
    720 ; SKX-NEXT:    retq
    721   store <2 x i1> %a, <2 x i1>* %addr
    722   ret void
    723 }
    724 
    725 define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
    726 ; KNL-LABEL: store_v1i1:
    727 ; KNL:       ## BB#0:
    728 ; KNL-NEXT:    andl $1, %edi
    729 ; KNL-NEXT:    kmovw %edi, %k0
    730 ; KNL-NEXT:    kxnorw %k0, %k0, %k1
    731 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    732 ; KNL-NEXT:    kxorw %k1, %k0, %k0
    733 ; KNL-NEXT:    kmovw %k0, %eax
    734 ; KNL-NEXT:    movb %al, (%rsi)
    735 ; KNL-NEXT:    retq
    736 ;
    737 ; SKX-LABEL: store_v1i1:
    738 ; SKX:       ## BB#0:
    739 ; SKX-NEXT:    andl $1, %edi
    740 ; SKX-NEXT:    kmovw %edi, %k0
    741 ; SKX-NEXT:    kxnorw %k0, %k0, %k1
    742 ; SKX-NEXT:    kshiftrw $15, %k1, %k1
    743 ; SKX-NEXT:    kxorw %k1, %k0, %k0
    744 ; SKX-NEXT:    kmovb %k0, (%rsi)
    745 ; SKX-NEXT:    retq
    746   %x = xor <1 x i1> %c, <i1 1>
    747   store <1 x i1> %x, <1 x i1>*  %ptr, align 4
    748   ret void
    749 }
    750 
    751 define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
    752 ; KNL-LABEL: store_v2i1:
    753 ; KNL:       ## BB#0:
    754 ; KNL-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
    755 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
    756 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
    757 ; KNL-NEXT:    kmovw %k0, %eax
    758 ; KNL-NEXT:    movb %al, (%rdi)
    759 ; KNL-NEXT:    retq
    760 ;
    761 ; SKX-LABEL: store_v2i1:
    762 ; SKX:       ## BB#0:
    763 ; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
    764 ; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k0
    765 ; SKX-NEXT:    knotw %k0, %k0
    766 ; SKX-NEXT:    kmovb %k0, (%rdi)
    767 ; SKX-NEXT:    retq
    768   %x = xor <2 x i1> %c, <i1 1, i1 1>
    769   store <2 x i1> %x, <2 x i1>*  %ptr, align 4
    770   ret void
    771 }
    772 
    773 define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
    774 ; KNL-LABEL: store_v4i1:
    775 ; KNL:       ## BB#0:
    776 ; KNL-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
    777 ; KNL-NEXT:    vpxor %xmm1, %xmm0, %xmm0
    778 ; KNL-NEXT:    vpslld $31, %ymm0, %ymm0
    779 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
    780 ; KNL-NEXT:    kmovw %k0, %eax
    781 ; KNL-NEXT:    movb %al, (%rdi)
    782 ; KNL-NEXT:    retq
    783 ;
    784 ; SKX-LABEL: store_v4i1:
    785 ; SKX:       ## BB#0:
    786 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
    787 ; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0
    788 ; SKX-NEXT:    knotw %k0, %k0
    789 ; SKX-NEXT:    kmovb %k0, (%rdi)
    790 ; SKX-NEXT:    retq
    791   %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
    792   store <4 x i1> %x, <4 x i1>*  %ptr, align 4
    793   ret void
    794 }
    795 
    ; store_v8i1: inverts all eight lanes of the <8 x i1> argument %c (xor with
    ; an all-ones vector) and stores the resulting mask through %ptr.
    ; Both sets of expectations invert in a mask register (knotw for the KNL
    ; run, knotb for the SKX run). The KNL run stores the byte via %eax / %al,
    ; the SKX run stores directly with kmovb.
    796 define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
    797 ; KNL-LABEL: store_v8i1:
    798 ; KNL:       ## BB#0:
    799 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
    800 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
    801 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
    802 ; KNL-NEXT:    knotw %k0, %k0
    803 ; KNL-NEXT:    kmovw %k0, %eax
    804 ; KNL-NEXT:    movb %al, (%rdi)
    805 ; KNL-NEXT:    retq
    806 ;
    807 ; SKX-LABEL: store_v8i1:
    808 ; SKX:       ## BB#0:
    809 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
    810 ; SKX-NEXT:    vpmovw2m %xmm0, %k0
    811 ; SKX-NEXT:    knotb %k0, %k0
    812 ; SKX-NEXT:    kmovb %k0, (%rdi)
    813 ; SKX-NEXT:    retq
    814   %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
    815   store <8 x i1> %x, <8 x i1>*  %ptr, align 4
    816   ret void
    817 }
    818 
    ; store_v16i1: inverts all sixteen lanes of the <16 x i1> argument %c (xor
    ; with an all-ones vector) and stores the resulting mask through %ptr.
    ; Both sets of expectations invert with knotw and store the 16-bit mask
    ; straight from the mask register with kmovw.
    819 define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
    820 ; KNL-LABEL: store_v16i1:
    821 ; KNL:       ## BB#0:
    822 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
    823 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
    824 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
    825 ; KNL-NEXT:    knotw %k0, %k0
    826 ; KNL-NEXT:    kmovw %k0, (%rdi)
    827 ; KNL-NEXT:    retq
    828 ;
    829 ; SKX-LABEL: store_v16i1:
    830 ; SKX:       ## BB#0:
    831 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
    832 ; SKX-NEXT:    vpmovb2m %xmm0, %k0
    833 ; SKX-NEXT:    knotw %k0, %k0
    834 ; SKX-NEXT:    kmovw %k0, (%rdi)
    835 ; SKX-NEXT:    retq
    836   %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
    837   store <16 x i1> %x, <16 x i1>*  %ptr, align 4
    838   ret void
    839 }
    840 
    ; C source quoted for the f1 test below: a static flag is toggled between
    ; 0 and 1 on every call and the new value is passed to f2.
    841 ;void f2(int);
    842 ;void f1(int c)
    843 ;{
    844 ;  static int v = 0;
    845 ;  if (v == 0)
    846 ;    v = 1;
    847 ;  else
    848 ;    v = 0;
    849 ;  f2(v);
    850 ;}
    851 
    ; In the IR the static flag is represented as a single i1 global.
    852 @f1.v = internal unnamed_addr global i1 false, align 4
    853 
    ; f1: loads the i1 global @f1.v, inverts it, stores the inverted value
    ; back, zero-extends it to i32 and tail-calls f2 with it. The argument %c
    ; is not used by the body.
    ; Both sets of expectations flip bit 0 in a mask register (kxnorw +
    ; kshiftrw $15 builds the constant 1, kxorw applies it) for the stored
    ; value, and compute the call argument separately with xorl $1, %edi.
    854 define void @f1(i32 %c) {
    855 ; KNL-LABEL: f1:
    856 ; KNL:       ## BB#0: ## %entry
    857 ; KNL-NEXT:    movzbl {{.*}}(%rip), %edi
    858 ; KNL-NEXT:    movl %edi, %eax
    859 ; KNL-NEXT:    andl $1, %eax
    860 ; KNL-NEXT:    kmovw %eax, %k0
    861 ; KNL-NEXT:    kxnorw %k0, %k0, %k1
    862 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    863 ; KNL-NEXT:    kxorw %k1, %k0, %k0
    864 ; KNL-NEXT:    kmovw %k0, %eax
    865 ; KNL-NEXT:    movb %al, {{.*}}(%rip)
    866 ; KNL-NEXT:    xorl $1, %edi
    867 ; KNL-NEXT:    jmp _f2 ## TAILCALL
    868 ;
    869 ; SKX-LABEL: f1:
    870 ; SKX:       ## BB#0: ## %entry
    871 ; SKX-NEXT:    movzbl {{.*}}(%rip), %edi
    872 ; SKX-NEXT:    movl %edi, %eax
    873 ; SKX-NEXT:    andl $1, %eax
    874 ; SKX-NEXT:    kmovw %eax, %k0
    875 ; SKX-NEXT:    kxnorw %k0, %k0, %k1
    876 ; SKX-NEXT:    kshiftrw $15, %k1, %k1
    877 ; SKX-NEXT:    kxorw %k1, %k0, %k0
    878 ; SKX-NEXT:    kmovb %k0, {{.*}}(%rip)
    879 ; SKX-NEXT:    xorl $1, %edi
    880 ; SKX-NEXT:    jmp _f2 ## TAILCALL
    881 entry:
    882   %.b1 = load i1, i1* @f1.v, align 4
    883   %not..b1 = xor i1 %.b1, true
    884   store i1 %not..b1, i1* @f1.v, align 4
    885   %0 = zext i1 %not..b1 to i32
          ; NOTE(review): attribute groups #1 and #2 are referenced here and on
          ; the declaration below but are not defined in this part of the file;
          ; confirm they are defined elsewhere or accepted by the parser in use.
    886   tail call void @f2(i32 %0) #2
    887   ret void
    888 }
    889 
    ; External callee of f1; only its declaration is needed for the test.
    890 declare void @f2(i32) #1
    891 
    ; store_i16_i1: truncates the i16 argument %x to i1 and stores it through
    ; %y. Both runs share one set of expectations: mask bit 0 with andl $1 and
    ; store the low byte; no mask register is involved.
    892 define void @store_i16_i1(i16 %x, i1 *%y) {
    893 ; CHECK-LABEL: store_i16_i1:
    894 ; CHECK:       ## BB#0:
    895 ; CHECK-NEXT:    andl $1, %edi
    896 ; CHECK-NEXT:    movb %dil, (%rsi)
    897 ; CHECK-NEXT:    retq
    898   %c = trunc i16 %x to i1
    899   store i1 %c, i1* %y
    900   ret void
    901 }
    902 
    ; store_i8_i1: truncates the i8 argument %x to i1 and stores it through
    ; %y. Both runs share one set of expectations: mask bit 0 with andl $1 and
    ; store the low byte; no mask register is involved.
    903 define void @store_i8_i1(i8 %x, i1 *%y) {
    904 ; CHECK-LABEL: store_i8_i1:
    905 ; CHECK:       ## BB#0:
    906 ; CHECK-NEXT:    andl $1, %edi
    907 ; CHECK-NEXT:    movb %dil, (%rsi)
    908 ; CHECK-NEXT:    retq
    909   %c = trunc i8 %x to i1
    910   store i1 %c, i1* %y
    911   ret void
    912 }
    913 
    ; test_build_vec_v32i1: selects between %x and zero using a constant
    ; <32 x i1> mask, i.e. keeps the lanes of %x whose mask element is true.
    ; The SKX expectations materialise the mask as the 32-bit immediate
    ; 0x59455495 (element 0 of the mask is bit 0) and apply it with a
    ; zero-masking vmovdqu16. The KNL expectations process each 256-bit half
    ; separately: a byte mask is loaded with vpmovzxbw, turned into all-ones /
    ; all-zeros words with vpsllw $15 + vpsraw $15, and ANDed with the input.
    914 define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
    915 ; KNL-LABEL: test_build_vec_v32i1:
    916 ; KNL:       ## BB#0:
    917 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
    918 ; KNL-NEXT:    vpsllw $15, %ymm2, %ymm2
    919 ; KNL-NEXT:    vpsraw $15, %ymm2, %ymm2
    920 ; KNL-NEXT:    vpand %ymm0, %ymm2, %ymm0
    921 ; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
    922 ; KNL-NEXT:    vpsllw $15, %ymm2, %ymm2
    923 ; KNL-NEXT:    vpsraw $15, %ymm2, %ymm2
    924 ; KNL-NEXT:    vpand %ymm1, %ymm2, %ymm1
    925 ; KNL-NEXT:    retq
    926 ;
    927 ; SKX-LABEL: test_build_vec_v32i1:
    928 ; SKX:       ## BB#0:
    929 ; SKX-NEXT:    movl $1497715861, %eax ## imm = 0x59455495
    930 ; SKX-NEXT:    kmovd %eax, %k1
    931 ; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
    932 ; SKX-NEXT:    retq
    933   %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
    934   ret <32 x i16> %ret
    935 }
    936 
    ; test_build_vec_v64i1: selects between %x and zero using a constant
    ; <64 x i1> mask, i.e. keeps the bytes of %x whose mask element is true.
    ; The SKX expectations materialise the mask as the 64-bit immediate
    ; 0x5945594549549544 (element 0 of the mask is bit 0) and apply it with a
    ; zero-masking vmovdqu8. The KNL expectations AND each 256-bit half with a
    ; RIP-relative memory operand instead.
    937 define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
    938 ; KNL-LABEL: test_build_vec_v64i1:
    939 ; KNL:       ## BB#0:
    940 ; KNL-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
    941 ; KNL-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
    942 ; KNL-NEXT:    retq
    943 ;
    944 ; SKX-LABEL: test_build_vec_v64i1:
    945 ; SKX:       ## BB#0:
    946 ; SKX-NEXT:    movabsq $6432645796886517060, %rax ## imm = 0x5945594549549544
    947 ; SKX-NEXT:    kmovq %rax, %k1
    948 ; SKX-NEXT:    vmovdqu8 %zmm0, %zmm0 {%k1} {z}
    949 ; SKX-NEXT:    retq
    950   %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer
    951   ret <64 x i8> %ret
    952 }
    953 
    ; ktest_1: branches on whether an 8-lane compare mask is all zero.
    ; Two <8 x double> vectors are loaded from %base and %base + 1 double.
    ; %in is compared against the first (ogt); the second is zeroed in the
    ; lanes where that compare failed and compared again (olt); the two masks
    ; are ANDed, bitcast to i8 and tested against zero.
    ; The SKX expectations test the mask register directly with ktestb; the
    ; KNL expectations copy it to %eax and use testb.
    954 define void @ktest_1(<8 x double> %in, double * %base) {
    955 ; KNL-LABEL: ktest_1:
    956 ; KNL:       ## BB#0:
    957 ; KNL-NEXT:    vmovupd (%rdi), %zmm1
    958 ; KNL-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
    959 ; KNL-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
    960 ; KNL-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
    961 ; KNL-NEXT:    kmovw %k0, %eax
    962 ; KNL-NEXT:    testb %al, %al
    963 ; KNL-NEXT:    je LBB41_2
    964 ; KNL-NEXT:  ## BB#1: ## %L1
    965 ; KNL-NEXT:    vmovapd %zmm0, (%rdi)
    966 ; KNL-NEXT:    retq
    967 ; KNL-NEXT:  LBB41_2: ## %L2
    968 ; KNL-NEXT:    vmovapd %zmm0, 8(%rdi)
    969 ; KNL-NEXT:    retq
    970 ;
    971 ; SKX-LABEL: ktest_1:
    972 ; SKX:       ## BB#0:
    973 ; SKX-NEXT:    vmovupd (%rdi), %zmm1
    974 ; SKX-NEXT:    vcmpltpd %zmm0, %zmm1, %k1
    975 ; SKX-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
    976 ; SKX-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
    977 ; SKX-NEXT:    ktestb %k0, %k0
    978 ; SKX-NEXT:    je LBB41_2
    979 ; SKX-NEXT:  ## BB#1: ## %L1
    980 ; SKX-NEXT:    vmovapd %zmm0, (%rdi)
    981 ; SKX-NEXT:    retq
    982 ; SKX-NEXT:  LBB41_2: ## %L2
    983 ; SKX-NEXT:    vmovapd %zmm0, 8(%rdi)
    984 ; SKX-NEXT:    retq
          ; %addr2 is one double past %base, which is the 8(%rdi) operand in the
          ; expected code above.
    985   %addr1 = getelementptr double, double * %base, i64 0
    986   %addr2 = getelementptr double, double * %base, i64 1
    987 
    988   %vaddr1 = bitcast double* %addr1 to <8 x double>*
    989   %vaddr2 = bitcast double* %addr2 to <8 x double>*
    990 
          ; Loads are explicitly align 1 (vmovupd expected); the stores below
          ; carry no alignment and are expected as vmovapd.
    991   %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1
    992   %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1
    993 
          ; The select is expected to fold into a zero-masked load, and the AND
          ; of the two masks into a masked compare.
    994   %sel1 = fcmp ogt <8 x double>%in, %val1
    995   %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer
    996   %sel2 = fcmp olt <8 x double> %in, %val3
    997   %sel3 = and <8 x i1> %sel1, %sel2
    998 
          ; All-zero mask goes to L2, anything else to L1.
    999   %int_sel3 = bitcast <8 x i1> %sel3 to i8
   1000   %res = icmp eq i8 %int_sel3, zeroinitializer
   1001   br i1 %res, label %L2, label %L1
   1002 L1:
   1003   store <8 x double> %in, <8 x double>* %vaddr1
   1004   br label %End
   1005 L2:
   1006   store <8 x double> %in, <8 x double>* %vaddr2
   1007   br label %End
   1008 End:
   1009   ret void
   1010 }
   1011 
   ; ktest_2: 32-lane variant of ktest_1 that ORs the two compare masks.
   ; Two <32 x float> vectors are loaded from %base and %base + 1 float.
   ; %in is compared against the first (ogt); the second is zeroed in the
   ; lanes where that compare failed and compared again (olt); the two masks
   ; are ORed, bitcast to i32 and tested against zero.
   ; The SKX expectations join the two 16-bit halves with kunpckwd, combine
   ; with kord and test with ktestd. The KNL expectations extract every mask
   ; bit individually (kshiftlw / kshiftrw / kmovw / vpinsrb), combine the
   ; masks as byte vectors, and compare a stack copy of the result.
   1012 define void @ktest_2(<32 x float> %in, float * %base) {
   1013 ;
   1014 ; KNL-LABEL: ktest_2:
   1015 ; KNL:       ## BB#0:
   1016 ; KNL-NEXT:    pushq %rbp
   1017 ; KNL-NEXT:  Ltmp6:
   1018 ; KNL-NEXT:    .cfi_def_cfa_offset 16
   1019 ; KNL-NEXT:  Ltmp7:
   1020 ; KNL-NEXT:    .cfi_offset %rbp, -16
   1021 ; KNL-NEXT:    movq %rsp, %rbp
   1022 ; KNL-NEXT:  Ltmp8:
   1023 ; KNL-NEXT:    .cfi_def_cfa_register %rbp
   1024 ; KNL-NEXT:    andq $-32, %rsp
   1025 ; KNL-NEXT:    subq $32, %rsp
   1026 ; KNL-NEXT:    vmovups (%rdi), %zmm2
   1027 ; KNL-NEXT:    vmovups 64(%rdi), %zmm3
   1028 ; KNL-NEXT:    vcmpltps %zmm1, %zmm3, %k1
   1029 ; KNL-NEXT:    kshiftlw $14, %k1, %k0
   1030 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1031 ; KNL-NEXT:    kmovw %k0, %eax
   1032 ; KNL-NEXT:    kshiftlw $15, %k1, %k0
   1033 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1034 ; KNL-NEXT:    kmovw %k0, %ecx
   1035 ; KNL-NEXT:    vmovd %ecx, %xmm3
   1036 ; KNL-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
   1037 ; KNL-NEXT:    kshiftlw $13, %k1, %k0
   1038 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1039 ; KNL-NEXT:    kmovw %k0, %eax
   1040 ; KNL-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
   1041 ; KNL-NEXT:    kshiftlw $12, %k1, %k0
   1042 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1043 ; KNL-NEXT:    kmovw %k0, %eax
   1044 ; KNL-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
   1045 ; KNL-NEXT:    kshiftlw $11, %k1, %k0
   1046 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1047 ; KNL-NEXT:    kmovw %k0, %eax
   1048 ; KNL-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
   1049 ; KNL-NEXT:    kshiftlw $10, %k1, %k0
   1050 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1051 ; KNL-NEXT:    kmovw %k0, %eax
   1052 ; KNL-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
   1053 ; KNL-NEXT:    kshiftlw $9, %k1, %k0
   1054 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1055 ; KNL-NEXT:    kmovw %k0, %eax
   1056 ; KNL-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
   1057 ; KNL-NEXT:    kshiftlw $8, %k1, %k0
   1058 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1059 ; KNL-NEXT:    kmovw %k0, %eax
   1060 ; KNL-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
   1061 ; KNL-NEXT:    kshiftlw $7, %k1, %k0
   1062 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1063 ; KNL-NEXT:    kmovw %k0, %eax
   1064 ; KNL-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
   1065 ; KNL-NEXT:    kshiftlw $6, %k1, %k0
   1066 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1067 ; KNL-NEXT:    kmovw %k0, %eax
   1068 ; KNL-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
   1069 ; KNL-NEXT:    kshiftlw $5, %k1, %k0
   1070 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1071 ; KNL-NEXT:    kmovw %k0, %eax
   1072 ; KNL-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
   1073 ; KNL-NEXT:    kshiftlw $4, %k1, %k0
   1074 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1075 ; KNL-NEXT:    kmovw %k0, %eax
   1076 ; KNL-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
   1077 ; KNL-NEXT:    kshiftlw $3, %k1, %k0
   1078 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1079 ; KNL-NEXT:    kmovw %k0, %eax
   1080 ; KNL-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
   1081 ; KNL-NEXT:    kshiftlw $2, %k1, %k0
   1082 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1083 ; KNL-NEXT:    kmovw %k0, %eax
   1084 ; KNL-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
   1085 ; KNL-NEXT:    kshiftlw $1, %k1, %k0
   1086 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1087 ; KNL-NEXT:    kmovw %k0, %eax
   1088 ; KNL-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
   1089 ; KNL-NEXT:    kshiftlw $0, %k1, %k0
   1090 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1091 ; KNL-NEXT:    kmovw %k0, %eax
   1092 ; KNL-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm3
   1093 ; KNL-NEXT:    vcmpltps %zmm0, %zmm2, %k2
   1094 ; KNL-NEXT:    kshiftlw $14, %k2, %k0
   1095 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1096 ; KNL-NEXT:    kmovw %k0, %eax
   1097 ; KNL-NEXT:    kshiftlw $15, %k2, %k0
   1098 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1099 ; KNL-NEXT:    kmovw %k0, %ecx
   1100 ; KNL-NEXT:    vmovd %ecx, %xmm2
   1101 ; KNL-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
   1102 ; KNL-NEXT:    kshiftlw $13, %k2, %k0
   1103 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1104 ; KNL-NEXT:    kmovw %k0, %eax
   1105 ; KNL-NEXT:    vpinsrb $2, %eax, %xmm2, %xmm2
   1106 ; KNL-NEXT:    kshiftlw $12, %k2, %k0
   1107 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1108 ; KNL-NEXT:    kmovw %k0, %eax
   1109 ; KNL-NEXT:    vpinsrb $3, %eax, %xmm2, %xmm2
   1110 ; KNL-NEXT:    kshiftlw $11, %k2, %k0
   1111 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1112 ; KNL-NEXT:    kmovw %k0, %eax
   1113 ; KNL-NEXT:    vpinsrb $4, %eax, %xmm2, %xmm2
   1114 ; KNL-NEXT:    kshiftlw $10, %k2, %k0
   1115 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1116 ; KNL-NEXT:    kmovw %k0, %eax
   1117 ; KNL-NEXT:    vpinsrb $5, %eax, %xmm2, %xmm2
   1118 ; KNL-NEXT:    kshiftlw $9, %k2, %k0
   1119 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1120 ; KNL-NEXT:    kmovw %k0, %eax
   1121 ; KNL-NEXT:    vpinsrb $6, %eax, %xmm2, %xmm2
   1122 ; KNL-NEXT:    kshiftlw $8, %k2, %k0
   1123 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1124 ; KNL-NEXT:    kmovw %k0, %eax
   1125 ; KNL-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
   1126 ; KNL-NEXT:    kshiftlw $7, %k2, %k0
   1127 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1128 ; KNL-NEXT:    kmovw %k0, %eax
   1129 ; KNL-NEXT:    vpinsrb $8, %eax, %xmm2, %xmm2
   1130 ; KNL-NEXT:    kshiftlw $6, %k2, %k0
   1131 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1132 ; KNL-NEXT:    kmovw %k0, %eax
   1133 ; KNL-NEXT:    vpinsrb $9, %eax, %xmm2, %xmm2
   1134 ; KNL-NEXT:    kshiftlw $5, %k2, %k0
   1135 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1136 ; KNL-NEXT:    kmovw %k0, %eax
   1137 ; KNL-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
   1138 ; KNL-NEXT:    kshiftlw $4, %k2, %k0
   1139 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1140 ; KNL-NEXT:    kmovw %k0, %eax
   1141 ; KNL-NEXT:    vpinsrb $11, %eax, %xmm2, %xmm2
   1142 ; KNL-NEXT:    kshiftlw $3, %k2, %k0
   1143 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1144 ; KNL-NEXT:    kmovw %k0, %eax
   1145 ; KNL-NEXT:    vpinsrb $12, %eax, %xmm2, %xmm2
   1146 ; KNL-NEXT:    kshiftlw $2, %k2, %k0
   1147 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1148 ; KNL-NEXT:    kmovw %k0, %eax
   1149 ; KNL-NEXT:    vpinsrb $13, %eax, %xmm2, %xmm2
   1150 ; KNL-NEXT:    kshiftlw $1, %k2, %k0
   1151 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1152 ; KNL-NEXT:    kmovw %k0, %eax
   1153 ; KNL-NEXT:    vpinsrb $14, %eax, %xmm2, %xmm2
   1154 ; KNL-NEXT:    kshiftlw $0, %k2, %k0
   1155 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1156 ; KNL-NEXT:    kmovw %k0, %eax
   1157 ; KNL-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm2
   1158 ; KNL-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
   1159 ; KNL-NEXT:    vpsllw $7, %ymm2, %ymm2
   1160 ; KNL-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
   1161 ; KNL-NEXT:    vpxor %ymm3, %ymm3, %ymm3
   1162 ; KNL-NEXT:    vpcmpgtb %ymm2, %ymm3, %ymm2
   1163 ; KNL-NEXT:    vmovups 4(%rdi), %zmm3 {%k2} {z}
   1164 ; KNL-NEXT:    vmovups 68(%rdi), %zmm4 {%k1} {z}
   1165 ; KNL-NEXT:    vcmpltps %zmm4, %zmm1, %k0
   1166 ; KNL-NEXT:    kshiftlw $14, %k0, %k1
   1167 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1168 ; KNL-NEXT:    kmovw %k1, %eax
   1169 ; KNL-NEXT:    kshiftlw $15, %k0, %k1
   1170 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1171 ; KNL-NEXT:    kmovw %k1, %ecx
   1172 ; KNL-NEXT:    vmovd %ecx, %xmm4
   1173 ; KNL-NEXT:    vpinsrb $1, %eax, %xmm4, %xmm4
   1174 ; KNL-NEXT:    kshiftlw $13, %k0, %k1
   1175 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1176 ; KNL-NEXT:    kmovw %k1, %eax
   1177 ; KNL-NEXT:    vpinsrb $2, %eax, %xmm4, %xmm4
   1178 ; KNL-NEXT:    kshiftlw $12, %k0, %k1
   1179 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1180 ; KNL-NEXT:    kmovw %k1, %eax
   1181 ; KNL-NEXT:    vpinsrb $3, %eax, %xmm4, %xmm4
   1182 ; KNL-NEXT:    kshiftlw $11, %k0, %k1
   1183 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1184 ; KNL-NEXT:    kmovw %k1, %eax
   1185 ; KNL-NEXT:    vpinsrb $4, %eax, %xmm4, %xmm4
   1186 ; KNL-NEXT:    kshiftlw $10, %k0, %k1
   1187 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1188 ; KNL-NEXT:    kmovw %k1, %eax
   1189 ; KNL-NEXT:    vpinsrb $5, %eax, %xmm4, %xmm4
   1190 ; KNL-NEXT:    kshiftlw $9, %k0, %k1
   1191 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1192 ; KNL-NEXT:    kmovw %k1, %eax
   1193 ; KNL-NEXT:    vpinsrb $6, %eax, %xmm4, %xmm4
   1194 ; KNL-NEXT:    kshiftlw $8, %k0, %k1
   1195 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1196 ; KNL-NEXT:    kmovw %k1, %eax
   1197 ; KNL-NEXT:    vpinsrb $7, %eax, %xmm4, %xmm4
   1198 ; KNL-NEXT:    kshiftlw $7, %k0, %k1
   1199 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1200 ; KNL-NEXT:    kmovw %k1, %eax
   1201 ; KNL-NEXT:    vpinsrb $8, %eax, %xmm4, %xmm4
   1202 ; KNL-NEXT:    kshiftlw $6, %k0, %k1
   1203 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1204 ; KNL-NEXT:    kmovw %k1, %eax
   1205 ; KNL-NEXT:    vpinsrb $9, %eax, %xmm4, %xmm4
   1206 ; KNL-NEXT:    kshiftlw $5, %k0, %k1
   1207 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1208 ; KNL-NEXT:    kmovw %k1, %eax
   1209 ; KNL-NEXT:    vpinsrb $10, %eax, %xmm4, %xmm4
   1210 ; KNL-NEXT:    kshiftlw $4, %k0, %k1
   1211 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1212 ; KNL-NEXT:    kmovw %k1, %eax
   1213 ; KNL-NEXT:    vpinsrb $11, %eax, %xmm4, %xmm4
   1214 ; KNL-NEXT:    kshiftlw $3, %k0, %k1
   1215 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1216 ; KNL-NEXT:    kmovw %k1, %eax
   1217 ; KNL-NEXT:    vpinsrb $12, %eax, %xmm4, %xmm4
   1218 ; KNL-NEXT:    kshiftlw $2, %k0, %k1
   1219 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1220 ; KNL-NEXT:    kmovw %k1, %eax
   1221 ; KNL-NEXT:    vpinsrb $13, %eax, %xmm4, %xmm4
   1222 ; KNL-NEXT:    kshiftlw $1, %k0, %k1
   1223 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1224 ; KNL-NEXT:    kmovw %k1, %eax
   1225 ; KNL-NEXT:    vpinsrb $14, %eax, %xmm4, %xmm4
   1226 ; KNL-NEXT:    kshiftlw $0, %k0, %k0
   1227 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1228 ; KNL-NEXT:    kmovw %k0, %eax
   1229 ; KNL-NEXT:    vpinsrb $15, %eax, %xmm4, %xmm4
   1230 ; KNL-NEXT:    vcmpltps %zmm3, %zmm0, %k0
   1231 ; KNL-NEXT:    kshiftlw $14, %k0, %k1
   1232 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1233 ; KNL-NEXT:    kmovw %k1, %eax
   1234 ; KNL-NEXT:    kshiftlw $15, %k0, %k1
   1235 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1236 ; KNL-NEXT:    kmovw %k1, %ecx
   1237 ; KNL-NEXT:    vmovd %ecx, %xmm3
   1238 ; KNL-NEXT:    vpinsrb $1, %eax, %xmm3, %xmm3
   1239 ; KNL-NEXT:    kshiftlw $13, %k0, %k1
   1240 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1241 ; KNL-NEXT:    kmovw %k1, %eax
   1242 ; KNL-NEXT:    vpinsrb $2, %eax, %xmm3, %xmm3
   1243 ; KNL-NEXT:    kshiftlw $12, %k0, %k1
   1244 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1245 ; KNL-NEXT:    kmovw %k1, %eax
   1246 ; KNL-NEXT:    vpinsrb $3, %eax, %xmm3, %xmm3
   1247 ; KNL-NEXT:    kshiftlw $11, %k0, %k1
   1248 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1249 ; KNL-NEXT:    kmovw %k1, %eax
   1250 ; KNL-NEXT:    vpinsrb $4, %eax, %xmm3, %xmm3
   1251 ; KNL-NEXT:    kshiftlw $10, %k0, %k1
   1252 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1253 ; KNL-NEXT:    kmovw %k1, %eax
   1254 ; KNL-NEXT:    vpinsrb $5, %eax, %xmm3, %xmm3
   1255 ; KNL-NEXT:    kshiftlw $9, %k0, %k1
   1256 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1257 ; KNL-NEXT:    kmovw %k1, %eax
   1258 ; KNL-NEXT:    vpinsrb $6, %eax, %xmm3, %xmm3
   1259 ; KNL-NEXT:    kshiftlw $8, %k0, %k1
   1260 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1261 ; KNL-NEXT:    kmovw %k1, %eax
   1262 ; KNL-NEXT:    vpinsrb $7, %eax, %xmm3, %xmm3
   1263 ; KNL-NEXT:    kshiftlw $7, %k0, %k1
   1264 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1265 ; KNL-NEXT:    kmovw %k1, %eax
   1266 ; KNL-NEXT:    vpinsrb $8, %eax, %xmm3, %xmm3
   1267 ; KNL-NEXT:    kshiftlw $6, %k0, %k1
   1268 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1269 ; KNL-NEXT:    kmovw %k1, %eax
   1270 ; KNL-NEXT:    vpinsrb $9, %eax, %xmm3, %xmm3
   1271 ; KNL-NEXT:    kshiftlw $5, %k0, %k1
   1272 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1273 ; KNL-NEXT:    kmovw %k1, %eax
   1274 ; KNL-NEXT:    vpinsrb $10, %eax, %xmm3, %xmm3
   1275 ; KNL-NEXT:    kshiftlw $4, %k0, %k1
   1276 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1277 ; KNL-NEXT:    kmovw %k1, %eax
   1278 ; KNL-NEXT:    vpinsrb $11, %eax, %xmm3, %xmm3
   1279 ; KNL-NEXT:    kshiftlw $3, %k0, %k1
   1280 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1281 ; KNL-NEXT:    kmovw %k1, %eax
   1282 ; KNL-NEXT:    vpinsrb $12, %eax, %xmm3, %xmm3
   1283 ; KNL-NEXT:    kshiftlw $2, %k0, %k1
   1284 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1285 ; KNL-NEXT:    kmovw %k1, %eax
   1286 ; KNL-NEXT:    vpinsrb $13, %eax, %xmm3, %xmm3
   1287 ; KNL-NEXT:    kshiftlw $1, %k0, %k1
   1288 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1289 ; KNL-NEXT:    kmovw %k1, %eax
   1290 ; KNL-NEXT:    vpinsrb $14, %eax, %xmm3, %xmm3
   1291 ; KNL-NEXT:    kshiftlw $0, %k0, %k0
   1292 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1293 ; KNL-NEXT:    kmovw %k0, %eax
   1294 ; KNL-NEXT:    vpinsrb $15, %eax, %xmm3, %xmm3
   1295 ; KNL-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
   1296 ; KNL-NEXT:    vpor %ymm3, %ymm2, %ymm2
   1297 ; KNL-NEXT:    vextracti128 $1, %ymm2, %xmm3
   1298 ; KNL-NEXT:    vpmovsxbd %xmm3, %zmm3
   1299 ; KNL-NEXT:    vpslld $31, %zmm3, %zmm3
   1300 ; KNL-NEXT:    vptestmd %zmm3, %zmm3, %k0
   1301 ; KNL-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
   1302 ; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
   1303 ; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
   1304 ; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k0
   1305 ; KNL-NEXT:    kmovw %k0, (%rsp)
   1306 ; KNL-NEXT:    cmpl $0, (%rsp)
   1307 ; KNL-NEXT:    je LBB42_2
   1308 ; KNL-NEXT:  ## BB#1: ## %L1
   1309 ; KNL-NEXT:    vmovaps %zmm0, (%rdi)
   1310 ; KNL-NEXT:    vmovaps %zmm1, 64(%rdi)
   1311 ; KNL-NEXT:    jmp LBB42_3
   1312 ; KNL-NEXT:  LBB42_2: ## %L2
   1313 ; KNL-NEXT:    vmovaps %zmm0, 4(%rdi)
   1314 ; KNL-NEXT:    vmovaps %zmm1, 68(%rdi)
   1315 ; KNL-NEXT:  LBB42_3: ## %End
   1316 ; KNL-NEXT:    movq %rbp, %rsp
   1317 ; KNL-NEXT:    popq %rbp
   1318 ; KNL-NEXT:    retq
   1319 ;
   1320 ; SKX-LABEL: ktest_2:
   1321 ; SKX:       ## BB#0:
   1322 ; SKX-NEXT:    vmovups 64(%rdi), %zmm2
   1323 ; SKX-NEXT:    vmovups (%rdi), %zmm3
   1324 ; SKX-NEXT:    vcmpltps %zmm0, %zmm3, %k1
   1325 ; SKX-NEXT:    vcmpltps %zmm1, %zmm2, %k2
   1326 ; SKX-NEXT:    kunpckwd %k1, %k2, %k0
   1327 ; SKX-NEXT:    vmovups 68(%rdi), %zmm2 {%k2} {z}
   1328 ; SKX-NEXT:    vmovups 4(%rdi), %zmm3 {%k1} {z}
   1329 ; SKX-NEXT:    vcmpltps %zmm3, %zmm0, %k1
   1330 ; SKX-NEXT:    vcmpltps %zmm2, %zmm1, %k2
   1331 ; SKX-NEXT:    kunpckwd %k1, %k2, %k1
   1332 ; SKX-NEXT:    kord %k1, %k0, %k0
   1333 ; SKX-NEXT:    ktestd %k0, %k0
   1334 ; SKX-NEXT:    je LBB42_2
   1335 ; SKX-NEXT:  ## BB#1: ## %L1
   1336 ; SKX-NEXT:    vmovaps %zmm0, (%rdi)
   1337 ; SKX-NEXT:    vmovaps %zmm1, 64(%rdi)
   1338 ; SKX-NEXT:    retq
   1339 ; SKX-NEXT:  LBB42_2: ## %L2
   1340 ; SKX-NEXT:    vmovaps %zmm0, 4(%rdi)
   1341 ; SKX-NEXT:    vmovaps %zmm1, 68(%rdi)
   1342 ; SKX-NEXT:    retq
          ; %addr2 is one float past %base, which is where the 4(%rdi) and
          ; 68(%rdi) operands in the expected code above come from.
   1343   %addr1 = getelementptr float, float * %base, i64 0
   1344   %addr2 = getelementptr float, float * %base, i64 1
   1345 
   1346   %vaddr1 = bitcast float* %addr1 to <32 x float>*
   1347   %vaddr2 = bitcast float* %addr2 to <32 x float>*
   1348 
          ; Loads are explicitly align 1 (vmovups expected); the stores below
          ; carry no alignment and are expected as vmovaps.
   1349   %val1 = load <32 x float>, <32 x float> *%vaddr1, align 1
   1350   %val2 = load <32 x float>, <32 x float> *%vaddr2, align 1
   1351 
          ; Unlike ktest_1, the two compare masks are combined with OR.
   1352   %sel1 = fcmp ogt <32 x float>%in, %val1
   1353   %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer
   1354   %sel2 = fcmp olt <32 x float> %in, %val3
   1355   %sel3 = or <32 x i1> %sel1, %sel2
   1356 
          ; All-zero mask goes to L2, anything else to L1.
   1357   %int_sel3 = bitcast <32 x i1> %sel3 to i32
   1358   %res = icmp eq i32 %int_sel3, zeroinitializer
   1359   br i1 %res, label %L2, label %L1
   1360 L1:
   1361   store <32 x float> %in, <32 x float>* %vaddr1
   1362   br label %End
   1363 L2:
   1364   store <32 x float> %in, <32 x float>* %vaddr2
   1365   br label %End
   1366 End:
   1367   ret void
   1368 }
   1369 
   ; load_8i1: loads an <8 x i1> mask from %a and sign-extends each element
   ; to i64. The SKX expectations load the mask with kmovb and expand it with
   ; vpmovm2q; the KNL expectations load the byte through %eax and expand an
   ; all-ones vector (vpternlogd $255) under zero-masking.
   1370 define <8 x i64> @load_8i1(<8 x i1>* %a) {
   1371 ; KNL-LABEL: load_8i1:
   1372 ; KNL:       ## BB#0:
   1373 ; KNL-NEXT:    movzbl (%rdi), %eax
   1374 ; KNL-NEXT:    kmovw %eax, %k1
   1375 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
   1376 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
   1377 ; KNL-NEXT:    retq
   1378 ;
   1379 ; SKX-LABEL: load_8i1:
   1380 ; SKX:       ## BB#0:
   1381 ; SKX-NEXT:    kmovb (%rdi), %k0
   1382 ; SKX-NEXT:    vpmovm2q %k0, %zmm0
   1383 ; SKX-NEXT:    retq
   1384   %b = load <8 x i1>, <8 x i1>* %a
   1385   %c = sext <8 x i1> %b to <8 x i64>
   1386   ret <8 x i64> %c
   1387 }
   1388 
   ; load_16i1: loads a <16 x i1> mask from %a and sign-extends each element
   ; to i32. Both runs are expected to load the mask directly with kmovw; the
   ; SKX run expands it with vpmovm2d, the KNL run with a zero-masked move of
   ; an all-ones vector.
   1389 define <16 x i32> @load_16i1(<16 x i1>* %a) {
   1390 ; KNL-LABEL: load_16i1:
   1391 ; KNL:       ## BB#0:
   1392 ; KNL-NEXT:    kmovw (%rdi), %k1
   1393 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
   1394 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
   1395 ; KNL-NEXT:    retq
   1396 ;
   1397 ; SKX-LABEL: load_16i1:
   1398 ; SKX:       ## BB#0:
   1399 ; SKX-NEXT:    kmovw (%rdi), %k0
   1400 ; SKX-NEXT:    vpmovm2d %k0, %zmm0
   1401 ; SKX-NEXT:    retq
   1402   %b = load <16 x i1>, <16 x i1>* %a
   1403   %c = sext <16 x i1> %b to <16 x i32>
   1404   ret <16 x i32> %c
   1405 }
   1406 
   ; load_2i1: loads a <2 x i1> mask from %a and sign-extends each element to
   ; i16. In the expected code the result is produced in 64-bit lanes of %xmm0
   ; (vpmovm2q on SKX, a zero-masked vmovdqa64 on KNL).
   1407 define <2 x i16> @load_2i1(<2 x i1>* %a) {
   1408 ; KNL-LABEL: load_2i1:
   1409 ; KNL:       ## BB#0:
   1410 ; KNL-NEXT:    movzbl (%rdi), %eax
   1411 ; KNL-NEXT:    kmovw %eax, %k1
   1412 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
   1413 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
   1414 ; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
   1415 ; KNL-NEXT:    retq
   1416 ;
   1417 ; SKX-LABEL: load_2i1:
   1418 ; SKX:       ## BB#0:
   1419 ; SKX-NEXT:    kmovb (%rdi), %k0
   1420 ; SKX-NEXT:    vpmovm2q %k0, %xmm0
   1421 ; SKX-NEXT:    retq
   1422   %b = load <2 x i1>, <2 x i1>* %a
   1423   %c = sext <2 x i1> %b to <2 x i16>
   1424   ret <2 x i16> %c
   1425 }
   1426 
   ; load_4i1: loads a <4 x i1> mask from %a and sign-extends each element to
   ; i16. In the expected code the result is produced in 32-bit lanes of %xmm0
   ; (vpmovm2d on SKX; on KNL a 64-bit-lane expansion narrowed with vpmovqd).
   1427 define <4 x i16> @load_4i1(<4 x i1>* %a) {
   1428 ; KNL-LABEL: load_4i1:
   1429 ; KNL:       ## BB#0:
   1430 ; KNL-NEXT:    movzbl (%rdi), %eax
   1431 ; KNL-NEXT:    kmovw %eax, %k1
   1432 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
   1433 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
   1434 ; KNL-NEXT:    vpmovqd %zmm0, %ymm0
   1435 ; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
   1436 ; KNL-NEXT:    retq
   1437 ;
   1438 ; SKX-LABEL: load_4i1:
   1439 ; SKX:       ## BB#0:
   1440 ; SKX-NEXT:    kmovb (%rdi), %k0
   1441 ; SKX-NEXT:    vpmovm2d %k0, %xmm0
   1442 ; SKX-NEXT:    retq
   1443   %b = load <4 x i1>, <4 x i1>* %a
   1444   %c = sext <4 x i1> %b to <4 x i16>
   1445   ret <4 x i16> %c
   1446 }
   1447 
   ; load_32i1: loads a <32 x i1> mask from %a and sign-extends each element
   ; to i16. The SKX expectations use a single kmovd load plus vpmovm2w; the
   ; KNL expectations load two 16-bit masks (offsets 0 and 2), expand each to
   ; 32-bit lanes and narrow with vpmovdw.
   1448 define <32 x i16> @load_32i1(<32 x i1>* %a) {
   1449 ; KNL-LABEL: load_32i1:
   1450 ; KNL:       ## BB#0:
   1451 ; KNL-NEXT:    kmovw (%rdi), %k1
   1452 ; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
   1453 ; KNL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
   1454 ; KNL-NEXT:    vpmovdw %zmm0, %ymm0
   1455 ; KNL-NEXT:    kmovw 2(%rdi), %k1
   1456 ; KNL-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
   1457 ; KNL-NEXT:    vpmovdw %zmm1, %ymm1
   1458 ; KNL-NEXT:    retq
   1459 ;
   1460 ; SKX-LABEL: load_32i1:
   1461 ; SKX:       ## BB#0:
   1462 ; SKX-NEXT:    kmovd (%rdi), %k0
   1463 ; SKX-NEXT:    vpmovm2w %k0, %zmm0
   1464 ; SKX-NEXT:    retq
   1465   %b = load <32 x i1>, <32 x i1>* %a
   1466   %c = sext <32 x i1> %b to <32 x i16>
   1467   ret <32 x i16> %c
   1468 }
   1469 
   ; load_64i1: loads a <64 x i1> mask from %a and sign-extends each element
   ; to i8. The SKX expectations use a single kmovq load plus vpmovm2b; the
   ; KNL expectations load four 16-bit masks (offsets 0, 2, 4 and 6), expand
   ; each to 32-bit lanes, narrow with vpmovdb and join the pieces with
   ; vinsertf128.
   1470 define <64 x i8> @load_64i1(<64 x i1>* %a) {
   1471 ; KNL-LABEL: load_64i1:
   1472 ; KNL:       ## BB#0:
   1473 ; KNL-NEXT:    kmovw (%rdi), %k1
   1474 ; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
   1475 ; KNL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
   1476 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
   1477 ; KNL-NEXT:    kmovw 2(%rdi), %k1
   1478 ; KNL-NEXT:    vmovdqa32 %zmm1, %zmm2 {%k1} {z}
   1479 ; KNL-NEXT:    vpmovdb %zmm2, %xmm2
   1480 ; KNL-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1481 ; KNL-NEXT:    kmovw 4(%rdi), %k1
   1482 ; KNL-NEXT:    vmovdqa32 %zmm1, %zmm2 {%k1} {z}
   1483 ; KNL-NEXT:    vpmovdb %zmm2, %xmm2
   1484 ; KNL-NEXT:    kmovw 6(%rdi), %k1
   1485 ; KNL-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
   1486 ; KNL-NEXT:    vpmovdb %zmm1, %xmm1
   1487 ; KNL-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
   1488 ; KNL-NEXT:    retq
   1489 ;
   1490 ; SKX-LABEL: load_64i1:
   1491 ; SKX:       ## BB#0:
   1492 ; SKX-NEXT:    kmovq (%rdi), %k0
   1493 ; SKX-NEXT:    vpmovm2b %k0, %zmm0
   1494 ; SKX-NEXT:    retq
   1495   %b = load <64 x i1>, <64 x i1>* %a
   1496   %c = sext <64 x i1> %b to <64 x i8>
   1497   ret <64 x i8> %c
   1498 }
   1499 
   ; store_8i1: stores the <8 x i1> argument %v through %a unchanged.
   ; The SKX expectations convert the vector to a mask with vpmovw2m and store
   ; it with kmovb; the KNL expectations widen to 64-bit lanes, form the mask
   ; with vptestmq and store the byte via %eax / %al.
   1500 define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) {
   1501 ; KNL-LABEL: store_8i1:
   1502 ; KNL:       ## BB#0:
   1503 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
   1504 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
   1505 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
   1506 ; KNL-NEXT:    kmovw %k0, %eax
   1507 ; KNL-NEXT:    movb %al, (%rdi)
   1508 ; KNL-NEXT:    retq
   1509 ;
   1510 ; SKX-LABEL: store_8i1:
   1511 ; SKX:       ## BB#0:
   1512 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
   1513 ; SKX-NEXT:    vpmovw2m %xmm0, %k0
   1514 ; SKX-NEXT:    kmovb %k0, (%rdi)
   1515 ; SKX-NEXT:    retq
   1516   store <8 x i1> %v, <8 x i1>* %a
   1517   ret void
   1518 }
   1519 
   ; store_8i1_1: truncates the <8 x i16> argument %v to <8 x i1> and stores
   ; the mask through %a. The expected instruction sequences are the same as
   ; for store_8i1, which takes the <8 x i1> value directly.
   1520 define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) {
   1521 ; KNL-LABEL: store_8i1_1:
   1522 ; KNL:       ## BB#0:
   1523 ; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
   1524 ; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
   1525 ; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
   1526 ; KNL-NEXT:    kmovw %k0, %eax
   1527 ; KNL-NEXT:    movb %al, (%rdi)
   1528 ; KNL-NEXT:    retq
   1529 ;
   1530 ; SKX-LABEL: store_8i1_1:
   1531 ; SKX:       ## BB#0:
   1532 ; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
   1533 ; SKX-NEXT:    vpmovw2m %xmm0, %k0
   1534 ; SKX-NEXT:    kmovb %k0, (%rdi)
   1535 ; SKX-NEXT:    retq
   1536   %v1 = trunc <8 x i16> %v to <8 x i1>
   1537   store <8 x i1> %v1, <8 x i1>* %a
   1538   ret void
   1539 }
   1540 
   ; store_16i1: stores the <16 x i1> argument %v through %a unchanged.
   ; Both runs are expected to store the 16-bit mask straight from the mask
   ; register with kmovw; they differ only in how the mask is formed
   ; (vptestmd on KNL, vpmovb2m on SKX).
   1541 define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) {
   1542 ; KNL-LABEL: store_16i1:
   1543 ; KNL:       ## BB#0:
   1544 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
   1545 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
   1546 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
   1547 ; KNL-NEXT:    kmovw %k0, (%rdi)
   1548 ; KNL-NEXT:    retq
   1549 ;
   1550 ; SKX-LABEL: store_16i1:
   1551 ; SKX:       ## BB#0:
   1552 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
   1553 ; SKX-NEXT:    vpmovb2m %xmm0, %k0
   1554 ; SKX-NEXT:    kmovw %k0, (%rdi)
   1555 ; SKX-NEXT:    retq
   1556   store <16 x i1> %v, <16 x i1>* %a
   1557   ret void
   1558 }
   1559 
   ; store_32i1: stores the <32 x i1> argument %v through %a unchanged.
   ; The SKX expectations form one 32-bit mask (vpmovb2m on %ymm0) and store
   ; it with kmovd; the KNL expectations split the vector into two 16-element
   ; halves and store two 16-bit masks at offsets 2 and 0.
   1560 define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) {
   1561 ; KNL-LABEL: store_32i1:
   1562 ; KNL:       ## BB#0:
   1563 ; KNL-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1564 ; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
   1565 ; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
   1566 ; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0
   1567 ; KNL-NEXT:    kmovw %k0, 2(%rdi)
   1568 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
   1569 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
   1570 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
   1571 ; KNL-NEXT:    kmovw %k0, (%rdi)
   1572 ; KNL-NEXT:    retq
   1573 ;
   1574 ; SKX-LABEL: store_32i1:
   1575 ; SKX:       ## BB#0:
   1576 ; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
   1577 ; SKX-NEXT:    vpmovb2m %ymm0, %k0
   1578 ; SKX-NEXT:    kmovd %k0, (%rdi)
   1579 ; SKX-NEXT:    retq
   1580   store <32 x i1> %v, <32 x i1>* %a
   1581   ret void
   1582 }
   1583 
   1584 define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) {
        ; Truncates every i16 lane of %v to i1 and stores the <32 x i1> result
        ; through %a.
        ; Expected code for the -mcpu=knl run: each of %ymm0 and %ymm1 is first
        ; narrowed from words to bytes by widening to dwords (vpmovsxwd) and then
        ; truncating (vpmovdb). The two byte vectors are then turned into masks
        ; with vpmovsxbd / vpslld $31 / vptestmd; the piece from %xmm1 is stored
        ; at 2(%rdi) and the piece from %xmm0 at (%rdi).
        ; Expected code for the -mcpu=skx run: vpsllw $15 on %zmm0, vpmovw2m into
        ; %k0, and one kmovd to (%rdi).
   1585 ; KNL-LABEL: store_32i1_1:
   1586 ; KNL:       ## BB#0:
   1587 ; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
   1588 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
   1589 ; KNL-NEXT:    vpmovsxwd %ymm1, %zmm1
   1590 ; KNL-NEXT:    vpmovdb %zmm1, %xmm1
   1591 ; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
   1592 ; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
   1593 ; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0
   1594 ; KNL-NEXT:    kmovw %k0, 2(%rdi)
   1595 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
   1596 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
   1597 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
   1598 ; KNL-NEXT:    kmovw %k0, (%rdi)
   1599 ; KNL-NEXT:    retq
   1600 ;
   1601 ; SKX-LABEL: store_32i1_1:
   1602 ; SKX:       ## BB#0:
   1603 ; SKX-NEXT:    vpsllw $15, %zmm0, %zmm0
   1604 ; SKX-NEXT:    vpmovw2m %zmm0, %k0
   1605 ; SKX-NEXT:    kmovd %k0, (%rdi)
   1606 ; SKX-NEXT:    retq
   1607   %v1 = trunc <32 x i16> %v to <32 x i1>
   1608   store <32 x i1> %v1, <32 x i1>* %a
   1609   ret void
   1610 }
   1611 
   1612 
   1613 define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
        ; Stores the <64 x i1> argument %v through %a; the IR body is just the
        ; store.
        ; Expected code for the -mcpu=knl run: %rbp, %r15, %r14, %r13, %r12 and
        ; %rbx are pushed on entry (with matching .cfi directives) and popped
        ; before retq. %xmm0..%xmm3 are each widened with vpmovsxbd and shifted
        ; with vpslld $31. The result is then written as four 16-bit pieces, from
        ; %zmm3 at 6(%rdi) down to %zmm0 at (%rdi). For every piece the sequence
        ; is: vptestmd into a mask register, isolate each mask bit with a
        ; kshiftlw $(15-i) / kshiftrw $15 pair and copy it to a general-purpose
        ; register, rebuild a byte vector with vmovd + vpinsrb $1..$15, convert
        ; that vector back to a mask (vpmovsxbd / vpslld $31 / vptestmd), and
        ; store it with kmovw.
        ; Expected code for the -mcpu=skx run: vpsllw $7 on %zmm0, vpmovb2m into
        ; %k0, and one kmovq to (%rdi).
   1614 ;
   1615 ; KNL-LABEL: store_64i1:
   1616 ; KNL:       ## BB#0:
   1617 ; KNL-NEXT:    pushq %rbp
   1618 ; KNL-NEXT:  Ltmp9:
   1619 ; KNL-NEXT:    .cfi_def_cfa_offset 16
   1620 ; KNL-NEXT:    pushq %r15
   1621 ; KNL-NEXT:  Ltmp10:
   1622 ; KNL-NEXT:    .cfi_def_cfa_offset 24
   1623 ; KNL-NEXT:    pushq %r14
   1624 ; KNL-NEXT:  Ltmp11:
   1625 ; KNL-NEXT:    .cfi_def_cfa_offset 32
   1626 ; KNL-NEXT:    pushq %r13
   1627 ; KNL-NEXT:  Ltmp12:
   1628 ; KNL-NEXT:    .cfi_def_cfa_offset 40
   1629 ; KNL-NEXT:    pushq %r12
   1630 ; KNL-NEXT:  Ltmp13:
   1631 ; KNL-NEXT:    .cfi_def_cfa_offset 48
   1632 ; KNL-NEXT:    pushq %rbx
   1633 ; KNL-NEXT:  Ltmp14:
   1634 ; KNL-NEXT:    .cfi_def_cfa_offset 56
   1635 ; KNL-NEXT:  Ltmp15:
   1636 ; KNL-NEXT:    .cfi_offset %rbx, -56
   1637 ; KNL-NEXT:  Ltmp16:
   1638 ; KNL-NEXT:    .cfi_offset %r12, -48
   1639 ; KNL-NEXT:  Ltmp17:
   1640 ; KNL-NEXT:    .cfi_offset %r13, -40
   1641 ; KNL-NEXT:  Ltmp18:
   1642 ; KNL-NEXT:    .cfi_offset %r14, -32
   1643 ; KNL-NEXT:  Ltmp19:
   1644 ; KNL-NEXT:    .cfi_offset %r15, -24
   1645 ; KNL-NEXT:  Ltmp20:
   1646 ; KNL-NEXT:    .cfi_offset %rbp, -16
   1647 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
   1648 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
   1649 ; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
   1650 ; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
   1651 ; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
   1652 ; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
   1653 ; KNL-NEXT:    vpmovsxbd %xmm3, %zmm3
   1654 ; KNL-NEXT:    vpslld $31, %zmm3, %zmm3
        ; Piece 3: mask taken from %zmm3, rebuilt in %xmm2, stored at 6(%rdi).
        ; The vptestmd of %zmm2 into %k2 for the next piece is interleaved here.
   1655 ; KNL-NEXT:    vptestmd %zmm3, %zmm3, %k0
   1656 ; KNL-NEXT:    kshiftlw $14, %k0, %k1
   1657 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1658 ; KNL-NEXT:    kmovw %k1, %r8d
   1659 ; KNL-NEXT:    kshiftlw $15, %k0, %k1
   1660 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1661 ; KNL-NEXT:    kmovw %k1, %r9d
   1662 ; KNL-NEXT:    kshiftlw $13, %k0, %k1
   1663 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1664 ; KNL-NEXT:    kmovw %k1, %r10d
   1665 ; KNL-NEXT:    kshiftlw $12, %k0, %k1
   1666 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1667 ; KNL-NEXT:    kmovw %k1, %r11d
   1668 ; KNL-NEXT:    kshiftlw $11, %k0, %k1
   1669 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1670 ; KNL-NEXT:    kmovw %k1, %r14d
   1671 ; KNL-NEXT:    kshiftlw $10, %k0, %k1
   1672 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1673 ; KNL-NEXT:    kmovw %k1, %r15d
   1674 ; KNL-NEXT:    kshiftlw $9, %k0, %k1
   1675 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1676 ; KNL-NEXT:    kmovw %k1, %r12d
   1677 ; KNL-NEXT:    kshiftlw $8, %k0, %k1
   1678 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1679 ; KNL-NEXT:    kmovw %k1, %r13d
   1680 ; KNL-NEXT:    kshiftlw $7, %k0, %k1
   1681 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1682 ; KNL-NEXT:    kmovw %k1, %ebx
   1683 ; KNL-NEXT:    kshiftlw $6, %k0, %k1
   1684 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1685 ; KNL-NEXT:    kmovw %k1, %ebp
   1686 ; KNL-NEXT:    kshiftlw $5, %k0, %k1
   1687 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1688 ; KNL-NEXT:    kmovw %k1, %eax
   1689 ; KNL-NEXT:    kshiftlw $4, %k0, %k1
   1690 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1691 ; KNL-NEXT:    kmovw %k1, %ecx
   1692 ; KNL-NEXT:    kshiftlw $3, %k0, %k1
   1693 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1694 ; KNL-NEXT:    kmovw %k1, %edx
   1695 ; KNL-NEXT:    kshiftlw $2, %k0, %k1
   1696 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1697 ; KNL-NEXT:    kmovw %k1, %esi
   1698 ; KNL-NEXT:    kshiftlw $1, %k0, %k1
   1699 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1700 ; KNL-NEXT:    vmovd %r9d, %xmm3
   1701 ; KNL-NEXT:    kmovw %k1, %r9d
   1702 ; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k2
   1703 ; KNL-NEXT:    kshiftlw $0, %k0, %k0
   1704 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1705 ; KNL-NEXT:    vpinsrb $1, %r8d, %xmm3, %xmm2
   1706 ; KNL-NEXT:    vpinsrb $2, %r10d, %xmm2, %xmm2
   1707 ; KNL-NEXT:    vpinsrb $3, %r11d, %xmm2, %xmm2
   1708 ; KNL-NEXT:    vpinsrb $4, %r14d, %xmm2, %xmm2
   1709 ; KNL-NEXT:    vpinsrb $5, %r15d, %xmm2, %xmm2
   1710 ; KNL-NEXT:    vpinsrb $6, %r12d, %xmm2, %xmm2
   1711 ; KNL-NEXT:    vpinsrb $7, %r13d, %xmm2, %xmm2
   1712 ; KNL-NEXT:    vpinsrb $8, %ebx, %xmm2, %xmm2
   1713 ; KNL-NEXT:    vpinsrb $9, %ebp, %xmm2, %xmm2
   1714 ; KNL-NEXT:    vpinsrb $10, %eax, %xmm2, %xmm2
   1715 ; KNL-NEXT:    vpinsrb $11, %ecx, %xmm2, %xmm2
   1716 ; KNL-NEXT:    vpinsrb $12, %edx, %xmm2, %xmm2
   1717 ; KNL-NEXT:    vpinsrb $13, %esi, %xmm2, %xmm2
   1718 ; KNL-NEXT:    vpinsrb $14, %r9d, %xmm2, %xmm2
   1719 ; KNL-NEXT:    kmovw %k0, %eax
   1720 ; KNL-NEXT:    vpinsrb $15, %eax, %xmm2, %xmm2
   1721 ; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
   1722 ; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
   1723 ; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k0
   1724 ; KNL-NEXT:    kmovw %k0, 6(%rdi)
        ; Piece 2: mask held in %k2, rebuilt in %xmm1, stored at 4(%rdi).
        ; The vptestmd of %zmm1 into %k1 for the next piece is interleaved here.
   1725 ; KNL-NEXT:    kshiftlw $14, %k2, %k0
   1726 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1727 ; KNL-NEXT:    kmovw %k0, %r8d
   1728 ; KNL-NEXT:    kshiftlw $15, %k2, %k0
   1729 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1730 ; KNL-NEXT:    kmovw %k0, %r10d
   1731 ; KNL-NEXT:    kshiftlw $13, %k2, %k0
   1732 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1733 ; KNL-NEXT:    kmovw %k0, %r9d
   1734 ; KNL-NEXT:    kshiftlw $12, %k2, %k0
   1735 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1736 ; KNL-NEXT:    kmovw %k0, %r11d
   1737 ; KNL-NEXT:    kshiftlw $11, %k2, %k0
   1738 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1739 ; KNL-NEXT:    kmovw %k0, %r14d
   1740 ; KNL-NEXT:    kshiftlw $10, %k2, %k0
   1741 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1742 ; KNL-NEXT:    kmovw %k0, %r15d
   1743 ; KNL-NEXT:    kshiftlw $9, %k2, %k0
   1744 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1745 ; KNL-NEXT:    kmovw %k0, %r12d
   1746 ; KNL-NEXT:    kshiftlw $8, %k2, %k0
   1747 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1748 ; KNL-NEXT:    kmovw %k0, %r13d
   1749 ; KNL-NEXT:    kshiftlw $7, %k2, %k0
   1750 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1751 ; KNL-NEXT:    kmovw %k0, %edx
   1752 ; KNL-NEXT:    kshiftlw $6, %k2, %k0
   1753 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1754 ; KNL-NEXT:    kmovw %k0, %esi
   1755 ; KNL-NEXT:    kshiftlw $5, %k2, %k0
   1756 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1757 ; KNL-NEXT:    kmovw %k0, %ebp
   1758 ; KNL-NEXT:    kshiftlw $4, %k2, %k0
   1759 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1760 ; KNL-NEXT:    kmovw %k0, %ebx
   1761 ; KNL-NEXT:    kshiftlw $3, %k2, %k0
   1762 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1763 ; KNL-NEXT:    kmovw %k0, %eax
   1764 ; KNL-NEXT:    kshiftlw $2, %k2, %k0
   1765 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1766 ; KNL-NEXT:    kmovw %k0, %ecx
   1767 ; KNL-NEXT:    kshiftlw $1, %k2, %k0
   1768 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1769 ; KNL-NEXT:    vmovd %r10d, %xmm2
   1770 ; KNL-NEXT:    kmovw %k0, %r10d
   1771 ; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k1
   1772 ; KNL-NEXT:    kshiftlw $0, %k2, %k0
   1773 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1774 ; KNL-NEXT:    vpinsrb $1, %r8d, %xmm2, %xmm1
   1775 ; KNL-NEXT:    vpinsrb $2, %r9d, %xmm1, %xmm1
   1776 ; KNL-NEXT:    vpinsrb $3, %r11d, %xmm1, %xmm1
   1777 ; KNL-NEXT:    vpinsrb $4, %r14d, %xmm1, %xmm1
   1778 ; KNL-NEXT:    vpinsrb $5, %r15d, %xmm1, %xmm1
   1779 ; KNL-NEXT:    vpinsrb $6, %r12d, %xmm1, %xmm1
   1780 ; KNL-NEXT:    vpinsrb $7, %r13d, %xmm1, %xmm1
   1781 ; KNL-NEXT:    vpinsrb $8, %edx, %xmm1, %xmm1
   1782 ; KNL-NEXT:    vpinsrb $9, %esi, %xmm1, %xmm1
   1783 ; KNL-NEXT:    vpinsrb $10, %ebp, %xmm1, %xmm1
   1784 ; KNL-NEXT:    vpinsrb $11, %ebx, %xmm1, %xmm1
   1785 ; KNL-NEXT:    vpinsrb $12, %eax, %xmm1, %xmm1
   1786 ; KNL-NEXT:    vpinsrb $13, %ecx, %xmm1, %xmm1
   1787 ; KNL-NEXT:    vpinsrb $14, %r10d, %xmm1, %xmm1
   1788 ; KNL-NEXT:    kmovw %k0, %eax
   1789 ; KNL-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
   1790 ; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
   1791 ; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
   1792 ; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0
   1793 ; KNL-NEXT:    kmovw %k0, 4(%rdi)
        ; Piece 1: mask held in %k1, rebuilt in %xmm0, stored at 2(%rdi).
        ; The vptestmd of %zmm0 into %k0 for the last piece is interleaved here.
   1794 ; KNL-NEXT:    kshiftlw $14, %k1, %k0
   1795 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1796 ; KNL-NEXT:    kmovw %k0, %r8d
   1797 ; KNL-NEXT:    kshiftlw $15, %k1, %k0
   1798 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1799 ; KNL-NEXT:    kmovw %k0, %r10d
   1800 ; KNL-NEXT:    kshiftlw $13, %k1, %k0
   1801 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1802 ; KNL-NEXT:    kmovw %k0, %r9d
   1803 ; KNL-NEXT:    kshiftlw $12, %k1, %k0
   1804 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1805 ; KNL-NEXT:    kmovw %k0, %r11d
   1806 ; KNL-NEXT:    kshiftlw $11, %k1, %k0
   1807 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1808 ; KNL-NEXT:    kmovw %k0, %r14d
   1809 ; KNL-NEXT:    kshiftlw $10, %k1, %k0
   1810 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1811 ; KNL-NEXT:    kmovw %k0, %r15d
   1812 ; KNL-NEXT:    kshiftlw $9, %k1, %k0
   1813 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1814 ; KNL-NEXT:    kmovw %k0, %r12d
   1815 ; KNL-NEXT:    kshiftlw $8, %k1, %k0
   1816 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1817 ; KNL-NEXT:    kmovw %k0, %r13d
   1818 ; KNL-NEXT:    kshiftlw $7, %k1, %k0
   1819 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1820 ; KNL-NEXT:    kmovw %k0, %edx
   1821 ; KNL-NEXT:    kshiftlw $6, %k1, %k0
   1822 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1823 ; KNL-NEXT:    kmovw %k0, %esi
   1824 ; KNL-NEXT:    kshiftlw $5, %k1, %k0
   1825 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1826 ; KNL-NEXT:    kmovw %k0, %ebp
   1827 ; KNL-NEXT:    kshiftlw $4, %k1, %k0
   1828 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1829 ; KNL-NEXT:    kmovw %k0, %ebx
   1830 ; KNL-NEXT:    kshiftlw $3, %k1, %k0
   1831 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1832 ; KNL-NEXT:    kmovw %k0, %eax
   1833 ; KNL-NEXT:    kshiftlw $2, %k1, %k0
   1834 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1835 ; KNL-NEXT:    kmovw %k0, %ecx
   1836 ; KNL-NEXT:    kshiftlw $1, %k1, %k0
   1837 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1838 ; KNL-NEXT:    vmovd %r10d, %xmm1
   1839 ; KNL-NEXT:    kmovw %k0, %r10d
   1840 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
   1841 ; KNL-NEXT:    kshiftlw $0, %k1, %k1
   1842 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1843 ; KNL-NEXT:    vpinsrb $1, %r8d, %xmm1, %xmm0
   1844 ; KNL-NEXT:    vpinsrb $2, %r9d, %xmm0, %xmm0
   1845 ; KNL-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
   1846 ; KNL-NEXT:    vpinsrb $4, %r14d, %xmm0, %xmm0
   1847 ; KNL-NEXT:    vpinsrb $5, %r15d, %xmm0, %xmm0
   1848 ; KNL-NEXT:    vpinsrb $6, %r12d, %xmm0, %xmm0
   1849 ; KNL-NEXT:    vpinsrb $7, %r13d, %xmm0, %xmm0
   1850 ; KNL-NEXT:    vpinsrb $8, %edx, %xmm0, %xmm0
   1851 ; KNL-NEXT:    vpinsrb $9, %esi, %xmm0, %xmm0
   1852 ; KNL-NEXT:    vpinsrb $10, %ebp, %xmm0, %xmm0
   1853 ; KNL-NEXT:    vpinsrb $11, %ebx, %xmm0, %xmm0
   1854 ; KNL-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
   1855 ; KNL-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
   1856 ; KNL-NEXT:    vpinsrb $14, %r10d, %xmm0, %xmm0
   1857 ; KNL-NEXT:    kmovw %k1, %eax
   1858 ; KNL-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
   1859 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
   1860 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
   1861 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
   1862 ; KNL-NEXT:    kmovw %k1, 2(%rdi)
        ; Piece 0: mask held in %k0, rebuilt in %xmm0, stored at (%rdi).
   1863 ; KNL-NEXT:    kshiftlw $14, %k0, %k1
   1864 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1865 ; KNL-NEXT:    kmovw %k1, %r8d
   1866 ; KNL-NEXT:    kshiftlw $15, %k0, %k1
   1867 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1868 ; KNL-NEXT:    kmovw %k1, %r9d
   1869 ; KNL-NEXT:    kshiftlw $13, %k0, %k1
   1870 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1871 ; KNL-NEXT:    kmovw %k1, %r10d
   1872 ; KNL-NEXT:    kshiftlw $12, %k0, %k1
   1873 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1874 ; KNL-NEXT:    kmovw %k1, %r11d
   1875 ; KNL-NEXT:    kshiftlw $11, %k0, %k1
   1876 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1877 ; KNL-NEXT:    kmovw %k1, %r14d
   1878 ; KNL-NEXT:    kshiftlw $10, %k0, %k1
   1879 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1880 ; KNL-NEXT:    kmovw %k1, %r15d
   1881 ; KNL-NEXT:    kshiftlw $9, %k0, %k1
   1882 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1883 ; KNL-NEXT:    kmovw %k1, %r12d
   1884 ; KNL-NEXT:    kshiftlw $8, %k0, %k1
   1885 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1886 ; KNL-NEXT:    kmovw %k1, %r13d
   1887 ; KNL-NEXT:    kshiftlw $7, %k0, %k1
   1888 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1889 ; KNL-NEXT:    kmovw %k1, %edx
   1890 ; KNL-NEXT:    kshiftlw $6, %k0, %k1
   1891 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1892 ; KNL-NEXT:    kmovw %k1, %esi
   1893 ; KNL-NEXT:    kshiftlw $5, %k0, %k1
   1894 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1895 ; KNL-NEXT:    kmovw %k1, %ebp
   1896 ; KNL-NEXT:    kshiftlw $4, %k0, %k1
   1897 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1898 ; KNL-NEXT:    kmovw %k1, %ebx
   1899 ; KNL-NEXT:    kshiftlw $3, %k0, %k1
   1900 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1901 ; KNL-NEXT:    kmovw %k1, %eax
   1902 ; KNL-NEXT:    kshiftlw $2, %k0, %k1
   1903 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1904 ; KNL-NEXT:    kmovw %k1, %ecx
   1905 ; KNL-NEXT:    kshiftlw $1, %k0, %k1
   1906 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
   1907 ; KNL-NEXT:    vmovd %r9d, %xmm0
   1908 ; KNL-NEXT:    kmovw %k1, %r9d
   1909 ; KNL-NEXT:    vpinsrb $1, %r8d, %xmm0, %xmm0
   1910 ; KNL-NEXT:    vpinsrb $2, %r10d, %xmm0, %xmm0
   1911 ; KNL-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
   1912 ; KNL-NEXT:    vpinsrb $4, %r14d, %xmm0, %xmm0
   1913 ; KNL-NEXT:    vpinsrb $5, %r15d, %xmm0, %xmm0
   1914 ; KNL-NEXT:    vpinsrb $6, %r12d, %xmm0, %xmm0
   1915 ; KNL-NEXT:    vpinsrb $7, %r13d, %xmm0, %xmm0
   1916 ; KNL-NEXT:    vpinsrb $8, %edx, %xmm0, %xmm0
   1917 ; KNL-NEXT:    vpinsrb $9, %esi, %xmm0, %xmm0
   1918 ; KNL-NEXT:    vpinsrb $10, %ebp, %xmm0, %xmm0
   1919 ; KNL-NEXT:    vpinsrb $11, %ebx, %xmm0, %xmm0
   1920 ; KNL-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
   1921 ; KNL-NEXT:    kshiftlw $0, %k0, %k0
   1922 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
   1923 ; KNL-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
   1924 ; KNL-NEXT:    vpinsrb $14, %r9d, %xmm0, %xmm0
   1925 ; KNL-NEXT:    kmovw %k0, %eax
   1926 ; KNL-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
   1927 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
   1928 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
   1929 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
   1930 ; KNL-NEXT:    kmovw %k0, (%rdi)
   1931 ; KNL-NEXT:    popq %rbx
   1932 ; KNL-NEXT:    popq %r12
   1933 ; KNL-NEXT:    popq %r13
   1934 ; KNL-NEXT:    popq %r14
   1935 ; KNL-NEXT:    popq %r15
   1936 ; KNL-NEXT:    popq %rbp
   1937 ; KNL-NEXT:    retq
   1938 ;
   1939 ; SKX-LABEL: store_64i1:
   1940 ; SKX:       ## BB#0:
   1941 ; SKX-NEXT:    vpsllw $7, %zmm0, %zmm0
   1942 ; SKX-NEXT:    vpmovb2m %zmm0, %k0
   1943 ; SKX-NEXT:    kmovq %k0, (%rdi)
   1944 ; SKX-NEXT:    retq
   1945   store <64 x i1> %v, <64 x i1>* %a
   1946   ret void
   1947 }
   1948