Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
      3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
      4 
      ; test1 - ordered less-or-equal compare (fcmp ole) of two <16 x float>
      ; vectors feeding a select between the same two operands. Despite the
      ; value name %max, the select keeps %x in lanes where %x <= %y.
      ; Both llc configurations share the CHECK prefix here and expect a
      ; vcmpleps into %k1 followed by a vblendmps masked by %k1.
      5 define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
      6 ; CHECK-LABEL: test1:
      7 ; CHECK:       ## BB#0:
      8 ; CHECK-NEXT:    vcmpleps %zmm1, %zmm0, %k1
      9 ; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
     10 ; CHECK-NEXT:    retq
     11   %mask = fcmp ole <16 x float> %x, %y
     12   %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
     13   ret <16 x float> %max
     14 }
     15 
     ; test2 - <8 x double> counterpart of the fcmp ole + select pattern.
     ; The select keeps %x in lanes where %x <= %y (the name %max notwithstanding).
     ; Expected under the shared CHECK prefix, vcmplepd into %k1 and then a
     ; vblendmpd masked by %k1.
     16 define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
     17 ; CHECK-LABEL: test2:
     18 ; CHECK:       ## BB#0:
     19 ; CHECK-NEXT:    vcmplepd %zmm1, %zmm0, %k1
     20 ; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
     21 ; CHECK-NEXT:    retq
     22   %mask = fcmp ole <8 x double> %x, %y
     23   %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
     24   ret <8 x double> %max
     25 }
     26 
     ; test3 - integer equality compare where one operand is loaded from
     ; memory (%yp, align 4). The select picks %x where %x == %y, else %x1.
     ; The expected code uses the load directly as the memory operand of
     ; vpcmpeqd ((%rdi)) rather than a separate load instruction, then a
     ; vpblendmd masked by %k1.
     27 define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
     28 ; CHECK-LABEL: test3:
     29 ; CHECK:       ## BB#0:
     30 ; CHECK-NEXT:    vpcmpeqd (%rdi), %zmm0, %k1
     31 ; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
     32 ; CHECK-NEXT:    retq
     33   %y = load <16 x i32>, <16 x i32>* %yp, align 4
     34   %mask = icmp eq <16 x i32> %x, %y
     35   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
     36   ret <16 x i32> %max
     37 }
     38 
     ; test4_unsigned - unsigned greater-or-equal compare (icmp uge) on
     ; <16 x i32>. The select picks %x1 where %x >= %y (unsigned), else %y.
     ; Expected is the "not less than, unsigned" compare vpcmpnltud into %k1,
     ; followed by a vpblendmd of %zmm2 and %zmm1 under that mask.
     39 define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
     40 ; CHECK-LABEL: test4_unsigned:
     41 ; CHECK:       ## BB#0:
     42 ; CHECK-NEXT:    vpcmpnltud %zmm1, %zmm0, %k1
     43 ; CHECK-NEXT:    vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
     44 ; CHECK-NEXT:    retq
     45   %mask = icmp uge <16 x i32> %x, %y
     46   %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
     47   ret <16 x i32> %max
     48 }
     49 
     ; test5 - equality compare on <8 x i64> with a select between the two
     ; compared operands. Expected is vpcmpeqq into %k1 and then a vpblendmq
     ; masked by %k1.
     50 define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
     51 ; CHECK-LABEL: test5:
     52 ; CHECK:       ## BB#0:
     53 ; CHECK-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1
     54 ; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
     55 ; CHECK-NEXT:    retq
     56   %mask = icmp eq <8 x i64> %x, %y
     57   %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
     58   ret <8 x i64> %max
     59 }
     60 
     ; test6_unsigned - unsigned greater-than compare (icmp ugt) on <8 x i64>.
     ; The select picks %x1 where %x > %y (unsigned), else %y.
     ; Expected is the "not less or equal, unsigned" compare vpcmpnleuq into
     ; %k1, followed by a vpblendmq of %zmm2 and %zmm1 under that mask.
     61 define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
     62 ; CHECK-LABEL: test6_unsigned:
     63 ; CHECK:       ## BB#0:
     64 ; CHECK-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1
     65 ; CHECK-NEXT:    vpblendmq %zmm2, %zmm1, %zmm0 {%k1}
     66 ; CHECK-NEXT:    retq
     67   %mask = icmp ugt <8 x i64> %x, %y
     68   %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
     69   ret <8 x i64> %max
     70 }
     71 
     ; test7 - 128-bit case, ordered less-than compare of <4 x float> %a
     ; against zero, selecting %a where the compare holds and %b otherwise.
     ; The two configurations are checked separately.
     ;   On KNL the compare result is expected in an xmm register and the
     ;   select is a vblendvps (no mask register involved).
     ;   On SKX the compare result is expected in %k1 and the select is a
     ;   vblendmps on xmm registers masked by %k1.
     ; NOTE(review): presumably the difference comes from 128-bit masked
     ; operations being available only on the SKX target - confirm.
     72 define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
     73 ; KNL-LABEL: test7:
     74 ; KNL:       ## BB#0:
     75 ; KNL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
     76 ; KNL-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
     77 ; KNL-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
     78 ; KNL-NEXT:    retq
     79 ;
     80 ; SKX-LABEL: test7:
     81 ; SKX:       ## BB#0:
     82 ; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
     83 ; SKX-NEXT:    vcmpltps %xmm2, %xmm0, %k1
     84 ; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
     85 ; SKX-NEXT:    retq
     86 
     87   %mask = fcmp olt <4 x float> %a, zeroinitializer
     88   %c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
     89   ret <4 x float>%c
     90 }
     91 
     ; test8 - <2 x double> counterpart of the 128-bit compare-against-zero
     ; pattern, fcmp olt of %a against zero, selecting %a or %b per lane.
     ;   On KNL the expected code is vxorpd / vcmpltpd into an xmm register /
     ;   vblendvpd.
     ;   On SKX the expected code is vpxord / vcmpltpd into %k1 / vblendmpd
     ;   masked by %k1.
     ; NOTE(review): presumably the split reflects 128-bit masked operations
     ; being available only on the SKX target - confirm.
     92 define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
     93 ; KNL-LABEL: test8:
     94 ; KNL:       ## BB#0:
     95 ; KNL-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
     96 ; KNL-NEXT:    vcmpltpd %xmm2, %xmm0, %xmm2
     97 ; KNL-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
     98 ; KNL-NEXT:    retq
     99 ;
    100 ; SKX-LABEL: test8:
    101 ; SKX:       ## BB#0:
    102 ; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
    103 ; SKX-NEXT:    vcmpltpd %xmm2, %xmm0, %k1
    104 ; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
    105 ; SKX-NEXT:    retq
    106   %mask = fcmp olt <2 x double> %a, zeroinitializer
    107   %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
    108   ret <2 x double>%c
    109 }
    110 
    ; test9 - 256-bit case, equality compare on <8 x i32> with a select
    ; between the two compared operands.
    ;   On KNL the expected compare and blend operate on zmm registers, and
    ;   the output carries "kill" annotations relating the ymm and zmm
    ;   registers before and after.
    ;   On SKX the expected compare and blend operate directly on ymm
    ;   registers with a %k1 mask.
    111 define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
    112 ; KNL-LABEL: test9:
    113 ; KNL:       ## BB#0:
    114 ; KNL-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
    115 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
    116 ; KNL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
    117 ; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
    118 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
    119 ; KNL-NEXT:    retq
    120 ;
    121 ; SKX-LABEL: test9:
    122 ; SKX:       ## BB#0:
    123 ; SKX-NEXT:    vpcmpeqd %ymm1, %ymm0, %k1
    124 ; SKX-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
    125 ; SKX-NEXT:    retq
    126   %mask = icmp eq <8 x i32> %x, %y
    127   %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
    128   ret <8 x i32> %max
    129 }
    130 
    ; test10 - floating-point counterpart of the 256-bit case, fcmp oeq on
    ; <8 x float> with a select between the two compared operands.
    ;   On KNL the expected vcmpeqps / vblendmps operate on zmm registers,
    ;   bracketed by "kill" annotations relating ymm and zmm registers.
    ;   On SKX the expected vcmpeqps / vblendmps operate directly on ymm
    ;   registers with a %k1 mask.
    131 define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
    132 ; KNL-LABEL: test10:
    133 ; KNL:       ## BB#0:
    134 ; KNL-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
    135 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
    136 ; KNL-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
    137 ; KNL-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
    138 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
    139 ; KNL-NEXT:    retq
    140 ;
    141 ; SKX-LABEL: test10:
    142 ; SKX:       ## BB#0:
    143 ; SKX-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
    144 ; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1}
    145 ; SKX-NEXT:    retq
    146 
    147   %mask = fcmp oeq <8 x float> %x, %y
    148   %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
    149   ret <8 x float> %max
    150 }
    151 
    ; test11_unsigned - unsigned greater-than compare on <8 x i32> whose
    ; select returns the same operands that were compared (%x where
    ; %x > %y, else %y), i.e. an element-wise unsigned maximum.
    ; Under the shared CHECK prefix the whole pattern is expected to become
    ; a single vpmaxud on ymm registers, with no mask register.
    152 define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
    153 ; CHECK-LABEL: test11_unsigned:
    154 ; CHECK:       ## BB#0:
    155 ; CHECK-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
    156 ; CHECK-NEXT:    retq
    157   %mask = icmp ugt <8 x i32> %x, %y
    158   %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
    159   ret <8 x i32> %max
    160 }
    161 
    ; test12 - equality compare on <16 x i64> whose <16 x i1> result is
    ; bitcast to a scalar i16 and returned.
    ; Expected under the shared CHECK prefix are two vpcmpeqq compares on zmm
    ; register pairs (into %k0 and %k1), a kunpckbw merging the two masks,
    ; and a kmovw of the merged mask into %eax.
    162 define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
    163 ; CHECK-LABEL: test12:
    164 ; CHECK:       ## BB#0:
    165 ; CHECK-NEXT:    vpcmpeqq %zmm2, %zmm0, %k0
    166 ; CHECK-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
    167 ; CHECK-NEXT:    kunpckbw %k0, %k1, %k0
    168 ; CHECK-NEXT:    kmovw %k0, %eax
    169 ; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
    170 ; CHECK-NEXT:    retq
    171   %res = icmp eq <16 x i64> %a, %b
    172   %res1 = bitcast <16 x i1> %res to i16
    173   ret i16 %res1
    174 }
    175 
    ; test12_v32i32 - equality compare on <32 x i32> whose <32 x i1> result
    ; is bitcast to a scalar i32 and returned.
    ;   On KNL the expected code sets up an aligned stack frame and, for each
    ;   of the two vpcmpeqd results, pulls the mask bits out one at a time
    ;   (kshiftlw / kshiftrw / kmovw), inserts them as bytes into an xmm
    ;   register (vmovd / vpinsrb), rebuilds a mask with
    ;   vpmovsxbd / vpslld / vptestmd, and stores it to the stack with kmovw.
    ;   The i32 result is then read back from (%rsp) with movl.
    ;   On SKX the expected code is two vpcmpeqd compares, a kunpckwd merging
    ;   the two masks, and a kmovd into %eax.
    176 define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
    177 ; KNL-LABEL: test12_v32i32:
    178 ; KNL:       ## BB#0:
    179 ; KNL-NEXT:    pushq %rbp
    180 ; KNL-NEXT:    movq %rsp, %rbp
    181 ; KNL-NEXT:    andq $-32, %rsp
    182 ; KNL-NEXT:    subq $32, %rsp
    183 ; KNL-NEXT:    vpcmpeqd %zmm3, %zmm1, %k0
    184 ; KNL-NEXT:    kshiftlw $14, %k0, %k1
    185 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    186 ; KNL-NEXT:    kmovw %k1, %eax
    187 ; KNL-NEXT:    kshiftlw $15, %k0, %k1
    188 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    189 ; KNL-NEXT:    kmovw %k1, %ecx
    190 ; KNL-NEXT:    vmovd %ecx, %xmm1
    191 ; KNL-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
    192 ; KNL-NEXT:    kshiftlw $13, %k0, %k1
    193 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    194 ; KNL-NEXT:    kmovw %k1, %eax
    195 ; KNL-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
    196 ; KNL-NEXT:    kshiftlw $12, %k0, %k1
    197 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    198 ; KNL-NEXT:    kmovw %k1, %eax
    199 ; KNL-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm1
    200 ; KNL-NEXT:    kshiftlw $11, %k0, %k1
    201 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    202 ; KNL-NEXT:    kmovw %k1, %eax
    203 ; KNL-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
    204 ; KNL-NEXT:    kshiftlw $10, %k0, %k1
    205 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    206 ; KNL-NEXT:    kmovw %k1, %eax
    207 ; KNL-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
    208 ; KNL-NEXT:    kshiftlw $9, %k0, %k1
    209 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    210 ; KNL-NEXT:    kmovw %k1, %eax
    211 ; KNL-NEXT:    vpinsrb $6, %eax, %xmm1, %xmm1
    212 ; KNL-NEXT:    kshiftlw $8, %k0, %k1
    213 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    214 ; KNL-NEXT:    kmovw %k1, %eax
    215 ; KNL-NEXT:    vpinsrb $7, %eax, %xmm1, %xmm1
    216 ; KNL-NEXT:    kshiftlw $7, %k0, %k1
    217 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    218 ; KNL-NEXT:    kmovw %k1, %eax
    219 ; KNL-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
    220 ; KNL-NEXT:    kshiftlw $6, %k0, %k1
    221 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    222 ; KNL-NEXT:    kmovw %k1, %eax
    223 ; KNL-NEXT:    vpinsrb $9, %eax, %xmm1, %xmm1
    224 ; KNL-NEXT:    kshiftlw $5, %k0, %k1
    225 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    226 ; KNL-NEXT:    kmovw %k1, %eax
    227 ; KNL-NEXT:    vpinsrb $10, %eax, %xmm1, %xmm1
    228 ; KNL-NEXT:    kshiftlw $4, %k0, %k1
    229 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    230 ; KNL-NEXT:    kmovw %k1, %eax
    231 ; KNL-NEXT:    vpinsrb $11, %eax, %xmm1, %xmm1
    232 ; KNL-NEXT:    kshiftlw $3, %k0, %k1
    233 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    234 ; KNL-NEXT:    kmovw %k1, %eax
    235 ; KNL-NEXT:    vpinsrb $12, %eax, %xmm1, %xmm1
    236 ; KNL-NEXT:    kshiftlw $2, %k0, %k1
    237 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    238 ; KNL-NEXT:    kmovw %k1, %eax
    239 ; KNL-NEXT:    vpinsrb $13, %eax, %xmm1, %xmm1
    240 ; KNL-NEXT:    kshiftlw $1, %k0, %k1
    241 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    242 ; KNL-NEXT:    kmovw %k1, %eax
    243 ; KNL-NEXT:    vpinsrb $14, %eax, %xmm1, %xmm1
    244 ; KNL-NEXT:    kshiftlw $0, %k0, %k0
    245 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
    246 ; KNL-NEXT:    kmovw %k0, %eax
    247 ; KNL-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
    248 ; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
    249 ; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
    250 ; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0
    251 ; KNL-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
    252 ; KNL-NEXT:    vpcmpeqd %zmm2, %zmm0, %k0
    253 ; KNL-NEXT:    kshiftlw $14, %k0, %k1
    254 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    255 ; KNL-NEXT:    kmovw %k1, %eax
    256 ; KNL-NEXT:    kshiftlw $15, %k0, %k1
    257 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    258 ; KNL-NEXT:    kmovw %k1, %ecx
    259 ; KNL-NEXT:    vmovd %ecx, %xmm0
    260 ; KNL-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
    261 ; KNL-NEXT:    kshiftlw $13, %k0, %k1
    262 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    263 ; KNL-NEXT:    kmovw %k1, %eax
    264 ; KNL-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0
    265 ; KNL-NEXT:    kshiftlw $12, %k0, %k1
    266 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    267 ; KNL-NEXT:    kmovw %k1, %eax
    268 ; KNL-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0
    269 ; KNL-NEXT:    kshiftlw $11, %k0, %k1
    270 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    271 ; KNL-NEXT:    kmovw %k1, %eax
    272 ; KNL-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0
    273 ; KNL-NEXT:    kshiftlw $10, %k0, %k1
    274 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    275 ; KNL-NEXT:    kmovw %k1, %eax
    276 ; KNL-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
    277 ; KNL-NEXT:    kshiftlw $9, %k0, %k1
    278 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    279 ; KNL-NEXT:    kmovw %k1, %eax
    280 ; KNL-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0
    281 ; KNL-NEXT:    kshiftlw $8, %k0, %k1
    282 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    283 ; KNL-NEXT:    kmovw %k1, %eax
    284 ; KNL-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0
    285 ; KNL-NEXT:    kshiftlw $7, %k0, %k1
    286 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    287 ; KNL-NEXT:    kmovw %k1, %eax
    288 ; KNL-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
    289 ; KNL-NEXT:    kshiftlw $6, %k0, %k1
    290 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    291 ; KNL-NEXT:    kmovw %k1, %eax
    292 ; KNL-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0
    293 ; KNL-NEXT:    kshiftlw $5, %k0, %k1
    294 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    295 ; KNL-NEXT:    kmovw %k1, %eax
    296 ; KNL-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
    297 ; KNL-NEXT:    kshiftlw $4, %k0, %k1
    298 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    299 ; KNL-NEXT:    kmovw %k1, %eax
    300 ; KNL-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
    301 ; KNL-NEXT:    kshiftlw $3, %k0, %k1
    302 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    303 ; KNL-NEXT:    kmovw %k1, %eax
    304 ; KNL-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
    305 ; KNL-NEXT:    kshiftlw $2, %k0, %k1
    306 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    307 ; KNL-NEXT:    kmovw %k1, %eax
    308 ; KNL-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0
    309 ; KNL-NEXT:    kshiftlw $1, %k0, %k1
    310 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    311 ; KNL-NEXT:    kmovw %k1, %eax
    312 ; KNL-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
    313 ; KNL-NEXT:    kshiftlw $0, %k0, %k0
    314 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
    315 ; KNL-NEXT:    kmovw %k0, %eax
    316 ; KNL-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
    317 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
    318 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
    319 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
    320 ; KNL-NEXT:    kmovw %k0, (%rsp)
    321 ; KNL-NEXT:    movl (%rsp), %eax
    322 ; KNL-NEXT:    movq %rbp, %rsp
    323 ; KNL-NEXT:    popq %rbp
    324 ; KNL-NEXT:    retq
    325 ;
    326 ; SKX-LABEL: test12_v32i32:
    327 ; SKX:       ## BB#0:
    328 ; SKX-NEXT:    vpcmpeqd %zmm2, %zmm0, %k0
    329 ; SKX-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
    330 ; SKX-NEXT:    kunpckwd %k0, %k1, %k0
    331 ; SKX-NEXT:    kmovd %k0, %eax
    332 ; SKX-NEXT:    retq
    333   %res = icmp eq <32 x i32> %a, %b
    334   %res1 = bitcast <32 x i1> %res to i32
    335   ret i32 %res1
    336 }
    337 
    ; test12_v64i16 - equality compare on <64 x i16> whose <64 x i1> result
    ; is bitcast to a scalar i64 and returned.
    ;   On KNL the expected code sets up an aligned stack frame and handles
    ;   the input as four ymm-sized vpcmpeqw compares. Each result is turned
    ;   into a mask (vpmovsxwd / vpslld / vptestmd), its bits are pulled out
    ;   one at a time (kshiftlw / kshiftrw / kmovw) and inserted as bytes
    ;   into an xmm register (vmovd / vpinsrb), a mask is rebuilt
    ;   (vpmovsxbd / vpslld / vptestmd) and stored to a stack slot with
    ;   kmovw. The i64 result is assembled from two movl loads combined
    ;   with shlq $32 and orq.
    ;   On SKX the expected code is two vpcmpeqw compares on zmm registers,
    ;   a kunpckdq merging the two masks, and a kmovq into %rax.
    338 define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
    339 ; KNL-LABEL: test12_v64i16:
    340 ; KNL:       ## BB#0:
    341 ; KNL-NEXT:    pushq %rbp
    342 ; KNL-NEXT:    movq %rsp, %rbp
    343 ; KNL-NEXT:    andq $-32, %rsp
    344 ; KNL-NEXT:    subq $64, %rsp
    345 ; KNL-NEXT:    vpcmpeqw %ymm5, %ymm1, %ymm1
    346 ; KNL-NEXT:    vpmovsxwd %ymm1, %zmm1
    347 ; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
    348 ; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0
    349 ; KNL-NEXT:    kshiftlw $14, %k0, %k1
    350 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    351 ; KNL-NEXT:    kmovw %k1, %eax
    352 ; KNL-NEXT:    kshiftlw $15, %k0, %k1
    353 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    354 ; KNL-NEXT:    kmovw %k1, %ecx
    355 ; KNL-NEXT:    vmovd %ecx, %xmm1
    356 ; KNL-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
    357 ; KNL-NEXT:    kshiftlw $13, %k0, %k1
    358 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    359 ; KNL-NEXT:    kmovw %k1, %eax
    360 ; KNL-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
    361 ; KNL-NEXT:    kshiftlw $12, %k0, %k1
    362 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    363 ; KNL-NEXT:    kmovw %k1, %eax
    364 ; KNL-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm1
    365 ; KNL-NEXT:    kshiftlw $11, %k0, %k1
    366 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    367 ; KNL-NEXT:    kmovw %k1, %eax
    368 ; KNL-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
    369 ; KNL-NEXT:    kshiftlw $10, %k0, %k1
    370 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    371 ; KNL-NEXT:    kmovw %k1, %eax
    372 ; KNL-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
    373 ; KNL-NEXT:    kshiftlw $9, %k0, %k1
    374 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    375 ; KNL-NEXT:    kmovw %k1, %eax
    376 ; KNL-NEXT:    vpinsrb $6, %eax, %xmm1, %xmm1
    377 ; KNL-NEXT:    kshiftlw $8, %k0, %k1
    378 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    379 ; KNL-NEXT:    kmovw %k1, %eax
    380 ; KNL-NEXT:    vpinsrb $7, %eax, %xmm1, %xmm1
    381 ; KNL-NEXT:    kshiftlw $7, %k0, %k1
    382 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    383 ; KNL-NEXT:    kmovw %k1, %eax
    384 ; KNL-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
    385 ; KNL-NEXT:    kshiftlw $6, %k0, %k1
    386 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    387 ; KNL-NEXT:    kmovw %k1, %eax
    388 ; KNL-NEXT:    vpinsrb $9, %eax, %xmm1, %xmm1
    389 ; KNL-NEXT:    kshiftlw $5, %k0, %k1
    390 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    391 ; KNL-NEXT:    kmovw %k1, %eax
    392 ; KNL-NEXT:    vpinsrb $10, %eax, %xmm1, %xmm1
    393 ; KNL-NEXT:    kshiftlw $4, %k0, %k1
    394 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    395 ; KNL-NEXT:    kmovw %k1, %eax
    396 ; KNL-NEXT:    vpinsrb $11, %eax, %xmm1, %xmm1
    397 ; KNL-NEXT:    kshiftlw $3, %k0, %k1
    398 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    399 ; KNL-NEXT:    kmovw %k1, %eax
    400 ; KNL-NEXT:    vpinsrb $12, %eax, %xmm1, %xmm1
    401 ; KNL-NEXT:    kshiftlw $2, %k0, %k1
    402 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    403 ; KNL-NEXT:    kmovw %k1, %eax
    404 ; KNL-NEXT:    vpinsrb $13, %eax, %xmm1, %xmm1
    405 ; KNL-NEXT:    kshiftlw $1, %k0, %k1
    406 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    407 ; KNL-NEXT:    kmovw %k1, %eax
    408 ; KNL-NEXT:    vpinsrb $14, %eax, %xmm1, %xmm1
    409 ; KNL-NEXT:    kshiftlw $0, %k0, %k0
    410 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
    411 ; KNL-NEXT:    kmovw %k0, %eax
    412 ; KNL-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
    413 ; KNL-NEXT:    vpmovsxbd %xmm1, %zmm1
    414 ; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
    415 ; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0
    416 ; KNL-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
    417 ; KNL-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm0
    418 ; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
    419 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
    420 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
    421 ; KNL-NEXT:    kshiftlw $14, %k0, %k1
    422 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    423 ; KNL-NEXT:    kmovw %k1, %eax
    424 ; KNL-NEXT:    kshiftlw $15, %k0, %k1
    425 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    426 ; KNL-NEXT:    kmovw %k1, %ecx
    427 ; KNL-NEXT:    vmovd %ecx, %xmm0
    428 ; KNL-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
    429 ; KNL-NEXT:    kshiftlw $13, %k0, %k1
    430 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    431 ; KNL-NEXT:    kmovw %k1, %eax
    432 ; KNL-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0
    433 ; KNL-NEXT:    kshiftlw $12, %k0, %k1
    434 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    435 ; KNL-NEXT:    kmovw %k1, %eax
    436 ; KNL-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0
    437 ; KNL-NEXT:    kshiftlw $11, %k0, %k1
    438 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    439 ; KNL-NEXT:    kmovw %k1, %eax
    440 ; KNL-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0
    441 ; KNL-NEXT:    kshiftlw $10, %k0, %k1
    442 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    443 ; KNL-NEXT:    kmovw %k1, %eax
    444 ; KNL-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
    445 ; KNL-NEXT:    kshiftlw $9, %k0, %k1
    446 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    447 ; KNL-NEXT:    kmovw %k1, %eax
    448 ; KNL-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0
    449 ; KNL-NEXT:    kshiftlw $8, %k0, %k1
    450 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    451 ; KNL-NEXT:    kmovw %k1, %eax
    452 ; KNL-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0
    453 ; KNL-NEXT:    kshiftlw $7, %k0, %k1
    454 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    455 ; KNL-NEXT:    kmovw %k1, %eax
    456 ; KNL-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
    457 ; KNL-NEXT:    kshiftlw $6, %k0, %k1
    458 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    459 ; KNL-NEXT:    kmovw %k1, %eax
    460 ; KNL-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0
    461 ; KNL-NEXT:    kshiftlw $5, %k0, %k1
    462 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    463 ; KNL-NEXT:    kmovw %k1, %eax
    464 ; KNL-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
    465 ; KNL-NEXT:    kshiftlw $4, %k0, %k1
    466 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    467 ; KNL-NEXT:    kmovw %k1, %eax
    468 ; KNL-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
    469 ; KNL-NEXT:    kshiftlw $3, %k0, %k1
    470 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    471 ; KNL-NEXT:    kmovw %k1, %eax
    472 ; KNL-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
    473 ; KNL-NEXT:    kshiftlw $2, %k0, %k1
    474 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    475 ; KNL-NEXT:    kmovw %k1, %eax
    476 ; KNL-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0
    477 ; KNL-NEXT:    kshiftlw $1, %k0, %k1
    478 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    479 ; KNL-NEXT:    kmovw %k1, %eax
    480 ; KNL-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
    481 ; KNL-NEXT:    kshiftlw $0, %k0, %k0
    482 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
    483 ; KNL-NEXT:    kmovw %k0, %eax
    484 ; KNL-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
    485 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
    486 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
    487 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
    488 ; KNL-NEXT:    kmovw %k0, (%rsp)
    489 ; KNL-NEXT:    vpcmpeqw %ymm7, %ymm3, %ymm0
    490 ; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
    491 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
    492 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
    493 ; KNL-NEXT:    kshiftlw $14, %k0, %k1
    494 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    495 ; KNL-NEXT:    kmovw %k1, %eax
    496 ; KNL-NEXT:    kshiftlw $15, %k0, %k1
    497 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    498 ; KNL-NEXT:    kmovw %k1, %ecx
    499 ; KNL-NEXT:    vmovd %ecx, %xmm0
    500 ; KNL-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
    501 ; KNL-NEXT:    kshiftlw $13, %k0, %k1
    502 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    503 ; KNL-NEXT:    kmovw %k1, %eax
    504 ; KNL-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0
    505 ; KNL-NEXT:    kshiftlw $12, %k0, %k1
    506 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    507 ; KNL-NEXT:    kmovw %k1, %eax
    508 ; KNL-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0
    509 ; KNL-NEXT:    kshiftlw $11, %k0, %k1
    510 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    511 ; KNL-NEXT:    kmovw %k1, %eax
    512 ; KNL-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0
    513 ; KNL-NEXT:    kshiftlw $10, %k0, %k1
    514 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    515 ; KNL-NEXT:    kmovw %k1, %eax
    516 ; KNL-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
    517 ; KNL-NEXT:    kshiftlw $9, %k0, %k1
    518 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    519 ; KNL-NEXT:    kmovw %k1, %eax
    520 ; KNL-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0
    521 ; KNL-NEXT:    kshiftlw $8, %k0, %k1
    522 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    523 ; KNL-NEXT:    kmovw %k1, %eax
    524 ; KNL-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0
    525 ; KNL-NEXT:    kshiftlw $7, %k0, %k1
    526 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    527 ; KNL-NEXT:    kmovw %k1, %eax
    528 ; KNL-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
    529 ; KNL-NEXT:    kshiftlw $6, %k0, %k1
    530 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    531 ; KNL-NEXT:    kmovw %k1, %eax
    532 ; KNL-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0
    533 ; KNL-NEXT:    kshiftlw $5, %k0, %k1
    534 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    535 ; KNL-NEXT:    kmovw %k1, %eax
    536 ; KNL-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
    537 ; KNL-NEXT:    kshiftlw $4, %k0, %k1
    538 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    539 ; KNL-NEXT:    kmovw %k1, %eax
    540 ; KNL-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
    541 ; KNL-NEXT:    kshiftlw $3, %k0, %k1
    542 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    543 ; KNL-NEXT:    kmovw %k1, %eax
    544 ; KNL-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
    545 ; KNL-NEXT:    kshiftlw $2, %k0, %k1
    546 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    547 ; KNL-NEXT:    kmovw %k1, %eax
    548 ; KNL-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0
    549 ; KNL-NEXT:    kshiftlw $1, %k0, %k1
    550 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    551 ; KNL-NEXT:    kmovw %k1, %eax
    552 ; KNL-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
    553 ; KNL-NEXT:    kshiftlw $0, %k0, %k0
    554 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
    555 ; KNL-NEXT:    kmovw %k0, %eax
    556 ; KNL-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
    557 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
    558 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
    559 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
    560 ; KNL-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
    561 ; KNL-NEXT:    vpcmpeqw %ymm6, %ymm2, %ymm0
    562 ; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
    563 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
    564 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
    565 ; KNL-NEXT:    kshiftlw $14, %k0, %k1
    566 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    567 ; KNL-NEXT:    kmovw %k1, %eax
    568 ; KNL-NEXT:    kshiftlw $15, %k0, %k1
    569 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    570 ; KNL-NEXT:    kmovw %k1, %ecx
    571 ; KNL-NEXT:    vmovd %ecx, %xmm0
    572 ; KNL-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
    573 ; KNL-NEXT:    kshiftlw $13, %k0, %k1
    574 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    575 ; KNL-NEXT:    kmovw %k1, %eax
    576 ; KNL-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0
    577 ; KNL-NEXT:    kshiftlw $12, %k0, %k1
    578 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    579 ; KNL-NEXT:    kmovw %k1, %eax
    580 ; KNL-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0
    581 ; KNL-NEXT:    kshiftlw $11, %k0, %k1
    582 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    583 ; KNL-NEXT:    kmovw %k1, %eax
    584 ; KNL-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0
    585 ; KNL-NEXT:    kshiftlw $10, %k0, %k1
    586 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    587 ; KNL-NEXT:    kmovw %k1, %eax
    588 ; KNL-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
    589 ; KNL-NEXT:    kshiftlw $9, %k0, %k1
    590 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    591 ; KNL-NEXT:    kmovw %k1, %eax
    592 ; KNL-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0
    593 ; KNL-NEXT:    kshiftlw $8, %k0, %k1
    594 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    595 ; KNL-NEXT:    kmovw %k1, %eax
    596 ; KNL-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0
    597 ; KNL-NEXT:    kshiftlw $7, %k0, %k1
    598 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    599 ; KNL-NEXT:    kmovw %k1, %eax
    600 ; KNL-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
    601 ; KNL-NEXT:    kshiftlw $6, %k0, %k1
    602 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    603 ; KNL-NEXT:    kmovw %k1, %eax
    604 ; KNL-NEXT:    vpinsrb $9, %eax, %xmm0, %xmm0
    605 ; KNL-NEXT:    kshiftlw $5, %k0, %k1
    606 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    607 ; KNL-NEXT:    kmovw %k1, %eax
    608 ; KNL-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
    609 ; KNL-NEXT:    kshiftlw $4, %k0, %k1
    610 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    611 ; KNL-NEXT:    kmovw %k1, %eax
    612 ; KNL-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
    613 ; KNL-NEXT:    kshiftlw $3, %k0, %k1
    614 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    615 ; KNL-NEXT:    kmovw %k1, %eax
    616 ; KNL-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
    617 ; KNL-NEXT:    kshiftlw $2, %k0, %k1
    618 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    619 ; KNL-NEXT:    kmovw %k1, %eax
    620 ; KNL-NEXT:    vpinsrb $13, %eax, %xmm0, %xmm0
    621 ; KNL-NEXT:    kshiftlw $1, %k0, %k1
    622 ; KNL-NEXT:    kshiftrw $15, %k1, %k1
    623 ; KNL-NEXT:    kmovw %k1, %eax
    624 ; KNL-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
    625 ; KNL-NEXT:    kshiftlw $0, %k0, %k0
    626 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
    627 ; KNL-NEXT:    kmovw %k0, %eax
    628 ; KNL-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
    629 ; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
    630 ; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
    631 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
    632 ; KNL-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
    633 ; KNL-NEXT:    movl (%rsp), %ecx
    634 ; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    635 ; KNL-NEXT:    shlq $32, %rax
    636 ; KNL-NEXT:    orq %rcx, %rax
    637 ; KNL-NEXT:    movq %rbp, %rsp
    638 ; KNL-NEXT:    popq %rbp
    639 ; KNL-NEXT:    retq
    640 ;
    641 ; SKX-LABEL: test12_v64i16:
    642 ; SKX:       ## BB#0:
    643 ; SKX-NEXT:    vpcmpeqw %zmm2, %zmm0, %k0
    644 ; SKX-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1
    645 ; SKX-NEXT:    kunpckdq %k0, %k1, %k0
    646 ; SKX-NEXT:    kmovq %k0, %rax
    647 ; SKX-NEXT:    retq
    648   %res = icmp eq <64 x i16> %a, %b
    649   %res1 = bitcast <64 x i1> %res to i64
    650   ret i64 %res1
    651 }
    652 
    ; test13 - ordered equality compare on <16 x float> whose <16 x i1>
    ; result is zero-extended to <16 x i32> and returned.
    ; Expected under the shared CHECK prefix is a vcmpeqps into %k1 followed
    ; by a zero-masked vpbroadcastd from a RIP-relative memory operand (the
    ; symbol is matched with a wildcard; presumably it holds the constant 1).
    ; Note that the opening brace of the body sits after the check lines.
    653 define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
    654 ; CHECK-LABEL: test13:
    655 ; CHECK:       ## BB#0:
    656 ; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
    657 ; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
    658 ; CHECK-NEXT:    retq
    659 {
    660   %cmpvector_i = fcmp oeq <16 x float> %a, %b
    661   %conv = zext <16 x i1> %cmpvector_i to <16 x i32>
    662   ret <16 x i32> %conv
    663 }
    664 
    ; test14 - computes %a - %b on <16 x i32>, compares the difference
    ; against %a (signed greater-than), sign-extends that mask, tests the
    ; extended value for zero, and selects zero or the difference.
    ; Net effect per lane, the difference where it is greater than %a,
    ; otherwise zero.
    ; Expected under the shared CHECK prefix are only three instructions,
    ; vpsubd, vpcmpgtd into %k1, and a zero-masked vmovdqa32, i.e. the
    ; sext / compare-with-zero / select chain adds no instructions of its own.
    665 define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
    666 ; CHECK-LABEL: test14:
    667 ; CHECK:       ## BB#0:
    668 ; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm1
    669 ; CHECK-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
    670 ; CHECK-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
    671 ; CHECK-NEXT:    retq
    672   %sub_r = sub <16 x i32> %a, %b
    673   %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
    674   %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
    675   %mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer
    676   %res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r
    677   ret <16 x i32>%res
    678 }
    679 
    ; test15 - <8 x i64> counterpart of the subtract / compare / sext /
    ; compare-with-zero / select chain. Net effect per lane, the difference
    ; %a - %b where it is greater than %a (signed), otherwise zero.
    ; Expected under the shared CHECK prefix are vpsubq, vpcmpgtq into %k1,
    ; and a zero-masked vmovdqa64.
    680 define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
    681 ; CHECK-LABEL: test15:
    682 ; CHECK:       ## BB#0:
    683 ; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm1
    684 ; CHECK-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1
    685 ; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
    686 ; CHECK-NEXT:    retq
    687   %sub_r = sub <8 x i64> %a, %b
    688   %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
    689   %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
    690   %mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer
    691   %res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r
    692   ret <8 x i64>%res
    693 }
    694 
    ; test16 - signed greater-or-equal compare (icmp sge) on <16 x i32>.
    ; The select picks %x1 where %x >= %y, else %y.
    ; Expected is a vpcmpled that lists %zmm0 before %zmm1, followed by a
    ; vpblendmd of %zmm2 and %zmm1 masked by %k1.
    ; NOTE(review): presumably sge is emitted as "y <= x" with the compare
    ; operands exchanged - confirm against the argument register assignment.
    695 define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
    696 ; CHECK-LABEL: test16:
    697 ; CHECK:       ## BB#0:
    698 ; CHECK-NEXT:    vpcmpled %zmm0, %zmm1, %k1
    699 ; CHECK-NEXT:    vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
    700 ; CHECK-NEXT:    retq
    701   %mask = icmp sge <16 x i32> %x, %y
    702   %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
    703   ret <16 x i32> %max
    704 }
    705 
    ; test17 - signed greater-than compare of %x against a vector loaded
    ; from %y.ptr (align 4). The select picks %x where %x > %y, else %x1.
    ; Expected is a vpcmpgtd that takes the load as its memory operand
    ; ((%rdi)), followed by a vpblendmd masked by %k1.
    706 define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
    707 ; CHECK-LABEL: test17:
    708 ; CHECK:       ## BB#0:
    709 ; CHECK-NEXT:    vpcmpgtd (%rdi), %zmm0, %k1
    710 ; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
    711 ; CHECK-NEXT:    retq
    712   %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
    713   %mask = icmp sgt <16 x i32> %x, %y
    714   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
    715   ret <16 x i32> %max
    716 }
    717 
    ; test18 - signed less-or-equal compare of %x against a vector loaded
    ; from %y.ptr (align 4). The select picks %x where %x <= %y, else %x1.
    ; Expected is a vpcmpled that takes the load as its memory operand
    ; ((%rdi)), followed by a vpblendmd masked by %k1.
    718 define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
    719 ; CHECK-LABEL: test18:
    720 ; CHECK:       ## BB#0:
    721 ; CHECK-NEXT:    vpcmpled (%rdi), %zmm0, %k1
    722 ; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
    723 ; CHECK-NEXT:    retq
    724   %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
    725   %mask = icmp sle <16 x i32> %x, %y
    726   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
    727   ret <16 x i32> %max
    728 }
    729 
    ; test19: unsigned "<=" (icmp ule) of %x against a <16 x i32> vector loaded
    ; from %y.ptr. The expected code uses the unsigned compare form vpcmpleud
    ; with the load folded as its memory operand.
    730 define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
    731 ; CHECK-LABEL: test19:
    732 ; CHECK:       ## BB#0:
    733 ; CHECK-NEXT:    vpcmpleud (%rdi), %zmm0, %k1
    734 ; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
    735 ; CHECK-NEXT:    retq
    736   %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
    737   %mask = icmp ule <16 x i32> %x, %y
    738   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
    739   ret <16 x i32> %max
    740 }
    741 
    ; test20: two icmp eq results combined with select(%mask0, %mask1, zero), i.e.
    ; a lane is set only when both compares are true. The expected code runs the
    ; second vpcmpeqd under the write-mask {%k1} produced by the first, so no
    ; separate mask-combining instruction appears.
    742 define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
    743 ; CHECK-LABEL: test20:
    744 ; CHECK:       ## BB#0:
    745 ; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
    746 ; CHECK-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
    747 ; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
    748 ; CHECK-NEXT:    retq
    749   %mask1 = icmp eq <16 x i32> %x1, %y1
    750   %mask0 = icmp eq <16 x i32> %x, %y
    751   %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
    752   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
    753   ret <16 x i32> %max
    754 }
    755 
    ; test21: <8 x i64> variant of the chained-compare pattern: %x sle %y combined
    ; with %x1 sge %y1 through select(%mask0, %mask1, zero). Both compares are
    ; expected as vpcmpleq, the second one executed under the {%k1} mask of the
    ; first.
    756 define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
    757 ; CHECK-LABEL: test21:
    758 ; CHECK:       ## BB#0:
    759 ; CHECK-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
    760 ; CHECK-NEXT:    vpcmpleq %zmm2, %zmm3, %k1 {%k1}
    761 ; CHECK-NEXT:    vpblendmq %zmm0, %zmm2, %zmm0 {%k1}
    762 ; CHECK-NEXT:    retq
    763   %mask1 = icmp sge <8 x i64> %x1, %y1
    764   %mask0 = icmp sle <8 x i64> %x, %y
    765   %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
    766   %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
    767   ret <8 x i64> %max
    768 }
    769 
    ; test22: chained signed ">" compares where one operand of the second compare
    ; is loaded from %y.ptr. The register/register vpcmpgtq is expected first; the
    ; compare against memory is then executed under {%k1} with the load folded
    ; into it.
    770 define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
    771 ; CHECK-LABEL: test22:
    772 ; CHECK:       ## BB#0:
    773 ; CHECK-NEXT:    vpcmpgtq %zmm2, %zmm1, %k1
    774 ; CHECK-NEXT:    vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
    775 ; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
    776 ; CHECK-NEXT:    retq
    777   %mask1 = icmp sgt <8 x i64> %x1, %y1
    778   %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
    779   %mask0 = icmp sgt <8 x i64> %x, %y
    780   %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
    781   %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
    782   ret <8 x i64> %max
    783 }
    784 
    ; test23: mixes a signed register compare (%x1 sge %y1) with an unsigned
    ; compare against memory (%x ule load %y.ptr). Expected: vpcmpled for the
    ; register pair, then vpcmpleud with a folded load executed under {%k1}.
    785 define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
    786 ; CHECK-LABEL: test23:
    787 ; CHECK:       ## BB#0:
    788 ; CHECK-NEXT:    vpcmpled %zmm1, %zmm2, %k1
    789 ; CHECK-NEXT:    vpcmpleud (%rdi), %zmm0, %k1 {%k1}
    790 ; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
    791 ; CHECK-NEXT:    retq
    792   %mask1 = icmp sge <16 x i32> %x1, %y1
    793   %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
    794   %mask0 = icmp ule <16 x i32> %x, %y
    795   %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
    796   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
    797   ret <16 x i32> %max
    798 }
    799 
    ; test24: icmp eq of %x against a scalar i64 loaded from %yb.ptr and splatted
    ; to all 8 lanes (insertelement + zero-mask shufflevector). The load and splat
    ; are expected to fold into an embedded-broadcast memory operand,
    ; (%rdi){1to8}.
    800 define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
    801 ; CHECK-LABEL: test24:
    802 ; CHECK:       ## BB#0:
    803 ; CHECK-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k1
    804 ; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
    805 ; CHECK-NEXT:    retq
    806   %yb = load i64, i64* %yb.ptr, align 4
    807   %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
    808   %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
    809   %mask = icmp eq <8 x i64> %x, %y
    810   %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
    811   ret <8 x i64> %max
    812 }
    813 
    ; test25: signed "<=" (icmp sle) of %x against a scalar i32 loaded from
    ; %yb.ptr and splatted to 16 lanes. Expected: vpcmpled with an
    ; embedded-broadcast operand, (%rdi){1to16}.
    814 define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
    815 ; CHECK-LABEL: test25:
    816 ; CHECK:       ## BB#0:
    817 ; CHECK-NEXT:    vpcmpled (%rdi){1to16}, %zmm0, %k1
    818 ; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
    819 ; CHECK-NEXT:    retq
    820   %yb = load i32, i32* %yb.ptr, align 4
    821   %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
    822   %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
    823   %mask = icmp sle <16 x i32> %x, %y
    824   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
    825   ret <16 x i32> %max
    826 }
    827 
    ; test26: combines a register compare (%x1 sge %y1) with a signed ">" compare
    ; of %x against a splatted i32 loaded from %yb.ptr. Expected: vpcmpled on the
    ; registers, then vpcmpgtd with an embedded broadcast ((%rdi){1to16})
    ; executed under the {%k1} mask.
    828 define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
    829 ; CHECK-LABEL: test26:
    830 ; CHECK:       ## BB#0:
    831 ; CHECK-NEXT:    vpcmpled %zmm1, %zmm2, %k1
    832 ; CHECK-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
    833 ; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
    834 ; CHECK-NEXT:    retq
    835   %mask1 = icmp sge <16 x i32> %x1, %y1
    836   %yb = load i32, i32* %yb.ptr, align 4
    837   %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
    838   %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
    839   %mask0 = icmp sgt <16 x i32> %x, %y
    840   %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
    841   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
    842   ret <16 x i32> %max
    843 }
    844 
    ; test27: <8 x i64> variant of the masked broadcast compare: %x1 sge %y1
    ; combined with %x sle splat(load %yb.ptr). Expected: two vpcmpleq
    ; instructions, the second using (%rdi){1to8} and the {%k1} write-mask.
    845 define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
    846 ; CHECK-LABEL: test27:
    847 ; CHECK:       ## BB#0:
    848 ; CHECK-NEXT:    vpcmpleq %zmm1, %zmm2, %k1
    849 ; CHECK-NEXT:    vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
    850 ; CHECK-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
    851 ; CHECK-NEXT:    retq
    852   %mask1 = icmp sge <8 x i64> %x1, %y1
    853   %yb = load i64, i64* %yb.ptr, align 4
    854   %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
    855   %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
    856   %mask0 = icmp sle <8 x i64> %x, %y
    857   %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
    858   %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
    859   ret <8 x i64> %max
    860 }
    861 
    ; test28: icmp eq between two <8 x i1> compare results, sign-extended to
    ; <8 x i32>. Both targets are expected to combine the masks with a k-register
    ; xnor (kxnorw on KNL, kxnorb on SKX). KNL then materialises the vector via
    ; an all-ones vpternlogd, a zero-masked move and vpmovqd; SKX uses vpmovm2d.
    862 define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) {
    863 ; KNL-LABEL: test28:
    864 ; KNL:       ## BB#0:
    865 ; KNL-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
    866 ; KNL-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1
    867 ; KNL-NEXT:    kxnorw %k1, %k0, %k1
    868 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
    869 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
    870 ; KNL-NEXT:    vpmovqd %zmm0, %ymm0
    871 ; KNL-NEXT:    retq
    872 ;
    873 ; SKX-LABEL: test28:
    874 ; SKX:       ## BB#0:
    875 ; SKX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
    876 ; SKX-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1
    877 ; SKX-NEXT:    kxnorb %k1, %k0, %k0
    878 ; SKX-NEXT:    vpmovm2d %k0, %ymm0
    879 ; SKX-NEXT:    retq
    880   %x_gt_y = icmp sgt <8 x i64> %x, %y
    881   %x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
    882   %res = icmp eq <8 x i1>%x_gt_y, %x1_gt_y1
    883   %resse = sext <8 x i1>%res to <8 x i32>
    884   ret <8 x i32> %resse
    885 }
    886 
    ; test29: icmp ne between two <16 x i1> compare results, sign-extended to
    ; <16 x i8>. Both targets are expected to combine the masks with kxorw. KNL
    ; then expands the mask through an all-ones vpternlogd, a zero-masked move and
    ; vpmovdb; SKX converts the mask directly with vpmovm2b.
    887 define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) {
    888 ; KNL-LABEL: test29:
    889 ; KNL:       ## BB#0:
    890 ; KNL-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
    891 ; KNL-NEXT:    vpcmpgtd %zmm3, %zmm2, %k1
    892 ; KNL-NEXT:    kxorw %k1, %k0, %k1
    893 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
    894 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
    895 ; KNL-NEXT:    vpmovdb %zmm0, %xmm0
    896 ; KNL-NEXT:    retq
    897 ;
    898 ; SKX-LABEL: test29:
    899 ; SKX:       ## BB#0:
    900 ; SKX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
    901 ; SKX-NEXT:    vpcmpgtd %zmm3, %zmm2, %k1
    902 ; SKX-NEXT:    kxorw %k1, %k0, %k0
    903 ; SKX-NEXT:    vpmovm2b %k0, %xmm0
    904 ; SKX-NEXT:    retq
    905   %x_gt_y = icmp sgt <16 x i32> %x, %y
    906   %x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
    907   %res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
    908   %resse = sext <16 x i1>%res to <16 x i8>
    909   ret <16 x i8> %resse
    910 }
    911 
    ; test30: ordered-equal fcmp on 256-bit <4 x double> operands feeding a
    ; select. KNL is expected to use the vector-result vcmpeqpd plus vblendvpd;
    ; SKX is expected to compare into %k1 and use a masked vblendmpd on ymm
    ; registers.
    912 define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
    913 ; KNL-LABEL: test30:
    914 ; KNL:       ## BB#0:
    915 ; KNL-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm2
    916 ; KNL-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
    917 ; KNL-NEXT:    retq
    918 ;
    919 ; SKX-LABEL: test30:
    920 ; SKX:       ## BB#0:
    921 ; SKX-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
    922 ; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
    923 ; SKX-NEXT:    retq
    924 
    925   %mask = fcmp oeq <4 x double> %x, %y
    926   %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
    927   ret <4 x double> %max
    928 }
    929 
    ; test31: ordered "<" fcmp of a 128-bit <2 x double> against a vector loaded
    ; from %yp. Both targets are expected to fold the load into vcmpltpd. KNL
    ; writes the result to an xmm register and blends with vblendvpd; SKX writes
    ; %k1 and uses vblendmpd.
    930 define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
    931 ; KNL-LABEL: test31:
    932 ; KNL:       ## BB#0:
    933 ; KNL-NEXT:    vcmpltpd (%rdi), %xmm0, %xmm2
    934 ; KNL-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
    935 ; KNL-NEXT:    retq
    936 ;
    937 ; SKX-LABEL: test31:
    938 ; SKX:       ## BB#0:
    939 ; SKX-NEXT:    vcmpltpd (%rdi), %xmm0, %k1
    940 ; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
    941 ; SKX-NEXT:    retq
    942 
    943   %y = load <2 x double>, <2 x double>* %yp, align 4
    944   %mask = fcmp olt <2 x double> %x, %y
    945   %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
    946   ret <2 x double> %max
    947 }
    948 
    ; test32: ordered ">" fcmp written with the loaded vector as the first operand
    ; (%y > %x) on <4 x double>. The expected code is the commuted form: a
    ; vcmpltpd with the load folded as its memory operand on both KNL and SKX.
    949 define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
    950 ; KNL-LABEL: test32:
    951 ; KNL:       ## BB#0:
    952 ; KNL-NEXT:    vcmpltpd (%rdi), %ymm0, %ymm2
    953 ; KNL-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
    954 ; KNL-NEXT:    retq
    955 ;
    956 ; SKX-LABEL: test32:
    957 ; SKX:       ## BB#0:
    958 ; SKX-NEXT:    vcmpltpd (%rdi), %ymm0, %k1
    959 ; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
    960 ; SKX-NEXT:    retq
    961 
    962   %y = load <4 x double>, <4 x double>* %yp, align 4
    963   %mask = fcmp ogt <4 x double> %y, %x
    964   %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
    965   ret <4 x double> %max
    966 }
    967 
    ; test33: ordered "<" fcmp of a 512-bit <8 x double> against a vector loaded
    ; from %yp. Both RUN configurations share the CHECK prefix: vcmpltpd with a
    ; folded load into %k1, then a masked vblendmpd.
    968 define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
    969 ; CHECK-LABEL: test33:
    970 ; CHECK:       ## BB#0:
    971 ; CHECK-NEXT:    vcmpltpd (%rdi), %zmm0, %k1
    972 ; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
    973 ; CHECK-NEXT:    retq
    974   %y = load <8 x double>, <8 x double>* %yp, align 4
    975   %mask = fcmp olt <8 x double> %x, %y
    976   %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
    977   ret <8 x double> %max
    978 }
    979 
    ; test34: ordered "<" fcmp of a 128-bit <4 x float> against a vector loaded
    ; from %yp. KNL is expected to use vcmpltps into an xmm register plus
    ; vblendvps; SKX is expected to compare into %k1 and use a masked vblendmps.
    980 define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
    981 ; KNL-LABEL: test34:
    982 ; KNL:       ## BB#0:
    983 ; KNL-NEXT:    vcmpltps (%rdi), %xmm0, %xmm2
    984 ; KNL-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
    985 ; KNL-NEXT:    retq
    986 ;
    987 ; SKX-LABEL: test34:
    988 ; SKX:       ## BB#0:
    989 ; SKX-NEXT:    vcmpltps (%rdi), %xmm0, %k1
    990 ; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
    991 ; SKX-NEXT:    retq
    992   %y = load <4 x float>, <4 x float>* %yp, align 4
    993   %mask = fcmp olt <4 x float> %x, %y
    994   %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
    995   ret <4 x float> %max
    996 }
    997 
    ; test35: ordered ">" fcmp with the loaded <8 x float> as the first operand
    ; (%y > %x). On KNL the expected code widens to zmm registers (see the
    ; "kill" annotations), loads %y with a separate vmovups and compares/blends at
    ; 512 bits. On SKX the compare stays on ymm registers with the load folded.
    998 define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
    999 ; KNL-LABEL: test35:
   1000 ; KNL:       ## BB#0:
   1001 ; KNL-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
   1002 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
   1003 ; KNL-NEXT:    vmovups (%rdi), %ymm2
   1004 ; KNL-NEXT:    vcmpltps %zmm2, %zmm0, %k1
   1005 ; KNL-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
   1006 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
   1007 ; KNL-NEXT:    retq
   1008 ;
   1009 ; SKX-LABEL: test35:
   1010 ; SKX:       ## BB#0:
   1011 ; SKX-NEXT:    vcmpltps (%rdi), %ymm0, %k1
   1012 ; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1}
   1013 ; SKX-NEXT:    retq
   1014 
   1015   %y = load <8 x float>, <8 x float>* %yp, align 4
   1016   %mask = fcmp ogt <8 x float> %y, %x
   1017   %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
   1018   ret <8 x float> %max
   1019 }
   1020 
   ; test36: ordered "<" fcmp of a 512-bit <16 x float> against a vector loaded
   ; from %yp. Both RUN configurations share the CHECK prefix: vcmpltps with a
   ; folded load into %k1, then a masked vblendmps.
   1021 define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
   1022 ; CHECK-LABEL: test36:
   1023 ; CHECK:       ## BB#0:
   1024 ; CHECK-NEXT:    vcmpltps (%rdi), %zmm0, %k1
   1025 ; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
   1026 ; CHECK-NEXT:    retq
   1027   %y = load <16 x float>, <16 x float>* %yp, align 4
   1028   %mask = fcmp olt <16 x float> %x, %y
   1029   %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
   1030   ret <16 x float> %max
   1031 }
   1032 
   ; test37: ordered ">" fcmp whose first operand is a double loaded from %ptr
   ; and splatted to 8 lanes (splat > %x). The expected code is the commuted
   ; vcmpltpd with the scalar load folded as an embedded broadcast, (%rdi){1to8}.
   1033 define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
   1034 ; CHECK-LABEL: test37:
   1035 ; CHECK:       ## BB#0:
   1036 ; CHECK-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1
   1037 ; CHECK-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
   1038 ; CHECK-NEXT:    retq
   1039 
   1040   %a = load double, double* %ptr
   1041   %v = insertelement <8 x double> undef, double %a, i32 0
   1042   %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
   1043 
   1044   %mask = fcmp ogt <8 x double> %shuffle, %x
   1045   %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
   1046   ret <8 x double> %max
   1047 }
   1048 
   ; test38: 256-bit version of the splat compare (splat(load %ptr) > %x on
   ; <4 x double>). KNL is expected to broadcast with a separate vbroadcastsd and
   ; use vcmpltpd/vblendvpd on ymm registers. SKX is expected to fold the
   ; broadcast into the compare as (%rdi){1to4} and use a mask register.
   1049 define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
   1050 ; KNL-LABEL: test38:
   1051 ; KNL:       ## BB#0:
   1052 ; KNL-NEXT:    vbroadcastsd (%rdi), %ymm2
   1053 ; KNL-NEXT:    vcmpltpd %ymm2, %ymm0, %ymm2
   1054 ; KNL-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
   1055 ; KNL-NEXT:    retq
   1056 ;
   1057 ; SKX-LABEL: test38:
   1058 ; SKX:       ## BB#0:
   1059 ; SKX-NEXT:    vcmpltpd (%rdi){1to4}, %ymm0, %k1
   1060 ; SKX-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
   1061 ; SKX-NEXT:    retq
   1062 
   1063   %a = load double, double* %ptr
   1064   %v = insertelement <4 x double> undef, double %a, i32 0
   1065   %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
   1066 
   1067   %mask = fcmp ogt <4 x double> %shuffle, %x
   1068   %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
   1069   ret <4 x double> %max
   1070 }
   1071 
   ; test39: 128-bit version of the splat compare (splat(load %ptr) > %x on
   ; <2 x double>). KNL is expected to splat with vmovddup from memory and use
   ; vcmpltpd/vblendvpd on xmm registers. SKX is expected to fold the broadcast
   ; into the compare as (%rdi){1to2} and use a mask register.
   1072 define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
   1073 ; KNL-LABEL: test39:
   1074 ; KNL:       ## BB#0:
   1075 ; KNL-NEXT:    vmovddup {{.*#+}} xmm2 = mem[0,0]
   1076 ; KNL-NEXT:    vcmpltpd %xmm2, %xmm0, %xmm2
   1077 ; KNL-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
   1078 ; KNL-NEXT:    retq
   1079 ;
   1080 ; SKX-LABEL: test39:
   1081 ; SKX:       ## BB#0:
   1082 ; SKX-NEXT:    vcmpltpd (%rdi){1to2}, %xmm0, %k1
   1083 ; SKX-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
   1084 ; SKX-NEXT:    retq
   1085 
   1086   %a = load double, double* %ptr
   1087   %v = insertelement <2 x double> undef, double %a, i32 0
   1088   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
   1089 
   1090   %mask = fcmp ogt <2 x double> %shuffle, %x
   1091   %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
   1092   ret <2 x double> %max
   1093 }
   1094 
   1095 
   ; test40: ordered ">" fcmp whose first operand is a float loaded from %ptr and
   ; splatted to 16 lanes with an explicit all-zero shuffle mask. The expected
   ; code is the commuted vcmpltps with an embedded broadcast, (%rdi){1to16}, on
   ; both targets.
   1096 define <16  x float> @test40(<16  x float> %x, <16  x float> %x1, float* %ptr) nounwind {
   1097 ; CHECK-LABEL: test40:
   1098 ; CHECK:       ## BB#0:
   1099 ; CHECK-NEXT:    vcmpltps (%rdi){1to16}, %zmm0, %k1
   1100 ; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
   1101 ; CHECK-NEXT:    retq
   1102 
   1103   %a = load float, float* %ptr
   1104   %v = insertelement <16  x float> undef, float %a, i32 0
   1105   %shuffle = shufflevector <16  x float> %v, <16  x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1106 
   1107   %mask = fcmp ogt <16  x float> %shuffle, %x
   1108   %max = select <16 x i1> %mask, <16  x float> %x, <16  x float> %x1
   1109   ret <16  x float> %max
   1110 }
   1111 
   ; test41: 256-bit version of the float splat compare (splat(load %ptr) > %x
   ; on <8 x float>). On KNL the expected code broadcasts with a separate
   ; vbroadcastss into ymm2 and then compares/blends on zmm registers (see the
   ; "kill" annotations). On SKX the broadcast is folded as (%rdi){1to8} and the
   ; operation stays on ymm registers.
   1112 define <8  x float> @test41(<8  x float> %x, <8  x float> %x1, float* %ptr) nounwind {
   1113 ; KNL-LABEL: test41:
   1114 ; KNL:       ## BB#0:
   1115 ; KNL-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
   1116 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
   1117 ; KNL-NEXT:    vbroadcastss (%rdi), %ymm2
   1118 ; KNL-NEXT:    vcmpltps %zmm2, %zmm0, %k1
   1119 ; KNL-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
   1120 ; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
   1121 ; KNL-NEXT:    retq
   1122 ;
   1123 ; SKX-LABEL: test41:
   1124 ; SKX:       ## BB#0:
   1125 ; SKX-NEXT:    vcmpltps (%rdi){1to8}, %ymm0, %k1
   1126 ; SKX-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1}
   1127 ; SKX-NEXT:    retq
   1128 
   1129   %a = load float, float* %ptr
   1130   %v = insertelement <8  x float> undef, float %a, i32 0
   1131   %shuffle = shufflevector <8  x float> %v, <8  x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1132 
   1133   %mask = fcmp ogt <8  x float> %shuffle, %x
   1134   %max = select <8 x i1> %mask, <8  x float> %x, <8  x float> %x1
   1135   ret <8  x float> %max
   1136 }
   1137 
   ; test42: 128-bit version of the float splat compare (splat(load %ptr) > %x
   ; on <4 x float>). KNL is expected to broadcast with vbroadcastss into an xmm
   ; register and use vcmpltps/vblendvps. SKX is expected to fold the broadcast
   ; as (%rdi){1to4} and use a mask register.
   1138 define <4  x float> @test42(<4  x float> %x, <4  x float> %x1, float* %ptr) nounwind {
   1139 ; KNL-LABEL: test42:
   1140 ; KNL:       ## BB#0:
   1141 ; KNL-NEXT:    vbroadcastss (%rdi), %xmm2
   1142 ; KNL-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
   1143 ; KNL-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
   1144 ; KNL-NEXT:    retq
   1145 ;
   1146 ; SKX-LABEL: test42:
   1147 ; SKX:       ## BB#0:
   1148 ; SKX-NEXT:    vcmpltps (%rdi){1to4}, %xmm0, %k1
   1149 ; SKX-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
   1150 ; SKX-NEXT:    retq
   1151 
   1152   %a = load float, float* %ptr
   1153   %v = insertelement <4  x float> undef, float %a, i32 0
   1154   %shuffle = shufflevector <4  x float> %v, <4  x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
   1155 
   1156   %mask = fcmp ogt <4  x float> %shuffle, %x
   1157   %max = select <4 x i1> %mask, <4  x float> %x, <4  x float> %x1
   1158   ret <4  x float> %max
   1159 }
   1160 
   ; test43: broadcast fcmp (splat(load %ptr) > %x) ANDed with an incoming
   ; <8 x i1> argument %mask_in. The argument is first converted to %k1
   ; (vpmovsxwq/vpsllq/vptestmq on KNL, vpsllw/vpmovw2m on SKX). The compare with
   ; the embedded broadcast is then expected to run under that write-mask, with
   ; no separate mask "and" instruction.
   1161 define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
   1162 ; KNL-LABEL: test43:
   1163 ; KNL:       ## BB#0:
   1164 ; KNL-NEXT:    vpmovsxwq %xmm2, %zmm2
   1165 ; KNL-NEXT:    vpsllq $63, %zmm2, %zmm2
   1166 ; KNL-NEXT:    vptestmq %zmm2, %zmm2, %k1
   1167 ; KNL-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
   1168 ; KNL-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
   1169 ; KNL-NEXT:    retq
   1170 ;
   1171 ; SKX-LABEL: test43:
   1172 ; SKX:       ## BB#0:
   1173 ; SKX-NEXT:    vpsllw $15, %xmm2, %xmm2
   1174 ; SKX-NEXT:    vpmovw2m %xmm2, %k1
   1175 ; SKX-NEXT:    vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
   1176 ; SKX-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
   1177 ; SKX-NEXT:    retq
   1178 
   1179   %a = load double, double* %ptr
   1180   %v = insertelement <8 x double> undef, double %a, i32 0
   1181   %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
   1182 
   1183   %mask_cmp = fcmp ogt <8 x double> %shuffle, %x
   1184   %mask = and <8 x i1> %mask_cmp, %mask_in
   1185   %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
   1186   ret <8 x double> %max
   1187 }
   1188 
   ; test44: icmp eq on <4 x i16> operands with the result sign-extended to
   ; <4 x i32>. The expected code blends each operand with a zeroed register
   ; (vpblendw taking the odd 16-bit elements from xmm2) before a vpcmpeqd. KNL
   ; returns the vector compare result directly; SKX compares into %k0 and
   ; expands it with vpmovm2d.
   ; NOTE(review): attribute group #0 is defined outside this chunk.
   1189 define <4 x i32> @test44(<4 x i16> %x, <4 x i16> %y) #0 {
   1190 ; KNL-LABEL: test44:
   1191 ; KNL:       ## BB#0:
   1192 ; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
   1193 ; KNL-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
   1194 ; KNL-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
   1195 ; KNL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
   1196 ; KNL-NEXT:    retq
   1197 ;
   1198 ; SKX-LABEL: test44:
   1199 ; SKX:       ## BB#0:
   1200 ; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
   1201 ; SKX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
   1202 ; SKX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
   1203 ; SKX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
   1204 ; SKX-NEXT:    vpmovm2d %k0, %xmm0
   1205 ; SKX-NEXT:    retq
   1206   %mask = icmp eq <4 x i16> %x, %y
   1207   %1 = sext <4 x i1> %mask to <4 x i32>
   1208   ret <4 x i32> %1
   1209 }
   1210 
   ; test45: icmp eq on <2 x i16> operands with the result zero-extended to
   ; <2 x i64>. Both targets are expected to blend the operands with a zeroed
   ; register and compare with vpcmpeqq. KNL turns the all-ones lanes into 0/1
   ; with vpsrlq $63; SKX compares into %k1 and does a zero-masked load of a
   ; RIP-relative constant.
   ; NOTE(review): attribute group #0 is defined outside this chunk.
   1211 define <2 x i64> @test45(<2 x i16> %x, <2 x i16> %y) #0 {
   1212 ; KNL-LABEL: test45:
   1213 ; KNL:       ## BB#0:
   1214 ; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
   1215 ; KNL-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
   1216 ; KNL-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
   1217 ; KNL-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
   1218 ; KNL-NEXT:    vpsrlq $63, %xmm0, %xmm0
   1219 ; KNL-NEXT:    retq
   1220 ;
   1221 ; SKX-LABEL: test45:
   1222 ; SKX:       ## BB#0:
   1223 ; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
   1224 ; SKX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
   1225 ; SKX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
   1226 ; SKX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k1
   1227 ; SKX-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
   1228 ; SKX-NEXT:    retq
   1229   %mask = icmp eq <2 x i16> %x, %y
   1230   %1 = zext <2 x i1> %mask to <2 x i64>
   1231   ret <2 x i64> %1
   1232 }
   1233 
   ; test46: ordered-equal fcmp on <2 x float> operands with the result
   ; zero-extended to <2 x i64>. KNL is expected to compare with vcmpeqps and
   ; then widen the result through a vpmovzxdq / shift / shuffle / blend sequence
   ; ending in a vpand with a RIP-relative constant. SKX compares into %k1 and
   ; does a zero-masked load of a RIP-relative constant.
   ; NOTE(review): attribute group #0 is defined outside this chunk.
   1234 define <2 x i64> @test46(<2 x float> %x, <2 x float> %y) #0 {
   1235 ; KNL-LABEL: test46:
   1236 ; KNL:       ## BB#0:
   1237 ; KNL-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
   1238 ; KNL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
   1239 ; KNL-NEXT:    vpsllq $32, %xmm0, %xmm0
   1240 ; KNL-NEXT:    vpsrad $31, %xmm0, %xmm1
   1241 ; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
   1242 ; KNL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
   1243 ; KNL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
   1244 ; KNL-NEXT:    retq
   1245 ;
   1246 ; SKX-LABEL: test46:
   1247 ; SKX:       ## BB#0:
   1248 ; SKX-NEXT:    vcmpeqps %xmm1, %xmm0, %k1
   1249 ; SKX-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
   1250 ; SKX-NEXT:    retq
   1251   %mask = fcmp oeq <2 x float> %x, %y
   1252   %1 = zext <2 x i1> %mask to <2 x i64>
   1253   ret <2 x i64> %1
   1254 }
   1255