Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
      3 
      4 define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
        ; Lane-wise `fcmp ole` of %x and %y feeding a select that keeps %x where the
        ; compare is true and %y elsewhere. The KNL checks expect the compare to write
        ; mask register %k1 and the select to become a masked vmovaps into %zmm1.
        ; NOTE(review): the result is named %max, but select(x <= y, x, y) is not a max.
      5 ; KNL-LABEL: test1:
      6 ; KNL:       ## BB#0:
      7 ; KNL-NEXT:    vcmpleps %zmm1, %zmm0, %k1
      8 ; KNL-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
      9 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
     10 ; KNL-NEXT:    retq
     11   %mask = fcmp ole <16 x float> %x, %y
     12   %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
     13   ret <16 x float> %max
     14 }
     15 
     16 define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
        ; Double-precision counterpart of the <16 x float> `ole` test: `fcmp ole` on
        ; <8 x double> followed by a select between the two compared operands.
        ; The KNL checks expect vcmplepd into %k1 and a masked vmovapd.
     17 ; KNL-LABEL: test2:
     18 ; KNL:       ## BB#0:
     19 ; KNL-NEXT:    vcmplepd %zmm1, %zmm0, %k1
     20 ; KNL-NEXT:    vmovapd %zmm0, %zmm1 {%k1}
     21 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
     22 ; KNL-NEXT:    retq
     23   %mask = fcmp ole <8 x double> %x, %y
     24   %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
     25   ret <8 x double> %max
     26 }
     27 
     28 define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
        ; Loads a <16 x i32> from %yp, compares it for equality with %x, and selects
        ; %x where equal, %x1 otherwise. The KNL checks expect the load to be folded
        ; into vpcmpeqd as the memory operand (%rdi).
     29 ; KNL-LABEL: test3:
     30 ; KNL:       ## BB#0:
     31 ; KNL-NEXT:    vpcmpeqd (%rdi), %zmm0, %k1
     32 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
     33 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
     34 ; KNL-NEXT:    retq
     35   %y = load <16 x i32>, <16 x i32>* %yp, align 4
     36   %mask = icmp eq <16 x i32> %x, %y
     37   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
     38   ret <16 x i32> %max
     39 }
     40 
     41 define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
        ; Unsigned `icmp uge` on <16 x i32>; the select picks the third argument %x1
        ; where the compare holds and %y elsewhere. The KNL checks expect the
        ; unsigned "not less than" form vpcmpnltud writing %k1.
     42 ; KNL-LABEL: test4_unsigned:
     43 ; KNL:       ## BB#0:
     44 ; KNL-NEXT:    vpcmpnltud %zmm1, %zmm0, %k1
     45 ; KNL-NEXT:    vmovdqa32 %zmm2, %zmm1 {%k1}
     46 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
     47 ; KNL-NEXT:    retq
     48   %mask = icmp uge <16 x i32> %x, %y
     49   %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
     50   ret <16 x i32> %max
     51 }
     52 
     53 define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
        ; `icmp eq` on <8 x i64> followed by a select between the compared operands.
        ; The KNL checks expect vpcmpeqq into %k1 and a masked vmovdqa64.
     54 ; KNL-LABEL: test5:
     55 ; KNL:       ## BB#0:
     56 ; KNL-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1
     57 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
     58 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
     59 ; KNL-NEXT:    retq
     60   %mask = icmp eq <8 x i64> %x, %y
     61   %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
     62   ret <8 x i64> %max
     63 }
     64 
     65 define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
        ; Unsigned `icmp ugt` on <8 x i64>; the select picks %x1 where the compare
        ; holds and %y elsewhere. The KNL checks expect the unsigned
        ; "not less or equal" form vpcmpnleuq writing %k1.
     66 ; KNL-LABEL: test6_unsigned:
     67 ; KNL:       ## BB#0:
     68 ; KNL-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1
     69 ; KNL-NEXT:    vmovdqa64 %zmm2, %zmm1 {%k1}
     70 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
     71 ; KNL-NEXT:    retq
     72   %mask = icmp ugt <8 x i64> %x, %y
     73   %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
     74   ret <8 x i64> %max
     75 }
     76 
     77 define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
        ; 128-bit case: `fcmp olt` of %a against an all-zero vector, then a select
        ; between %a and %b. The two targets are expected to lower it differently:
        ; the KNL checks expect a vector-register compare plus vblendvps, while the
        ; SKX checks expect the compare to write %k1 followed by a masked vmovaps.
     78 ; KNL-LABEL: test7:
     79 ; KNL:       ## BB#0:
     80 ; KNL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
     81 ; KNL-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
     82 ; KNL-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
     83 ; KNL-NEXT:    retq
     84 ; SKX-LABEL: test7:
     85 ; SKX:       ## BB#0:
     86 ; SKX:    vxorps   %xmm2, %xmm2, %xmm2
     87 ; SKX:    vcmpltps %xmm2, %xmm0, %k1 
     88 ; SKX:    vmovaps  %xmm0, %xmm1 {%k1}
     89 ; SKX:    vmovaps  %zmm1, %zmm0
     90 ; SKX:    retq
     91 
     92   %mask = fcmp olt <4 x float> %a, zeroinitializer
     93   %c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
     94   ret <4 x float>%c
     95 }
     96 
     97 define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
        ; <2 x double> version of the compare-against-zero test: `fcmp olt` of %a
        ; with zeroinitializer, then a select between %a and %b. The KNL checks
        ; expect vcmpltpd into an xmm register plus vblendvpd; the SKX checks expect
        ; the compare to write %k1 followed by a masked vmovapd.
     98 ; KNL-LABEL: test8:
     99 ; KNL:       ## BB#0:
    100 ; KNL-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
    101 ; KNL-NEXT:    vcmpltpd %xmm2, %xmm0, %xmm2
    102 ; KNL-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
    103 ; KNL-NEXT:    retq
    104 ; SKX-LABEL: test8:
    105 ; SKX:       ## BB#0:
    106 ; SKX: vxorpd  %xmm2, %xmm2, %xmm2
    107 ; SKX: vcmpltpd    %xmm2, %xmm0, %k1 
    108 ; SKX: vmovapd %xmm0, %xmm1 {%k1}
    109 ; SKX: vmovaps %zmm1, %zmm0
    110 ; SKX: retq
    111   %mask = fcmp olt <2 x double> %a, zeroinitializer
    112   %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
    113   ret <2 x double>%c
    114 }
    115 
    116 define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
        ; 256-bit `icmp eq` on <8 x i32> followed by a select between the operands.
        ; The KNL checks expect the operation to be carried out on zmm registers:
        ; vpcmpeqd into %k1 and then a masked vpblendmd.
    117 ; KNL-LABEL: test9:
    118 ; KNL:       ## BB#0:
    119 ; KNL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
    120 ; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
    121 ; KNL-NEXT:    retq
    122   %mask = icmp eq <8 x i32> %x, %y
    123   %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
    124   ret <8 x i32> %max
    125 }
    126 
    127 define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
        ; 256-bit `fcmp oeq` on <8 x float> followed by a select between the
        ; operands. The KNL checks expect zmm-register vcmpeqps + vblendmps, whereas
        ; the SKX checks expect the compare on ymm registers and a masked vmovaps.
    128 ; KNL-LABEL: test10:
    129 ; KNL:       ## BB#0:
    130 ; KNL-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
    131 ; KNL-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
    132 ; KNL-NEXT:    retq
    133 ; SKX-LABEL: test10:
    134 ; SKX:       ## BB#0:
    135 ; SKX: vcmpeqps    %ymm1, %ymm0, %k1 
    136 ; SKX: vmovaps %ymm0, %ymm1 {%k1}
    137 ; SKX: vmovaps %zmm1, %zmm0
    138 ; SKX: retq
    139 
    140   %mask = fcmp oeq <8 x float> %x, %y
    141   %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
    142   ret <8 x float> %max
    143 }
    144 
    145 define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
        ; select(x >u y, x, y) on <8 x i32>, i.e. an unsigned maximum written as
        ; compare + select. The KNL checks expect it to collapse into a single
        ; vpmaxud on ymm registers, with no mask register involved.
    146 ; KNL-LABEL: test11_unsigned:
    147 ; KNL:       ## BB#0:
    148 ; KNL-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
    149 ; KNL-NEXT:    retq
    150   %mask = icmp ugt <8 x i32> %x, %y
    151   %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
    152   ret <8 x i32> %max
    153 }
    154 
    155 define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
        ; Equality compare of two <16 x i64> vectors whose <16 x i1> result is
        ; bitcast to a scalar i16. The KNL checks expect two vpcmpeqq compares (one
        ; per zmm pair), the two masks joined with kunpckbw, and the result moved to
        ; %eax with kmovw.
    156 ; KNL-LABEL: test12:
    157 ; KNL:       ## BB#0:
    158 ; KNL-NEXT:    vpcmpeqq %zmm2, %zmm0, %k0
    159 ; KNL-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
    160 ; KNL-NEXT:    kunpckbw %k0, %k1, %k0
    161 ; KNL-NEXT:    kmovw %k0, %eax
    162 ; KNL-NEXT:    retq
    163   %res = icmp eq <16 x i64> %a, %b
    164   %res1 = bitcast <16 x i1> %res to i16
    165   ret i16 %res1
    166 }
    167 
    168 define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
        ; Equality compare of two <32 x i32> vectors whose <32 x i1> result is
        ; bitcast to i32. Only SKX checks are present: two vpcmpeqd compares, the
        ; masks joined with kunpckwd, and the result moved to %eax with kmovd.
    169 ; SKX-LABEL: test12_v32i32:
    170 ; SKX:       ## BB#0:
    171 ; SKX-NEXT:    vpcmpeqd %zmm2, %zmm0, %k0
    172 ; SKX-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
    173 ; SKX-NEXT:    kunpckwd %k0, %k1, %k0
    174 ; SKX-NEXT:    kmovd %k0, %eax
    175 ; SKX-NEXT:    retq
    176   %res = icmp eq <32 x i32> %a, %b
    177   %res1 = bitcast <32 x i1> %res to i32
    178   ret i32 %res1
    179 }
    180 
    181 define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
        ; Equality compare of two <64 x i16> vectors whose <64 x i1> result is
        ; bitcast to i64. Only SKX checks are present: two vpcmpeqw compares, the
        ; masks joined with kunpckdq, and the result moved to %rax with kmovq.
    182 ; SKX-LABEL: test12_v64i16:
    183 ; SKX:       ## BB#0:
    184 ; SKX-NEXT:    vpcmpeqw %zmm2, %zmm0, %k0
    185 ; SKX-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1
    186 ; SKX-NEXT:    kunpckdq %k0, %k1, %k0
    187 ; SKX-NEXT:    kmovq %k0, %rax
    188 ; SKX-NEXT:    retq
    189   %res = icmp eq <64 x i16> %a, %b
    190   %res1 = bitcast <64 x i1> %res to i64
    191   ret i64 %res1
    192 }
    193 
    194 define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
        ; `fcmp oeq` on <16 x float> whose i1 result is zero-extended to <16 x i32>.
        ; The KNL checks expect vcmpeqps into %k1 followed by a zero-masked
        ; vpbroadcastd from a RIP-relative constant.
        ; Note that the function's opening brace sits below the check lines.
    195 ; KNL-LABEL: test13:
    196 ; KNL:       ## BB#0:
    197 ; KNL-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
    198 ; KNL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
    199 ; KNL-NEXT:    retq
    200 {
    201   %cmpvector_i = fcmp oeq <16 x float> %a, %b
    202   %conv = zext <16 x i1> %cmpvector_i to <16 x i32>
    203   ret <16 x i32> %conv
    204 }
    205 
    206 define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
        ; Computes %a - %b, compares the difference against %a with `icmp sgt`,
        ; sign-extends that mask, tests the extension for zero (which inverts the
        ; mask), and selects zero where the inverted mask is set and the difference
        ; elsewhere. The KNL checks expect vpsubd, vpcmpgtd, two back-to-back knotw
        ; instructions, and a zero-masked vmovdqu32.
    207 ; KNL-LABEL: test14:
    208 ; KNL:       ## BB#0:
    209 ; KNL-NEXT:    vpsubd %zmm1, %zmm0, %zmm1
    210 ; KNL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
    211 ; KNL-NEXT:    knotw %k0, %k0
    212 ; KNL-NEXT:    knotw %k0, %k1
    213 ; KNL-NEXT:    vmovdqu32 %zmm1, %zmm0 {%k1} {z}
    214 ; KNL-NEXT:    retq
    215   %sub_r = sub <16 x i32> %a, %b
    216   %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
    217   %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
    218   %mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer
    219   %res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r
    220   ret <16 x i32>%res
    221 }
    222 
    223 define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
        ; <8 x i64> version of the subtract / compare / inverted-mask select
        ; pattern: the difference %a - %b is kept where it is signed-greater than
        ; %a and replaced by zero elsewhere. The KNL checks expect vpsubq, vpcmpgtq,
        ; two back-to-back knotw instructions, and a zero-masked vmovdqu64.
    224 ; KNL-LABEL: test15:
    225 ; KNL:       ## BB#0:
    226 ; KNL-NEXT:    vpsubq %zmm1, %zmm0, %zmm1
    227 ; KNL-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
    228 ; KNL-NEXT:    knotw %k0, %k0
    229 ; KNL-NEXT:    knotw %k0, %k1
    230 ; KNL-NEXT:    vmovdqu64 %zmm1, %zmm0 {%k1} {z}
    231 ; KNL-NEXT:    retq
    232   %sub_r = sub <8 x i64> %a, %b
    233   %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
    234   %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
    235   %mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer
    236   %res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r
    237   ret <8 x i64>%res
    238 }
    239 
    240 define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
        ; Signed `icmp sge` of %x and %y; the select picks %x1 where it holds and %y
        ; elsewhere. The KNL checks expect the predicate to be emitted as vpcmpled
        ; with the two register operands swapped relative to the unsigned uge test.
    241 ; KNL-LABEL: test16:
    242 ; KNL:       ## BB#0:
    243 ; KNL-NEXT:    vpcmpled %zmm0, %zmm1, %k1
    244 ; KNL-NEXT:    vmovdqa32 %zmm2, %zmm1 {%k1}
    245 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
    246 ; KNL-NEXT:    retq
    247   %mask = icmp sge <16 x i32> %x, %y
    248   %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
    249   ret <16 x i32> %max
    250 }
    251 
    252 define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
        ; Signed `icmp sgt` of %x against a vector loaded from %y.ptr, then a select
        ; between %x and %x1. The KNL checks expect the load folded into vpcmpgtd as
        ; the memory operand (%rdi).
    253 ; KNL-LABEL: test17:
    254 ; KNL:       ## BB#0:
    255 ; KNL-NEXT:    vpcmpgtd (%rdi), %zmm0, %k1
    256 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
    257 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
    258 ; KNL-NEXT:    retq
    259   %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
    260   %mask = icmp sgt <16 x i32> %x, %y
    261   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
    262   ret <16 x i32> %max
    263 }
    264 
    265 define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
        ; Signed `icmp sle` of %x against a vector loaded from %y.ptr, then a select
        ; between %x and %x1. The KNL checks expect vpcmpled with the load folded in
        ; as the memory operand (%rdi).
    266 ; KNL-LABEL: test18:
    267 ; KNL:       ## BB#0:
    268 ; KNL-NEXT:    vpcmpled (%rdi), %zmm0, %k1
    269 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
    270 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
    271 ; KNL-NEXT:    retq
    272   %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
    273   %mask = icmp sle <16 x i32> %x, %y
    274   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
    275   ret <16 x i32> %max
    276 }
    277 
    278 define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
        ; Unsigned `icmp ule` of %x against a vector loaded from %y.ptr, then a
        ; select between %x and %x1. The KNL checks expect vpcmpleud with the load
        ; folded in as the memory operand (%rdi).
    279 ; KNL-LABEL: test19:
    280 ; KNL:       ## BB#0:
    281 ; KNL-NEXT:    vpcmpleud (%rdi), %zmm0, %k1
    282 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
    283 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
    284 ; KNL-NEXT:    retq
    285   %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
    286   %mask = icmp ule <16 x i32> %x, %y
    287   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
    288   ret <16 x i32> %max
    289 }
    290 
    291 define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
        ; Two equality compares combined through select(mask0, mask1, zero), which
        ; is true only where both compares are true; the combined mask then selects
        ; between %x and %y. The KNL checks expect the second vpcmpeqd to run under
        ; the first compare's result as its write mask ({%k1}).
    292 ; KNL-LABEL: test20:
    293 ; KNL:       ## BB#0:
    294 ; KNL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
    295 ; KNL-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
    296 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
    297 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
    298 ; KNL-NEXT:    retq
    299   %mask1 = icmp eq <16 x i32> %x1, %y1
    300   %mask0 = icmp eq <16 x i32> %x, %y
    301   %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
    302   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
    303   ret <16 x i32> %max
    304 }
    305 
    306 define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
        ; Combines `icmp sle %x, %y` with `icmp sge %x1, %y1` through
        ; select(mask0, mask1, zero) and uses the result to select between %x and
        ; %x1. The KNL checks expect both predicates as vpcmpleq (the sge one with
        ; swapped operands), the second executed under the first one's mask.
    307 ; KNL-LABEL: test21:
    308 ; KNL:       ## BB#0:
    309 ; KNL-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
    310 ; KNL-NEXT:    vpcmpleq %zmm2, %zmm3, %k1 {%k1}
    311 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm2 {%k1}
    312 ; KNL-NEXT:    vmovaps %zmm2, %zmm0
    313 ; KNL-NEXT:    retq
    314   %mask1 = icmp sge <8 x i64> %x1, %y1
    315   %mask0 = icmp sle <8 x i64> %x, %y
    316   %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
    317   %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
    318   ret <8 x i64> %max
    319 }
    320 
    321 define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
        ; Combines `icmp sgt %x1, %y1` with `icmp sgt %x, <load from %y.ptr>`
        ; through select(mask0, mask1, zero), then selects between %x and %x1.
        ; The KNL checks expect a register vpcmpgtq followed by a masked vpcmpgtq
        ; that takes the load as its memory operand (%rdi).
    322 ; KNL-LABEL: test22:
    323 ; KNL:       ## BB#0:
    324 ; KNL-NEXT:    vpcmpgtq %zmm2, %zmm1, %k1
    325 ; KNL-NEXT:    vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
    326 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
    327 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
    328 ; KNL-NEXT:    retq
    329   %mask1 = icmp sgt <8 x i64> %x1, %y1
    330   %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
    331   %mask0 = icmp sgt <8 x i64> %x, %y
    332   %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
    333   %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
    334   ret <8 x i64> %max
    335 }
    336 
    337 define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
        ; Mixes a signed and an unsigned predicate: `icmp sge %x1, %y1` combined
        ; with `icmp ule %x, <load from %y.ptr>` through select(mask0, mask1, zero),
        ; then a select between %x and %x1. The KNL checks expect vpcmpled followed
        ; by a masked vpcmpleud that takes the load as its memory operand (%rdi).
    338 ; KNL-LABEL: test23:
    339 ; KNL:       ## BB#0:
    340 ; KNL-NEXT:    vpcmpled %zmm1, %zmm2, %k1
    341 ; KNL-NEXT:    vpcmpleud (%rdi), %zmm0, %k1 {%k1}
    342 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
    343 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
    344 ; KNL-NEXT:    retq
    345   %mask1 = icmp sge <16 x i32> %x1, %y1
    346   %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
    347   %mask0 = icmp ule <16 x i32> %x, %y
    348   %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
    349   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
    350   ret <16 x i32> %max
    351 }
    352 
    353 define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
        ; Loads one i64 from %yb.ptr, splats it to all eight lanes with
        ; insertelement + shufflevector, compares it for equality with %x, and
        ; selects between %x and %x1. The KNL checks expect the splat to become an
        ; embedded-broadcast memory operand, (%rdi){1to8}, on vpcmpeqq.
    354 ; KNL-LABEL: test24:
    355 ; KNL:       ## BB#0:
    356 ; KNL-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k1
    357 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
    358 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
    359 ; KNL-NEXT:    retq
    360   %yb = load i64, i64* %yb.ptr, align 4
    361   %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
    362   %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
    363   %mask = icmp eq <8 x i64> %x, %y
    364   %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
    365   ret <8 x i64> %max
    366 }
    367 
    368 define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
        ; Loads one i32 from %yb.ptr, splats it to sixteen lanes, compares with
        ; signed `icmp sle`, and selects between %x and %x1. The KNL checks expect
        ; vpcmpled with the embedded-broadcast operand (%rdi){1to16}.
    369 ; KNL-LABEL: test25:
    370 ; KNL:       ## BB#0:
    371 ; KNL-NEXT:    vpcmpled (%rdi){1to16}, %zmm0, %k1
    372 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
    373 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
    374 ; KNL-NEXT:    retq
    375   %yb = load i32, i32* %yb.ptr, align 4
    376   %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
    377   %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
    378   %mask = icmp sle <16 x i32> %x, %y
    379   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
    380   ret <16 x i32> %max
    381 }
    382 
    383 define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
        ; Combines `icmp sge %x1, %y1` with `icmp sgt %x, <splat of the i32 at
        ; %yb.ptr>` through select(mask0, mask1, zero), then selects between %x and
        ; %x1. The KNL checks expect vpcmpled followed by a masked vpcmpgtd that uses
        ; the embedded-broadcast operand (%rdi){1to16}.
    384 ; KNL-LABEL: test26:
    385 ; KNL:       ## BB#0:
    386 ; KNL-NEXT:    vpcmpled %zmm1, %zmm2, %k1
    387 ; KNL-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
    388 ; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
    389 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
    390 ; KNL-NEXT:    retq
    391   %mask1 = icmp sge <16 x i32> %x1, %y1
    392   %yb = load i32, i32* %yb.ptr, align 4
    393   %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
    394   %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
    395   %mask0 = icmp sgt <16 x i32> %x, %y
    396   %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
    397   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
    398   ret <16 x i32> %max
    399 }
    400 
    401 define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
        ; <8 x i64> variant of the two-compare broadcast test: `icmp sge %x1, %y1`
        ; combined with `icmp sle %x, <splat of the i64 at %yb.ptr>`, then a select
        ; between %x and %x1. The KNL checks expect two vpcmpleq instructions, the
        ; second masked and using the embedded-broadcast operand (%rdi){1to8}.
    402 ; KNL-LABEL: test27:
    403 ; KNL:       ## BB#0:
    404 ; KNL-NEXT:    vpcmpleq        %zmm1, %zmm2, %k1
    405 ; KNL-NEXT:    vpcmpleq        (%rdi){1to8}, %zmm0, %k1 {%k1}
    406 ; KNL-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
    407 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
    408 ; KNL-NEXT:    retq
    409   %mask1 = icmp sge <8 x i64> %x1, %y1
    410   %yb = load i64, i64* %yb.ptr, align 4
    411   %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
    412   %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
    413   %mask0 = icmp sle <8 x i64> %x, %y
    414   %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
    415   %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
    416   ret <8 x i64> %max
    417 }
    418 
    419 ; KNL-LABEL: test28
    420 ; KNL: vpcmpgtq
    421 ; KNL: vpcmpgtq
    422 ; KNL: kxnorw
    423 define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) {
        ; Produces two <8 x i1> masks with `icmp sgt`, compares the masks themselves
        ; for equality, and sign-extends the result to <8 x i32>. The KNL check
        ; lines that precede this definition expect two vpcmpgtq compares and a
        ; kxnorw on the resulting mask registers.
    424   %x_gt_y = icmp sgt <8 x i64> %x, %y
    425   %x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
    426   %res = icmp eq <8 x i1>%x_gt_y, %x1_gt_y1
    427   %resse = sext <8 x i1>%res to <8 x i32>
    428   ret <8 x i32> %resse
    429 }
    430 
    431 ; KNL-LABEL: test29
    432 ; KNL: vpcmpgtd
    433 ; KNL: vpcmpgtd
    434 ; KNL: kxorw
    435 define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) {
        ; Produces two <16 x i1> masks with `icmp sgt`, compares the masks for
        ; inequality, and sign-extends the result to <16 x i8>. The KNL check lines
        ; that precede this definition expect two vpcmpgtd compares and a kxorw on
        ; the resulting mask registers.
    436   %x_gt_y = icmp sgt <16 x i32> %x, %y
    437   %x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
    438   %res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
    439   %resse = sext <16 x i1>%res to <16 x i8>
    440   ret <16 x i8> %resse
    441 }
    442 
    443 define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
        ; 256-bit `fcmp oeq` on <4 x double> followed by a select between the
        ; operands. Only SKX checks are present: vcmpeqpd on ymm registers writing
        ; %k1, then a masked vmovapd.
    444 ; SKX-LABEL: test30:
    445 ; SKX: vcmpeqpd   %ymm1, %ymm0, %k1 
    446 ; SKX: vmovapd    %ymm0, %ymm1 {%k1}
    447 
    448   %mask = fcmp oeq <4 x double> %x, %y
    449   %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
    450   ret <4 x double> %max
    451 }
    452 
    453 define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
        ; 128-bit `fcmp olt` of %x against a vector loaded from %yp, then a select
        ; between %x and %x1. The SKX checks expect vcmpltpd on xmm with the load
        ; folded in as the memory operand (%rdi), followed by a masked vmovapd.
    454 ; SKX-LABEL: test31:     
    455 ; SKX: vcmpltpd        (%rdi), %xmm0, %k1 
    456 ; SKX: vmovapd %xmm0, %xmm1 {%k1}
    457 
    458   %y = load <2 x double>, <2 x double>* %yp, align 4
    459   %mask = fcmp olt <2 x double> %x, %y
    460   %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
    461   ret <2 x double> %max
    462 }
    463 
    464 define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
        ; 256-bit compare written as `fcmp ogt %y, %x`, with the loaded value as the
        ; first operand. The SKX checks expect it to be emitted as the less-than
        ; form vcmpltpd on ymm, with the load as the memory operand (%rdi).
    465 ; SKX-LABEL: test32:
    466 ; SKX: vcmpltpd        (%rdi), %ymm0, %k1 
    467 ; SKX: vmovapd %ymm0, %ymm1 {%k1}
    468 
    469   %y = load <4 x double>, <4 x double>* %yp, align 4
    470   %mask = fcmp ogt <4 x double> %y, %x
    471   %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
    472   ret <4 x double> %max
    473 }
    474 
    475 define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
        ; 512-bit `fcmp olt` of %x against a vector loaded from %yp, then a select
        ; between %x and %x1. The SKX checks expect vcmpltpd on zmm with the load as
        ; the memory operand (%rdi), followed by a masked vmovapd.
    476 ; SKX-LABEL: test33:     
    477 ; SKX: vcmpltpd        (%rdi), %zmm0, %k1 
    478 ; SKX: vmovapd %zmm0, %zmm1 {%k1}
    479   %y = load <8 x double>, <8 x double>* %yp, align 4
    480   %mask = fcmp olt <8 x double> %x, %y
    481   %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
    482   ret <8 x double> %max
    483 }
    484 
    485 define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
        ; 128-bit `fcmp olt` of %x against a <4 x float> loaded from %yp, then a
        ; select between %x and %x1. The SKX checks expect vcmpltps on xmm with the
        ; load as the memory operand (%rdi), followed by a masked vmovaps.
    486 ; SKX-LABEL: test34:     
    487 ; SKX: vcmpltps        (%rdi), %xmm0, %k1 
    488 ; SKX: vmovaps %xmm0, %xmm1 {%k1}
    489   %y = load <4 x float>, <4 x float>* %yp, align 4
    490   %mask = fcmp olt <4 x float> %x, %y
    491   %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
    492   ret <4 x float> %max
    493 }
    494 
    495 define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
        ; 256-bit compare written as `fcmp ogt %y, %x`, with the loaded value as the
        ; first operand. The SKX checks expect the less-than form vcmpltps on ymm,
        ; with the load as the memory operand (%rdi).
    496 ; SKX-LABEL: test35:
    497 ; SKX: vcmpltps        (%rdi), %ymm0, %k1 
    498 ; SKX: vmovaps %ymm0, %ymm1 {%k1}
    499 
    500   %y = load <8 x float>, <8 x float>* %yp, align 4
    501   %mask = fcmp ogt <8 x float> %y, %x
    502   %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
    503   ret <8 x float> %max
    504 }
    505 
    506 define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
        ; 512-bit `fcmp olt` of %x against a <16 x float> loaded from %yp, then a
        ; select between %x and %x1. The SKX checks expect vcmpltps on zmm with the
        ; load as the memory operand (%rdi), followed by a masked vmovaps.
    507 ; SKX-LABEL: test36:     
    508 ; SKX: vcmpltps        (%rdi), %zmm0, %k1 
    509 ; SKX: vmovaps %zmm0, %zmm1 {%k1}
    510   %y = load <16 x float>, <16 x float>* %yp, align 4
    511   %mask = fcmp olt <16 x float> %x, %y
    512   %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
    513   ret <16 x float> %max
    514 }
    515 
    516 define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
        ; Loads one double from %ptr, splats it to eight lanes, and evaluates
        ; `fcmp ogt <splat>, %x` before selecting between %x and %x1. The SKX checks
        ; expect the less-than form vcmpltpd with the splat folded into the
        ; embedded-broadcast operand (%rdi){1to8}.
    517 ; SKX-LABEL: test37:                                
    518 ; SKX: vcmpltpd  (%rdi){1to8}, %zmm0, %k1 
    519 ; SKX: vmovapd %zmm0, %zmm1 {%k1}
    520 
    521   %a = load double, double* %ptr
    522   %v = insertelement <8 x double> undef, double %a, i32 0
    523   %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
    524 
    525   %mask = fcmp ogt <8 x double> %shuffle, %x
    526   %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
    527   ret <8 x double> %max
    528 }
    529 
    530 define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
        ; 256-bit variant of the broadcast compare: a double loaded from %ptr is
        ; splatted to four lanes and compared with `fcmp ogt <splat>, %x`. The SKX
        ; checks expect vcmpltpd on ymm with the operand (%rdi){1to4}.
    531 ; SKX-LABEL: test38:                                
    532 ; SKX: vcmpltpd  (%rdi){1to4}, %ymm0, %k1 
    533 ; SKX: vmovapd %ymm0, %ymm1 {%k1}
    534 
    535   %a = load double, double* %ptr
    536   %v = insertelement <4 x double> undef, double %a, i32 0
    537   %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
    538   
    539   %mask = fcmp ogt <4 x double> %shuffle, %x
    540   %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
    541   ret <4 x double> %max
    542 }
    543 
    544 define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
        ; 128-bit variant of the broadcast compare: a double loaded from %ptr is
        ; splatted to two lanes (the shuffle mask is spelled out as <0, 0>) and
        ; compared with `fcmp ogt <splat>, %x`. The SKX checks expect vcmpltpd on
        ; xmm with the operand (%rdi){1to2}.
    545 ; SKX-LABEL: test39:                                
    546 ; SKX: vcmpltpd  (%rdi){1to2}, %xmm0, %k1 
    547 ; SKX: vmovapd %xmm0, %xmm1 {%k1}
    548 
    549   %a = load double, double* %ptr
    550   %v = insertelement <2 x double> undef, double %a, i32 0
    551   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
    552   
    553   %mask = fcmp ogt <2 x double> %shuffle, %x
    554   %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
    555   ret <2 x double> %max
    556 }
    557 
    558 
    559 define <16  x float> @test40(<16  x float> %x, <16  x float> %x1, float* %ptr) nounwind {
        ; Loads one float from %ptr, splats it to sixteen lanes with an explicit
        ; all-zero-index shuffle mask, and evaluates `fcmp ogt <splat>, %x` before
        ; selecting between %x and %x1. The SKX checks expect vcmpltps on zmm with
        ; the embedded-broadcast operand (%rdi){1to16}.
    560 ; SKX-LABEL: test40:                                
    561 ; SKX: vcmpltps  (%rdi){1to16}, %zmm0, %k1 
    562 ; SKX: vmovaps %zmm0, %zmm1 {%k1}
    563 
    564   %a = load float, float* %ptr
    565   %v = insertelement <16  x float> undef, float %a, i32 0
    566   %shuffle = shufflevector <16  x float> %v, <16  x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    567   
    568   %mask = fcmp ogt <16  x float> %shuffle, %x
    569   %max = select <16 x i1> %mask, <16  x float> %x, <16  x float> %x1
    570   ret <16  x float> %max
    571 }
    572 
    573 define <8  x float> @test41(<8  x float> %x, <8  x float> %x1, float* %ptr) nounwind {
        ; 256-bit variant of the float broadcast compare: a float loaded from %ptr
        ; is splatted to eight lanes and compared with `fcmp ogt <splat>, %x`. The
        ; SKX checks expect vcmpltps on ymm with the operand (%rdi){1to8}.
    574 ; SKX-LABEL: test41:                                
    575 ; SKX: vcmpltps  (%rdi){1to8}, %ymm0, %k1 
    576 ; SKX: vmovaps %ymm0, %ymm1 {%k1}
    577 
    578   %a = load float, float* %ptr
    579   %v = insertelement <8  x float> undef, float %a, i32 0
    580   %shuffle = shufflevector <8  x float> %v, <8  x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    581   
    582   %mask = fcmp ogt <8  x float> %shuffle, %x
    583   %max = select <8 x i1> %mask, <8  x float> %x, <8  x float> %x1
    584   ret <8  x float> %max
    585 }
    586 
    587 define <4  x float> @test42(<4  x float> %x, <4  x float> %x1, float* %ptr) nounwind {
        ; 128-bit variant of the float broadcast compare: a float loaded from %ptr
        ; is splatted to four lanes and compared with `fcmp ogt <splat>, %x`. The
        ; SKX checks expect vcmpltps on xmm with the operand (%rdi){1to4}.
    588 ; SKX-LABEL: test42:                                
    589 ; SKX: vcmpltps  (%rdi){1to4}, %xmm0, %k1 
    590 ; SKX: vmovaps %xmm0, %xmm1 {%k1}
    591   
    592   %a = load float, float* %ptr
    593   %v = insertelement <4  x float> undef, float %a, i32 0
    594   %shuffle = shufflevector <4  x float> %v, <4  x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    595   
    596   %mask = fcmp ogt <4  x float> %shuffle, %x
    597   %max = select <4 x i1> %mask, <4  x float> %x, <4  x float> %x1
    598   ret <4  x float> %max
    599 }
    600 
    601 define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
        ; Broadcast compare combined with a caller-supplied mask: the result of
        ; `fcmp ogt <splat of the double at %ptr>, %x` is ANDed with the <8 x i1>
        ; argument %mask_in before selecting between %x and %x1. The SKX checks
        ; expect %mask_in to be moved from %xmm2 into %k1 with vpmovw2m and the
        ; compare to execute under that mask, so no separate AND is emitted.
    602 ; SKX-LABEL: test43:                                
    603 ; SKX: vpmovw2m  %xmm2, %k1
    604 ; SKX: vcmpltpd  (%rdi){1to8}, %zmm0, %k1 {%k1}
    605 ; SKX: vmovapd %zmm0, %zmm1 {%k1}
    606 
    607   %a = load double, double* %ptr
    608   %v = insertelement <8 x double> undef, double %a, i32 0
    609   %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
    610   
    611   %mask_cmp = fcmp ogt <8 x double> %shuffle, %x
    612   %mask = and <8 x i1> %mask_cmp, %mask_in
    613   %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
    614   ret <8 x double> %max
    615 }
    616