; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2     | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE2
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2   | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE42
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx      | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX1
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2     | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2   | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE42
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx    | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2   | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX512
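;
; Each function below implements a horizontal signed-minimum (smin) reduction
; in plain IR: repeatedly shuffle the upper half of the vector down, combine
; the halves with icmp slt + select, and extract lane 0 at the end. The
; autogenerated CHECK lines pin down how each target level (SSE2, SSE4.2,
; AVX, AVX2, AVX512) lowers those steps.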

;
; 128-bit Vectors
;

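; v2i64: one halving step. SSE2 has no 64-bit vector compare, so the icmp slt
; is synthesized from 32-bit ops: the pxor flips the sign bit of each i64's
; low dword so pcmpgtd's signed compare acts as an unsigned compare there,
; and the pshufd/pand/por sequence forms (hi >s) | ((hi ==) & (lo >u)).
; SSE4.2/AVX can use pcmpgtq + blendvpd; only AVX512 has a native vpminsq.
; On i686 the i64 result is returned in edx:eax, hence the two extracts.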
define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v2i64:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    pxor %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm4
; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm5, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm2, %xmm3
; X86-SSE2-NEXT:    pand %xmm3, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm3
; X86-SSE2-NEXT:    por %xmm0, %xmm3
; X86-SSE2-NEXT:    movd %xmm3, %eax
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
; X86-SSE2-NEXT:    movd %xmm0, %edx
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v2i64:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; X86-SSE42-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT:    movd %xmm2, %eax
; X86-SSE42-NEXT:    pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v2i64:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v2i64:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    pxor %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm4
; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm5, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm2, %xmm3
; X64-SSE2-NEXT:    pand %xmm3, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm3
; X64-SSE2-NEXT:    por %xmm0, %xmm3
; X64-SSE2-NEXT:    movq %xmm3, %rax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v2i64:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; X64-SSE42-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT:    movq %xmm2, %rax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v2i64:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT:    vmovq %xmm0, %rax
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v2i64:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT:    vmovq %xmm0, %rax
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v2i64:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT:    vpminsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovq %xmm0, %rax
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %2 = icmp slt <2 x i64> %a0, %1
  %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1
  %4 = extractelement <2 x i64> %3, i32 0
  ret i64 %4
}

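; v4i32: two halving steps. pminsd only arrives with SSE4.1, so the SSE2
; lowering builds each min from a pcmpgtd mask plus a pand/pandn/por select.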
define i32 @test_reduce_v4i32(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i32:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
; X86-SSE2-NEXT:    por %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v4i32:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE42-NEXT:    pminsd %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE42-NEXT:    pminsd %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v4i32:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v4i32:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
; X64-SSE2-NEXT:    por %xmm0, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v4i32:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE42-NEXT:    pminsd %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE42-NEXT:    pminsd %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v4i32:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp slt <4 x i32> %a0, %1
  %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <4 x i32> %3, %4
  %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4
  %7 = extractelement <4 x i32> %6, i32 0
  ret i32 %7
}

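; v8i16: SSE2 does have pminsw, so the reduction is plain shuffle+min
; (psrld $16 moves lane 1's word down for the final step). With SSE4.1's
; phminposuw, which leaves the unsigned minimum of all eight words in lane 0,
; the whole reduction collapses to smin(x) = phminposuw(x ^ 0x8000) ^ 0x8000.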
define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v8i16:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v8i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v8i16:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT:    retq
  %1  = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp slt <8 x i16> %a0, %1
  %3  = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1
  %4  = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp slt <8 x i16> %3, %4
  %6  = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4
  %7  = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp slt <8 x i16> %6, %7
  %9  = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7
  %10 = extractelement <8 x i16> %9, i32 0
  ret i16 %10
}

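; v16i8: pminsb is SSE4.1+, so SSE2 needs four manual compare+select steps.
; SSE4.2/AVX instead bias the bytes by 0x80 (turning signed order into
; unsigned), fold odd bytes into even ones with psrlw $8 + pminub so every
; word holds one candidate byte zero-extended, and finish with phminposuw
; plus an unbiasing pxor.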
define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
; X86-SSE2-NEXT:    por %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE42-NEXT:    psrlw $8, %xmm2
; X86-SSE42-NEXT:    pminub %xmm0, %xmm2
; X86-SSE42-NEXT:    phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm2
; X86-AVX-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpextrb $0, %xmm0, %eax
; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v16i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
; X64-SSE2-NEXT:    por %xmm0, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE42-NEXT:    psrlw $8, %xmm2
; X64-SSE42-NEXT:    pminub %xmm0, %xmm2
; X64-SSE42-NEXT:    phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v16i8:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm2
; X64-AVX-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpextrb $0, %xmm0, %eax
; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT:    retq
  %1  = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp slt <16 x i8> %a0, %1
  %3  = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
  %4  = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp slt <16 x i8> %3, %4
  %6  = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4
  %7  = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp slt <16 x i8> %6, %7
  %9  = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7
  %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp slt <16 x i8> %9, %10
  %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10
  %13 = extractelement <16 x i8> %12, i32 0
  ret i8 %13
}

;
; 256-bit Vectors
;

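; v4i64: the two 128-bit halves are min'd together first, then the v2i64
; pattern finishes. AVX1 has no 256-bit integer compare, so it does two
; 128-bit vpcmpgtq's and stitches the mask back into a ymm with vinsertf128
; for each vblendvpd step; AVX2 compares whole ymm registers.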
define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i64:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X86-SSE2-NEXT:    pxor %xmm2, %xmm4
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm5
; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm6, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm3, %xmm4
; X86-SSE2-NEXT:    pand %xmm4, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm4
; X86-SSE2-NEXT:    por %xmm0, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm5, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm4
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm4, %xmm2
; X86-SSE2-NEXT:    movd %xmm2, %eax
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X86-SSE2-NEXT:    movd %xmm0, %edx
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v4i64:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; X86-SSE42-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT:    movd %xmm2, %eax
; X86-SSE42-NEXT:    pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v4i64:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm3
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X86-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm3
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
; X86-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
; X86-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v4i64:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X64-SSE2-NEXT:    pxor %xmm2, %xmm4
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm5
; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm6, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm3, %xmm4
; X64-SSE2-NEXT:    pand %xmm4, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm4
; X64-SSE2-NEXT:    por %xmm0, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm5, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm4
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm4, %xmm2
; X64-SSE2-NEXT:    movq %xmm2, %rax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v4i64:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; X64-SSE42-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT:    movq %xmm2, %rax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v4i64:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm3
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X64-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm3
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT:    vmovq %xmm0, %rax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v4i64:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
; X64-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
; X64-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT:    vmovq %xmm0, %rax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v4i64:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminsq %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT:    vpminsq %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vmovq %xmm0, %rax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp slt <4 x i64> %a0, %1
  %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1
  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <4 x i64> %3, %4
  %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4
  %7 = extractelement <4 x i64> %6, i32 0
  ret i64 %7
}

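; v8i32: the same structure one level up. AVX2/AVX512 keep the whole
; reduction in ymm registers with vpminsd, while AVX1 drops to xmm after
; the first vextractf128.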
define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i32:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
; X86-SSE2-NEXT:    por %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    movd %xmm2, %eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v8i32:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminsd %xmm1, %xmm0
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE42-NEXT:    pminsd %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE42-NEXT:    pminsd %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v8i32:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v8i32:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v8i32:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
; X64-SSE2-NEXT:    por %xmm0, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    movd %xmm2, %eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v8i32:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminsd %xmm1, %xmm0
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE42-NEXT:    pminsd %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE42-NEXT:    pminsd %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v8i32:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v8i32:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v8i32:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX512-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1  = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp slt <8 x i32> %a0, %1
  %3  = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1
  %4  = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp slt <8 x i32> %3, %4
  %6  = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4
  %7  = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp slt <8 x i32> %6, %7
  %9  = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7
  %10 = extractelement <8 x i32> %9, i32 0
  ret i32 %10
}

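; v16i16: one pminsw merges the halves, then the v8i16 phminposuw trick
; (or, on SSE2, the shuffle+pminsw chain) finishes the reduction.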
define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v16i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminsw %xmm1, %xmm0
; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v16i16:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v16i16:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v16i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v16i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminsw %xmm1, %xmm0
; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v16i16:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v16i16:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v16i16:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1  = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp slt <16 x i16> %a0, %1
  %3  = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
  %4  = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp slt <16 x i16> %3, %4
  %6  = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
  %7  = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp slt <16 x i16> %6, %7
  %9  = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7
  %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp slt <16 x i16> %9, %10
  %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10
  %13 = extractelement <16 x i16> %12, i32 0
  ret i16 %13
}

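; v32i8: one pminsb merges the halves (SSE4.1+), then the v16i8 bias +
; pminub + phminposuw trick finishes; SSE2 again falls back to a chain of
; five pcmpgtb compare+select steps.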
define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
; X86-SSE2-NEXT:    por %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    movd %xmm2, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v32i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminsb %xmm1, %xmm0
; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE42-NEXT:    psrlw $8, %xmm2
; X86-SSE42-NEXT:    pminub %xmm0, %xmm2
; X86-SSE42-NEXT:    phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v32i8:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
; X86-AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v32i8:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm2
; X86-AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
; X64-SSE2-NEXT:    por %xmm0, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    movd %xmm2, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminsb %xmm1, %xmm0
; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE42-NEXT:    psrlw $8, %xmm2
; X64-SSE42-NEXT:    pminub %xmm0, %xmm2
; X64-SSE42-NEXT:    phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v32i8:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
; X64-AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v32i8:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm2
; X64-AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v32i8:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm2
; X64-AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1  = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp slt <32 x i8> %a0, %1
  %3  = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
  %4  = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp slt <32 x i8> %3, %4
  %6  = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
  %7  = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp slt <32 x i8> %6, %7
  %9  = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
  %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp slt <32 x i8> %9, %10
  %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
  %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp slt <32 x i8> %12, %13
  %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13
  %16 = extractelement <32 x i8> %15, i32 0
  ret i8 %16
}

;
; 512-bit Vectors
;

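; v8i64: the widest case. The SSE lowerings reduce the four incoming xmm
; registers pairwise before the final v2i64 steps, so the SSE2 sign-flip
; compare sequence is repeated once per pairwise min.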
define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i64:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm5
; X86-SSE2-NEXT:    pxor %xmm4, %xmm5
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm6
; X86-SSE2-NEXT:    pxor %xmm4, %xmm6
; X86-SSE2-NEXT:    movdqa %xmm6, %xmm7
; X86-SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
; X86-SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm5, %xmm6
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm6, %xmm5
; X86-SSE2-NEXT:    pand %xmm5, %xmm1
; X86-SSE2-NEXT:    pandn %xmm3, %xmm5
; X86-SSE2-NEXT:    por %xmm1, %xmm5
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X86-SSE2-NEXT:    pxor %xmm4, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm6
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm6
; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm6[0,0,2,2]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm1, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm3, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm0
; X86-SSE2-NEXT:    pandn %xmm2, %xmm1
; X86-SSE2-NEXT:    por %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm5, %xmm2
; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
; X86-SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm2, %xmm0
; X86-SSE2-NEXT:    pand %xmm0, %xmm1
; X86-SSE2-NEXT:    pandn %xmm5, %xmm0
; X86-SSE2-NEXT:    por %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
; X86-SSE2-NEXT:    pxor %xmm1, %xmm4
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; X86-SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm2, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm4, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
; X86-SSE2-NEXT:    por %xmm0, %xmm2
; X86-SSE2-NEXT:    movd %xmm2, %eax
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X86-SSE2-NEXT:    movd %xmm0, %edx
; X86-SSE2-NEXT:    retl
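; SSE2 has no 64-bit compare, so the block above synthesizes a signed i64
; "greater than" out of 32-bit ops: the low dword of each lane is XORed with
; 0x80000000 so that the signed pcmpgtd acts as an unsigned compare on it,
; and the per-dword results are recombined as
;
;   gt64 = gt_signed(hi) | (eq(hi) & gt_unsigned(lo))
;
; The resulting mask then drives a pand/pandn/por select of the smaller
; elements.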
;
; X86-SSE42-LABEL: test_reduce_v8i64:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm4
; X86-SSE42-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
; X86-SSE42-NEXT:    movdqa %xmm3, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
; X86-SSE42-NEXT:    movapd %xmm3, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
; X86-SSE42-NEXT:    movd %xmm1, %eax
; X86-SSE42-NEXT:    pextrd $1, %xmm1, %edx
; X86-SSE42-NEXT:    retl
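; With SSE4.2, pcmpgtq is available, so each reduction step above is a direct
; 64-bit compare feeding blendvpd (which selects on the sign bit of the
; implicit %xmm0 mask): four lanes pair down to two, then two to one, and the
; final i64 is returned in edx:eax on the 32-bit target.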
;
; X86-AVX1-LABEL: test_reduce_v8i64:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; X86-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm3
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X86-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm3
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm3
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
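; AVX1 only provides 128-bit integer compares, so each 256-bit compare above
; is assembled from two xmm vpcmpgtq results rejoined with vinsertf128 before
; the 256-bit (FP-domain) vblendvpd; vzeroupper is emitted on exit to avoid
; SSE/AVX transition penalties.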
;
; X86-AVX2-LABEL: test_reduce_v8i64:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
; X86-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
; X86-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
; X86-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v8i64:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm5
; X64-SSE2-NEXT:    pxor %xmm4, %xmm5
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm6
; X64-SSE2-NEXT:    pxor %xmm4, %xmm6
; X64-SSE2-NEXT:    movdqa %xmm6, %xmm7
; X64-SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm8, %xmm6
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm6, %xmm5
; X64-SSE2-NEXT:    pand %xmm5, %xmm1
; X64-SSE2-NEXT:    pandn %xmm3, %xmm5
; X64-SSE2-NEXT:    por %xmm1, %xmm5
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X64-SSE2-NEXT:    pxor %xmm4, %xmm3
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm6
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm6
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm7, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm1, %xmm3
; X64-SSE2-NEXT:    pand %xmm3, %xmm0
; X64-SSE2-NEXT:    pandn %xmm2, %xmm3
; X64-SSE2-NEXT:    por %xmm0, %xmm3
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm0
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm5, %xmm1
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm6, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm3
; X64-SSE2-NEXT:    pandn %xmm5, %xmm1
; X64-SSE2-NEXT:    por %xmm3, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
; X64-SSE2-NEXT:    pxor %xmm0, %xmm4
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm3
; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm5, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm2, %xmm3
; X64-SSE2-NEXT:    pand %xmm3, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm3
; X64-SSE2-NEXT:    por %xmm1, %xmm3
; X64-SSE2-NEXT:    movq %xmm3, %rax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v8i64:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm4
; X64-SSE42-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
; X64-SSE42-NEXT:    movdqa %xmm3, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
; X64-SSE42-NEXT:    movapd %xmm3, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
; X64-SSE42-NEXT:    movq %xmm1, %rax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v8i64:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; X64-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm3
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X64-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm3
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm3
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT:    vmovq %xmm0, %rax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v8i64:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
; X64-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
; X64-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
; X64-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT:    vmovq %xmm0, %rax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v8i64:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT:    vmovq %xmm0, %rax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
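; AVX512 has a native signed i64 minimum, so no compare+blend is needed: each
; halving step is a single vpminsq. The zmm register forms above appear to be
; an artifact of how the operation was legalized at 512 bits; only the low
; lanes carry meaningful data by the final step.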
  %1  = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp slt <8 x i64> %a0, %1
  %3  = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1
  %4  = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp slt <8 x i64> %3, %4
  %6  = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4
  %7  = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp slt <8 x i64> %6, %7
  %9  = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7
  %10 = extractelement <8 x i64> %9, i32 0
  ret i64 %10
}

define i32 @test_reduce_v16i32(<16 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i32:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm4
; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
; X86-SSE2-NEXT:    pand %xmm4, %xmm0
; X86-SSE2-NEXT:    pandn %xmm2, %xmm4
; X86-SSE2-NEXT:    por %xmm0, %xmm4
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm0
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; X86-SSE2-NEXT:    pand %xmm0, %xmm1
; X86-SSE2-NEXT:    pandn %xmm3, %xmm0
; X86-SSE2-NEXT:    por %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtd %xmm4, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm4
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm4, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    retl
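; pminsd only arrives with SSE4.1, so plain SSE2 forms each i32 minimum above
; with the classic compare/select idiom: mask = pcmpgtd(y, x), then
; result = (x & mask) | (y & ~mask), implemented as pand/pandn/por.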
;
; X86-SSE42-LABEL: test_reduce_v16i32:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminsd %xmm3, %xmm1
; X86-SSE42-NEXT:    pminsd %xmm2, %xmm0
; X86-SSE42-NEXT:    pminsd %xmm1, %xmm0
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE42-NEXT:    pminsd %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE42-NEXT:    pminsd %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v16i32:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpminsd %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpminsd %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v16i32:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v16i32:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm4
; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm4
; X64-SSE2-NEXT:    pand %xmm4, %xmm0
; X64-SSE2-NEXT:    pandn %xmm2, %xmm4
; X64-SSE2-NEXT:    por %xmm0, %xmm4
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm0
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; X64-SSE2-NEXT:    pand %xmm0, %xmm1
; X64-SSE2-NEXT:    pandn %xmm3, %xmm0
; X64-SSE2-NEXT:    por %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtd %xmm4, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm4
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm4, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v16i32:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminsd %xmm3, %xmm1
; X64-SSE42-NEXT:    pminsd %xmm2, %xmm0
; X64-SSE42-NEXT:    pminsd %xmm1, %xmm0
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE42-NEXT:    pminsd %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE42-NEXT:    pminsd %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v16i32:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpminsd %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpminsd %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v16i32:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v16i32:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1  = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp slt <16 x i32> %a0, %1
  %3  = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1
  %4  = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp slt <16 x i32> %3, %4
  %6  = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4
  %7  = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp slt <16 x i32> %6, %7
  %9  = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7
  %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp slt <16 x i32> %9, %10
  %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10
  %13 = extractelement <16 x i32> %12, i32 0
  ret i32 %13
}

define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pminsw %xmm3, %xmm1
; X86-SSE2-NEXT:    pminsw %xmm2, %xmm0
; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
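; i16 is the one element type with a native signed minimum already in SSE2
; (pminsw), so the scalar is reached purely with pminsw plus shuffles; the
; last step uses psrld $16 to move word 1 down to word 0.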
;
; X86-SSE42-LABEL: test_reduce_v32i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminsw %xmm3, %xmm1
; X86-SSE42-NEXT:    pminsw %xmm2, %xmm0
; X86-SSE42-NEXT:    pminsw %xmm1, %xmm0
; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
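; phminposuw (SSE4.1) computes the unsigned horizontal minimum of 8 words and
; leaves it in element 0. Flipping the sign bit first maps signed order onto
; unsigned order, so the tail of the reduction collapses to:
;
;   smin16(v) = phminposuw(v ^ 0x8000) ^ 0x8000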
;
; X86-AVX1-LABEL: test_reduce_v32i16:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpminsw %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpminsw %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v32i16:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pminsw %xmm3, %xmm1
; X64-SSE2-NEXT:    pminsw %xmm2, %xmm0
; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminsw %xmm3, %xmm1
; X64-SSE42-NEXT:    pminsw %xmm2, %xmm0
; X64-SSE42-NEXT:    pminsw %xmm1, %xmm0
; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v32i16:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpminsw %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpminsw %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v32i16:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v32i16:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1  = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp slt <32 x i16> %a0, %1
  %3  = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
  %4  = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp slt <32 x i16> %3, %4
  %6  = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
  %7  = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp slt <32 x i16> %6, %7
  %9  = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
  %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp slt <32 x i16> %9, %10
  %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10
  %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp slt <32 x i16> %12, %13
  %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13
  %16 = extractelement <32 x i16> %15, i32 0
  ret i16 %16
}

define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v64i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm4
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm4
; X86-SSE2-NEXT:    pand %xmm4, %xmm0
; X86-SSE2-NEXT:    pandn %xmm2, %xmm4
; X86-SSE2-NEXT:    por %xmm0, %xmm4
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm0
; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm0
; X86-SSE2-NEXT:    pand %xmm0, %xmm1
; X86-SSE2-NEXT:    pandn %xmm3, %xmm0
; X86-SSE2-NEXT:    por %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm4, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm4
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm4, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
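; SSE2 has no signed byte minimum (only the unsigned pminub), so every i8
; step above is the pcmpgtb + pand/pandn/por select; the last two steps shift
; with psrld $16 and psrlw $8 to bring bytes 2 and 1 down to byte 0.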
;
; X86-SSE42-LABEL: test_reduce_v64i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminsb %xmm3, %xmm1
; X86-SSE42-NEXT:    pminsb %xmm2, %xmm0
; X86-SSE42-NEXT:    pminsb %xmm1, %xmm0
; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE42-NEXT:    psrlw $8, %xmm2
; X86-SSE42-NEXT:    pminub %xmm0, %xmm2
; X86-SSE42-NEXT:    phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
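; phminposuw only handles words, so the byte reduction biases to unsigned
; with pxor 0x80, folds the odd bytes into the even ones via psrlw $8 +
; pminub (leaving each word's low byte holding the pairwise minimum and its
; high byte zero), runs phminposuw on the resulting words, undoes the bias,
; and extracts the scalar with pextrb.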
;
; X86-AVX1-LABEL: test_reduce_v64i8:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpminsb %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpminsb %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
; X86-AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v64i8:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm2
; X86-AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v64i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm4
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm4
; X64-SSE2-NEXT:    pand %xmm4, %xmm0
; X64-SSE2-NEXT:    pandn %xmm2, %xmm4
; X64-SSE2-NEXT:    por %xmm0, %xmm4
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm0
; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm0
; X64-SSE2-NEXT:    pand %xmm0, %xmm1
; X64-SSE2-NEXT:    pandn %xmm3, %xmm0
; X64-SSE2-NEXT:    por %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm4, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm4
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm4, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v64i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminsb %xmm3, %xmm1
; X64-SSE42-NEXT:    pminsb %xmm2, %xmm0
; X64-SSE42-NEXT:    pminsb %xmm1, %xmm0
; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE42-NEXT:    psrlw $8, %xmm2
; X64-SSE42-NEXT:    pminub %xmm0, %xmm2
; X64-SSE42-NEXT:    phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v64i8:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpminsb %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpminsb %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
; X64-AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v64i8:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm2
; X64-AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v64i8:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm2
; X64-AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1  = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp slt <64 x i8> %a0, %1
  %3  = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
  %4  = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp slt <64 x i8> %3, %4
  %6  = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
  %7  = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp slt <64 x i8> %6, %7
  %9  = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp slt <64 x i8> %9, %10
  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
  %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp slt <64 x i8> %12, %13
  %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13
  %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %17 = icmp slt <64 x i8> %15, %16
  %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16
  %19 = extractelement <64 x i8> %18, i32 0
  ret i8 %19
}