; Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2     | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE2
      3 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2   | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE42
      4 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx      | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX1
      5 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2     | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX2
      6 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2   | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE2
      7 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE42
      8 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx    | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
      9 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2   | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
     10 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX512
     11 
     12 ;
     13 ; 128-bit Vectors
     14 ;
     15 
     16 define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; Unsigned-minimum reduction of a <2 x i64>: the shufflevector moves lane 1
; into lane 0, 'icmp ult' + 'select' keep the smaller value per lane, and
; lane 0 of the result is extracted. The ';' CHECK lines below are FileCheck
; assertions autogenerated by utils/update_llc_test_checks.py (see the NOTE
; at the top of the file) -- regenerate with that script, do not hand-edit.
; Pre-AVX512 lowerings bias both operands by the sign-bit constant (pxor with
; 2147483648 / 9223372036854775808) before a signed compare; AVX512 emits a
; single vpminuq. i686 returns the i64 split across eax:edx.
     17 ; X86-SSE2-LABEL: test_reduce_v2i64:
     18 ; X86-SSE2:       ## %bb.0:
     19 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
     20 ; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
     21 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
     22 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
     23 ; X86-SSE2-NEXT:    pxor %xmm1, %xmm2
     24 ; X86-SSE2-NEXT:    movdqa %xmm2, %xmm4
     25 ; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
     26 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
     27 ; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
     28 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
     29 ; X86-SSE2-NEXT:    pand %xmm5, %xmm2
     30 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
     31 ; X86-SSE2-NEXT:    por %xmm2, %xmm3
     32 ; X86-SSE2-NEXT:    pand %xmm3, %xmm0
     33 ; X86-SSE2-NEXT:    pandn %xmm1, %xmm3
     34 ; X86-SSE2-NEXT:    por %xmm0, %xmm3
     35 ; X86-SSE2-NEXT:    movd %xmm3, %eax
     36 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
     37 ; X86-SSE2-NEXT:    movd %xmm0, %edx
     38 ; X86-SSE2-NEXT:    retl
     39 ;
     40 ; X86-SSE42-LABEL: test_reduce_v2i64:
     41 ; X86-SSE42:       ## %bb.0:
     42 ; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
     43 ; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
     44 ; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [0,2147483648,0,2147483648]
     45 ; X86-SSE42-NEXT:    movdqa %xmm1, %xmm3
     46 ; X86-SSE42-NEXT:    pxor %xmm0, %xmm3
     47 ; X86-SSE42-NEXT:    pxor %xmm2, %xmm0
     48 ; X86-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
     49 ; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
     50 ; X86-SSE42-NEXT:    movd %xmm2, %eax
     51 ; X86-SSE42-NEXT:    pextrd $1, %xmm2, %edx
     52 ; X86-SSE42-NEXT:    retl
     53 ;
     54 ; X86-AVX-LABEL: test_reduce_v2i64:
     55 ; X86-AVX:       ## %bb.0:
     56 ; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
     57 ; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
     58 ; X86-AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm3
     59 ; X86-AVX-NEXT:    vpxor %xmm2, %xmm1, %xmm2
     60 ; X86-AVX-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
     61 ; X86-AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
     62 ; X86-AVX-NEXT:    vmovd %xmm0, %eax
     63 ; X86-AVX-NEXT:    vpextrd $1, %xmm0, %edx
     64 ; X86-AVX-NEXT:    retl
     65 ;
     66 ; X64-SSE2-LABEL: test_reduce_v2i64:
     67 ; X64-SSE2:       ## %bb.0:
     68 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
     69 ; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
     70 ; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
     71 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
     72 ; X64-SSE2-NEXT:    pxor %xmm1, %xmm2
     73 ; X64-SSE2-NEXT:    movdqa %xmm2, %xmm4
     74 ; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
     75 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
     76 ; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
     77 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
     78 ; X64-SSE2-NEXT:    pand %xmm5, %xmm2
     79 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
     80 ; X64-SSE2-NEXT:    por %xmm2, %xmm3
     81 ; X64-SSE2-NEXT:    pand %xmm3, %xmm0
     82 ; X64-SSE2-NEXT:    pandn %xmm1, %xmm3
     83 ; X64-SSE2-NEXT:    por %xmm0, %xmm3
     84 ; X64-SSE2-NEXT:    movq %xmm3, %rax
     85 ; X64-SSE2-NEXT:    retq
     86 ;
     87 ; X64-SSE42-LABEL: test_reduce_v2i64:
     88 ; X64-SSE42:       ## %bb.0:
     89 ; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
     90 ; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
     91 ; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
     92 ; X64-SSE42-NEXT:    movdqa %xmm1, %xmm3
     93 ; X64-SSE42-NEXT:    pxor %xmm0, %xmm3
     94 ; X64-SSE42-NEXT:    pxor %xmm2, %xmm0
     95 ; X64-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
     96 ; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
     97 ; X64-SSE42-NEXT:    movq %xmm2, %rax
     98 ; X64-SSE42-NEXT:    retq
     99 ;
    100 ; X64-AVX1-LABEL: test_reduce_v2i64:
    101 ; X64-AVX1:       ## %bb.0:
    102 ; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    103 ; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
    104 ; X64-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm3
    105 ; X64-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm2
    106 ; X64-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
    107 ; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
    108 ; X64-AVX1-NEXT:    vmovq %xmm0, %rax
    109 ; X64-AVX1-NEXT:    retq
    110 ;
    111 ; X64-AVX2-LABEL: test_reduce_v2i64:
    112 ; X64-AVX2:       ## %bb.0:
    113 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    114 ; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
    115 ; X64-AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
    116 ; X64-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
    117 ; X64-AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
    118 ; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
    119 ; X64-AVX2-NEXT:    vmovq %xmm0, %rax
    120 ; X64-AVX2-NEXT:    retq
    121 ;
    122 ; X64-AVX512-LABEL: test_reduce_v2i64:
    123 ; X64-AVX512:       ## %bb.0:
    124 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    125 ; X64-AVX512-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
    126 ; X64-AVX512-NEXT:    vmovq %xmm0, %rax
    127 ; X64-AVX512-NEXT:    retq
; IR under test: one shuffle/umin step for the two lanes, then extract lane 0.
    128   %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
    129   %2 = icmp ult <2 x i64> %a0, %1
    130   %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1
    131   %4 = extractelement <2 x i64> %3, i32 0
    132   ret i64 %4
    133 }
    134 
    135 define i32 @test_reduce_v4i32(<4 x i32> %a0) {
; Unsigned-minimum reduction of a <4 x i32> in two shuffle/umin steps:
; upper half vs lower half, then the two remaining adjacent lanes.
; CHECK lines are autogenerated by utils/update_llc_test_checks.py -- do not
; hand-edit; regenerate instead. SSE2 has no unsigned i32 min, so it biases
; both sides with the sign-bit constant 2147483648 and uses signed pcmpgtd
; plus pand/pandn/por selects; SSE4.2/AVX lower each step to pminud/vpminud.
    136 ; X86-SSE2-LABEL: test_reduce_v4i32:
    137 ; X86-SSE2:       ## %bb.0:
    138 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    139 ; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
    140 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
    141 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
    142 ; X86-SSE2-NEXT:    movdqa %xmm1, %xmm4
    143 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm4
    144 ; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
    145 ; X86-SSE2-NEXT:    pand %xmm4, %xmm0
    146 ; X86-SSE2-NEXT:    pandn %xmm1, %xmm4
    147 ; X86-SSE2-NEXT:    por %xmm0, %xmm4
    148 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,2,3]
    149 ; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
    150 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
    151 ; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
    152 ; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
    153 ; X86-SSE2-NEXT:    pand %xmm2, %xmm4
    154 ; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
    155 ; X86-SSE2-NEXT:    por %xmm4, %xmm2
    156 ; X86-SSE2-NEXT:    movd %xmm2, %eax
    157 ; X86-SSE2-NEXT:    retl
    158 ;
    159 ; X86-SSE42-LABEL: test_reduce_v4i32:
    160 ; X86-SSE42:       ## %bb.0:
    161 ; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    162 ; X86-SSE42-NEXT:    pminud %xmm0, %xmm1
    163 ; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
    164 ; X86-SSE42-NEXT:    pminud %xmm1, %xmm0
    165 ; X86-SSE42-NEXT:    movd %xmm0, %eax
    166 ; X86-SSE42-NEXT:    retl
    167 ;
    168 ; X86-AVX-LABEL: test_reduce_v4i32:
    169 ; X86-AVX:       ## %bb.0:
    170 ; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    171 ; X86-AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm0
    172 ; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
    173 ; X86-AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm0
    174 ; X86-AVX-NEXT:    vmovd %xmm0, %eax
    175 ; X86-AVX-NEXT:    retl
    176 ;
    177 ; X64-SSE2-LABEL: test_reduce_v4i32:
    178 ; X64-SSE2:       ## %bb.0:
    179 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    180 ; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
    181 ; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
    182 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
    183 ; X64-SSE2-NEXT:    movdqa %xmm1, %xmm4
    184 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm4
    185 ; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
    186 ; X64-SSE2-NEXT:    pand %xmm4, %xmm0
    187 ; X64-SSE2-NEXT:    pandn %xmm1, %xmm4
    188 ; X64-SSE2-NEXT:    por %xmm0, %xmm4
    189 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,2,3]
    190 ; X64-SSE2-NEXT:    movdqa %xmm4, %xmm1
    191 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
    192 ; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
    193 ; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
    194 ; X64-SSE2-NEXT:    pand %xmm2, %xmm4
    195 ; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
    196 ; X64-SSE2-NEXT:    por %xmm4, %xmm2
    197 ; X64-SSE2-NEXT:    movd %xmm2, %eax
    198 ; X64-SSE2-NEXT:    retq
    199 ;
    200 ; X64-SSE42-LABEL: test_reduce_v4i32:
    201 ; X64-SSE42:       ## %bb.0:
    202 ; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    203 ; X64-SSE42-NEXT:    pminud %xmm0, %xmm1
    204 ; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
    205 ; X64-SSE42-NEXT:    pminud %xmm1, %xmm0
    206 ; X64-SSE42-NEXT:    movd %xmm0, %eax
    207 ; X64-SSE42-NEXT:    retq
    208 ;
    209 ; X64-AVX-LABEL: test_reduce_v4i32:
    210 ; X64-AVX:       ## %bb.0:
    211 ; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    212 ; X64-AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm0
    213 ; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
    214 ; X64-AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm0
    215 ; X64-AVX-NEXT:    vmovd %xmm0, %eax
    216 ; X64-AVX-NEXT:    retq
; IR under test: log2(4) = 2 shuffle/umin steps, then extract lane 0.
    217   %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
    218   %2 = icmp ult <4 x i32> %a0, %1
    219   %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1
    220   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
    221   %5 = icmp ult <4 x i32> %3, %4
    222   %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4
    223   %7 = extractelement <4 x i32> %6, i32 0
    224   ret i32 %7
    225 }
    226 
    227 define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; Unsigned-minimum reduction of a <8 x i16> in three shuffle/umin steps.
; CHECK lines are autogenerated by utils/update_llc_test_checks.py -- do not
; hand-edit; regenerate instead. SSE2 flips the sign bit (pxor 32768) around
; each signed pminsw to get unsigned-min semantics; SSE4.2/AVX collapse the
; whole reduction into a single (v)phminposuw, which puts the minimum word
; in element 0.
    228 ; X86-SSE2-LABEL: test_reduce_v8i16:
    229 ; X86-SSE2:       ## %bb.0:
    230 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    231 ; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
    232 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
    233 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
    234 ; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
    235 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
    236 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
    237 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
    238 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
    239 ; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
    240 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
    241 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
    242 ; X86-SSE2-NEXT:    psrld $16, %xmm1
    243 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
    244 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
    245 ; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
    246 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
    247 ; X86-SSE2-NEXT:    movd %xmm1, %eax
    248 ; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
    249 ; X86-SSE2-NEXT:    retl
    250 ;
    251 ; X86-SSE42-LABEL: test_reduce_v8i16:
    252 ; X86-SSE42:       ## %bb.0:
    253 ; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
    254 ; X86-SSE42-NEXT:    movd %xmm0, %eax
    255 ; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
    256 ; X86-SSE42-NEXT:    retl
    257 ;
    258 ; X86-AVX-LABEL: test_reduce_v8i16:
    259 ; X86-AVX:       ## %bb.0:
    260 ; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
    261 ; X86-AVX-NEXT:    vmovd %xmm0, %eax
    262 ; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
    263 ; X86-AVX-NEXT:    retl
    264 ;
    265 ; X64-SSE2-LABEL: test_reduce_v8i16:
    266 ; X64-SSE2:       ## %bb.0:
    267 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    268 ; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
    269 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
    270 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
    271 ; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
    272 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
    273 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
    274 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
    275 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
    276 ; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
    277 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
    278 ; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
    279 ; X64-SSE2-NEXT:    psrld $16, %xmm1
    280 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
    281 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
    282 ; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
    283 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
    284 ; X64-SSE2-NEXT:    movd %xmm1, %eax
    285 ; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
    286 ; X64-SSE2-NEXT:    retq
    287 ;
    288 ; X64-SSE42-LABEL: test_reduce_v8i16:
    289 ; X64-SSE42:       ## %bb.0:
    290 ; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
    291 ; X64-SSE42-NEXT:    movd %xmm0, %eax
    292 ; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
    293 ; X64-SSE42-NEXT:    retq
    294 ;
    295 ; X64-AVX-LABEL: test_reduce_v8i16:
    296 ; X64-AVX:       ## %bb.0:
    297 ; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
    298 ; X64-AVX-NEXT:    vmovd %xmm0, %eax
    299 ; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
    300 ; X64-AVX-NEXT:    retq
; IR under test: log2(8) = 3 shuffle/umin steps, then extract lane 0.
    301   %1  = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
    302   %2  = icmp ult <8 x i16> %a0, %1
    303   %3  = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1
    304   %4  = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    305   %5  = icmp ult <8 x i16> %3, %4
    306   %6  = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4
    307   %7  = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    308   %8  = icmp ult <8 x i16> %6, %7
    309   %9  = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7
    310   %10 = extractelement <8 x i16> %9, i32 0
    311   ret i16 %10
    312 }
    313 
    314 define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; Unsigned-minimum reduction of a <16 x i8> in four shuffle/umin steps.
; CHECK lines are autogenerated by utils/update_llc_test_checks.py -- do not
; hand-edit; regenerate instead. SSE2 uses pminub (unsigned byte min exists
; natively) with pshufd/psrld/psrlw shifts to pair up lanes; SSE4.2/AVX first
; fold odd bytes into even bytes (psrlw $8 + pminub) and then use
; (v)phminposuw, extracting the low byte of the result.
    315 ; X86-SSE2-LABEL: test_reduce_v16i8:
    316 ; X86-SSE2:       ## %bb.0:
    317 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    318 ; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
    319 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
    320 ; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
    321 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
    322 ; X86-SSE2-NEXT:    psrld $16, %xmm1
    323 ; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
    324 ; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
    325 ; X86-SSE2-NEXT:    psrlw $8, %xmm0
    326 ; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
    327 ; X86-SSE2-NEXT:    movd %xmm0, %eax
    328 ; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
    329 ; X86-SSE2-NEXT:    retl
    330 ;
    331 ; X86-SSE42-LABEL: test_reduce_v16i8:
    332 ; X86-SSE42:       ## %bb.0:
    333 ; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
    334 ; X86-SSE42-NEXT:    psrlw $8, %xmm1
    335 ; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
    336 ; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
    337 ; X86-SSE42-NEXT:    pextrb $0, %xmm0, %eax
    338 ; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
    339 ; X86-SSE42-NEXT:    retl
    340 ;
    341 ; X86-AVX-LABEL: test_reduce_v16i8:
    342 ; X86-AVX:       ## %bb.0:
    343 ; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
    344 ; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
    345 ; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
    346 ; X86-AVX-NEXT:    vpextrb $0, %xmm0, %eax
    347 ; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
    348 ; X86-AVX-NEXT:    retl
    349 ;
    350 ; X64-SSE2-LABEL: test_reduce_v16i8:
    351 ; X64-SSE2:       ## %bb.0:
    352 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    353 ; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
    354 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
    355 ; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
    356 ; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
    357 ; X64-SSE2-NEXT:    psrld $16, %xmm1
    358 ; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
    359 ; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
    360 ; X64-SSE2-NEXT:    psrlw $8, %xmm0
    361 ; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
    362 ; X64-SSE2-NEXT:    movd %xmm0, %eax
    363 ; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
    364 ; X64-SSE2-NEXT:    retq
    365 ;
    366 ; X64-SSE42-LABEL: test_reduce_v16i8:
    367 ; X64-SSE42:       ## %bb.0:
    368 ; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
    369 ; X64-SSE42-NEXT:    psrlw $8, %xmm1
    370 ; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
    371 ; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
    372 ; X64-SSE42-NEXT:    pextrb $0, %xmm0, %eax
    373 ; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
    374 ; X64-SSE42-NEXT:    retq
    375 ;
    376 ; X64-AVX-LABEL: test_reduce_v16i8:
    377 ; X64-AVX:       ## %bb.0:
    378 ; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
    379 ; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
    380 ; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
    381 ; X64-AVX-NEXT:    vpextrb $0, %xmm0, %eax
    382 ; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
    383 ; X64-AVX-NEXT:    retq
; IR under test: log2(16) = 4 shuffle/umin steps, then extract lane 0.
    384   %1  = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    385   %2  = icmp ult <16 x i8> %a0, %1
    386   %3  = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
    387   %4  = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    388   %5  = icmp ult <16 x i8> %3, %4
    389   %6  = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4
    390   %7  = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    391   %8  = icmp ult <16 x i8> %6, %7
    392   %9  = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7
    393   %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    394   %11 = icmp ult <16 x i8> %9, %10
    395   %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10
    396   %13 = extractelement <16 x i8> %12, i32 0
    397   ret i8 %13
    398 }
    399 
    400 ;
    401 ; 256-bit Vectors
    402 ;
    403 
    404 define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; Unsigned-minimum reduction of a 256-bit <4 x i64>: first umin the upper and
; lower 128-bit halves, then umin the two remaining 64-bit lanes, then extract
; lane 0. CHECK lines are autogenerated by utils/update_llc_test_checks.py --
; do not hand-edit; regenerate instead. Pre-AVX512 lowerings bias both
; operands with the sign-bit constant before a signed 64-bit compare
; (pcmpgtq, or a pcmpgtd/pcmpeqd expansion on SSE2) and blend; AVX512 emits
; vpminuq directly. i686 returns the i64 split across eax:edx.
    405 ; X86-SSE2-LABEL: test_reduce_v4i64:
    406 ; X86-SSE2:       ## %bb.0:
    407 ; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
    408 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
    409 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
    410 ; X86-SSE2-NEXT:    movdqa %xmm1, %xmm4
    411 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm4
    412 ; X86-SSE2-NEXT:    movdqa %xmm4, %xmm5
    413 ; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
    414 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
    415 ; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
    416 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
    417 ; X86-SSE2-NEXT:    pand %xmm6, %xmm3
    418 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
    419 ; X86-SSE2-NEXT:    por %xmm3, %xmm4
    420 ; X86-SSE2-NEXT:    pand %xmm4, %xmm0
    421 ; X86-SSE2-NEXT:    pandn %xmm1, %xmm4
    422 ; X86-SSE2-NEXT:    por %xmm0, %xmm4
    423 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
    424 ; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
    425 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
    426 ; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
    427 ; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
    428 ; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
    429 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
    430 ; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
    431 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
    432 ; X86-SSE2-NEXT:    pand %xmm5, %xmm1
    433 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
    434 ; X86-SSE2-NEXT:    por %xmm1, %xmm2
    435 ; X86-SSE2-NEXT:    pand %xmm2, %xmm4
    436 ; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
    437 ; X86-SSE2-NEXT:    por %xmm4, %xmm2
    438 ; X86-SSE2-NEXT:    movd %xmm2, %eax
    439 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
    440 ; X86-SSE2-NEXT:    movd %xmm0, %edx
    441 ; X86-SSE2-NEXT:    retl
    442 ;
    443 ; X86-SSE42-LABEL: test_reduce_v4i64:
    444 ; X86-SSE42:       ## %bb.0:
    445 ; X86-SSE42-NEXT:    movdqa %xmm0, %xmm2
    446 ; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
    447 ; X86-SSE42-NEXT:    movdqa %xmm0, %xmm4
    448 ; X86-SSE42-NEXT:    pxor %xmm3, %xmm4
    449 ; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
    450 ; X86-SSE42-NEXT:    pxor %xmm3, %xmm0
    451 ; X86-SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
    452 ; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
    453 ; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
    454 ; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
    455 ; X86-SSE42-NEXT:    pxor %xmm3, %xmm0
    456 ; X86-SSE42-NEXT:    pxor %xmm2, %xmm3
    457 ; X86-SSE42-NEXT:    pcmpgtq %xmm0, %xmm3
    458 ; X86-SSE42-NEXT:    movdqa %xmm3, %xmm0
    459 ; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
    460 ; X86-SSE42-NEXT:    movd %xmm2, %eax
    461 ; X86-SSE42-NEXT:    pextrd $1, %xmm2, %edx
    462 ; X86-SSE42-NEXT:    retl
    463 ;
    464 ; X86-AVX1-LABEL: test_reduce_v4i64:
    465 ; X86-AVX1:       ## %bb.0:
    466 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    467 ; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
    468 ; X86-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm3
    469 ; X86-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm4
    470 ; X86-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
    471 ; X86-AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm4
    472 ; X86-AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
    473 ; X86-AVX1-NEXT:    vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
    474 ; X86-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
    475 ; X86-AVX1-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
    476 ; X86-AVX1-NEXT:    vxorpd %xmm2, %xmm1, %xmm4
    477 ; X86-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
    478 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
    479 ; X86-AVX1-NEXT:    vpxor %xmm2, %xmm4, %xmm2
    480 ; X86-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm2
    481 ; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
    482 ; X86-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
    483 ; X86-AVX1-NEXT:    vmovd %xmm0, %eax
    484 ; X86-AVX1-NEXT:    vpextrd $1, %xmm0, %edx
    485 ; X86-AVX1-NEXT:    vzeroupper
    486 ; X86-AVX1-NEXT:    retl
    487 ;
    488 ; X86-AVX2-LABEL: test_reduce_v4i64:
    489 ; X86-AVX2:       ## %bb.0:
    490 ; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
    491 ; X86-AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
    492 ; X86-AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm3
    493 ; X86-AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm4
    494 ; X86-AVX2-NEXT:    vpcmpgtq %ymm3, %ymm4, %ymm3
    495 ; X86-AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
    496 ; X86-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
    497 ; X86-AVX2-NEXT:    vxorpd %ymm2, %ymm0, %ymm3
    498 ; X86-AVX2-NEXT:    vxorpd %ymm2, %ymm1, %ymm2
    499 ; X86-AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
    500 ; X86-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
    501 ; X86-AVX2-NEXT:    vmovd %xmm0, %eax
    502 ; X86-AVX2-NEXT:    vpextrd $1, %xmm0, %edx
    503 ; X86-AVX2-NEXT:    vzeroupper
    504 ; X86-AVX2-NEXT:    retl
    505 ;
    506 ; X64-SSE2-LABEL: test_reduce_v4i64:
    507 ; X64-SSE2:       ## %bb.0:
    508 ; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
    509 ; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
    510 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
    511 ; X64-SSE2-NEXT:    movdqa %xmm1, %xmm4
    512 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm4
    513 ; X64-SSE2-NEXT:    movdqa %xmm4, %xmm5
    514 ; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
    515 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
    516 ; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
    517 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
    518 ; X64-SSE2-NEXT:    pand %xmm6, %xmm3
    519 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
    520 ; X64-SSE2-NEXT:    por %xmm3, %xmm4
    521 ; X64-SSE2-NEXT:    pand %xmm4, %xmm0
    522 ; X64-SSE2-NEXT:    pandn %xmm1, %xmm4
    523 ; X64-SSE2-NEXT:    por %xmm0, %xmm4
    524 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
    525 ; X64-SSE2-NEXT:    movdqa %xmm4, %xmm1
    526 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
    527 ; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
    528 ; X64-SSE2-NEXT:    movdqa %xmm2, %xmm3
    529 ; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
    530 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
    531 ; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
    532 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
    533 ; X64-SSE2-NEXT:    pand %xmm5, %xmm1
    534 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
    535 ; X64-SSE2-NEXT:    por %xmm1, %xmm2
    536 ; X64-SSE2-NEXT:    pand %xmm2, %xmm4
    537 ; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
    538 ; X64-SSE2-NEXT:    por %xmm4, %xmm2
    539 ; X64-SSE2-NEXT:    movq %xmm2, %rax
    540 ; X64-SSE2-NEXT:    retq
    541 ;
    542 ; X64-SSE42-LABEL: test_reduce_v4i64:
    543 ; X64-SSE42:       ## %bb.0:
    544 ; X64-SSE42-NEXT:    movdqa %xmm0, %xmm2
    545 ; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
    546 ; X64-SSE42-NEXT:    movdqa %xmm0, %xmm4
    547 ; X64-SSE42-NEXT:    pxor %xmm3, %xmm4
    548 ; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
    549 ; X64-SSE42-NEXT:    pxor %xmm3, %xmm0
    550 ; X64-SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
    551 ; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
    552 ; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
    553 ; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
    554 ; X64-SSE42-NEXT:    pxor %xmm3, %xmm0
    555 ; X64-SSE42-NEXT:    pxor %xmm2, %xmm3
    556 ; X64-SSE42-NEXT:    pcmpgtq %xmm0, %xmm3
    557 ; X64-SSE42-NEXT:    movdqa %xmm3, %xmm0
    558 ; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
    559 ; X64-SSE42-NEXT:    movq %xmm2, %rax
    560 ; X64-SSE42-NEXT:    retq
    561 ;
    562 ; X64-AVX1-LABEL: test_reduce_v4i64:
    563 ; X64-AVX1:       ## %bb.0:
    564 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    565 ; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
    566 ; X64-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm3
    567 ; X64-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm4
    568 ; X64-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
    569 ; X64-AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm4
    570 ; X64-AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
    571 ; X64-AVX1-NEXT:    vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
    572 ; X64-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
    573 ; X64-AVX1-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
    574 ; X64-AVX1-NEXT:    vxorpd %xmm2, %xmm1, %xmm4
    575 ; X64-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
    576 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
    577 ; X64-AVX1-NEXT:    vpxor %xmm2, %xmm4, %xmm2
    578 ; X64-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm2
    579 ; X64-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
    580 ; X64-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
    581 ; X64-AVX1-NEXT:    vmovq %xmm0, %rax
    582 ; X64-AVX1-NEXT:    vzeroupper
    583 ; X64-AVX1-NEXT:    retq
    584 ;
    585 ; X64-AVX2-LABEL: test_reduce_v4i64:
    586 ; X64-AVX2:       ## %bb.0:
    587 ; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
    588 ; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
    589 ; X64-AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm3
    590 ; X64-AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm4
    591 ; X64-AVX2-NEXT:    vpcmpgtq %ymm3, %ymm4, %ymm3
    592 ; X64-AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
    593 ; X64-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
    594 ; X64-AVX2-NEXT:    vxorpd %ymm2, %ymm0, %ymm3
    595 ; X64-AVX2-NEXT:    vxorpd %ymm2, %ymm1, %ymm2
    596 ; X64-AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
    597 ; X64-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
    598 ; X64-AVX2-NEXT:    vmovq %xmm0, %rax
    599 ; X64-AVX2-NEXT:    vzeroupper
    600 ; X64-AVX2-NEXT:    retq
    601 ;
    602 ; X64-AVX512-LABEL: test_reduce_v4i64:
    603 ; X64-AVX512:       ## %bb.0:
    604 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
    605 ; X64-AVX512-NEXT:    vpminuq %ymm1, %ymm0, %ymm0
    606 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    607 ; X64-AVX512-NEXT:    vpminuq %ymm1, %ymm0, %ymm0
    608 ; X64-AVX512-NEXT:    vmovq %xmm0, %rax
    609 ; X64-AVX512-NEXT:    vzeroupper
    610 ; X64-AVX512-NEXT:    retq
; IR under test: log2(4) = 2 shuffle/umin steps, then extract lane 0.
    611   %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
    612   %2 = icmp ult <4 x i64> %a0, %1
    613   %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1
    614   %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
    615   %5 = icmp ult <4 x i64> %3, %4
    616   %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4
    617   %7 = extractelement <4 x i64> %6, i32 0
    618   ret i64 %7
    619 }
    620 
    621 define i32 @test_reduce_v8i32(<8 x i32> %a0) {
        ; Horizontal unsigned-minimum reduction: returns the smallest of the
        ; eight u32 lanes of %a0.  The reference IR (end of this function)
        ; performs three shuffle / icmp ult / select stages, halving the live
        ; width 8 -> 4 -> 2 -> 1, and extracts lane 0.
        ; The CHECK bodies below were autogenerated by
        ; utils/update_llc_test_checks.py — regenerate instead of hand-editing.
        ; Notable per-target codegen (visible below): SSE2 has no unsigned
        ; dword min, so it flips sign bits (pxor 0x80000000) and builds the
        ; select from pcmpgtd + pand/pandn/por; SSE4.2/AVX use pminud/vpminud.
    622 ; X86-SSE2-LABEL: test_reduce_v8i32:
    623 ; X86-SSE2:       ## %bb.0:
    624 ; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
    625 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
    626 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
    627 ; X86-SSE2-NEXT:    movdqa %xmm1, %xmm4
    628 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm4
    629 ; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
    630 ; X86-SSE2-NEXT:    pand %xmm4, %xmm0
    631 ; X86-SSE2-NEXT:    pandn %xmm1, %xmm4
    632 ; X86-SSE2-NEXT:    por %xmm0, %xmm4
    633 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
    634 ; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
    635 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
    636 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
    637 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
    638 ; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
    639 ; X86-SSE2-NEXT:    pand %xmm3, %xmm4
    640 ; X86-SSE2-NEXT:    pandn %xmm0, %xmm3
    641 ; X86-SSE2-NEXT:    por %xmm4, %xmm3
    642 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
    643 ; X86-SSE2-NEXT:    movdqa %xmm3, %xmm1
    644 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
    645 ; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
    646 ; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
    647 ; X86-SSE2-NEXT:    pand %xmm2, %xmm3
    648 ; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
    649 ; X86-SSE2-NEXT:    por %xmm3, %xmm2
    650 ; X86-SSE2-NEXT:    movd %xmm2, %eax
    651 ; X86-SSE2-NEXT:    retl
    652 ;
    653 ; X86-SSE42-LABEL: test_reduce_v8i32:
    654 ; X86-SSE42:       ## %bb.0:
    655 ; X86-SSE42-NEXT:    pminud %xmm1, %xmm0
    656 ; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    657 ; X86-SSE42-NEXT:    pminud %xmm0, %xmm1
    658 ; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
    659 ; X86-SSE42-NEXT:    pminud %xmm1, %xmm0
    660 ; X86-SSE42-NEXT:    movd %xmm0, %eax
    661 ; X86-SSE42-NEXT:    retl
    662 ;
    663 ; X86-AVX1-LABEL: test_reduce_v8i32:
    664 ; X86-AVX1:       ## %bb.0:
    665 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    666 ; X86-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
    667 ; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    668 ; X86-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
    669 ; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
    670 ; X86-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
    671 ; X86-AVX1-NEXT:    vmovd %xmm0, %eax
    672 ; X86-AVX1-NEXT:    vzeroupper
    673 ; X86-AVX1-NEXT:    retl
    674 ;
    675 ; X86-AVX2-LABEL: test_reduce_v8i32:
    676 ; X86-AVX2:       ## %bb.0:
    677 ; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
    678 ; X86-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
    679 ; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    680 ; X86-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
    681 ; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
    682 ; X86-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
    683 ; X86-AVX2-NEXT:    vmovd %xmm0, %eax
    684 ; X86-AVX2-NEXT:    vzeroupper
    685 ; X86-AVX2-NEXT:    retl
    686 ;
    687 ; X64-SSE2-LABEL: test_reduce_v8i32:
    688 ; X64-SSE2:       ## %bb.0:
    689 ; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
    690 ; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
    691 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
    692 ; X64-SSE2-NEXT:    movdqa %xmm1, %xmm4
    693 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm4
    694 ; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
    695 ; X64-SSE2-NEXT:    pand %xmm4, %xmm0
    696 ; X64-SSE2-NEXT:    pandn %xmm1, %xmm4
    697 ; X64-SSE2-NEXT:    por %xmm0, %xmm4
    698 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
    699 ; X64-SSE2-NEXT:    movdqa %xmm4, %xmm1
    700 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
    701 ; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
    702 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
    703 ; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
    704 ; X64-SSE2-NEXT:    pand %xmm3, %xmm4
    705 ; X64-SSE2-NEXT:    pandn %xmm0, %xmm3
    706 ; X64-SSE2-NEXT:    por %xmm4, %xmm3
    707 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
    708 ; X64-SSE2-NEXT:    movdqa %xmm3, %xmm1
    709 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
    710 ; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
    711 ; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
    712 ; X64-SSE2-NEXT:    pand %xmm2, %xmm3
    713 ; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
    714 ; X64-SSE2-NEXT:    por %xmm3, %xmm2
    715 ; X64-SSE2-NEXT:    movd %xmm2, %eax
    716 ; X64-SSE2-NEXT:    retq
    717 ;
    718 ; X64-SSE42-LABEL: test_reduce_v8i32:
    719 ; X64-SSE42:       ## %bb.0:
    720 ; X64-SSE42-NEXT:    pminud %xmm1, %xmm0
    721 ; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    722 ; X64-SSE42-NEXT:    pminud %xmm0, %xmm1
    723 ; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
    724 ; X64-SSE42-NEXT:    pminud %xmm1, %xmm0
    725 ; X64-SSE42-NEXT:    movd %xmm0, %eax
    726 ; X64-SSE42-NEXT:    retq
    727 ;
    728 ; X64-AVX1-LABEL: test_reduce_v8i32:
    729 ; X64-AVX1:       ## %bb.0:
    730 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    731 ; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
    732 ; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    733 ; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
    734 ; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
    735 ; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
    736 ; X64-AVX1-NEXT:    vmovd %xmm0, %eax
    737 ; X64-AVX1-NEXT:    vzeroupper
    738 ; X64-AVX1-NEXT:    retq
    739 ;
    740 ; X64-AVX2-LABEL: test_reduce_v8i32:
    741 ; X64-AVX2:       ## %bb.0:
    742 ; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
    743 ; X64-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
    744 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    745 ; X64-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
    746 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
    747 ; X64-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
    748 ; X64-AVX2-NEXT:    vmovd %xmm0, %eax
    749 ; X64-AVX2-NEXT:    vzeroupper
    750 ; X64-AVX2-NEXT:    retq
    751 ;
    752 ; X64-AVX512-LABEL: test_reduce_v8i32:
    753 ; X64-AVX512:       ## %bb.0:
    754 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
    755 ; X64-AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm0
    756 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    757 ; X64-AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm0
    758 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
    759 ; X64-AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm0
    760 ; X64-AVX512-NEXT:    vmovd %xmm0, %eax
    761 ; X64-AVX512-NEXT:    vzeroupper
    762 ; X64-AVX512-NEXT:    retq
        ; Reference IR: stage k compares the low half against the shuffled-down
        ; high half with icmp ult and keeps the smaller lanes via select.
    763   %1  = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
    764   %2  = icmp ult <8 x i32> %a0, %1
    765   %3  = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1
    766   %4  = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    767   %5  = icmp ult <8 x i32> %3, %4
    768   %6  = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4
    769   %7  = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    770   %8  = icmp ult <8 x i32> %6, %7
    771   %9  = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7
    772   %10 = extractelement <8 x i32> %9, i32 0
    773   ret i32 %10
    774 }
    775 
    776 define i16 @test_reduce_v16i16(<16 x i16> %a0) {
        ; Horizontal unsigned-minimum reduction over sixteen u16 lanes: four
        ; shuffle / icmp ult / select stages (16 -> 8 -> 4 -> 2 -> 1), then
        ; lane 0 is returned.  CHECK bodies are autogenerated by
        ; utils/update_llc_test_checks.py — regenerate rather than hand-edit.
        ; Codegen of note (visible below): SSE2 lacks an unsigned word min, so
        ; it biases with pxor 0x8000 around pminsw; SSE4.2/AVX collapse the
        ; whole 128-bit tail into a single phminposuw after pminuw.
    777 ; X86-SSE2-LABEL: test_reduce_v16i16:
    778 ; X86-SSE2:       ## %bb.0:
    779 ; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
    780 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
    781 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
    782 ; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
    783 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
    784 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    785 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
    786 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
    787 ; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
    788 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
    789 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
    790 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
    791 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
    792 ; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
    793 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
    794 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
    795 ; X86-SSE2-NEXT:    psrld $16, %xmm1
    796 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
    797 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
    798 ; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
    799 ; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
    800 ; X86-SSE2-NEXT:    movd %xmm1, %eax
    801 ; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
    802 ; X86-SSE2-NEXT:    retl
    803 ;
    804 ; X86-SSE42-LABEL: test_reduce_v16i16:
    805 ; X86-SSE42:       ## %bb.0:
    806 ; X86-SSE42-NEXT:    pminuw %xmm1, %xmm0
    807 ; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
    808 ; X86-SSE42-NEXT:    movd %xmm0, %eax
    809 ; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
    810 ; X86-SSE42-NEXT:    retl
    811 ;
    812 ; X86-AVX1-LABEL: test_reduce_v16i16:
    813 ; X86-AVX1:       ## %bb.0:
    814 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    815 ; X86-AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
    816 ; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
    817 ; X86-AVX1-NEXT:    vmovd %xmm0, %eax
    818 ; X86-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
    819 ; X86-AVX1-NEXT:    vzeroupper
    820 ; X86-AVX1-NEXT:    retl
    821 ;
    822 ; X86-AVX2-LABEL: test_reduce_v16i16:
    823 ; X86-AVX2:       ## %bb.0:
    824 ; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
    825 ; X86-AVX2-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
    826 ; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
    827 ; X86-AVX2-NEXT:    vmovd %xmm0, %eax
    828 ; X86-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
    829 ; X86-AVX2-NEXT:    vzeroupper
    830 ; X86-AVX2-NEXT:    retl
    831 ;
    832 ; X64-SSE2-LABEL: test_reduce_v16i16:
    833 ; X64-SSE2:       ## %bb.0:
    834 ; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
    835 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
    836 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
    837 ; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
    838 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
    839 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    840 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
    841 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
    842 ; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
    843 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
    844 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
    845 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
    846 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
    847 ; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
    848 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
    849 ; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
    850 ; X64-SSE2-NEXT:    psrld $16, %xmm1
    851 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
    852 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
    853 ; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
    854 ; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
    855 ; X64-SSE2-NEXT:    movd %xmm1, %eax
    856 ; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
    857 ; X64-SSE2-NEXT:    retq
    858 ;
    859 ; X64-SSE42-LABEL: test_reduce_v16i16:
    860 ; X64-SSE42:       ## %bb.0:
    861 ; X64-SSE42-NEXT:    pminuw %xmm1, %xmm0
    862 ; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
    863 ; X64-SSE42-NEXT:    movd %xmm0, %eax
    864 ; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
    865 ; X64-SSE42-NEXT:    retq
    866 ;
    867 ; X64-AVX1-LABEL: test_reduce_v16i16:
    868 ; X64-AVX1:       ## %bb.0:
    869 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    870 ; X64-AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
    871 ; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
    872 ; X64-AVX1-NEXT:    vmovd %xmm0, %eax
    873 ; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
    874 ; X64-AVX1-NEXT:    vzeroupper
    875 ; X64-AVX1-NEXT:    retq
    876 ;
    877 ; X64-AVX2-LABEL: test_reduce_v16i16:
    878 ; X64-AVX2:       ## %bb.0:
    879 ; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
    880 ; X64-AVX2-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
    881 ; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
    882 ; X64-AVX2-NEXT:    vmovd %xmm0, %eax
    883 ; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
    884 ; X64-AVX2-NEXT:    vzeroupper
    885 ; X64-AVX2-NEXT:    retq
    886 ;
    887 ; X64-AVX512-LABEL: test_reduce_v16i16:
    888 ; X64-AVX512:       ## %bb.0:
    889 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
    890 ; X64-AVX512-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
    891 ; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
    892 ; X64-AVX512-NEXT:    vmovd %xmm0, %eax
    893 ; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
    894 ; X64-AVX512-NEXT:    vzeroupper
    895 ; X64-AVX512-NEXT:    retq
        ; Reference IR: each stage shuffles the upper half down, compares with
        ; icmp ult, and selects the smaller lanes; lane 0 of %12 is the result.
    896   %1  = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    897   %2  = icmp ult <16 x i16> %a0, %1
    898   %3  = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
    899   %4  = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    900   %5  = icmp ult <16 x i16> %3, %4
    901   %6  = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
    902   %7  = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    903   %8  = icmp ult <16 x i16> %6, %7
    904   %9  = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7
    905   %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    906   %11 = icmp ult <16 x i16> %9, %10
    907   %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10
    908   %13 = extractelement <16 x i16> %12, i32 0
    909   ret i16 %13
    910 }
    911 
    912 define i8 @test_reduce_v32i8(<32 x i8> %a0) {
        ; Horizontal unsigned-minimum reduction over thirty-two u8 lanes: five
        ; shuffle / icmp ult / select stages (32 -> 16 -> 8 -> 4 -> 2 -> 1),
        ; returning lane 0.  CHECK bodies are autogenerated by
        ; utils/update_llc_test_checks.py — regenerate rather than hand-edit.
        ; Codegen of note (visible below): SSE2 already has pminub, so it
        ; shuffles/shifts and pminub's down to one byte; SSE4.2/AVX fold the
        ; odd bytes onto the even ones (psrlw $8 + pminub) and finish with a
        ; single phminposuw, extracting the low byte with pextrb.
    913 ; X86-SSE2-LABEL: test_reduce_v32i8:
    914 ; X86-SSE2:       ## %bb.0:
    915 ; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
    916 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    917 ; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
    918 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
    919 ; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
    920 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
    921 ; X86-SSE2-NEXT:    psrld $16, %xmm1
    922 ; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
    923 ; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
    924 ; X86-SSE2-NEXT:    psrlw $8, %xmm0
    925 ; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
    926 ; X86-SSE2-NEXT:    movd %xmm0, %eax
    927 ; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
    928 ; X86-SSE2-NEXT:    retl
    929 ;
    930 ; X86-SSE42-LABEL: test_reduce_v32i8:
    931 ; X86-SSE42:       ## %bb.0:
    932 ; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
    933 ; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
    934 ; X86-SSE42-NEXT:    psrlw $8, %xmm1
    935 ; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
    936 ; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
    937 ; X86-SSE42-NEXT:    pextrb $0, %xmm0, %eax
    938 ; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
    939 ; X86-SSE42-NEXT:    retl
    940 ;
    941 ; X86-AVX1-LABEL: test_reduce_v32i8:
    942 ; X86-AVX1:       ## %bb.0:
    943 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    944 ; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
    945 ; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
    946 ; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
    947 ; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
    948 ; X86-AVX1-NEXT:    vpextrb $0, %xmm0, %eax
    949 ; X86-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
    950 ; X86-AVX1-NEXT:    vzeroupper
    951 ; X86-AVX1-NEXT:    retl
    952 ;
    953 ; X86-AVX2-LABEL: test_reduce_v32i8:
    954 ; X86-AVX2:       ## %bb.0:
    955 ; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
    956 ; X86-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
    957 ; X86-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
    958 ; X86-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
    959 ; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
    960 ; X86-AVX2-NEXT:    vpextrb $0, %xmm0, %eax
    961 ; X86-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
    962 ; X86-AVX2-NEXT:    vzeroupper
    963 ; X86-AVX2-NEXT:    retl
    964 ;
    965 ; X64-SSE2-LABEL: test_reduce_v32i8:
    966 ; X64-SSE2:       ## %bb.0:
    967 ; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
    968 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
    969 ; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
    970 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
    971 ; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
    972 ; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
    973 ; X64-SSE2-NEXT:    psrld $16, %xmm1
    974 ; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
    975 ; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
    976 ; X64-SSE2-NEXT:    psrlw $8, %xmm0
    977 ; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
    978 ; X64-SSE2-NEXT:    movd %xmm0, %eax
    979 ; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
    980 ; X64-SSE2-NEXT:    retq
    981 ;
    982 ; X64-SSE42-LABEL: test_reduce_v32i8:
    983 ; X64-SSE42:       ## %bb.0:
    984 ; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
    985 ; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
    986 ; X64-SSE42-NEXT:    psrlw $8, %xmm1
    987 ; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
    988 ; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
    989 ; X64-SSE42-NEXT:    pextrb $0, %xmm0, %eax
    990 ; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
    991 ; X64-SSE42-NEXT:    retq
    992 ;
    993 ; X64-AVX1-LABEL: test_reduce_v32i8:
    994 ; X64-AVX1:       ## %bb.0:
    995 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    996 ; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
    997 ; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
    998 ; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
    999 ; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
   1000 ; X64-AVX1-NEXT:    vpextrb $0, %xmm0, %eax
   1001 ; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
   1002 ; X64-AVX1-NEXT:    vzeroupper
   1003 ; X64-AVX1-NEXT:    retq
   1004 ;
   1005 ; X64-AVX2-LABEL: test_reduce_v32i8:
   1006 ; X64-AVX2:       ## %bb.0:
   1007 ; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1008 ; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
   1009 ; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
   1010 ; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
   1011 ; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
   1012 ; X64-AVX2-NEXT:    vpextrb $0, %xmm0, %eax
   1013 ; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
   1014 ; X64-AVX2-NEXT:    vzeroupper
   1015 ; X64-AVX2-NEXT:    retq
   1016 ;
   1017 ; X64-AVX512-LABEL: test_reduce_v32i8:
   1018 ; X64-AVX512:       ## %bb.0:
   1019 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1020 ; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
   1021 ; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
   1022 ; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
   1023 ; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
   1024 ; X64-AVX512-NEXT:    vpextrb $0, %xmm0, %eax
   1025 ; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
   1026 ; X64-AVX512-NEXT:    vzeroupper
   1027 ; X64-AVX512-NEXT:    retq
        ; Reference IR: five pairwise umin stages; lane 0 of %15 is the result.
   1028   %1  = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   1029   %2  = icmp ult <32 x i8> %a0, %1
   1030   %3  = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
   1031   %4  = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   1032   %5  = icmp ult <32 x i8> %3, %4
   1033   %6  = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
   1034   %7  = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   1035   %8  = icmp ult <32 x i8> %6, %7
   1036   %9  = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
   1037   %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   1038   %11 = icmp ult <32 x i8> %9, %10
   1039   %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
   1040   %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   1041   %14 = icmp ult <32 x i8> %12, %13
   1042   %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13
   1043   %16 = extractelement <32 x i8> %15, i32 0
   1044   ret i8 %16
   1045 }
   1046 
   1047 ;
   1048 ; 512-bit Vectors
   1049 ;
   1050 
   1051 define i64 @test_reduce_v8i64(<8 x i64> %a0) {
   1052 ; X86-SSE2-LABEL: test_reduce_v8i64:
   1053 ; X86-SSE2:       ## %bb.0:
   1054 ; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
   1055 ; X86-SSE2-NEXT:    movdqa %xmm1, %xmm5
   1056 ; X86-SSE2-NEXT:    pxor %xmm4, %xmm5
   1057 ; X86-SSE2-NEXT:    movdqa %xmm3, %xmm6
   1058 ; X86-SSE2-NEXT:    pxor %xmm4, %xmm6
   1059 ; X86-SSE2-NEXT:    movdqa %xmm6, %xmm7
   1060 ; X86-SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
   1061 ; X86-SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
   1062 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
   1063 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
   1064 ; X86-SSE2-NEXT:    pand %xmm5, %xmm6
   1065 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
   1066 ; X86-SSE2-NEXT:    por %xmm6, %xmm5
   1067 ; X86-SSE2-NEXT:    pand %xmm5, %xmm1
   1068 ; X86-SSE2-NEXT:    pandn %xmm3, %xmm5
   1069 ; X86-SSE2-NEXT:    por %xmm1, %xmm5
   1070 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
   1071 ; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
   1072 ; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
   1073 ; X86-SSE2-NEXT:    pxor %xmm4, %xmm3
   1074 ; X86-SSE2-NEXT:    movdqa %xmm3, %xmm6
   1075 ; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm6
   1076 ; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm3
   1077 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm6[0,0,2,2]
   1078 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
   1079 ; X86-SSE2-NEXT:    pand %xmm1, %xmm3
   1080 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3]
   1081 ; X86-SSE2-NEXT:    por %xmm3, %xmm1
   1082 ; X86-SSE2-NEXT:    pand %xmm1, %xmm0
   1083 ; X86-SSE2-NEXT:    pandn %xmm2, %xmm1
   1084 ; X86-SSE2-NEXT:    por %xmm0, %xmm1
   1085 ; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
   1086 ; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
   1087 ; X86-SSE2-NEXT:    movdqa %xmm5, %xmm2
   1088 ; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
   1089 ; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
   1090 ; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
   1091 ; X86-SSE2-NEXT:    pcmpeqd %xmm0, %xmm2
   1092 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
   1093 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
   1094 ; X86-SSE2-NEXT:    pand %xmm0, %xmm2
   1095 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
   1096 ; X86-SSE2-NEXT:    por %xmm2, %xmm0
   1097 ; X86-SSE2-NEXT:    pand %xmm0, %xmm1
   1098 ; X86-SSE2-NEXT:    pandn %xmm5, %xmm0
   1099 ; X86-SSE2-NEXT:    por %xmm1, %xmm0
   1100 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
   1101 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
   1102 ; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
   1103 ; X86-SSE2-NEXT:    pxor %xmm1, %xmm4
   1104 ; X86-SSE2-NEXT:    movdqa %xmm4, %xmm3
   1105 ; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
   1106 ; X86-SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
   1107 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
   1108 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
   1109 ; X86-SSE2-NEXT:    pand %xmm2, %xmm4
   1110 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
   1111 ; X86-SSE2-NEXT:    por %xmm4, %xmm2
   1112 ; X86-SSE2-NEXT:    pand %xmm2, %xmm0
   1113 ; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
   1114 ; X86-SSE2-NEXT:    por %xmm0, %xmm2
   1115 ; X86-SSE2-NEXT:    movd %xmm2, %eax
   1116 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
   1117 ; X86-SSE2-NEXT:    movd %xmm0, %edx
   1118 ; X86-SSE2-NEXT:    retl
   1119 ;
   1120 ; X86-SSE42-LABEL: test_reduce_v8i64:
   1121 ; X86-SSE42:       ## %bb.0:
   1122 ; X86-SSE42-NEXT:    movdqa %xmm0, %xmm5
   1123 ; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm4 = [0,2147483648,0,2147483648]
   1124 ; X86-SSE42-NEXT:    movdqa %xmm1, %xmm6
   1125 ; X86-SSE42-NEXT:    pxor %xmm4, %xmm6
   1126 ; X86-SSE42-NEXT:    movdqa %xmm3, %xmm0
   1127 ; X86-SSE42-NEXT:    pxor %xmm4, %xmm0
   1128 ; X86-SSE42-NEXT:    pcmpgtq %xmm6, %xmm0
   1129 ; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
   1130 ; X86-SSE42-NEXT:    movdqa %xmm5, %xmm1
   1131 ; X86-SSE42-NEXT:    pxor %xmm4, %xmm1
   1132 ; X86-SSE42-NEXT:    movdqa %xmm2, %xmm0
   1133 ; X86-SSE42-NEXT:    pxor %xmm4, %xmm0
   1134 ; X86-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
   1135 ; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm5, %xmm2
   1136 ; X86-SSE42-NEXT:    movapd %xmm2, %xmm1
   1137 ; X86-SSE42-NEXT:    xorpd %xmm4, %xmm1
   1138 ; X86-SSE42-NEXT:    movapd %xmm3, %xmm0
   1139 ; X86-SSE42-NEXT:    xorpd %xmm4, %xmm0
   1140 ; X86-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
   1141 ; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
   1142 ; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
   1143 ; X86-SSE42-NEXT:    movdqa %xmm3, %xmm0
   1144 ; X86-SSE42-NEXT:    pxor %xmm4, %xmm0
   1145 ; X86-SSE42-NEXT:    pxor %xmm1, %xmm4
   1146 ; X86-SSE42-NEXT:    pcmpgtq %xmm0, %xmm4
   1147 ; X86-SSE42-NEXT:    movdqa %xmm4, %xmm0
   1148 ; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
   1149 ; X86-SSE42-NEXT:    movd %xmm1, %eax
   1150 ; X86-SSE42-NEXT:    pextrd $1, %xmm1, %edx
   1151 ; X86-SSE42-NEXT:    retl
   1152 ;
   1153 ; X86-AVX1-LABEL: test_reduce_v8i64:
   1154 ; X86-AVX1:       ## %bb.0:
   1155 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   1156 ; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
   1157 ; X86-AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
   1158 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
   1159 ; X86-AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
   1160 ; X86-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
   1161 ; X86-AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm4
   1162 ; X86-AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm5
   1163 ; X86-AVX1-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
   1164 ; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm4, %ymm2
   1165 ; X86-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
   1166 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1167 ; X86-AVX1-NEXT:    vxorpd %xmm3, %xmm0, %xmm2
   1168 ; X86-AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm4
   1169 ; X86-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
   1170 ; X86-AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm4
   1171 ; X86-AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
   1172 ; X86-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
   1173 ; X86-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
   1174 ; X86-AVX1-NEXT:    vxorpd %xmm3, %xmm0, %xmm2
   1175 ; X86-AVX1-NEXT:    vxorpd %xmm3, %xmm1, %xmm4
   1176 ; X86-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
   1177 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
   1178 ; X86-AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm3
   1179 ; X86-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm3
   1180 ; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
   1181 ; X86-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
   1182 ; X86-AVX1-NEXT:    vmovd %xmm0, %eax
   1183 ; X86-AVX1-NEXT:    vpextrd $1, %xmm0, %edx
   1184 ; X86-AVX1-NEXT:    vzeroupper
   1185 ; X86-AVX1-NEXT:    retl
   1186 ;
   1187 ; X86-AVX2-LABEL: test_reduce_v8i64:
   1188 ; X86-AVX2:       ## %bb.0:
   1189 ; X86-AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
   1190 ; X86-AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm3
   1191 ; X86-AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm4
   1192 ; X86-AVX2-NEXT:    vpcmpgtq %ymm3, %ymm4, %ymm3
   1193 ; X86-AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
   1194 ; X86-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1195 ; X86-AVX2-NEXT:    vxorpd %ymm2, %ymm0, %ymm3
   1196 ; X86-AVX2-NEXT:    vxorpd %ymm2, %ymm1, %ymm4
   1197 ; X86-AVX2-NEXT:    vpcmpgtq %ymm3, %ymm4, %ymm3
   1198 ; X86-AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
   1199 ; X86-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
   1200 ; X86-AVX2-NEXT:    vxorpd %ymm2, %ymm0, %ymm3
   1201 ; X86-AVX2-NEXT:    vxorpd %ymm2, %ymm1, %ymm2
   1202 ; X86-AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
   1203 ; X86-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
   1204 ; X86-AVX2-NEXT:    vmovd %xmm0, %eax
   1205 ; X86-AVX2-NEXT:    vpextrd $1, %xmm0, %edx
   1206 ; X86-AVX2-NEXT:    vzeroupper
   1207 ; X86-AVX2-NEXT:    retl
   1208 ;
   1209 ; X64-SSE2-LABEL: test_reduce_v8i64:
   1210 ; X64-SSE2:       ## %bb.0:
   1211 ; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
   1212 ; X64-SSE2-NEXT:    movdqa %xmm1, %xmm5
   1213 ; X64-SSE2-NEXT:    pxor %xmm4, %xmm5
   1214 ; X64-SSE2-NEXT:    movdqa %xmm3, %xmm6
   1215 ; X64-SSE2-NEXT:    pxor %xmm4, %xmm6
   1216 ; X64-SSE2-NEXT:    movdqa %xmm6, %xmm7
   1217 ; X64-SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
   1218 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
   1219 ; X64-SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
   1220 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
   1221 ; X64-SSE2-NEXT:    pand %xmm8, %xmm6
   1222 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
   1223 ; X64-SSE2-NEXT:    por %xmm6, %xmm5
   1224 ; X64-SSE2-NEXT:    pand %xmm5, %xmm1
   1225 ; X64-SSE2-NEXT:    pandn %xmm3, %xmm5
   1226 ; X64-SSE2-NEXT:    por %xmm1, %xmm5
   1227 ; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
   1228 ; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
   1229 ; X64-SSE2-NEXT:    movdqa %xmm2, %xmm3
   1230 ; X64-SSE2-NEXT:    pxor %xmm4, %xmm3
   1231 ; X64-SSE2-NEXT:    movdqa %xmm3, %xmm6
   1232 ; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm6
   1233 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
   1234 ; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm3
   1235 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
   1236 ; X64-SSE2-NEXT:    pand %xmm7, %xmm1
   1237 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
   1238 ; X64-SSE2-NEXT:    por %xmm1, %xmm3
   1239 ; X64-SSE2-NEXT:    pand %xmm3, %xmm0
   1240 ; X64-SSE2-NEXT:    pandn %xmm2, %xmm3
   1241 ; X64-SSE2-NEXT:    por %xmm0, %xmm3
   1242 ; X64-SSE2-NEXT:    movdqa %xmm3, %xmm0
   1243 ; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
   1244 ; X64-SSE2-NEXT:    movdqa %xmm5, %xmm1
   1245 ; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
   1246 ; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
   1247 ; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
   1248 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2]
   1249 ; X64-SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
   1250 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
   1251 ; X64-SSE2-NEXT:    pand %xmm6, %xmm0
   1252 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
   1253 ; X64-SSE2-NEXT:    por %xmm0, %xmm1
   1254 ; X64-SSE2-NEXT:    pand %xmm1, %xmm3
   1255 ; X64-SSE2-NEXT:    pandn %xmm5, %xmm1
   1256 ; X64-SSE2-NEXT:    por %xmm3, %xmm1
   1257 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
   1258 ; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
   1259 ; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
   1260 ; X64-SSE2-NEXT:    pxor %xmm0, %xmm4
   1261 ; X64-SSE2-NEXT:    movdqa %xmm4, %xmm3
   1262 ; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
   1263 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
   1264 ; X64-SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
   1265 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
   1266 ; X64-SSE2-NEXT:    pand %xmm5, %xmm2
   1267 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
   1268 ; X64-SSE2-NEXT:    por %xmm2, %xmm3
   1269 ; X64-SSE2-NEXT:    pand %xmm3, %xmm1
   1270 ; X64-SSE2-NEXT:    pandn %xmm0, %xmm3
   1271 ; X64-SSE2-NEXT:    por %xmm1, %xmm3
   1272 ; X64-SSE2-NEXT:    movq %xmm3, %rax
   1273 ; X64-SSE2-NEXT:    retq
   1274 ;
   1275 ; X64-SSE42-LABEL: test_reduce_v8i64:
   1276 ; X64-SSE42:       ## %bb.0:
   1277 ; X64-SSE42-NEXT:    movdqa %xmm0, %xmm5
   1278 ; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
   1279 ; X64-SSE42-NEXT:    movdqa %xmm1, %xmm6
   1280 ; X64-SSE42-NEXT:    pxor %xmm4, %xmm6
   1281 ; X64-SSE42-NEXT:    movdqa %xmm3, %xmm0
   1282 ; X64-SSE42-NEXT:    pxor %xmm4, %xmm0
   1283 ; X64-SSE42-NEXT:    pcmpgtq %xmm6, %xmm0
   1284 ; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
   1285 ; X64-SSE42-NEXT:    movdqa %xmm5, %xmm1
   1286 ; X64-SSE42-NEXT:    pxor %xmm4, %xmm1
   1287 ; X64-SSE42-NEXT:    movdqa %xmm2, %xmm0
   1288 ; X64-SSE42-NEXT:    pxor %xmm4, %xmm0
   1289 ; X64-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
   1290 ; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm5, %xmm2
   1291 ; X64-SSE42-NEXT:    movapd %xmm2, %xmm1
   1292 ; X64-SSE42-NEXT:    xorpd %xmm4, %xmm1
   1293 ; X64-SSE42-NEXT:    movapd %xmm3, %xmm0
   1294 ; X64-SSE42-NEXT:    xorpd %xmm4, %xmm0
   1295 ; X64-SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
   1296 ; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm3
   1297 ; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
   1298 ; X64-SSE42-NEXT:    movdqa %xmm3, %xmm0
   1299 ; X64-SSE42-NEXT:    pxor %xmm4, %xmm0
   1300 ; X64-SSE42-NEXT:    pxor %xmm1, %xmm4
   1301 ; X64-SSE42-NEXT:    pcmpgtq %xmm0, %xmm4
   1302 ; X64-SSE42-NEXT:    movdqa %xmm4, %xmm0
   1303 ; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
   1304 ; X64-SSE42-NEXT:    movq %xmm1, %rax
   1305 ; X64-SSE42-NEXT:    retq
   1306 ;
   1307 ; X64-AVX1-LABEL: test_reduce_v8i64:
   1308 ; X64-AVX1:       ## %bb.0:
   1309 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   1310 ; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
   1311 ; X64-AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
   1312 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
   1313 ; X64-AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
   1314 ; X64-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
   1315 ; X64-AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm4
   1316 ; X64-AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm5
   1317 ; X64-AVX1-NEXT:    vpcmpgtq %xmm4, %xmm5, %xmm4
   1318 ; X64-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm4, %ymm2
   1319 ; X64-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
   1320 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1321 ; X64-AVX1-NEXT:    vxorpd %xmm3, %xmm0, %xmm2
   1322 ; X64-AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm4
   1323 ; X64-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
   1324 ; X64-AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm4
   1325 ; X64-AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
   1326 ; X64-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
   1327 ; X64-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
   1328 ; X64-AVX1-NEXT:    vxorpd %xmm3, %xmm0, %xmm2
   1329 ; X64-AVX1-NEXT:    vxorpd %xmm3, %xmm1, %xmm4
   1330 ; X64-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
   1331 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
   1332 ; X64-AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm3
   1333 ; X64-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm3
   1334 ; X64-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
   1335 ; X64-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
   1336 ; X64-AVX1-NEXT:    vmovq %xmm0, %rax
   1337 ; X64-AVX1-NEXT:    vzeroupper
   1338 ; X64-AVX1-NEXT:    retq
   1339 ;
   1340 ; X64-AVX2-LABEL: test_reduce_v8i64:
   1341 ; X64-AVX2:       ## %bb.0:
   1342 ; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
   1343 ; X64-AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm3
   1344 ; X64-AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm4
   1345 ; X64-AVX2-NEXT:    vpcmpgtq %ymm3, %ymm4, %ymm3
   1346 ; X64-AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
   1347 ; X64-AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1348 ; X64-AVX2-NEXT:    vxorpd %ymm2, %ymm0, %ymm3
   1349 ; X64-AVX2-NEXT:    vxorpd %ymm2, %ymm1, %ymm4
   1350 ; X64-AVX2-NEXT:    vpcmpgtq %ymm3, %ymm4, %ymm3
   1351 ; X64-AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
   1352 ; X64-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
   1353 ; X64-AVX2-NEXT:    vxorpd %ymm2, %ymm0, %ymm3
   1354 ; X64-AVX2-NEXT:    vxorpd %ymm2, %ymm1, %ymm2
   1355 ; X64-AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
   1356 ; X64-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
   1357 ; X64-AVX2-NEXT:    vmovq %xmm0, %rax
   1358 ; X64-AVX2-NEXT:    vzeroupper
   1359 ; X64-AVX2-NEXT:    retq
   1360 ;
   1361 ; X64-AVX512-LABEL: test_reduce_v8i64:
   1362 ; X64-AVX512:       ## %bb.0:
   1363 ; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
   1364 ; X64-AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
   1365 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1366 ; X64-AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
   1367 ; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
   1368 ; X64-AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
   1369 ; X64-AVX512-NEXT:    vmovq %xmm0, %rax
   1370 ; X64-AVX512-NEXT:    vzeroupper
   1371 ; X64-AVX512-NEXT:    retq
   1372   %1  = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
   1373   %2  = icmp ult <8 x i64> %a0, %1
   1374   %3  = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1
   1375   %4  = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   1376   %5  = icmp ult <8 x i64> %3, %4
   1377   %6  = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4
   1378   %7  = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   1379   %8  = icmp ult <8 x i64> %6, %7
   1380   %9  = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7
   1381   %10 = extractelement <8 x i64> %9, i32 0
   1382   ret i64 %10
   1383 }
   1384 
define i32 @test_reduce_v16i32(<16 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i32:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm5
; X86-SSE2-NEXT:    pxor %xmm4, %xmm5
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm6
; X86-SSE2-NEXT:    pxor %xmm4, %xmm6
; X86-SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
; X86-SSE2-NEXT:    pand %xmm6, %xmm1
; X86-SSE2-NEXT:    pandn %xmm3, %xmm6
; X86-SSE2-NEXT:    por %xmm1, %xmm6
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X86-SSE2-NEXT:    pxor %xmm4, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; X86-SSE2-NEXT:    pand %xmm3, %xmm0
; X86-SSE2-NEXT:    pandn %xmm2, %xmm3
; X86-SSE2-NEXT:    por %xmm0, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm0
; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm6, %xmm1
; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
; X86-SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm3
; X86-SSE2-NEXT:    pandn %xmm6, %xmm1
; X86-SSE2-NEXT:    por %xmm3, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm4, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; X86-SSE2-NEXT:    pand %xmm3, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm3
; X86-SSE2-NEXT:    por %xmm1, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm1
; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm4
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm4
; X86-SSE2-NEXT:    pand %xmm4, %xmm3
; X86-SSE2-NEXT:    pandn %xmm0, %xmm4
; X86-SSE2-NEXT:    por %xmm3, %xmm4
; X86-SSE2-NEXT:    movd %xmm4, %eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v16i32:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminud %xmm3, %xmm1
; X86-SSE42-NEXT:    pminud %xmm2, %xmm0
; X86-SSE42-NEXT:    pminud %xmm1, %xmm0
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE42-NEXT:    pminud %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE42-NEXT:    pminud %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v16i32:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpminud %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v16i32:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v16i32:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm5
; X64-SSE2-NEXT:    pxor %xmm4, %xmm5
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm6
; X64-SSE2-NEXT:    pxor %xmm4, %xmm6
; X64-SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
; X64-SSE2-NEXT:    pand %xmm6, %xmm1
; X64-SSE2-NEXT:    pandn %xmm3, %xmm6
; X64-SSE2-NEXT:    por %xmm1, %xmm6
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X64-SSE2-NEXT:    pxor %xmm4, %xmm3
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; X64-SSE2-NEXT:    pand %xmm3, %xmm0
; X64-SSE2-NEXT:    pandn %xmm2, %xmm3
; X64-SSE2-NEXT:    por %xmm0, %xmm3
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm0
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm6, %xmm1
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm3
; X64-SSE2-NEXT:    pandn %xmm6, %xmm1
; X64-SSE2-NEXT:    por %xmm3, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm4, %xmm3
; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; X64-SSE2-NEXT:    pand %xmm3, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm3
; X64-SSE2-NEXT:    por %xmm1, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm1
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    pxor %xmm0, %xmm4
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm4
; X64-SSE2-NEXT:    pand %xmm4, %xmm3
; X64-SSE2-NEXT:    pandn %xmm0, %xmm4
; X64-SSE2-NEXT:    por %xmm3, %xmm4
; X64-SSE2-NEXT:    movd %xmm4, %eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v16i32:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminud %xmm3, %xmm1
; X64-SSE42-NEXT:    pminud %xmm2, %xmm0
; X64-SSE42-NEXT:    pminud %xmm1, %xmm0
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE42-NEXT:    pminud %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE42-NEXT:    pminud %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v16i32:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpminud %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpminud %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v16i32:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v16i32:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT:    vpminud %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
; Horizontal unsigned-min reduction of a 512-bit <16 x i32> vector.
; Each step folds the upper half of the live lanes onto the lower half
; with an (icmp ult + select) pair, halving the live-lane count
; 16 -> 8 -> 4 -> 2 -> 1; lane 0 of the final vector is the overall
; minimum and is the scalar returned. The comment lines above are
; autogenerated FileCheck assertions; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
  ; step 1: min of lanes 0-7 against lanes 8-15 (high 256 bits).
  %1  = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ult <16 x i32> %a0, %1
  %3  = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1
  ; step 2: fold lanes 4-7 onto lanes 0-3.
  %4  = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ult <16 x i32> %3, %4
  %6  = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4
  ; step 3: fold lanes 2-3 onto lanes 0-1.
  %7  = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ult <16 x i32> %6, %7
  %9  = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7
  ; step 4: fold lane 1 onto lane 0.
  %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <16 x i32> %9, %10
  %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10
  ; lane 0 now holds the unsigned minimum of all 16 input elements.
  %13 = extractelement <16 x i32> %12, i32 0
  ret i32 %13
}
   1588 
define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
; X86-SSE2-NEXT:    pminsw %xmm2, %xmm0
; X86-SSE2-NEXT:    pxor %xmm4, %xmm3
; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
; X86-SSE2-NEXT:    pminsw %xmm3, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm2
; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
; X86-SSE2-NEXT:    pminsw %xmm1, %xmm2
; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
; X86-SSE2-NEXT:    pminsw %xmm2, %xmm0
; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v32i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminuw %xmm3, %xmm1
; X86-SSE42-NEXT:    pminuw %xmm2, %xmm0
; X86-SSE42-NEXT:    pminuw %xmm1, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v32i16:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpminuw %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v32i16:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    pminsw %xmm2, %xmm0
; X64-SSE2-NEXT:    pxor %xmm4, %xmm3
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    pminsw %xmm3, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm2
; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
; X64-SSE2-NEXT:    pminsw %xmm1, %xmm2
; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    pminsw %xmm2, %xmm0
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminuw %xmm3, %xmm1
; X64-SSE42-NEXT:    pminuw %xmm2, %xmm0
; X64-SSE42-NEXT:    pminuw %xmm1, %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v32i16:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpminuw %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v32i16:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v32i16:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
; Horizontal unsigned-min reduction of a 512-bit <32 x i16> vector.
; Each step folds the upper half of the live lanes onto the lower half
; with an (icmp ult + select) pair, halving the live-lane count
; 32 -> 16 -> 8 -> 4 -> 2 -> 1; lane 0 of the final vector is the
; overall minimum and is the scalar returned. The comment lines above
; are autogenerated FileCheck assertions; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
  ; step 1: min of lanes 0-15 against lanes 16-31 (high 256 bits).
  %1  = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2  = icmp ult <32 x i16> %a0, %1
  %3  = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
  ; step 2: fold lanes 8-15 onto lanes 0-7.
  %4  = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5  = icmp ult <32 x i16> %3, %4
  %6  = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
  ; step 3: fold lanes 4-7 onto lanes 0-3.
  %7  = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8  = icmp ult <32 x i16> %6, %7
  %9  = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
  ; step 4: fold lanes 2-3 onto lanes 0-1.
  %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <32 x i16> %9, %10
  %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10
  ; step 5: fold lane 1 onto lane 0.
  %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp ult <32 x i16> %12, %13
  %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13
  ; lane 0 now holds the unsigned minimum of all 32 input elements.
  %16 = extractelement <32 x i16> %15, i32 0
  ret i16 %16
}
   1757 
   1758 define i8 @test_reduce_v64i8(<64 x i8> %a0) {
   1759 ; X86-SSE2-LABEL: test_reduce_v64i8:
   1760 ; X86-SSE2:       ## %bb.0:
   1761 ; X86-SSE2-NEXT:    pminub %xmm3, %xmm1
   1762 ; X86-SSE2-NEXT:    pminub %xmm2, %xmm0
   1763 ; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
   1764 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
   1765 ; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
   1766 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
   1767 ; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
   1768 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
   1769 ; X86-SSE2-NEXT:    psrld $16, %xmm1
   1770 ; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
   1771 ; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
   1772 ; X86-SSE2-NEXT:    psrlw $8, %xmm0
   1773 ; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
   1774 ; X86-SSE2-NEXT:    movd %xmm0, %eax
   1775 ; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
   1776 ; X86-SSE2-NEXT:    retl
   1777 ;
   1778 ; X86-SSE42-LABEL: test_reduce_v64i8:
   1779 ; X86-SSE42:       ## %bb.0:
   1780 ; X86-SSE42-NEXT:    pminub %xmm3, %xmm1
   1781 ; X86-SSE42-NEXT:    pminub %xmm2, %xmm0
   1782 ; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
   1783 ; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
   1784 ; X86-SSE42-NEXT:    psrlw $8, %xmm1
   1785 ; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
   1786 ; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
   1787 ; X86-SSE42-NEXT:    pextrb $0, %xmm0, %eax
   1788 ; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
   1789 ; X86-SSE42-NEXT:    retl
   1790 ;
   1791 ; X86-AVX1-LABEL: test_reduce_v64i8:
   1792 ; X86-AVX1:       ## %bb.0:
   1793 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   1794 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   1795 ; X86-AVX1-NEXT:    vpminub %xmm2, %xmm3, %xmm2
   1796 ; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
   1797 ; X86-AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
   1798 ; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
   1799 ; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
   1800 ; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
   1801 ; X86-AVX1-NEXT:    vpextrb $0, %xmm0, %eax
   1802 ; X86-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
   1803 ; X86-AVX1-NEXT:    vzeroupper
   1804 ; X86-AVX1-NEXT:    retl
   1805 ;
   1806 ; X86-AVX2-LABEL: test_reduce_v64i8:
   1807 ; X86-AVX2:       ## %bb.0:
   1808 ; X86-AVX2-NEXT:    vpminub %ymm1, %ymm0, %ymm0
   1809 ; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1810 ; X86-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
   1811 ; X86-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
   1812 ; X86-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
   1813 ; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
   1814 ; X86-AVX2-NEXT:    vpextrb $0, %xmm0, %eax
   1815 ; X86-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
   1816 ; X86-AVX2-NEXT:    vzeroupper
   1817 ; X86-AVX2-NEXT:    retl
   1818 ;
   1819 ; X64-SSE2-LABEL: test_reduce_v64i8:
   1820 ; X64-SSE2:       ## %bb.0:
   1821 ; X64-SSE2-NEXT:    pminub %xmm3, %xmm1
   1822 ; X64-SSE2-NEXT:    pminub %xmm2, %xmm0
   1823 ; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
   1824 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
   1825 ; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
   1826 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
   1827 ; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
   1828 ; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
   1829 ; X64-SSE2-NEXT:    psrld $16, %xmm1
   1830 ; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
   1831 ; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
   1832 ; X64-SSE2-NEXT:    psrlw $8, %xmm0
   1833 ; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
   1834 ; X64-SSE2-NEXT:    movd %xmm0, %eax
   1835 ; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
   1836 ; X64-SSE2-NEXT:    retq
   1837 ;
   1838 ; X64-SSE42-LABEL: test_reduce_v64i8:
   1839 ; X64-SSE42:       ## %bb.0:
   1840 ; X64-SSE42-NEXT:    pminub %xmm3, %xmm1
   1841 ; X64-SSE42-NEXT:    pminub %xmm2, %xmm0
   1842 ; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
   1843 ; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
   1844 ; X64-SSE42-NEXT:    psrlw $8, %xmm1
   1845 ; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
   1846 ; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
   1847 ; X64-SSE42-NEXT:    pextrb $0, %xmm0, %eax
   1848 ; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
   1849 ; X64-SSE42-NEXT:    retq
   1850 ;
   1851 ; X64-AVX1-LABEL: test_reduce_v64i8:
   1852 ; X64-AVX1:       ## %bb.0:
   1853 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   1854 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
   1855 ; X64-AVX1-NEXT:    vpminub %xmm2, %xmm3, %xmm2
   1856 ; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
   1857 ; X64-AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
   1858 ; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
   1859 ; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
   1860 ; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
   1861 ; X64-AVX1-NEXT:    vpextrb $0, %xmm0, %eax
   1862 ; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
   1863 ; X64-AVX1-NEXT:    vzeroupper
   1864 ; X64-AVX1-NEXT:    retq
   1865 ;
   1866 ; X64-AVX2-LABEL: test_reduce_v64i8:
   1867 ; X64-AVX2:       ## %bb.0:
   1868 ; X64-AVX2-NEXT:    vpminub %ymm1, %ymm0, %ymm0
   1869 ; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1870 ; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
   1871 ; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
   1872 ; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
   1873 ; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
   1874 ; X64-AVX2-NEXT:    vpextrb $0, %xmm0, %eax
   1875 ; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
   1876 ; X64-AVX2-NEXT:    vzeroupper
   1877 ; X64-AVX2-NEXT:    retq
   1878 ;
   1879 ; X64-AVX512-LABEL: test_reduce_v64i8:
   1880 ; X64-AVX512:       ## %bb.0:
   1881 ; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
   1882 ; X64-AVX512-NEXT:    vpminub %ymm1, %ymm0, %ymm0
   1883 ; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1884 ; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
   1885 ; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
   1886 ; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
   1887 ; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
   1888 ; X64-AVX512-NEXT:    vpextrb $0, %xmm0, %eax
   1889 ; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
   1890 ; X64-AVX512-NEXT:    vzeroupper
   1891 ; X64-AVX512-NEXT:    retq
   1892   %1  = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   1893   %2  = icmp ult <64 x i8> %a0, %1
   1894   %3  = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
   1895   %4  = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   1896   %5  = icmp ult <64 x i8> %3, %4
   1897   %6  = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
   1898   %7  = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   1899   %8  = icmp ult <64 x i8> %6, %7
   1900   %9  = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
   1901   %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   1902   %11 = icmp ult <64 x i8> %9, %10
   1903   %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
   1904   %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   1905   %14 = icmp ult <64 x i8> %12, %13
   1906   %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13
   1907   %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   1908   %17 = icmp ult <64 x i8> %15, %16
   1909   %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16
   1910   %19 = extractelement <64 x i8> %18, i32 0
   1911   ret i8 %19
   1912 }
   1913