; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL

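; This file checks the expansion of the llvm.experimental.vector.reduce.and
; intrinsics at every power-of-two width from one xmm register up to eight.
; The expected lowering is a shuffle-and-AND tree: inputs wider than 128 bits
; are first ANDed down to a single register, then the remaining lanes are
; folded in log2 steps before the scalar result is moved to a GPR.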
;
; vXi64
;

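; For i64 the in-register reduction is a single step: pshufd swaps the two
; 64-bit halves of the surviving xmm value, pand folds them together, and
; movq copies the low element to %rax.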
define i64 @test_v2i64(<2 x i64> %a0) {
; SSE-LABEL: test_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movq %xmm1, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> %a0)
  ret i64 %1
}

define i64 @test_v4i64(<4 x i64> %a0) {
; SSE-LABEL: test_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movq %xmm1, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> %a0)
  ret i64 %1
}

define i64 @test_v8i64(<8 x i64> %a0) {
; SSE-LABEL: test_v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    pand %xmm3, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movq %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> %a0)
  ret i64 %1
}

define i64 @test_v16i64(<16 x i64> %a0) {
; SSE-LABEL: test_v16i64:
; SSE:       # %bb.0:
; SSE-NEXT:    pand %xmm6, %xmm2
; SSE-NEXT:    pand %xmm7, %xmm3
; SSE-NEXT:    pand %xmm5, %xmm3
; SSE-NEXT:    pand %xmm1, %xmm3
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    pand %xmm3, %xmm2
; SSE-NEXT:    pand %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    movq %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v16i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v16i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v16i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> %a0)
  ret i64 %1
}

;
; vXi32
;

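; i32 reductions need one extra stage: once the 64-bit halves are folded, a
; second pshufd moves element 1 down so a final pand leaves the result in
; element 0 for the movd.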
define i32 @test_v4i32(<4 x i32> %a0) {
; SSE-LABEL: test_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> %a0)
  ret i32 %1
}

define i32 @test_v8i32(<8 x i32> %a0) {
; SSE-LABEL: test_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> %a0)
  ret i32 %1
}

define i32 @test_v16i32(<16 x i32> %a0) {
; SSE-LABEL: test_v16i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pand %xmm3, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> %a0)
  ret i32 %1
}

define i32 @test_v32i32(<32 x i32> %a0) {
; SSE-LABEL: test_v32i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pand %xmm6, %xmm2
; SSE-NEXT:    pand %xmm7, %xmm3
; SSE-NEXT:    pand %xmm5, %xmm3
; SSE-NEXT:    pand %xmm1, %xmm3
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    pand %xmm3, %xmm2
; SSE-NEXT:    pand %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v32i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v32i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v32i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> %a0)
  ret i32 %1
}

;
; vXi16
;

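; i16 reductions extend the tree with a psrld $16 that shifts the upper word
; of the low dword into place for the last pand; the kill comments record
; that only the low 16 bits of %eax form the return value.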
define i16 @test_v8i16(<8 x i16> %a0) {
; SSE-LABEL: test_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrld $16, %xmm1
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> %a0)
  ret i16 %1
}

define i16 @test_v16i16(<16 x i16> %a0) {
; SSE-LABEL: test_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrld $16, %xmm1
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> %a0)
  ret i16 %1
}

define i16 @test_v32i16(<32 x i16> %a0) {
; SSE-LABEL: test_v32i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pand %xmm3, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    psrld $16, %xmm0
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v32i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> %a0)
  ret i16 %1
}

define i16 @test_v64i16(<64 x i16> %a0) {
; SSE-LABEL: test_v64i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pand %xmm6, %xmm2
; SSE-NEXT:    pand %xmm7, %xmm3
; SSE-NEXT:    pand %xmm5, %xmm3
; SSE-NEXT:    pand %xmm1, %xmm3
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    pand %xmm3, %xmm2
; SSE-NEXT:    pand %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    psrld $16, %xmm0
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v64i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v64i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v64i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> %a0)
  ret i16 %1
}

;
; vXi8
;

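; i8 reductions add a final psrlw $8 stage. Note the extraction difference:
; SSE2 reads the result with movd, while SSE4.1 and the AVX targets use
; pextrb $0 to pull out the low byte directly.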
define i8 @test_v16i8(<16 x i8> %a0) {
; SSE2-LABEL: test_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $16, %xmm1
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $16, %xmm1
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    psrlw $8, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpextrb $0, %xmm0, %eax
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> %a0)
  ret i8 %1
}

define i8 @test_v32i8(<32 x i8> %a0) {
; SSE2-LABEL: test_v32i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $16, %xmm1
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v32i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $16, %xmm1
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    psrlw $8, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> %a0)
  ret i8 %1
}

define i8 @test_v64i8(<64 x i8> %a0) {
; SSE2-LABEL: test_v64i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrld $16, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $8, %xmm1
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v64i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pand %xmm2, %xmm1
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    psrld $16, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    pextrb $0, %xmm1, %eax
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v64i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v64i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> %a0)
  ret i8 %1
}

define i8 @test_v128i8(<128 x i8> %a0) {
; SSE2-LABEL: test_v128i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm6, %xmm2
; SSE2-NEXT:    pand %xmm7, %xmm3
; SSE2-NEXT:    pand %xmm5, %xmm3
; SSE2-NEXT:    pand %xmm1, %xmm3
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    pand %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrld $16, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $8, %xmm1
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v128i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pand %xmm6, %xmm2
; SSE41-NEXT:    pand %xmm7, %xmm3
; SSE41-NEXT:    pand %xmm5, %xmm3
; SSE41-NEXT:    pand %xmm1, %xmm3
; SSE41-NEXT:    pand %xmm4, %xmm2
; SSE41-NEXT:    pand %xmm3, %xmm2
; SSE41-NEXT:    pand %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE41-NEXT:    pand %xmm2, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    psrld $16, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    pextrb $0, %xmm1, %eax
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v128i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v128i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v128i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> %a0)
  ret i8 %1
}

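; Declarations for every intrinsic exercised above; each reduction intrinsic
; takes only the source vector and returns its element type.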
declare i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64>)
declare i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>)
declare i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>)

declare i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32>)
declare i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32>)
declare i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>)
declare i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>)

declare i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16>)
declare i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>)
declare i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>)
declare i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>)

declare i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8>)
declare i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>)
declare i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>)
declare i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>)
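
; For reference, a minimal sketch of what the v4i32 reduction computes,
; written as straight-line IR (the value names are illustrative and do not
; appear in the test):
;
;   %hi   = shufflevector <4 x i32> %a0, <4 x i32> undef,
;                         <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
;   %and1 = and <4 x i32> %a0, %hi
;   %odd  = shufflevector <4 x i32> %and1, <4 x i32> undef,
;                         <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
;   %and2 = and <4 x i32> %and1, %odd
;   %res  = extractelement <4 x i32> %and2, i32 0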