; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL

; Each function below passes its single vector argument to an
; llvm.experimental.vector.reduce.and intrinsic and returns the scalar result.
; The check lines record the expected x86-64 assembly for the feature sets
; selected by the llc invocations above.

;
; vXi64
;

; AND-reduction of a <2 x i64> argument to an i64.
; Expected code ANDs the vector with a shuffled copy of itself and moves the
; low element to %rax.
define i64 @test_v2i64(<2 x i64> %a0) {
; SSE-LABEL: test_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movq %xmm1, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> %a0)
  ret i64 %1
}

; AND-reduction of a <4 x i64> argument to an i64.
; The SSE expectation first ANDs the two xmm halves of the argument; the AVX
; expectations extract the upper 128 bits of ymm0 and AND them in.
define i64 @test_v4i64(<4 x i64> %a0) {
; SSE-LABEL: test_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movq %xmm1, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64> %a0)
  ret i64 %1
}

; AND-reduction of an <8 x i64> argument to an i64.
; The argument occupies xmm0-xmm3 (SSE), ymm0-ymm1 (AVX1/AVX2) or zmm0
; (AVX512) in the expected code.
define i64 @test_v8i64(<8 x i64> %a0) {
; SSE-LABEL: test_v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    pand %xmm3, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movq %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64> %a0)
  ret i64 %1
}

; AND-reduction of a <16 x i64> argument to an i64.
; The argument occupies xmm0-xmm7 (SSE), ymm0-ymm3 (AVX1/AVX2) or zmm0-zmm1
; (AVX512) in the expected code.
define i64 @test_v16i64(<16 x i64> %a0) {
; SSE-LABEL: test_v16i64:
; SSE:       # %bb.0:
; SSE-NEXT:    pand %xmm6, %xmm2
; SSE-NEXT:    pand %xmm7, %xmm3
; SSE-NEXT:    pand %xmm5, %xmm3
; SSE-NEXT:    pand %xmm1, %xmm3
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    pand %xmm3, %xmm2
; SSE-NEXT:    pand %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    movq %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v16i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v16i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v16i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64> %a0)
  ret i64 %1
}

;
; vXi32
;

; AND-reduction of a <4 x i32> argument to an i32.
; Expected code uses two shuffle+AND steps and then moves the low element to
; %eax.
define i32 @test_v4i32(<4 x i32> %a0) {
; SSE-LABEL: test_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> %a0)
  ret i32 %1
}

; AND-reduction of an <8 x i32> argument to an i32.
define i32 @test_v8i32(<8 x i32> %a0) {
; SSE-LABEL: test_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32> %a0)
  ret i32 %1
}

; AND-reduction of a <16 x i32> argument to an i32.
define i32 @test_v16i32(<16 x i32> %a0) {
; SSE-LABEL: test_v16i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pand %xmm3, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32> %a0)
  ret i32 %1
}

; AND-reduction of a <32 x i32> argument to an i32.
define i32 @test_v32i32(<32 x i32> %a0) {
; SSE-LABEL: test_v32i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pand %xmm6, %xmm2
; SSE-NEXT:    pand %xmm7, %xmm3
; SSE-NEXT:    pand %xmm5, %xmm3
; SSE-NEXT:    pand %xmm1, %xmm3
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    pand %xmm3, %xmm2
; SSE-NEXT:    pand %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v32i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v32i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v32i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32> %a0)
  ret i32 %1
}

;
; vXi16
;

; AND-reduction of an <8 x i16> argument to an i16.
; Expected code adds a 16-bit logical right shift + AND step after the two
; shuffle+AND steps, then reads the result from %ax.
define i16 @test_v8i16(<8 x i16> %a0) {
; SSE-LABEL: test_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrld $16, %xmm1
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16> %a0)
  ret i16 %1
}

; AND-reduction of a <16 x i16> argument to an i16.
define i16 @test_v16i16(<16 x i16> %a0) {
; SSE-LABEL: test_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrld $16, %xmm1
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16> %a0)
  ret i16 %1
}

; AND-reduction of a <32 x i16> argument to an i16.
define i16 @test_v32i16(<32 x i16> %a0) {
; SSE-LABEL: test_v32i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pand %xmm3, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    psrld $16, %xmm0
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v32i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16> %a0)
  ret i16 %1
}

; AND-reduction of a <64 x i16> argument to an i16.
define i16 @test_v64i16(<64 x i16> %a0) {
; SSE-LABEL: test_v64i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pand %xmm6, %xmm2
; SSE-NEXT:    pand %xmm7, %xmm3
; SSE-NEXT:    pand %xmm5, %xmm3
; SSE-NEXT:    pand %xmm1, %xmm3
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    pand %xmm3, %xmm2
; SSE-NEXT:    pand %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    psrld $16, %xmm0
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v64i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v64i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v64i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16> %a0)
  ret i16 %1
}

;
; vXi8
;

; AND-reduction of a <16 x i8> argument to an i8.
; Expected code adds an 8-bit right shift + AND step after the 16-bit one. The
; SSE2 expectation extracts the result with movd, the others with
; pextrb/vpextrb, which is why SSE2 and SSE41 are checked separately here.
define i8 @test_v16i8(<16 x i8> %a0) {
; SSE2-LABEL: test_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $16, %xmm1
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $16, %xmm1
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    psrlw $8, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpextrb $0, %xmm0, %eax
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8> %a0)
  ret i8 %1
}

; AND-reduction of a <32 x i8> argument to an i8.
define i8 @test_v32i8(<32 x i8> %a0) {
; SSE2-LABEL: test_v32i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $16, %xmm1
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v32i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $16, %xmm1
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    psrlw $8, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8> %a0)
  ret i8 %1
}

; AND-reduction of a <64 x i8> argument to an i8.
define i8 @test_v64i8(<64 x i8> %a0) {
; SSE2-LABEL: test_v64i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrld $16, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $8, %xmm1
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v64i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pand %xmm2, %xmm1
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    psrld $16, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    pand %xmm0, %xmm1
; SSE41-NEXT:    pextrb $0, %xmm1, %eax
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v64i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v64i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8> %a0)
  ret i8 %1
}

define i8 @test_v128i8(<128 x i8> %a0) {
; SSE2-LABEL: test_v128i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm6, %xmm2
; SSE2-NEXT:    pand %xmm7, %xmm3
; SSE2-NEXT:    pand %xmm5, %xmm3
; SSE2-NEXT:    pand %xmm1, %xmm3
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    pand %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrld $16, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $8, %xmm1
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v128i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pand %xmm6, %xmm2
; SSE41-NEXT:    pand %xmm7, %xmm3
; SSE41-NEXT:    pand %xmm5, %xmm3
; SSE41-NEXT:    pand %xmm1, %xmm3
; SSE41-NEXT: pand %xmm4, %xmm2 931 ; SSE41-NEXT: pand %xmm3, %xmm2 932 ; SSE41-NEXT: pand %xmm0, %xmm2 933 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] 934 ; SSE41-NEXT: pand %xmm2, %xmm0 935 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 936 ; SSE41-NEXT: pand %xmm0, %xmm1 937 ; SSE41-NEXT: movdqa %xmm1, %xmm0 938 ; SSE41-NEXT: psrld $16, %xmm0 939 ; SSE41-NEXT: pand %xmm1, %xmm0 940 ; SSE41-NEXT: movdqa %xmm0, %xmm1 941 ; SSE41-NEXT: psrlw $8, %xmm1 942 ; SSE41-NEXT: pand %xmm0, %xmm1 943 ; SSE41-NEXT: pextrb $0, %xmm1, %eax 944 ; SSE41-NEXT: # kill: def $al killed $al killed $eax 945 ; SSE41-NEXT: retq 946 ; 947 ; AVX1-LABEL: test_v128i8: 948 ; AVX1: # %bb.0: 949 ; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1 950 ; AVX1-NEXT: vandps %ymm1, %ymm2, %ymm1 951 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 952 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 953 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 954 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] 955 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 956 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3] 957 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 958 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 959 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 960 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 961 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 962 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax 963 ; AVX1-NEXT: # kill: def $al killed $al killed $eax 964 ; AVX1-NEXT: vzeroupper 965 ; AVX1-NEXT: retq 966 ; 967 ; AVX2-LABEL: test_v128i8: 968 ; AVX2: # %bb.0: 969 ; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1 970 ; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1 971 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 972 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 973 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 974 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 975 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 976 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 977 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 978 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 979 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 980 ; AVX2-NEXT: 
vpsrlw $8, %xmm0, %xmm1 981 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 982 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax 983 ; AVX2-NEXT: # kill: def $al killed $al killed $eax 984 ; AVX2-NEXT: vzeroupper 985 ; AVX2-NEXT: retq 986 ; 987 ; AVX512-LABEL: test_v128i8: 988 ; AVX512: # %bb.0: 989 ; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0 990 ; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 991 ; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0 992 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 993 ; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0 994 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 995 ; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0 996 ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 997 ; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0 998 ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 999 ; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0 1000 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 1001 ; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0 1002 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax 1003 ; AVX512-NEXT: # kill: def $al killed $al killed $eax 1004 ; AVX512-NEXT: vzeroupper 1005 ; AVX512-NEXT: retq 1006 %1 = call i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8> %a0) 1007 ret i8 %1 1008 } 1009 1010 declare i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>) 1011 declare i64 @llvm.experimental.vector.reduce.and.i64.v4i64(<4 x i64>) 1012 declare i64 @llvm.experimental.vector.reduce.and.i64.v8i64(<8 x i64>) 1013 declare i64 @llvm.experimental.vector.reduce.and.i64.v16i64(<16 x i64>) 1014 1015 declare i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32>) 1016 declare i32 @llvm.experimental.vector.reduce.and.i32.v8i32(<8 x i32>) 1017 declare i32 @llvm.experimental.vector.reduce.and.i32.v16i32(<16 x i32>) 1018 declare i32 @llvm.experimental.vector.reduce.and.i32.v32i32(<32 x i32>) 1019 1020 declare i16 @llvm.experimental.vector.reduce.and.i16.v8i16(<8 x i16>) 1021 declare i16 @llvm.experimental.vector.reduce.and.i16.v16i16(<16 x i16>) 1022 declare i16 
@llvm.experimental.vector.reduce.and.i16.v32i16(<32 x i16>) 1023 declare i16 @llvm.experimental.vector.reduce.and.i16.v64i16(<64 x i16>) 1024 1025 declare i8 @llvm.experimental.vector.reduce.and.i8.v16i8(<16 x i8>) 1026 declare i8 @llvm.experimental.vector.reduce.and.i8.v32i8(<32 x i8>) 1027 declare i8 @llvm.experimental.vector.reduce.and.i8.v64i8(<64 x i8>) 1028 declare i8 @llvm.experimental.vector.reduce.and.i8.v128i8(<128 x i8>) 1029