; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE2
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE42
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX1
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE42
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX512

;
; 128-bit Vectors
;

; Signed-min reduction of <2 x i64> to i64: one shuffle of the high half down,
; then icmp slt + select, extracting lane 0.
define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v2i64:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
; X86-SSE2-NEXT: pxor %xmm1, %xmm2
; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm5, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT: por %xmm2, %xmm3
; X86-SSE2-NEXT: pand %xmm3, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm3
; X86-SSE2-NEXT: por %xmm0, %xmm3
; X86-SSE2-NEXT: movd %xmm3, %eax
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
; X86-SSE2-NEXT: movd %xmm0, %edx
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v2i64:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; X86-SSE42-NEXT: movdqa %xmm2, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT: movd %xmm2, %eax
; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v2i64:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v2i64:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
; X64-SSE2-NEXT: pxor %xmm1, %xmm2
; X64-SSE2-NEXT: movdqa %xmm2, %xmm4
; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm5, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT: por %xmm2, %xmm3
; X64-SSE2-NEXT: pand %xmm3, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm3
; X64-SSE2-NEXT: por %xmm0, %xmm3
; X64-SSE2-NEXT: movq %xmm3, %rax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v2i64:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; X64-SSE42-NEXT: movdqa %xmm2, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT: movq %xmm2, %rax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v2i64:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v2i64:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v2i64:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: retq
  %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %2 = icmp slt <2 x i64> %a0, %1
  %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1
  %4 = extractelement <2 x i64> %3, i32 0
  ret i64 %4
}

; Signed-min reduction of <4 x i32> to i32: two shuffle + icmp slt + select steps.
define i32 @test_reduce_v4i32(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i32:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm2
; X86-SSE2-NEXT: por %xmm0, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm2
; X86-SSE2-NEXT: pandn %xmm0, %xmm1
; X86-SSE2-NEXT: por %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v4i32:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE42-NEXT: pminsd %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v4i32:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v4i32:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm2
; X64-SSE2-NEXT: por %xmm0, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; X64-SSE2-NEXT: pand %xmm1, %xmm2
; X64-SSE2-NEXT: pandn %xmm0, %xmm1
; X64-SSE2-NEXT: por %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v4i32:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE42-NEXT: pminsd %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v4i32:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp slt <4 x i32> %a0, %1
  %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <4 x i32> %3, %4
  %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4
  %7 = extractelement <4 x i32> %6, i32 0
  ret i32 %7
}

; Signed-min reduction of <8 x i16> to i16: three shuffle + icmp slt + select steps.
define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i16:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v8i16:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v8i16:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v8i16:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT: retq
  %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp slt <8 x i16> %a0, %1
  %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1
  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <8 x i16> %3, %4
  %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4
  %7 = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp slt <8 x i16> %6, %7
  %9 = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7
  %10 = extractelement <8 x i16> %9, i32 0
  ret i16 %10
}

; Signed-min reduction of <16 x i8> to i8: four shuffle + icmp slt + select steps.
define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i8:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm2
; X86-SSE2-NEXT: por %xmm0, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm2
; X86-SSE2-NEXT: pandn %xmm0, %xmm1
; X86-SSE2-NEXT: por %xmm2, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: psrld $16, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm1
; X86-SSE2-NEXT: pandn %xmm0, %xmm2
; X86-SSE2-NEXT: por %xmm1, %xmm2
; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
; X86-SSE2-NEXT: psrlw $8, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm2
; X86-SSE2-NEXT: pandn %xmm0, %xmm1
; X86-SSE2-NEXT: por %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
; X86-SSE42-NEXT: psrlw $8, %xmm2
; X86-SSE42-NEXT: pminub %xmm0, %xmm2
; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v16i8:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm2
; X64-SSE2-NEXT: por %xmm0, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT: pand %xmm1, %xmm2
; X64-SSE2-NEXT: pandn %xmm0, %xmm1
; X64-SSE2-NEXT: por %xmm2, %xmm1
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
; X64-SSE2-NEXT: psrld $16, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm1
; X64-SSE2-NEXT: pandn %xmm0, %xmm2
; X64-SSE2-NEXT: por %xmm1, %xmm2
; X64-SSE2-NEXT: movdqa %xmm2, %xmm0
; X64-SSE2-NEXT: psrlw $8, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT: pand %xmm1, %xmm2
; X64-SSE2-NEXT: pandn %xmm0, %xmm1
; X64-SSE2-NEXT: por %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: psrlw $8, %xmm2
; X64-SSE42-NEXT: pminub %xmm0, %xmm2
; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v16i8:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2
; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: retq
  %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp slt <16 x i8> %a0, %1
  %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <16 x i8> %3, %4
  %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4
  %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp slt <16 x i8> %6, %7
  %9 = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7
  %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp slt <16 x i8> %9, %10
  %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10
  %13 = extractelement <16 x i8> %12, i32 0
  ret i8 %13
}

;
; 256-bit Vectors
;

; Signed-min reduction of <4 x i64> to i64: two shuffle + icmp slt + select steps.
define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i64:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
; X86-SSE2-NEXT: movdqa %xmm1, %xmm4
; X86-SSE2-NEXT: pxor %xmm2, %xmm4
; X86-SSE2-NEXT: movdqa %xmm4, %xmm5
; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm6, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X86-SSE2-NEXT: por %xmm3, %xmm4
; X86-SSE2-NEXT: pand %xmm4, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm4
; X86-SSE2-NEXT: por %xmm0, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; X86-SSE2-NEXT: movdqa %xmm4, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pxor %xmm0, %xmm2
; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm5, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X86-SSE2-NEXT: por %xmm1, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm4
; X86-SSE2-NEXT: pandn %xmm0, %xmm2
; X86-SSE2-NEXT: por %xmm4, %xmm2
; X86-SSE2-NEXT: movd %xmm2, %eax
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X86-SSE2-NEXT: movd %xmm0, %edx
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v4i64:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; X86-SSE42-NEXT: movdqa %xmm2, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT: movd %xmm2, %eax
; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v4i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v4i64:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
; X64-SSE2-NEXT: movdqa %xmm1, %xmm4
; X64-SSE2-NEXT: pxor %xmm2, %xmm4
; X64-SSE2-NEXT: movdqa %xmm4, %xmm5
; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm6, %xmm3
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X64-SSE2-NEXT: por %xmm3, %xmm4
; X64-SSE2-NEXT: pand %xmm4, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm4
; X64-SSE2-NEXT: por %xmm0, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pxor %xmm0, %xmm2
; X64-SSE2-NEXT: movdqa %xmm2, %xmm3
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm5, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X64-SSE2-NEXT: por %xmm1, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm4
; X64-SSE2-NEXT: pandn %xmm0, %xmm2
; X64-SSE2-NEXT: por %xmm4, %xmm2
; X64-SSE2-NEXT: movq %xmm2, %rax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v4i64:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; X64-SSE42-NEXT: movdqa %xmm2, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT: movq %xmm2, %rax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v4i64:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v4i64:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v4i64:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsq %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT: vpminsq %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp slt <4 x i64> %a0, %1
  %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1
  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <4 x i64> %3, %4
  %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4
  %7 = extractelement <4 x i64> %6, i32 0
  ret i64 %7
}

; Signed-min reduction of <8 x i32> to i32: three shuffle + icmp slt + select steps.
define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i32:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm2
; X86-SSE2-NEXT: por %xmm0, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm2
; X86-SSE2-NEXT: pandn %xmm0, %xmm1
; X86-SSE2-NEXT: por %xmm2, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm1
; X86-SSE2-NEXT: pandn %xmm0, %xmm2
; X86-SSE2-NEXT: por %xmm1, %xmm2
; X86-SSE2-NEXT: movd %xmm2, %eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v8i32:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE42-NEXT: pminsd %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v8i32:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v8i32:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v8i32:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm2
; X64-SSE2-NEXT: por %xmm0, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; X64-SSE2-NEXT: pand %xmm1, %xmm2
; X64-SSE2-NEXT: pandn %xmm0, %xmm1
; X64-SSE2-NEXT: por %xmm2, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm1
; X64-SSE2-NEXT: pandn %xmm0, %xmm2
; X64-SSE2-NEXT: por %xmm1, %xmm2
; X64-SSE2-NEXT: movd %xmm2, %eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v8i32:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE42-NEXT: pminsd %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v8i32:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v8i32:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v8i32:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp slt <8 x i32> %a0, %1
  %3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1
  %4 = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <8 x i32> %3, %4
  %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4
  %7 = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp slt <8 x i32> %6, %7
  %9 = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7
  %10 = extractelement <8 x i32> %9, i32 0
  ret i32 %10
}

; Signed-min reduction of <16 x i16> to i16 (checks continue past this chunk).
define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i16:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v16i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminsw %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v16i16:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v16i16:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v16i16:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v16i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminsw %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v16i16:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v16i16:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovdqa
{{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] 826 ; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 827 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 828 ; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 829 ; X64-AVX2-NEXT: vmovd %xmm0, %eax 830 ; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 831 ; X64-AVX2-NEXT: vzeroupper 832 ; X64-AVX2-NEXT: retq 833 ; 834 ; X64-AVX512-LABEL: test_reduce_v16i16: 835 ; X64-AVX512: ## %bb.0: 836 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 837 ; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 838 ; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] 839 ; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 840 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 841 ; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 842 ; X64-AVX512-NEXT: vmovd %xmm0, %eax 843 ; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 844 ; X64-AVX512-NEXT: vzeroupper 845 ; X64-AVX512-NEXT: retq 846 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 847 %2 = icmp slt <16 x i16> %a0, %1 848 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1 849 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 850 %5 = icmp slt <16 x i16> %3, %4 851 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 852 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 853 %8 = icmp slt <16 x i16> %6, %7 854 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7 855 %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 
1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 856 %11 = icmp slt <16 x i16> %9, %10 857 %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10 858 %13 = extractelement <16 x i16> %12, i32 0 859 ret i16 %13 860 } 861 862 define i8 @test_reduce_v32i8(<32 x i8> %a0) { 863 ; X86-SSE2-LABEL: test_reduce_v32i8: 864 ; X86-SSE2: ## %bb.0: 865 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 866 ; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 867 ; X86-SSE2-NEXT: pand %xmm2, %xmm0 868 ; X86-SSE2-NEXT: pandn %xmm1, %xmm2 869 ; X86-SSE2-NEXT: por %xmm0, %xmm2 870 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] 871 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 872 ; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 873 ; X86-SSE2-NEXT: pand %xmm1, %xmm2 874 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1 875 ; X86-SSE2-NEXT: por %xmm2, %xmm1 876 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 877 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 878 ; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 879 ; X86-SSE2-NEXT: pand %xmm2, %xmm1 880 ; X86-SSE2-NEXT: pandn %xmm0, %xmm2 881 ; X86-SSE2-NEXT: por %xmm1, %xmm2 882 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 883 ; X86-SSE2-NEXT: psrld $16, %xmm0 884 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 885 ; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 886 ; X86-SSE2-NEXT: pand %xmm1, %xmm2 887 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1 888 ; X86-SSE2-NEXT: por %xmm2, %xmm1 889 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 890 ; X86-SSE2-NEXT: psrlw $8, %xmm0 891 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 892 ; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 893 ; X86-SSE2-NEXT: pand %xmm2, %xmm1 894 ; X86-SSE2-NEXT: pandn %xmm0, %xmm2 895 ; X86-SSE2-NEXT: por %xmm1, %xmm2 896 ; X86-SSE2-NEXT: movd %xmm2, %eax 897 ; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 898 ; X86-SSE2-NEXT: retl 899 ; 900 ; X86-SSE42-LABEL: test_reduce_v32i8: 901 ; X86-SSE42: ## %bb.0: 902 ; X86-SSE42-NEXT: pminsb %xmm1, %xmm0 903 ; X86-SSE42-NEXT: 
movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] 904 ; X86-SSE42-NEXT: pxor %xmm1, %xmm0 905 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 906 ; X86-SSE42-NEXT: psrlw $8, %xmm2 907 ; X86-SSE42-NEXT: pminub %xmm0, %xmm2 908 ; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0 909 ; X86-SSE42-NEXT: pxor %xmm1, %xmm0 910 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax 911 ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 912 ; X86-SSE42-NEXT: retl 913 ; 914 ; X86-AVX1-LABEL: test_reduce_v32i8: 915 ; X86-AVX1: ## %bb.0: 916 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 917 ; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 918 ; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] 919 ; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 920 ; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 921 ; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 922 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 923 ; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 924 ; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax 925 ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax 926 ; X86-AVX1-NEXT: vzeroupper 927 ; X86-AVX1-NEXT: retl 928 ; 929 ; X86-AVX2-LABEL: test_reduce_v32i8: 930 ; X86-AVX2: ## %bb.0: 931 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 932 ; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 933 ; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] 934 ; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 935 ; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 936 ; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 937 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 938 ; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 939 ; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax 940 ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax 941 ; X86-AVX2-NEXT: vzeroupper 942 ; X86-AVX2-NEXT: retl 943 ; 944 ; X64-SSE2-LABEL: test_reduce_v32i8: 945 ; X64-SSE2: ## %bb.0: 946 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 947 ; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 948 ; 
X64-SSE2-NEXT: pand %xmm2, %xmm0 949 ; X64-SSE2-NEXT: pandn %xmm1, %xmm2 950 ; X64-SSE2-NEXT: por %xmm0, %xmm2 951 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] 952 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 953 ; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 954 ; X64-SSE2-NEXT: pand %xmm1, %xmm2 955 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1 956 ; X64-SSE2-NEXT: por %xmm2, %xmm1 957 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 958 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 959 ; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 960 ; X64-SSE2-NEXT: pand %xmm2, %xmm1 961 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2 962 ; X64-SSE2-NEXT: por %xmm1, %xmm2 963 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 964 ; X64-SSE2-NEXT: psrld $16, %xmm0 965 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 966 ; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 967 ; X64-SSE2-NEXT: pand %xmm1, %xmm2 968 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1 969 ; X64-SSE2-NEXT: por %xmm2, %xmm1 970 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 971 ; X64-SSE2-NEXT: psrlw $8, %xmm0 972 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 973 ; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 974 ; X64-SSE2-NEXT: pand %xmm2, %xmm1 975 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2 976 ; X64-SSE2-NEXT: por %xmm1, %xmm2 977 ; X64-SSE2-NEXT: movd %xmm2, %eax 978 ; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 979 ; X64-SSE2-NEXT: retq 980 ; 981 ; X64-SSE42-LABEL: test_reduce_v32i8: 982 ; X64-SSE42: ## %bb.0: 983 ; X64-SSE42-NEXT: pminsb %xmm1, %xmm0 984 ; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] 985 ; X64-SSE42-NEXT: pxor %xmm1, %xmm0 986 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 987 ; X64-SSE42-NEXT: psrlw $8, %xmm2 988 ; X64-SSE42-NEXT: pminub %xmm0, %xmm2 989 ; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0 990 ; X64-SSE42-NEXT: pxor %xmm1, %xmm0 991 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax 992 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 993 ; X64-SSE42-NEXT: retq 994 ; 995 ; X64-AVX1-LABEL: test_reduce_v32i8: 996 ; X64-AVX1: ## %bb.0: 997 ; 
X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 998 ; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 999 ; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] 1000 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1001 ; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 1002 ; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 1003 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1004 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1005 ; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax 1006 ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1007 ; X64-AVX1-NEXT: vzeroupper 1008 ; X64-AVX1-NEXT: retq 1009 ; 1010 ; X64-AVX2-LABEL: test_reduce_v32i8: 1011 ; X64-AVX2: ## %bb.0: 1012 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1013 ; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1014 ; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] 1015 ; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1016 ; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 1017 ; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 1018 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1019 ; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1020 ; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax 1021 ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1022 ; X64-AVX2-NEXT: vzeroupper 1023 ; X64-AVX2-NEXT: retq 1024 ; 1025 ; X64-AVX512-LABEL: test_reduce_v32i8: 1026 ; X64-AVX512: ## %bb.0: 1027 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1028 ; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1029 ; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] 1030 ; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1031 ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 1032 ; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 1033 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1034 ; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1035 ; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax 1036 ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 1037 ; X64-AVX512-NEXT: 
vzeroupper 1038 ; X64-AVX512-NEXT: retq 1039 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1040 %2 = icmp slt <32 x i8> %a0, %1 1041 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1 1042 %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1043 %5 = icmp slt <32 x i8> %3, %4 1044 %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 1045 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1046 %8 = icmp slt <32 x i8> %6, %7 1047 %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7 1048 %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1049 %11 = icmp slt <32 x i8> %9, %10 1050 %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> 
%10 1051 %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1052 %14 = icmp slt <32 x i8> %12, %13 1053 %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13 1054 %16 = extractelement <32 x i8> %15, i32 0 1055 ret i8 %16 1056 } 1057 1058 ; 1059 ; 512-bit Vectors 1060 ; 1061 1062 define i64 @test_reduce_v8i64(<8 x i64> %a0) { 1063 ; X86-SSE2-LABEL: test_reduce_v8i64: 1064 ; X86-SSE2: ## %bb.0: 1065 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0] 1066 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm5 1067 ; X86-SSE2-NEXT: pxor %xmm4, %xmm5 1068 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm6 1069 ; X86-SSE2-NEXT: pxor %xmm4, %xmm6 1070 ; X86-SSE2-NEXT: movdqa %xmm6, %xmm7 1071 ; X86-SSE2-NEXT: pcmpgtd %xmm5, %xmm7 1072 ; X86-SSE2-NEXT: pcmpeqd %xmm5, %xmm6 1073 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2] 1074 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1075 ; X86-SSE2-NEXT: pand %xmm5, %xmm6 1076 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1077 ; X86-SSE2-NEXT: por %xmm6, %xmm5 1078 ; X86-SSE2-NEXT: pand %xmm5, %xmm1 1079 ; X86-SSE2-NEXT: pandn %xmm3, %xmm5 1080 ; X86-SSE2-NEXT: por %xmm1, %xmm5 1081 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1082 ; X86-SSE2-NEXT: pxor %xmm4, %xmm1 1083 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 1084 ; X86-SSE2-NEXT: pxor %xmm4, %xmm3 1085 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm6 1086 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm6 1087 ; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm3 1088 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[0,0,2,2] 1089 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1090 ; X86-SSE2-NEXT: pand %xmm1, %xmm3 1091 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = 
xmm6[1,1,3,3] 1092 ; X86-SSE2-NEXT: por %xmm3, %xmm1 1093 ; X86-SSE2-NEXT: pand %xmm1, %xmm0 1094 ; X86-SSE2-NEXT: pandn %xmm2, %xmm1 1095 ; X86-SSE2-NEXT: por %xmm0, %xmm1 1096 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1097 ; X86-SSE2-NEXT: pxor %xmm4, %xmm0 1098 ; X86-SSE2-NEXT: movdqa %xmm5, %xmm2 1099 ; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1100 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 1101 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1102 ; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm2 1103 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 1104 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1105 ; X86-SSE2-NEXT: pand %xmm0, %xmm2 1106 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3] 1107 ; X86-SSE2-NEXT: por %xmm2, %xmm0 1108 ; X86-SSE2-NEXT: pand %xmm0, %xmm1 1109 ; X86-SSE2-NEXT: pandn %xmm5, %xmm0 1110 ; X86-SSE2-NEXT: por %xmm1, %xmm0 1111 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1112 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1113 ; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1114 ; X86-SSE2-NEXT: pxor %xmm1, %xmm4 1115 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm3 1116 ; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 1117 ; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm4 1118 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2] 1119 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 1120 ; X86-SSE2-NEXT: pand %xmm2, %xmm4 1121 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 1122 ; X86-SSE2-NEXT: por %xmm4, %xmm2 1123 ; X86-SSE2-NEXT: pand %xmm2, %xmm0 1124 ; X86-SSE2-NEXT: pandn %xmm1, %xmm2 1125 ; X86-SSE2-NEXT: por %xmm0, %xmm2 1126 ; X86-SSE2-NEXT: movd %xmm2, %eax 1127 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] 1128 ; X86-SSE2-NEXT: movd %xmm0, %edx 1129 ; X86-SSE2-NEXT: retl 1130 ; 1131 ; X86-SSE42-LABEL: test_reduce_v8i64: 1132 ; X86-SSE42: ## %bb.0: 1133 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm4 1134 ; X86-SSE42-NEXT: movdqa %xmm2, %xmm0 1135 ; X86-SSE42-NEXT: pcmpgtq %xmm4, %xmm0 1136 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1137 ; X86-SSE42-NEXT: movdqa %xmm3, %xmm0 1138 ; 
X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1139 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 1140 ; X86-SSE42-NEXT: movapd %xmm3, %xmm0 1141 ; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 1142 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3 1143 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1] 1144 ; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 1145 ; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1146 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 1147 ; X86-SSE42-NEXT: movd %xmm1, %eax 1148 ; X86-SSE42-NEXT: pextrd $1, %xmm1, %edx 1149 ; X86-SSE42-NEXT: retl 1150 ; 1151 ; X86-AVX1-LABEL: test_reduce_v8i64: 1152 ; X86-AVX1: ## %bb.0: 1153 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1154 ; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1155 ; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1156 ; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3 1157 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 1158 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1159 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1160 ; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1161 ; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3 1162 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 1163 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1164 ; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] 1165 ; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1166 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1167 ; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3 1168 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 1169 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1170 ; X86-AVX1-NEXT: vmovd %xmm0, %eax 1171 ; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx 1172 ; X86-AVX1-NEXT: vzeroupper 1173 ; X86-AVX1-NEXT: retl 1174 ; 1175 ; X86-AVX2-LABEL: test_reduce_v8i64: 1176 ; X86-AVX2: ## %bb.0: 1177 ; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 1178 ; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1179 ; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1180 ; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 1181 ; 
X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1182 ; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] 1183 ; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 1184 ; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1185 ; X86-AVX2-NEXT: vmovd %xmm0, %eax 1186 ; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx 1187 ; X86-AVX2-NEXT: vzeroupper 1188 ; X86-AVX2-NEXT: retl 1189 ; 1190 ; X64-SSE2-LABEL: test_reduce_v8i64: 1191 ; X64-SSE2: ## %bb.0: 1192 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0] 1193 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm5 1194 ; X64-SSE2-NEXT: pxor %xmm4, %xmm5 1195 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm6 1196 ; X64-SSE2-NEXT: pxor %xmm4, %xmm6 1197 ; X64-SSE2-NEXT: movdqa %xmm6, %xmm7 1198 ; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm7 1199 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] 1200 ; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm6 1201 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1202 ; X64-SSE2-NEXT: pand %xmm8, %xmm6 1203 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1204 ; X64-SSE2-NEXT: por %xmm6, %xmm5 1205 ; X64-SSE2-NEXT: pand %xmm5, %xmm1 1206 ; X64-SSE2-NEXT: pandn %xmm3, %xmm5 1207 ; X64-SSE2-NEXT: por %xmm1, %xmm5 1208 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1209 ; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1210 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm3 1211 ; X64-SSE2-NEXT: pxor %xmm4, %xmm3 1212 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm6 1213 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm6 1214 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1215 ; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm3 1216 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] 1217 ; X64-SSE2-NEXT: pand %xmm7, %xmm1 1218 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] 1219 ; X64-SSE2-NEXT: por %xmm1, %xmm3 1220 ; X64-SSE2-NEXT: pand %xmm3, %xmm0 1221 ; X64-SSE2-NEXT: pandn %xmm2, %xmm3 1222 ; X64-SSE2-NEXT: por %xmm0, %xmm3 1223 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm0 1224 ; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1225 ; X64-SSE2-NEXT: movdqa %xmm5, %xmm1 1226 ; 
X64-SSE2-NEXT: pxor %xmm4, %xmm1 1227 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1228 ; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 1229 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2] 1230 ; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1 1231 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 1232 ; X64-SSE2-NEXT: pand %xmm6, %xmm0 1233 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 1234 ; X64-SSE2-NEXT: por %xmm0, %xmm1 1235 ; X64-SSE2-NEXT: pand %xmm1, %xmm3 1236 ; X64-SSE2-NEXT: pandn %xmm5, %xmm1 1237 ; X64-SSE2-NEXT: por %xmm3, %xmm1 1238 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 1239 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1240 ; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1241 ; X64-SSE2-NEXT: pxor %xmm0, %xmm4 1242 ; X64-SSE2-NEXT: movdqa %xmm4, %xmm3 1243 ; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 1244 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] 1245 ; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm4 1246 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 1247 ; X64-SSE2-NEXT: pand %xmm5, %xmm2 1248 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1249 ; X64-SSE2-NEXT: por %xmm2, %xmm3 1250 ; X64-SSE2-NEXT: pand %xmm3, %xmm1 1251 ; X64-SSE2-NEXT: pandn %xmm0, %xmm3 1252 ; X64-SSE2-NEXT: por %xmm1, %xmm3 1253 ; X64-SSE2-NEXT: movq %xmm3, %rax 1254 ; X64-SSE2-NEXT: retq 1255 ; 1256 ; X64-SSE42-LABEL: test_reduce_v8i64: 1257 ; X64-SSE42: ## %bb.0: 1258 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm4 1259 ; X64-SSE42-NEXT: movdqa %xmm2, %xmm0 1260 ; X64-SSE42-NEXT: pcmpgtq %xmm4, %xmm0 1261 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1262 ; X64-SSE42-NEXT: movdqa %xmm3, %xmm0 1263 ; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1264 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 1265 ; X64-SSE42-NEXT: movapd %xmm3, %xmm0 1266 ; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 1267 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3 1268 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1] 1269 ; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 1270 ; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1271 ; X64-SSE42-NEXT: 
blendvpd %xmm0, %xmm3, %xmm1 1272 ; X64-SSE42-NEXT: movq %xmm1, %rax 1273 ; X64-SSE42-NEXT: retq 1274 ; 1275 ; X64-AVX1-LABEL: test_reduce_v8i64: 1276 ; X64-AVX1: ## %bb.0: 1277 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1278 ; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1279 ; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1280 ; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3 1281 ; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 1282 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1283 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1284 ; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1285 ; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3 1286 ; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 1287 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1288 ; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] 1289 ; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1290 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1291 ; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3 1292 ; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 1293 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1294 ; X64-AVX1-NEXT: vmovq %xmm0, %rax 1295 ; X64-AVX1-NEXT: vzeroupper 1296 ; X64-AVX1-NEXT: retq 1297 ; 1298 ; X64-AVX2-LABEL: test_reduce_v8i64: 1299 ; X64-AVX2: ## %bb.0: 1300 ; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 1301 ; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1302 ; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1303 ; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 1304 ; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1305 ; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] 1306 ; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 1307 ; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1308 ; X64-AVX2-NEXT: vmovq %xmm0, %rax 1309 ; X64-AVX2-NEXT: vzeroupper 1310 ; X64-AVX2-NEXT: retq 1311 ; 1312 ; X64-AVX512-LABEL: test_reduce_v8i64: 1313 ; X64-AVX512: ## %bb.0: 1314 ; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1315 ; X64-AVX512-NEXT: vpminsq %zmm1, 
%zmm0, %zmm0 1316 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1317 ; X64-AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0 1318 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1319 ; X64-AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0 1320 ; X64-AVX512-NEXT: vmovq %xmm0, %rax 1321 ; X64-AVX512-NEXT: vzeroupper 1322 ; X64-AVX512-NEXT: retq 1323 %1 = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1324 %2 = icmp slt <8 x i64> %a0, %1 1325 %3 = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1 1326 %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1327 %5 = icmp slt <8 x i64> %3, %4 1328 %6 = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4 1329 %7 = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1330 %8 = icmp slt <8 x i64> %6, %7 1331 %9 = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7 1332 %10 = extractelement <8 x i64> %9, i32 0 1333 ret i64 %10 1334 } 1335 1336 define i32 @test_reduce_v16i32(<16 x i32> %a0) { 1337 ; X86-SSE2-LABEL: test_reduce_v16i32: 1338 ; X86-SSE2: ## %bb.0: 1339 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm4 1340 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 1341 ; X86-SSE2-NEXT: pand %xmm4, %xmm0 1342 ; X86-SSE2-NEXT: pandn %xmm2, %xmm4 1343 ; X86-SSE2-NEXT: por %xmm0, %xmm4 1344 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm0 1345 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm0 1346 ; X86-SSE2-NEXT: pand %xmm0, %xmm1 1347 ; X86-SSE2-NEXT: pandn %xmm3, %xmm0 1348 ; X86-SSE2-NEXT: por %xmm1, %xmm0 1349 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1350 ; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm1 1351 ; X86-SSE2-NEXT: pand %xmm1, %xmm4 1352 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1353 ; X86-SSE2-NEXT: por %xmm4, %xmm1 1354 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 1355 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1356 ; X86-SSE2-NEXT: pcmpgtd 
%xmm1, %xmm2 1357 ; X86-SSE2-NEXT: pand %xmm2, %xmm1 1358 ; X86-SSE2-NEXT: pandn %xmm0, %xmm2 1359 ; X86-SSE2-NEXT: por %xmm1, %xmm2 1360 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] 1361 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1362 ; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1 1363 ; X86-SSE2-NEXT: pand %xmm1, %xmm2 1364 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1365 ; X86-SSE2-NEXT: por %xmm2, %xmm1 1366 ; X86-SSE2-NEXT: movd %xmm1, %eax 1367 ; X86-SSE2-NEXT: retl 1368 ; 1369 ; X86-SSE42-LABEL: test_reduce_v16i32: 1370 ; X86-SSE42: ## %bb.0: 1371 ; X86-SSE42-NEXT: pminsd %xmm3, %xmm1 1372 ; X86-SSE42-NEXT: pminsd %xmm2, %xmm0 1373 ; X86-SSE42-NEXT: pminsd %xmm1, %xmm0 1374 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1375 ; X86-SSE42-NEXT: pminsd %xmm0, %xmm1 1376 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 1377 ; X86-SSE42-NEXT: pminsd %xmm1, %xmm0 1378 ; X86-SSE42-NEXT: movd %xmm0, %eax 1379 ; X86-SSE42-NEXT: retl 1380 ; 1381 ; X86-AVX1-LABEL: test_reduce_v16i32: 1382 ; X86-AVX1: ## %bb.0: 1383 ; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1384 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1385 ; X86-AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2 1386 ; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1387 ; X86-AVX1-NEXT: vpminsd %xmm2, %xmm0, %xmm0 1388 ; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1389 ; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1390 ; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1391 ; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1392 ; X86-AVX1-NEXT: vmovd %xmm0, %eax 1393 ; X86-AVX1-NEXT: vzeroupper 1394 ; X86-AVX1-NEXT: retl 1395 ; 1396 ; X86-AVX2-LABEL: test_reduce_v16i32: 1397 ; X86-AVX2: ## %bb.0: 1398 ; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0 1399 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1400 ; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0 1401 ; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1402 ; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0 1403 ; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1404 
; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0 1405 ; X86-AVX2-NEXT: vmovd %xmm0, %eax 1406 ; X86-AVX2-NEXT: vzeroupper 1407 ; X86-AVX2-NEXT: retl 1408 ; 1409 ; X64-SSE2-LABEL: test_reduce_v16i32: 1410 ; X64-SSE2: ## %bb.0: 1411 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm4 1412 ; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 1413 ; X64-SSE2-NEXT: pand %xmm4, %xmm0 1414 ; X64-SSE2-NEXT: pandn %xmm2, %xmm4 1415 ; X64-SSE2-NEXT: por %xmm0, %xmm4 1416 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm0 1417 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm0 1418 ; X64-SSE2-NEXT: pand %xmm0, %xmm1 1419 ; X64-SSE2-NEXT: pandn %xmm3, %xmm0 1420 ; X64-SSE2-NEXT: por %xmm1, %xmm0 1421 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1422 ; X64-SSE2-NEXT: pcmpgtd %xmm4, %xmm1 1423 ; X64-SSE2-NEXT: pand %xmm1, %xmm4 1424 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1425 ; X64-SSE2-NEXT: por %xmm4, %xmm1 1426 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 1427 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1428 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1429 ; X64-SSE2-NEXT: pand %xmm2, %xmm1 1430 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2 1431 ; X64-SSE2-NEXT: por %xmm1, %xmm2 1432 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] 1433 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1434 ; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1 1435 ; X64-SSE2-NEXT: pand %xmm1, %xmm2 1436 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1437 ; X64-SSE2-NEXT: por %xmm2, %xmm1 1438 ; X64-SSE2-NEXT: movd %xmm1, %eax 1439 ; X64-SSE2-NEXT: retq 1440 ; 1441 ; X64-SSE42-LABEL: test_reduce_v16i32: 1442 ; X64-SSE42: ## %bb.0: 1443 ; X64-SSE42-NEXT: pminsd %xmm3, %xmm1 1444 ; X64-SSE42-NEXT: pminsd %xmm2, %xmm0 1445 ; X64-SSE42-NEXT: pminsd %xmm1, %xmm0 1446 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1447 ; X64-SSE42-NEXT: pminsd %xmm0, %xmm1 1448 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 1449 ; X64-SSE42-NEXT: pminsd %xmm1, %xmm0 1450 ; X64-SSE42-NEXT: movd %xmm0, %eax 1451 ; X64-SSE42-NEXT: retq 1452 ; 1453 ; X64-AVX1-LABEL: test_reduce_v16i32: 1454 ; X64-AVX1: ## %bb.0: 1455 ; 
X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1456 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1457 ; X64-AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2 1458 ; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1459 ; X64-AVX1-NEXT: vpminsd %xmm2, %xmm0, %xmm0 1460 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1461 ; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1462 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1463 ; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1464 ; X64-AVX1-NEXT: vmovd %xmm0, %eax 1465 ; X64-AVX1-NEXT: vzeroupper 1466 ; X64-AVX1-NEXT: retq 1467 ; 1468 ; X64-AVX2-LABEL: test_reduce_v16i32: 1469 ; X64-AVX2: ## %bb.0: 1470 ; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0 1471 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1472 ; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0 1473 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1474 ; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0 1475 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1476 ; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0 1477 ; X64-AVX2-NEXT: vmovd %xmm0, %eax 1478 ; X64-AVX2-NEXT: vzeroupper 1479 ; X64-AVX2-NEXT: retq 1480 ; 1481 ; X64-AVX512-LABEL: test_reduce_v16i32: 1482 ; X64-AVX512: ## %bb.0: 1483 ; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1484 ; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0 1485 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1486 ; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0 1487 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1488 ; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0 1489 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1490 ; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0 1491 ; X64-AVX512-NEXT: vmovd %xmm0, %eax 1492 ; X64-AVX512-NEXT: vzeroupper 1493 ; X64-AVX512-NEXT: retq 1494 %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1495 %2 = icmp slt <16 x i32> %a0, 
%1
  %3 = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1
  %4 = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <16 x i32> %3, %4
  %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4
  %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp slt <16 x i32> %6, %7
  %9 = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7
  %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp slt <16 x i32> %9, %10
  %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10
  %13 = extractelement <16 x i32> %12, i32 0
  ret i32 %13
}

define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pminsw %xmm3, %xmm1
; X86-SSE2-NEXT:    pminsw %xmm2, %xmm0
; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v32i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminsw %xmm3, %xmm1
; X86-SSE42-NEXT:    pminsw %xmm2, %xmm0
; X86-SSE42-NEXT:    pminsw %xmm1, %xmm0
; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v32i16:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpminsw %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpminsw %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v32i16:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pminsw %xmm3, %xmm1
; X64-SSE2-NEXT:    pminsw %xmm2, %xmm0
; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminsw %xmm3, %xmm1
; X64-SSE42-NEXT:    pminsw %xmm2, %xmm0
; X64-SSE42-NEXT:    pminsw %xmm1, %xmm0
; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v32i16:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpminsw %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpminsw %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v32i16:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v32i16:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp slt <32 x i16> %a0, %1
  %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
  %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <32 x i16> %3, %4
  %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
  %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp slt <32 x i16> %6, %7
  %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
  %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp slt <32 x i16> %9, %10
  %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10
  %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp slt <32 x i16> %12, %13
  %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13
  %16 = extractelement <32 x i16> %15, i32 0
  ret i16 %16
}

define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v64i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm4
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm4
; X86-SSE2-NEXT:    pand %xmm4, %xmm0
; X86-SSE2-NEXT:    pandn %xmm2, %xmm4
; X86-SSE2-NEXT:    por %xmm0, %xmm4
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm0
; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm0
; X86-SSE2-NEXT:    pand %xmm0, %xmm1
; X86-SSE2-NEXT:    pandn %xmm3, %xmm0
; X86-SSE2-NEXT:    por %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm4, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm4
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm4, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v64i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminsb %xmm3, %xmm1
; X86-SSE42-NEXT:    pminsb %xmm2, %xmm0
; X86-SSE42-NEXT:    pminsb %xmm1, %xmm0
; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE42-NEXT:    psrlw $8, %xmm2
; X86-SSE42-NEXT:    pminub %xmm0, %xmm2
; X86-SSE42-NEXT:    phminposuw %xmm2, %xmm0
; X86-SSE42-NEXT:    pxor %xmm1, %xmm0
; X86-SSE42-NEXT:    pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v64i8:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpminsb %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpminsb %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
; X86-AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v64i8:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X86-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm2
; X86-AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v64i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm4
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm4
; X64-SSE2-NEXT:    pand %xmm4, %xmm0
; X64-SSE2-NEXT:    pandn %xmm2, %xmm4
; X64-SSE2-NEXT:    por %xmm0, %xmm4
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm0
; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm0
; X64-SSE2-NEXT:    pand %xmm0, %xmm1
; X64-SSE2-NEXT:    pandn %xmm3, %xmm0
; X64-SSE2-NEXT:    por %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm4, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm4
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm4, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v64i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminsb %xmm3, %xmm1
; X64-SSE42-NEXT:    pminsb %xmm2, %xmm0
; X64-SSE42-NEXT:    pminsb %xmm1, %xmm0
; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE42-NEXT:    psrlw $8, %xmm2
; X64-SSE42-NEXT:    pminub %xmm0, %xmm2
; X64-SSE42-NEXT:    phminposuw %xmm2, %xmm0
; X64-SSE42-NEXT:    pxor %xmm1, %xmm0
; X64-SSE42-NEXT:    pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v64i8:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpminsb %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpminsb %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm2
; X64-AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v64i8:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm2
; X64-AVX2-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v64i8:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm2
; X64-AVX512-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp slt <64 x i8> %a0, %1
  %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
  %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <64 x i8> %3, %4
  %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
  %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp slt <64 x i8> %6, %7
  %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp slt <64 x i8> %9, %10
  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
  %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp slt <64 x i8> %12, %13
  %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13
  %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %17 = icmp slt <64 x i8> %15, %16
  %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16
  %19 = extractelement <64 x i8> %18, i32 0
  ret i8 %19
}