1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE2 3 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE42 4 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX1 5 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX2 6 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE2 7 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE42 8 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1 9 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2 10 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX512 11 12 ; 13 ; 128-bit Vectors 14 ; 15 16 define i64 @test_reduce_v2i64(<2 x i64> %a0) { 17 ; X86-SSE2-LABEL: test_reduce_v2i64: 18 ; X86-SSE2: ## %bb.0: 19 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 20 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] 21 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 22 ; X86-SSE2-NEXT: pxor %xmm2, %xmm3 23 ; X86-SSE2-NEXT: pxor %xmm1, %xmm2 24 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm4 25 ; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm4 26 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 27 ; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm2 28 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 29 ; X86-SSE2-NEXT: pand %xmm5, %xmm2 30 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 31 ; X86-SSE2-NEXT: por %xmm2, %xmm3 32 ; X86-SSE2-NEXT: pand %xmm3, %xmm0 33 ; X86-SSE2-NEXT: pandn %xmm1, %xmm3 34 ; X86-SSE2-NEXT: por %xmm0, %xmm3 35 ; X86-SSE2-NEXT: movd %xmm3, %eax 36 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3] 37 ; X86-SSE2-NEXT: movd %xmm0, %edx 38 ; X86-SSE2-NEXT: retl 39 ; 40 ; X86-SSE42-LABEL: test_reduce_v2i64: 41 ; X86-SSE42: ## %bb.0: 42 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 43 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 44 ; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 45 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 46 ; X86-SSE42-NEXT: movd %xmm2, %eax 47 ; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx 48 ; X86-SSE42-NEXT: retl 49 ; 50 ; X86-AVX-LABEL: test_reduce_v2i64: 51 ; X86-AVX: ## %bb.0: 52 ; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 53 ; X86-AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 54 ; X86-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 55 ; X86-AVX-NEXT: vmovd %xmm0, %eax 56 ; X86-AVX-NEXT: vpextrd $1, %xmm0, %edx 57 ; X86-AVX-NEXT: retl 58 ; 59 ; X64-SSE2-LABEL: test_reduce_v2i64: 60 ; X64-SSE2: ## %bb.0: 61 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 62 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] 63 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm3 64 ; X64-SSE2-NEXT: pxor %xmm2, %xmm3 65 ; X64-SSE2-NEXT: pxor %xmm1, %xmm2 66 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm4 67 ; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm4 68 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 69 ; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm2 70 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 71 ; X64-SSE2-NEXT: pand %xmm5, %xmm2 72 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 73 ; X64-SSE2-NEXT: por %xmm2, %xmm3 74 ; X64-SSE2-NEXT: pand %xmm3, %xmm0 75 ; X64-SSE2-NEXT: pandn %xmm1, %xmm3 76 ; X64-SSE2-NEXT: por %xmm0, %xmm3 77 ; X64-SSE2-NEXT: movq %xmm3, %rax 78 ; X64-SSE2-NEXT: retq 79 ; 80 ; X64-SSE42-LABEL: test_reduce_v2i64: 81 ; X64-SSE42: ## %bb.0: 82 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 83 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 84 ; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 85 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 86 ; X64-SSE42-NEXT: movq %xmm2, %rax 87 ; X64-SSE42-NEXT: retq 88 ; 89 ; X64-AVX1-LABEL: test_reduce_v2i64: 90 ; X64-AVX1: ## %bb.0: 91 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 92 ; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 93 ; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 94 ; X64-AVX1-NEXT: vmovq %xmm0, %rax 95 ; X64-AVX1-NEXT: retq 96 ; 97 ; X64-AVX2-LABEL: test_reduce_v2i64: 98 ; X64-AVX2: ## %bb.0: 99 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 100 ; X64-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 101 ; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 102 ; X64-AVX2-NEXT: vmovq %xmm0, %rax 103 ; X64-AVX2-NEXT: retq 104 ; 105 ; X64-AVX512-LABEL: test_reduce_v2i64: 106 ; X64-AVX512: ## %bb.0: 107 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 108 ; X64-AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 109 ; X64-AVX512-NEXT: vmovq %xmm0, %rax 110 ; X64-AVX512-NEXT: retq 111 %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef> 112 %2 = icmp sgt <2 x i64> %a0, %1 113 %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1 114 %4 = extractelement <2 x i64> %3, i32 0 115 ret i64 %4 116 } 117 118 define i32 @test_reduce_v4i32(<4 x i32> %a0) { 119 ; X86-SSE2-LABEL: test_reduce_v4i32: 120 ; X86-SSE2: ## %bb.0: 121 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 122 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 123 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 124 ; X86-SSE2-NEXT: pand %xmm2, %xmm0 125 ; X86-SSE2-NEXT: pandn %xmm1, %xmm2 126 ; X86-SSE2-NEXT: por %xmm0, %xmm2 127 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] 128 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 129 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 130 ; X86-SSE2-NEXT: pand %xmm1, %xmm2 131 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1 132 ; X86-SSE2-NEXT: por %xmm2, %xmm1 133 ; X86-SSE2-NEXT: movd %xmm1, %eax 134 ; X86-SSE2-NEXT: retl 135 ; 136 ; X86-SSE42-LABEL: test_reduce_v4i32: 137 ; X86-SSE42: ## %bb.0: 138 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 139 ; X86-SSE42-NEXT: pmaxsd %xmm0, %xmm1 140 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 141 ; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0 142 ; X86-SSE42-NEXT: movd %xmm0, %eax 143 ; X86-SSE42-NEXT: retl 144 ; 145 ; X86-AVX-LABEL: test_reduce_v4i32: 146 ; X86-AVX: ## %bb.0: 147 ; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 148 ; X86-AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 149 ; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 150 ; X86-AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 151 ; X86-AVX-NEXT: vmovd %xmm0, %eax 152 ; X86-AVX-NEXT: retl 153 ; 154 ; X64-SSE2-LABEL: test_reduce_v4i32: 155 ; X64-SSE2: ## %bb.0: 156 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 157 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 158 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 159 ; X64-SSE2-NEXT: pand %xmm2, %xmm0 160 ; X64-SSE2-NEXT: pandn %xmm1, %xmm2 161 ; X64-SSE2-NEXT: por %xmm0, %xmm2 162 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] 163 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 164 ; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 165 ; X64-SSE2-NEXT: pand %xmm1, %xmm2 166 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1 167 ; X64-SSE2-NEXT: por %xmm2, %xmm1 168 ; X64-SSE2-NEXT: movd %xmm1, %eax 169 ; X64-SSE2-NEXT: retq 170 ; 171 ; X64-SSE42-LABEL: test_reduce_v4i32: 172 ; X64-SSE42: ## %bb.0: 173 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 174 ; X64-SSE42-NEXT: pmaxsd %xmm0, %xmm1 175 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 176 ; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0 177 ; X64-SSE42-NEXT: movd %xmm0, %eax 178 ; X64-SSE42-NEXT: retq 179 ; 180 ; X64-AVX-LABEL: test_reduce_v4i32: 181 ; X64-AVX: ## %bb.0: 182 ; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 183 ; X64-AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 184 ; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 185 ; X64-AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 186 ; X64-AVX-NEXT: vmovd %xmm0, %eax 187 ; X64-AVX-NEXT: retq 188 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 189 %2 = icmp sgt <4 x i32> %a0, %1 190 %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1 191 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 192 %5 = icmp sgt <4 x i32> %3, %4 193 %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4 194 %7 = extractelement <4 x i32> %6, i32 0 195 ret i32 %7 196 } 197 198 define i16 @test_reduce_v8i16(<8 x i16> %a0) { 199 ; X86-SSE2-LABEL: test_reduce_v8i16: 200 ; X86-SSE2: ## %bb.0: 201 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 202 ; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 203 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 204 ; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 205 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 206 ; X86-SSE2-NEXT: psrld $16, %xmm1 207 ; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 208 ; X86-SSE2-NEXT: movd %xmm1, %eax 209 ; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 210 ; X86-SSE2-NEXT: retl 211 ; 212 ; X86-SSE42-LABEL: test_reduce_v8i16: 213 ; X86-SSE42: ## %bb.0: 214 ; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 215 ; X86-SSE42-NEXT: pxor %xmm1, %xmm0 216 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 217 ; X86-SSE42-NEXT: pxor %xmm1, %xmm0 218 ; X86-SSE42-NEXT: movd %xmm0, %eax 219 ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 220 ; X86-SSE42-NEXT: retl 221 ; 222 ; X86-AVX-LABEL: test_reduce_v8i16: 223 ; X86-AVX: ## %bb.0: 224 ; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 225 ; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 226 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 227 ; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 228 ; X86-AVX-NEXT: vmovd %xmm0, %eax 229 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 230 ; X86-AVX-NEXT: retl 231 ; 232 ; X64-SSE2-LABEL: test_reduce_v8i16: 233 ; X64-SSE2: ## %bb.0: 234 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 235 ; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 236 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 237 ; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 238 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 239 ; X64-SSE2-NEXT: psrld $16, %xmm1 240 ; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 241 ; X64-SSE2-NEXT: movd %xmm1, %eax 242 ; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 243 ; X64-SSE2-NEXT: retq 244 ; 245 ; X64-SSE42-LABEL: test_reduce_v8i16: 246 ; X64-SSE42: ## %bb.0: 247 ; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 248 ; X64-SSE42-NEXT: pxor %xmm1, %xmm0 249 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 250 ; X64-SSE42-NEXT: pxor %xmm1, %xmm0 251 ; X64-SSE42-NEXT: movd %xmm0, %eax 252 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 253 ; X64-SSE42-NEXT: retq 254 ; 255 ; X64-AVX-LABEL: test_reduce_v8i16: 256 ; X64-AVX: ## %bb.0: 257 ; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 258 ; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 259 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 260 ; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 261 ; X64-AVX-NEXT: vmovd %xmm0, %eax 262 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 263 ; X64-AVX-NEXT: retq 264 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 265 %2 = icmp sgt <8 x i16> %a0, %1 266 %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1 267 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 268 %5 = icmp sgt <8 x i16> %3, %4 269 %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4 270 %7 = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 271 %8 = icmp sgt <8 x i16> %6, %7 272 %9 = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7 273 %10 = extractelement <8 x i16> %9, i32 0 274 ret i16 %10 275 } 276 277 define i8 @test_reduce_v16i8(<16 x i8> %a0) { 278 ; X86-SSE2-LABEL: test_reduce_v16i8: 279 ; X86-SSE2: ## %bb.0: 280 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 281 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 282 ; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 283 ; X86-SSE2-NEXT: pand %xmm2, %xmm0 284 ; X86-SSE2-NEXT: pandn %xmm1, %xmm2 285 ; X86-SSE2-NEXT: por %xmm0, %xmm2 286 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] 287 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 288 ; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 289 ; X86-SSE2-NEXT: pand %xmm1, %xmm2 290 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1 291 ; X86-SSE2-NEXT: por %xmm2, %xmm1 292 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 293 ; X86-SSE2-NEXT: psrld $16, %xmm0 294 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 295 ; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 296 ; X86-SSE2-NEXT: pand %xmm2, %xmm1 297 ; X86-SSE2-NEXT: pandn %xmm0, %xmm2 298 ; X86-SSE2-NEXT: por %xmm1, %xmm2 299 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 300 ; X86-SSE2-NEXT: psrlw $8, %xmm0 301 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 302 ; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 303 ; X86-SSE2-NEXT: pand %xmm1, %xmm2 304 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1 305 ; X86-SSE2-NEXT: por %xmm2, %xmm1 306 ; X86-SSE2-NEXT: movd %xmm1, %eax 307 ; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 308 ; X86-SSE2-NEXT: retl 309 ; 310 ; X86-SSE42-LABEL: test_reduce_v16i8: 311 ; X86-SSE42: ## %bb.0: 312 ; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 313 ; X86-SSE42-NEXT: pxor %xmm1, %xmm0 314 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 315 ; X86-SSE42-NEXT: psrlw $8, %xmm2 316 ; X86-SSE42-NEXT: pminub %xmm0, %xmm2 317 ; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0 318 ; X86-SSE42-NEXT: pxor %xmm1, %xmm0 319 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax 320 ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 321 ; X86-SSE42-NEXT: retl 322 ; 323 ; X86-AVX-LABEL: test_reduce_v16i8: 324 ; X86-AVX: ## %bb.0: 325 ; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 326 ; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 327 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2 328 ; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0 329 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 330 ; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 331 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax 332 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax 333 ; X86-AVX-NEXT: retl 334 ; 335 ; X64-SSE2-LABEL: test_reduce_v16i8: 336 ; X64-SSE2: ## %bb.0: 337 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 338 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 339 ; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 340 ; X64-SSE2-NEXT: pand %xmm2, %xmm0 341 ; X64-SSE2-NEXT: pandn %xmm1, %xmm2 342 ; X64-SSE2-NEXT: por %xmm0, %xmm2 343 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] 344 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 345 ; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 346 ; X64-SSE2-NEXT: pand %xmm1, %xmm2 347 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1 348 ; X64-SSE2-NEXT: por %xmm2, %xmm1 349 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 350 ; X64-SSE2-NEXT: psrld $16, %xmm0 351 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 352 ; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 353 ; X64-SSE2-NEXT: pand %xmm2, %xmm1 354 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2 355 ; X64-SSE2-NEXT: por %xmm1, %xmm2 356 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 357 ; X64-SSE2-NEXT: psrlw $8, %xmm0 358 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 359 ; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 360 ; X64-SSE2-NEXT: pand %xmm1, %xmm2 361 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1 362 ; X64-SSE2-NEXT: por %xmm2, %xmm1 363 ; X64-SSE2-NEXT: movd %xmm1, %eax 364 ; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 365 ; X64-SSE2-NEXT: retq 366 ; 367 ; X64-SSE42-LABEL: test_reduce_v16i8: 368 ; X64-SSE42: ## %bb.0: 369 ; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 370 ; X64-SSE42-NEXT: pxor %xmm1, %xmm0 371 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 372 ; X64-SSE42-NEXT: psrlw $8, %xmm2 373 ; X64-SSE42-NEXT: pminub %xmm0, %xmm2 374 ; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0 375 ; X64-SSE42-NEXT: pxor %xmm1, %xmm0 376 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax 377 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 378 ; X64-SSE42-NEXT: retq 379 ; 380 ; X64-AVX-LABEL: test_reduce_v16i8: 381 ; X64-AVX: ## %bb.0: 382 ; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 383 ; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 384 ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2 385 ; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0 386 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 387 ; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 388 ; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax 389 ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax 390 ; X64-AVX-NEXT: retq 391 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 392 %2 = icmp sgt <16 x i8> %a0, %1 393 %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1 394 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 395 %5 = icmp sgt <16 x i8> %3, %4 396 %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4 397 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 398 %8 = icmp sgt <16 x i8> %6, %7 399 %9 = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7 400 %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 401 %11 = icmp sgt <16 x i8> %9, %10 402 %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10 403 %13 = extractelement <16 x i8> %12, i32 0 404 ret i8 %13 405 } 406 407 ; 408 ; 256-bit Vectors 409 ; 410 411 define i64 @test_reduce_v4i64(<4 x i64> %a0) { 412 ; X86-SSE2-LABEL: test_reduce_v4i64: 413 ; X86-SSE2: ## %bb.0: 414 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] 415 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm3 416 ; X86-SSE2-NEXT: pxor %xmm2, %xmm3 417 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm4 418 ; X86-SSE2-NEXT: pxor %xmm2, %xmm4 419 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm5 420 ; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm5 421 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 422 ; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm4 423 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 424 ; X86-SSE2-NEXT: pand %xmm6, %xmm3 425 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] 426 ; X86-SSE2-NEXT: por %xmm3, %xmm4 427 ; X86-SSE2-NEXT: pand %xmm4, %xmm0 428 ; X86-SSE2-NEXT: pandn %xmm1, %xmm4 429 ; X86-SSE2-NEXT: por %xmm0, %xmm4 430 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1] 431 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm1 432 ; X86-SSE2-NEXT: pxor %xmm2, %xmm1 433 ; X86-SSE2-NEXT: pxor %xmm0, %xmm2 434 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm3 435 ; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 436 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] 437 ; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm2 438 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 439 ; X86-SSE2-NEXT: pand %xmm5, %xmm1 440 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 441 ; X86-SSE2-NEXT: por %xmm1, %xmm2 442 ; X86-SSE2-NEXT: pand %xmm2, %xmm4 443 ; X86-SSE2-NEXT: pandn %xmm0, %xmm2 444 ; X86-SSE2-NEXT: por %xmm4, %xmm2 445 ; X86-SSE2-NEXT: movd %xmm2, %eax 446 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] 447 ; X86-SSE2-NEXT: movd %xmm0, %edx 448 ; X86-SSE2-NEXT: retl 449 ; 450 ; X86-SSE42-LABEL: test_reduce_v4i64: 451 ; X86-SSE42: ## %bb.0: 452 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 453 ; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 454 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1 455 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1] 456 ; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 457 ; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 458 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 459 ; X86-SSE42-NEXT: movd %xmm2, %eax 460 ; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx 461 ; X86-SSE42-NEXT: retl 462 ; 463 ; X86-AVX1-LABEL: test_reduce_v4i64: 464 ; X86-AVX1: ## %bb.0: 465 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 466 ; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 467 ; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3 468 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 469 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 470 ; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] 471 ; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 472 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 473 ; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3 474 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 475 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 476 ; X86-AVX1-NEXT: vmovd %xmm0, %eax 477 ; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx 478 ; X86-AVX1-NEXT: vzeroupper 479 ; X86-AVX1-NEXT: retl 480 ; 481 ; X86-AVX2-LABEL: test_reduce_v4i64: 482 ; X86-AVX2: ## %bb.0: 483 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 484 ; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 485 ; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 486 ; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] 487 ; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 488 ; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 489 ; X86-AVX2-NEXT: vmovd %xmm0, %eax 490 ; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx 491 ; X86-AVX2-NEXT: vzeroupper 492 ; X86-AVX2-NEXT: retl 493 ; 494 ; X64-SSE2-LABEL: test_reduce_v4i64: 495 ; X64-SSE2: ## %bb.0: 496 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] 497 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm3 498 ; X64-SSE2-NEXT: pxor %xmm2, %xmm3 499 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm4 500 ; X64-SSE2-NEXT: pxor %xmm2, %xmm4 501 ; X64-SSE2-NEXT: movdqa %xmm4, %xmm5 502 ; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm5 503 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 504 ; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm4 505 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 506 ; X64-SSE2-NEXT: pand %xmm6, %xmm3 507 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] 508 ; X64-SSE2-NEXT: por %xmm3, %xmm4 509 ; X64-SSE2-NEXT: pand %xmm4, %xmm0 510 ; X64-SSE2-NEXT: pandn %xmm1, %xmm4 511 ; X64-SSE2-NEXT: por %xmm0, %xmm4 512 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1] 513 ; X64-SSE2-NEXT: movdqa %xmm4, %xmm1 514 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1 515 ; X64-SSE2-NEXT: pxor %xmm0, %xmm2 516 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm3 517 ; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 518 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] 519 ; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm2 520 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 521 ; X64-SSE2-NEXT: pand %xmm5, %xmm1 522 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 523 ; X64-SSE2-NEXT: por %xmm1, %xmm2 524 ; X64-SSE2-NEXT: pand %xmm2, %xmm4 525 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2 526 ; X64-SSE2-NEXT: por %xmm4, %xmm2 527 ; X64-SSE2-NEXT: movq %xmm2, %rax 528 ; X64-SSE2-NEXT: retq 529 ; 530 ; X64-SSE42-LABEL: test_reduce_v4i64: 531 ; X64-SSE42: ## %bb.0: 532 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 533 ; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 534 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1 535 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1] 536 ; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 537 ; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 538 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 539 ; X64-SSE42-NEXT: movq %xmm2, %rax 540 ; X64-SSE42-NEXT: retq 541 ; 542 ; X64-AVX1-LABEL: test_reduce_v4i64: 543 ; X64-AVX1: ## %bb.0: 544 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 545 ; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 546 ; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3 547 ; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 548 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 549 ; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] 550 ; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 551 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 552 ; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3 553 ; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 554 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 555 ; X64-AVX1-NEXT: vmovq %xmm0, %rax 556 ; X64-AVX1-NEXT: vzeroupper 557 ; X64-AVX1-NEXT: retq 558 ; 559 ; X64-AVX2-LABEL: test_reduce_v4i64: 560 ; X64-AVX2: ## %bb.0: 561 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 562 ; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 563 ; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 564 ; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] 565 ; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 566 ; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 567 ; X64-AVX2-NEXT: vmovq %xmm0, %rax 568 ; X64-AVX2-NEXT: vzeroupper 569 ; X64-AVX2-NEXT: retq 570 ; 571 ; X64-AVX512-LABEL: test_reduce_v4i64: 572 ; X64-AVX512: ## %bb.0: 573 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 574 ; X64-AVX512-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 575 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 576 ; X64-AVX512-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 577 ; X64-AVX512-NEXT: vmovq %xmm0, %rax 578 ; X64-AVX512-NEXT: vzeroupper 579 ; X64-AVX512-NEXT: retq 580 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 581 %2 = icmp sgt <4 x i64> %a0, %1 582 %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1 583 %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 584 %5 = icmp sgt <4 x i64> %3, %4 585 %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4 586 %7 = extractelement <4 x i64> %6, i32 0 587 ret i64 %7 588 } 589 590 define i32 @test_reduce_v8i32(<8 x i32> %a0) { 591 ; X86-SSE2-LABEL: test_reduce_v8i32: 592 ; X86-SSE2: ## %bb.0: 593 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 594 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 595 ; X86-SSE2-NEXT: pand %xmm2, %xmm0 596 ; X86-SSE2-NEXT: pandn %xmm1, %xmm2 597 ; X86-SSE2-NEXT: por %xmm0, %xmm2 598 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] 599 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 600 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 601 ; X86-SSE2-NEXT: pand %xmm1, %xmm2 602 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1 603 ; X86-SSE2-NEXT: por %xmm2, %xmm1 604 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 605 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 606 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 607 ; X86-SSE2-NEXT: pand %xmm2, %xmm1 608 ; X86-SSE2-NEXT: pandn %xmm0, %xmm2 609 ; X86-SSE2-NEXT: por %xmm1, %xmm2 610 ; X86-SSE2-NEXT: movd %xmm2, %eax 611 ; X86-SSE2-NEXT: retl 612 ; 613 ; X86-SSE42-LABEL: test_reduce_v8i32: 614 ; X86-SSE42: ## %bb.0: 615 ; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0 616 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 617 ; X86-SSE42-NEXT: pmaxsd %xmm0, %xmm1 618 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 619 ; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0 620 ; X86-SSE42-NEXT: movd %xmm0, %eax 621 ; X86-SSE42-NEXT: retl 622 ; 623 ; X86-AVX1-LABEL: test_reduce_v8i32: 624 ; X86-AVX1: ## %bb.0: 625 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 626 ; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 627 ; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 628 ; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 629 ; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 630 ; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 631 ; X86-AVX1-NEXT: vmovd %xmm0, %eax 632 ; X86-AVX1-NEXT: vzeroupper 633 ; X86-AVX1-NEXT: retl 634 ; 635 ; X86-AVX2-LABEL: test_reduce_v8i32: 636 ; X86-AVX2: ## %bb.0: 637 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 638 ; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 639 ; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 640 ; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 641 ; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 642 ; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 643 ; X86-AVX2-NEXT: vmovd %xmm0, %eax 644 ; X86-AVX2-NEXT: vzeroupper 645 ; X86-AVX2-NEXT: retl 646 ; 647 ; X64-SSE2-LABEL: test_reduce_v8i32: 648 ; X64-SSE2: ## %bb.0: 649 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 650 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 651 ; X64-SSE2-NEXT: pand %xmm2, %xmm0 652 ; X64-SSE2-NEXT: pandn %xmm1, %xmm2 653 ; X64-SSE2-NEXT: por %xmm0, %xmm2 654 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] 655 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 656 ; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 657 ; X64-SSE2-NEXT: pand %xmm1, %xmm2 658 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1 659 ; X64-SSE2-NEXT: por %xmm2, %xmm1 660 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 661 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 662 ; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 663 ; X64-SSE2-NEXT: pand %xmm2, %xmm1 664 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2 665 ; X64-SSE2-NEXT: por %xmm1, %xmm2 666 ; X64-SSE2-NEXT: movd %xmm2, %eax 667 ; X64-SSE2-NEXT: retq 668 ; 669 ; X64-SSE42-LABEL: test_reduce_v8i32: 670 ; X64-SSE42: ## %bb.0: 671 ; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0 672 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 673 ; X64-SSE42-NEXT: pmaxsd %xmm0, %xmm1 674 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 675 ; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0 676 ; X64-SSE42-NEXT: movd %xmm0, %eax 677 ; X64-SSE42-NEXT: retq 678 ; 679 ; X64-AVX1-LABEL: test_reduce_v8i32: 680 ; X64-AVX1: ## %bb.0: 681 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 682 ; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 683 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 684 ; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 685 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 686 ; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 687 ; X64-AVX1-NEXT: vmovd %xmm0, %eax 688 ; X64-AVX1-NEXT: vzeroupper 689 ; X64-AVX1-NEXT: retq 690 ; 691 ; X64-AVX2-LABEL: test_reduce_v8i32: 692 ; X64-AVX2: ## %bb.0: 693 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 694 ; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 695 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 696 ; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 697 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 698 ; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 699 ; X64-AVX2-NEXT: vmovd %xmm0, %eax 700 ; X64-AVX2-NEXT: vzeroupper 701 ; X64-AVX2-NEXT: retq 702 ; 703 ; X64-AVX512-LABEL: test_reduce_v8i32: 704 ; X64-AVX512: ## %bb.0: 705 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 706 ; X64-AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 707 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 708 ; X64-AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 709 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 710 ; X64-AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 711 ; X64-AVX512-NEXT: vmovd %xmm0, %eax 712 ; X64-AVX512-NEXT: vzeroupper 713 ; X64-AVX512-NEXT: retq 714 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 715 %2 = icmp sgt <8 x i32> %a0, %1 716 %3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1 717 %4 = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 718 %5 = icmp sgt <8 x i32> %3, %4 719 %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4 720 %7 = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 721 %8 = icmp sgt <8 x i32> %6, %7 722 %9 = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7 723 %10 = extractelement <8 x i32> %9, i32 0 724 ret i32 %10 725 } 726 727 define i16 @test_reduce_v16i16(<16 x i16> %a0) { 728 ; X86-SSE2-LABEL: test_reduce_v16i16: 729 ; X86-SSE2: ## %bb.0: 730 ; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 731 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 732 ; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 733 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 734 ; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 735 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 736 ; X86-SSE2-NEXT: psrld $16, %xmm1 737 ; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 738 ; X86-SSE2-NEXT: movd %xmm1, %eax 739 ; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 740 ; X86-SSE2-NEXT: retl 741 ; 742 ; X86-SSE42-LABEL: test_reduce_v16i16: 743 ; X86-SSE42: ## %bb.0: 744 ; X86-SSE42-NEXT: pmaxsw %xmm1, %xmm0 745 ; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 746 ; X86-SSE42-NEXT: pxor %xmm1, %xmm0 747 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 748 ; X86-SSE42-NEXT: pxor %xmm1, %xmm0 749 ; X86-SSE42-NEXT: movd %xmm0, %eax 750 ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 751 ; X86-SSE42-NEXT: retl 752 ; 753 ; X86-AVX1-LABEL: test_reduce_v16i16: 754 ; X86-AVX1: ## %bb.0: 755 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 756 ; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 757 ; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 758 ; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 759 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 760 ; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 761 ; X86-AVX1-NEXT: vmovd %xmm0, %eax 762 ; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 763 ; X86-AVX1-NEXT: vzeroupper 764 ; X86-AVX1-NEXT: retl 765 ; 766 ; X86-AVX2-LABEL: test_reduce_v16i16: 767 ; X86-AVX2: ## %bb.0: 768 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 769 ; X86-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 770 ; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 771 ; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 772 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 773 ; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 774 ; X86-AVX2-NEXT: vmovd %xmm0, %eax 775 ; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 776 ; X86-AVX2-NEXT: vzeroupper 777 ; X86-AVX2-NEXT: retl 778 ; 779 ; X64-SSE2-LABEL: test_reduce_v16i16: 780 ; X64-SSE2: ## %bb.0: 781 ; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 782 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 783 ; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 784 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 785 ; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 786 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 787 ; X64-SSE2-NEXT: psrld $16, %xmm1 788 ; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 789 ; X64-SSE2-NEXT: movd %xmm1, %eax 790 ; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 791 ; X64-SSE2-NEXT: retq 792 ; 793 ; X64-SSE42-LABEL: test_reduce_v16i16: 794 ; X64-SSE42: ## %bb.0: 795 ; X64-SSE42-NEXT: pmaxsw %xmm1, %xmm0 796 ; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 797 ; X64-SSE42-NEXT: pxor %xmm1, %xmm0 798 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 799 ; X64-SSE42-NEXT: pxor %xmm1, %xmm0 800 ; X64-SSE42-NEXT: movd %xmm0, %eax 801 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 802 ; X64-SSE42-NEXT: retq 803 ; 804 ; X64-AVX1-LABEL: test_reduce_v16i16: 805 ; X64-AVX1: ## %bb.0: 806 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 807 ; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 808 ; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 809 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 810 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 811 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 812 ; X64-AVX1-NEXT: vmovd %xmm0, %eax 813 ; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 814 ; X64-AVX1-NEXT: vzeroupper 815 ; X64-AVX1-NEXT: retq 816 ; 817 ; X64-AVX2-LABEL: test_reduce_v16i16: 818 ; X64-AVX2: ## %bb.0: 819 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 820 ; X64-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 821 ; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 822 ; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 823 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 824 ; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 825 ; X64-AVX2-NEXT: vmovd %xmm0, %eax 826 ; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 827 ; X64-AVX2-NEXT: vzeroupper 828 ; X64-AVX2-NEXT: retq 829 ; 830 ; X64-AVX512-LABEL: test_reduce_v16i16: 831 ; X64-AVX512: ## %bb.0: 832 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 833 ; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 834 ; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 835 ; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 836 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 837 ; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 838 ; X64-AVX512-NEXT: vmovd %xmm0, %eax 839 ; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 840 ; X64-AVX512-NEXT: vzeroupper 841 ; X64-AVX512-NEXT: retq 842 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 843 %2 = icmp sgt <16 x i16> %a0, %1 844 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1 845 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 846 %5 = icmp sgt <16 x i16> %3, %4 847 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 848 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 849 %8 = icmp sgt <16 x i16> %6, %7 850 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7 851 %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 852 %11 = icmp sgt <16 x i16> %9, %10 853 %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10 854 %13 = extractelement <16 x i16> %12, i32 0 855 ret i16 %13 856 } 857 858 define i8 @test_reduce_v32i8(<32 x i8> %a0) { 859 ; X86-SSE2-LABEL: test_reduce_v32i8: 860 ; X86-SSE2: ## %bb.0: 861 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 862 ; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 863 ; X86-SSE2-NEXT: pand %xmm2, %xmm0 864 ; X86-SSE2-NEXT: pandn %xmm1, %xmm2 865 ; X86-SSE2-NEXT: por %xmm0, %xmm2 866 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] 867 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 868 ; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 869 ; X86-SSE2-NEXT: pand %xmm1, %xmm2 870 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1 871 ; X86-SSE2-NEXT: por %xmm2, %xmm1 872 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 873 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 874 ; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 875 ; X86-SSE2-NEXT: pand %xmm2, %xmm1 876 ; X86-SSE2-NEXT: pandn %xmm0, %xmm2 877 ; X86-SSE2-NEXT: por %xmm1, %xmm2 878 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 879 ; X86-SSE2-NEXT: psrld $16, %xmm0 880 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 881 ; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 882 ; X86-SSE2-NEXT: pand %xmm1, %xmm2 883 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1 884 ; X86-SSE2-NEXT: por %xmm2, %xmm1 885 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 886 ; X86-SSE2-NEXT: psrlw $8, %xmm0 887 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 888 ; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 889 ; X86-SSE2-NEXT: pand %xmm2, %xmm1 890 ; X86-SSE2-NEXT: pandn %xmm0, %xmm2 891 ; X86-SSE2-NEXT: por %xmm1, %xmm2 892 ; X86-SSE2-NEXT: movd %xmm2, %eax 893 ; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 894 ; X86-SSE2-NEXT: retl 895 ; 896 ; X86-SSE42-LABEL: test_reduce_v32i8: 897 ; X86-SSE42: ## %bb.0: 898 ; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0 899 ; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 900 ; X86-SSE42-NEXT: pxor %xmm1, %xmm0 901 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 902 ; X86-SSE42-NEXT: psrlw $8, %xmm2 903 ; X86-SSE42-NEXT: pminub %xmm0, %xmm2 904 ; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0 905 ; X86-SSE42-NEXT: pxor %xmm1, %xmm0 906 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax 907 ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 908 ; X86-SSE42-NEXT: retl 909 ; 910 ; X86-AVX1-LABEL: test_reduce_v32i8: 911 ; X86-AVX1: ## %bb.0: 912 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 913 ; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 914 ; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 915 ; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 916 ; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 917 ; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 918 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 919 ; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 920 ; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax 921 ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax 922 ; X86-AVX1-NEXT: vzeroupper 923 ; X86-AVX1-NEXT: retl 924 ; 925 ; X86-AVX2-LABEL: test_reduce_v32i8: 926 ; X86-AVX2: ## %bb.0: 927 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 928 ; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 929 ; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 930 ; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 931 ; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 932 ; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 933 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 934 ; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 935 ; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax 936 ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax 937 ; X86-AVX2-NEXT: vzeroupper 938 ; X86-AVX2-NEXT: retl 939 ; 940 ; X64-SSE2-LABEL: test_reduce_v32i8: 941 ; X64-SSE2: ## %bb.0: 942 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 943 ; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 944 ; X64-SSE2-NEXT: pand %xmm2, %xmm0 945 ; X64-SSE2-NEXT: pandn %xmm1, %xmm2 946 ; X64-SSE2-NEXT: por %xmm0, %xmm2 947 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] 948 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 949 ; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 950 ; X64-SSE2-NEXT: pand %xmm1, %xmm2 951 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1 952 ; X64-SSE2-NEXT: por %xmm2, %xmm1 953 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 954 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 955 ; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 956 ; X64-SSE2-NEXT: pand %xmm2, %xmm1 957 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2 958 ; X64-SSE2-NEXT: por %xmm1, %xmm2 959 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 960 ; X64-SSE2-NEXT: psrld $16, %xmm0 961 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 962 ; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 963 ; X64-SSE2-NEXT: pand %xmm1, %xmm2 964 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1 965 ; X64-SSE2-NEXT: por %xmm2, %xmm1 966 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 967 ; X64-SSE2-NEXT: psrlw $8, %xmm0 968 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 969 ; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 970 ; X64-SSE2-NEXT: pand %xmm2, %xmm1 971 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2 972 ; X64-SSE2-NEXT: por %xmm1, %xmm2 973 ; X64-SSE2-NEXT: movd %xmm2, %eax 974 ; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 975 ; X64-SSE2-NEXT: retq 976 ; 977 ; X64-SSE42-LABEL: test_reduce_v32i8: 978 ; X64-SSE42: ## %bb.0: 979 ; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0 980 ; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 981 ; X64-SSE42-NEXT: pxor %xmm1, %xmm0 982 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 983 ; X64-SSE42-NEXT: psrlw $8, %xmm2 984 ; X64-SSE42-NEXT: pminub %xmm0, %xmm2 985 ; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0 986 ; X64-SSE42-NEXT: pxor %xmm1, %xmm0 987 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax 988 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 989 ; X64-SSE42-NEXT: retq 990 ; 991 ; X64-AVX1-LABEL: test_reduce_v32i8: 992 ; X64-AVX1: ## %bb.0: 993 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 994 ; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 995 ; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 996 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 997 ; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 998 ; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 999 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1000 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1001 ; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax 1002 ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1003 ; X64-AVX1-NEXT: vzeroupper 1004 ; X64-AVX1-NEXT: retq 1005 ; 1006 ; X64-AVX2-LABEL: test_reduce_v32i8: 1007 ; X64-AVX2: ## %bb.0: 1008 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1009 ; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1010 ; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1011 ; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1012 ; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 1013 ; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 1014 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1015 ; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1016 ; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax 1017 ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1018 ; X64-AVX2-NEXT: vzeroupper 1019 ; X64-AVX2-NEXT: retq 1020 ; 1021 ; X64-AVX512-LABEL: test_reduce_v32i8: 1022 ; X64-AVX512: ## %bb.0: 1023 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1024 ; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1025 ; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1026 ; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1027 ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 1028 ; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 1029 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1030 ; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1031 ; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax 1032 ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 1033 ; X64-AVX512-NEXT: vzeroupper 1034 ; X64-AVX512-NEXT: retq 1035 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1036 %2 = icmp sgt <32 x i8> %a0, %1 1037 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1 1038 %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1039 %5 = icmp sgt <32 x i8> %3, %4 1040 %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 1041 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1042 %8 = icmp sgt <32 x i8> %6, %7 1043 %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7 1044 %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1045 %11 = icmp sgt <32 x i8> %9, %10 1046 %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10 1047 %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1048 %14 = icmp sgt <32 x i8> %12, %13 1049 %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13 1050 %16 = extractelement <32 x i8> %15, i32 0 1051 ret i8 %16 1052 } 1053 1054 ; 1055 ; 512-bit Vectors 1056 ; 1057 1058 define i64 @test_reduce_v8i64(<8 x i64> %a0) { 1059 ; X86-SSE2-LABEL: test_reduce_v8i64: 1060 ; X86-SSE2: ## %bb.0: 1061 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0] 1062 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm5 1063 ; X86-SSE2-NEXT: pxor %xmm4, %xmm5 1064 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm6 1065 ; X86-SSE2-NEXT: pxor %xmm4, %xmm6 1066 ; X86-SSE2-NEXT: movdqa %xmm6, %xmm7 1067 ; X86-SSE2-NEXT: pcmpgtd %xmm5, %xmm7 1068 ; X86-SSE2-NEXT: pcmpeqd %xmm5, %xmm6 1069 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2] 1070 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1071 ; X86-SSE2-NEXT: pand %xmm5, %xmm6 1072 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1073 ; X86-SSE2-NEXT: por %xmm6, %xmm5 1074 ; X86-SSE2-NEXT: pand %xmm5, %xmm0 1075 ; X86-SSE2-NEXT: pandn %xmm2, %xmm5 1076 ; X86-SSE2-NEXT: por %xmm0, %xmm5 1077 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm0 1078 ; X86-SSE2-NEXT: pxor %xmm4, %xmm0 1079 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 1080 ; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1081 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm6 1082 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm6 1083 ; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm2 1084 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 1085 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1086 ; X86-SSE2-NEXT: pand %xmm0, %xmm2 1087 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] 1088 ; X86-SSE2-NEXT: por %xmm2, %xmm0 1089 ; X86-SSE2-NEXT: pand %xmm0, %xmm1 1090 ; X86-SSE2-NEXT: pandn %xmm3, %xmm0 1091 ; X86-SSE2-NEXT: por %xmm1, %xmm0 1092 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1093 ; X86-SSE2-NEXT: pxor %xmm4, %xmm1 1094 ; X86-SSE2-NEXT: movdqa %xmm5, %xmm2 1095 ; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1096 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 1097 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3 1098 ; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm2 1099 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,0,2,2] 1100 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1101 ; X86-SSE2-NEXT: pand %xmm1, %xmm2 1102 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] 1103 ; X86-SSE2-NEXT: por %xmm2, %xmm1 1104 ; X86-SSE2-NEXT: pand %xmm1, %xmm5 1105 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1106 ; X86-SSE2-NEXT: por %xmm5, %xmm1 1107 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 1108 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 1109 ; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1110 ; X86-SSE2-NEXT: pxor %xmm0, %xmm4 1111 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 1112 ; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm3 1113 ; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm4 1114 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2] 1115 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 1116 ; X86-SSE2-NEXT: pand %xmm2, %xmm4 1117 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 1118 ; X86-SSE2-NEXT: por %xmm4, %xmm2 1119 ; X86-SSE2-NEXT: pand %xmm2, %xmm1 1120 ; X86-SSE2-NEXT: pandn %xmm0, %xmm2 1121 ; X86-SSE2-NEXT: por %xmm1, %xmm2 1122 ; X86-SSE2-NEXT: movd %xmm2, %eax 1123 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] 1124 ; X86-SSE2-NEXT: movd %xmm0, %edx 1125 ; X86-SSE2-NEXT: retl 1126 ; 1127 ; X86-SSE42-LABEL: test_reduce_v8i64: 1128 ; X86-SSE42: ## %bb.0: 1129 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm4 1130 ; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 1131 ; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1132 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 1133 ; X86-SSE42-NEXT: movdqa %xmm4, %xmm0 1134 ; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 1135 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1136 ; X86-SSE42-NEXT: movapd %xmm2, %xmm0 1137 ; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1138 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3 1139 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1] 1140 ; X86-SSE42-NEXT: movdqa %xmm3, %xmm0 1141 ; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1142 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 1143 ; X86-SSE42-NEXT: movd %xmm1, %eax 1144 ; X86-SSE42-NEXT: pextrd $1, %xmm1, %edx 1145 ; X86-SSE42-NEXT: retl 1146 ; 1147 ; X86-AVX1-LABEL: test_reduce_v8i64: 1148 ; X86-AVX1: ## %bb.0: 1149 ; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1150 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1151 ; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1152 ; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3 1153 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 1154 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1155 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1156 ; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1157 ; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3 1158 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 1159 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1160 ; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] 1161 ; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1162 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1163 ; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3 1164 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 1165 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1166 ; X86-AVX1-NEXT: vmovd %xmm0, %eax 1167 ; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx 1168 ; X86-AVX1-NEXT: vzeroupper 1169 ; X86-AVX1-NEXT: retl 1170 ; 1171 ; X86-AVX2-LABEL: test_reduce_v8i64: 1172 ; X86-AVX2: ## %bb.0: 1173 ; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 1174 ; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1175 ; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1176 ; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 1177 ; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1178 ; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] 1179 ; X86-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 1180 ; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1181 ; X86-AVX2-NEXT: vmovd %xmm0, %eax 1182 ; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx 1183 ; X86-AVX2-NEXT: vzeroupper 1184 ; X86-AVX2-NEXT: retl 1185 ; 1186 ; X64-SSE2-LABEL: test_reduce_v8i64: 1187 ; X64-SSE2: ## %bb.0: 1188 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0] 1189 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm5 1190 ; X64-SSE2-NEXT: pxor %xmm4, %xmm5 1191 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm6 1192 ; X64-SSE2-NEXT: pxor %xmm4, %xmm6 1193 ; X64-SSE2-NEXT: movdqa %xmm6, %xmm7 1194 ; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm7 1195 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] 1196 ; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm6 1197 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1198 ; X64-SSE2-NEXT: pand %xmm8, %xmm6 1199 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1200 ; X64-SSE2-NEXT: por %xmm6, %xmm5 1201 ; X64-SSE2-NEXT: pand %xmm5, %xmm0 1202 ; X64-SSE2-NEXT: pandn %xmm2, %xmm5 1203 ; X64-SSE2-NEXT: por %xmm0, %xmm5 1204 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm0 1205 ; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1206 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1207 ; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1208 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm6 1209 ; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm6 1210 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1211 ; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm2 1212 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 1213 ; X64-SSE2-NEXT: pand %xmm7, %xmm0 1214 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3] 1215 ; X64-SSE2-NEXT: por %xmm0, %xmm2 1216 ; X64-SSE2-NEXT: pand %xmm2, %xmm1 1217 ; X64-SSE2-NEXT: pandn %xmm3, %xmm2 1218 ; X64-SSE2-NEXT: por %xmm1, %xmm2 1219 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 1220 ; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1221 ; X64-SSE2-NEXT: movdqa %xmm5, %xmm1 1222 ; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1223 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm3 1224 ; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1225 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2] 1226 ; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1 1227 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 1228 ; X64-SSE2-NEXT: pand %xmm6, %xmm0 1229 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] 1230 ; X64-SSE2-NEXT: por %xmm0, %xmm1 1231 ; X64-SSE2-NEXT: pand %xmm1, %xmm5 1232 ; X64-SSE2-NEXT: pandn %xmm2, %xmm1 1233 ; X64-SSE2-NEXT: por %xmm5, %xmm1 1234 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 1235 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1236 ; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1237 ; X64-SSE2-NEXT: pxor %xmm0, %xmm4 1238 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm3 1239 ; X64-SSE2-NEXT: pcmpgtd %xmm4, %xmm3 1240 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] 1241 ; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm4 1242 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 1243 ; X64-SSE2-NEXT: pand %xmm5, %xmm2 1244 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1245 ; X64-SSE2-NEXT: por %xmm2, %xmm3 1246 ; X64-SSE2-NEXT: pand %xmm3, %xmm1 1247 ; X64-SSE2-NEXT: pandn %xmm0, %xmm3 1248 ; X64-SSE2-NEXT: por %xmm1, %xmm3 1249 ; X64-SSE2-NEXT: movq %xmm3, %rax 1250 ; X64-SSE2-NEXT: retq 1251 ; 1252 ; X64-SSE42-LABEL: test_reduce_v8i64: 1253 ; X64-SSE42: ## %bb.0: 1254 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm4 1255 ; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 1256 ; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1257 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 1258 ; X64-SSE42-NEXT: movdqa %xmm4, %xmm0 1259 ; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 1260 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1261 ; X64-SSE42-NEXT: movapd %xmm2, %xmm0 1262 ; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1263 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3 1264 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1] 1265 ; X64-SSE42-NEXT: movdqa %xmm3, %xmm0 1266 ; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1267 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 1268 ; X64-SSE42-NEXT: movq %xmm1, %rax 1269 ; X64-SSE42-NEXT: retq 1270 ; 1271 ; X64-AVX1-LABEL: test_reduce_v8i64: 1272 ; X64-AVX1: ## %bb.0: 1273 ; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1274 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1275 ; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1276 ; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3 1277 ; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 1278 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1279 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1280 ; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1281 ; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3 1282 ; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 1283 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1284 ; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] 1285 ; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1286 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1287 ; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3 1288 ; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 1289 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1290 ; X64-AVX1-NEXT: vmovq %xmm0, %rax 1291 ; X64-AVX1-NEXT: vzeroupper 1292 ; X64-AVX1-NEXT: retq 1293 ; 1294 ; X64-AVX2-LABEL: test_reduce_v8i64: 1295 ; X64-AVX2: ## %bb.0: 1296 ; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 1297 ; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1298 ; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1299 ; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 1300 ; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1301 ; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] 1302 ; X64-AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 1303 ; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1304 ; X64-AVX2-NEXT: vmovq %xmm0, %rax 1305 ; X64-AVX2-NEXT: vzeroupper 1306 ; X64-AVX2-NEXT: retq 1307 ; 1308 ; X64-AVX512-LABEL: test_reduce_v8i64: 1309 ; X64-AVX512: ## %bb.0: 1310 ; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1311 ; X64-AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 1312 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1313 ; X64-AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 1314 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1315 ; X64-AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 1316 ; X64-AVX512-NEXT: vmovq %xmm0, %rax 1317 ; X64-AVX512-NEXT: vzeroupper 1318 ; X64-AVX512-NEXT: retq 1319 %1 = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1320 %2 = icmp sgt <8 x i64> %a0, %1 1321 %3 = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1 1322 %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1323 %5 = icmp sgt <8 x i64> %3, %4 1324 %6 = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4 1325 %7 = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1326 %8 = icmp sgt <8 x i64> %6, %7 1327 %9 = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7 1328 %10 = extractelement <8 x i64> %9, i32 0 1329 ret i64 %10 1330 } 1331 1332 define i32 @test_reduce_v16i32(<16 x i32> %a0) { 1333 ; X86-SSE2-LABEL: test_reduce_v16i32: 1334 ; X86-SSE2: ## %bb.0: 1335 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm4 1336 ; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4 1337 ; X86-SSE2-NEXT: pand %xmm4, %xmm1 1338 ; X86-SSE2-NEXT: pandn %xmm3, %xmm4 1339 ; X86-SSE2-NEXT: por %xmm1, %xmm4 1340 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1341 ; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1 1342 ; X86-SSE2-NEXT: pand %xmm1, %xmm0 1343 ; X86-SSE2-NEXT: pandn %xmm2, %xmm1 1344 ; X86-SSE2-NEXT: por %xmm0, %xmm1 1345 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1346 ; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm0 1347 ; X86-SSE2-NEXT: pand %xmm0, %xmm1 1348 ; X86-SSE2-NEXT: pandn %xmm4, %xmm0 1349 ; X86-SSE2-NEXT: por %xmm1, %xmm0 1350 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1351 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1352 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1353 ; X86-SSE2-NEXT: pand %xmm2, %xmm0 1354 ; X86-SSE2-NEXT: pandn %xmm1, %xmm2 1355 ; X86-SSE2-NEXT: por %xmm0, %xmm2 1356 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] 1357 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 1358 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1359 ; X86-SSE2-NEXT: pand %xmm1, %xmm2 1360 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1361 ; X86-SSE2-NEXT: por %xmm2, %xmm1 1362 ; X86-SSE2-NEXT: movd %xmm1, %eax 1363 ; X86-SSE2-NEXT: retl 1364 ; 1365 ; X86-SSE42-LABEL: test_reduce_v16i32: 1366 ; X86-SSE42: ## %bb.0: 1367 ; X86-SSE42-NEXT: pmaxsd %xmm3, %xmm1 1368 ; X86-SSE42-NEXT: pmaxsd %xmm2, %xmm0 1369 ; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0 1370 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1371 ; X86-SSE42-NEXT: pmaxsd %xmm0, %xmm1 1372 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 1373 ; X86-SSE42-NEXT: pmaxsd %xmm1, %xmm0 1374 ; X86-SSE42-NEXT: movd %xmm0, %eax 1375 ; X86-SSE42-NEXT: retl 1376 ; 1377 ; X86-AVX1-LABEL: test_reduce_v16i32: 1378 ; X86-AVX1: ## %bb.0: 1379 ; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1380 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1381 ; X86-AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2 1382 ; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1383 ; X86-AVX1-NEXT: vpmaxsd %xmm2, %xmm0, %xmm0 1384 ; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1385 ; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1386 ; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1387 ; X86-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1388 ; X86-AVX1-NEXT: vmovd %xmm0, %eax 1389 ; X86-AVX1-NEXT: vzeroupper 1390 ; X86-AVX1-NEXT: retl 1391 ; 1392 ; X86-AVX2-LABEL: test_reduce_v16i32: 1393 ; X86-AVX2: ## %bb.0: 1394 ; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 1395 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1396 ; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 1397 ; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1398 ; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 1399 ; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1400 ; X86-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 1401 ; X86-AVX2-NEXT: vmovd %xmm0, %eax 1402 ; X86-AVX2-NEXT: vzeroupper 1403 ; X86-AVX2-NEXT: retl 1404 ; 1405 ; X64-SSE2-LABEL: test_reduce_v16i32: 1406 ; X64-SSE2: ## %bb.0: 1407 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm4 1408 ; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm4 1409 ; X64-SSE2-NEXT: pand %xmm4, %xmm1 1410 ; X64-SSE2-NEXT: pandn %xmm3, %xmm4 1411 ; X64-SSE2-NEXT: por %xmm1, %xmm4 1412 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1413 ; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1 1414 ; X64-SSE2-NEXT: pand %xmm1, %xmm0 1415 ; X64-SSE2-NEXT: pandn %xmm2, %xmm1 1416 ; X64-SSE2-NEXT: por %xmm0, %xmm1 1417 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 1418 ; X64-SSE2-NEXT: pcmpgtd %xmm4, %xmm0 1419 ; X64-SSE2-NEXT: pand %xmm0, %xmm1 1420 ; X64-SSE2-NEXT: pandn %xmm4, %xmm0 1421 ; X64-SSE2-NEXT: por %xmm1, %xmm0 1422 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1423 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1424 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1425 ; X64-SSE2-NEXT: pand %xmm2, %xmm0 1426 ; X64-SSE2-NEXT: pandn %xmm1, %xmm2 1427 ; X64-SSE2-NEXT: por %xmm0, %xmm2 1428 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] 1429 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 1430 ; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1431 ; X64-SSE2-NEXT: pand %xmm1, %xmm2 1432 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1433 ; X64-SSE2-NEXT: por %xmm2, %xmm1 1434 ; X64-SSE2-NEXT: movd %xmm1, %eax 1435 ; X64-SSE2-NEXT: retq 1436 ; 1437 ; X64-SSE42-LABEL: test_reduce_v16i32: 1438 ; X64-SSE42: ## %bb.0: 1439 ; X64-SSE42-NEXT: pmaxsd %xmm3, %xmm1 1440 ; X64-SSE42-NEXT: pmaxsd %xmm2, %xmm0 1441 ; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0 1442 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1443 ; X64-SSE42-NEXT: pmaxsd %xmm0, %xmm1 1444 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 1445 ; X64-SSE42-NEXT: pmaxsd %xmm1, %xmm0 1446 ; X64-SSE42-NEXT: movd %xmm0, %eax 1447 ; X64-SSE42-NEXT: retq 1448 ; 1449 ; X64-AVX1-LABEL: test_reduce_v16i32: 1450 ; X64-AVX1: ## %bb.0: 1451 ; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1452 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1453 ; X64-AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2 1454 ; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1455 ; X64-AVX1-NEXT: vpmaxsd %xmm2, %xmm0, %xmm0 1456 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1457 ; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1458 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1459 ; X64-AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1460 ; X64-AVX1-NEXT: vmovd %xmm0, %eax 1461 ; X64-AVX1-NEXT: vzeroupper 1462 ; X64-AVX1-NEXT: retq 1463 ; 1464 ; X64-AVX2-LABEL: test_reduce_v16i32: 1465 ; X64-AVX2: ## %bb.0: 1466 ; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 1467 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1468 ; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 1469 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1470 ; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 1471 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1472 ; X64-AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 1473 ; X64-AVX2-NEXT: vmovd %xmm0, %eax 1474 ; X64-AVX2-NEXT: vzeroupper 1475 ; X64-AVX2-NEXT: retq 1476 ; 1477 ; X64-AVX512-LABEL: test_reduce_v16i32: 1478 ; X64-AVX512: ## %bb.0: 1479 ; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1480 ; X64-AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 1481 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1482 ; X64-AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 1483 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1484 ; X64-AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 1485 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1486 ; X64-AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 1487 ; X64-AVX512-NEXT: vmovd %xmm0, %eax 1488 ; X64-AVX512-NEXT: vzeroupper 1489 ; X64-AVX512-NEXT: retq 1490 %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1491 %2 = icmp sgt <16 x i32> %a0, %1 1492 %3 = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1 1493 %4 = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1494 %5 = icmp sgt <16 x i32> %3, %4 1495 %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4 1496 %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1497 %8 = icmp sgt <16 x i32> %6, %7 1498 %9 = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7 1499 %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1500 %11 = icmp sgt <16 x i32> %9, %10 1501 %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10 1502 %13 = extractelement <16 x i32> %12, i32 0 1503 ret i32 %13 1504 } 1505 1506 define i16 @test_reduce_v32i16(<32 x i16> %a0) { 1507 ; X86-SSE2-LABEL: test_reduce_v32i16: 1508 ; X86-SSE2: ## %bb.0: 1509 ; X86-SSE2-NEXT: pmaxsw %xmm3, %xmm1 1510 ; X86-SSE2-NEXT: pmaxsw %xmm2, %xmm0 1511 ; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1512 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1513 ; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1514 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 1515 ; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1516 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1517 ; X86-SSE2-NEXT: psrld $16, %xmm1 1518 ; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1519 ; X86-SSE2-NEXT: movd %xmm1, %eax 1520 ; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1521 ; X86-SSE2-NEXT: retl 1522 ; 1523 ; X86-SSE42-LABEL: test_reduce_v32i16: 1524 ; X86-SSE42: ## %bb.0: 1525 ; X86-SSE42-NEXT: pmaxsw %xmm3, %xmm1 1526 ; X86-SSE42-NEXT: pmaxsw %xmm2, %xmm0 1527 ; X86-SSE42-NEXT: pmaxsw %xmm1, %xmm0 1528 ; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1529 ; X86-SSE42-NEXT: pxor %xmm1, %xmm0 1530 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 1531 ; X86-SSE42-NEXT: pxor %xmm1, %xmm0 1532 ; X86-SSE42-NEXT: movd %xmm0, %eax 1533 ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1534 ; X86-SSE42-NEXT: retl 1535 ; 1536 ; X86-AVX1-LABEL: test_reduce_v32i16: 1537 ; X86-AVX1: ## %bb.0: 1538 ; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1539 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1540 ; X86-AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2 1541 ; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1542 ; X86-AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0 1543 ; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1544 ; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1545 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1546 ; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1547 ; X86-AVX1-NEXT: vmovd %xmm0, %eax 1548 ; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 1549 ; X86-AVX1-NEXT: vzeroupper 1550 ; X86-AVX1-NEXT: retl 1551 ; 1552 ; X86-AVX2-LABEL: test_reduce_v32i16: 1553 ; X86-AVX2: ## %bb.0: 1554 ; X86-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 1555 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1556 ; X86-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1557 ; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1558 ; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1559 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1560 ; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1561 ; X86-AVX2-NEXT: vmovd %xmm0, %eax 1562 ; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 1563 ; X86-AVX2-NEXT: vzeroupper 1564 ; X86-AVX2-NEXT: retl 1565 ; 1566 ; X64-SSE2-LABEL: test_reduce_v32i16: 1567 ; X64-SSE2: ## %bb.0: 1568 ; X64-SSE2-NEXT: pmaxsw %xmm3, %xmm1 1569 ; X64-SSE2-NEXT: pmaxsw %xmm2, %xmm0 1570 ; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1571 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1572 ; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1573 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 1574 ; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 1575 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1576 ; X64-SSE2-NEXT: psrld $16, %xmm1 1577 ; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 1578 ; X64-SSE2-NEXT: movd %xmm1, %eax 1579 ; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1580 ; X64-SSE2-NEXT: retq 1581 ; 1582 ; X64-SSE42-LABEL: test_reduce_v32i16: 1583 ; X64-SSE42: ## %bb.0: 1584 ; X64-SSE42-NEXT: pmaxsw %xmm3, %xmm1 1585 ; X64-SSE42-NEXT: pmaxsw %xmm2, %xmm0 1586 ; X64-SSE42-NEXT: pmaxsw %xmm1, %xmm0 1587 ; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1588 ; X64-SSE42-NEXT: pxor %xmm1, %xmm0 1589 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 1590 ; X64-SSE42-NEXT: pxor %xmm1, %xmm0 1591 ; X64-SSE42-NEXT: movd %xmm0, %eax 1592 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1593 ; X64-SSE42-NEXT: retq 1594 ; 1595 ; X64-AVX1-LABEL: test_reduce_v32i16: 1596 ; X64-AVX1: ## %bb.0: 1597 ; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1598 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1599 ; X64-AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2 1600 ; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1601 ; X64-AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0 1602 ; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1603 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1604 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1605 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1606 ; X64-AVX1-NEXT: vmovd %xmm0, %eax 1607 ; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 1608 ; X64-AVX1-NEXT: vzeroupper 1609 ; X64-AVX1-NEXT: retq 1610 ; 1611 ; X64-AVX2-LABEL: test_reduce_v32i16: 1612 ; X64-AVX2: ## %bb.0: 1613 ; X64-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 1614 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1615 ; X64-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1616 ; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1617 ; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1618 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1619 ; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1620 ; X64-AVX2-NEXT: vmovd %xmm0, %eax 1621 ; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 1622 ; X64-AVX2-NEXT: vzeroupper 1623 ; X64-AVX2-NEXT: retq 1624 ; 1625 ; X64-AVX512-LABEL: test_reduce_v32i16: 1626 ; X64-AVX512: ## %bb.0: 1627 ; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1628 ; X64-AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 1629 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1630 ; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1631 ; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1632 ; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1633 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1634 ; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1635 ; X64-AVX512-NEXT: vmovd %xmm0, %eax 1636 ; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 1637 ; X64-AVX512-NEXT: vzeroupper 1638 ; X64-AVX512-NEXT: retq 1639 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1640 %2 = icmp sgt <32 x i16> %a0, %1 1641 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1 1642 %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1643 %5 = icmp sgt <32 x i16> %3, %4 1644 %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4 1645 %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1646 %8 = icmp sgt <32 x i16> %6, %7 1647 %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7 1648 %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1649 %11 = icmp sgt <32 x i16> %9, %10 1650 %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10 1651 %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1652 %14 = icmp sgt <32 x i16> %12, %13 1653 %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13 1654 %16 = extractelement <32 x i16> %15, i32 0 1655 ret i16 %16 1656 } 1657 1658 define i8 @test_reduce_v64i8(<64 x i8> %a0) { 1659 ; X86-SSE2-LABEL: test_reduce_v64i8: 1660 ; X86-SSE2: ## %bb.0: 1661 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm4 1662 ; X86-SSE2-NEXT: pcmpgtb %xmm3, %xmm4 1663 ; X86-SSE2-NEXT: pand %xmm4, %xmm1 1664 ; X86-SSE2-NEXT: pandn %xmm3, %xmm4 1665 ; X86-SSE2-NEXT: por %xmm1, %xmm4 1666 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1667 ; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 1668 ; X86-SSE2-NEXT: pand %xmm1, %xmm0 1669 ; X86-SSE2-NEXT: pandn %xmm2, %xmm1 1670 ; X86-SSE2-NEXT: por %xmm0, %xmm1 1671 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1672 ; X86-SSE2-NEXT: pcmpgtb %xmm4, %xmm0 1673 ; X86-SSE2-NEXT: pand %xmm0, %xmm1 1674 ; X86-SSE2-NEXT: pandn %xmm4, %xmm0 1675 ; X86-SSE2-NEXT: por %xmm1, %xmm0 1676 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1677 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1678 ; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1679 ; X86-SSE2-NEXT: pand %xmm2, %xmm0 1680 ; X86-SSE2-NEXT: pandn %xmm1, %xmm2 1681 ; X86-SSE2-NEXT: por %xmm0, %xmm2 1682 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] 1683 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 1684 ; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1685 ; X86-SSE2-NEXT: pand %xmm1, %xmm2 1686 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1687 ; X86-SSE2-NEXT: por %xmm2, %xmm1 1688 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1689 ; X86-SSE2-NEXT: psrld $16, %xmm0 1690 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 1691 ; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 1692 ; X86-SSE2-NEXT: pand %xmm2, %xmm1 1693 ; X86-SSE2-NEXT: pandn %xmm0, %xmm2 1694 ; X86-SSE2-NEXT: por %xmm1, %xmm2 1695 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 1696 ; X86-SSE2-NEXT: psrlw $8, %xmm0 1697 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 1698 ; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1699 ; X86-SSE2-NEXT: pand %xmm1, %xmm2 1700 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1701 ; X86-SSE2-NEXT: por %xmm2, %xmm1 1702 ; X86-SSE2-NEXT: movd %xmm1, %eax 1703 ; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 1704 ; X86-SSE2-NEXT: retl 1705 ; 1706 ; X86-SSE42-LABEL: test_reduce_v64i8: 1707 ; X86-SSE42: ## %bb.0: 1708 ; X86-SSE42-NEXT: pmaxsb %xmm3, %xmm1 1709 ; X86-SSE42-NEXT: pmaxsb %xmm2, %xmm0 1710 ; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0 1711 ; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1712 ; X86-SSE42-NEXT: pxor %xmm1, %xmm0 1713 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 1714 ; X86-SSE42-NEXT: psrlw $8, %xmm2 1715 ; X86-SSE42-NEXT: pminub %xmm0, %xmm2 1716 ; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0 1717 ; X86-SSE42-NEXT: pxor %xmm1, %xmm0 1718 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax 1719 ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 1720 ; X86-SSE42-NEXT: retl 1721 ; 1722 ; X86-AVX1-LABEL: test_reduce_v64i8: 1723 ; X86-AVX1: ## %bb.0: 1724 ; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1725 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1726 ; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2 1727 ; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1728 ; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0 1729 ; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1730 ; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1731 ; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 1732 ; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 1733 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1734 ; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1735 ; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax 1736 ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1737 ; X86-AVX1-NEXT: vzeroupper 1738 ; X86-AVX1-NEXT: retl 1739 ; 1740 ; X86-AVX2-LABEL: test_reduce_v64i8: 1741 ; X86-AVX2: ## %bb.0: 1742 ; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 1743 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1744 ; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1745 ; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1746 ; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1747 ; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 1748 ; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 1749 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1750 ; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1751 ; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax 1752 ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1753 ; X86-AVX2-NEXT: vzeroupper 1754 ; X86-AVX2-NEXT: retl 1755 ; 1756 ; X64-SSE2-LABEL: test_reduce_v64i8: 1757 ; X64-SSE2: ## %bb.0: 1758 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm4 1759 ; X64-SSE2-NEXT: pcmpgtb %xmm3, %xmm4 1760 ; X64-SSE2-NEXT: pand %xmm4, %xmm1 1761 ; X64-SSE2-NEXT: pandn %xmm3, %xmm4 1762 ; X64-SSE2-NEXT: por %xmm1, %xmm4 1763 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1764 ; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 1765 ; X64-SSE2-NEXT: pand %xmm1, %xmm0 1766 ; X64-SSE2-NEXT: pandn %xmm2, %xmm1 1767 ; X64-SSE2-NEXT: por %xmm0, %xmm1 1768 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 1769 ; X64-SSE2-NEXT: pcmpgtb %xmm4, %xmm0 1770 ; X64-SSE2-NEXT: pand %xmm0, %xmm1 1771 ; X64-SSE2-NEXT: pandn %xmm4, %xmm0 1772 ; X64-SSE2-NEXT: por %xmm1, %xmm0 1773 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1774 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1775 ; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1776 ; X64-SSE2-NEXT: pand %xmm2, %xmm0 1777 ; X64-SSE2-NEXT: pandn %xmm1, %xmm2 1778 ; X64-SSE2-NEXT: por %xmm0, %xmm2 1779 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] 1780 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 1781 ; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1782 ; X64-SSE2-NEXT: pand %xmm1, %xmm2 1783 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1784 ; X64-SSE2-NEXT: por %xmm2, %xmm1 1785 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 1786 ; X64-SSE2-NEXT: psrld $16, %xmm0 1787 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1788 ; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 1789 ; X64-SSE2-NEXT: pand %xmm2, %xmm1 1790 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2 1791 ; X64-SSE2-NEXT: por %xmm1, %xmm2 1792 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 1793 ; X64-SSE2-NEXT: psrlw $8, %xmm0 1794 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 1795 ; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1796 ; X64-SSE2-NEXT: pand %xmm1, %xmm2 1797 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1798 ; X64-SSE2-NEXT: por %xmm2, %xmm1 1799 ; X64-SSE2-NEXT: movd %xmm1, %eax 1800 ; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 1801 ; X64-SSE2-NEXT: retq 1802 ; 1803 ; X64-SSE42-LABEL: test_reduce_v64i8: 1804 ; X64-SSE42: ## %bb.0: 1805 ; X64-SSE42-NEXT: pmaxsb %xmm3, %xmm1 1806 ; X64-SSE42-NEXT: pmaxsb %xmm2, %xmm0 1807 ; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0 1808 ; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1809 ; X64-SSE42-NEXT: pxor %xmm1, %xmm0 1810 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 1811 ; X64-SSE42-NEXT: psrlw $8, %xmm2 1812 ; X64-SSE42-NEXT: pminub %xmm0, %xmm2 1813 ; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0 1814 ; X64-SSE42-NEXT: pxor %xmm1, %xmm0 1815 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax 1816 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 1817 ; X64-SSE42-NEXT: retq 1818 ; 1819 ; X64-AVX1-LABEL: test_reduce_v64i8: 1820 ; X64-AVX1: ## %bb.0: 1821 ; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1822 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1823 ; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2 1824 ; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1825 ; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0 1826 ; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1827 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1828 ; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 1829 ; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 1830 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1831 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1832 ; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax 1833 ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1834 ; X64-AVX1-NEXT: vzeroupper 1835 ; X64-AVX1-NEXT: retq 1836 ; 1837 ; X64-AVX2-LABEL: test_reduce_v64i8: 1838 ; X64-AVX2: ## %bb.0: 1839 ; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 1840 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1841 ; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1842 ; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1843 ; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1844 ; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 1845 ; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 1846 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1847 ; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1848 ; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax 1849 ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1850 ; X64-AVX2-NEXT: vzeroupper 1851 ; X64-AVX2-NEXT: retq 1852 ; 1853 ; X64-AVX512-LABEL: test_reduce_v64i8: 1854 ; X64-AVX512: ## %bb.0: 1855 ; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1856 ; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 1857 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1858 ; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1859 ; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1860 ; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1861 ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 1862 ; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 1863 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1864 ; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1865 ; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax 1866 ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 1867 ; X64-AVX512-NEXT: vzeroupper 1868 ; X64-AVX512-NEXT: retq 1869 %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1870 %2 = icmp sgt <64 x i8> %a0, %1 1871 %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1 1872 %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1873 %5 = icmp sgt <64 x i8> %3, %4 1874 %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4 1875 %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1876 %8 = icmp sgt <64 x i8> %6, %7 1877 %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7 1878 %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1879 %11 = icmp sgt <64 x i8> %9, %10 1880 %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10 1881 %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1882 %14 = icmp sgt <64 x i8> %12, %13 1883 %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13 1884 %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1885 %17 = icmp sgt <64 x i8> %15, %16 1886 %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16 1887 %19 = extractelement <64 x i8> %18, i32 0 1888 ret i8 %19 1889 } 1890