; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE2
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE42
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX1
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE42
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX512

;
; 128-bit Vectors
;

; Horizontal umin reduction of <2 x i64>: icmp ult + select on a lane swap, result read from lane 0.
define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v2i64:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    pxor %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm4
; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm5, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm2, %xmm3
; X86-SSE2-NEXT:    pand %xmm3, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm3
; X86-SSE2-NEXT:    por %xmm0, %xmm3
; X86-SSE2-NEXT:    movd %xmm3, %eax
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
; X86-SSE2-NEXT:    movd %xmm0, %edx
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v2i64:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [0,2147483648,0,2147483648]
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm3
; X86-SSE42-NEXT:    pxor %xmm0, %xmm3
; X86-SSE42-NEXT:    pxor %xmm2, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT:    movd %xmm2, %eax
; X86-SSE42-NEXT:    pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v2i64:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; X86-AVX-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; X86-AVX-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v2i64:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    pxor %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm4
; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm5, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm2, %xmm3
; X64-SSE2-NEXT:    pand %xmm3, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm3
; X64-SSE2-NEXT:    por %xmm0, %xmm3
; X64-SSE2-NEXT:    movq %xmm3, %rax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v2i64:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm3
; X64-SSE42-NEXT:    pxor %xmm0, %xmm3
; X64-SSE42-NEXT:    pxor %xmm2, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT:    movq %xmm2, %rax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v2i64:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; X64-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT:    vmovq %xmm0, %rax
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v2i64:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X64-AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; X64-AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT:    vmovq %xmm0, %rax
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v2i64:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT:    vpminuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovq %xmm0, %rax
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %2 = icmp ult <2 x i64> %a0, %1
  %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1
  %4 = extractelement <2 x i64> %3, i32 0
  ret i64 %4
}

; Horizontal umin reduction of <4 x i32>: two shuffle+icmp ult+select steps, result read from lane 0.
define i32 @test_reduce_v4i32(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i32:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X86-SSE2-NEXT:    pxor %xmm2, %xmm4
; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; X86-SSE2-NEXT:    pand %xmm4, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm4
; X86-SSE2-NEXT:    por %xmm0, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,2,3]
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm4
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm4, %xmm2
; X86-SSE2-NEXT:    movd %xmm2, %eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v4i32:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE42-NEXT:    pminud %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE42-NEXT:    pminud %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v4i32:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v4i32:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X64-SSE2-NEXT:    pxor %xmm2, %xmm4
; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; X64-SSE2-NEXT:    pand %xmm4, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm4
; X64-SSE2-NEXT:    por %xmm0, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,2,3]
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm4
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm4, %xmm2
; X64-SSE2-NEXT:    movd %xmm2, %eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v4i32:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE42-NEXT:    pminud %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE42-NEXT:    pminud %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v4i32:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp ult <4 x i32> %a0, %1
  %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <4 x i32> %3, %4
  %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4
  %7 = extractelement <4 x i32> %6, i32 0
  ret i32 %7
}

; Horizontal umin reduction of <8 x i16>: three shuffle+icmp ult+select steps, result read from lane 0.
define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v8i16:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v8i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v8i16:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT:    retq
  %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <8 x i16> %a0, %1
  %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1
  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <8 x i16> %3, %4
  %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4
  %7 = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <8 x i16> %6, %7
  %9 = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7
  %10 = extractelement <8 x i16> %9, i32 0
  ret i16 %10
}

; Horizontal umin reduction of <16 x i8>: four shuffle+icmp ult+select steps, result read from lane 0.
define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    psrlw $8, %xmm1
; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vpextrb $0, %xmm0, %eax
; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v16i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    psrlw $8, %xmm1
; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v16i8:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vpextrb $0, %xmm0, %eax
; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT:    retq
  %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <16 x i8> %a0, %1
  %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <16 x i8> %3, %4
  %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4
  %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <16 x i8> %6, %7
  %9 = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7
  %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <16 x i8> %9, %10
  %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10
  %13 = extractelement <16 x i8> %12, i32 0
  ret i8 %13
}

;
; 256-bit Vectors
;

; Horizontal umin reduction of <4 x i64>: icmp ult + select across halves then lanes, result read from lane 0.
define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i64:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X86-SSE2-NEXT:    pxor %xmm2, %xmm4
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm5
; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm6, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm3, %xmm4
; X86-SSE2-NEXT:    pand %xmm4, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm4
; X86-SSE2-NEXT:    por %xmm0, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm5, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm4
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm4, %xmm2
; X86-SSE2-NEXT:    movd %xmm2, %eax
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X86-SSE2-NEXT:    movd %xmm0, %edx
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v4i64:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm4
; X86-SSE42-NEXT:    pxor %xmm3, %xmm4
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    pxor %xmm3, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    pxor %xmm3, %xmm0
; X86-SSE42-NEXT:    pxor %xmm2, %xmm3
; X86-SSE42-NEXT:    pcmpgtq %xmm0, %xmm3
; X86-SSE42-NEXT:    movdqa %xmm3, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT:    movd %xmm2, %eax
; X86-SSE42-NEXT:    pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v4i64:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm4
; X86-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; X86-AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm4
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; X86-AVX1-NEXT:    vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT:    vxorpd %xmm2, %xmm1, %xmm4
; X86-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; X86-AVX1-NEXT:    vpxor %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm2
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X86-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
; X86-AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm3
; X86-AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm4
; X86-AVX2-NEXT:    vpcmpgtq %ymm3, %ymm4, %ymm3
; X86-AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT:    vxorpd %ymm2, %ymm0, %ymm3
; X86-AVX2-NEXT:    vxorpd %ymm2, %ymm1, %ymm2
; X86-AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
; X86-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v4i64:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X64-SSE2-NEXT:    pxor %xmm2, %xmm4
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm5
; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm6, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm3, %xmm4
; X64-SSE2-NEXT:    pand %xmm4, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm4
; X64-SSE2-NEXT:    por %xmm0, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm3
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm5, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm4
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm4, %xmm2
; X64-SSE2-NEXT:    movq %xmm2, %rax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v4i64:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm4
; X64-SSE42-NEXT:    pxor %xmm3, %xmm4
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    pxor %xmm3, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    pxor %xmm3, %xmm0
; X64-SSE42-NEXT:    pxor %xmm2, %xmm3
; X64-SSE42-NEXT:    pcmpgtq %xmm0, %xmm3
; X64-SSE42-NEXT:    movdqa %xmm3, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT:    movq %xmm2, %rax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v4i64:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm4
; X64-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; X64-AVX1-NEXT:    vpcmpgtq %xmm4, %xmm0, %xmm4
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; X64-AVX1-NEXT:    vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
; X64-AVX1-NEXT:    vxorpd %xmm2, %xmm1, %xmm4
; X64-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm4, %xmm2
; X64-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm2
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X64-AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT:    vmovq %xmm0, %rax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v4i64:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; X64-AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm3
; X64-AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm4
; X64-AVX2-NEXT:    vpcmpgtq %ymm3, %ymm4, %ymm3
; X64-AVX2-NEXT:    vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT:    vxorpd %ymm2, %ymm0, %ymm3
; X64-AVX2-NEXT:    vxorpd %ymm2, %ymm1, %ymm2
; X64-AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
; X64-AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT:    vmovq %xmm0, %rax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v4i64:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminuq %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT:    vpminuq %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vmovq %xmm0, %rax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp ult <4 x i64> %a0, %1
  %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1
  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <4 x i64> %3, %4
  %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4
  %7 = extractelement <4 x i64> %6, i32 0
  ret i64 %7
}

; Horizontal umin reduction of <8 x i32>: three shuffle+icmp ult+select steps, result read from lane 0.
define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i32:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X86-SSE2-NEXT:    pxor %xmm2, %xmm4
; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; X86-SSE2-NEXT:    pand %xmm4, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm4
; X86-SSE2-NEXT:    por %xmm0, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; X86-SSE2-NEXT:    pand %xmm3, %xmm4
; X86-SSE2-NEXT:    pandn %xmm0, %xmm3
; X86-SSE2-NEXT:    por %xmm4, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm3
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm3, %xmm2
; X86-SSE2-NEXT:    movd %xmm2, %eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v8i32:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminud %xmm1, %xmm0
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE42-NEXT:    pminud %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE42-NEXT:    pminud %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v8i32:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v8i32:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v8i32:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X64-SSE2-NEXT:    pxor %xmm2, %xmm4
; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; X64-SSE2-NEXT:    pand %xmm4, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm4
; X64-SSE2-NEXT:    por %xmm0, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm3
; X64-SSE2-NEXT:    pand %xmm3, %xmm4
; X64-SSE2-NEXT:    pandn %xmm0, %xmm3
; X64-SSE2-NEXT:    por %xmm4, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm3
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm3, %xmm2
; X64-SSE2-NEXT:    movd %xmm2, %eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v8i32:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminud %xmm1, %xmm0
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE42-NEXT:    pminud %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE42-NEXT:    pminud %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v8i32:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v8i32:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v8i32:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <8 x i32> %a0, %1
  %3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1
  %4 = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <8 x i32> %3, %4
  %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4
  %7 = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <8 x i32> %6, %7
  %9 = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7
  %10 = extractelement <8 x i32> %9, i32 0
  ret i32 %10
}

; NOTE(review): this function's IR body and remaining CHECK blocks continue past the
; end of this chunk; the text below is reproduced exactly as far as it is visible.
define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm0
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v16i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminuw %xmm1, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v16i16:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v16i16:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v16i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm0
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v16i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminuw %xmm1, %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v16i16:
;
X64-AVX1: ## %bb.0: 869 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 870 ; X64-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0 871 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 872 ; X64-AVX1-NEXT: vmovd %xmm0, %eax 873 ; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 874 ; X64-AVX1-NEXT: vzeroupper 875 ; X64-AVX1-NEXT: retq 876 ; 877 ; X64-AVX2-LABEL: test_reduce_v16i16: 878 ; X64-AVX2: ## %bb.0: 879 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 880 ; X64-AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 881 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 882 ; X64-AVX2-NEXT: vmovd %xmm0, %eax 883 ; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 884 ; X64-AVX2-NEXT: vzeroupper 885 ; X64-AVX2-NEXT: retq 886 ; 887 ; X64-AVX512-LABEL: test_reduce_v16i16: 888 ; X64-AVX512: ## %bb.0: 889 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 890 ; X64-AVX512-NEXT: vpminuw %xmm1, %xmm0, %xmm0 891 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 892 ; X64-AVX512-NEXT: vmovd %xmm0, %eax 893 ; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 894 ; X64-AVX512-NEXT: vzeroupper 895 ; X64-AVX512-NEXT: retq 896 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 897 %2 = icmp ult <16 x i16> %a0, %1 898 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1 899 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 900 %5 = icmp ult <16 x i16> %3, %4 901 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 902 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 903 %8 = icmp ult <16 x 
i16> %6, %7 904 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7 905 %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 906 %11 = icmp ult <16 x i16> %9, %10 907 %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10 908 %13 = extractelement <16 x i16> %12, i32 0 909 ret i16 %13 910 } 911 912 define i8 @test_reduce_v32i8(<32 x i8> %a0) { 913 ; X86-SSE2-LABEL: test_reduce_v32i8: 914 ; X86-SSE2: ## %bb.0: 915 ; X86-SSE2-NEXT: pminub %xmm1, %xmm0 916 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 917 ; X86-SSE2-NEXT: pminub %xmm0, %xmm1 918 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 919 ; X86-SSE2-NEXT: pminub %xmm1, %xmm0 920 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 921 ; X86-SSE2-NEXT: psrld $16, %xmm1 922 ; X86-SSE2-NEXT: pminub %xmm0, %xmm1 923 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 924 ; X86-SSE2-NEXT: psrlw $8, %xmm0 925 ; X86-SSE2-NEXT: pminub %xmm1, %xmm0 926 ; X86-SSE2-NEXT: movd %xmm0, %eax 927 ; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 928 ; X86-SSE2-NEXT: retl 929 ; 930 ; X86-SSE42-LABEL: test_reduce_v32i8: 931 ; X86-SSE42: ## %bb.0: 932 ; X86-SSE42-NEXT: pminub %xmm1, %xmm0 933 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 934 ; X86-SSE42-NEXT: psrlw $8, %xmm1 935 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1 936 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 937 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax 938 ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 939 ; X86-SSE42-NEXT: retl 940 ; 941 ; X86-AVX1-LABEL: test_reduce_v32i8: 942 ; X86-AVX1: ## %bb.0: 943 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 944 ; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 945 ; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 946 ; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 947 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 948 ; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax 949 ; 
X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax 950 ; X86-AVX1-NEXT: vzeroupper 951 ; X86-AVX1-NEXT: retl 952 ; 953 ; X86-AVX2-LABEL: test_reduce_v32i8: 954 ; X86-AVX2: ## %bb.0: 955 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 956 ; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 957 ; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 958 ; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 959 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 960 ; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax 961 ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax 962 ; X86-AVX2-NEXT: vzeroupper 963 ; X86-AVX2-NEXT: retl 964 ; 965 ; X64-SSE2-LABEL: test_reduce_v32i8: 966 ; X64-SSE2: ## %bb.0: 967 ; X64-SSE2-NEXT: pminub %xmm1, %xmm0 968 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 969 ; X64-SSE2-NEXT: pminub %xmm0, %xmm1 970 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 971 ; X64-SSE2-NEXT: pminub %xmm1, %xmm0 972 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 973 ; X64-SSE2-NEXT: psrld $16, %xmm1 974 ; X64-SSE2-NEXT: pminub %xmm0, %xmm1 975 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 976 ; X64-SSE2-NEXT: psrlw $8, %xmm0 977 ; X64-SSE2-NEXT: pminub %xmm1, %xmm0 978 ; X64-SSE2-NEXT: movd %xmm0, %eax 979 ; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 980 ; X64-SSE2-NEXT: retq 981 ; 982 ; X64-SSE42-LABEL: test_reduce_v32i8: 983 ; X64-SSE42: ## %bb.0: 984 ; X64-SSE42-NEXT: pminub %xmm1, %xmm0 985 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 986 ; X64-SSE42-NEXT: psrlw $8, %xmm1 987 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1 988 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 989 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax 990 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 991 ; X64-SSE42-NEXT: retq 992 ; 993 ; X64-AVX1-LABEL: test_reduce_v32i8: 994 ; X64-AVX1: ## %bb.0: 995 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 996 ; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 997 ; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 998 ; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 999 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 
; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v32i8:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v32i8:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <32 x i8> %a0, %1
  %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <32 x i8> %3, %4
  %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
  %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <32 x i8> %6, %7
  %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
  %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <32 x i8> %9, %10
  %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
  %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp ult <32 x i8> %12, %13
  %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13
  %16 = extractelement <32 x i8> %15, i32 0
  ret i8 %16
}

;
; 512-bit Vectors
;

; Unsigned-min reduction of <8 x i64> via pairwise shuffle + icmp ult + select.
; NOTE: the CHECK lines below were autogenerated by update_llc_test_checks.py;
; regenerate with that script rather than editing them by hand.
define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i64:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT: movdqa %xmm1, %xmm5
; X86-SSE2-NEXT: pxor %xmm4, %xmm5
; X86-SSE2-NEXT: movdqa %xmm3, %xmm6
; X86-SSE2-NEXT: pxor %xmm4, %xmm6
; X86-SSE2-NEXT: movdqa %xmm6, %xmm7
; X86-SSE2-NEXT: pcmpgtd %xmm5, %xmm7
; X86-SSE2-NEXT: pcmpeqd %xmm5, %xmm6
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm5, %xmm6
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; X86-SSE2-NEXT: por %xmm6, %xmm5
; X86-SSE2-NEXT: pand %xmm5, %xmm1
; X86-SSE2-NEXT: pandn %xmm3, %xmm5
; X86-SSE2-NEXT: por %xmm1, %xmm5
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
; X86-SSE2-NEXT: pxor %xmm4, %xmm3
; X86-SSE2-NEXT: movdqa %xmm3, %xmm6
; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm6
; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[0,0,2,2]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm1, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3]
; X86-SSE2-NEXT: por %xmm3, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm0
; X86-SSE2-NEXT: pandn %xmm2, %xmm1
; X86-SSE2-NEXT: por %xmm0, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: pxor %xmm4, %xmm0
; X86-SSE2-NEXT: movdqa %xmm5, %xmm2
; X86-SSE2-NEXT: pxor %xmm4, %xmm2
; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm0, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; X86-SSE2-NEXT: por %xmm2, %xmm0
; X86-SSE2-NEXT: pand %xmm0, %xmm1
; X86-SSE2-NEXT: pandn %xmm5, %xmm0
; X86-SSE2-NEXT: por %xmm1, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: pxor %xmm4, %xmm2
; X86-SSE2-NEXT: pxor %xmm1, %xmm4
; X86-SSE2-NEXT: movdqa %xmm4, %xmm3
; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm3
; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm2, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X86-SSE2-NEXT: por %xmm4, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm2
; X86-SSE2-NEXT: por %xmm0, %xmm2
; X86-SSE2-NEXT: movd %xmm2, %eax
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; X86-SSE2-NEXT: movd %xmm0, %edx
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v8i64:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa %xmm0, %xmm5
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm4 = [0,2147483648,0,2147483648]
; X86-SSE42-NEXT: movdqa %xmm1, %xmm6
; X86-SSE42-NEXT: pxor %xmm4, %xmm6
; X86-SSE42-NEXT: movdqa %xmm3, %xmm0
; X86-SSE42-NEXT: pxor %xmm4, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm6, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
; X86-SSE42-NEXT: movdqa %xmm5, %xmm1
; X86-SSE42-NEXT: pxor %xmm4, %xmm1
; X86-SSE42-NEXT: movdqa %xmm2, %xmm0
; X86-SSE42-NEXT: pxor %xmm4, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm5, %xmm2
; X86-SSE42-NEXT: movapd %xmm2, %xmm1
; X86-SSE42-NEXT: xorpd %xmm4, %xmm1
; X86-SSE42-NEXT: movapd %xmm3, %xmm0
; X86-SSE42-NEXT: xorpd %xmm4, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
; X86-SSE42-NEXT: movdqa %xmm3, %xmm0
; X86-SSE42-NEXT: pxor %xmm4, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm4
; X86-SSE42-NEXT: pcmpgtq %xmm0, %xmm4
; X86-SSE42-NEXT: movdqa %xmm4, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
; X86-SSE42-NEXT: movd %xmm1, %eax
; X86-SSE42-NEXT: pextrd $1, %xmm1, %edx
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v8i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
; X86-AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; X86-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm4
; X86-AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm5
; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm4, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; X86-AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm4
; X86-AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X86-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v8i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
; X86-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; X86-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
; X86-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm4
; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; X86-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
; X86-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v8i64:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; X64-SSE2-NEXT: movdqa %xmm1, %xmm5
; X64-SSE2-NEXT: pxor %xmm4, %xmm5
; X64-SSE2-NEXT: movdqa %xmm3, %xmm6
; X64-SSE2-NEXT: pxor %xmm4, %xmm6
; X64-SSE2-NEXT: movdqa %xmm6, %xmm7
; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm7
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm6
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm8, %xmm6
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; X64-SSE2-NEXT: por %xmm6, %xmm5
; X64-SSE2-NEXT: pand %xmm5, %xmm1
; X64-SSE2-NEXT: pandn %xmm3, %xmm5
; X64-SSE2-NEXT: por %xmm1, %xmm5
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
; X64-SSE2-NEXT: movdqa %xmm2, %xmm3
; X64-SSE2-NEXT: pxor %xmm4, %xmm3
; X64-SSE2-NEXT: movdqa %xmm3, %xmm6
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm6
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm3
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm7, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
; X64-SSE2-NEXT: por %xmm1, %xmm3
; X64-SSE2-NEXT: pand %xmm3, %xmm0
; X64-SSE2-NEXT: pandn %xmm2, %xmm3
; X64-SSE2-NEXT: por %xmm0, %xmm3
; X64-SSE2-NEXT: movdqa %xmm3, %xmm0
; X64-SSE2-NEXT: pxor %xmm4, %xmm0
; X64-SSE2-NEXT: movdqa %xmm5, %xmm1
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm6, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X64-SSE2-NEXT: por %xmm0, %xmm1
; X64-SSE2-NEXT: pand %xmm1, %xmm3
; X64-SSE2-NEXT: pandn %xmm5, %xmm1
; X64-SSE2-NEXT: por %xmm3, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
; X64-SSE2-NEXT: pxor %xmm4, %xmm2
; X64-SSE2-NEXT: pxor %xmm0, %xmm4
; X64-SSE2-NEXT: movdqa %xmm4, %xmm3
; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm5, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; X64-SSE2-NEXT: por %xmm2, %xmm3
; X64-SSE2-NEXT: pand %xmm3, %xmm1
; X64-SSE2-NEXT: pandn %xmm0, %xmm3
; X64-SSE2-NEXT: por %xmm1, %xmm3
; X64-SSE2-NEXT: movq %xmm3, %rax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v8i64:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa %xmm0, %xmm5
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; X64-SSE42-NEXT: movdqa %xmm1, %xmm6
; X64-SSE42-NEXT: pxor %xmm4, %xmm6
; X64-SSE42-NEXT: movdqa %xmm3, %xmm0
; X64-SSE42-NEXT: pxor %xmm4, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm6, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
; X64-SSE42-NEXT: movdqa %xmm5, %xmm1
; X64-SSE42-NEXT: pxor %xmm4, %xmm1
; X64-SSE42-NEXT: movdqa %xmm2, %xmm0
; X64-SSE42-NEXT: pxor %xmm4, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm5, %xmm2
; X64-SSE42-NEXT: movapd %xmm2, %xmm1
; X64-SSE42-NEXT: xorpd %xmm4, %xmm1
; X64-SSE42-NEXT: movapd %xmm3, %xmm0
; X64-SSE42-NEXT: xorpd %xmm4, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
; X64-SSE42-NEXT: movdqa %xmm3, %xmm0
; X64-SSE42-NEXT: pxor %xmm4, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm4
; X64-SSE42-NEXT: pcmpgtq %xmm0, %xmm4
; X64-SSE42-NEXT: movdqa %xmm4, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
; X64-SSE42-NEXT: movq %xmm1, %rax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v8i64:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; X64-AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; X64-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm4
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X64-AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm4
; X64-AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm5
; X64-AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm4, %ymm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; X64-AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm4
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X64-AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm4
; X64-AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X64-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v8i64:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; X64-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; X64-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
; X64-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm4
; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; X64-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3
; X64-AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm2
; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v8i64:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  %1 = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <8 x i64> %a0, %1
  %3 = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1
  %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <8 x i64> %3, %4
  %6 = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4
  %7 = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <8 x i64> %6, %7
  %9 = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7
  %10 = extractelement <8 x i64> %9, i32 0
  ret i64 %10
}

define i32 @test_reduce_v16i32(<16 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i32:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT: movdqa %xmm1, %xmm5
; X86-SSE2-NEXT: pxor %xmm4, %xmm5
; X86-SSE2-NEXT: movdqa %xmm3, %xmm6
; X86-SSE2-NEXT: pxor %xmm4, %xmm6
; X86-SSE2-NEXT: pcmpgtd %xmm5, %xmm6
; X86-SSE2-NEXT: pand %xmm6, %xmm1
; X86-SSE2-NEXT: pandn %xmm3, %xmm6
; X86-SSE2-NEXT: por %xmm1, %xmm6
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
; X86-SSE2-NEXT: pxor %xmm4, %xmm3
1401 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3 1402 ; X86-SSE2-NEXT: pand %xmm3, %xmm0 1403 ; X86-SSE2-NEXT: pandn %xmm2, %xmm3 1404 ; X86-SSE2-NEXT: por %xmm0, %xmm3 1405 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm0 1406 ; X86-SSE2-NEXT: pxor %xmm4, %xmm0 1407 ; X86-SSE2-NEXT: movdqa %xmm6, %xmm1 1408 ; X86-SSE2-NEXT: pxor %xmm4, %xmm1 1409 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1410 ; X86-SSE2-NEXT: pand %xmm1, %xmm3 1411 ; X86-SSE2-NEXT: pandn %xmm6, %xmm1 1412 ; X86-SSE2-NEXT: por %xmm3, %xmm1 1413 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 1414 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 1415 ; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1416 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 1417 ; X86-SSE2-NEXT: pxor %xmm4, %xmm3 1418 ; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 1419 ; X86-SSE2-NEXT: pand %xmm3, %xmm1 1420 ; X86-SSE2-NEXT: pandn %xmm0, %xmm3 1421 ; X86-SSE2-NEXT: por %xmm1, %xmm3 1422 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3] 1423 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm1 1424 ; X86-SSE2-NEXT: pxor %xmm4, %xmm1 1425 ; X86-SSE2-NEXT: pxor %xmm0, %xmm4 1426 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm4 1427 ; X86-SSE2-NEXT: pand %xmm4, %xmm3 1428 ; X86-SSE2-NEXT: pandn %xmm0, %xmm4 1429 ; X86-SSE2-NEXT: por %xmm3, %xmm4 1430 ; X86-SSE2-NEXT: movd %xmm4, %eax 1431 ; X86-SSE2-NEXT: retl 1432 ; 1433 ; X86-SSE42-LABEL: test_reduce_v16i32: 1434 ; X86-SSE42: ## %bb.0: 1435 ; X86-SSE42-NEXT: pminud %xmm3, %xmm1 1436 ; X86-SSE42-NEXT: pminud %xmm2, %xmm0 1437 ; X86-SSE42-NEXT: pminud %xmm1, %xmm0 1438 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1439 ; X86-SSE42-NEXT: pminud %xmm0, %xmm1 1440 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 1441 ; X86-SSE42-NEXT: pminud %xmm1, %xmm0 1442 ; X86-SSE42-NEXT: movd %xmm0, %eax 1443 ; X86-SSE42-NEXT: retl 1444 ; 1445 ; X86-AVX1-LABEL: test_reduce_v16i32: 1446 ; X86-AVX1: ## %bb.0: 1447 ; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1448 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1449 ; X86-AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2 
1450 ; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0 1451 ; X86-AVX1-NEXT: vpminud %xmm2, %xmm0, %xmm0 1452 ; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1453 ; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0 1454 ; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1455 ; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0 1456 ; X86-AVX1-NEXT: vmovd %xmm0, %eax 1457 ; X86-AVX1-NEXT: vzeroupper 1458 ; X86-AVX1-NEXT: retl 1459 ; 1460 ; X86-AVX2-LABEL: test_reduce_v16i32: 1461 ; X86-AVX2: ## %bb.0: 1462 ; X86-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0 1463 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1464 ; X86-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0 1465 ; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1466 ; X86-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0 1467 ; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1468 ; X86-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0 1469 ; X86-AVX2-NEXT: vmovd %xmm0, %eax 1470 ; X86-AVX2-NEXT: vzeroupper 1471 ; X86-AVX2-NEXT: retl 1472 ; 1473 ; X64-SSE2-LABEL: test_reduce_v16i32: 1474 ; X64-SSE2: ## %bb.0: 1475 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] 1476 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm5 1477 ; X64-SSE2-NEXT: pxor %xmm4, %xmm5 1478 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm6 1479 ; X64-SSE2-NEXT: pxor %xmm4, %xmm6 1480 ; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm6 1481 ; X64-SSE2-NEXT: pand %xmm6, %xmm1 1482 ; X64-SSE2-NEXT: pandn %xmm3, %xmm6 1483 ; X64-SSE2-NEXT: por %xmm1, %xmm6 1484 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1485 ; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1486 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm3 1487 ; X64-SSE2-NEXT: pxor %xmm4, %xmm3 1488 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm3 1489 ; X64-SSE2-NEXT: pand %xmm3, %xmm0 1490 ; X64-SSE2-NEXT: pandn %xmm2, %xmm3 1491 ; X64-SSE2-NEXT: por %xmm0, %xmm3 1492 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm0 1493 ; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1494 ; X64-SSE2-NEXT: movdqa %xmm6, %xmm1 1495 ; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1496 ; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 
1497 ; X64-SSE2-NEXT: pand %xmm1, %xmm3 1498 ; X64-SSE2-NEXT: pandn %xmm6, %xmm1 1499 ; X64-SSE2-NEXT: por %xmm3, %xmm1 1500 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 1501 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1502 ; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1503 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm3 1504 ; X64-SSE2-NEXT: pxor %xmm4, %xmm3 1505 ; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 1506 ; X64-SSE2-NEXT: pand %xmm3, %xmm1 1507 ; X64-SSE2-NEXT: pandn %xmm0, %xmm3 1508 ; X64-SSE2-NEXT: por %xmm1, %xmm3 1509 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3] 1510 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm1 1511 ; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1512 ; X64-SSE2-NEXT: pxor %xmm0, %xmm4 1513 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm4 1514 ; X64-SSE2-NEXT: pand %xmm4, %xmm3 1515 ; X64-SSE2-NEXT: pandn %xmm0, %xmm4 1516 ; X64-SSE2-NEXT: por %xmm3, %xmm4 1517 ; X64-SSE2-NEXT: movd %xmm4, %eax 1518 ; X64-SSE2-NEXT: retq 1519 ; 1520 ; X64-SSE42-LABEL: test_reduce_v16i32: 1521 ; X64-SSE42: ## %bb.0: 1522 ; X64-SSE42-NEXT: pminud %xmm3, %xmm1 1523 ; X64-SSE42-NEXT: pminud %xmm2, %xmm0 1524 ; X64-SSE42-NEXT: pminud %xmm1, %xmm0 1525 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1526 ; X64-SSE42-NEXT: pminud %xmm0, %xmm1 1527 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 1528 ; X64-SSE42-NEXT: pminud %xmm1, %xmm0 1529 ; X64-SSE42-NEXT: movd %xmm0, %eax 1530 ; X64-SSE42-NEXT: retq 1531 ; 1532 ; X64-AVX1-LABEL: test_reduce_v16i32: 1533 ; X64-AVX1: ## %bb.0: 1534 ; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1535 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1536 ; X64-AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2 1537 ; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0 1538 ; X64-AVX1-NEXT: vpminud %xmm2, %xmm0, %xmm0 1539 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1540 ; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0 1541 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1542 ; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0 1543 ; X64-AVX1-NEXT: vmovd %xmm0, %eax 1544 ; 
X64-AVX1-NEXT: vzeroupper 1545 ; X64-AVX1-NEXT: retq 1546 ; 1547 ; X64-AVX2-LABEL: test_reduce_v16i32: 1548 ; X64-AVX2: ## %bb.0: 1549 ; X64-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0 1550 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1551 ; X64-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0 1552 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1553 ; X64-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0 1554 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1555 ; X64-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0 1556 ; X64-AVX2-NEXT: vmovd %xmm0, %eax 1557 ; X64-AVX2-NEXT: vzeroupper 1558 ; X64-AVX2-NEXT: retq 1559 ; 1560 ; X64-AVX512-LABEL: test_reduce_v16i32: 1561 ; X64-AVX512: ## %bb.0: 1562 ; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1563 ; X64-AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0 1564 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1565 ; X64-AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0 1566 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1567 ; X64-AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0 1568 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1569 ; X64-AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0 1570 ; X64-AVX512-NEXT: vmovd %xmm0, %eax 1571 ; X64-AVX512-NEXT: vzeroupper 1572 ; X64-AVX512-NEXT: retq 1573 %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1574 %2 = icmp ult <16 x i32> %a0, %1 1575 %3 = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1 1576 %4 = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1577 %5 = icmp ult <16 x i32> %3, %4 1578 %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4 1579 %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <16 x i32> %6, %7
  %9 = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7
  %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <16 x i32> %9, %10
  %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10
  %13 = extractelement <16 x i32> %12, i32 0
  ret i32 %13
}

; Horizontal unsigned-min reduction of <32 x i16> via repeated halve-and-min
; (shufflevector + icmp ult + select), extracting element 0 at the end.
; SSE2 has no unsigned i16 min, so the expected code biases the inputs by
; 0x8000 (pxor with the 32768 splat kept in xmm4) and uses the signed pminsw
; instead; SSE4.2 and AVX targets use pminuw directly and finish with
; phminposuw.
define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
; X86-SSE2-NEXT:    pminsw %xmm2, %xmm0
; X86-SSE2-NEXT:    pxor %xmm4, %xmm3
; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
; X86-SSE2-NEXT:    pminsw %xmm3, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm2
; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
; X86-SSE2-NEXT:    pminsw %xmm1, %xmm2
; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; X86-SSE2-NEXT:    pxor %xmm4, %xmm2
; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
; X86-SSE2-NEXT:    pminsw %xmm2, %xmm0
; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
; X86-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    pxor %xmm4, %xmm1
; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
; X86-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X86-SSE2-NEXT:    pxor %xmm4, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v32i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminuw %xmm3, %xmm1
; X86-SSE42-NEXT:    pminuw %xmm2, %xmm0
; X86-SSE42-NEXT:    pminuw %xmm1, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v32i16:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpminuw %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v32i16:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    pminsw %xmm2, %xmm0
; X64-SSE2-NEXT:    pxor %xmm4, %xmm3
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    pminsw %xmm3, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm2
; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
; X64-SSE2-NEXT:    pminsw %xmm1, %xmm2
; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; X64-SSE2-NEXT:    pxor %xmm4, %xmm2
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    pminsw %xmm2, %xmm0
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    pminsw %xmm0, %xmm1
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    pxor %xmm4, %xmm1
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    pminsw %xmm1, %xmm0
; X64-SSE2-NEXT:    pxor %xmm4, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminuw %xmm3, %xmm1
; X64-SSE42-NEXT:    pminuw %xmm2, %xmm0
; X64-SSE42-NEXT:    pminuw %xmm1, %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v32i16:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpminuw %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpminuw %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v32i16:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v32i16:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <32 x i16> %a0, %1
  %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
  %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <32 x i16> %3, %4
  %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
  %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <32 x i16> %6, %7
  %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
  %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <32 x i16> %9, %10
  %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10
  %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp ult <32 x i16> %12, %13
  %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13
  %16 = extractelement <32 x i16> %15, i32 0
  ret i16 %16
}

; Horizontal unsigned-min reduction of <64 x i8>, same halve-and-min pattern.
; pminub is unsigned and available from SSE2, so the SSE2 expected code is
; plain pairwise mins plus pshufd/psrld/psrlw shuffles; SSE4.2 and AVX targets
; fold the odd bytes with a psrlw by 8 and finish with phminposuw.
define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v64i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pminub %xmm3, %xmm1
; X86-SSE2-NEXT:    pminub %xmm2, %xmm0
; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pminub %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    pminub %xmm1, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v64i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pminub %xmm3, %xmm1
; X86-SSE42-NEXT:    pminub %xmm2, %xmm0
; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    psrlw $8, %xmm1
; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    pextrb $0, %xmm0, %eax
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v64i8:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT:    vpminub %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; X86-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v64i8:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; X86-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v64i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pminub %xmm3, %xmm1
; X64-SSE2-NEXT:    pminub %xmm2, %xmm0
; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pminub %xmm0, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    pminub %xmm1, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v64i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pminub %xmm3, %xmm1
; X64-SSE42-NEXT:    pminub %xmm2, %xmm0
; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    psrlw $8, %xmm1
; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    pextrb $0, %xmm0, %eax
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v64i8:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT:    vpminub %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpminub %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v64i8:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v64i8:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <64 x i8> %a0, %1
  %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
  %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <64 x i8> %3, %4
  %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
  %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <64 x i8> %6, %7
  %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <64 x i8> %9, %10
  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
  %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp ult <64 x i8> %12, %13
  %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13
  %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %17 = icmp ult <64 x i8> %15, %16
  %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16
  %19 = extractelement <64 x i8> %18, i32 0
  ret i8 %19
}