; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; PR33276 - https://bugs.llvm.org/show_bug.cgi?id=33276
; If both operands of an unsigned icmp are known non-negative, then
; we don't need to flip the sign bits in order to map to signed pcmpgt*.

define <2 x i1> @ugt_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE-LABEL: ugt_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlq $1, %xmm0
; SSE-NEXT:    psrlq $1, %xmm1
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE-NEXT:    pxor %xmm2, %xmm1
; SSE-NEXT:    pxor %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE-NEXT:    pand %xmm3, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ugt_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
  %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
  %cmp = icmp ugt <2 x i64> %sh1, %sh2
  ret <2 x i1> %cmp
}

define <2 x i1> @ult_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE-LABEL: ult_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlq $1, %xmm0
; SSE-NEXT:    psrlq $1, %xmm1
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE-NEXT:    pxor %xmm2, %xmm0
; SSE-NEXT:    pxor %xmm2, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE-NEXT:    pand %xmm3, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ult_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
  %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
  %cmp = icmp ult <2 x i64> %sh1, %sh2
  ret <2 x i1> %cmp
}
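
; Note that the v2i64 tests keep a pxor on SSE even after the shift:
; pcmpgtq requires SSE4.2, which neither SSE run here has, so the 64-bit
; compare is emulated with dword ops. The high dwords are known
; non-negative after psrlq $1 and compare correctly with signed pcmpgtd,
; but the low dwords still span the full unsigned range, so their sign
; bits are still flipped (the [2147483648,0,2147483648,0] mask touches
; dwords 0 and 2 only). A minimal scalar sketch of the emulation,
; assuming a and b have already been shifted (illustrative only, not part
; of the test):
;   hi_gt = (int32_t)hi(a) > (int32_t)hi(b)
;   hi_eq = hi(a) == hi(b)
;   lo_gt = (int32_t)(lo(a) ^ 0x80000000) > (int32_t)(lo(b) ^ 0x80000000)
;   a >u b  ==  hi_gt | (hi_eq & lo_gt)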

define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE-LABEL: uge_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlq $1, %xmm0
; SSE-NEXT:    psrlq $1, %xmm1
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE-NEXT:    pxor %xmm2, %xmm0
; SSE-NEXT:    pxor %xmm2, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE-NEXT:    pand %xmm3, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: uge_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
  %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
  %cmp = icmp uge <2 x i64> %sh1, %sh2
  ret <2 x i1> %cmp
}

define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE-LABEL: ule_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlq $1, %xmm0
; SSE-NEXT:    psrlq $1, %xmm1
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE-NEXT:    pxor %xmm2, %xmm1
; SSE-NEXT:    pxor %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE-NEXT:    pand %xmm3, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ule_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
  %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
  %cmp = icmp ule <2 x i64> %sh1, %sh2
  ret <2 x i1> %cmp
}

define <4 x i1> @ugt_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: ugt_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld $1, %xmm0
; SSE-NEXT:    psrld $1, %xmm1
; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ugt_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp ugt <4 x i32> %sh1, %sh2
  ret <4 x i1> %cmp
}

define <4 x i1> @ult_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: ult_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld $1, %xmm0
; SSE-NEXT:    psrld $1, %xmm1
; SSE-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ult_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp ult <4 x i32> %sh1, %sh2
  ret <4 x i1> %cmp
}
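
; uge/ule have no one-instruction pcmpgt* mapping, so each run lowers the
; non-strict predicate differently. SSE2 computes the strict compare and
; inverts it: pcmpeqd on a register with itself materializes all-ones, and
; pxor with all-ones is a vector NOT. SSE4.1 and AVX instead use unsigned
; min/max plus equality, based on the identities:
;   x uge y  <=>  umax(x, y) == x
;   x ule y  <=>  umin(x, y) == x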

define <4 x i1> @uge_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: uge_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrld $1, %xmm0
; SSE2-NEXT:    psrld $1, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uge_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrld $1, %xmm0
; SSE41-NEXT:    psrld $1, %xmm1
; SSE41-NEXT:    pmaxud %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: uge_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp uge <4 x i32> %sh1, %sh2
  ret <4 x i1> %cmp
}

define <4 x i1> @ule_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: ule_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrld $1, %xmm0
; SSE2-NEXT:    psrld $1, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ule_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrld $1, %xmm0
; SSE41-NEXT:    psrld $1, %xmm1
; SSE41-NEXT:    pminud %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ule_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp ule <4 x i32> %sh1, %sh2
  ret <4 x i1> %cmp
}

define <8 x i1> @ugt_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE-LABEL: ugt_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ugt_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %cmp = icmp ugt <8 x i16> %sh1, %sh2
  ret <8 x i1> %cmp
}

define <8 x i1> @ult_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE-LABEL: ult_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pcmpgtw %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ult_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %cmp = icmp ult <8 x i16> %sh1, %sh2
  ret <8 x i1> %cmp
}
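
; Unsigned word min/max (pmaxuw/pminuw) only arrived with SSE4.1, so the
; SSE2 i16 uge/ule tests below use unsigned saturating subtraction
; instead: y -us x saturates to 0 exactly when x uge y, and psubusw
; followed by pcmpeqw against zero tests that. For example, with x = 5,
; y = 7: 7 -us 5 = 2 != 0, so "5 uge 7" is false, while 5 -us 7 saturates
; to 0, so "7 uge 5" is true.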

define <8 x i1> @uge_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: uge_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrlw $1, %xmm0
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    psubusw %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uge_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrlw $1, %xmm0
; SSE41-NEXT:    psrlw $1, %xmm1
; SSE41-NEXT:    pmaxuw %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: uge_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %cmp = icmp uge <8 x i16> %sh1, %sh2
  ret <8 x i1> %cmp
}

define <8 x i1> @ule_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: ule_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrlw $1, %xmm0
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    psubusw %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ule_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrlw $1, %xmm0
; SSE41-NEXT:    psrlw $1, %xmm1
; SSE41-NEXT:    pminuw %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ule_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %cmp = icmp ule <8 x i16> %sh1, %sh2
  ret <8 x i1> %cmp
}

define <16 x i1> @ugt_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: ugt_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ugt_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %cmp = icmp ugt <16 x i8> %sh1, %sh2
  ret <16 x i1> %cmp
}

define <16 x i1> @ult_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: ult_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pand %xmm1, %xmm2
; SSE-NEXT:    pcmpgtb %xmm0, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ult_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %cmp = icmp ult <16 x i8> %sh1, %sh2
  ret <16 x i1> %cmp
}
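
; x86 has no psrlb, so the v16i8 shift is lowered as psrlw $1 followed by
; pand with a splat of 127: the mask clears the bit each byte would
; otherwise inherit from its higher neighbor, leaving exactly b >> 1 in
; every byte. The masked bytes are all in [0,127], so signed pcmpgtb is
; usable for ugt/ult. Unlike words, unsigned byte min/max (pmaxub/pminub)
; already exist in SSE2, which is why the v16i8 uge/ule tests below need
; only a single SSE block.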

define <16 x i1> @uge_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: uge_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pmaxub %xmm0, %xmm1
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: uge_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %cmp = icmp uge <16 x i8> %sh1, %sh2
  ret <16 x i1> %cmp
}

define <16 x i1> @ule_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: ule_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pminub %xmm0, %xmm1
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ule_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %cmp = icmp ule <16 x i8> %sh1, %sh2
  ret <16 x i1> %cmp
}