/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef L
# define L(label)	.L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef cfi_remember_state
# define cfi_remember_state	.cfi_remember_state
#endif

#ifndef cfi_restore_state
# define cfi_restore_state	.cfi_restore_state
#endif

#ifndef ENTRY
# define ENTRY(name) \
	.type name, @function; \
	.globl name; \
	.p2align 4; \
name: \
	cfi_startproc
#endif

#ifndef END
# define END(name) \
	cfi_endproc; \
	.size name, .-name
#endif

#define CFI_PUSH(REG) \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG) \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

#ifndef USE_AS_STRNCMP
# define STR1	4
# define STR2	STR1+4
# define RETURN	ret

# define UPDATE_STRNCMP_COUNTER
#else
# define STR1	8
# define STR2	STR1+4
# define CNT	STR2+4
# define RETURN	POP (%ebp); ret; CFI_PUSH (%ebp)

# define UPDATE_STRNCMP_COUNTER \
	/* calculate left number to compare */ \
	mov	$16, %esi; \
	sub	%ecx, %esi; \
	cmp	%esi, %ebp; \
	jbe	L(more8byteseq); \
	sub	%esi, %ebp
#endif

	.section .text.ssse3,"ax",@progbits
ENTRY (ssse3_strcmp_latest)
#ifdef USE_AS_STRNCMP
	PUSH	(%ebp)
#endif
	movl	STR1(%esp), %edx
	movl	STR2(%esp), %eax
#ifdef USE_AS_STRNCMP
	movl	CNT(%esp), %ebp
	cmp	$16, %ebp
	jb	L(less16bytes_sncmp)
	jmp	L(more16bytes)
#endif

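/*
 * The first 8 bytes are compared one byte at a time.  After that, a 16-byte
 * SSE compare is used only when neither pointer is within 16 bytes of the
 * end of its page; otherwise control falls through to L(crosspage), which
 * sets up the aligned main loop.
 */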
	movzbl	(%eax), %ecx
	cmpb	%cl, (%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	1(%eax), %ecx
	cmpb	%cl, 1(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	2(%eax), %ecx
	cmpb	%cl, 2(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	3(%eax), %ecx
	cmpb	%cl, 3(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	4(%eax), %ecx
	cmpb	%cl, 4(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	5(%eax), %ecx
	cmpb	%cl, 5(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	6(%eax), %ecx
	cmpb	%cl, 6(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	7(%eax), %ecx
	cmpb	%cl, 7(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	add	$8, %edx
	add	$8, %eax
#ifdef USE_AS_STRNCMP
	cmp	$8, %ebp
	lea	-8(%ebp), %ebp
	je	L(eq)
L(more16bytes):
#endif
	movl	%edx, %ecx
	and	$0xfff, %ecx
	cmp	$0xff0, %ecx
	ja	L(crosspage)
	mov	%eax, %ecx
	and	$0xfff, %ecx
	cmp	$0xff0, %ecx
	ja	L(crosspage)
	pxor	%xmm0, %xmm0
	movlpd	(%eax), %xmm1
	movlpd	(%edx), %xmm2
	movhpd	8(%eax), %xmm1
	movhpd	8(%edx), %xmm2
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %ecx
	sub	$0xffff, %ecx
	jnz	L(less16bytes)
#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(eq)
#endif
	add	$16, %eax
	add	$16, %edx

L(crosspage):

	PUSH	(%ebx)
	PUSH	(%edi)
	PUSH	(%esi)
#ifdef USE_AS_STRNCMP
	cfi_remember_state
#endif

	movl	%edx, %edi
	movl	%eax, %ecx
	and	$0xf, %ecx
	and	$0xf, %edi
	xor	%ecx, %eax
	xor	%edi, %edx
	xor	%ebx, %ebx
	cmp	%edi, %ecx
	je	L(ashr_0)
	ja	L(bigger)
	or	$0x20, %ebx
	xchg	%edx, %eax
	xchg	%ecx, %edi
L(bigger):
	lea	15(%edi), %edi
	sub	%ecx, %edi
	cmp	$8, %edi
	jle	L(ashr_less_8)
	cmp	$14, %edi
	je	L(ashr_15)
	cmp	$13, %edi
	je	L(ashr_14)
	cmp	$12, %edi
	je	L(ashr_13)
	cmp	$11, %edi
	je	L(ashr_12)
	cmp	$10, %edi
	je	L(ashr_11)
	cmp	$9, %edi
	je	L(ashr_10)
L(ashr_less_8):
	je	L(ashr_9)
	cmp	$7, %edi
	je	L(ashr_8)
	cmp	$6, %edi
	je	L(ashr_7)
	cmp	$5, %edi
	je	L(ashr_6)
	cmp	$4, %edi
	je	L(ashr_5)
	cmp	$3, %edi
	je	L(ashr_4)
	cmp	$2, %edi
	je	L(ashr_3)
	cmp	$1, %edi
	je	L(ashr_2)
	cmp	$0, %edi
	je	L(ashr_1)

/*
 * The following cases will be handled by ashr_0
 *  ecx(offset of esi)  eax(offset of edi)  relative offset	corresponding case
 *        n(0~15)            n(0~15)           15(15+ n-n)         ashr_0
 */
	.p2align 4
L(ashr_0):
	mov	$0xffff, %esi
	movdqa	(%eax), %xmm1
	pxor	%xmm0, %xmm0
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	(%edx), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	mov	%ecx, %edi
	jne	L(less32bytes)
	UPDATE_STRNCMP_COUNTER
	mov	$0x10, %ebx
	mov	$0x10, %ecx
	pxor	%xmm0, %xmm0
	.p2align 4
L(loop_ashr_0):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)
#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	jmp	L(loop_ashr_0)

/*
 * The following cases will be handled by ashr_1
 *  ecx(offset of esi)  eax(offset of edi)  relative offset	corresponding case
 *        n(15)              n -15            0(15 +(n-15) - n)      ashr_1
 */
	.p2align 4
L(ashr_1):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$15, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb	%xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-15(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$1, %ebx
	lea	1(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_1):
	add	$16, %edi
	jg	L(nibble_ashr_1)

L(gobble_ashr_1):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$1, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)
#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_1)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$1, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_1)

	.p2align 4
L(nibble_ashr_1):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %esi
	test	$0xfffe, %esi
	jnz	L(ashr_1_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$15, %ebp
	jbe	L(ashr_1_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_1)

	.p2align 4
L(ashr_1_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$1, %xmm0
	psrldq	$1, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_2
 *  ecx(offset of esi)  eax(offset of edi)  relative offset	corresponding case
 *        n(14~15)           n -14            1(15 +(n-14) - n)      ashr_2
 */
	.p2align 4
L(ashr_2):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$14, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb	%xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-14(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$2, %ebx
	lea	2(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_2):
	add	$16, %edi
	jg	L(nibble_ashr_2)

L(gobble_ashr_2):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$2, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_2)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$2, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_2)

	.p2align 4
L(nibble_ashr_2):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %esi
	test	$0xfffc, %esi
	jnz	L(ashr_2_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$14, %ebp
	jbe	L(ashr_2_exittail)
#endif

	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_2)

	.p2align 4
L(ashr_2_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$2, %xmm0
	psrldq	$2, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_3
 *  ecx(offset of esi)  eax(offset of edi)  relative offset	corresponding case
 *        n(13~15)           n -13            2(15 +(n-13) - n)      ashr_3
 */
	.p2align 4
L(ashr_3):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$13, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb	%xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-13(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$3, %ebx
	lea	3(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_3):
	add	$16, %edi
	jg	L(nibble_ashr_3)

L(gobble_ashr_3):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$3, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_3)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$3, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_3)

	.p2align 4
L(nibble_ashr_3):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %esi
	test	$0xfff8, %esi
	jnz	L(ashr_3_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$13, %ebp
	jbe	L(ashr_3_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_3)

	.p2align 4
L(ashr_3_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$3, %xmm0
	psrldq	$3, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_4
 *  ecx(offset of esi)  eax(offset of edi)  relative offset	corresponding case
 *        n(12~15)           n -12            3(15 +(n-12) - n)      ashr_4
 */
	.p2align 4
L(ashr_4):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$12, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb	%xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-12(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$4, %ebx
	lea	4(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_4):
	add	$16, %edi
	jg	L(nibble_ashr_4)

L(gobble_ashr_4):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$4, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_4)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$4, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_4)

	.p2align 4
L(nibble_ashr_4):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %esi
	test	$0xfff0, %esi
	jnz	L(ashr_4_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$12, %ebp
	jbe	L(ashr_4_exittail)
#endif

	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_4)

	.p2align 4
L(ashr_4_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$4, %xmm0
	psrldq	$4, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_5
 *  ecx(offset of esi)  eax(offset of edi)  relative offset	corresponding case
 *        n(11~15)           n -11            4(15 +(n-11) - n)      ashr_5
 */
	.p2align 4
L(ashr_5):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$11, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb	%xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-11(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$5, %ebx
	lea	5(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_5):
	add	$16, %edi
	jg	L(nibble_ashr_5)

L(gobble_ashr_5):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$5, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_5)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$5, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_5)

	.p2align 4
L(nibble_ashr_5):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %esi
	test	$0xffe0, %esi
	jnz	L(ashr_5_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$11, %ebp
	jbe	L(ashr_5_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_5)

	.p2align 4
L(ashr_5_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$5, %xmm0
	psrldq	$5, %xmm3
	jmp	L(aftertail)

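/*
 * In each L(loop_ashr_N) loop, %edi holds ((%edx + N) & 0xfff) - 0x1000 and
 * is advanced by 16 per 16-byte step.  Once it turns positive, the shifted
 * window on the %edx string needs bytes from the next page, so the loop
 * detours to L(nibble_ashr_N): if the terminating NUL lies in the not yet
 * compared tail (bytes N..15) of the block cached in %xmm3, the compare is
 * finished via L(ashr_N_exittail) without touching the next page; otherwise
 * it is safe to continue in L(gobble_ashr_N).
 */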
/*
 * The following cases will be handled by ashr_6
 *  ecx(offset of esi)  eax(offset of edi)  relative offset	corresponding case
 *        n(10~15)           n -10            5(15 +(n-10) - n)      ashr_6
 */

	.p2align 4
L(ashr_6):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$10, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb	%xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-10(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$6, %ebx
	lea	6(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_6):
	add	$16, %edi
	jg	L(nibble_ashr_6)

L(gobble_ashr_6):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$6, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_6)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$6, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)
#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_6)

	.p2align 4
L(nibble_ashr_6):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %esi
	test	$0xffc0, %esi
	jnz	L(ashr_6_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$10, %ebp
	jbe	L(ashr_6_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_6)

	.p2align 4
L(ashr_6_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$6, %xmm0
	psrldq	$6, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_7
 *  ecx(offset of esi)  eax(offset of edi)  relative offset	corresponding case
 *        n(9~15)            n - 9            6(15 +(n-9) - n)       ashr_7
 */

	.p2align 4
L(ashr_7):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$9, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb	%xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-9(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$7, %ebx
	lea	8(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_7):
	add	$16, %edi
	jg	L(nibble_ashr_7)

L(gobble_ashr_7):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$7, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_7)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$7, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_7)

	.p2align 4
L(nibble_ashr_7):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %esi
	test	$0xff80, %esi
	jnz	L(ashr_7_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$9, %ebp
	jbe	L(ashr_7_exittail)
#endif
	pxor	%xmm0, %xmm0
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_7)

	.p2align 4
L(ashr_7_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$7, %xmm0
	psrldq	$7, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_8
 *  ecx(offset of esi)  eax(offset of edi)  relative offset	corresponding case
 *        n(8~15)            n - 8            7(15 +(n-8) - n)       ashr_8
 */
	.p2align 4
L(ashr_8):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$8, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb	%xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-8(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$8, %ebx
	lea	8(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_8):
	add	$16, %edi
	jg	L(nibble_ashr_8)

L(gobble_ashr_8):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$8, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_8)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$8, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_8)

	.p2align 4
L(nibble_ashr_8):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %esi
	test	$0xff00, %esi
	jnz	L(ashr_8_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$8, %ebp
	jbe	L(ashr_8_exittail)
#endif
	pxor	%xmm0, %xmm0
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_8)

	.p2align 4
L(ashr_8_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$8, %xmm0
	psrldq	$8, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_9
 *  ecx(offset of esi)  eax(offset of edi)  relative offset	corresponding case
 *        n(7~15)            n - 7            8(15 +(n-7) - n)       ashr_9
 */
	.p2align 4
L(ashr_9):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$7, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb	%xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-7(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$9, %ebx
	lea	9(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_9):
	add	$16, %edi
	jg	L(nibble_ashr_9)

L(gobble_ashr_9):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$9, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_9)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$9, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_9)

	.p2align 4
L(nibble_ashr_9):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %esi
	test	$0xfe00, %esi
	jnz	L(ashr_9_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$7, %ebp
	jbe	L(ashr_9_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_9)

	.p2align 4
L(ashr_9_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$9, %xmm0
	psrldq	$9, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_10
 *  ecx(offset of esi)  eax(offset of edi)  relative offset	corresponding case
 *        n(6~15)            n - 6            9(15 +(n-6) - n)       ashr_10
 */
	.p2align 4
L(ashr_10):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$6, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb	%xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-6(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$10, %ebx
	lea	10(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_10):
	add	$16, %edi
	jg	L(nibble_ashr_10)

L(gobble_ashr_10):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$10, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_10)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$10, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_10)

	.p2align 4
L(nibble_ashr_10):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %esi
	test	$0xfc00, %esi
	jnz	L(ashr_10_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$6, %ebp
	jbe	L(ashr_10_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_10)

	.p2align 4
L(ashr_10_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$10, %xmm0
	psrldq	$10, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_11
 *  ecx(offset of esi)  eax(offset of edi)  relative offset	corresponding case
 *        n(5~15)            n - 5            10(15 +(n-5) - n)      ashr_11
 */
	.p2align 4
L(ashr_11):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$5, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb	%xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-5(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$11, %ebx
	lea	11(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_11):
	add	$16, %edi
	jg	L(nibble_ashr_11)

L(gobble_ashr_11):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$11, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_11)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$11, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_11)

	.p2align 4
L(nibble_ashr_11):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %esi
	test	$0xf800, %esi
	jnz	L(ashr_11_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$5, %ebp
	jbe	L(ashr_11_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_11)

	.p2align 4
L(ashr_11_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$11, %xmm0
	psrldq	$11, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_12
 *  ecx(offset of esi)  eax(offset of edi)  relative offset	corresponding case
 *        n(4~15)            n - 4            11(15 +(n-4) - n)      ashr_12
 */
	.p2align 4
L(ashr_12):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$4, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb	%xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-4(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$12, %ebx
	lea	12(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_12):
	add	$16, %edi
	jg	L(nibble_ashr_12)

L(gobble_ashr_12):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$12, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_12)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$12, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_12)

	.p2align 4
L(nibble_ashr_12):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %esi
	test	$0xf000, %esi
	jnz	L(ashr_12_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$4, %ebp
	jbe	L(ashr_12_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_12)

	.p2align 4
L(ashr_12_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$12, %xmm0
	psrldq	$12, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_13
 *  ecx(offset of esi)  eax(offset of edi)  relative offset	corresponding case
 *        n(3~15)            n - 3            12(15 +(n-3) - n)      ashr_13
 */
	.p2align 4
L(ashr_13):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$3, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb	%xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-3(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$13, %ebx
	lea	13(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_13):
	add	$16, %edi
	jg	L(nibble_ashr_13)

L(gobble_ashr_13):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$13, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_13)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$13, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_13)

	.p2align 4
L(nibble_ashr_13):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %esi
	test	$0xe000, %esi
	jnz	L(ashr_13_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$3, %ebp
	jbe	L(ashr_13_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_13)

	.p2align 4
L(ashr_13_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$13, %xmm0
	psrldq	$13, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_14
 *  ecx(offset of esi)  eax(offset of edi)  relative offset	corresponding case
 *        n(2~15)            n - 2            13(15 +(n-2) - n)      ashr_14
 */
	.p2align 4
L(ashr_14):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$2, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb	%xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-2(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$14, %ebx
	lea	14(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_14):
	add	$16, %edi
	jg	L(nibble_ashr_14)

L(gobble_ashr_14):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$14, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_14)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$14, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_14)

	.p2align 4
L(nibble_ashr_14):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %esi
	test	$0xc000, %esi
	jnz	L(ashr_14_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$2, %ebp
	jbe	L(ashr_14_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_14)

	.p2align 4
L(ashr_14_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$14, %xmm0
	psrldq	$14, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_15
 *  ecx(offset of esi)  eax(offset of edi)  relative offset	corresponding case
 *        n(1~15)            n - 1            14(15 +(n-1) - n)      ashr_15
 */

	.p2align 4
L(ashr_15):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$1, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb	%xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-1(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$15, %ebx
	lea	15(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_15):
	add	$16, %edi
	jg	L(nibble_ashr_15)

L(gobble_ashr_15):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$15, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_15)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$15, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_15)

	.p2align 4
L(nibble_ashr_15):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb	%xmm0, %esi
	test	$0x8000, %esi
	jnz	L(ashr_15_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$1, %ebp
	jbe	L(ashr_15_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_15)

	.p2align 4
L(ashr_15_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$15, %xmm0
	psrldq	$15, %xmm3
	jmp	L(aftertail)

	.p2align 4
L(aftertail):
	pcmpeqb	%xmm3, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb	%xmm1, %esi
	not	%esi
L(exit):
	mov	%ebx, %edi
	and	$0x1f, %edi
	lea	-16(%edi, %ecx), %edi
L(less32bytes):
	add	%edi, %edx
	add	%ecx, %eax
	test	$0x20, %ebx
	jz	L(ret2)
	xchg	%eax, %edx

	.p2align 4
L(ret2):
	mov	%esi, %ecx
	POP	(%esi)
	POP	(%edi)
	POP	(%ebx)
L(less16bytes):
	test	%cl, %cl
	jz	L(2next_8_bytes)

	test	$0x01, %cl
	jnz	L(Byte0)

	test	$0x02, %cl
	jnz	L(Byte1)

	test	$0x04, %cl
	jnz	L(Byte2)

	test	$0x08, %cl
	jnz	L(Byte3)

	test	$0x10, %cl
	jnz	L(Byte4)

	test	$0x20, %cl
	jnz	L(Byte5)

	test	$0x40, %cl
	jnz	L(Byte6)
#ifdef USE_AS_STRNCMP
	cmp	$7, %ebp
	jbe	L(eq)
#endif

	movzx	7(%eax), %ecx
	movzx	7(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte0):
#ifdef USE_AS_STRNCMP
	cmp	$0, %ebp
	jbe	L(eq)
#endif
	movzx	(%eax), %ecx
	movzx	(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte1):
#ifdef USE_AS_STRNCMP
	cmp	$1, %ebp
	jbe	L(eq)
#endif
	movzx	1(%eax), %ecx
	movzx	1(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte2):
#ifdef USE_AS_STRNCMP
	cmp	$2, %ebp
	jbe	L(eq)
#endif
	movzx	2(%eax), %ecx
	movzx	2(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte3):
#ifdef USE_AS_STRNCMP
	cmp	$3, %ebp
	jbe	L(eq)
#endif
	movzx	3(%eax), %ecx
	movzx	3(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte4):
#ifdef USE_AS_STRNCMP
	cmp	$4, %ebp
	jbe	L(eq)
#endif
	movzx	4(%eax), %ecx
	movzx	4(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte5):
#ifdef USE_AS_STRNCMP
	cmp	$5, %ebp
	jbe	L(eq)
#endif
	movzx	5(%eax), %ecx
	movzx	5(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte6):
#ifdef USE_AS_STRNCMP
	cmp	$6, %ebp
	jbe	L(eq)
#endif
	movzx	6(%eax), %ecx
	movzx	6(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(2next_8_bytes):
	add	$8, %eax
	add	$8, %edx
#ifdef USE_AS_STRNCMP
	cmp	$8, %ebp
	lea	-8(%ebp), %ebp
	jbe	L(eq)
#endif

	test	$0x01, %ch
	jnz	L(Byte0)

	test	$0x02, %ch
	jnz	L(Byte1)

	test	$0x04, %ch
	jnz	L(Byte2)

	test	$0x08, %ch
	jnz	L(Byte3)

	test	$0x10, %ch
	jnz	L(Byte4)

	test	$0x20, %ch
	jnz	L(Byte5)

	test	$0x40, %ch
	jnz	L(Byte6)

#ifdef USE_AS_STRNCMP
	cmp	$7, %ebp
	jbe	L(eq)
#endif
	movzx	7(%eax), %ecx
	movzx	7(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(neq):
	mov	$1, %eax
	ja	L(neq_bigger)
	neg	%eax
L(neq_bigger):
	RETURN

#ifdef USE_AS_STRNCMP
	cfi_restore_state
	.p2align 4
L(more8byteseq):
	POP	(%esi)
	POP	(%edi)
	POP	(%ebx)
#endif

L(eq):

#ifdef USE_AS_STRNCMP
	POP	(%ebp)
#endif
	xorl	%eax, %eax
	ret

#ifdef USE_AS_STRNCMP
	CFI_PUSH (%ebp)

	.p2align 4
L(less16bytes_sncmp):
	test	%ebp, %ebp
	jz	L(eq)

	movzbl	(%eax), %ecx
	cmpb	%cl, (%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$1, %ebp
	je	L(eq)

	movzbl	1(%eax), %ecx
	cmpb	%cl, 1(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$2, %ebp
	je	L(eq)

	movzbl	2(%eax), %ecx
	cmpb	%cl, 2(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$3, %ebp
	je	L(eq)

	movzbl	3(%eax), %ecx
	cmpb	%cl, 3(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$4, %ebp
	je	L(eq)

	movzbl	4(%eax), %ecx
	cmpb	%cl, 4(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$5, %ebp
	je	L(eq)

	movzbl	5(%eax), %ecx
	cmpb	%cl, 5(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$6, %ebp
	je	L(eq)

	movzbl	6(%eax), %ecx
	cmpb	%cl, 6(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$7, %ebp
	je	L(eq)

	movzbl	7(%eax), %ecx
	cmpb	%cl, 7(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)


	cmp	$8, %ebp
	je	L(eq)

	movzbl	8(%eax), %ecx
	cmpb	%cl, 8(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$9, %ebp
	je	L(eq)

	movzbl	9(%eax), %ecx
	cmpb	%cl, 9(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$10, %ebp
	je	L(eq)

	movzbl	10(%eax), %ecx
	cmpb	%cl, 10(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$11, %ebp
	je	L(eq)

	movzbl	11(%eax), %ecx
	cmpb	%cl, 11(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)


	cmp	$12, %ebp
	je	L(eq)

	movzbl	12(%eax), %ecx
	cmpb	%cl, 12(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$13, %ebp
	je	L(eq)

	movzbl	13(%eax), %ecx
	cmpb	%cl, 13(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$14, %ebp
	je	L(eq)

	movzbl	14(%eax), %ecx
	cmpb	%cl, 14(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$15, %ebp
	je	L(eq)

	movzbl	15(%eax), %ecx
	cmpb	%cl, 15(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	POP	(%ebp)
	xor	%eax, %eax
	ret
#endif

END (ssse3_strcmp_latest)