/*
Copyright (c) 2011, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
 * strrchr for 32-bit x86 using SSE2.
 * Syntax: AT&T (GNU as), run through the C preprocessor.
 *
 * C-level view:  char *strrchr (const char *s, int c)
 *
 * In:   both arguments are read from the stack.  After the entry push of
 *       %edi, s is at 8(%esp) and c is at 12(%esp).
 * Out:  %eax = address of the last byte equal to (char) c in s, or 0 when
 *       there is none.  The terminating NUL takes part in the search, so
 *       c == 0 yields the address of the terminator.
 * Saved and restored: %edi always; %esi and %ebx once the main loop is
 *       entered.
 * Scratch: %eax, %ecx, %edx, %xmm0, %xmm1, %xmm2, %xmm3, flags.
 *
 * Register roles:
 *   %edi   scan pointer; after a chunk is loaded it is advanced by 16, so
 *          it points one chunk past the chunk whose masks are being tested.
 *   %xmm1  (char) c replicated into all 16 bytes.
 *   %xmm2  all zero, used as the NUL comparand.  pcmpeqb overwrites it with
 *          the NUL mask, which is still all zero as long as no NUL has been
 *          seen, so it never needs to be cleared again.
 *   %ebx   match mask of the most recent chunk that held a match but no
 *          NUL (0 = no such chunk yet).
 *   %esi   the %edi value that goes with %ebx.
 *
 * Memory access: 16 bytes at a time.  Aligned loads never leave the aligned
 * 16-byte chunk that holds a string byte.  The single unaligned load is
 * only made when the start offset inside its 64-byte block is at most 48,
 * so it stays inside that block.
 *
 * Unwind info: the real push order is %edi, %esi, %ebx.  Wherever the CFI
 * state has to be re-established after an early exit, the CFI_PUSH pair is
 * written in that same order (%esi, then %ebx) so that the unwind tables
 * name the stack slot each register really occupies.
 */

#ifndef L
# define L(label)	.L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef ENTRY
# define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
#endif

#ifndef END
# define END(name)	\
	cfi_endproc;	\
	.size name, .-name
#endif

/* Unwind bookkeeping for a 4-byte push of REG: the CFA moves 4 bytes
   further from %esp and REG is recorded at the new top of stack.  */
#define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* Unwind bookkeeping for a 4-byte pop of REG.  */
#define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Offset of the first argument from %esp once ENTRANCE has pushed %edi
   (4 bytes return address + 4 bytes saved %edi).  */
#define PARMS	8
#define ENTRANCE	PUSH(%edi);
/* Restore %edi and return.  The trailing CFI_PUSH re-establishes the
   "%edi is pushed" unwind state for the code that follows the ret.  */
#define RETURN	POP (%edi); ret; CFI_PUSH (%edi);

#define STR1	PARMS
#define STR2	STR1+4

	.text
ENTRY (strrchr)

	ENTRANCE
	mov	STR1(%esp), %ecx
	movd	STR2(%esp), %xmm1

	pxor	%xmm2, %xmm2
	mov	%ecx, %edi
	/* Two byte-interleaves replicate the low byte of c into the low
	   dword; pshufd $0 below copies that dword to all four lanes.  */
	punpcklbw %xmm1, %xmm1
	punpcklbw %xmm1, %xmm1
	/* ECX has OFFSET (of s within its 64-byte block).  */
	and	$63, %ecx
	pshufd	$0, %xmm1, %xmm1
	cmp	$48, %ecx
	ja	L(crosscache)

/* Unaligned string: offset <= 48, so a 16-byte unaligned load stays inside
   the 64-byte block.  */
	movdqu	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	pcmpeqb	%xmm1, %xmm0
	/* Find where NULL is.  */
	pmovmskb %xmm2, %ecx
	/* Check if there is a match.  */
	pmovmskb %xmm0, %eax
	add	$16, %edi

	test	%eax, %eax
	jnz	L(unaligned_match1)

	test	%ecx, %ecx
	jnz	L(return_null)

	/* No match and no NUL: continue with aligned chunks.  Rounding
	   down may re-scan a few bytes, which is harmless.  */
	and	$-16, %edi

	PUSH	(%esi)
	PUSH	(%ebx)

	xor	%ebx, %ebx		/* No match remembered yet.  */
	jmp	L(loop)

	CFI_POP	(%esi)
	CFI_POP	(%ebx)

	.p2align 4
L(unaligned_match1):
	test	%ecx, %ecx
	jnz	L(prolog_find_zero_1)

	PUSH	(%esi)
	PUSH	(%ebx)

	/* Remember this match; %esi keeps the unaligned scan pointer so
	   that the mask bits in %ebx still map to the right addresses.  */
	mov	%eax, %ebx
	mov	%edi, %esi
	and	$-16, %edi
	jmp	L(loop)

	CFI_POP	(%esi)
	CFI_POP	(%ebx)

	.p2align 4
L(crosscache):
/* Handle unaligned string: load the enclosing aligned chunk and discard
   the mask bits of the bytes that precede the string.  */
	and	$15, %ecx
	and	$-16, %edi
	pxor	%xmm3, %xmm3
	movdqa	(%edi), %xmm0
	/* %xmm3 is the NUL comparand here, so %xmm2 stays all zero.  */
	pcmpeqb	%xmm0, %xmm3
	pcmpeqb	%xmm1, %xmm0
	/* Find where NULL is.  */
	pmovmskb %xmm3, %edx
	/* Check if there is a match.  */
	pmovmskb %xmm0, %eax
	/* Remove the leading bytes.  */
	shr	%cl, %edx
	shr	%cl, %eax
	add	$16, %edi

	test	%eax, %eax
	jnz	L(unaligned_match)

	test	%edx, %edx
	jnz	L(return_null)

	PUSH	(%esi)
	PUSH	(%ebx)

	xor	%ebx, %ebx		/* No match remembered yet.  */
	jmp	L(loop)

	CFI_POP	(%esi)
	CFI_POP	(%ebx)

	.p2align 4
L(unaligned_match):
	test	%edx, %edx
	jnz	L(prolog_find_zero)

	PUSH	(%esi)
	PUSH	(%ebx)

	/* The masks were shifted right by %ecx, so the remembered pointer
	   is moved forward by the same amount.  */
	mov	%eax, %ebx
	lea	(%edi, %ecx), %esi

/* Loop start on aligned string.  Unrolled four times; each step leaves the
   loop as soon as the chunk holds a match or a NUL.  */
	.p2align 4
L(loop):
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jz	L(loop)

L(matches):
	test	%eax, %eax
	jnz	L(match)
	/* Only a NUL in this chunk: fall back to the remembered match.  */
L(return_value):
	test	%ebx, %ebx
	jz	L(return_null_1)
	mov	%ebx, %eax
	mov	%esi, %edi

	POP	(%ebx)
	POP	(%esi)

	jmp	L(match_case1)

	/* Code below runs with %esi and %ebx still pushed (in that
	   order), so the unwind state is restored in push order.  */
	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(return_null_1):
	POP	(%ebx)
	POP	(%esi)

	xor	%eax, %eax
	RETURN

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(match):
	/* %ecx was merged with %eax above; fetch the pure NUL mask.  */
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(find_zero)
	/* Match without NUL: remember it and keep scanning.  */
	mov	%eax, %ebx
	mov	%edi, %esi
	jmp	L(loop)

/* The chunk holds both a match and a NUL.  Locate the lowest set bit of
   the NUL mask in %ecx and keep only the match bits up to and including
   that position.  The assembler gives << higher precedence than -, so
   "1 << N - 1" is (1 << N) - 1, i.e. the low N bits.  If no match bit
   survives, the remembered match (if any) is the answer.  */
	.p2align 4
L(find_zero):
	test	%cl, %cl
	jz	L(find_zero_high)
	mov	%cl, %dl
	and	$15, %dl
	jz	L(find_zero_8)
	test	$0x01, %cl
	jnz	L(FindZeroExit1)
	test	$0x02, %cl
	jnz	L(FindZeroExit2)
	test	$0x04, %cl
	jnz	L(FindZeroExit3)
	and	$1 << 4 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(find_zero_8):
	test	$0x10, %cl
	jnz	L(FindZeroExit5)
	test	$0x20, %cl
	jnz	L(FindZeroExit6)
	test	$0x40, %cl
	jnz	L(FindZeroExit7)
	and	$1 << 8 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(find_zero_high):
	mov	%ch, %dh
	and	$15, %dh
	jz	L(find_zero_high_8)
	test	$0x01, %ch
	jnz	L(FindZeroExit9)
	test	$0x02, %ch
	jnz	L(FindZeroExit10)
	test	$0x04, %ch
	jnz	L(FindZeroExit11)
	and	$1 << 12 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(find_zero_high_8):
	test	$0x10, %ch
	jnz	L(FindZeroExit13)
	test	$0x20, %ch
	jnz	L(FindZeroExit14)
	test	$0x40, %ch
	jnz	L(FindZeroExit15)
	and	$1 << 16 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit1):
	and	$1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit2):
	and	$1 << 2 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit3):
	and	$1 << 3 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit5):
	and	$1 << 5 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit6):
	and	$1 << 6 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit7):
	and	$1 << 7 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit9):
	and	$1 << 9 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit10):
	and	$1 << 10 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit11):
	and	$1 << 11 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit13):
	and	$1 << 13 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit14):
	and	$1 << 14 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit15):
	and	$1 << 15 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	/* Falls through with only %edi pushed.  */

/* %eax is a non-zero match mask and %edi points 16 bytes past the start
   of the chunk the mask belongs to.  Return the address of the highest
   set bit: bit k corresponds to address %edi - 16 + k.  Only %edi is
   still pushed here.  */
	.p2align 4
L(match_case1):
	test	%ah, %ah
	jnz	L(match_case1_high)
	mov	%al, %dl
	and	$15 << 4, %dl
	jnz	L(match_case1_8)
	test	$0x08, %al
	jnz	L(Exit4)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x02, %al
	jnz	L(Exit2)
	lea	-16(%edi), %eax
	RETURN

	.p2align 4
L(match_case1_8):
	test	$0x80, %al
	jnz	L(Exit8)
	test	$0x40, %al
	jnz	L(Exit7)
	test	$0x20, %al
	jnz	L(Exit6)
	lea	-12(%edi), %eax
	RETURN

	.p2align 4
L(match_case1_high):
	mov	%ah, %dh
	and	$15 << 4, %dh
	jnz	L(match_case1_high_8)
	test	$0x08, %ah
	jnz	L(Exit12)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x02, %ah
	jnz	L(Exit10)
	lea	-8(%edi), %eax
	RETURN

	.p2align 4
L(match_case1_high_8):
	test	$0x80, %ah
	jnz	L(Exit16)
	test	$0x40, %ah
	jnz	L(Exit15)
	test	$0x20, %ah
	jnz	L(Exit14)
	lea	-4(%edi), %eax
	RETURN

	.p2align 4
L(Exit2):
	lea	-15(%edi), %eax
	RETURN

	.p2align 4
L(Exit3):
	lea	-14(%edi), %eax
	RETURN

	.p2align 4
L(Exit4):
	lea	-13(%edi), %eax
	RETURN

	.p2align 4
L(Exit6):
	lea	-11(%edi), %eax
	RETURN

	.p2align 4
L(Exit7):
	lea	-10(%edi), %eax
	RETURN

	.p2align 4
L(Exit8):
	lea	-9(%edi), %eax
	RETURN

	.p2align 4
L(Exit10):
	lea	-7(%edi), %eax
	RETURN

	.p2align 4
L(Exit11):
	lea	-6(%edi), %eax
	RETURN

	.p2align 4
L(Exit12):
	lea	-5(%edi), %eax
	RETURN

	.p2align 4
L(Exit14):
	lea	-3(%edi), %eax
	RETURN

	.p2align 4
L(Exit15):
	lea	-2(%edi), %eax
	RETURN

	.p2align 4
L(Exit16):
	lea	-1(%edi), %eax
	RETURN

/* Return NULL.  */
	.p2align 4
L(return_null):
	xor	%eax, %eax
	RETURN

/* Match and NUL both found in the very first chunk, before %esi and %ebx
   were pushed.  Same masking as L(find_zero), but with no remembered match
   to fall back on: an empty masked result returns NULL directly.  */
	.p2align 4
L(prolog_find_zero):
	/* Masks were shifted right by %ecx; move the pointer to match,
	   then put the NUL mask where the shared code expects it.  */
	add	%ecx, %edi
	mov	%edx, %ecx
L(prolog_find_zero_1):
	test	%cl, %cl
	jz	L(prolog_find_zero_high)
	mov	%cl, %dl
	and	$15, %dl
	jz	L(prolog_find_zero_8)
	test	$0x01, %cl
	jnz	L(PrologFindZeroExit1)
	test	$0x02, %cl
	jnz	L(PrologFindZeroExit2)
	test	$0x04, %cl
	jnz	L(PrologFindZeroExit3)
	and	$1 << 4 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(prolog_find_zero_8):
	test	$0x10, %cl
	jnz	L(PrologFindZeroExit5)
	test	$0x20, %cl
	jnz	L(PrologFindZeroExit6)
	test	$0x40, %cl
	jnz	L(PrologFindZeroExit7)
	and	$1 << 8 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(prolog_find_zero_high):
	mov	%ch, %dh
	and	$15, %dh
	jz	L(prolog_find_zero_high_8)
	test	$0x01, %ch
	jnz	L(PrologFindZeroExit9)
	test	$0x02, %ch
	jnz	L(PrologFindZeroExit10)
	test	$0x04, %ch
	jnz	L(PrologFindZeroExit11)
	and	$1 << 12 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(prolog_find_zero_high_8):
	test	$0x10, %ch
	jnz	L(PrologFindZeroExit13)
	test	$0x20, %ch
	jnz	L(PrologFindZeroExit14)
	test	$0x40, %ch
	jnz	L(PrologFindZeroExit15)
	and	$1 << 16 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit1):
	and	$1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit2):
	and	$1 << 2 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit3):
	and	$1 << 3 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit5):
	and	$1 << 5 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit6):
	and	$1 << 6 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit7):
	and	$1 << 7 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit9):
	and	$1 << 9 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit10):
	and	$1 << 10 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit11):
	and	$1 << 11 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit13):
	and	$1 << 13 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit14):
	and	$1 << 14 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit15):
	and	$1 << 15 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

END (strrchr)