/*
Copyright (c) 2011, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Optimized strlcat with SSSE3 */

/*
 * Target / syntax: 32-bit x86, GNU as (AT&T operand order), run through the
 * C preprocessor.
 *
 * Entry point:  strlcat_ssse3
 * In:           three stack arguments, read through the DST / SRC / LEN
 *               offsets defined below (first, second and third argument).
 * Out:          %eax
 * Saved regs:   %ebx always; %edi on the copy paths; %esi is popped on the
 *               paths that are entered from the included strcpy code.
 * Scratch:      %ecx, %edx, %xmm0-%xmm3, %xmm6, flags.
 *
 * The routine is assembled from three pieces:
 *   1. "sse2-strlen-atom.S" (included, not visible in this file) measures
 *      the destination,
 *   2. the code in this file plus "ssse3-strcpy-atom.S" (included, not
 *      visible here) appends the source,
 *   3. L(CalculateLengthOfSrc) measures whatever part of the source was not
 *      copied, so that the return value can be formed.
 */

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef ENTRY
# define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
#endif

#ifndef END
# define END(name)	\
	cfi_endproc;	\
	.size name, .-name
#endif

/* CFI bookkeeping that mirrors a 4-byte push / pop of REG.  */
#define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/*
 * While the strlen code is being included every local label gets a
 * "Prolog_" infix; L() is redefined without the infix further down.
 */
#define L(label)	.L##Prolog_##label

/*
 * Stack offsets of the arguments.  The macros are purely textual, so
 * SRC(%esp) expands to 4+8(%esp) and LEN(%esp) to 4+8+4(%esp).
 *   DST  is used on entry, before anything is pushed: 4(%esp), argument 1.
 *   SRC / LEN are only used while exactly one register (%ebx) is pushed:
 *        12(%esp) and 16(%esp), i.e. arguments 2 and 3.
 *   With %ebx pushed, argument 1 is addressed as DST + 4(%esp).
 */
#define DST	4
#define SRC	DST+8
#define LEN	SRC+4

	.text
ENTRY (strlcat_ssse3)
	mov	DST(%esp), %edx		/* edx = first argument (dst) */
	PUSH	(%ebx)
	mov	LEN(%esp), %ebx		/* ebx = third argument (len) */
	sub	$4, %ebx
	jbe	L(len_less4_prolog)	/* len <= 4: label lives in the included strlen code */

/*
 * Step 1: pull in the strlen code.  Inside the include RETURN becomes a jump
 * to L(StrcpyStep) and every "edi" token is rewritten to "ebx".
 * NOTE(review): the include is expected to leave the (len-bounded) length of
 * dst in %eax -- inferred from how %eax is used at L(StrcpyStep); confirm
 * against sse2-strlen-atom.S.
 */
#define RETURN	jmp	L(StrcpyStep)
#define edi	ebx

#define USE_AS_STRNLEN
#define USE_AS_STRCAT
#define USE_AS_STRLCAT

#include "sse2-strlen-atom.S"

	.p2align 4
L(StrcpyStep):

/*
 * Step 2: switch the helper macros over for the copy code.
 *   RETURN   - return with only %ebx on the stack.
 *   RETURN1  - return with %edi and %ebx on the stack.
 * The CFI_PUSH after each "ret" re-establishes the unwind state for the code
 * that follows the macro in the file.
 */
#undef edi
#undef L
#define L(label)	.L##label
#undef RETURN
#define RETURN	POP	(%ebx); ret; CFI_PUSH	(%ebx);
#define RETURN1	POP	(%edi); POP	(%ebx); ret; CFI_PUSH	(%ebx); CFI_PUSH	(%edi)

	movl	SRC(%esp), %ecx		/* ecx = src */
	movl	LEN(%esp), %ebx		/* ebx = len */

	cmp	%eax, %ebx
	je	L(CalculateLengthOfSrcProlog)	/* eax == len: nothing can be appended */
	sub	%eax, %ebx		/* ebx = len - eax = room left in dst; non-zero here,
					   the equal case was branched away just above */

	mov	DST + 4(%esp), %edx	/* edx = dst (one register is pushed) */

	PUSH	(%edi)
	add	%eax, %edx		/* edx = dst + eax: first byte to write */
	mov	%ecx, %edi
	sub	%eax, %edi		/* edi = src - eax; every L(ExitN) returns
					   (address of the NUL in src) - edi */

	cmp	$8, %ebx
	jbe	L(StrncpyExit8Bytes)

	/* More than 8 bytes of room: look for a NUL in src[0..7].  */
	cmpb	$0, (%ecx)
	jz	L(Exit1)
	cmpb	$0, 1(%ecx)
	jz	L(Exit2)
	cmpb	$0, 2(%ecx)
	jz	L(Exit3)
	cmpb	$0, 3(%ecx)
	jz	L(Exit4)
	cmpb	$0, 4(%ecx)
	jz	L(Exit5)
	cmpb	$0, 5(%ecx)
	jz	L(Exit6)
	cmpb	$0, 6(%ecx)
	jz	L(Exit7)
	cmpb	$0, 7(%ecx)
	jz	L(Exit8)
	cmp	$16, %ebx
	jb	L(StrncpyExit15Bytes)
	/* At least 16 bytes of room: look for a NUL in src[8..15].  */
	cmpb	$0, 8(%ecx)
	jz	L(Exit9)
	cmpb	$0, 9(%ecx)
	jz	L(Exit10)
	cmpb	$0, 10(%ecx)
	jz	L(Exit11)
	cmpb	$0, 11(%ecx)
	jz	L(Exit12)
	cmpb	$0, 12(%ecx)
	jz	L(Exit13)
	cmpb	$0, 13(%ecx)
	jz	L(Exit14)
	cmpb	$0, 14(%ecx)
	jz	L(Exit15)
	cmpb	$0, 15(%ecx)
	jz	L(Exit16)
	cmp	$16, %ebx
	je	L(StrlcpyExit16)

/*
 * More than 16 bytes of room and no NUL in the first 16 source bytes: hand
 * over to the included strcpy code.
 * NOTE(review): the L(CopyFrom1To16Bytes*) labels below are not referenced
 * from this file, so they are presumably the include's exit points.  The
 * code at those labels treats %esi as a pushed register holding an offset to
 * add to %ecx / %edx, and %eax as a bit mask in which bit i set means "byte i
 * at (%ecx) is NUL" -- confirm against ssse3-strcpy-atom.S.
 */
#define USE_AS_STRNCPY
#include "ssse3-strcpy-atom.S"

/*
 * Exit families used from here on:
 *
 *   L(ExitN)        The NUL terminator is the N-th byte at (%ecx).  Copy those
 *                   N bytes (terminator included) to (%edx) and return
 *                   (%ecx + N - 1) - %edi.
 *
 *   L(StrlcpyExitN) Room runs out after N bytes.  Copy N - 1 bytes, store a
 *                   zero byte at N-1(%edx), then continue at
 *                   L(CalculateLengthOfSrc) with %edx = %ecx + N and
 *                   %ecx = %edi to measure the rest of src.  %bh supplies the
 *                   zero byte, which relies on the remaining count in %ebx
 *                   being below 256 at that point.
 *
 * Overlapping loads/stores (e.g. 4 bytes at offset 3 after 4 bytes at
 * offset 0) are used to copy lengths that are not a power of two.
 */

/* NUL within the current 16 bytes, enough room: dispatch on the mask.  */
	.p2align 4
L(CopyFrom1To16Bytes):
	add	%esi, %edx
	add	%esi, %ecx

	POP	(%esi)
	test	%al, %al
	jz	L(ExitHigh8)		/* no NUL in bytes 0..7 */

L(CopyFrom1To16BytesLess8):
	mov	%al, %ah
	and	$15, %ah
	jz	L(ExitHigh4)		/* no NUL in bytes 0..3 */

	test	$0x01, %al
	jnz	L(Exit1)
	test	$0x02, %al
	jnz	L(Exit2)
	test	$0x04, %al
	jnz	L(Exit3)
L(Exit4):
	movl	(%ecx), %eax
	movl	%eax, (%edx)

	lea	3(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(ExitHigh4):
	test	$0x10, %al
	jnz	L(Exit5)
	test	$0x20, %al
	jnz	L(Exit6)
	test	$0x40, %al
	jnz	L(Exit7)
L(Exit8):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)

	lea	7(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(ExitHigh8):
	mov	%ah, %al
	and	$15, %al
	jz	L(ExitHigh12)		/* no NUL in bytes 8..11 */

	test	$0x01, %ah
	jnz	L(Exit9)
	test	$0x02, %ah
	jnz	L(Exit10)
	test	$0x04, %ah
	jnz	L(Exit11)
L(Exit12):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	8(%ecx), %eax
	movl	%eax, 8(%edx)

	lea	11(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(ExitHigh12):
	test	$0x10, %ah
	jnz	L(Exit13)
	test	$0x20, %ah
	jnz	L(Exit14)
	test	$0x40, %ah
	jnz	L(Exit15)
L(Exit16):
	movlpd	(%ecx), %xmm0
	movlpd	8(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 8(%edx)

	lea	15(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	/* The next label is reached with %esi pushed.  */
	CFI_PUSH(%esi)

/*
 * "Case 2": the mask is non-zero (a NUL was seen) and the room may run out
 * inside the same 16 bytes, so each candidate position checks the NUL bit
 * first and the remaining room second.
 * NOTE(review): the "add $16, %ebx" presumably undoes a 16-byte advance
 * subtracted by the included code -- confirm.
 */
	.p2align 4
L(CopyFrom1To16BytesCase2):
	add	$16, %ebx
	add	%esi, %ecx
	add	%esi, %edx

	POP	(%esi)

	test	%al, %al
	jz	L(ExitHighCase2)

	cmp	$8, %ebx
	ja	L(CopyFrom1To16BytesLess8)	/* NUL in bytes 0..7 and more than 8 bytes of room */

	test	$0x01, %al
	jnz	L(Exit1)
	cmp	$1, %ebx
	je	L(StrlcpyExit1)
	test	$0x02, %al
	jnz	L(Exit2)
	cmp	$2, %ebx
	je	L(StrlcpyExit2)
	test	$0x04, %al
	jnz	L(Exit3)
	cmp	$3, %ebx
	je	L(StrlcpyExit3)
	test	$0x08, %al
	jnz	L(Exit4)
	cmp	$4, %ebx
	je	L(StrlcpyExit4)
	test	$0x10, %al
	jnz	L(Exit5)
	cmp	$5, %ebx
	je	L(StrlcpyExit5)
	test	$0x20, %al
	jnz	L(Exit6)
	cmp	$6, %ebx
	je	L(StrlcpyExit6)
	test	$0x40, %al
	jnz	L(Exit7)
	cmp	$7, %ebx
	je	L(StrlcpyExit7)
	test	$0x80, %al
	jnz	L(Exit8)
	jmp	L(StrlcpyExit8)

	.p2align 4
L(ExitHighCase2):
	cmp	$8, %ebx
	jbe	L(CopyFrom1To16BytesLess8Case3)	/* room ends before the NUL in bytes 8..15 */

	test	$0x01, %ah
	jnz	L(Exit9)
	cmp	$9, %ebx
	je	L(StrlcpyExit9)
	test	$0x02, %ah
	jnz	L(Exit10)
	cmp	$10, %ebx
	je	L(StrlcpyExit10)
	test	$0x04, %ah
	jnz	L(Exit11)
	cmp	$11, %ebx
	je	L(StrlcpyExit11)
	test	$0x8, %ah
	jnz	L(Exit12)
	cmp	$12, %ebx
	je	L(StrlcpyExit12)
	test	$0x10, %ah
	jnz	L(Exit13)
	cmp	$13, %ebx
	je	L(StrlcpyExit13)
	test	$0x20, %ah
	jnz	L(Exit14)
	cmp	$14, %ebx
	je	L(StrlcpyExit14)
	test	$0x40, %ah
	jnz	L(Exit15)
	cmp	$15, %ebx
	je	L(StrlcpyExit15)
	test	$0x80, %ah
	jnz	L(Exit16)
	jmp	L(StrlcpyExit16)

	/* The next labels are reached with %esi pushed.  */
	CFI_PUSH(%esi)

	.p2align 4
L(CopyFrom1To16BytesCase2OrCase3):
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)

/* "Case 3": mask is zero, so only the remaining room decides the exit.  */
	.p2align 4
L(CopyFrom1To16BytesCase3):
	add	$16, %ebx
	add	%esi, %edx
	add	%esi, %ecx

	POP	(%esi)

	cmp	$8, %ebx
	ja	L(ExitHigh8Case3)

L(CopyFrom1To16BytesLess8Case3):
	cmp	$4, %ebx
	ja	L(ExitHigh4Case3)

	cmp	$1, %ebx
	je	L(StrlcpyExit1)
	cmp	$2, %ebx
	je	L(StrlcpyExit2)
	cmp	$3, %ebx
	je	L(StrlcpyExit3)
L(StrlcpyExit4):
	movb	%bh, 3(%edx)
	movw	(%ecx), %ax
	movw	%ax, (%edx)
	movb	2(%ecx), %al
	movb	%al, 2(%edx)

	lea	4(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH	(%edi)

	.p2align 4
L(ExitHigh4Case3):
	cmp	$5, %ebx
	je	L(StrlcpyExit5)
	cmp	$6, %ebx
	je	L(StrlcpyExit6)
	cmp	$7, %ebx
	je	L(StrlcpyExit7)
L(StrlcpyExit8):
	movb	%bh, 7(%edx)
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movl	3(%ecx), %eax
	movl	%eax, 3(%edx)

	lea	8(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH	(%edi)

	.p2align 4
L(ExitHigh8Case3):
	cmp	$12, %ebx
	ja	L(ExitHigh12Case3)

	cmp	$9, %ebx
	je	L(StrlcpyExit9)
	cmp	$10, %ebx
	je	L(StrlcpyExit10)
	cmp	$11, %ebx
	je	L(StrlcpyExit11)
L(StrlcpyExit12):
	movb	%bh, 11(%edx)
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	7(%ecx), %eax
	movl	%eax, 7(%edx)

	lea	12(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH	(%edi)

	.p2align 4
L(ExitHigh12Case3):
	cmp	$13, %ebx
	je	L(StrlcpyExit13)
	cmp	$14, %ebx
	je	L(StrlcpyExit14)
	cmp	$15, %ebx
	je	L(StrlcpyExit15)
L(StrlcpyExit16):
	movb	%bh, 15(%edx)
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	7(%ecx), %xmm0
	movlpd	%xmm0, 7(%edx)

	lea	16(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH	(%edi)

	.p2align 4
L(StrlcpyExit1):
	movb	%bh, (%edx)

	lea	1(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH	(%edi)

	.p2align 4
L(Exit1):
	movb	(%ecx), %al
	movb	%al, (%edx)

	mov	%ecx, %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(StrlcpyExit2):
	movb	%bh, 1(%edx)
	movb	(%ecx), %al
	movb	%al, (%edx)

	lea	2(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH	(%edi)

	.p2align 4
L(Exit2):
	movw	(%ecx), %ax
	movw	%ax, (%edx)

	lea	1(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(StrlcpyExit3):
	movb	%bh, 2(%edx)
	movw	(%ecx), %ax
	movw	%ax, (%edx)

	lea	3(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH	(%edi)

	.p2align 4
L(Exit3):
	movw	(%ecx), %ax
	movw	%ax, (%edx)
	movb	2(%ecx), %al
	movb	%al, 2(%edx)

	lea	2(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(StrlcpyExit5):
	movb	%bh, 4(%edx)
	movl	(%ecx), %eax
	movl	%eax, (%edx)

	lea	5(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH	(%edi)

	.p2align 4
L(Exit5):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movb	4(%ecx), %al
	movb	%al, 4(%edx)

	lea	4(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(StrlcpyExit6):
	movb	%bh, 5(%edx)
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movb	4(%ecx), %al
	movb	%al, 4(%edx)

	lea	6(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH	(%edi)

	.p2align 4
L(Exit6):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movw	4(%ecx), %ax
	movw	%ax, 4(%edx)

	lea	5(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(StrlcpyExit7):
	movb	%bh, 6(%edx)
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movw	4(%ecx), %ax
	movw	%ax, 4(%edx)

	lea	7(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH	(%edi)

	.p2align 4
L(Exit7):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movl	3(%ecx), %eax
	movl	%eax, 3(%edx)

	lea	6(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(StrlcpyExit9):
	movb	%bh, 8(%edx)
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)

	lea	9(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH	(%edi)

	.p2align 4
L(Exit9):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movb	8(%ecx), %al
	movb	%al, 8(%edx)

	lea	8(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(StrlcpyExit10):
	movb	%bh, 9(%edx)
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movb	8(%ecx), %al
	movb	%al, 8(%edx)

	lea	10(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH	(%edi)

	.p2align 4
L(Exit10):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movw	8(%ecx), %ax
	movw	%ax, 8(%edx)

	lea	9(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(StrlcpyExit11):
	movb	%bh, 10(%edx)
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movw	8(%ecx), %ax
	movw	%ax, 8(%edx)

	lea	11(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH	(%edi)

	.p2align 4
L(Exit11):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	7(%ecx), %eax
	movl	%eax, 7(%edx)

	lea	10(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(StrlcpyExit13):
	movb	%bh, 12(%edx)
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	8(%ecx), %eax
	movl	%eax, 8(%edx)

	lea	13(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH	(%edi)

	.p2align 4
L(Exit13):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	5(%ecx), %xmm0
	movlpd	%xmm0, 5(%edx)

	lea	12(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(StrlcpyExit14):
	movb	%bh, 13(%edx)
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	5(%ecx), %xmm0
	movlpd	%xmm0, 5(%edx)

	lea	14(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH	(%edi)

	.p2align 4
L(Exit14):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	6(%ecx), %xmm0
	movlpd	%xmm0, 6(%edx)

	lea	13(%ecx), %eax
	sub	%edi, %eax
	RETURN1

	.p2align 4
L(StrlcpyExit15):
	movb	%bh, 14(%edx)
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	6(%ecx), %xmm0
	movlpd	%xmm0, 6(%edx)

	lea	15(%ecx), %edx
	mov	%edi, %ecx
	POP	(%edi)
	jmp	L(CalculateLengthOfSrc)
	CFI_PUSH	(%edi)

	.p2align 4
L(Exit15):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	7(%ecx), %xmm0
	movlpd	%xmm0, 7(%edx)

	lea	14(%ecx), %eax
	sub	%edi, %eax
	RETURN1

/*
 * 9..15 bytes of room (src[0..7] already known to be non-NUL).  Each byte is
 * tested for NUL first, then the room is tested, so the copy stops at
 * whichever limit comes first.
 */
	.p2align 4
L(StrncpyExit15Bytes):
	cmp	$12, %ebx
	ja	L(StrncpyExit15Bytes1)

	/* 9..12 bytes of room.  */
	cmpb	$0, 8(%ecx)
	jz	L(Exit9)
	cmp	$9, %ebx
	je	L(StrlcpyExit9)

	cmpb	$0, 9(%ecx)
	jz	L(Exit10)
	cmp	$10, %ebx
	je	L(StrlcpyExit10)

	cmpb	$0, 10(%ecx)
	jz	L(Exit11)
	cmp	$11, %ebx
	je	L(StrlcpyExit11)

	cmpb	$0, 11(%ecx)
	jz	L(Exit12)
	jmp	L(StrlcpyExit12)

	/* 13..15 bytes of room: bytes 8..11 need no room check.  */
	.p2align 4
L(StrncpyExit15Bytes1):
	cmpb	$0, 8(%ecx)
	jz	L(Exit9)
	cmpb	$0, 9(%ecx)
	jz	L(Exit10)
	cmpb	$0, 10(%ecx)
	jz	L(Exit11)
	cmpb	$0, 11(%ecx)
	jz	L(Exit12)

	cmpb	$0, 12(%ecx)
	jz	L(Exit13)
	cmp	$13, %ebx
	je	L(StrlcpyExit13)

	cmpb	$0, 13(%ecx)
	jz	L(Exit14)
	cmp	$14, %ebx
	je	L(StrlcpyExit14)

	cmpb	$0, 14(%ecx)
	jz	L(Exit15)
	jmp	L(StrlcpyExit15)

/* 1..8 bytes of room; same NUL-then-room pattern as above.  */
	.p2align 4
L(StrncpyExit8Bytes):
	cmp	$4, %ebx
	ja	L(StrncpyExit8Bytes1)

	/* 1..4 bytes of room.  */
	cmpb	$0, (%ecx)
	jz	L(Exit1)
	cmp	$1, %ebx
	je	L(StrlcpyExit1)

	cmpb	$0, 1(%ecx)
	jz	L(Exit2)
	cmp	$2, %ebx
	je	L(StrlcpyExit2)

	cmpb	$0, 2(%ecx)
	jz	L(Exit3)
	cmp	$3, %ebx
	je	L(StrlcpyExit3)

	cmpb	$0, 3(%ecx)
	jz	L(Exit4)
	jmp	L(StrlcpyExit4)

	/* 5..8 bytes of room: bytes 0..3 need no room check.  */
	.p2align 4
L(StrncpyExit8Bytes1):
	cmpb	$0, (%ecx)
	jz	L(Exit1)
	cmpb	$0, 1(%ecx)
	jz	L(Exit2)
	cmpb	$0, 2(%ecx)
	jz	L(Exit3)
	cmpb	$0, 3(%ecx)
	jz	L(Exit4)

	cmpb	$0, 4(%ecx)
	jz	L(Exit5)
	cmp	$5, %ebx
	je	L(StrlcpyExit5)

	cmpb	$0, 5(%ecx)
	jz	L(Exit6)
	cmp	$6, %ebx
	je	L(StrlcpyExit6)

	cmpb	$0, 6(%ecx)
	jz	L(Exit7)
	cmp	$7, %ebx
	je	L(StrlcpyExit7)

	cmpb	$0, 7(%ecx)
	jz	L(Exit8)
	jmp	L(StrlcpyExit8)

	/* From here on only %ebx is on the stack.  */
	CFI_POP	(%edi)


/*
 * Step 3: nothing (more) is copied; measure the source.
 *
 * L(Prolog_return_start_len) is not referenced in this file.
 * NOTE(review): with the "Prolog_" label prefix used for the strlen include
 * it presumably corresponds to that file's L(return_start_len) -- confirm.
 * It reloads len and src and falls into the same code that L(StrcpyStep)
 * uses when eax == len, so the value returned is len + (length of src).
 */
	.p2align 4
L(Prolog_return_start_len):
	movl	LEN(%esp), %ebx
	movl	SRC(%esp), %ecx
L(CalculateLengthOfSrcProlog):
	mov	%ecx, %edx		/* edx = src: scan from its first byte */
	sub	%ebx, %ecx		/* ecx = src - len */

/*
 * In:  %edx = address in src to start scanning at,
 *      %ecx = bias; the value returned is (address of the NUL) - %ecx.
 * Only %ebx is on the stack here, so every exit uses RETURN.
 */
	.p2align 4
L(CalculateLengthOfSrc):
	/* Byte-wise check of the first 16 bytes.  */
	cmpb	$0, (%edx)
	jz	L(exit_tail0)
	cmpb	$0, 1(%edx)
	jz	L(exit_tail1)
	cmpb	$0, 2(%edx)
	jz	L(exit_tail2)
	cmpb	$0, 3(%edx)
	jz	L(exit_tail3)

	cmpb	$0, 4(%edx)
	jz	L(exit_tail4)
	cmpb	$0, 5(%edx)
	jz	L(exit_tail5)
	cmpb	$0, 6(%edx)
	jz	L(exit_tail6)
	cmpb	$0, 7(%edx)
	jz	L(exit_tail7)

	cmpb	$0, 8(%edx)
	jz	L(exit_tail8)
	cmpb	$0, 9(%edx)
	jz	L(exit_tail9)
	cmpb	$0, 10(%edx)
	jz	L(exit_tail10)
	cmpb	$0, 11(%edx)
	jz	L(exit_tail11)

	cmpb	$0, 12(%edx)
	jz	L(exit_tail12)
	cmpb	$0, 13(%edx)
	jz	L(exit_tail13)
	cmpb	$0, 14(%edx)
	jz	L(exit_tail14)
	cmpb	$0, 15(%edx)
	jz	L(exit_tail15)

	/*
	 * Continue with 16-byte aligned probes.  %eax is rounded down to a
	 * 16-byte boundary and is advanced *before* each test, so at L(exit)
	 * it points 16 bytes past the block that matched; %ecx is bumped by
	 * 16 once here to compensate.  %edx receives the per-byte NUL mask.
	 */
	pxor	%xmm0, %xmm0
	lea	16(%edx), %eax
	add	$16, %ecx
	and	$-16, %eax

	pcmpeqb	(%eax), %xmm0
	pmovmskb	%xmm0, %edx
	pxor	%xmm1, %xmm1
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb	%xmm1, %edx
	pxor	%xmm2, %xmm2
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2
	pmovmskb	%xmm2, %edx
	pxor	%xmm3, %xmm3
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb	%xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/*
	 * From here on the xmm registers are reused without re-zeroing: a
	 * probe that did not branch left its register all-zero.
	 */
	pcmpeqb	(%eax), %xmm0
	pmovmskb	%xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb	%xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2
	pmovmskb	%xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb	%xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm0
	pmovmskb	%xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb	%xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2
	pmovmskb	%xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb	%xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm0
	pmovmskb	%xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb	%xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2
	pmovmskb	%xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb	%xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/* Round down to a 64-byte boundary for the main loop.  */
	and	$-0x40, %eax

	/*
	 * 64 bytes per iteration: the byte-wise minimum of the four 16-byte
	 * blocks contains a zero byte iff one of the blocks does.  %xmm3 is
	 * all-zero here (its last probe found nothing) and serves as the
	 * comparison value.
	 */
	.p2align 4
L(aligned_64_loop):
	movaps	(%eax), %xmm0
	movaps	16(%eax), %xmm1
	movaps	32(%eax), %xmm2
	movaps	48(%eax), %xmm6
	pminub	%xmm1, %xmm0
	pminub	%xmm6, %xmm2
	pminub	%xmm0, %xmm2
	pcmpeqb	%xmm3, %xmm2
	pmovmskb	%xmm2, %edx
	lea	64(%eax), %eax
	test	%edx, %edx
	jz	L(aligned_64_loop)

	/*
	 * A NUL is somewhere in the 64 bytes ending at %eax.  Probe the four
	 * blocks in order; %ecx is adjusted so that "%eax - %ecx" at L(exit)
	 * is measured from the start of the block being probed (+48 for the
	 * first block, then 16 less for each later one).
	 */
	pcmpeqb	-64(%eax), %xmm3
	pmovmskb	%xmm3, %edx
	lea	48(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	%xmm1, %xmm3
	pmovmskb	%xmm3, %edx
	lea	-16(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	-32(%eax), %xmm3
	pmovmskb	%xmm3, %edx
	lea	-16(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	%xmm6, %xmm3
	pmovmskb	%xmm3, %edx
	lea	-16(%ecx), %ecx

	/*
	 * %edx = 16-bit NUL mask of the matching block.  %eax becomes the
	 * result for bit 0; the index of the lowest set bit is then added.
	 */
	.p2align 4
L(exit):
	sub	%ecx, %eax
	test	%dl, %dl
	jz	L(exit_more_8)

	mov	%dl, %cl
	and	$15, %cl
	jz	L(exit_more_4)
	test	$0x01, %dl
	jnz	L(exit_0)
	test	$0x02, %dl
	jnz	L(exit_1)
	test	$0x04, %dl
	jnz	L(exit_2)
	add	$3, %eax
	RETURN

	.p2align 4
L(exit_more_4):
	test	$0x10, %dl
	jnz	L(exit_4)
	test	$0x20, %dl
	jnz	L(exit_5)
	test	$0x40, %dl
	jnz	L(exit_6)
	add	$7, %eax
	RETURN

	.p2align 4
L(exit_more_8):
	mov	%dh, %ch
	and	$15, %ch
	jz	L(exit_more_12)
	test	$0x01, %dh
	jnz	L(exit_8)
	test	$0x02, %dh
	jnz	L(exit_9)
	test	$0x04, %dh
	jnz	L(exit_10)
	add	$11, %eax
	RETURN

	.p2align 4
L(exit_more_12):
	test	$0x10, %dh
	jnz	L(exit_12)
	test	$0x20, %dh
	jnz	L(exit_13)
	test	$0x40, %dh
	jnz	L(exit_14)
	add	$15, %eax
L(exit_0):
	RETURN

	.p2align 4
L(exit_1):
	add	$1, %eax
	RETURN

L(exit_2):
	add	$2, %eax
	RETURN

L(exit_3):
	add	$3, %eax
	RETURN

L(exit_4):
	add	$4, %eax
	RETURN

L(exit_5):
	add	$5, %eax
	RETURN

L(exit_6):
	add	$6, %eax
	RETURN

L(exit_7):
	add	$7, %eax
	RETURN

L(exit_8):
	add	$8, %eax
	RETURN

L(exit_9):
	add	$9, %eax
	RETURN

L(exit_10):
	add	$10, %eax
	RETURN

L(exit_11):
	add	$11, %eax
	RETURN

L(exit_12):
	add	$12, %eax
	RETURN

L(exit_13):
	add	$13, %eax
	RETURN

L(exit_14):
	add	$14, %eax
	RETURN

L(exit_15):
	add	$15, %eax
	RETURN

/* Exits of the byte-wise scan: result = (%edx + N) - %ecx.  */
L(exit_tail0):
	mov	%edx, %eax
	sub	%ecx, %eax
	RETURN

	.p2align 4
L(exit_tail1):
	lea	1(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail2):
	lea	2(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail3):
	lea	3(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail4):
	lea	4(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail5):
	lea	5(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail6):
	lea	6(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail7):
	lea	7(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail8):
	lea	8(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail9):
	lea	9(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail10):
	lea	10(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail11):
	lea	11(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail12):
	lea	12(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail13):
	lea	13(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail14):
	lea	14(%edx), %eax
	sub	%ecx, %eax
	RETURN

L(exit_tail15):
	lea	15(%edx), %eax
	sub	%ecx, %eax
	RETURN

/* The name must match ENTRY: END emits ".size name, .-name".  */
END (strlcat_ssse3)