/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
 * Syntax: GNU as, AT&T operand order (op src, dst), run through the C
 * preprocessor.  Register usage follows the System V AMD64 convention:
 * the three arguments arrive in %rdi, %rsi, %rdx and the result leaves
 * in %rax.
 */

/* L(name) expands to an assembler-local label (.Lname). */
#ifndef L
# define L(label)	.L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

/* ENTRY/END open and close a global, 16-byte aligned function symbol. */
#ifndef ENTRY
# define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
#endif

#ifndef END
# define END(name)	\
	cfi_endproc;	\
	.size name, .-name
#endif


#ifndef STRLCPY
# define STRLCPY	strlcpy
#endif

/*
 * Jump tables hold 32-bit offsets relative to the table base.
 * BRANCH_TO_JMPTBL_ENTRY loads entry INDEX, rebuilds the absolute
 * address and jumps to it.  Clobbers %rcx and %r11.
 */
#define JMPTBL(I, B)	I - B
#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)	\
	lea	TABLE(%rip), %r11;	\
	movslq	(%r11, INDEX, SCALE), %rcx;	\
	lea	(%r11, %rcx), %rcx;	\
	jmp	*%rcx

/* Every exit returns %rax + %r9. */
#define RETURN	\
	add	%r9, %rax;	\
	ret

/*
 * size_t STRLCPY (char *dst, const char *src, size_t size)
 *
 * In:   %rdi = dst, %rsi = src, %rdx = size
 * Out:  %rax = length of src (computed as %rax + %r9 by RETURN).
 *       At most size - 1 bytes are stored to dst followed by a NUL;
 *       nothing is stored when size is 0.
 *       When USE_AS_STRLCAT is defined the NUL of dst is located first
 *       and the copy starts there.
 * Clobbers: %rcx, %rdx, %rsi, %rdi, %r8, %r9, %r10, %r11,
 *           %xmm0, %xmm1, %xmm2, flags.
 *
 * Register roles inside the function:
 *   %r8   bytes of dst still available (counts the terminating NUL)
 *   %r9   length accumulated so far; added to %rax on return
 *   %rax  running byte offset / partial length
 *   %rcx  misalignment of the current pointer within its 16-byte chunk
 *   %rdx  pcmpeqb/pmovmskb NUL mask, later the bit index of the NUL
 *   %xmm0 kept all-zero while no NUL has been seen (compare operand)
 */
	.text
ENTRY (STRLCPY)
	xor	%rax, %rax
	xor	%r9, %r9
	mov	%rdx, %r8
	test	%r8, %r8
	jz	L(CalculateSrcLen)	/* size == 0: only measure src */

#ifdef USE_AS_STRLCAT
	/*
	 * Locate the NUL of dst, looking at no more than size bytes.
	 * NOTE(review): the two movdqu loads below read 16 bytes of dst
	 * before size is compared; confirm callers never pass a dst whose
	 * readable extent is shorter than that.
	 * NOTE(review): StringEndCase2 and StringEndCase3 jump straight to
	 * CopySrcString and therefore skip the 64-byte-line check done
	 * after the #endif; confirm the unaligned src load there is safe.
	 */
	xor	%rcx, %rcx
	pxor	%xmm0, %xmm0

	movdqu	(%rdi), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx

	cmp	$17, %r8
	jb	L(SizeEndCase1)
	test	%rdx, %rdx
	jnz	L(StringEndCase1)

	add	$16, %rax
	movdqu	16(%rdi), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx

	cmp	$33, %r8
	jb	L(SizeEndCase1)
	test	%rdx, %rdx
	jnz	L(StringEndCase1)

	mov	%rdi, %rcx
	and	$15, %rcx
	and	$-16, %rdi

	add	%rcx, %r8
	sub	$16, %r8

L(DstLenLoop):
	movdqa	(%rdi, %rax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx
	sub	$16, %r8
	jbe	L(SizeEndCase2)
	test	%rdx, %rdx
	jnz	L(StringEndCase2)
	add	$16, %rax
	jmp	L(DstLenLoop)

L(StringEndCase2):
	add	$16, %r8
	bsf	%rdx, %rdx
	sub	%rdx, %r8
	add	%rdx, %rax
	sub	%rcx, %r9
	add	%rax, %rdi
	jmp	L(CopySrcString)

L(SizeEndCase1):
	test	%rdx, %rdx
	jz	L(SizeEnd)
	bsf	%rdx, %rdx
	add	%rdx, %rax
	cmp	%r8, %rax
	jb	L(StringEnd)
L(SizeEnd):
	mov	%r8, %r9
	jmp	L(CalculateSrcLenCase1)

L(SizeEndCase2):
	add	$16, %r8
	test	%rdx, %rdx
	jz	L(StringEndCase4)
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(StringEndCase3)
L(StringEndCase4):
	add	%r8, %rax
	sub	%rcx, %rax
	mov	%rax, %r9
	jmp	L(CalculateSrcLenCase1)

L(StringEndCase3):
	add	%rdx, %rax
	sub	%rcx, %r9
	add	%rax, %rdi
	sub	%rdx, %r8
	jmp	L(CopySrcString)

L(StringEndCase1):
	bsf	%rdx, %rdx
	add	%rdx, %rax
	sub	%rcx, %rax
L(StringEnd):
	add	%rax, %rdi
	sub	%rax, %r8
#endif

	/*
	 * If src sits at offset <= 32 inside its 64-byte line, the two
	 * unaligned 16-byte loads in CopySrcString stay inside that line.
	 */
	mov	%rsi, %rcx
	and	$63, %rcx
	cmp	$32, %rcx
	jbe	L(CopySrcString)

	/*
	 * Otherwise inspect the aligned chunk that contains src and shift
	 * the NUL mask right so that bit 0 corresponds to src[0].
	 * %r10 = number of src bytes covered by the chunks examined so far.
	 */
	and	$-16, %rsi
	and	$15, %rcx
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1

	pcmpeqb	(%rsi), %xmm1
	pmovmskb %xmm1, %rdx
	shr	%cl, %rdx
	mov	$16, %r10
	sub	%rcx, %r10
	cmp	%r10, %r8
	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail)

	pcmpeqb	16(%rsi), %xmm0
	pmovmskb %xmm0, %rdx
	add	$16, %r10
	cmp	%r10, %r8
	jbe	L(CopyFrom1To32BytesCase2OrCase3)
	test	%rdx, %rdx
	jnz	L(CopyFrom1To32Bytes)

	movdqu	(%rsi, %rcx), %xmm1
	movdqu	%xmm1, (%rdi)
#ifdef USE_AS_STRLCAT
	add	%rax, %r9
#endif
	jmp	L(LoopStart)

	.p2align 4
L(CopySrcString):
#ifdef USE_AS_STRLCAT
	add	%rax, %r9
	xor	%rax, %rax
#endif
	pxor	%xmm0, %xmm0
	movdqu	(%rsi), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx

	cmp	$17, %r8
	jb	L(CopyFrom1To16BytesTail1Case2OrCase3)
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail1)

	movdqu	16(%rsi), %xmm2
	pcmpeqb	%xmm2, %xmm0
	movdqu	%xmm1, (%rdi)
	pmovmskb %xmm0, %rdx
	add	$16, %rax

	cmp	$33, %r8
	jb	L(CopyFrom1To32Bytes1Case2OrCase3)
	test	%rdx, %rdx
	jnz	L(CopyFrom1To32Bytes1)

	mov	%rsi, %rcx
	and	$15, %rcx
	and	$-16, %rsi

	/*
	 * %rsi is now 16-byte aligned.  %rdi is moved back by the same
	 * %rcx bytes so that one offset (%rax) addresses both buffers,
	 * and %r8 is rebased to the aligned chunk grid.
	 */
L(LoopStart):
	sub	%rcx, %rdi
	add	%rcx, %r8
	sub	$16, %r8
	mov	$16, %rax

	/* Main loop: one aligned 16-byte load, one unaligned store. */
L(16Loop):
	movdqa	(%rsi, %rax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesXmmExit)
	movdqu	%xmm1, (%rdi, %rax)
	add	$16, %rax
	jmp	L(16Loop)

/*------End of main part with loops---------------------*/

/* Case1: the NUL was found and the remaining size is large enough.
   The tail, NUL included, is copied through ExitStringTailTable.  */
	.p2align 4
L(CopyFrom1To16Bytes):
	add	%rcx, %rdi
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	add	%rdx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To16BytesTail):
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	add	%rdx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To32Bytes1):
	add	$16, %rsi
	add	$16, %rdi
	sub	$16, %r8
L(CopyFrom1To16BytesTail1):
	bsf	%rdx, %rdx
	add	%rdx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To32Bytes):
	bsf	%rdx, %rdx
	add	%rcx, %rsi
	add	$16, %rdx
	sub	%rcx, %rdx
	add	%rdx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)

	.p2align 4
L(CopyFrom1To16BytesExit):
	add	%rdx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)

/* Case2: a NUL is in the current chunk and the size limit is reached in
   the same chunk.  If the NUL index is below the remaining size the
   string tail is copied, otherwise the copy is truncated via ExitTable. */

	.p2align 4
L(CopyFrom1To16BytesCase2):
	add	$16, %r8
	add	%rax, %rdi
	add	%rax, %rsi
	bsf	%rdx, %rdx
	sub	%rcx, %rax
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	add	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2):
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	add	$16, %rdx
	sub	%rcx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	add	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

L(CopyFrom1To16BytesTailCase2):
	add	%rcx, %rsi
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	add	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

	.p2align 4
L(CopyFrom1To16BytesTail1Case2):
	bsf	%rdx, %rdx
	cmp	%r8, %rdx
	jb	L(CopyFrom1To16BytesExit)
	add	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

/* Case2 or Case3, Case3: the size limit is reached; Case3 means no NUL
   was seen in the current chunk, so the copy is truncated.  */

	.p2align 4
L(CopyFrom1To16BytesCase2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesCase2)
	add	$16, %r8
	add	%rax, %rdi
	add	%rax, %rsi
	add	%r8, %rax
	sub	%rcx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32BytesCase2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To32BytesCase2)
	add	%rcx, %rsi
	add	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

	.p2align 4
L(CopyFrom1To16BytesTailCase2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTailCase2)
	add	%rcx, %rsi
	add	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

	.p2align 4
L(CopyFrom1To32Bytes1Case2OrCase3):
	add	$16, %rdi
	add	$16, %rsi
	sub	$16, %r8
L(CopyFrom1To16BytesTail1Case2OrCase3):
	test	%rdx, %rdx
	jnz	L(CopyFrom1To16BytesTail1Case2)
	add	%r8, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)

	.p2align 4
L(CopyFrom1To16BytesXmmExit):
	bsf	%rdx, %rdx
	add	%rax, %rdi
	add	%rax, %rsi
	add	%rdx, %rax
	sub	%rcx, %rax
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)

/*------------End labels regarding with copying 1-16 bytes--and 1-32 bytes----*/

/*
 * ExitN (N = %r8, the remaining size): truncated copy.  Stores N - 1
 * bytes from (%rsi) to (%rdi), writes the NUL at offset N - 1, then
 * continues in CalculateSrcLen to measure the rest of src.
 */

	.p2align 4
L(Exit0):
	RETURN

	.p2align 4
L(Exit1):
	movb	$0, (%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit2):
	movb	(%rsi), %dh
	movb	%dh, (%rdi)
	movb	$0, 1(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit3):
	movw	(%rsi), %dx
	movw	%dx, (%rdi)
	movb	$0, 2(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit4):
	movw	(%rsi), %cx
	movb	2(%rsi), %dh
	movw	%cx, (%rdi)
	movb	%dh, 2(%rdi)
	movb	$0, 3(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit5):
	movl	(%rsi), %edx
	movl	%edx, (%rdi)
	movb	$0, 4(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit6):
	movl	(%rsi), %ecx
	movb	4(%rsi), %dh
	movl	%ecx, (%rdi)
	movb	%dh, 4(%rdi)
	movb	$0, 5(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit7):
	movl	(%rsi), %ecx
	movw	4(%rsi), %dx
	movl	%ecx, (%rdi)
	movw	%dx, 4(%rdi)
	movb	$0, 6(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit8):
	movl	(%rsi), %ecx
	movl	3(%rsi), %edx
	movl	%ecx, (%rdi)
	movl	%edx, 3(%rdi)
	movb	$0, 7(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit9):
	movq	(%rsi), %rdx
	movq	%rdx, (%rdi)
	movb	$0, 8(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit10):
	movq	(%rsi), %rcx
	movb	8(%rsi), %dh
	movq	%rcx, (%rdi)
	movb	%dh, 8(%rdi)
	movb	$0, 9(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit11):
	movq	(%rsi), %rcx
	movw	8(%rsi), %dx
	movq	%rcx, (%rdi)
	movw	%dx, 8(%rdi)
	movb	$0, 10(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit12):
	movq	(%rsi), %rcx
	movl	7(%rsi), %edx
	movq	%rcx, (%rdi)
	movl	%edx, 7(%rdi)
	movb	$0, 11(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit13):
	movq	(%rsi), %rcx
	movl	8(%rsi), %edx
	movq	%rcx, (%rdi)
	movl	%edx, 8(%rdi)
	movb	$0, 12(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit14):
	movq	(%rsi), %rcx
	movq	5(%rsi), %rdx
	movq	%rcx, (%rdi)
	movq	%rdx, 5(%rdi)
	movb	$0, 13(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit15):
	movq	(%rsi), %rcx
	movq	6(%rsi), %rdx
	movq	%rcx, (%rdi)
	movq	%rdx, 6(%rdi)
	movb	$0, 14(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit16):
	movq	(%rsi), %rcx
	movq	7(%rsi), %rdx
	movq	%rcx, (%rdi)
	movq	%rdx, 7(%rdi)
	movb	$0, 15(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit17):
	movdqu	(%rsi), %xmm0
	movdqu	%xmm0, (%rdi)
	movb	$0, 16(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit18):
	movdqu	(%rsi), %xmm0
	movb	16(%rsi), %dh
	movdqu	%xmm0, (%rdi)
	movb	%dh, 16(%rdi)
	movb	$0, 17(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit19):
	movdqu	(%rsi), %xmm0
	movw	16(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	movw	%cx, 16(%rdi)
	movb	$0, 18(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit20):
	movdqu	(%rsi), %xmm0
	movl	15(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	movl	%ecx, 15(%rdi)
	movb	$0, 19(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit21):
	movdqu	(%rsi), %xmm0
	movl	16(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	movl	%ecx, 16(%rdi)
	movb	$0, 20(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit22):
	movdqu	(%rsi), %xmm0
	movl	16(%rsi), %ecx
	movb	20(%rsi), %dh
	movdqu	%xmm0, (%rdi)
	movl	%ecx, 16(%rdi)
	movb	%dh, 20(%rdi)
	movb	$0, 21(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit23):
	movdqu	(%rsi), %xmm0
	movq	14(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	movq	%rcx, 14(%rdi)
	movb	$0, 22(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit24):
	movdqu	(%rsi), %xmm0
	movq	15(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	movq	%rcx, 15(%rdi)
	movb	$0, 23(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit25):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	movq	%rcx, 16(%rdi)
	movb	$0, 24(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit26):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rcx
	movb	24(%rsi), %dh
	movdqu	%xmm0, (%rdi)
	movq	%rcx, 16(%rdi)
	mov	%dh, 24(%rdi)
	movb	$0, 25(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit27):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rdx
	movw	24(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	movq	%rdx, 16(%rdi)
	movw	%cx, 24(%rdi)
	movb	$0, 26(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit28):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rdx
	movl	23(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	movq	%rdx, 16(%rdi)
	movl	%ecx, 23(%rdi)
	movb	$0, 27(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit29):
	movdqu	(%rsi), %xmm0
	movq	16(%rsi), %rdx
	movl	24(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	movq	%rdx, 16(%rdi)
	movl	%ecx, 24(%rdi)
	movb	$0, 28(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit30):
	movdqu	(%rsi), %xmm0
	movdqu	13(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 13(%rdi)
	movb	$0, 29(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit31):
	movdqu	(%rsi), %xmm0
	movdqu	14(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 14(%rdi)
	movb	$0, 30(%rdi)
	jmp	L(CalculateSrcLen)

	.p2align 4
L(Exit32):
	movdqu	(%rsi), %xmm0
	movdqu	15(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 15(%rdi)
	movb	$0, 31(%rdi)
	jmp	L(CalculateSrcLen)

/*
 * StringTailN (N = %rdx, the index of the NUL in src): complete copy.
 * Stores N + 1 bytes, i.e. the tail of src including its NUL, and
 * returns.
 */

	.p2align 4
L(StringTail0):
	mov	(%rsi), %dl
	mov	%dl, (%rdi)
	RETURN

	.p2align 4
L(StringTail1):
	mov	(%rsi), %dx
	mov	%dx, (%rdi)
	RETURN

	.p2align 4
L(StringTail2):
	mov	(%rsi), %cx
	mov	2(%rsi), %dl
	mov	%cx, (%rdi)
	mov	%dl, 2(%rdi)
	RETURN

	.p2align 4
L(StringTail3):
	mov	(%rsi), %edx
	mov	%edx, (%rdi)
	RETURN

	.p2align 4
L(StringTail4):
	mov	(%rsi), %ecx
	mov	4(%rsi), %dl
	mov	%ecx, (%rdi)
	mov	%dl, 4(%rdi)
	RETURN

	.p2align 4
L(StringTail5):
	mov	(%rsi), %ecx
	mov	4(%rsi), %dx
	mov	%ecx, (%rdi)
	mov	%dx, 4(%rdi)
	RETURN

	.p2align 4
L(StringTail6):
	mov	(%rsi), %ecx
	mov	3(%rsi), %edx
	mov	%ecx, (%rdi)
	mov	%edx, 3(%rdi)
	RETURN

	.p2align 4
L(StringTail7):
	mov	(%rsi), %rdx
	mov	%rdx, (%rdi)
	RETURN

	.p2align 4
L(StringTail8):
	mov	(%rsi), %rcx
	mov	8(%rsi), %dl
	mov	%rcx, (%rdi)
	mov	%dl, 8(%rdi)
	RETURN

	.p2align 4
L(StringTail9):
	mov	(%rsi), %rcx
	mov	8(%rsi), %dx
	mov	%rcx, (%rdi)
	mov	%dx, 8(%rdi)
	RETURN

	.p2align 4
L(StringTail10):
	mov	(%rsi), %rcx
	mov	7(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 7(%rdi)
	RETURN

	.p2align 4
L(StringTail11):
	mov	(%rsi), %rcx
	mov	8(%rsi), %edx
	mov	%rcx, (%rdi)
	mov	%edx, 8(%rdi)
	RETURN

	.p2align 4
L(StringTail12):
	mov	(%rsi), %rcx
	mov	5(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 5(%rdi)
	RETURN

	.p2align 4
L(StringTail13):
	mov	(%rsi), %rcx
	mov	6(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 6(%rdi)
	RETURN

	.p2align 4
L(StringTail14):
	mov	(%rsi), %rcx
	mov	7(%rsi), %rdx
	mov	%rcx, (%rdi)
	mov	%rdx, 7(%rdi)
	RETURN

	.p2align 4
L(StringTail15):
	movdqu	(%rsi), %xmm0
	movdqu	%xmm0, (%rdi)
	RETURN

	.p2align 4
L(StringTail16):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %cl
	movdqu	%xmm0, (%rdi)
	mov	%cl, 16(%rdi)
	RETURN

	.p2align 4
L(StringTail17):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	mov	%cx, 16(%rdi)
	RETURN

	.p2align 4
L(StringTail18):
	movdqu	(%rsi), %xmm0
	mov	15(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 15(%rdi)
	RETURN

	.p2align 4
L(StringTail19):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
	RETURN

	.p2align 4
L(StringTail20):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %ecx
	mov	20(%rsi), %dl
	movdqu	%xmm0, (%rdi)
	mov	%ecx, 16(%rdi)
	mov	%dl, 20(%rdi)
	RETURN

	.p2align 4
L(StringTail21):
	movdqu	(%rsi), %xmm0
	mov	14(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 14(%rdi)
	RETURN

	.p2align 4
L(StringTail22):
	movdqu	(%rsi), %xmm0
	mov	15(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 15(%rdi)
	RETURN

	.p2align 4
L(StringTail23):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rcx
	movdqu	%xmm0, (%rdi)
	mov	%rcx, 16(%rdi)
	RETURN

	.p2align 4
L(StringTail24):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %cl
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%cl, 24(%rdi)
	RETURN

	.p2align 4
L(StringTail25):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%cx, 24(%rdi)
	RETURN

	.p2align 4
L(StringTail26):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	23(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%ecx, 23(%rdi)
	RETURN

	.p2align 4
L(StringTail27):
	movdqu	(%rsi), %xmm0
	mov	16(%rsi), %rdx
	mov	24(%rsi), %ecx
	movdqu	%xmm0, (%rdi)
	mov	%rdx, 16(%rdi)
	mov	%ecx, 24(%rdi)
	RETURN

	.p2align 4
L(StringTail28):
	movdqu	(%rsi), %xmm0
	movdqu	13(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 13(%rdi)
	RETURN

	.p2align 4
L(StringTail29):
	movdqu	(%rsi), %xmm0
	movdqu	14(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 14(%rdi)
	RETURN

	.p2align 4
L(StringTail30):
	movdqu	(%rsi), %xmm0
	movdqu	15(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 15(%rdi)
	RETURN

	.p2align 4
L(StringTail31):
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm2
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
	RETURN

	.p2align 4
L(StringTail32):
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm2
	mov	32(%rsi), %cl
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
	mov	%cl, 32(%rdi)
	RETURN

	/* Copies 34 bytes (offsets 0..33), matching the N + 1 rule of the
	   other StringTail handlers.  */
	.p2align 4
L(StringTail33):
	movdqu	(%rsi), %xmm0
	movdqu	16(%rsi), %xmm2
	mov	32(%rsi), %cx
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm2, 16(%rdi)
	mov	%cx, 32(%rdi)
	RETURN

/*
 * CalculateSrcLen: find the NUL of the string at %rsi + %r8 and return
 * %rax + %r9 + (distance from %rsi + %r8 to that NUL).
 *
 * The start address can be arbitrarily aligned and the NUL may be only
 * a few bytes away, so an unaligned 16-byte load here could run into
 * the following page.  Only whole aligned 16-byte chunks are read: the
 * first one is the chunk containing the start address, and the mask
 * bits for the bytes in front of the start address are cleared.
 */
	.p2align 4
L(CalculateSrcLenCase1):
	xor	%r8, %r8
	xor	%rax, %rax
L(CalculateSrcLen):
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1
	add	%r8, %rsi
	mov	%rsi, %rcx
	and	$15, %rcx		/* rcx = offset inside the chunk */
	and	$-16, %rsi		/* rsi = aligned chunk base */
	/* xmm1 is used for this compare so that xmm0 stays all-zero even
	   when bytes in front of the start address are zero.  */
	pcmpeqb	(%rsi), %xmm1
	pmovmskb %xmm1, %rdx
	shr	%cl, %rdx		/* drop bits below the start ... */
	shl	%cl, %rdx		/* ... keep chunk-relative indices */
	test	%rdx, %rdx
	jnz	L(SrcLenLoopEnd)

	add	%rax, %r9
	mov	$16, %rax
L(SrcLenLoop):
	movdqa	(%rsi, %rax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rdx
	test	%rdx, %rdx
	jnz	L(SrcLenLoopEnd)
	add	$16, %rax
	jmp	L(SrcLenLoop)

	/* rdx holds a mask indexed from the aligned chunk; subtracting
	   rcx converts the position back to the unaligned start.  */
	.p2align 4
L(SrcLenLoopEnd):
	bsf	%rdx, %rdx
	add	%rdx, %rax
	sub	%rcx, %rax
	RETURN

END (STRLCPY)

	/* Select the section first so that the alignment applies to the
	   tables and not to the end of .text.  */
	.section .rodata
	.p2align 4
L(ExitTable):
	.int	JMPTBL(L(Exit0), L(ExitTable))
	.int	JMPTBL(L(Exit1), L(ExitTable))
	.int	JMPTBL(L(Exit2), L(ExitTable))
	.int	JMPTBL(L(Exit3), L(ExitTable))
	.int	JMPTBL(L(Exit4), L(ExitTable))
	.int	JMPTBL(L(Exit5), L(ExitTable))
	.int	JMPTBL(L(Exit6), L(ExitTable))
	.int	JMPTBL(L(Exit7), L(ExitTable))
	.int	JMPTBL(L(Exit8), L(ExitTable))
	.int	JMPTBL(L(Exit9), L(ExitTable))
	.int	JMPTBL(L(Exit10), L(ExitTable))
	.int	JMPTBL(L(Exit11), L(ExitTable))
	.int	JMPTBL(L(Exit12), L(ExitTable))
	.int	JMPTBL(L(Exit13), L(ExitTable))
	.int	JMPTBL(L(Exit14), L(ExitTable))
	.int	JMPTBL(L(Exit15), L(ExitTable))
	.int	JMPTBL(L(Exit16), L(ExitTable))
	.int	JMPTBL(L(Exit17), L(ExitTable))
	.int	JMPTBL(L(Exit18), L(ExitTable))
	.int	JMPTBL(L(Exit19), L(ExitTable))
	.int	JMPTBL(L(Exit20), L(ExitTable))
	.int	JMPTBL(L(Exit21), L(ExitTable))
	.int	JMPTBL(L(Exit22), L(ExitTable))
	.int	JMPTBL(L(Exit23), L(ExitTable))
	.int	JMPTBL(L(Exit24), L(ExitTable))
	.int	JMPTBL(L(Exit25), L(ExitTable))
	.int	JMPTBL(L(Exit26), L(ExitTable))
	.int	JMPTBL(L(Exit27), L(ExitTable))
	.int	JMPTBL(L(Exit28), L(ExitTable))
	.int	JMPTBL(L(Exit29), L(ExitTable))
	.int	JMPTBL(L(Exit30), L(ExitTable))
	.int	JMPTBL(L(Exit31), L(ExitTable))
	.int	JMPTBL(L(Exit32), L(ExitTable))
L(ExitStringTailTable):
	.int	JMPTBL(L(StringTail0), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail1), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail2), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail3), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail4), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail5), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail6), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail7), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail8), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail9), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail10), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail11), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail12), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail13), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail14), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail15), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail16), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail17), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail18), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail19), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail20), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail21), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail22), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail23), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail24), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail25), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail26), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail27), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail28), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail29), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail30), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail31), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail32), L(ExitStringTailTable))
	.int	JMPTBL(L(StringTail33), L(ExitStringTailTable))