/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
 * Preamble for the SSSE3 memcmp implementation.
 *
 * Syntax : GNU as, AT&T operand order (source, destination), 32-bit x86.
 * The file is run through the C preprocessor, so every directive below
 * must start on its own line.
 *
 * Each helper macro is wrapped in #ifndef so that an including file may
 * supply its own definition first.
 */

/* Exported symbol name of the routine; override before inclusion. */
#ifndef MEMCMP
# define MEMCMP		ssse3_memcmp3_new
#endif

/* L(name) builds an assembler-local label (.L prefix, not exported). */
#ifndef L
# define L(label)	.L##label
#endif

/* ALIGN(n) aligns the next location to a 2^n byte boundary. */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

/* Thin wrappers around the assembler call-frame-information directives. */
#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef cfi_remember_state
# define cfi_remember_state		.cfi_remember_state
#endif

#ifndef cfi_restore_state
# define cfi_restore_state		.cfi_restore_state
#endif

/*
 * ENTRY(name): declare NAME as a global function symbol, align it to
 * 16 bytes, emit its label and open the CFI region.
 */
#ifndef ENTRY
# define ENTRY(name)		\
	.type name, @function;	\
	.globl name;		\
	.p2align 4;		\
name:				\
	cfi_startproc
#endif

/* END(name): close the CFI region and record the symbol size. */
#ifndef END
# define END(name)		\
	cfi_endproc;		\
	.size name, .-name
#endif

/*
 * CFI bookkeeping for a 4-byte push/pop of REG.  These emit unwind
 * information only, no machine instructions.
 */
#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* Push/pop a 32-bit register and keep the unwind information in sync. */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/*
 * Offsets of the three stack arguments relative to %esp at function
 * entry, before any register has been pushed.
 */
#define PARMS		4
#define BLK1		PARMS
#define BLK2		BLK1+4
#define LEN		BLK2+4

/*
 * RETURN_END: restore %edi, %esi and %ebx (reverse of the push order
 * used on the long path) and return.
 * RETURN: same, then switch the unwind state back to the remembered
 * "three registers pushed" state and remember it again, because the
 * code placed after the ret is entered with those registers on the stack.
 */
#define RETURN_END	POP (%edi); POP (%esi); POP (%ebx); ret
#define RETURN		RETURN_END; cfi_restore_state; cfi_remember_state

	.section .text.ssse3,"ax",@progbits
ENTRY (MEMCMP)
	/* %ecx = LEN, %eax = BLK1, %edx = BLK2.  */
	movl	LEN(%esp), %ecx
	movl	BLK1(%esp), %eax
	cmp	$48, %ecx
	/* movl leaves the flags alone, so jae still tests LEN against 48.  */
	movl	BLK2(%esp), %edx
	jae	L(48bytesormore)
	/* LEN of 0 or 1 is handled without touching the stack.  */
	cmp	$1, %ecx
	jbe	L(less1bytes)
	/* 2 <= LEN < 48: save %ebx, advance both pointers by LEN so they
	   point one past the end, then continue at L(less48bytes).  */
	PUSH (%ebx)
	add	%ecx, %edx
	add	%ecx, %eax
	jmp	L(less48bytes)

	/* Unwind info only: the code below is reached with %ebx not pushed.  */
	CFI_POP (%ebx)
	ALIGN (4)
L(less1bytes):
	/* Flags are still those of "cmp $1, %ecx": below means LEN == 0.  */
	jb	L(zero)
	/* LEN == 1: flags = BLK1[0] - BLK2[0].  */
	movb	(%eax), %cl
	cmp	(%edx), %cl
	je	L(zero)
	/* mov keeps the flags of the byte compare for the branch that follows.  */
	mov	$1, %eax
ja L(1bytesend) 127 neg %eax 128 L(1bytesend): 129 ret 130 131 ALIGN (4) 132 L(zero): 133 mov $0, %eax 134 ret 135 136 ALIGN (4) 137 L(48bytesormore): 138 PUSH (%ebx) 139 PUSH (%esi) 140 PUSH (%edi) 141 cfi_remember_state 142 movdqu (%eax), %xmm3 143 movdqu (%edx), %xmm0 144 movl %eax, %edi 145 movl %edx, %esi 146 pcmpeqb %xmm0, %xmm3 147 pmovmskb %xmm3, %edx 148 lea 16(%edi), %edi 149 150 sub $0xffff, %edx 151 lea 16(%esi), %esi 152 jnz L(less16bytes) 153 mov %edi, %edx 154 and $0xf, %edx 155 xor %edx, %edi 156 sub %edx, %esi 157 add %edx, %ecx 158 mov %esi, %edx 159 and $0xf, %edx 160 jz L(shr_0) 161 xor %edx, %esi 162 163 cmp $8, %edx 164 jae L(next_unaligned_table) 165 cmp $0, %edx 166 je L(shr_0) 167 cmp $1, %edx 168 je L(shr_1) 169 cmp $2, %edx 170 je L(shr_2) 171 cmp $3, %edx 172 je L(shr_3) 173 cmp $4, %edx 174 je L(shr_4) 175 cmp $5, %edx 176 je L(shr_5) 177 cmp $6, %edx 178 je L(shr_6) 179 jmp L(shr_7) 180 181 ALIGN (4) 182 L(next_unaligned_table): 183 cmp $8, %edx 184 je L(shr_8) 185 cmp $9, %edx 186 je L(shr_9) 187 cmp $10, %edx 188 je L(shr_10) 189 cmp $11, %edx 190 je L(shr_11) 191 cmp $12, %edx 192 je L(shr_12) 193 cmp $13, %edx 194 je L(shr_13) 195 cmp $14, %edx 196 je L(shr_14) 197 jmp L(shr_15) 198 199 ALIGN (4) 200 L(shr_0): 201 cmp $80, %ecx 202 jae L(shr_0_gobble) 203 lea -48(%ecx), %ecx 204 xor %eax, %eax 205 movaps (%esi), %xmm1 206 pcmpeqb (%edi), %xmm1 207 movaps 16(%esi), %xmm2 208 pcmpeqb 16(%edi), %xmm2 209 pand %xmm1, %xmm2 210 pmovmskb %xmm2, %edx 211 add $32, %edi 212 add $32, %esi 213 sub $0xffff, %edx 214 jnz L(exit) 215 216 lea (%ecx, %edi,1), %eax 217 lea (%ecx, %esi,1), %edx 218 POP (%edi) 219 POP (%esi) 220 jmp L(less48bytes) 221 222 cfi_restore_state 223 cfi_remember_state 224 ALIGN (4) 225 L(shr_0_gobble): 226 lea -48(%ecx), %ecx 227 movdqa (%esi), %xmm0 228 xor %eax, %eax 229 pcmpeqb (%edi), %xmm0 230 sub $32, %ecx 231 movdqa 16(%esi), %xmm2 232 pcmpeqb 16(%edi), %xmm2 233 L(shr_0_gobble_loop): 234 pand %xmm0, %xmm2 235 sub 
$32, %ecx 236 pmovmskb %xmm2, %edx 237 movdqa %xmm0, %xmm1 238 movdqa 32(%esi), %xmm0 239 movdqa 48(%esi), %xmm2 240 sbb $0xffff, %edx 241 pcmpeqb 32(%edi), %xmm0 242 pcmpeqb 48(%edi), %xmm2 243 lea 32(%edi), %edi 244 lea 32(%esi), %esi 245 jz L(shr_0_gobble_loop) 246 247 pand %xmm0, %xmm2 248 cmp $0, %ecx 249 jge L(shr_0_gobble_loop_next) 250 inc %edx 251 add $32, %ecx 252 L(shr_0_gobble_loop_next): 253 test %edx, %edx 254 jnz L(exit) 255 256 pmovmskb %xmm2, %edx 257 movdqa %xmm0, %xmm1 258 lea 32(%edi), %edi 259 lea 32(%esi), %esi 260 sub $0xffff, %edx 261 jnz L(exit) 262 lea (%ecx, %edi,1), %eax 263 lea (%ecx, %esi,1), %edx 264 POP (%edi) 265 POP (%esi) 266 jmp L(less48bytes) 267 268 cfi_restore_state 269 cfi_remember_state 270 ALIGN (4) 271 L(shr_1): 272 cmp $80, %ecx 273 lea -48(%ecx), %ecx 274 mov %edx, %eax 275 jae L(shr_1_gobble) 276 277 movdqa 16(%esi), %xmm1 278 movdqa %xmm1, %xmm2 279 palignr $1,(%esi), %xmm1 280 pcmpeqb (%edi), %xmm1 281 282 movdqa 32(%esi), %xmm3 283 palignr $1,%xmm2, %xmm3 284 pcmpeqb 16(%edi), %xmm3 285 286 pand %xmm1, %xmm3 287 pmovmskb %xmm3, %edx 288 lea 32(%edi), %edi 289 lea 32(%esi), %esi 290 sub $0xffff, %edx 291 jnz L(exit) 292 lea (%ecx, %edi,1), %eax 293 lea 1(%ecx, %esi,1), %edx 294 POP (%edi) 295 POP (%esi) 296 jmp L(less48bytes) 297 298 cfi_restore_state 299 cfi_remember_state 300 ALIGN (4) 301 L(shr_1_gobble): 302 sub $32, %ecx 303 movdqa 16(%esi), %xmm0 304 palignr $1,(%esi), %xmm0 305 pcmpeqb (%edi), %xmm0 306 307 movdqa 32(%esi), %xmm3 308 palignr $1,16(%esi), %xmm3 309 pcmpeqb 16(%edi), %xmm3 310 311 L(shr_1_gobble_loop): 312 pand %xmm0, %xmm3 313 sub $32, %ecx 314 pmovmskb %xmm3, %edx 315 movdqa %xmm0, %xmm1 316 317 movdqa 64(%esi), %xmm3 318 palignr $1,48(%esi), %xmm3 319 sbb $0xffff, %edx 320 movdqa 48(%esi), %xmm0 321 palignr $1,32(%esi), %xmm0 322 pcmpeqb 32(%edi), %xmm0 323 lea 32(%esi), %esi 324 pcmpeqb 48(%edi), %xmm3 325 326 lea 32(%edi), %edi 327 jz L(shr_1_gobble_loop) 328 pand %xmm0, %xmm3 329 330 cmp 
$0, %ecx 331 jge L(shr_1_gobble_next) 332 inc %edx 333 add $32, %ecx 334 L(shr_1_gobble_next): 335 test %edx, %edx 336 jnz L(exit) 337 338 pmovmskb %xmm3, %edx 339 movdqa %xmm0, %xmm1 340 lea 32(%edi), %edi 341 lea 32(%esi), %esi 342 sub $0xffff, %edx 343 jnz L(exit) 344 345 lea (%ecx, %edi,1), %eax 346 lea 1(%ecx, %esi,1), %edx 347 POP (%edi) 348 POP (%esi) 349 jmp L(less48bytes) 350 351 cfi_restore_state 352 cfi_remember_state 353 ALIGN (4) 354 L(shr_2): 355 cmp $80, %ecx 356 lea -48(%ecx), %ecx 357 mov %edx, %eax 358 jae L(shr_2_gobble) 359 360 movdqa 16(%esi), %xmm1 361 movdqa %xmm1, %xmm2 362 palignr $2,(%esi), %xmm1 363 pcmpeqb (%edi), %xmm1 364 365 movdqa 32(%esi), %xmm3 366 palignr $2,%xmm2, %xmm3 367 pcmpeqb 16(%edi), %xmm3 368 369 pand %xmm1, %xmm3 370 pmovmskb %xmm3, %edx 371 lea 32(%edi), %edi 372 lea 32(%esi), %esi 373 sub $0xffff, %edx 374 jnz L(exit) 375 lea (%ecx, %edi,1), %eax 376 lea 2(%ecx, %esi,1), %edx 377 POP (%edi) 378 POP (%esi) 379 jmp L(less48bytes) 380 381 cfi_restore_state 382 cfi_remember_state 383 ALIGN (4) 384 L(shr_2_gobble): 385 sub $32, %ecx 386 movdqa 16(%esi), %xmm0 387 palignr $2,(%esi), %xmm0 388 pcmpeqb (%edi), %xmm0 389 390 movdqa 32(%esi), %xmm3 391 palignr $2,16(%esi), %xmm3 392 pcmpeqb 16(%edi), %xmm3 393 394 L(shr_2_gobble_loop): 395 pand %xmm0, %xmm3 396 sub $32, %ecx 397 pmovmskb %xmm3, %edx 398 movdqa %xmm0, %xmm1 399 400 movdqa 64(%esi), %xmm3 401 palignr $2,48(%esi), %xmm3 402 sbb $0xffff, %edx 403 movdqa 48(%esi), %xmm0 404 palignr $2,32(%esi), %xmm0 405 pcmpeqb 32(%edi), %xmm0 406 lea 32(%esi), %esi 407 pcmpeqb 48(%edi), %xmm3 408 409 lea 32(%edi), %edi 410 jz L(shr_2_gobble_loop) 411 pand %xmm0, %xmm3 412 413 cmp $0, %ecx 414 jge L(shr_2_gobble_next) 415 inc %edx 416 add $32, %ecx 417 L(shr_2_gobble_next): 418 test %edx, %edx 419 jnz L(exit) 420 421 pmovmskb %xmm3, %edx 422 movdqa %xmm0, %xmm1 423 lea 32(%edi), %edi 424 lea 32(%esi), %esi 425 sub $0xffff, %edx 426 jnz L(exit) 427 428 lea (%ecx, %edi,1), %eax 429 
lea 2(%ecx, %esi,1), %edx 430 POP (%edi) 431 POP (%esi) 432 jmp L(less48bytes) 433 434 cfi_restore_state 435 cfi_remember_state 436 ALIGN (4) 437 L(shr_3): 438 cmp $80, %ecx 439 lea -48(%ecx), %ecx 440 mov %edx, %eax 441 jae L(shr_3_gobble) 442 443 movdqa 16(%esi), %xmm1 444 movdqa %xmm1, %xmm2 445 palignr $3,(%esi), %xmm1 446 pcmpeqb (%edi), %xmm1 447 448 movdqa 32(%esi), %xmm3 449 palignr $3,%xmm2, %xmm3 450 pcmpeqb 16(%edi), %xmm3 451 452 pand %xmm1, %xmm3 453 pmovmskb %xmm3, %edx 454 lea 32(%edi), %edi 455 lea 32(%esi), %esi 456 sub $0xffff, %edx 457 jnz L(exit) 458 lea (%ecx, %edi,1), %eax 459 lea 3(%ecx, %esi,1), %edx 460 POP (%edi) 461 POP (%esi) 462 jmp L(less48bytes) 463 464 cfi_restore_state 465 cfi_remember_state 466 ALIGN (4) 467 L(shr_3_gobble): 468 sub $32, %ecx 469 movdqa 16(%esi), %xmm0 470 palignr $3,(%esi), %xmm0 471 pcmpeqb (%edi), %xmm0 472 473 movdqa 32(%esi), %xmm3 474 palignr $3,16(%esi), %xmm3 475 pcmpeqb 16(%edi), %xmm3 476 477 L(shr_3_gobble_loop): 478 pand %xmm0, %xmm3 479 sub $32, %ecx 480 pmovmskb %xmm3, %edx 481 movdqa %xmm0, %xmm1 482 483 movdqa 64(%esi), %xmm3 484 palignr $3,48(%esi), %xmm3 485 sbb $0xffff, %edx 486 movdqa 48(%esi), %xmm0 487 palignr $3,32(%esi), %xmm0 488 pcmpeqb 32(%edi), %xmm0 489 lea 32(%esi), %esi 490 pcmpeqb 48(%edi), %xmm3 491 492 lea 32(%edi), %edi 493 jz L(shr_3_gobble_loop) 494 pand %xmm0, %xmm3 495 496 cmp $0, %ecx 497 jge L(shr_3_gobble_next) 498 inc %edx 499 add $32, %ecx 500 L(shr_3_gobble_next): 501 test %edx, %edx 502 jnz L(exit) 503 504 pmovmskb %xmm3, %edx 505 movdqa %xmm0, %xmm1 506 lea 32(%edi), %edi 507 lea 32(%esi), %esi 508 sub $0xffff, %edx 509 jnz L(exit) 510 511 lea (%ecx, %edi,1), %eax 512 lea 3(%ecx, %esi,1), %edx 513 POP (%edi) 514 POP (%esi) 515 jmp L(less48bytes) 516 517 cfi_restore_state 518 cfi_remember_state 519 ALIGN (4) 520 L(shr_4): 521 cmp $80, %ecx 522 lea -48(%ecx), %ecx 523 mov %edx, %eax 524 jae L(shr_4_gobble) 525 526 movdqa 16(%esi), %xmm1 527 movdqa %xmm1, %xmm2 528 
palignr $4,(%esi), %xmm1 529 pcmpeqb (%edi), %xmm1 530 531 movdqa 32(%esi), %xmm3 532 palignr $4,%xmm2, %xmm3 533 pcmpeqb 16(%edi), %xmm3 534 535 pand %xmm1, %xmm3 536 pmovmskb %xmm3, %edx 537 lea 32(%edi), %edi 538 lea 32(%esi), %esi 539 sub $0xffff, %edx 540 jnz L(exit) 541 lea (%ecx, %edi,1), %eax 542 lea 4(%ecx, %esi,1), %edx 543 POP (%edi) 544 POP (%esi) 545 jmp L(less48bytes) 546 547 cfi_restore_state 548 cfi_remember_state 549 ALIGN (4) 550 L(shr_4_gobble): 551 sub $32, %ecx 552 movdqa 16(%esi), %xmm0 553 palignr $4,(%esi), %xmm0 554 pcmpeqb (%edi), %xmm0 555 556 movdqa 32(%esi), %xmm3 557 palignr $4,16(%esi), %xmm3 558 pcmpeqb 16(%edi), %xmm3 559 560 L(shr_4_gobble_loop): 561 pand %xmm0, %xmm3 562 sub $32, %ecx 563 pmovmskb %xmm3, %edx 564 movdqa %xmm0, %xmm1 565 566 movdqa 64(%esi), %xmm3 567 palignr $4,48(%esi), %xmm3 568 sbb $0xffff, %edx 569 movdqa 48(%esi), %xmm0 570 palignr $4,32(%esi), %xmm0 571 pcmpeqb 32(%edi), %xmm0 572 lea 32(%esi), %esi 573 pcmpeqb 48(%edi), %xmm3 574 575 lea 32(%edi), %edi 576 jz L(shr_4_gobble_loop) 577 pand %xmm0, %xmm3 578 579 cmp $0, %ecx 580 jge L(shr_4_gobble_next) 581 inc %edx 582 add $32, %ecx 583 L(shr_4_gobble_next): 584 test %edx, %edx 585 jnz L(exit) 586 587 pmovmskb %xmm3, %edx 588 movdqa %xmm0, %xmm1 589 lea 32(%edi), %edi 590 lea 32(%esi), %esi 591 sub $0xffff, %edx 592 jnz L(exit) 593 594 lea (%ecx, %edi,1), %eax 595 lea 4(%ecx, %esi,1), %edx 596 POP (%edi) 597 POP (%esi) 598 jmp L(less48bytes) 599 600 cfi_restore_state 601 cfi_remember_state 602 ALIGN (4) 603 L(shr_5): 604 cmp $80, %ecx 605 lea -48(%ecx), %ecx 606 mov %edx, %eax 607 jae L(shr_5_gobble) 608 609 movdqa 16(%esi), %xmm1 610 movdqa %xmm1, %xmm2 611 palignr $5,(%esi), %xmm1 612 pcmpeqb (%edi), %xmm1 613 614 movdqa 32(%esi), %xmm3 615 palignr $5,%xmm2, %xmm3 616 pcmpeqb 16(%edi), %xmm3 617 618 pand %xmm1, %xmm3 619 pmovmskb %xmm3, %edx 620 lea 32(%edi), %edi 621 lea 32(%esi), %esi 622 sub $0xffff, %edx 623 jnz L(exit) 624 lea (%ecx, %edi,1), %eax 625 
lea 5(%ecx, %esi,1), %edx 626 POP (%edi) 627 POP (%esi) 628 jmp L(less48bytes) 629 630 cfi_restore_state 631 cfi_remember_state 632 ALIGN (4) 633 L(shr_5_gobble): 634 sub $32, %ecx 635 movdqa 16(%esi), %xmm0 636 palignr $5,(%esi), %xmm0 637 pcmpeqb (%edi), %xmm0 638 639 movdqa 32(%esi), %xmm3 640 palignr $5,16(%esi), %xmm3 641 pcmpeqb 16(%edi), %xmm3 642 643 L(shr_5_gobble_loop): 644 pand %xmm0, %xmm3 645 sub $32, %ecx 646 pmovmskb %xmm3, %edx 647 movdqa %xmm0, %xmm1 648 649 movdqa 64(%esi), %xmm3 650 palignr $5,48(%esi), %xmm3 651 sbb $0xffff, %edx 652 movdqa 48(%esi), %xmm0 653 palignr $5,32(%esi), %xmm0 654 pcmpeqb 32(%edi), %xmm0 655 lea 32(%esi), %esi 656 pcmpeqb 48(%edi), %xmm3 657 658 lea 32(%edi), %edi 659 jz L(shr_5_gobble_loop) 660 pand %xmm0, %xmm3 661 662 cmp $0, %ecx 663 jge L(shr_5_gobble_next) 664 inc %edx 665 add $32, %ecx 666 L(shr_5_gobble_next): 667 test %edx, %edx 668 jnz L(exit) 669 670 pmovmskb %xmm3, %edx 671 movdqa %xmm0, %xmm1 672 lea 32(%edi), %edi 673 lea 32(%esi), %esi 674 sub $0xffff, %edx 675 jnz L(exit) 676 677 lea (%ecx, %edi,1), %eax 678 lea 5(%ecx, %esi,1), %edx 679 POP (%edi) 680 POP (%esi) 681 jmp L(less48bytes) 682 683 cfi_restore_state 684 cfi_remember_state 685 ALIGN (4) 686 L(shr_6): 687 cmp $80, %ecx 688 lea -48(%ecx), %ecx 689 mov %edx, %eax 690 jae L(shr_6_gobble) 691 692 movdqa 16(%esi), %xmm1 693 movdqa %xmm1, %xmm2 694 palignr $6,(%esi), %xmm1 695 pcmpeqb (%edi), %xmm1 696 697 movdqa 32(%esi), %xmm3 698 palignr $6,%xmm2, %xmm3 699 pcmpeqb 16(%edi), %xmm3 700 701 pand %xmm1, %xmm3 702 pmovmskb %xmm3, %edx 703 lea 32(%edi), %edi 704 lea 32(%esi), %esi 705 sub $0xffff, %edx 706 jnz L(exit) 707 lea (%ecx, %edi,1), %eax 708 lea 6(%ecx, %esi,1), %edx 709 POP (%edi) 710 POP (%esi) 711 jmp L(less48bytes) 712 713 cfi_restore_state 714 cfi_remember_state 715 ALIGN (4) 716 L(shr_6_gobble): 717 sub $32, %ecx 718 movdqa 16(%esi), %xmm0 719 palignr $6,(%esi), %xmm0 720 pcmpeqb (%edi), %xmm0 721 722 movdqa 32(%esi), %xmm3 723 palignr 
$6,16(%esi), %xmm3 724 pcmpeqb 16(%edi), %xmm3 725 726 L(shr_6_gobble_loop): 727 pand %xmm0, %xmm3 728 sub $32, %ecx 729 pmovmskb %xmm3, %edx 730 movdqa %xmm0, %xmm1 731 732 movdqa 64(%esi), %xmm3 733 palignr $6,48(%esi), %xmm3 734 sbb $0xffff, %edx 735 movdqa 48(%esi), %xmm0 736 palignr $6,32(%esi), %xmm0 737 pcmpeqb 32(%edi), %xmm0 738 lea 32(%esi), %esi 739 pcmpeqb 48(%edi), %xmm3 740 741 lea 32(%edi), %edi 742 jz L(shr_6_gobble_loop) 743 pand %xmm0, %xmm3 744 745 cmp $0, %ecx 746 jge L(shr_6_gobble_next) 747 inc %edx 748 add $32, %ecx 749 L(shr_6_gobble_next): 750 test %edx, %edx 751 jnz L(exit) 752 753 pmovmskb %xmm3, %edx 754 movdqa %xmm0, %xmm1 755 lea 32(%edi), %edi 756 lea 32(%esi), %esi 757 sub $0xffff, %edx 758 jnz L(exit) 759 760 lea (%ecx, %edi,1), %eax 761 lea 6(%ecx, %esi,1), %edx 762 POP (%edi) 763 POP (%esi) 764 jmp L(less48bytes) 765 766 cfi_restore_state 767 cfi_remember_state 768 ALIGN (4) 769 L(shr_7): 770 cmp $80, %ecx 771 lea -48(%ecx), %ecx 772 mov %edx, %eax 773 jae L(shr_7_gobble) 774 775 movdqa 16(%esi), %xmm1 776 movdqa %xmm1, %xmm2 777 palignr $7,(%esi), %xmm1 778 pcmpeqb (%edi), %xmm1 779 780 movdqa 32(%esi), %xmm3 781 palignr $7,%xmm2, %xmm3 782 pcmpeqb 16(%edi), %xmm3 783 784 pand %xmm1, %xmm3 785 pmovmskb %xmm3, %edx 786 lea 32(%edi), %edi 787 lea 32(%esi), %esi 788 sub $0xffff, %edx 789 jnz L(exit) 790 lea (%ecx, %edi,1), %eax 791 lea 7(%ecx, %esi,1), %edx 792 POP (%edi) 793 POP (%esi) 794 jmp L(less48bytes) 795 796 cfi_restore_state 797 cfi_remember_state 798 ALIGN (4) 799 L(shr_7_gobble): 800 sub $32, %ecx 801 movdqa 16(%esi), %xmm0 802 palignr $7,(%esi), %xmm0 803 pcmpeqb (%edi), %xmm0 804 805 movdqa 32(%esi), %xmm3 806 palignr $7,16(%esi), %xmm3 807 pcmpeqb 16(%edi), %xmm3 808 809 L(shr_7_gobble_loop): 810 pand %xmm0, %xmm3 811 sub $32, %ecx 812 pmovmskb %xmm3, %edx 813 movdqa %xmm0, %xmm1 814 815 movdqa 64(%esi), %xmm3 816 palignr $7,48(%esi), %xmm3 817 sbb $0xffff, %edx 818 movdqa 48(%esi), %xmm0 819 palignr $7,32(%esi), 
%xmm0 820 pcmpeqb 32(%edi), %xmm0 821 lea 32(%esi), %esi 822 pcmpeqb 48(%edi), %xmm3 823 824 lea 32(%edi), %edi 825 jz L(shr_7_gobble_loop) 826 pand %xmm0, %xmm3 827 828 cmp $0, %ecx 829 jge L(shr_7_gobble_next) 830 inc %edx 831 add $32, %ecx 832 L(shr_7_gobble_next): 833 test %edx, %edx 834 jnz L(exit) 835 836 pmovmskb %xmm3, %edx 837 movdqa %xmm0, %xmm1 838 lea 32(%edi), %edi 839 lea 32(%esi), %esi 840 sub $0xffff, %edx 841 jnz L(exit) 842 843 lea (%ecx, %edi,1), %eax 844 lea 7(%ecx, %esi,1), %edx 845 POP (%edi) 846 POP (%esi) 847 jmp L(less48bytes) 848 849 cfi_restore_state 850 cfi_remember_state 851 ALIGN (4) 852 L(shr_8): 853 cmp $80, %ecx 854 lea -48(%ecx), %ecx 855 mov %edx, %eax 856 jae L(shr_8_gobble) 857 858 movdqa 16(%esi), %xmm1 859 movdqa %xmm1, %xmm2 860 palignr $8,(%esi), %xmm1 861 pcmpeqb (%edi), %xmm1 862 863 movdqa 32(%esi), %xmm3 864 palignr $8,%xmm2, %xmm3 865 pcmpeqb 16(%edi), %xmm3 866 867 pand %xmm1, %xmm3 868 pmovmskb %xmm3, %edx 869 lea 32(%edi), %edi 870 lea 32(%esi), %esi 871 sub $0xffff, %edx 872 jnz L(exit) 873 lea (%ecx, %edi,1), %eax 874 lea 8(%ecx, %esi,1), %edx 875 POP (%edi) 876 POP (%esi) 877 jmp L(less48bytes) 878 879 cfi_restore_state 880 cfi_remember_state 881 ALIGN (4) 882 L(shr_8_gobble): 883 sub $32, %ecx 884 movdqa 16(%esi), %xmm0 885 palignr $8,(%esi), %xmm0 886 pcmpeqb (%edi), %xmm0 887 888 movdqa 32(%esi), %xmm3 889 palignr $8,16(%esi), %xmm3 890 pcmpeqb 16(%edi), %xmm3 891 892 L(shr_8_gobble_loop): 893 pand %xmm0, %xmm3 894 sub $32, %ecx 895 pmovmskb %xmm3, %edx 896 movdqa %xmm0, %xmm1 897 898 movdqa 64(%esi), %xmm3 899 palignr $8,48(%esi), %xmm3 900 sbb $0xffff, %edx 901 movdqa 48(%esi), %xmm0 902 palignr $8,32(%esi), %xmm0 903 pcmpeqb 32(%edi), %xmm0 904 lea 32(%esi), %esi 905 pcmpeqb 48(%edi), %xmm3 906 907 lea 32(%edi), %edi 908 jz L(shr_8_gobble_loop) 909 pand %xmm0, %xmm3 910 911 cmp $0, %ecx 912 jge L(shr_8_gobble_next) 913 inc %edx 914 add $32, %ecx 915 L(shr_8_gobble_next): 916 test %edx, %edx 917 jnz L(exit) 
918 919 pmovmskb %xmm3, %edx 920 movdqa %xmm0, %xmm1 921 lea 32(%edi), %edi 922 lea 32(%esi), %esi 923 sub $0xffff, %edx 924 jnz L(exit) 925 926 lea (%ecx, %edi,1), %eax 927 lea 8(%ecx, %esi,1), %edx 928 POP (%edi) 929 POP (%esi) 930 jmp L(less48bytes) 931 932 cfi_restore_state 933 cfi_remember_state 934 ALIGN (4) 935 L(shr_9): 936 cmp $80, %ecx 937 lea -48(%ecx), %ecx 938 mov %edx, %eax 939 jae L(shr_9_gobble) 940 941 movdqa 16(%esi), %xmm1 942 movdqa %xmm1, %xmm2 943 palignr $9,(%esi), %xmm1 944 pcmpeqb (%edi), %xmm1 945 946 movdqa 32(%esi), %xmm3 947 palignr $9,%xmm2, %xmm3 948 pcmpeqb 16(%edi), %xmm3 949 950 pand %xmm1, %xmm3 951 pmovmskb %xmm3, %edx 952 lea 32(%edi), %edi 953 lea 32(%esi), %esi 954 sub $0xffff, %edx 955 jnz L(exit) 956 lea (%ecx, %edi,1), %eax 957 lea 9(%ecx, %esi,1), %edx 958 POP (%edi) 959 POP (%esi) 960 jmp L(less48bytes) 961 962 cfi_restore_state 963 cfi_remember_state 964 ALIGN (4) 965 L(shr_9_gobble): 966 sub $32, %ecx 967 movdqa 16(%esi), %xmm0 968 palignr $9,(%esi), %xmm0 969 pcmpeqb (%edi), %xmm0 970 971 movdqa 32(%esi), %xmm3 972 palignr $9,16(%esi), %xmm3 973 pcmpeqb 16(%edi), %xmm3 974 975 L(shr_9_gobble_loop): 976 pand %xmm0, %xmm3 977 sub $32, %ecx 978 pmovmskb %xmm3, %edx 979 movdqa %xmm0, %xmm1 980 981 movdqa 64(%esi), %xmm3 982 palignr $9,48(%esi), %xmm3 983 sbb $0xffff, %edx 984 movdqa 48(%esi), %xmm0 985 palignr $9,32(%esi), %xmm0 986 pcmpeqb 32(%edi), %xmm0 987 lea 32(%esi), %esi 988 pcmpeqb 48(%edi), %xmm3 989 990 lea 32(%edi), %edi 991 jz L(shr_9_gobble_loop) 992 pand %xmm0, %xmm3 993 994 cmp $0, %ecx 995 jge L(shr_9_gobble_next) 996 inc %edx 997 add $32, %ecx 998 L(shr_9_gobble_next): 999 test %edx, %edx 1000 jnz L(exit) 1001 1002 pmovmskb %xmm3, %edx 1003 movdqa %xmm0, %xmm1 1004 lea 32(%edi), %edi 1005 lea 32(%esi), %esi 1006 sub $0xffff, %edx 1007 jnz L(exit) 1008 1009 lea (%ecx, %edi,1), %eax 1010 lea 9(%ecx, %esi,1), %edx 1011 POP (%edi) 1012 POP (%esi) 1013 jmp L(less48bytes) 1014 1015 cfi_restore_state 1016 
cfi_remember_state 1017 ALIGN (4) 1018 L(shr_10): 1019 cmp $80, %ecx 1020 lea -48(%ecx), %ecx 1021 mov %edx, %eax 1022 jae L(shr_10_gobble) 1023 1024 movdqa 16(%esi), %xmm1 1025 movdqa %xmm1, %xmm2 1026 palignr $10, (%esi), %xmm1 1027 pcmpeqb (%edi), %xmm1 1028 1029 movdqa 32(%esi), %xmm3 1030 palignr $10,%xmm2, %xmm3 1031 pcmpeqb 16(%edi), %xmm3 1032 1033 pand %xmm1, %xmm3 1034 pmovmskb %xmm3, %edx 1035 lea 32(%edi), %edi 1036 lea 32(%esi), %esi 1037 sub $0xffff, %edx 1038 jnz L(exit) 1039 lea (%ecx, %edi,1), %eax 1040 lea 10(%ecx, %esi,1), %edx 1041 POP (%edi) 1042 POP (%esi) 1043 jmp L(less48bytes) 1044 1045 cfi_restore_state 1046 cfi_remember_state 1047 ALIGN (4) 1048 L(shr_10_gobble): 1049 sub $32, %ecx 1050 movdqa 16(%esi), %xmm0 1051 palignr $10, (%esi), %xmm0 1052 pcmpeqb (%edi), %xmm0 1053 1054 movdqa 32(%esi), %xmm3 1055 palignr $10, 16(%esi), %xmm3 1056 pcmpeqb 16(%edi), %xmm3 1057 1058 L(shr_10_gobble_loop): 1059 pand %xmm0, %xmm3 1060 sub $32, %ecx 1061 pmovmskb %xmm3, %edx 1062 movdqa %xmm0, %xmm1 1063 1064 movdqa 64(%esi), %xmm3 1065 palignr $10,48(%esi), %xmm3 1066 sbb $0xffff, %edx 1067 movdqa 48(%esi), %xmm0 1068 palignr $10,32(%esi), %xmm0 1069 pcmpeqb 32(%edi), %xmm0 1070 lea 32(%esi), %esi 1071 pcmpeqb 48(%edi), %xmm3 1072 1073 lea 32(%edi), %edi 1074 jz L(shr_10_gobble_loop) 1075 pand %xmm0, %xmm3 1076 1077 cmp $0, %ecx 1078 jge L(shr_10_gobble_next) 1079 inc %edx 1080 add $32, %ecx 1081 L(shr_10_gobble_next): 1082 test %edx, %edx 1083 jnz L(exit) 1084 1085 pmovmskb %xmm3, %edx 1086 movdqa %xmm0, %xmm1 1087 lea 32(%edi), %edi 1088 lea 32(%esi), %esi 1089 sub $0xffff, %edx 1090 jnz L(exit) 1091 1092 lea (%ecx, %edi,1), %eax 1093 lea 10(%ecx, %esi,1), %edx 1094 POP (%edi) 1095 POP (%esi) 1096 jmp L(less48bytes) 1097 1098 cfi_restore_state 1099 cfi_remember_state 1100 ALIGN (4) 1101 L(shr_11): 1102 cmp $80, %ecx 1103 lea -48(%ecx), %ecx 1104 mov %edx, %eax 1105 jae L(shr_11_gobble) 1106 1107 movdqa 16(%esi), %xmm1 1108 movdqa %xmm1, %xmm2 1109 
palignr $11, (%esi), %xmm1 1110 pcmpeqb (%edi), %xmm1 1111 1112 movdqa 32(%esi), %xmm3 1113 palignr $11, %xmm2, %xmm3 1114 pcmpeqb 16(%edi), %xmm3 1115 1116 pand %xmm1, %xmm3 1117 pmovmskb %xmm3, %edx 1118 lea 32(%edi), %edi 1119 lea 32(%esi), %esi 1120 sub $0xffff, %edx 1121 jnz L(exit) 1122 lea (%ecx, %edi,1), %eax 1123 lea 11(%ecx, %esi,1), %edx 1124 POP (%edi) 1125 POP (%esi) 1126 jmp L(less48bytes) 1127 1128 cfi_restore_state 1129 cfi_remember_state 1130 ALIGN (4) 1131 L(shr_11_gobble): 1132 sub $32, %ecx 1133 movdqa 16(%esi), %xmm0 1134 palignr $11, (%esi), %xmm0 1135 pcmpeqb (%edi), %xmm0 1136 1137 movdqa 32(%esi), %xmm3 1138 palignr $11, 16(%esi), %xmm3 1139 pcmpeqb 16(%edi), %xmm3 1140 1141 L(shr_11_gobble_loop): 1142 pand %xmm0, %xmm3 1143 sub $32, %ecx 1144 pmovmskb %xmm3, %edx 1145 movdqa %xmm0, %xmm1 1146 1147 movdqa 64(%esi), %xmm3 1148 palignr $11,48(%esi), %xmm3 1149 sbb $0xffff, %edx 1150 movdqa 48(%esi), %xmm0 1151 palignr $11,32(%esi), %xmm0 1152 pcmpeqb 32(%edi), %xmm0 1153 lea 32(%esi), %esi 1154 pcmpeqb 48(%edi), %xmm3 1155 1156 lea 32(%edi), %edi 1157 jz L(shr_11_gobble_loop) 1158 pand %xmm0, %xmm3 1159 1160 cmp $0, %ecx 1161 jge L(shr_11_gobble_next) 1162 inc %edx 1163 add $32, %ecx 1164 L(shr_11_gobble_next): 1165 test %edx, %edx 1166 jnz L(exit) 1167 1168 pmovmskb %xmm3, %edx 1169 movdqa %xmm0, %xmm1 1170 lea 32(%edi), %edi 1171 lea 32(%esi), %esi 1172 sub $0xffff, %edx 1173 jnz L(exit) 1174 1175 lea (%ecx, %edi,1), %eax 1176 lea 11(%ecx, %esi,1), %edx 1177 POP (%edi) 1178 POP (%esi) 1179 jmp L(less48bytes) 1180 1181 cfi_restore_state 1182 cfi_remember_state 1183 ALIGN (4) 1184 L(shr_12): 1185 cmp $80, %ecx 1186 lea -48(%ecx), %ecx 1187 mov %edx, %eax 1188 jae L(shr_12_gobble) 1189 1190 movdqa 16(%esi), %xmm1 1191 movdqa %xmm1, %xmm2 1192 palignr $12, (%esi), %xmm1 1193 pcmpeqb (%edi), %xmm1 1194 1195 movdqa 32(%esi), %xmm3 1196 palignr $12, %xmm2, %xmm3 1197 pcmpeqb 16(%edi), %xmm3 1198 1199 pand %xmm1, %xmm3 1200 pmovmskb %xmm3, %edx 
1201 lea 32(%edi), %edi 1202 lea 32(%esi), %esi 1203 sub $0xffff, %edx 1204 jnz L(exit) 1205 lea (%ecx, %edi,1), %eax 1206 lea 12(%ecx, %esi,1), %edx 1207 POP (%edi) 1208 POP (%esi) 1209 jmp L(less48bytes) 1210 1211 cfi_restore_state 1212 cfi_remember_state 1213 ALIGN (4) 1214 L(shr_12_gobble): 1215 sub $32, %ecx 1216 movdqa 16(%esi), %xmm0 1217 palignr $12, (%esi), %xmm0 1218 pcmpeqb (%edi), %xmm0 1219 1220 movdqa 32(%esi), %xmm3 1221 palignr $12, 16(%esi), %xmm3 1222 pcmpeqb 16(%edi), %xmm3 1223 1224 L(shr_12_gobble_loop): 1225 pand %xmm0, %xmm3 1226 sub $32, %ecx 1227 pmovmskb %xmm3, %edx 1228 movdqa %xmm0, %xmm1 1229 1230 movdqa 64(%esi), %xmm3 1231 palignr $12,48(%esi), %xmm3 1232 sbb $0xffff, %edx 1233 movdqa 48(%esi), %xmm0 1234 palignr $12,32(%esi), %xmm0 1235 pcmpeqb 32(%edi), %xmm0 1236 lea 32(%esi), %esi 1237 pcmpeqb 48(%edi), %xmm3 1238 1239 lea 32(%edi), %edi 1240 jz L(shr_12_gobble_loop) 1241 pand %xmm0, %xmm3 1242 1243 cmp $0, %ecx 1244 jge L(shr_12_gobble_next) 1245 inc %edx 1246 add $32, %ecx 1247 L(shr_12_gobble_next): 1248 test %edx, %edx 1249 jnz L(exit) 1250 1251 pmovmskb %xmm3, %edx 1252 movdqa %xmm0, %xmm1 1253 lea 32(%edi), %edi 1254 lea 32(%esi), %esi 1255 sub $0xffff, %edx 1256 jnz L(exit) 1257 1258 lea (%ecx, %edi,1), %eax 1259 lea 12(%ecx, %esi,1), %edx 1260 POP (%edi) 1261 POP (%esi) 1262 jmp L(less48bytes) 1263 1264 cfi_restore_state 1265 cfi_remember_state 1266 ALIGN (4) 1267 L(shr_13): 1268 cmp $80, %ecx 1269 lea -48(%ecx), %ecx 1270 mov %edx, %eax 1271 jae L(shr_13_gobble) 1272 1273 movdqa 16(%esi), %xmm1 1274 movdqa %xmm1, %xmm2 1275 palignr $13, (%esi), %xmm1 1276 pcmpeqb (%edi), %xmm1 1277 1278 movdqa 32(%esi), %xmm3 1279 palignr $13, %xmm2, %xmm3 1280 pcmpeqb 16(%edi), %xmm3 1281 1282 pand %xmm1, %xmm3 1283 pmovmskb %xmm3, %edx 1284 lea 32(%edi), %edi 1285 lea 32(%esi), %esi 1286 sub $0xffff, %edx 1287 jnz L(exit) 1288 lea (%ecx, %edi,1), %eax 1289 lea 13(%ecx, %esi,1), %edx 1290 POP (%edi) 1291 POP (%esi) 1292 jmp 
L(less48bytes) 1293 1294 cfi_restore_state 1295 cfi_remember_state 1296 ALIGN (4) 1297 L(shr_13_gobble): 1298 sub $32, %ecx 1299 movdqa 16(%esi), %xmm0 1300 palignr $13, (%esi), %xmm0 1301 pcmpeqb (%edi), %xmm0 1302 1303 movdqa 32(%esi), %xmm3 1304 palignr $13, 16(%esi), %xmm3 1305 pcmpeqb 16(%edi), %xmm3 1306 1307 L(shr_13_gobble_loop): 1308 pand %xmm0, %xmm3 1309 sub $32, %ecx 1310 pmovmskb %xmm3, %edx 1311 movdqa %xmm0, %xmm1 1312 1313 movdqa 64(%esi), %xmm3 1314 palignr $13,48(%esi), %xmm3 1315 sbb $0xffff, %edx 1316 movdqa 48(%esi), %xmm0 1317 palignr $13,32(%esi), %xmm0 1318 pcmpeqb 32(%edi), %xmm0 1319 lea 32(%esi), %esi 1320 pcmpeqb 48(%edi), %xmm3 1321 1322 lea 32(%edi), %edi 1323 jz L(shr_13_gobble_loop) 1324 pand %xmm0, %xmm3 1325 1326 cmp $0, %ecx 1327 jge L(shr_13_gobble_next) 1328 inc %edx 1329 add $32, %ecx 1330 L(shr_13_gobble_next): 1331 test %edx, %edx 1332 jnz L(exit) 1333 1334 pmovmskb %xmm3, %edx 1335 movdqa %xmm0, %xmm1 1336 lea 32(%edi), %edi 1337 lea 32(%esi), %esi 1338 sub $0xffff, %edx 1339 jnz L(exit) 1340 1341 lea (%ecx, %edi,1), %eax 1342 lea 13(%ecx, %esi,1), %edx 1343 POP (%edi) 1344 POP (%esi) 1345 jmp L(less48bytes) 1346 1347 cfi_restore_state 1348 cfi_remember_state 1349 ALIGN (4) 1350 L(shr_14): 1351 cmp $80, %ecx 1352 lea -48(%ecx), %ecx 1353 mov %edx, %eax 1354 jae L(shr_14_gobble) 1355 1356 movdqa 16(%esi), %xmm1 1357 movdqa %xmm1, %xmm2 1358 palignr $14, (%esi), %xmm1 1359 pcmpeqb (%edi), %xmm1 1360 1361 movdqa 32(%esi), %xmm3 1362 palignr $14, %xmm2, %xmm3 1363 pcmpeqb 16(%edi), %xmm3 1364 1365 pand %xmm1, %xmm3 1366 pmovmskb %xmm3, %edx 1367 lea 32(%edi), %edi 1368 lea 32(%esi), %esi 1369 sub $0xffff, %edx 1370 jnz L(exit) 1371 lea (%ecx, %edi,1), %eax 1372 lea 14(%ecx, %esi,1), %edx 1373 POP (%edi) 1374 POP (%esi) 1375 jmp L(less48bytes) 1376 1377 cfi_restore_state 1378 cfi_remember_state 1379 ALIGN (4) 1380 L(shr_14_gobble): 1381 sub $32, %ecx 1382 movdqa 16(%esi), %xmm0 1383 palignr $14, (%esi), %xmm0 1384 pcmpeqb (%edi), 
%xmm0	/* NOTE(review): closing operand of a statement begun before this chunk; kept verbatim */

	/*
	 * Second 16-byte lane of the two-lane prologue: rebuild the source
	 * bytes from the aligned blocks at 16(%esi)/32(%esi) shifted right by
	 * 14 and compare them with 16(%edi).  pcmpeqb leaves 0xff in every
	 * byte position that is equal.
	 */
	movdqa	32(%esi), %xmm3
	palignr	$14, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

/*
 * 32 bytes per pass, software pipelined: each pass folds the two lane
 * masks produced by the previous pass (xmm0, xmm3) and computes the next
 * two lanes.  xmm1 keeps a copy of the first lane mask for L(exit).
 *
 * The carry of "sub $32, %ecx" is consumed by "sbb $0xffff, %edx", so the
 * single jz below loops only when all 32 bytes were equal (mask 0xffff)
 * AND the counter did not borrow.  Everything between sbb and jz
 * (movdqa/palignr/pcmpeqb/lea) leaves EFLAGS untouched.
 */
L(shr_14_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx
	pmovmskb %xmm3, %edx
	movdqa	%xmm0, %xmm1

	movdqa	64(%esi), %xmm3
	palignr	$14,48(%esi), %xmm3
	sbb	$0xffff, %edx
	movdqa	48(%esi), %xmm0
	palignr	$14,32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_14_gobble_loop)
	pand	%xmm0, %xmm3

	/*
	 * Loop left.  A negative ecx means the sub above borrowed, so sbb
	 * took one extra off edx: undo that and give the 32 back to ecx.
	 */
	cmp	$0, %ecx
	jge	L(shr_14_gobble_next)
	inc	%edx
	add	$32, %ecx
L(shr_14_gobble_next):
	test	%edx, %edx		/* mismatch in the pair folded last? */
	jnz	L(exit)

	/* Check the pair that was already computed ahead of time.  */
	pmovmskb %xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	/*
	 * All vector compares matched.  Build end pointers for the tail
	 * (eax = edi + ecx, edx = esi + ecx + 14), drop edi/esi from the
	 * stack and let L(less48bytes) compare the last ecx bytes.
	 */
	lea	(%ecx, %edi,1), %eax
	lea	14(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

	/* The POPs above changed the CFI state; the next section is entered
	   with edi/esi still pushed, so re-select the remembered state.  */
	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
/*
 * Source read through palignr $15, short form.
 * ecx is reduced by 48 with lea so the flags of "cmp $80" survive for the
 * jae: lengths of 80 or more take the gobble loop.
 * eax receives the incoming edx; L(exit) later adds it to esi.
 * NOTE(review): presumably that value is the source misalignment (15
 * here) set up by the dispatch code before this chunk -- confirm.
 */
L(shr_15):
	cmp	$80, %ecx
	lea	-48(%ecx), %ecx
	mov	%edx, %eax
	jae	L(shr_15_gobble)

	/* Lane 0: source bytes rebuilt from (%esi)/16(%esi) vs (%edi).  */
	movdqa	16(%esi), %xmm1
	movdqa	%xmm1, %xmm2		/* keep the raw block for lane 1 */
	palignr	$15, (%esi), %xmm1
	pcmpeqb	(%edi), %xmm1

	/* Lane 1: rebuilt from the saved block and 32(%esi) vs 16(%edi).  */
	movdqa	32(%esi), %xmm3
	palignr	$15, %xmm2, %xmm3
	pcmpeqb	16(%edi), %xmm3

	/* Fold both lanes; edx becomes 0 only if all 32 bytes were equal.  */
	pand	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)
	/* Tail: eax = edi + ecx, edx = esi + ecx + 15 (end pointers).  */
	lea	(%ecx, %edi,1), %eax
	lea	15(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
/*
 * Source read through palignr $15, long form: two-lane prologue followed
 * by the pipelined 32-bytes-per-pass loop (same scheme as the $14 loop
 * above).  ecx is pre-decremented by another 32 here.
 */
L(shr_15_gobble):
	sub	$32, %ecx
	movdqa	16(%esi), %xmm0
	palignr	$15, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$15, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_15_gobble_loop):
	pand	%xmm0, %xmm3		/* fold the two pending lane masks */
	sub	$32, %ecx		/* CF feeds the sbb below */
	pmovmskb %xmm3, %edx
	movdqa	%xmm0, %xmm1		/* first-lane mask for L(exit) */

	movdqa	64(%esi), %xmm3
	palignr	$15,48(%esi), %xmm3
	sbb	$0xffff, %edx		/* ZF <=> mask == 0xffff and no borrow */
	movdqa	48(%esi), %xmm0
	palignr	$15,32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_15_gobble_loop)
	pand	%xmm0, %xmm3

	/* Negative ecx: the sub borrowed; cancel the extra 1 taken by sbb
	   and restore the counter.  */
	cmp	$0, %ecx
	jge	L(shr_15_gobble_next)
	inc	%edx
	add	$32, %ecx
L(shr_15_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* Check the pair that was computed ahead of time.  */
	pmovmskb %xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	15(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
/*
 * A 32-byte pair contained a difference.
 *   xmm1 = equality mask of the first 16-byte lane of that pair
 *   edx  = (folded mask) - 0xffff, non-zero
 *   edi/esi already point 32 bytes past the start of the pair
 * If the first lane is not all-equal, the difference is there: step both
 * pointers back by 16 and decode that lane instead.
 * The low 16 bits of (mask - 0xffff) equal mask + 1, whose lowest set bit
 * is the index of the first unequal byte; the test ladder below finds it.
 */
L(exit):
	pmovmskb %xmm1, %ebx
	sub	$0xffff, %ebx
	jz	L(first16bytes)
	lea	-16(%esi), %esi
	lea	-16(%edi), %edi
	mov	%ebx, %edx
L(first16bytes):
	add	%eax, %esi		/* eax was saved from edx at shr_N entry */
L(less16bytes):
	test	%dl, %dl
	jz	L(next_24_bytes)	/* difference is in bytes 8..15 */

	test	$0x01, %dl
	jnz	L(Byte16)

	test	$0x02, %dl
	jnz	L(Byte17)

	test	$0x04, %dl
	jnz	L(Byte18)

	test	$0x08, %dl
	jnz	L(Byte19)

	test	$0x10, %dl
	jnz	L(Byte20)

	test	$0x20, %dl
	jnz	L(Byte21)

	test	$0x40, %dl
	jnz	L(Byte22)
/*
 * L(ByteNN): return the difference of the two bytes found NN (16..23)
 * ... i.e. at offset -16 .. -9 from edi/esi: eax = byte at edi minus
 * byte at esi.  L(Byte23) is the fall-through for bit 7.  RETURN pops
 * edi, esi, ebx and returns.
 */
L(Byte23):
	movzbl	-9(%edi), %eax
	movzbl	-9(%esi), %edx
	sub	%edx, %eax
	RETURN

	ALIGN (4)
L(Byte16):
	movzbl	-16(%edi), %eax
	movzbl	-16(%esi), %edx
	sub	%edx, %eax
	RETURN

	ALIGN (4)
L(Byte17):
	movzbl	-15(%edi), %eax
	movzbl	-15(%esi), %edx
	sub	%edx, %eax
	RETURN

	ALIGN (4)
L(Byte18):
	movzbl	-14(%edi), %eax
	movzbl	-14(%esi), %edx
	sub	%edx, %eax
	RETURN

	ALIGN (4)
L(Byte19):
	movzbl	-13(%edi), %eax
	movzbl	-13(%esi), %edx
	sub	%edx, %eax
	RETURN

	ALIGN (4)
L(Byte20):
	movzbl	-12(%edi), %eax
	movzbl	-12(%esi), %edx
	sub	%edx, %eax
	RETURN

	ALIGN (4)
L(Byte21):
	movzbl	-11(%edi), %eax
	movzbl	-11(%esi), %edx
	sub	%edx, %eax
	RETURN

	ALIGN (4)
L(Byte22):
	movzbl	-10(%edi), %eax
	movzbl	-10(%esi), %edx
	sub	%edx, %eax
	RETURN

	ALIGN (4)
/*
 * Low mask byte was clear: the first difference is in the upper 8 bytes
 * of the lane.  Advance both pointers by 8 so the same L(ByteNN) exits
 * can be reused, then decode dh the same way.
 */
L(next_24_bytes):
	lea	8(%edi), %edi
	lea	8(%esi), %esi
	test	$0x01, %dh
	jnz	L(Byte16)

	test	$0x02, %dh
	jnz	L(Byte17)

	test	$0x04, %dh
	jnz	L(Byte18)

	test	$0x08, %dh
	jnz	L(Byte19)

	test	$0x10, %dh
	jnz	L(Byte20)

	test	$0x20, %dh
	jnz	L(Byte21)

	test	$0x40, %dh
	jnz	L(Byte22)

	ALIGN (4)
L(Byte31):
	movzbl	-9(%edi), %eax
	movzbl	-9(%esi), %edx
	sub	%edx, %eax
	RETURN_END
	/* Code below runs with only ebx pushed; describe that to the
	   unwinder after the three POPs of RETURN_END.  */
	CFI_PUSH (%ebx)

/*
 * Tail dispatch.  On entry (see L(less48bytes) below, the entry point):
 *   ecx = number of bytes left to compare
 *   eax, edx = pointers one past the end of the two buffers
 *   ebx is pushed (every exit below POPs it)
 * The ladders split ecx into groups of eight and jump to L(<n>bytes),
 * which compares the last n bytes using negative offsets from eax/edx.
 */
	ALIGN (4)
L(more8bytes):
	cmp	$16, %ecx
	jae	L(more16bytes)
	cmp	$8, %ecx
	je	L(8bytes)
	cmp	$9, %ecx
	je	L(9bytes)
	cmp	$10, %ecx
	je	L(10bytes)
	cmp	$11, %ecx
	je	L(11bytes)
	cmp	$12, %ecx
	je	L(12bytes)
	cmp	$13, %ecx
	je	L(13bytes)
	cmp	$14, %ecx
	je	L(14bytes)
	jmp	L(15bytes)

	ALIGN (4)
L(more16bytes):
	cmp	$24, %ecx
	jae	L(more24bytes)
	cmp	$16, %ecx
	je	L(16bytes)
	cmp	$17, %ecx
	je	L(17bytes)
	cmp	$18, %ecx
	je	L(18bytes)
	cmp	$19, %ecx
	je	L(19bytes)
	cmp	$20, %ecx
	je	L(20bytes)
	cmp	$21, %ecx
	je	L(21bytes)
	cmp	$22, %ecx
	je	L(22bytes)
	jmp	L(23bytes)

	ALIGN (4)
L(more24bytes):
	cmp	$32, %ecx
	jae	L(more32bytes)
	cmp	$24, %ecx
	je	L(24bytes)
	cmp	$25, %ecx
	je	L(25bytes)
	cmp	$26, %ecx
	je	L(26bytes)
	cmp	$27, %ecx
	je	L(27bytes)
	cmp	$28, %ecx
	je	L(28bytes)
	cmp	$29, %ecx
	je	L(29bytes)
	cmp	$30, %ecx
	je	L(30bytes)
	jmp	L(31bytes)

	ALIGN (4)
L(more32bytes):
	cmp	$40, %ecx
	jae	L(more40bytes)
	cmp	$32, %ecx
	je	L(32bytes)
	cmp	$33, %ecx
	je	L(33bytes)
	cmp	$34, %ecx
	je	L(34bytes)
	cmp	$35, %ecx
	je	L(35bytes)
	cmp	$36, %ecx
	je	L(36bytes)
	cmp	$37, %ecx
	je	L(37bytes)
	cmp	$38, %ecx
	je	L(38bytes)
	jmp	L(39bytes)

	ALIGN (4)
L(more40bytes):
	cmp	$40, %ecx
	je	L(40bytes)
	cmp	$41, %ecx
	je	L(41bytes)
	cmp	$42, %ecx
	je	L(42bytes)
	cmp	$43, %ecx
	je	L(43bytes)
	cmp	$44, %ecx
	je	L(44bytes)
	cmp	$45, %ecx
	je	L(45bytes)
	cmp	$46, %ecx
	je	L(46bytes)
	jmp	L(47bytes)

	ALIGN (4)
/* Entry point of the tail dispatch.  Lengths of 8 or more go to the
   ladders above; 2..6 are matched here and every remaining value takes
   the 7-byte chain.  */
L(less48bytes):
	cmp	$8, %ecx
	jae	L(more8bytes)
	cmp	$2, %ecx
	je	L(2bytes)
	cmp	$3, %ecx
	je	L(3bytes)
	cmp	$4, %ecx
	je	L(4bytes)
	cmp	$5, %ecx
	je	L(5bytes)
	cmp	$6, %ecx
	je	L(6bytes)
	jmp	L(7bytes)


/*
 * Fall-through compare chains, one per (length mod 4).  Each step loads
 * one dword from each buffer (ecx from the eax side, ebx from the edx
 * side) at the same negative offset and leaves through L(find_diff) on
 * the first unequal pair; the next label continues 4 bytes further on.
 *
 * Chain for lengths that are a multiple of 4.
 */
	ALIGN (4)
L(44bytes):
	mov	-44(%eax), %ecx
	mov	-44(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(40bytes):
	mov	-40(%eax), %ecx
	mov	-40(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(36bytes):
	mov	-36(%eax), %ecx
	mov	-36(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(32bytes):
	mov	-32(%eax), %ecx
	mov	-32(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(28bytes):
	mov	-28(%eax), %ecx
	mov	-28(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(24bytes):
	mov	-24(%eax), %ecx
	mov	-24(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(20bytes):
	mov	-20(%eax), %ecx
	mov	-20(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(16bytes):
	mov	-16(%eax), %ecx
	mov	-16(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(12bytes):
	mov	-12(%eax), %ecx
	mov	-12(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(8bytes):
	mov	-8(%eax), %ecx
	mov	-8(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(4bytes):
	mov	-4(%eax), %ecx
	mov	-4(%edx), %ebx
	cmp	%ebx, %ecx
	mov	$0, %eax		/* result 0; mov keeps the cmp flags for jne */
	jne	L(find_diff)
	POP (%ebx)
	ret
	CFI_PUSH (%ebx)			/* following code still has ebx pushed */

/* Chain for lengths of the form 4k+1: dwords, then the final byte.  */
	ALIGN (4)
L(45bytes):
	mov	-45(%eax), %ecx
	mov	-45(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(41bytes):
	mov	-41(%eax), %ecx
	mov	-41(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(37bytes):
	mov	-37(%eax), %ecx
	mov	-37(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(33bytes):
	mov	-33(%eax), %ecx
	mov	-33(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(29bytes):
	mov	-29(%eax), %ecx
	mov	-29(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(25bytes):
	mov	-25(%eax), %ecx
	mov	-25(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(21bytes):
	mov	-21(%eax), %ecx
	mov	-21(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(17bytes):
	mov	-17(%eax), %ecx
	mov	-17(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(13bytes):
	mov	-13(%eax), %ecx
	mov	-13(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(9bytes):
	mov	-9(%eax), %ecx
	mov	-9(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(5bytes):
	mov	-5(%eax), %ecx
	mov	-5(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
	movzbl	-1(%eax), %ecx		/* last byte */
	cmp	-1(%edx), %cl
	mov	$0, %eax		/* flags from the cmp are preserved */
	jne	L(end)
	POP (%ebx)
	ret
	CFI_PUSH (%ebx)

/* Chain for lengths of the form 4k+2: dwords, then the final two bytes.  */
	ALIGN (4)
L(46bytes):
	mov	-46(%eax), %ecx
	mov	-46(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(42bytes):
	mov	-42(%eax), %ecx
	mov	-42(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(38bytes):
	mov	-38(%eax), %ecx
	mov	-38(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(34bytes):
	mov	-34(%eax), %ecx
	mov	-34(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(30bytes):
	mov	-30(%eax), %ecx
	mov	-30(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(26bytes):
	mov	-26(%eax), %ecx
	mov	-26(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(22bytes):
	mov	-22(%eax), %ecx
	mov	-22(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(18bytes):
	mov	-18(%eax), %ecx
	mov	-18(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(14bytes):
	mov	-14(%eax), %ecx
	mov	-14(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(10bytes):
	mov	-10(%eax), %ecx
	mov	-10(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(6bytes):
	mov	-6(%eax), %ecx
	mov	-6(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(2bytes):
	movzwl	-2(%eax), %ecx
	movzwl	-2(%edx), %ebx
	cmp	%bl, %cl		/* lower-addressed byte first */
	jne	L(end)
	cmp	%bh, %ch
	mov	$0, %eax
	jne	L(end)
	POP (%ebx)
	ret
	CFI_PUSH (%ebx)

/* Chain for lengths of the form 4k+3: dwords, then the final three bytes.  */
	ALIGN (4)
L(47bytes):
	movl	-47(%eax), %ecx
	movl	-47(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(43bytes):
	movl	-43(%eax), %ecx
	movl	-43(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(39bytes):
	movl	-39(%eax), %ecx
	movl	-39(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(35bytes):
	movl	-35(%eax), %ecx
	movl	-35(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(31bytes):
	movl	-31(%eax), %ecx
	movl	-31(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(27bytes):
	movl	-27(%eax), %ecx
	movl	-27(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(23bytes):
	movl	-23(%eax), %ecx
	movl	-23(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(19bytes):
	movl	-19(%eax), %ecx
	movl	-19(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(15bytes):
	movl	-15(%eax), %ecx
	movl	-15(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(11bytes):
	movl	-11(%eax), %ecx
	movl	-11(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(7bytes):
	movl	-7(%eax), %ecx
	movl	-7(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(3bytes):
	movzwl	-3(%eax), %ecx
	movzwl	-3(%edx), %ebx
	cmpb	%bl, %cl
	jne	L(end)
	cmp	%bx, %cx		/* low bytes equal, so the high byte decides */
	jne	L(end)
	movzbl	-1(%eax), %eax		/* eax pointer no longer needed */
	cmpb	-1(%edx), %al
	mov	$0, %eax
	jne	L(end)
	POP (%ebx)
	ret
	CFI_PUSH (%ebx)

	ALIGN (4)
/*
 * ecx (eax side) and ebx (edx side) hold two unequal dwords.  Locate the
 * lowest-addressed unequal byte: compare byte 0, then the low word (byte 1
 * decides once byte 0 is equal), then shift the upper halves down and
 * repeat.  Control reaches L(end) with the flags of the deciding compare.
 */
L(find_diff):
	cmpb	%bl, %cl
	jne	L(end)
	cmp	%bx, %cx
	jne	L(end)
	shr	$16,%ecx
	shr	$16,%ebx
	cmp	%bl, %cl
	jne	L(end)
	cmp	%bx, %cx
/*
 * Flags still describe (eax-side value) - (edx-side value) as an unsigned
 * compare; popl and mov do not modify them.  Return +1 when the eax side
 * is above, otherwise -1.
 */
L(end):
	POP (%ebx)
	mov	$1, %eax
	ja	L(bigger)
	neg	%eax
L(bigger):
	ret

END (MEMCMP)