Home | History | Annotate | Download | only in string
      1 /*
      2 Copyright (c) 2010, Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     31 #ifndef MEMCMP
        /* Symbol name under which the routine is emitted; pre-define MEMCMP to override it. */
     32 # define MEMCMP		ssse3_memcmp3_new
     33 #endif
     34 
        /* L(label) token-pastes ".L" in front of the label name, so every label */
        /* written as L(x) in this file becomes the symbol .Lx. */
     35 #ifndef L
     36 # define L(label)	.L##label
     37 #endif
     38 
        /* ALIGN(n) emits .p2align n (alignment to a 2^n-byte boundary). */
     39 #ifndef ALIGN
     40 # define ALIGN(n)	.p2align n
     41 #endif
     42 
        /* The cfi_* wrappers below map one-to-one onto the assembler's .cfi_* */
        /* directives unless the including environment already defines them. */
     43 #ifndef cfi_startproc
     44 # define cfi_startproc			.cfi_startproc
     45 #endif
     46 
     47 #ifndef cfi_endproc
     48 # define cfi_endproc			.cfi_endproc
     49 #endif
     50 
     51 #ifndef cfi_rel_offset
     52 # define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
     53 #endif
     54 
     55 #ifndef cfi_restore
     56 # define cfi_restore(reg)		.cfi_restore reg
     57 #endif
     58 
     59 #ifndef cfi_adjust_cfa_offset
     60 # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
     61 #endif
     62 
     63 #ifndef cfi_remember_state
     64 # define cfi_remember_state		.cfi_remember_state
     65 #endif
     66 
     67 #ifndef cfi_restore_state
     68 # define cfi_restore_state		.cfi_restore_state
     69 #endif
     70 
        /* ENTRY(name): declare name as a global function symbol, align it with */
        /* .p2align 4, place the label and open a CFI region. */
     71 #ifndef ENTRY
     72 # define ENTRY(name)			\
     73 	.type name,  @function; 	\
     74 	.globl name;			\
     75 	.p2align 4;			\
     76 name:					\
     77 	cfi_startproc
     78 #endif
     79 
        /* END(name): close the CFI region and record the symbol size as the */
        /* distance from name to the current location. */
     80 #ifndef END
     81 # define END(name)			\
     82 	cfi_endproc;			\
     83 	.size name, .-name
     84 #endif
     85 
     86 #define CFI_PUSH(REG)						\
     87   cfi_adjust_cfa_offset (4);					\
     88   cfi_rel_offset (REG, 0)
        /* CFI_PUSH (above): unwind bookkeeping for a 4-byte push of REG - the CFA */
        /* offset grows by 4 and REG is recorded as saved at offset 0. */
     89 
        /* CFI_POP: the inverse bookkeeping - CFA offset shrinks by 4 and REG is */
        /* marked as restored. */
     90 #define CFI_POP(REG)						\
     91   cfi_adjust_cfa_offset (-4);					\
     92   cfi_restore (REG)
     93 
        /* PUSH/POP pair the actual pushl/popl with the matching CFI annotation. */
     94 #define PUSH(REG)	pushl REG; CFI_PUSH (REG)
     95 #define POP(REG)	popl REG; CFI_POP (REG)
     96 
        /* Offsets of the three stack arguments relative to %esp as used at function */
        /* entry (before anything is pushed): first block, second block, length. */
     97 #define PARMS		4
     98 #define BLK1		PARMS
     99 #define BLK2		BLK1+4
    100 #define LEN		BLK2+4
        /* RETURN_END pops %edi, %esi and %ebx and returns. RETURN does the same and */
        /* then restores and re-saves the remembered CFI state for the code that */
        /* follows it in the file. */
    101 #define RETURN_END	POP (%edi); POP (%esi); POP (%ebx); ret
    102 #define RETURN		RETURN_END; cfi_restore_state; cfi_remember_state
    103 
    104 	.section .text.ssse3,"ax",@progbits
    105 ENTRY (MEMCMP)
        	/* Entry point. Arguments are read from the stack: */
        	/* %eax = first block, %edx = second block, %ecx = length in bytes. */
    106 	movl	LEN(%esp), %ecx
    107 	movl	BLK1(%esp), %eax
    108 	cmp	$48, %ecx
        	/* movl does not write flags, so the jae still acts on the cmp against 48. */
    109 	movl	BLK2(%esp), %edx
    110 	jae	L(48bytesormore)
        	/* Length below 48: lengths 0 and 1 are handled by L(less1bytes). */
    111 	cmp	$1, %ecx
    112 	jbe	L(less1bytes)
        	/* Lengths 2..47: save %ebx, move both pointers %ecx bytes forward (one past */
        	/* the end of each block) and continue at L(less48bytes) with the count in %ecx. */
    113 	PUSH (%ebx)
    114 	add	%ecx, %edx
    115 	add	%ecx, %eax
    116 	jmp	L(less48bytes)
    117 
    118 	CFI_POP (%ebx)
        	/* Unwind info only: the jbe that leads here is taken before PUSH (%ebx), */
        	/* so the CFI effect of that push is undone for the code below. */
    119 	ALIGN (4)
    120 L(less1bytes):
        	/* Length is 0 or 1. On the visible path from the entry code the flags still */
        	/* come from "cmp $1, %ecx": below means length 0, so the result is 0. */
    121 	jb	L(zero)
        	/* Length 1: compare the single byte of each block. */
    122 	movb	(%eax), %cl
    123 	cmp	(%edx), %cl
    124 	je	L(zero)
        	/* mov leaves flags untouched, so ja still tests the byte compare: */
        	/* first-block byte above second-block byte (unsigned) returns 1, otherwise -1. */
    125 	mov	$1, %eax
    126 	ja	L(1bytesend)
    127 	neg	%eax
    128 L(1bytesend):
    129 	ret
    130 
    131 	ALIGN (4)
    132 L(zero):
        	/* Return 0. Nothing is popped here; the visible jumps to this label come */
        	/* from L(less1bytes), where no register has been pushed. */
    133 	mov	$0, %eax
    134 	ret
    135 
    136 	ALIGN (4)
    137 L(48bytesormore):
        	/* Length >= 48. Save %ebx, %esi and %edi and remember the resulting CFI */
        	/* state so later code paths can restore it. */
    138 	PUSH (%ebx)
    139 	PUSH (%esi)
    140 	PUSH (%edi)
    141 	cfi_remember_state
        	/* Compare the first 16 bytes with unaligned loads. pcmpeqb produces 0xff for */
        	/* each equal byte and pmovmskb packs the 16 byte sign bits into %edx, so */
        	/* %edx == 0xffff means all 16 bytes are equal. */
        	/* %edi becomes the first-block cursor, %esi the second-block cursor. */
    142 	movdqu    (%eax), %xmm3
    143 	movdqu    (%edx), %xmm0
    144 	movl	%eax, %edi
    145 	movl	%edx, %esi
    146 	pcmpeqb   %xmm0, %xmm3
    147 	pmovmskb  %xmm3, %edx
    148 	lea	16(%edi), %edi
    149 
        	/* lea does not write flags, so the jnz acts on the sub: a non-zero result */
        	/* means a difference inside the first 16 bytes. */
    150 	sub      $0xffff, %edx
    151 	lea	16(%esi), %esi
    152 	jnz	  L(less16bytes)
        	/* Round %edi down to a 16-byte boundary (%edx = %edi & 15, xor clears those */
        	/* bits). %esi is moved back by the same amount and %ecx is increased by it. */
        	/* Note that %ecx has not been reduced for the 16 bytes compared above. */
    153 	mov	%edi, %edx
    154 	and	$0xf, %edx
    155 	xor	%edx, %edi
    156 	sub	%edx, %esi
    157 	add	%edx, %ecx
        	/* %edx = offset of %esi inside its 16-byte chunk. Zero: both cursors are */
        	/* aligned, use L(shr_0). Otherwise round %esi down and pick L(shr_<offset>). */
    158 	mov	%esi, %edx
    159 	and	$0xf, %edx
    160 	jz	L(shr_0)
    161 	xor	%edx, %esi
    162 
        	/* Offsets 8..15 are dispatched by L(next_unaligned_table). */
    163 	cmp	$8, %edx
    164 	jae	L(next_unaligned_table)
        	/* NOTE(review): %edx cannot be 0 here (the jz above already took that case */
        	/* and the xor does not modify %edx), so this je is never taken. */
    165 	cmp	$0, %edx
    166 	je	L(shr_0)
    167 	cmp	$1, %edx
    168 	je	L(shr_1)
    169 	cmp	$2, %edx
    170 	je	L(shr_2)
    171 	cmp	$3, %edx
    172 	je	L(shr_3)
    173 	cmp	$4, %edx
    174 	je	L(shr_4)
    175 	cmp	$5, %edx
    176 	je	L(shr_5)
    177 	cmp	$6, %edx
    178 	je	L(shr_6)
        	/* Only offset 7 is left. */
    179 	jmp	L(shr_7)
    180 
    181 	ALIGN (4)
    182 L(next_unaligned_table):
        	/* Second half of the dispatch on %edx (the offset of the second-block cursor */
        	/* inside its 16-byte chunk); reached from the visible code when %edx >= 8. */
    183 	cmp	$8, %edx
    184 	je	L(shr_8)
    185 	cmp	$9, %edx
    186 	je	L(shr_9)
    187 	cmp	$10, %edx
    188 	je	L(shr_10)
    189 	cmp	$11, %edx
    190 	je	L(shr_11)
    191 	cmp	$12, %edx
    192 	je	L(shr_12)
    193 	cmp	$13, %edx
    194 	je	L(shr_13)
    195 	cmp	$14, %edx
    196 	je	L(shr_14)
        	/* Any other value falls through to the offset-15 path. */
    197 	jmp	L(shr_15)
    198 
    199 	ALIGN (4)
    200 L(shr_0):
        	/* Both cursors are 16-byte aligned (offset 0 in the dispatch). A biased */
        	/* count of 80 or more goes to the 32-bytes-per-iteration loop. */
    201 	cmp	$80, %ecx
    202 	jae	L(shr_0_gobble)
        	/* Remove the 48-byte bias from the count and set %eax (the offset) to 0. */
    203 	lea	-48(%ecx), %ecx
    204 	xor	%eax, %eax
        	/* Compare 32 bytes with aligned accesses: xmm1 = equality mask of bytes 0-15, */
        	/* xmm2 = mask of bytes 16-31 ANDed with xmm1. */
    205 	movaps	(%esi), %xmm1
    206 	pcmpeqb	(%edi), %xmm1
    207 	movaps	16(%esi), %xmm2
    208 	pcmpeqb	16(%edi), %xmm2
    209 	pand	%xmm1, %xmm2
    210 	pmovmskb %xmm2, %edx
    211 	add	$32, %edi
    212 	add	$32, %esi
        	/* %edx == 0xffff only if all 32 bytes matched; otherwise go to L(exit), */
        	/* which is defined outside this excerpt. */
    213 	sub	$0xffff, %edx
    214 	jnz	L(exit)
    215 
        	/* All equal: %eax/%edx = cursor + %ecx, pop %edi/%esi (%ebx stays pushed) */
        	/* and continue at L(less48bytes). */
    216 	lea	(%ecx, %edi,1), %eax
    217 	lea	(%ecx, %esi,1), %edx
    218 	POP (%edi)
    219 	POP (%esi)
    220 	jmp	L(less48bytes)
    221 
    222 	cfi_restore_state
    223 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
    224 	ALIGN (4)
    225 L(shr_0_gobble):
        	/* Loop variant for aligned cursors: 32 bytes are compared per iteration. */
        	/* The count is reduced by 48 and by a further 32; the first 32 bytes are */
        	/* compared up front (masks in xmm0 and xmm2) and %eax is set to 0. */
    226 	lea	-48(%ecx), %ecx
    227 	movdqa	(%esi), %xmm0
    228 	xor	%eax, %eax
    229 	pcmpeqb	(%edi), %xmm0
    230 	sub	$32, %ecx
    231 	movdqa	16(%esi), %xmm2
    232 	pcmpeqb	16(%edi), %xmm2
    233 L(shr_0_gobble_loop):
        	/* %edx = merged mask of the current 32 bytes. The sub sets CF on borrow and */
        	/* sbb folds it in, so %edx becomes 0 only if all bytes matched and the count */
        	/* did not borrow. The SSE instructions and lea do not write flags, so the jz */
        	/* acts on the sbb. The next 32 bytes are loaded and compared ahead of the */
        	/* branch; xmm1 keeps a copy of the first-half mask of the current pair. */
    234 	pand	%xmm0, %xmm2
    235 	sub	$32, %ecx
    236 	pmovmskb %xmm2, %edx
    237 	movdqa	%xmm0, %xmm1
    238 	movdqa	32(%esi), %xmm0
    239 	movdqa	48(%esi), %xmm2
    240 	sbb	$0xffff, %edx
    241 	pcmpeqb	32(%edi), %xmm0
    242 	pcmpeqb	48(%edi), %xmm2
    243 	lea	32(%edi), %edi
    244 	lea	32(%esi), %esi
    245 	jz	L(shr_0_gobble_loop)
    246 
        	/* Loop left. Merge the masks of the pair that was compared ahead. */
    247 	pand	%xmm0, %xmm2
        	/* Negative count: the last sub borrowed, so sbb took one extra off %edx; */
        	/* compensate with inc and add the 32 back to the count. */
    248 	cmp	$0, %ecx
    249 	jge	L(shr_0_gobble_loop_next)
    250 	inc	%edx
    251 	add	$32, %ecx
    252 L(shr_0_gobble_loop_next):
        	/* A non-zero %edx now means the pair checked in the loop differed. */
    253 	test	%edx, %edx
    254 	jnz	L(exit)
    255 
        	/* Check the look-ahead pair the same way, after advancing both cursors. */
    256 	pmovmskb %xmm2, %edx
    257 	movdqa	%xmm0, %xmm1
    258 	lea	32(%edi), %edi
    259 	lea	32(%esi), %esi
    260 	sub	$0xffff, %edx
    261 	jnz	L(exit)
        	/* All equal: %eax/%edx = cursor + %ecx, pop %edi/%esi and continue at */
        	/* L(less48bytes). */
    262 	lea	(%ecx, %edi,1), %eax
    263 	lea	(%ecx, %esi,1), %edx
    264 	POP (%edi)
    265 	POP (%esi)
    266 	jmp	L(less48bytes)
    267 
    268 	cfi_restore_state
    269 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
    270 	ALIGN (4)
    271 L(shr_1):
        	/* Dispatch target for %edx == 1: %esi was rounded down to a 16-byte boundary, */
        	/* so the bytes still to be compared start at 1(%esi). lea and mov do not */
        	/* write flags, so the jae still acts on the cmp against 80. %eax = offset. */
    272 	cmp	$80, %ecx
    273 	lea	-48(%ecx), %ecx
    274 	mov	%edx, %eax
    275 	jae	L(shr_1_gobble)
    276 
        	/* palignr $1 over two adjacent aligned chunks extracts the 16 bytes at */
        	/* 1(%esi) and 17(%esi); pcmpeqb turns them into per-byte equality masks. */
    277 	movdqa	16(%esi), %xmm1
    278 	movdqa	%xmm1, %xmm2
    279 	palignr	$1,(%esi), %xmm1
    280 	pcmpeqb	(%edi), %xmm1
    281 
    282 	movdqa	32(%esi), %xmm3
    283 	palignr	$1,%xmm2, %xmm3
    284 	pcmpeqb	16(%edi), %xmm3
    285 
        	/* Masks ANDed: pmovmskb yields 0xffff only when all 32 bytes are equal. */
    286 	pand	%xmm1, %xmm3
    287 	pmovmskb %xmm3, %edx
    288 	lea	32(%edi), %edi
    289 	lea	32(%esi), %esi
    290 	sub	$0xffff, %edx
    291 	jnz	L(exit)
        	/* All equal: %eax/%edx = cursor + %ecx (the 1-byte offset is added back to */
        	/* %esi), pop %edi/%esi and continue at L(less48bytes). */
    292 	lea	(%ecx, %edi,1), %eax
    293 	lea	1(%ecx, %esi,1), %edx
    294 	POP (%edi)
    295 	POP (%esi)
    296 	jmp	L(less48bytes)
    297 
    298 	cfi_restore_state
    299 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
    300 	ALIGN (4)
    301 L(shr_1_gobble):
        	/* Loop variant for offset 1: 32 bytes per iteration. The count is reduced by */
        	/* a further 32 and the first 32 bytes are compared up front (xmm0, xmm3). */
    302 	sub	$32, %ecx
    303 	movdqa	16(%esi), %xmm0
    304 	palignr	$1,(%esi), %xmm0
    305 	pcmpeqb	(%edi), %xmm0
    306 
    307 	movdqa	32(%esi), %xmm3
    308 	palignr	$1,16(%esi), %xmm3
    309 	pcmpeqb	16(%edi), %xmm3
    310 
    311 L(shr_1_gobble_loop):
        	/* %edx = merged mask of the current pair. sub sets CF on borrow and sbb folds */
        	/* it in: %edx is 0 only if all 32 bytes matched and the count did not borrow. */
        	/* The SSE instructions and lea leave the flags alone, so jz acts on the sbb. */
        	/* The next pair is realigned and compared ahead of the branch. */
    312 	pand	%xmm0, %xmm3
    313 	sub	$32, %ecx
    314 	pmovmskb %xmm3, %edx
    315 	movdqa	%xmm0, %xmm1
    316 
    317 	movdqa	64(%esi), %xmm3
    318 	palignr	$1,48(%esi), %xmm3
    319 	sbb	$0xffff, %edx
    320 	movdqa	48(%esi), %xmm0
    321 	palignr	$1,32(%esi), %xmm0
    322 	pcmpeqb	32(%edi), %xmm0
    323 	lea	32(%esi), %esi
    324 	pcmpeqb	48(%edi), %xmm3
    325 
    326 	lea	32(%edi), %edi
    327 	jz	L(shr_1_gobble_loop)
        	/* Loop left. Merge the masks of the pair that was compared ahead. */
    328 	pand	%xmm0, %xmm3
    329 
        	/* Negative count: the last sub borrowed, so sbb took one extra off %edx; */
        	/* compensate with inc and add the 32 back to the count. */
    330 	cmp	$0, %ecx
    331 	jge	L(shr_1_gobble_next)
    332 	inc	%edx
    333 	add	$32, %ecx
    334 L(shr_1_gobble_next):
        	/* A non-zero %edx now means the pair checked in the loop differed. */
    335 	test	%edx, %edx
    336 	jnz	L(exit)
    337 
        	/* Check the look-ahead pair the same way, after advancing both cursors. */
    338 	pmovmskb %xmm3, %edx
    339 	movdqa	%xmm0, %xmm1
    340 	lea	32(%edi), %edi
    341 	lea	32(%esi), %esi
    342 	sub	$0xffff, %edx
    343 	jnz	L(exit)
    344 
        	/* All equal: %eax/%edx = cursor + %ecx (offset 1 added back to %esi), pop */
        	/* %edi/%esi and continue at L(less48bytes). */
    345 	lea	(%ecx, %edi,1), %eax
    346 	lea	1(%ecx, %esi,1), %edx
    347 	POP (%edi)
    348 	POP (%esi)
    349 	jmp	L(less48bytes)
    350 
    351 	cfi_restore_state
    352 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
    353 	ALIGN (4)
    354 L(shr_2):
        	/* Dispatch target for %edx == 2: %esi was rounded down to a 16-byte boundary, */
        	/* so the bytes still to be compared start at 2(%esi). lea and mov do not */
        	/* write flags, so the jae still acts on the cmp against 80. %eax = offset. */
    355 	cmp	$80, %ecx
    356 	lea	-48(%ecx), %ecx
    357 	mov	%edx, %eax
    358 	jae	L(shr_2_gobble)
    359 
        	/* palignr $2 over two adjacent aligned chunks extracts the 16 bytes at */
        	/* 2(%esi) and 18(%esi); pcmpeqb turns them into per-byte equality masks. */
    360 	movdqa	16(%esi), %xmm1
    361 	movdqa	%xmm1, %xmm2
    362 	palignr	$2,(%esi), %xmm1
    363 	pcmpeqb	(%edi), %xmm1
    364 
    365 	movdqa	32(%esi), %xmm3
    366 	palignr	$2,%xmm2, %xmm3
    367 	pcmpeqb	16(%edi), %xmm3
    368 
        	/* Masks ANDed: pmovmskb yields 0xffff only when all 32 bytes are equal. */
    369 	pand	%xmm1, %xmm3
    370 	pmovmskb %xmm3, %edx
    371 	lea	32(%edi), %edi
    372 	lea	32(%esi), %esi
    373 	sub	$0xffff, %edx
    374 	jnz	L(exit)
        	/* All equal: %eax/%edx = cursor + %ecx (the 2-byte offset is added back to */
        	/* %esi), pop %edi/%esi and continue at L(less48bytes). */
    375 	lea	(%ecx, %edi,1), %eax
    376 	lea	2(%ecx, %esi,1), %edx
    377 	POP (%edi)
    378 	POP (%esi)
    379 	jmp	L(less48bytes)
    380 
    381 	cfi_restore_state
    382 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
    383 	ALIGN (4)
    384 L(shr_2_gobble):
        	/* Loop variant for offset 2: 32 bytes per iteration. The count is reduced by */
        	/* a further 32 and the first 32 bytes are compared up front (xmm0, xmm3). */
    385 	sub	$32, %ecx
    386 	movdqa	16(%esi), %xmm0
    387 	palignr	$2,(%esi), %xmm0
    388 	pcmpeqb	(%edi), %xmm0
    389 
    390 	movdqa	32(%esi), %xmm3
    391 	palignr	$2,16(%esi), %xmm3
    392 	pcmpeqb	16(%edi), %xmm3
    393 
    394 L(shr_2_gobble_loop):
        	/* %edx = merged mask of the current pair. sub sets CF on borrow and sbb folds */
        	/* it in: %edx is 0 only if all 32 bytes matched and the count did not borrow. */
        	/* The SSE instructions and lea leave the flags alone, so jz acts on the sbb. */
        	/* The next pair is realigned and compared ahead of the branch. */
    395 	pand	%xmm0, %xmm3
    396 	sub	$32, %ecx
    397 	pmovmskb %xmm3, %edx
    398 	movdqa	%xmm0, %xmm1
    399 
    400 	movdqa	64(%esi), %xmm3
    401 	palignr	$2,48(%esi), %xmm3
    402 	sbb	$0xffff, %edx
    403 	movdqa	48(%esi), %xmm0
    404 	palignr	$2,32(%esi), %xmm0
    405 	pcmpeqb	32(%edi), %xmm0
    406 	lea	32(%esi), %esi
    407 	pcmpeqb	48(%edi), %xmm3
    408 
    409 	lea	32(%edi), %edi
    410 	jz	L(shr_2_gobble_loop)
        	/* Loop left. Merge the masks of the pair that was compared ahead. */
    411 	pand	%xmm0, %xmm3
    412 
        	/* Negative count: the last sub borrowed, so sbb took one extra off %edx; */
        	/* compensate with inc and add the 32 back to the count. */
    413 	cmp	$0, %ecx
    414 	jge	L(shr_2_gobble_next)
    415 	inc	%edx
    416 	add	$32, %ecx
    417 L(shr_2_gobble_next):
        	/* A non-zero %edx now means the pair checked in the loop differed. */
    418 	test	%edx, %edx
    419 	jnz	L(exit)
    420 
        	/* Check the look-ahead pair the same way, after advancing both cursors. */
    421 	pmovmskb %xmm3, %edx
    422 	movdqa	%xmm0, %xmm1
    423 	lea	32(%edi), %edi
    424 	lea	32(%esi), %esi
    425 	sub	$0xffff, %edx
    426 	jnz	L(exit)
    427 
        	/* All equal: %eax/%edx = cursor + %ecx (offset 2 added back to %esi), pop */
        	/* %edi/%esi and continue at L(less48bytes). */
    428 	lea	(%ecx, %edi,1), %eax
    429 	lea	2(%ecx, %esi,1), %edx
    430 	POP (%edi)
    431 	POP (%esi)
    432 	jmp	L(less48bytes)
    433 
    434 	cfi_restore_state
    435 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
    436 	ALIGN (4)
    437 L(shr_3):
        	/* Dispatch target for %edx == 3: %esi was rounded down to a 16-byte boundary, */
        	/* so the bytes still to be compared start at 3(%esi). lea and mov do not */
        	/* write flags, so the jae still acts on the cmp against 80. %eax = offset. */
    438 	cmp	$80, %ecx
    439 	lea	-48(%ecx), %ecx
    440 	mov	%edx, %eax
    441 	jae	L(shr_3_gobble)
    442 
        	/* palignr $3 over two adjacent aligned chunks extracts the 16 bytes at */
        	/* 3(%esi) and 19(%esi); pcmpeqb turns them into per-byte equality masks. */
    443 	movdqa	16(%esi), %xmm1
    444 	movdqa	%xmm1, %xmm2
    445 	palignr	$3,(%esi), %xmm1
    446 	pcmpeqb	(%edi), %xmm1
    447 
    448 	movdqa	32(%esi), %xmm3
    449 	palignr	$3,%xmm2, %xmm3
    450 	pcmpeqb	16(%edi), %xmm3
    451 
        	/* Masks ANDed: pmovmskb yields 0xffff only when all 32 bytes are equal. */
    452 	pand	%xmm1, %xmm3
    453 	pmovmskb %xmm3, %edx
    454 	lea	32(%edi), %edi
    455 	lea	32(%esi), %esi
    456 	sub	$0xffff, %edx
    457 	jnz	L(exit)
        	/* All equal: %eax/%edx = cursor + %ecx (the 3-byte offset is added back to */
        	/* %esi), pop %edi/%esi and continue at L(less48bytes). */
    458 	lea	(%ecx, %edi,1), %eax
    459 	lea	3(%ecx, %esi,1), %edx
    460 	POP (%edi)
    461 	POP (%esi)
    462 	jmp	L(less48bytes)
    463 
    464 	cfi_restore_state
    465 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
    466 	ALIGN (4)
    467 L(shr_3_gobble):
        	/* Loop variant for offset 3: 32 bytes per iteration. The count is reduced by */
        	/* a further 32 and the first 32 bytes are compared up front (xmm0, xmm3). */
    468 	sub	$32, %ecx
    469 	movdqa	16(%esi), %xmm0
    470 	palignr	$3,(%esi), %xmm0
    471 	pcmpeqb	(%edi), %xmm0
    472 
    473 	movdqa	32(%esi), %xmm3
    474 	palignr	$3,16(%esi), %xmm3
    475 	pcmpeqb	16(%edi), %xmm3
    476 
    477 L(shr_3_gobble_loop):
        	/* %edx = merged mask of the current pair. sub sets CF on borrow and sbb folds */
        	/* it in: %edx is 0 only if all 32 bytes matched and the count did not borrow. */
        	/* The SSE instructions and lea leave the flags alone, so jz acts on the sbb. */
        	/* The next pair is realigned and compared ahead of the branch. */
    478 	pand	%xmm0, %xmm3
    479 	sub	$32, %ecx
    480 	pmovmskb %xmm3, %edx
    481 	movdqa	%xmm0, %xmm1
    482 
    483 	movdqa	64(%esi), %xmm3
    484 	palignr	$3,48(%esi), %xmm3
    485 	sbb	$0xffff, %edx
    486 	movdqa	48(%esi), %xmm0
    487 	palignr	$3,32(%esi), %xmm0
    488 	pcmpeqb	32(%edi), %xmm0
    489 	lea	32(%esi), %esi
    490 	pcmpeqb	48(%edi), %xmm3
    491 
    492 	lea	32(%edi), %edi
    493 	jz	L(shr_3_gobble_loop)
        	/* Loop left. Merge the masks of the pair that was compared ahead. */
    494 	pand	%xmm0, %xmm3
    495 
        	/* Negative count: the last sub borrowed, so sbb took one extra off %edx; */
        	/* compensate with inc and add the 32 back to the count. */
    496 	cmp	$0, %ecx
    497 	jge	L(shr_3_gobble_next)
    498 	inc	%edx
    499 	add	$32, %ecx
    500 L(shr_3_gobble_next):
        	/* A non-zero %edx now means the pair checked in the loop differed. */
    501 	test	%edx, %edx
    502 	jnz	L(exit)
    503 
        	/* Check the look-ahead pair the same way, after advancing both cursors. */
    504 	pmovmskb %xmm3, %edx
    505 	movdqa	%xmm0, %xmm1
    506 	lea	32(%edi), %edi
    507 	lea	32(%esi), %esi
    508 	sub	$0xffff, %edx
    509 	jnz	L(exit)
    510 
        	/* All equal: %eax/%edx = cursor + %ecx (offset 3 added back to %esi), pop */
        	/* %edi/%esi and continue at L(less48bytes). */
    511 	lea	(%ecx, %edi,1), %eax
    512 	lea	3(%ecx, %esi,1), %edx
    513 	POP (%edi)
    514 	POP (%esi)
    515 	jmp	L(less48bytes)
    516 
    517 	cfi_restore_state
    518 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
    519 	ALIGN (4)
    520 L(shr_4):
        	/* Dispatch target for %edx == 4: %esi was rounded down to a 16-byte boundary, */
        	/* so the bytes still to be compared start at 4(%esi). lea and mov do not */
        	/* write flags, so the jae still acts on the cmp against 80. %eax = offset. */
    521 	cmp	$80, %ecx
    522 	lea	-48(%ecx), %ecx
    523 	mov	%edx, %eax
    524 	jae	L(shr_4_gobble)
    525 
        	/* palignr $4 over two adjacent aligned chunks extracts the 16 bytes at */
        	/* 4(%esi) and 20(%esi); pcmpeqb turns them into per-byte equality masks. */
    526 	movdqa	16(%esi), %xmm1
    527 	movdqa	%xmm1, %xmm2
    528 	palignr	$4,(%esi), %xmm1
    529 	pcmpeqb	(%edi), %xmm1
    530 
    531 	movdqa	32(%esi), %xmm3
    532 	palignr	$4,%xmm2, %xmm3
    533 	pcmpeqb	16(%edi), %xmm3
    534 
        	/* Masks ANDed: pmovmskb yields 0xffff only when all 32 bytes are equal. */
    535 	pand	%xmm1, %xmm3
    536 	pmovmskb %xmm3, %edx
    537 	lea	32(%edi), %edi
    538 	lea	32(%esi), %esi
    539 	sub	$0xffff, %edx
    540 	jnz	L(exit)
        	/* All equal: %eax/%edx = cursor + %ecx (the 4-byte offset is added back to */
        	/* %esi), pop %edi/%esi and continue at L(less48bytes). */
    541 	lea	(%ecx, %edi,1), %eax
    542 	lea	4(%ecx, %esi,1), %edx
    543 	POP (%edi)
    544 	POP (%esi)
    545 	jmp	L(less48bytes)
    546 
    547 	cfi_restore_state
    548 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
    549 	ALIGN (4)
    550 L(shr_4_gobble):
        	/* Loop variant for offset 4: 32 bytes per iteration. The count is reduced by */
        	/* a further 32 and the first 32 bytes are compared up front (xmm0, xmm3). */
    551 	sub	$32, %ecx
    552 	movdqa	16(%esi), %xmm0
    553 	palignr	$4,(%esi), %xmm0
    554 	pcmpeqb	(%edi), %xmm0
    555 
    556 	movdqa	32(%esi), %xmm3
    557 	palignr	$4,16(%esi), %xmm3
    558 	pcmpeqb	16(%edi), %xmm3
    559 
    560 L(shr_4_gobble_loop):
        	/* %edx = merged mask of the current pair. sub sets CF on borrow and sbb folds */
        	/* it in: %edx is 0 only if all 32 bytes matched and the count did not borrow. */
        	/* The SSE instructions and lea leave the flags alone, so jz acts on the sbb. */
        	/* The next pair is realigned and compared ahead of the branch. */
    561 	pand	%xmm0, %xmm3
    562 	sub	$32, %ecx
    563 	pmovmskb %xmm3, %edx
    564 	movdqa	%xmm0, %xmm1
    565 
    566 	movdqa	64(%esi), %xmm3
    567 	palignr	$4,48(%esi), %xmm3
    568 	sbb	$0xffff, %edx
    569 	movdqa	48(%esi), %xmm0
    570 	palignr	$4,32(%esi), %xmm0
    571 	pcmpeqb	32(%edi), %xmm0
    572 	lea	32(%esi), %esi
    573 	pcmpeqb	48(%edi), %xmm3
    574 
    575 	lea	32(%edi), %edi
    576 	jz	L(shr_4_gobble_loop)
        	/* Loop left. Merge the masks of the pair that was compared ahead. */
    577 	pand	%xmm0, %xmm3
    578 
        	/* Negative count: the last sub borrowed, so sbb took one extra off %edx; */
        	/* compensate with inc and add the 32 back to the count. */
    579 	cmp	$0, %ecx
    580 	jge	L(shr_4_gobble_next)
    581 	inc	%edx
    582 	add	$32, %ecx
    583 L(shr_4_gobble_next):
        	/* A non-zero %edx now means the pair checked in the loop differed. */
    584 	test	%edx, %edx
    585 	jnz	L(exit)
    586 
        	/* Check the look-ahead pair the same way, after advancing both cursors. */
    587 	pmovmskb %xmm3, %edx
    588 	movdqa	%xmm0, %xmm1
    589 	lea	32(%edi), %edi
    590 	lea	32(%esi), %esi
    591 	sub	$0xffff, %edx
    592 	jnz	L(exit)
    593 
        	/* All equal: %eax/%edx = cursor + %ecx (offset 4 added back to %esi), pop */
        	/* %edi/%esi and continue at L(less48bytes). */
    594 	lea	(%ecx, %edi,1), %eax
    595 	lea	4(%ecx, %esi,1), %edx
    596 	POP (%edi)
    597 	POP (%esi)
    598 	jmp	L(less48bytes)
    599 
    600 	cfi_restore_state
    601 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
    602 	ALIGN (4)
    603 L(shr_5):
        	/* Dispatch target for %edx == 5: %esi was rounded down to a 16-byte boundary, */
        	/* so the bytes still to be compared start at 5(%esi). lea and mov do not */
        	/* write flags, so the jae still acts on the cmp against 80. %eax = offset. */
    604 	cmp	$80, %ecx
    605 	lea	-48(%ecx), %ecx
    606 	mov	%edx, %eax
    607 	jae	L(shr_5_gobble)
    608 
        	/* palignr $5 over two adjacent aligned chunks extracts the 16 bytes at */
        	/* 5(%esi) and 21(%esi); pcmpeqb turns them into per-byte equality masks. */
    609 	movdqa	16(%esi), %xmm1
    610 	movdqa	%xmm1, %xmm2
    611 	palignr	$5,(%esi), %xmm1
    612 	pcmpeqb	(%edi), %xmm1
    613 
    614 	movdqa	32(%esi), %xmm3
    615 	palignr	$5,%xmm2, %xmm3
    616 	pcmpeqb	16(%edi), %xmm3
    617 
        	/* Masks ANDed: pmovmskb yields 0xffff only when all 32 bytes are equal. */
    618 	pand	%xmm1, %xmm3
    619 	pmovmskb %xmm3, %edx
    620 	lea	32(%edi), %edi
    621 	lea	32(%esi), %esi
    622 	sub	$0xffff, %edx
    623 	jnz	L(exit)
        	/* All equal: %eax/%edx = cursor + %ecx (the 5-byte offset is added back to */
        	/* %esi), pop %edi/%esi and continue at L(less48bytes). */
    624 	lea	(%ecx, %edi,1), %eax
    625 	lea	5(%ecx, %esi,1), %edx
    626 	POP (%edi)
    627 	POP (%esi)
    628 	jmp	L(less48bytes)
    629 
    630 	cfi_restore_state
    631 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
    632 	ALIGN (4)
    633 L(shr_5_gobble):
        	/* Loop variant for offset 5: 32 bytes per iteration. The count is reduced by */
        	/* a further 32 and the first 32 bytes are compared up front (xmm0, xmm3). */
    634 	sub	$32, %ecx
    635 	movdqa	16(%esi), %xmm0
    636 	palignr	$5,(%esi), %xmm0
    637 	pcmpeqb	(%edi), %xmm0
    638 
    639 	movdqa	32(%esi), %xmm3
    640 	palignr	$5,16(%esi), %xmm3
    641 	pcmpeqb	16(%edi), %xmm3
    642 
    643 L(shr_5_gobble_loop):
        	/* %edx = merged mask of the current pair. sub sets CF on borrow and sbb folds */
        	/* it in: %edx is 0 only if all 32 bytes matched and the count did not borrow. */
        	/* The SSE instructions and lea leave the flags alone, so jz acts on the sbb. */
        	/* The next pair is realigned and compared ahead of the branch. */
    644 	pand	%xmm0, %xmm3
    645 	sub	$32, %ecx
    646 	pmovmskb %xmm3, %edx
    647 	movdqa	%xmm0, %xmm1
    648 
    649 	movdqa	64(%esi), %xmm3
    650 	palignr	$5,48(%esi), %xmm3
    651 	sbb	$0xffff, %edx
    652 	movdqa	48(%esi), %xmm0
    653 	palignr	$5,32(%esi), %xmm0
    654 	pcmpeqb	32(%edi), %xmm0
    655 	lea	32(%esi), %esi
    656 	pcmpeqb	48(%edi), %xmm3
    657 
    658 	lea	32(%edi), %edi
    659 	jz	L(shr_5_gobble_loop)
        	/* Loop left. Merge the masks of the pair that was compared ahead. */
    660 	pand	%xmm0, %xmm3
    661 
        	/* Negative count: the last sub borrowed, so sbb took one extra off %edx; */
        	/* compensate with inc and add the 32 back to the count. */
    662 	cmp	$0, %ecx
    663 	jge	L(shr_5_gobble_next)
    664 	inc	%edx
    665 	add	$32, %ecx
    666 L(shr_5_gobble_next):
        	/* A non-zero %edx now means the pair checked in the loop differed. */
    667 	test	%edx, %edx
    668 	jnz	L(exit)
    669 
        	/* Check the look-ahead pair the same way, after advancing both cursors. */
    670 	pmovmskb %xmm3, %edx
    671 	movdqa	%xmm0, %xmm1
    672 	lea	32(%edi), %edi
    673 	lea	32(%esi), %esi
    674 	sub	$0xffff, %edx
    675 	jnz	L(exit)
    676 
        	/* All equal: %eax/%edx = cursor + %ecx (offset 5 added back to %esi), pop */
        	/* %edi/%esi and continue at L(less48bytes). */
    677 	lea	(%ecx, %edi,1), %eax
    678 	lea	5(%ecx, %esi,1), %edx
    679 	POP (%edi)
    680 	POP (%esi)
    681 	jmp	L(less48bytes)
    682 
    683 	cfi_restore_state
    684 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
    685 	ALIGN (4)
    686 L(shr_6):
        	/* Dispatch target for %edx == 6: %esi was rounded down to a 16-byte boundary, */
        	/* so the bytes still to be compared start at 6(%esi). lea and mov do not */
        	/* write flags, so the jae still acts on the cmp against 80. %eax = offset. */
    687 	cmp	$80, %ecx
    688 	lea	-48(%ecx), %ecx
    689 	mov	%edx, %eax
    690 	jae	L(shr_6_gobble)
    691 
        	/* palignr $6 over two adjacent aligned chunks extracts the 16 bytes at */
        	/* 6(%esi) and 22(%esi); pcmpeqb turns them into per-byte equality masks. */
    692 	movdqa	16(%esi), %xmm1
    693 	movdqa	%xmm1, %xmm2
    694 	palignr	$6,(%esi), %xmm1
    695 	pcmpeqb	(%edi), %xmm1
    696 
    697 	movdqa	32(%esi), %xmm3
    698 	palignr	$6,%xmm2, %xmm3
    699 	pcmpeqb	16(%edi), %xmm3
    700 
        	/* Masks ANDed: pmovmskb yields 0xffff only when all 32 bytes are equal. */
    701 	pand	%xmm1, %xmm3
    702 	pmovmskb %xmm3, %edx
    703 	lea	32(%edi), %edi
    704 	lea	32(%esi), %esi
    705 	sub	$0xffff, %edx
    706 	jnz	L(exit)
        	/* All equal: %eax/%edx = cursor + %ecx (the 6-byte offset is added back to */
        	/* %esi), pop %edi/%esi and continue at L(less48bytes). */
    707 	lea	(%ecx, %edi,1), %eax
    708 	lea	6(%ecx, %esi,1), %edx
    709 	POP (%edi)
    710 	POP (%esi)
    711 	jmp	L(less48bytes)
    712 
    713 	cfi_restore_state
    714 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
    715 	ALIGN (4)
    716 L(shr_6_gobble):
        	/* Loop variant for offset 6: 32 bytes per iteration. The count is reduced by */
        	/* a further 32 and the first 32 bytes are compared up front (xmm0, xmm3). */
    717 	sub	$32, %ecx
    718 	movdqa	16(%esi), %xmm0
    719 	palignr	$6,(%esi), %xmm0
    720 	pcmpeqb	(%edi), %xmm0
    721 
    722 	movdqa	32(%esi), %xmm3
    723 	palignr	$6,16(%esi), %xmm3
    724 	pcmpeqb	16(%edi), %xmm3
    725 
    726 L(shr_6_gobble_loop):
        	/* %edx = merged mask of the current pair. sub sets CF on borrow and sbb folds */
        	/* it in: %edx is 0 only if all 32 bytes matched and the count did not borrow. */
        	/* The SSE instructions and lea leave the flags alone, so jz acts on the sbb. */
        	/* The next pair is realigned and compared ahead of the branch. */
    727 	pand	%xmm0, %xmm3
    728 	sub	$32, %ecx
    729 	pmovmskb %xmm3, %edx
    730 	movdqa	%xmm0, %xmm1
    731 
    732 	movdqa	64(%esi), %xmm3
    733 	palignr	$6,48(%esi), %xmm3
    734 	sbb	$0xffff, %edx
    735 	movdqa	48(%esi), %xmm0
    736 	palignr	$6,32(%esi), %xmm0
    737 	pcmpeqb	32(%edi), %xmm0
    738 	lea	32(%esi), %esi
    739 	pcmpeqb	48(%edi), %xmm3
    740 
    741 	lea	32(%edi), %edi
    742 	jz	L(shr_6_gobble_loop)
        	/* Loop left. Merge the masks of the pair that was compared ahead. */
    743 	pand	%xmm0, %xmm3
    744 
        	/* Negative count: the last sub borrowed, so sbb took one extra off %edx; */
        	/* compensate with inc and add the 32 back to the count. */
    745 	cmp	$0, %ecx
    746 	jge	L(shr_6_gobble_next)
    747 	inc	%edx
    748 	add	$32, %ecx
    749 L(shr_6_gobble_next):
        	/* A non-zero %edx now means the pair checked in the loop differed. */
    750 	test	%edx, %edx
    751 	jnz	L(exit)
    752 
        	/* Check the look-ahead pair the same way, after advancing both cursors. */
    753 	pmovmskb %xmm3, %edx
    754 	movdqa	%xmm0, %xmm1
    755 	lea	32(%edi), %edi
    756 	lea	32(%esi), %esi
    757 	sub	$0xffff, %edx
    758 	jnz	L(exit)
    759 
        	/* All equal: %eax/%edx = cursor + %ecx (offset 6 added back to %esi), pop */
        	/* %edi/%esi and continue at L(less48bytes). */
    760 	lea	(%ecx, %edi,1), %eax
    761 	lea	6(%ecx, %esi,1), %edx
    762 	POP (%edi)
    763 	POP (%esi)
    764 	jmp	L(less48bytes)
    765 
    766 	cfi_restore_state
    767 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
    768 	ALIGN (4)
    769 L(shr_7):
        	/* Dispatch target for offset 7: %esi was rounded down to a 16-byte boundary, */
        	/* so the bytes still to be compared start at 7(%esi). lea and mov do not */
        	/* write flags, so the jae still acts on the cmp against 80. %eax = offset. */
    770 	cmp	$80, %ecx
    771 	lea	-48(%ecx), %ecx
    772 	mov	%edx, %eax
    773 	jae	L(shr_7_gobble)
    774 
        	/* palignr $7 over two adjacent aligned chunks extracts the 16 bytes at */
        	/* 7(%esi) and 23(%esi); pcmpeqb turns them into per-byte equality masks. */
    775 	movdqa	16(%esi), %xmm1
    776 	movdqa	%xmm1, %xmm2
    777 	palignr	$7,(%esi), %xmm1
    778 	pcmpeqb	(%edi), %xmm1
    779 
    780 	movdqa	32(%esi), %xmm3
    781 	palignr	$7,%xmm2, %xmm3
    782 	pcmpeqb	16(%edi), %xmm3
    783 
        	/* Masks ANDed: pmovmskb yields 0xffff only when all 32 bytes are equal. */
    784 	pand	%xmm1, %xmm3
    785 	pmovmskb %xmm3, %edx
    786 	lea	32(%edi), %edi
    787 	lea	32(%esi), %esi
    788 	sub	$0xffff, %edx
    789 	jnz	L(exit)
        	/* All equal: %eax/%edx = cursor + %ecx (the 7-byte offset is added back to */
        	/* %esi), pop %edi/%esi and continue at L(less48bytes). */
    790 	lea	(%ecx, %edi,1), %eax
    791 	lea	7(%ecx, %esi,1), %edx
    792 	POP (%edi)
    793 	POP (%esi)
    794 	jmp	L(less48bytes)
    795 
    796 	cfi_restore_state
    797 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
    798 	ALIGN (4)
    799 L(shr_7_gobble):
        	/* Loop variant for offset 7: 32 bytes per iteration. The count is reduced by */
        	/* a further 32 and the first 32 bytes are compared up front (xmm0, xmm3). */
    800 	sub	$32, %ecx
    801 	movdqa	16(%esi), %xmm0
    802 	palignr	$7,(%esi), %xmm0
    803 	pcmpeqb	(%edi), %xmm0
    804 
    805 	movdqa	32(%esi), %xmm3
    806 	palignr	$7,16(%esi), %xmm3
    807 	pcmpeqb	16(%edi), %xmm3
    808 
    809 L(shr_7_gobble_loop):
        	/* %edx = merged mask of the current pair. sub sets CF on borrow and sbb folds */
        	/* it in: %edx is 0 only if all 32 bytes matched and the count did not borrow. */
        	/* The SSE instructions and lea leave the flags alone, so jz acts on the sbb. */
        	/* The next pair is realigned and compared ahead of the branch. */
    810 	pand	%xmm0, %xmm3
    811 	sub	$32, %ecx
    812 	pmovmskb %xmm3, %edx
    813 	movdqa	%xmm0, %xmm1
    814 
    815 	movdqa	64(%esi), %xmm3
    816 	palignr	$7,48(%esi), %xmm3
    817 	sbb	$0xffff, %edx
    818 	movdqa	48(%esi), %xmm0
    819 	palignr	$7,32(%esi), %xmm0
    820 	pcmpeqb	32(%edi), %xmm0
    821 	lea	32(%esi), %esi
    822 	pcmpeqb	48(%edi), %xmm3
    823 
    824 	lea	32(%edi), %edi
    825 	jz	L(shr_7_gobble_loop)
        	/* Loop left. Merge the masks of the pair that was compared ahead. */
    826 	pand	%xmm0, %xmm3
    827 
        	/* Negative count: the last sub borrowed, so sbb took one extra off %edx; */
        	/* compensate with inc and add the 32 back to the count. */
    828 	cmp	$0, %ecx
    829 	jge	L(shr_7_gobble_next)
    830 	inc	%edx
    831 	add	$32, %ecx
    832 L(shr_7_gobble_next):
        	/* A non-zero %edx now means the pair checked in the loop differed. */
    833 	test	%edx, %edx
    834 	jnz	L(exit)
    835 
        	/* Check the look-ahead pair the same way, after advancing both cursors. */
    836 	pmovmskb %xmm3, %edx
    837 	movdqa	%xmm0, %xmm1
    838 	lea	32(%edi), %edi
    839 	lea	32(%esi), %esi
    840 	sub	$0xffff, %edx
    841 	jnz	L(exit)
    842 
        	/* All equal: %eax/%edx = cursor + %ecx (offset 7 added back to %esi), pop */
        	/* %edi/%esi and continue at L(less48bytes). */
    843 	lea	(%ecx, %edi,1), %eax
    844 	lea	7(%ecx, %esi,1), %edx
    845 	POP (%edi)
    846 	POP (%esi)
    847 	jmp	L(less48bytes)
    848 
    849 	cfi_restore_state
    850 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
    851 	ALIGN (4)
    852 L(shr_8):
        	/* Dispatch target for %edx == 8: %esi was rounded down to a 16-byte boundary, */
        	/* so the bytes still to be compared start at 8(%esi). lea and mov do not */
        	/* write flags, so the jae still acts on the cmp against 80. %eax = offset. */
    853 	cmp	$80, %ecx
    854 	lea	-48(%ecx), %ecx
    855 	mov	%edx, %eax
    856 	jae	L(shr_8_gobble)
    857 
        	/* palignr $8 over two adjacent aligned chunks extracts the 16 bytes at */
        	/* 8(%esi) and 24(%esi); pcmpeqb turns them into per-byte equality masks. */
    858 	movdqa	16(%esi), %xmm1
    859 	movdqa	%xmm1, %xmm2
    860 	palignr	$8,(%esi), %xmm1
    861 	pcmpeqb	(%edi), %xmm1
    862 
    863 	movdqa	32(%esi), %xmm3
    864 	palignr	$8,%xmm2, %xmm3
    865 	pcmpeqb	16(%edi), %xmm3
    866 
        	/* Masks ANDed: pmovmskb yields 0xffff only when all 32 bytes are equal. */
    867 	pand	%xmm1, %xmm3
    868 	pmovmskb %xmm3, %edx
    869 	lea	32(%edi), %edi
    870 	lea	32(%esi), %esi
    871 	sub	$0xffff, %edx
    872 	jnz	L(exit)
        	/* All equal: %eax/%edx = cursor + %ecx (the 8-byte offset is added back to */
        	/* %esi), pop %edi/%esi and continue at L(less48bytes). */
    873 	lea	(%ecx, %edi,1), %eax
    874 	lea	8(%ecx, %esi,1), %edx
    875 	POP (%edi)
    876 	POP (%esi)
    877 	jmp	L(less48bytes)
    878 
    879 	cfi_restore_state
    880 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
    881 	ALIGN (4)
    882 L(shr_8_gobble):
        	/* Loop variant for offset 8: 32 bytes per iteration. The count is reduced by */
        	/* a further 32 and the first 32 bytes are compared up front (xmm0, xmm3). */
    883 	sub	$32, %ecx
    884 	movdqa	16(%esi), %xmm0
    885 	palignr	$8,(%esi), %xmm0
    886 	pcmpeqb	(%edi), %xmm0
    887 
    888 	movdqa	32(%esi), %xmm3
    889 	palignr	$8,16(%esi), %xmm3
    890 	pcmpeqb	16(%edi), %xmm3
    891 
    892 L(shr_8_gobble_loop):
        	/* %edx = merged mask of the current pair. sub sets CF on borrow and sbb folds */
        	/* it in: %edx is 0 only if all 32 bytes matched and the count did not borrow. */
        	/* The SSE instructions and lea leave the flags alone, so jz acts on the sbb. */
        	/* The next pair is realigned and compared ahead of the branch. */
    893 	pand	%xmm0, %xmm3
    894 	sub	$32, %ecx
    895 	pmovmskb %xmm3, %edx
    896 	movdqa	%xmm0, %xmm1
    897 
    898 	movdqa	64(%esi), %xmm3
    899 	palignr	$8,48(%esi), %xmm3
    900 	sbb	$0xffff, %edx
    901 	movdqa	48(%esi), %xmm0
    902 	palignr	$8,32(%esi), %xmm0
    903 	pcmpeqb	32(%edi), %xmm0
    904 	lea	32(%esi), %esi
    905 	pcmpeqb	48(%edi), %xmm3
    906 
    907 	lea	32(%edi), %edi
    908 	jz	L(shr_8_gobble_loop)
        	/* Loop left. Merge the masks of the pair that was compared ahead. */
    909 	pand	%xmm0, %xmm3
    910 
        	/* Negative count: the last sub borrowed, so sbb took one extra off %edx; */
        	/* compensate with inc and add the 32 back to the count. */
    911 	cmp	$0, %ecx
    912 	jge	L(shr_8_gobble_next)
    913 	inc	%edx
    914 	add	$32, %ecx
    915 L(shr_8_gobble_next):
        	/* A non-zero %edx now means the pair checked in the loop differed. */
    916 	test	%edx, %edx
    917 	jnz	L(exit)
    918 
        	/* Check the look-ahead pair the same way, after advancing both cursors. */
    919 	pmovmskb %xmm3, %edx
    920 	movdqa	%xmm0, %xmm1
    921 	lea	32(%edi), %edi
    922 	lea	32(%esi), %esi
    923 	sub	$0xffff, %edx
    924 	jnz	L(exit)
    925 
        	/* All equal: %eax/%edx = cursor + %ecx (offset 8 added back to %esi), pop */
        	/* %edi/%esi and continue at L(less48bytes). */
    926 	lea	(%ecx, %edi,1), %eax
    927 	lea	8(%ecx, %esi,1), %edx
    928 	POP (%edi)
    929 	POP (%esi)
    930 	jmp	L(less48bytes)
    931 
    932 	cfi_restore_state
    933 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
    934 	ALIGN (4)
    935 L(shr_9):
        	/* Dispatch target for %edx == 9: %esi was rounded down to a 16-byte boundary, */
        	/* so the bytes still to be compared start at 9(%esi). lea and mov do not */
        	/* write flags, so the jae still acts on the cmp against 80. %eax = offset. */
    936 	cmp	$80, %ecx
    937 	lea	-48(%ecx), %ecx
    938 	mov	%edx, %eax
    939 	jae	L(shr_9_gobble)
    940 
        	/* palignr $9 over two adjacent aligned chunks extracts the 16 bytes at */
        	/* 9(%esi) and 25(%esi); pcmpeqb turns them into per-byte equality masks. */
    941 	movdqa	16(%esi), %xmm1
    942 	movdqa	%xmm1, %xmm2
    943 	palignr	$9,(%esi), %xmm1
    944 	pcmpeqb	(%edi), %xmm1
    945 
    946 	movdqa	32(%esi), %xmm3
    947 	palignr	$9,%xmm2, %xmm3
    948 	pcmpeqb	16(%edi), %xmm3
    949 
        	/* Masks ANDed: pmovmskb yields 0xffff only when all 32 bytes are equal. */
    950 	pand	%xmm1, %xmm3
    951 	pmovmskb %xmm3, %edx
    952 	lea	32(%edi), %edi
    953 	lea	32(%esi), %esi
    954 	sub	$0xffff, %edx
    955 	jnz	L(exit)
        	/* All equal: %eax/%edx = cursor + %ecx (the 9-byte offset is added back to */
        	/* %esi), pop %edi/%esi and continue at L(less48bytes). */
    956 	lea	(%ecx, %edi,1), %eax
    957 	lea	9(%ecx, %esi,1), %edx
    958 	POP (%edi)
    959 	POP (%esi)
    960 	jmp	L(less48bytes)
    961 
    962 	cfi_restore_state
    963 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
    964 	ALIGN (4)
    965 L(shr_9_gobble):
        	/* Loop variant for offset 9: 32 bytes per iteration. The count is reduced by */
        	/* a further 32 and the first 32 bytes are compared up front (xmm0, xmm3). */
    966 	sub	$32, %ecx
    967 	movdqa	16(%esi), %xmm0
    968 	palignr	$9,(%esi), %xmm0
    969 	pcmpeqb	(%edi), %xmm0
    970 
    971 	movdqa	32(%esi), %xmm3
    972 	palignr	$9,16(%esi), %xmm3
    973 	pcmpeqb	16(%edi), %xmm3
    974 
    975 L(shr_9_gobble_loop):
        	/* %edx = merged mask of the current pair. sub sets CF on borrow and sbb folds */
        	/* it in: %edx is 0 only if all 32 bytes matched and the count did not borrow. */
        	/* The SSE instructions and lea leave the flags alone, so jz acts on the sbb. */
        	/* The next pair is realigned and compared ahead of the branch. */
    976 	pand	%xmm0, %xmm3
    977 	sub	$32, %ecx
    978 	pmovmskb %xmm3, %edx
    979 	movdqa	%xmm0, %xmm1
    980 
    981 	movdqa	64(%esi), %xmm3
    982 	palignr	$9,48(%esi), %xmm3
    983 	sbb	$0xffff, %edx
    984 	movdqa	48(%esi), %xmm0
    985 	palignr	$9,32(%esi), %xmm0
    986 	pcmpeqb	32(%edi), %xmm0
    987 	lea	32(%esi), %esi
    988 	pcmpeqb	48(%edi), %xmm3
    989 
    990 	lea	32(%edi), %edi
    991 	jz	L(shr_9_gobble_loop)
        	/* Loop left. Merge the masks of the pair that was compared ahead. */
    992 	pand	%xmm0, %xmm3
    993 
        	/* Negative count: the last sub borrowed, so sbb took one extra off %edx; */
        	/* compensate with inc and add the 32 back to the count. */
    994 	cmp	$0, %ecx
    995 	jge	L(shr_9_gobble_next)
    996 	inc	%edx
    997 	add	$32, %ecx
    998 L(shr_9_gobble_next):
        	/* A non-zero %edx now means the pair checked in the loop differed. */
    999 	test	%edx, %edx
   1000 	jnz	L(exit)
   1001 
        	/* Check the look-ahead pair the same way, after advancing both cursors. */
   1002 	pmovmskb %xmm3, %edx
   1003 	movdqa	%xmm0, %xmm1
   1004 	lea	32(%edi), %edi
   1005 	lea	32(%esi), %esi
   1006 	sub	$0xffff, %edx
   1007 	jnz	L(exit)
   1008 
        	/* All equal: %eax/%edx = cursor + %ecx (offset 9 added back to %esi), pop */
        	/* %edi/%esi and continue at L(less48bytes). */
   1009 	lea	(%ecx, %edi,1), %eax
   1010 	lea	9(%ecx, %esi,1), %edx
   1011 	POP (%edi)
   1012 	POP (%esi)
   1013 	jmp	L(less48bytes)
   1014 
   1015 	cfi_restore_state
   1016 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
   1017 	ALIGN (4)
   1018 L(shr_10):
        	/* Dispatch target for %edx == 10: %esi was rounded down to a 16-byte boundary, */
        	/* so the bytes still to be compared start at 10(%esi). lea and mov do not */
        	/* write flags, so the jae still acts on the cmp against 80. %eax = offset. */
   1019 	cmp	$80, %ecx
   1020 	lea	-48(%ecx), %ecx
   1021 	mov	%edx, %eax
   1022 	jae	L(shr_10_gobble)
   1023 
        	/* palignr $10 over two adjacent aligned chunks extracts the 16 bytes at */
        	/* 10(%esi) and 26(%esi); pcmpeqb turns them into per-byte equality masks. */
   1024 	movdqa	16(%esi), %xmm1
   1025 	movdqa	%xmm1, %xmm2
   1026 	palignr	$10, (%esi), %xmm1
   1027 	pcmpeqb	(%edi), %xmm1
   1028 
   1029 	movdqa	32(%esi), %xmm3
   1030 	palignr	$10,%xmm2, %xmm3
   1031 	pcmpeqb	16(%edi), %xmm3
   1032 
        	/* Masks ANDed: pmovmskb yields 0xffff only when all 32 bytes are equal. */
   1033 	pand	%xmm1, %xmm3
   1034 	pmovmskb %xmm3, %edx
   1035 	lea	32(%edi), %edi
   1036 	lea	32(%esi), %esi
   1037 	sub	$0xffff, %edx
   1038 	jnz	L(exit)
        	/* All equal: %eax/%edx = cursor + %ecx (the 10-byte offset is added back to */
        	/* %esi), pop %edi/%esi and continue at L(less48bytes). */
   1039 	lea	(%ecx, %edi,1), %eax
   1040 	lea	10(%ecx, %esi,1), %edx
   1041 	POP (%edi)
   1042 	POP (%esi)
   1043 	jmp	L(less48bytes)
   1044 
   1045 	cfi_restore_state
   1046 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
   1047 	ALIGN (4)
   1048 L(shr_10_gobble):
        	/* Loop variant for offset 10: 32 bytes per iteration. The count is reduced by */
        	/* a further 32 and the first 32 bytes are compared up front (xmm0, xmm3). */
   1049 	sub	$32, %ecx
   1050 	movdqa	16(%esi), %xmm0
   1051 	palignr	$10, (%esi), %xmm0
   1052 	pcmpeqb	(%edi), %xmm0
   1053 
   1054 	movdqa	32(%esi), %xmm3
   1055 	palignr	$10, 16(%esi), %xmm3
   1056 	pcmpeqb	16(%edi), %xmm3
   1057 
   1058 L(shr_10_gobble_loop):
        	/* %edx = merged mask of the current pair. sub sets CF on borrow and sbb folds */
        	/* it in: %edx is 0 only if all 32 bytes matched and the count did not borrow. */
        	/* The SSE instructions and lea leave the flags alone, so jz acts on the sbb. */
        	/* The next pair is realigned and compared ahead of the branch. */
   1059 	pand	%xmm0, %xmm3
   1060 	sub	$32, %ecx
   1061 	pmovmskb %xmm3, %edx
   1062 	movdqa	%xmm0, %xmm1
   1063 
   1064 	movdqa	64(%esi), %xmm3
   1065 	palignr	$10,48(%esi), %xmm3
   1066 	sbb	$0xffff, %edx
   1067 	movdqa	48(%esi), %xmm0
   1068 	palignr	$10,32(%esi), %xmm0
   1069 	pcmpeqb	32(%edi), %xmm0
   1070 	lea	32(%esi), %esi
   1071 	pcmpeqb	48(%edi), %xmm3
   1072 
   1073 	lea	32(%edi), %edi
   1074 	jz	L(shr_10_gobble_loop)
        	/* Loop left. Merge the masks of the pair that was compared ahead. */
   1075 	pand	%xmm0, %xmm3
   1076 
        	/* Negative count: the last sub borrowed, so sbb took one extra off %edx; */
        	/* compensate with inc and add the 32 back to the count. */
   1077 	cmp	$0, %ecx
   1078 	jge	L(shr_10_gobble_next)
   1079 	inc	%edx
   1080 	add	$32, %ecx
   1081 L(shr_10_gobble_next):
        	/* A non-zero %edx now means the pair checked in the loop differed. */
   1082 	test	%edx, %edx
   1083 	jnz	L(exit)
   1084 
        	/* Check the look-ahead pair the same way, after advancing both cursors. */
   1085 	pmovmskb %xmm3, %edx
   1086 	movdqa	%xmm0, %xmm1
   1087 	lea	32(%edi), %edi
   1088 	lea	32(%esi), %esi
   1089 	sub	$0xffff, %edx
   1090 	jnz	L(exit)
   1091 
        	/* All equal: %eax/%edx = cursor + %ecx (offset 10 added back to %esi), pop */
        	/* %edi/%esi and continue at L(less48bytes). */
   1092 	lea	(%ecx, %edi,1), %eax
   1093 	lea	10(%ecx, %esi,1), %edx
   1094 	POP (%edi)
   1095 	POP (%esi)
   1096 	jmp	L(less48bytes)
   1097 
   1098 	cfi_restore_state
   1099 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
   1100 	ALIGN (4)
   1101 L(shr_11):
        	/* Dispatch target for %edx == 11: %esi was rounded down to a 16-byte boundary, */
        	/* so the bytes still to be compared start at 11(%esi). lea and mov do not */
        	/* write flags, so the jae still acts on the cmp against 80. %eax = offset. */
   1102 	cmp	$80, %ecx
   1103 	lea	-48(%ecx), %ecx
   1104 	mov	%edx, %eax
   1105 	jae	L(shr_11_gobble)
   1106 
        	/* palignr $11 over two adjacent aligned chunks extracts the 16 bytes at */
        	/* 11(%esi) and 27(%esi); pcmpeqb turns them into per-byte equality masks. */
   1107 	movdqa	16(%esi), %xmm1
   1108 	movdqa	%xmm1, %xmm2
   1109 	palignr	$11, (%esi), %xmm1
   1110 	pcmpeqb	(%edi), %xmm1
   1111 
   1112 	movdqa	32(%esi), %xmm3
   1113 	palignr	$11, %xmm2, %xmm3
   1114 	pcmpeqb	16(%edi), %xmm3
   1115 
        	/* Masks ANDed: pmovmskb yields 0xffff only when all 32 bytes are equal. */
   1116 	pand	%xmm1, %xmm3
   1117 	pmovmskb %xmm3, %edx
   1118 	lea	32(%edi), %edi
   1119 	lea	32(%esi), %esi
   1120 	sub	$0xffff, %edx
   1121 	jnz	L(exit)
        	/* All equal: %eax/%edx = cursor + %ecx (the 11-byte offset is added back to */
        	/* %esi), pop %edi/%esi and continue at L(less48bytes). */
   1122 	lea	(%ecx, %edi,1), %eax
   1123 	lea	11(%ecx, %esi,1), %edx
   1124 	POP (%edi)
   1125 	POP (%esi)
   1126 	jmp	L(less48bytes)
   1127 
   1128 	cfi_restore_state
   1129 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
   1130 	ALIGN (4)
   1131 L(shr_11_gobble):
        	/* Loop variant for offset 11: 32 bytes per iteration. The count is reduced by */
        	/* a further 32 and the first 32 bytes are compared up front (xmm0, xmm3). */
   1132 	sub	$32, %ecx
   1133 	movdqa	16(%esi), %xmm0
   1134 	palignr	$11, (%esi), %xmm0
   1135 	pcmpeqb	(%edi), %xmm0
   1136 
   1137 	movdqa	32(%esi), %xmm3
   1138 	palignr	$11, 16(%esi), %xmm3
   1139 	pcmpeqb	16(%edi), %xmm3
   1140 
   1141 L(shr_11_gobble_loop):
        	/* %edx = merged mask of the current pair. sub sets CF on borrow and sbb folds */
        	/* it in: %edx is 0 only if all 32 bytes matched and the count did not borrow. */
        	/* The SSE instructions and lea leave the flags alone, so jz acts on the sbb. */
        	/* The next pair is realigned and compared ahead of the branch. */
   1142 	pand	%xmm0, %xmm3
   1143 	sub	$32, %ecx
   1144 	pmovmskb %xmm3, %edx
   1145 	movdqa	%xmm0, %xmm1
   1146 
   1147 	movdqa	64(%esi), %xmm3
   1148 	palignr	$11,48(%esi), %xmm3
   1149 	sbb	$0xffff, %edx
   1150 	movdqa	48(%esi), %xmm0
   1151 	palignr	$11,32(%esi), %xmm0
   1152 	pcmpeqb	32(%edi), %xmm0
   1153 	lea	32(%esi), %esi
   1154 	pcmpeqb	48(%edi), %xmm3
   1155 
   1156 	lea	32(%edi), %edi
   1157 	jz	L(shr_11_gobble_loop)
        	/* Loop left. Merge the masks of the pair that was compared ahead. */
   1158 	pand	%xmm0, %xmm3
   1159 
        	/* Negative count: the last sub borrowed, so sbb took one extra off %edx; */
        	/* compensate with inc and add the 32 back to the count. */
   1160 	cmp	$0, %ecx
   1161 	jge	L(shr_11_gobble_next)
   1162 	inc	%edx
   1163 	add	$32, %ecx
   1164 L(shr_11_gobble_next):
        	/* A non-zero %edx now means the pair checked in the loop differed. */
   1165 	test	%edx, %edx
   1166 	jnz	L(exit)
   1167 
        	/* Check the look-ahead pair the same way, after advancing both cursors. */
   1168 	pmovmskb %xmm3, %edx
   1169 	movdqa	%xmm0, %xmm1
   1170 	lea	32(%edi), %edi
   1171 	lea	32(%esi), %esi
   1172 	sub	$0xffff, %edx
   1173 	jnz	L(exit)
   1174 
        	/* All equal: %eax/%edx = cursor + %ecx (offset 11 added back to %esi), pop */
        	/* %edi/%esi and continue at L(less48bytes). */
   1175 	lea	(%ecx, %edi,1), %eax
   1176 	lea	11(%ecx, %esi,1), %edx
   1177 	POP (%edi)
   1178 	POP (%esi)
   1179 	jmp	L(less48bytes)
   1180 
   1181 	cfi_restore_state
   1182 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
   1183 	ALIGN (4)
   1184 L(shr_12):
        	/* Dispatch target for %edx == 12: %esi was rounded down to a 16-byte boundary, */
        	/* so the bytes still to be compared start at 12(%esi). lea and mov do not */
        	/* write flags, so the jae still acts on the cmp against 80. %eax = offset. */
   1185 	cmp	$80, %ecx
   1186 	lea	-48(%ecx), %ecx
   1187 	mov	%edx, %eax
   1188 	jae	L(shr_12_gobble)
   1189 
        	/* palignr $12 over two adjacent aligned chunks extracts the 16 bytes at */
        	/* 12(%esi) and 28(%esi); pcmpeqb turns them into per-byte equality masks. */
   1190 	movdqa	16(%esi), %xmm1
   1191 	movdqa	%xmm1, %xmm2
   1192 	palignr	$12, (%esi), %xmm1
   1193 	pcmpeqb	(%edi), %xmm1
   1194 
   1195 	movdqa	32(%esi), %xmm3
   1196 	palignr	$12, %xmm2, %xmm3
   1197 	pcmpeqb	16(%edi), %xmm3
   1198 
        	/* Masks ANDed: pmovmskb yields 0xffff only when all 32 bytes are equal. */
   1199 	pand	%xmm1, %xmm3
   1200 	pmovmskb %xmm3, %edx
   1201 	lea	32(%edi), %edi
   1202 	lea	32(%esi), %esi
   1203 	sub	$0xffff, %edx
   1204 	jnz	L(exit)
        	/* All equal: %eax/%edx = cursor + %ecx (the 12-byte offset is added back to */
        	/* %esi), pop %edi/%esi and continue at L(less48bytes). */
   1205 	lea	(%ecx, %edi,1), %eax
   1206 	lea	12(%ecx, %esi,1), %edx
   1207 	POP (%edi)
   1208 	POP (%esi)
   1209 	jmp	L(less48bytes)
   1210 
   1211 	cfi_restore_state
   1212 	cfi_remember_state
        	/* Unwind info only: the preceding path popped %edi/%esi, so the remembered */
        	/* CFI state is restored (and saved again) for the code below. */
   1213 	ALIGN (4)
   1214 L(shr_12_gobble):
        	/* Loop variant for offset 12: 32 bytes per iteration. The count is reduced by */
        	/* a further 32 and the first 32 bytes are compared up front (xmm0, xmm3). */
   1215 	sub	$32, %ecx
   1216 	movdqa	16(%esi), %xmm0
   1217 	palignr	$12, (%esi), %xmm0
   1218 	pcmpeqb	(%edi), %xmm0
   1219 
   1220 	movdqa	32(%esi), %xmm3
   1221 	palignr	$12, 16(%esi), %xmm3
   1222 	pcmpeqb	16(%edi), %xmm3
   1223 
   1224 L(shr_12_gobble_loop):
        	/* %edx = merged mask of the current pair. sub sets CF on borrow and sbb folds */
        	/* it in: %edx is 0 only if all 32 bytes matched and the count did not borrow. */
        	/* The SSE instructions and lea leave the flags alone, so jz acts on the sbb. */
        	/* The next pair is realigned and compared ahead of the branch. */
   1225 	pand	%xmm0, %xmm3
   1226 	sub	$32, %ecx
   1227 	pmovmskb %xmm3, %edx
   1228 	movdqa	%xmm0, %xmm1
   1229 
   1230 	movdqa	64(%esi), %xmm3
   1231 	palignr	$12,48(%esi), %xmm3
   1232 	sbb	$0xffff, %edx
   1233 	movdqa	48(%esi), %xmm0
   1234 	palignr	$12,32(%esi), %xmm0
   1235 	pcmpeqb	32(%edi), %xmm0
   1236 	lea	32(%esi), %esi
   1237 	pcmpeqb	48(%edi), %xmm3
   1238 
   1239 	lea	32(%edi), %edi
   1240 	jz	L(shr_12_gobble_loop)
        	/* Loop left. Merge the masks of the pair that was compared ahead. */
   1241 	pand	%xmm0, %xmm3
   1242 
        	/* Negative count: the last sub borrowed, so sbb took one extra off %edx; */
        	/* compensate with inc and add the 32 back to the count. */
   1243 	cmp	$0, %ecx
   1244 	jge	L(shr_12_gobble_next)
   1245 	inc	%edx
   1246 	add	$32, %ecx
   1247 L(shr_12_gobble_next):
        	/* A non-zero %edx now means the pair checked in the loop differed. */
   1248 	test	%edx, %edx
   1249 	jnz	L(exit)
   1250 
        	/* Check the look-ahead pair the same way, after advancing both cursors. */
   1251 	pmovmskb %xmm3, %edx
   1252 	movdqa	%xmm0, %xmm1
   1253 	lea	32(%edi), %edi
   1254 	lea	32(%esi), %esi
   1255 	sub	$0xffff, %edx
   1256 	jnz	L(exit)
   1257 
        	/* All equal: %eax/%edx = cursor + %ecx (offset 12 added back to %esi), pop */
        	/* %edi/%esi and continue at L(less48bytes). */
   1258 	lea	(%ecx, %edi,1), %eax
   1259 	lea	12(%ecx, %esi,1), %edx
   1260 	POP (%edi)
   1261 	POP (%esi)
   1262 	jmp	L(less48bytes)
   1263 
   1264 	cfi_restore_state	/* unwind info: back to the CFI state remembered before the POPs above (macro defined outside this chunk) */
   1265 	cfi_remember_state	/* remember it again for the POPs at the end of this block */
   1266 	ALIGN (4)
   1267 L(shr_13):	/* Short path for palignr shift 13: compares exactly one 32-byte chunk, then hands the rest to L(less48bytes). */
   1268 	cmp	$80, %ecx	/* sets the flags consumed by the jae three lines down */
   1269 	lea	-48(%ecx), %ecx	/* count -= 48; lea leaves the cmp flags untouched */
   1270 	mov	%edx, %eax	/* keep incoming %edx in %eax; L(exit) adds %eax to %esi (presumably the shift, 13 - set outside this chunk) */
   1271 	jae	L(shr_13_gobble)	/* count was >= 80 (unsigned): use the looping variant */
   1272 
   1273 	movdqa	16(%esi), %xmm1	/* aligned load of the upper 16 bytes of the 32-byte window at %esi */
   1274 	movdqa	%xmm1, %xmm2	/* copy reused as the low half of the second palignr */
   1275 	palignr	$13, (%esi), %xmm1	/* xmm1 = window bytes 13..28 */
   1276 	pcmpeqb	(%edi), %xmm1	/* 0xff in every byte equal to the %edi-side byte */
   1277 
   1278 	movdqa	32(%esi), %xmm3
   1279 	palignr	$13, %xmm2, %xmm3	/* xmm3 = window bytes 29..44 */
   1280 	pcmpeqb	16(%edi), %xmm3
   1281 
   1282 	pand	%xmm1, %xmm3	/* merge both 16-byte results */
   1283 	pmovmskb %xmm3, %edx	/* 16-bit mask; 0xffff when all 32 bytes matched */
   1284 	lea	32(%edi), %edi	/* step both pointers past the chunk */
   1285 	lea	32(%esi), %esi
   1286 	sub	$0xffff, %edx	/* zero iff nothing differed */
   1287 	jnz	L(exit)	/* mismatch: L(exit) locates it from %edx and %xmm1 */
   1288 	lea	(%ecx, %edi,1), %eax	/* %eax = %edi-side pointer + remaining count (end pointer for the tail code) */
   1289 	lea	13(%ecx, %esi,1), %edx	/* %edx = %esi-side end pointer, with the 13-byte shift added back */
   1290 	POP (%edi)	/* POP is a macro defined outside this chunk; presumably pops the register and updates CFI */
   1291 	POP (%esi)
   1292 	jmp	L(less48bytes)	/* compare the last %ecx bytes via the end pointers */
   1293 
   1294 	cfi_restore_state	/* unwind info: back to the CFI state remembered before the POPs above (macro defined outside this chunk) */
   1295 	cfi_remember_state	/* remember it again for the POPs at the end of this block */
   1296 	ALIGN (4)
   1297 L(shr_13_gobble):	/* Looping variant of L(shr_13) (count was >= 80): compares 32 bytes per pass, one chunk ahead. */
   1298 	sub	$32, %ecx	/* account for the first chunk compared below */
   1299 	movdqa	16(%esi), %xmm0
   1300 	palignr	$13, (%esi), %xmm0	/* xmm0 = window bytes 13..28 */
   1301 	pcmpeqb	(%edi), %xmm0	/* 0xff in every byte equal to the %edi-side byte */
   1302 
   1303 	movdqa	32(%esi), %xmm3
   1304 	palignr	$13, 16(%esi), %xmm3	/* xmm3 = window bytes 29..44 */
   1305 	pcmpeqb	16(%edi), %xmm3
   1306 
   1307 L(shr_13_gobble_loop):	/* per pass: judge the current chunk while starting the compare of the next one */
   1308 	pand	%xmm0, %xmm3	/* merge both halves of the current chunk */
   1309 	sub	$32, %ecx	/* CF = 1 when the count runs out; consumed by the sbb below */
   1310 	pmovmskb %xmm3, %edx	/* 0xffff when all 32 bytes matched; SSE ops leave the flags alone */
   1311 	movdqa	%xmm0, %xmm1	/* L(exit) needs the first-half result in %xmm1 */
   1312 
   1313 	movdqa	64(%esi), %xmm3
   1314 	palignr	$13,48(%esi), %xmm3	/* second half of the next chunk (window bytes 61..76) */
   1315 	sbb	$0xffff, %edx	/* %edx = mask - 0xffff - CF: ZF only if all matched AND no borrow */
   1316 	movdqa	48(%esi), %xmm0
   1317 	palignr	$13,32(%esi), %xmm0	/* first half of the next chunk (window bytes 45..60) */
   1318 	pcmpeqb	32(%edi), %xmm0
   1319 	lea	32(%esi), %esi	/* lea keeps the sbb flags alive for the jz below */
   1320 	pcmpeqb	48(%edi), %xmm3
   1321 
   1322 	lea	32(%edi), %edi
   1323 	jz	L(shr_13_gobble_loop)	/* ZF from sbb: chunk equal and count not exhausted */
   1324 	pand	%xmm0, %xmm3	/* merged result of the chunk compared ahead */
   1325 
   1326 	cmp	$-32, %ecx	/* a borrow leaves %ecx in [-32,-1]; test that range unsigned */
   1327 	jb	L(shr_13_gobble_next)	/* no borrow. The former signed cmp $0/jge misread counts >= 2^31 as borrowed */
   1328 	inc	%edx	/* borrow: remove the CF that sbb folded into %edx */
   1329 	add	$32, %ecx	/* and give the 32 bytes back to the count */
   1330 L(shr_13_gobble_next):
   1331 	test	%edx, %edx
   1332 	jnz	L(exit)	/* mismatch in the chunk just judged */
   1333 
   1334 	pmovmskb %xmm3, %edx	/* now judge the chunk that was compared ahead */
   1335 	movdqa	%xmm0, %xmm1	/* its first-half result, for L(exit) */
   1336 	lea	32(%edi), %edi
   1337 	lea	32(%esi), %esi
   1338 	sub	$0xffff, %edx	/* zero iff all 32 bytes matched */
   1339 	jnz	L(exit)
   1340 
   1341 	lea	(%ecx, %edi,1), %eax	/* %eax = %edi-side pointer + remaining count (end pointer for the tail code) */
   1342 	lea	13(%ecx, %esi,1), %edx	/* %edx = %esi-side end pointer, with the 13-byte shift added back */
   1343 	POP (%edi)	/* POP is a macro defined outside this chunk; presumably pops the register and updates CFI */
   1344 	POP (%esi)
   1345 	jmp	L(less48bytes)	/* compare the last %ecx bytes via the end pointers */
   1346 
   1347 	cfi_restore_state	/* unwind info: back to the CFI state remembered before the POPs above (macro defined outside this chunk) */
   1348 	cfi_remember_state	/* remember it again for the POPs at the end of this block */
   1349 	ALIGN (4)
   1350 L(shr_14):	/* Short path for palignr shift 14: compares exactly one 32-byte chunk, then hands the rest to L(less48bytes). */
   1351 	cmp	$80, %ecx	/* sets the flags consumed by the jae three lines down */
   1352 	lea	-48(%ecx), %ecx	/* count -= 48; lea leaves the cmp flags untouched */
   1353 	mov	%edx, %eax	/* keep incoming %edx in %eax; L(exit) adds %eax to %esi (presumably the shift, 14 - set outside this chunk) */
   1354 	jae	L(shr_14_gobble)	/* count was >= 80 (unsigned): use the looping variant */
   1355 
   1356 	movdqa	16(%esi), %xmm1	/* aligned load of the upper 16 bytes of the 32-byte window at %esi */
   1357 	movdqa	%xmm1, %xmm2	/* copy reused as the low half of the second palignr */
   1358 	palignr	$14, (%esi), %xmm1	/* xmm1 = window bytes 14..29 */
   1359 	pcmpeqb	(%edi), %xmm1	/* 0xff in every byte equal to the %edi-side byte */
   1360 
   1361 	movdqa	32(%esi), %xmm3
   1362 	palignr	$14, %xmm2, %xmm3	/* xmm3 = window bytes 30..45 */
   1363 	pcmpeqb	16(%edi), %xmm3
   1364 
   1365 	pand	%xmm1, %xmm3	/* merge both 16-byte results */
   1366 	pmovmskb %xmm3, %edx	/* 16-bit mask; 0xffff when all 32 bytes matched */
   1367 	lea	32(%edi), %edi	/* step both pointers past the chunk */
   1368 	lea	32(%esi), %esi
   1369 	sub	$0xffff, %edx	/* zero iff nothing differed */
   1370 	jnz	L(exit)	/* mismatch: L(exit) locates it from %edx and %xmm1 */
   1371 	lea	(%ecx, %edi,1), %eax	/* %eax = %edi-side pointer + remaining count (end pointer for the tail code) */
   1372 	lea	14(%ecx, %esi,1), %edx	/* %edx = %esi-side end pointer, with the 14-byte shift added back */
   1373 	POP (%edi)	/* POP is a macro defined outside this chunk; presumably pops the register and updates CFI */
   1374 	POP (%esi)
   1375 	jmp	L(less48bytes)	/* compare the last %ecx bytes via the end pointers */
   1376 
   1377 	cfi_restore_state	/* unwind info: back to the CFI state remembered before the POPs above (macro defined outside this chunk) */
   1378 	cfi_remember_state	/* remember it again for the POPs at the end of this block */
   1379 	ALIGN (4)
   1380 L(shr_14_gobble):	/* Looping variant of L(shr_14) (count was >= 80): compares 32 bytes per pass, one chunk ahead. */
   1381 	sub	$32, %ecx	/* account for the first chunk compared below */
   1382 	movdqa	16(%esi), %xmm0
   1383 	palignr	$14, (%esi), %xmm0	/* xmm0 = window bytes 14..29 */
   1384 	pcmpeqb	(%edi), %xmm0	/* 0xff in every byte equal to the %edi-side byte */
   1385 
   1386 	movdqa	32(%esi), %xmm3
   1387 	palignr	$14, 16(%esi), %xmm3	/* xmm3 = window bytes 30..45 */
   1388 	pcmpeqb	16(%edi), %xmm3
   1389 
   1390 L(shr_14_gobble_loop):	/* per pass: judge the current chunk while starting the compare of the next one */
   1391 	pand	%xmm0, %xmm3	/* merge both halves of the current chunk */
   1392 	sub	$32, %ecx	/* CF = 1 when the count runs out; consumed by the sbb below */
   1393 	pmovmskb %xmm3, %edx	/* 0xffff when all 32 bytes matched; SSE ops leave the flags alone */
   1394 	movdqa	%xmm0, %xmm1	/* L(exit) needs the first-half result in %xmm1 */
   1395 
   1396 	movdqa	64(%esi), %xmm3
   1397 	palignr	$14,48(%esi), %xmm3	/* second half of the next chunk (window bytes 62..77) */
   1398 	sbb	$0xffff, %edx	/* %edx = mask - 0xffff - CF: ZF only if all matched AND no borrow */
   1399 	movdqa	48(%esi), %xmm0
   1400 	palignr	$14,32(%esi), %xmm0	/* first half of the next chunk (window bytes 46..61) */
   1401 	pcmpeqb	32(%edi), %xmm0
   1402 	lea	32(%esi), %esi	/* lea keeps the sbb flags alive for the jz below */
   1403 	pcmpeqb	48(%edi), %xmm3
   1404 
   1405 	lea	32(%edi), %edi
   1406 	jz	L(shr_14_gobble_loop)	/* ZF from sbb: chunk equal and count not exhausted */
   1407 	pand	%xmm0, %xmm3	/* merged result of the chunk compared ahead */
   1408 
   1409 	cmp	$-32, %ecx	/* a borrow leaves %ecx in [-32,-1]; test that range unsigned */
   1410 	jb	L(shr_14_gobble_next)	/* no borrow. The former signed cmp $0/jge misread counts >= 2^31 as borrowed */
   1411 	inc	%edx	/* borrow: remove the CF that sbb folded into %edx */
   1412 	add	$32, %ecx	/* and give the 32 bytes back to the count */
   1413 L(shr_14_gobble_next):
   1414 	test	%edx, %edx
   1415 	jnz	L(exit)	/* mismatch in the chunk just judged */
   1416 
   1417 	pmovmskb %xmm3, %edx	/* now judge the chunk that was compared ahead */
   1418 	movdqa	%xmm0, %xmm1	/* its first-half result, for L(exit) */
   1419 	lea	32(%edi), %edi
   1420 	lea	32(%esi), %esi
   1421 	sub	$0xffff, %edx	/* zero iff all 32 bytes matched */
   1422 	jnz	L(exit)
   1423 
   1424 	lea	(%ecx, %edi,1), %eax	/* %eax = %edi-side pointer + remaining count (end pointer for the tail code) */
   1425 	lea	14(%ecx, %esi,1), %edx	/* %edx = %esi-side end pointer, with the 14-byte shift added back */
   1426 	POP (%edi)	/* POP is a macro defined outside this chunk; presumably pops the register and updates CFI */
   1427 	POP (%esi)
   1428 	jmp	L(less48bytes)	/* compare the last %ecx bytes via the end pointers */
   1429 
   1430 	cfi_restore_state	/* unwind info: back to the CFI state remembered before the POPs above (macro defined outside this chunk) */
   1431 	cfi_remember_state	/* remember it again for the POPs at the end of this block */
   1432 	ALIGN (4)
   1433 L(shr_15):	/* Short path for palignr shift 15: compares exactly one 32-byte chunk, then hands the rest to L(less48bytes). */
   1434 	cmp	$80, %ecx	/* sets the flags consumed by the jae three lines down */
   1435 	lea	-48(%ecx), %ecx	/* count -= 48; lea leaves the cmp flags untouched */
   1436 	mov	%edx, %eax	/* keep incoming %edx in %eax; L(exit) adds %eax to %esi (presumably the shift, 15 - set outside this chunk) */
   1437 	jae	L(shr_15_gobble)	/* count was >= 80 (unsigned): use the looping variant */
   1438 
   1439 	movdqa	16(%esi), %xmm1	/* aligned load of the upper 16 bytes of the 32-byte window at %esi */
   1440 	movdqa	%xmm1, %xmm2	/* copy reused as the low half of the second palignr */
   1441 	palignr	$15, (%esi), %xmm1	/* xmm1 = window bytes 15..30 */
   1442 	pcmpeqb	(%edi), %xmm1	/* 0xff in every byte equal to the %edi-side byte */
   1443 
   1444 	movdqa	32(%esi), %xmm3
   1445 	palignr	$15, %xmm2, %xmm3	/* xmm3 = window bytes 31..46 */
   1446 	pcmpeqb	16(%edi), %xmm3
   1447 
   1448 	pand	%xmm1, %xmm3	/* merge both 16-byte results */
   1449 	pmovmskb %xmm3, %edx	/* 16-bit mask; 0xffff when all 32 bytes matched */
   1450 	lea	32(%edi), %edi	/* step both pointers past the chunk */
   1451 	lea	32(%esi), %esi
   1452 	sub	$0xffff, %edx	/* zero iff nothing differed */
   1453 	jnz	L(exit)	/* mismatch: L(exit) locates it from %edx and %xmm1 */
   1454 	lea	(%ecx, %edi,1), %eax	/* %eax = %edi-side pointer + remaining count (end pointer for the tail code) */
   1455 	lea	15(%ecx, %esi,1), %edx	/* %edx = %esi-side end pointer, with the 15-byte shift added back */
   1456 	POP (%edi)	/* POP is a macro defined outside this chunk; presumably pops the register and updates CFI */
   1457 	POP (%esi)
   1458 	jmp	L(less48bytes)	/* compare the last %ecx bytes via the end pointers */
   1459 
   1460 	cfi_restore_state	/* unwind info: back to the CFI state remembered before the POPs above (macro defined outside this chunk) */
   1461 	cfi_remember_state	/* remember it again for the POPs at the end of this block */
   1462 	ALIGN (4)
   1463 L(shr_15_gobble):	/* Looping variant of L(shr_15) (count was >= 80): compares 32 bytes per pass, one chunk ahead. */
   1464 	sub	$32, %ecx	/* account for the first chunk compared below */
   1465 	movdqa	16(%esi), %xmm0
   1466 	palignr	$15, (%esi), %xmm0	/* xmm0 = window bytes 15..30 */
   1467 	pcmpeqb	(%edi), %xmm0	/* 0xff in every byte equal to the %edi-side byte */
   1468 
   1469 	movdqa	32(%esi), %xmm3
   1470 	palignr	$15, 16(%esi), %xmm3	/* xmm3 = window bytes 31..46 */
   1471 	pcmpeqb	16(%edi), %xmm3
   1472 
   1473 L(shr_15_gobble_loop):	/* per pass: judge the current chunk while starting the compare of the next one */
   1474 	pand	%xmm0, %xmm3	/* merge both halves of the current chunk */
   1475 	sub	$32, %ecx	/* CF = 1 when the count runs out; consumed by the sbb below */
   1476 	pmovmskb %xmm3, %edx	/* 0xffff when all 32 bytes matched; SSE ops leave the flags alone */
   1477 	movdqa	%xmm0, %xmm1	/* L(exit) needs the first-half result in %xmm1 */
   1478 
   1479 	movdqa	64(%esi), %xmm3
   1480 	palignr	$15,48(%esi), %xmm3	/* second half of the next chunk (window bytes 63..78) */
   1481 	sbb	$0xffff, %edx	/* %edx = mask - 0xffff - CF: ZF only if all matched AND no borrow */
   1482 	movdqa	48(%esi), %xmm0
   1483 	palignr	$15,32(%esi), %xmm0	/* first half of the next chunk (window bytes 47..62) */
   1484 	pcmpeqb	32(%edi), %xmm0
   1485 	lea	32(%esi), %esi	/* lea keeps the sbb flags alive for the jz below */
   1486 	pcmpeqb	48(%edi), %xmm3
   1487 
   1488 	lea	32(%edi), %edi
   1489 	jz	L(shr_15_gobble_loop)	/* ZF from sbb: chunk equal and count not exhausted */
   1490 	pand	%xmm0, %xmm3	/* merged result of the chunk compared ahead */
   1491 
   1492 	cmp	$-32, %ecx	/* a borrow leaves %ecx in [-32,-1]; test that range unsigned */
   1493 	jb	L(shr_15_gobble_next)	/* no borrow. The former signed cmp $0/jge misread counts >= 2^31 as borrowed */
   1494 	inc	%edx	/* borrow: remove the CF that sbb folded into %edx */
   1495 	add	$32, %ecx	/* and give the 32 bytes back to the count */
   1496 L(shr_15_gobble_next):
   1497 	test	%edx, %edx
   1498 	jnz	L(exit)	/* mismatch in the chunk just judged */
   1499 
   1500 	pmovmskb %xmm3, %edx	/* now judge the chunk that was compared ahead */
   1501 	movdqa	%xmm0, %xmm1	/* its first-half result, for L(exit) */
   1502 	lea	32(%edi), %edi
   1503 	lea	32(%esi), %esi
   1504 	sub	$0xffff, %edx	/* zero iff all 32 bytes matched */
   1505 	jnz	L(exit)
   1506 
   1507 	lea	(%ecx, %edi,1), %eax	/* %eax = %edi-side pointer + remaining count (end pointer for the tail code) */
   1508 	lea	15(%ecx, %esi,1), %edx	/* %edx = %esi-side end pointer, with the 15-byte shift added back */
   1509 	POP (%edi)	/* POP is a macro defined outside this chunk; presumably pops the register and updates CFI */
   1510 	POP (%esi)
   1511 	jmp	L(less48bytes)	/* compare the last %ecx bytes via the end pointers */
   1512 
   1513 	cfi_restore_state	/* unwind info: back to the CFI state remembered before the POPs above (macro defined outside this chunk) */
   1514 	cfi_remember_state
   1515 	ALIGN (4)
   1516 L(exit):	/* Mismatch in a 32-byte chunk. Callers in this chunk pass: %edx = combined mask - 0xffff, %xmm1 = first-half compare result, %edi/%esi just past the chunk, %eax = offset for %esi. */
   1517 	pmovmskb %xmm1, %ebx	/* clobbers %ebx; NOTE(review): presumably saved by the prologue outside this chunk */
   1518 	sub	$0xffff, %ebx	/* zero iff the first 16 bytes all matched */
   1519 	jz	L(first16bytes)	/* first half clean: the difference is in the second half, keep %edx and the pointers */
   1520 	lea	-16(%esi), %esi	/* difference in the first half: move both pointers back to the end of that half */
   1521 	lea	-16(%edi), %edi
   1522 	mov	%ebx, %edx	/* and use the first-half value as the bit map */
   1523 L(first16bytes):
   1524 	add	%eax, %esi	/* apply the offset saved in %eax to the %esi-side pointer */
   1525 L(less16bytes):	/* %edx is the negated mismatch map, so its lowest set bit marks the first differing byte; bit i -> offset -16+i */
   1526 	test	%dl, %dl
   1527 	jz	L(next_24_bytes)	/* nothing in bits 0..7: examine %dh */
   1528 
   1529 	test	$0x01, %dl
   1530 	jnz	L(Byte16)
   1531 
   1532 	test	$0x02, %dl
   1533 	jnz	L(Byte17)
   1534 
   1535 	test	$0x04, %dl
   1536 	jnz	L(Byte18)
   1537 
   1538 	test	$0x08, %dl
   1539 	jnz	L(Byte19)
   1540 
   1541 	test	$0x10, %dl
   1542 	jnz	L(Byte20)
   1543 
   1544 	test	$0x20, %dl
   1545 	jnz	L(Byte21)
   1546 
   1547 	test	$0x40, %dl
   1548 	jnz	L(Byte22)
   1549 L(Byte23):	/* fall-through: %dl is non-zero and bits 0..6 are clear, so bit 7 is set */
   1550 	movzbl	 -9(%edi), %eax
   1551 	movzbl	 -9(%esi), %edx
   1552 	sub	%edx, %eax	/* result = %edi-side byte - %esi-side byte */
   1553 	RETURN	/* macro defined outside this chunk; presumably restores %ebx and returns */
   1554 
   1555 	ALIGN (4)
   1556 L(Byte16):	/* L(Byte16)..L(Byte22): return the difference of the two bytes at a fixed negative offset from %edi and %esi */
   1557 	movzbl	 -16(%edi), %eax	/* zero-extended %edi-side byte */
   1558 	movzbl	 -16(%esi), %edx	/* zero-extended %esi-side byte */
   1559 	sub	%edx, %eax	/* result = %edi-side byte - %esi-side byte */
   1560 	RETURN	/* macro defined outside this chunk; presumably restores %ebx and returns */
   1561 
   1562 	ALIGN (4)
   1563 L(Byte17):	/* same as L(Byte16) at offset -15 */
   1564 	movzbl	 -15(%edi), %eax
   1565 	movzbl	 -15(%esi), %edx
   1566 	sub	%edx, %eax
   1567 	RETURN
   1568 
   1569 	ALIGN (4)
   1570 L(Byte18):	/* offset -14 */
   1571 	movzbl	 -14(%edi), %eax
   1572 	movzbl	 -14(%esi), %edx
   1573 	sub	%edx, %eax
   1574 	RETURN
   1575 
   1576 	ALIGN (4)
   1577 L(Byte19):	/* offset -13 */
   1578 	movzbl	 -13(%edi), %eax
   1579 	movzbl	 -13(%esi), %edx
   1580 	sub	%edx, %eax
   1581 	RETURN
   1582 
   1583 	ALIGN (4)
   1584 L(Byte20):	/* offset -12 */
   1585 	movzbl	 -12(%edi), %eax
   1586 	movzbl	 -12(%esi), %edx
   1587 	sub	%edx, %eax
   1588 	RETURN
   1589 
   1590 	ALIGN (4)
   1591 L(Byte21):	/* offset -11 */
   1592 	movzbl	 -11(%edi), %eax
   1593 	movzbl	 -11(%esi), %edx
   1594 	sub	%edx, %eax
   1595 	RETURN
   1596 
   1597 	ALIGN (4)
   1598 L(Byte22):	/* offset -10 */
   1599 	movzbl	 -10(%edi), %eax
   1600 	movzbl	 -10(%esi), %edx
   1601 	sub	%edx, %eax
   1602 	RETURN
   1603 
   1604 	ALIGN (4)
   1605 L(next_24_bytes):	/* Reached from L(less16bytes) when %dl == 0: the first differing byte is indexed by bits 8..15 (%dh). */
   1606 	lea	8(%edi), %edi	/* shift both pointers by 8 so the Byte16..Byte22 handlers can be reused for bits 8..14 */
   1607 	lea	8(%esi), %esi
   1608 	test	$0x01, %dh
   1609 	jnz	L(Byte16)
   1610 
   1611 	test	$0x02, %dh
   1612 	jnz	L(Byte17)
   1613 
   1614 	test	$0x04, %dh
   1615 	jnz	L(Byte18)
   1616 
   1617 	test	$0x08, %dh
   1618 	jnz	L(Byte19)
   1619 
   1620 	test	$0x10, %dh
   1621 	jnz	L(Byte20)
   1622 
   1623 	test	$0x20, %dh
   1624 	jnz	L(Byte21)
   1625 
   1626 	test	$0x40, %dh
   1627 	jnz	L(Byte22)
   1628 
   1629 	ALIGN (4)
   1630 L(Byte31):	/* fall-through when none of bits 8..14 is set: compare the bytes at offset -9 from the adjusted pointers */
   1631 	movzbl	 -9(%edi), %eax
   1632 	movzbl	 -9(%esi), %edx
   1633 	sub	%edx, %eax	/* result = %edi-side byte - %esi-side byte */
   1634 	RETURN_END	/* macro defined outside this chunk; presumably RETURN without the trailing CFI re-push */
   1635 	CFI_PUSH (%ebx)	/* unwind info: code below is entered with %ebx still pushed (macro defined outside this chunk) */
   1636 
   1637 	ALIGN (4)
   1638 L(more8bytes):	/* Tail dispatch for count >= 8: jump to the L(<n>bytes) entry matching %ecx. */
   1639 	cmp	$16, %ecx
   1640 	jae	L(more16bytes)	/* unsigned: 16 and above is handled by the next ladder */
   1641 	cmp	$8, %ecx
   1642 	je	L(8bytes)
   1643 	cmp	$9, %ecx
   1644 	je	L(9bytes)
   1645 	cmp	$10, %ecx
   1646 	je	L(10bytes)
   1647 	cmp	$11, %ecx
   1648 	je	L(11bytes)
   1649 	cmp	$12, %ecx
   1650 	je	L(12bytes)
   1651 	cmp	$13, %ecx
   1652 	je	L(13bytes)
   1653 	cmp	$14, %ecx
   1654 	je	L(14bytes)
   1655 	jmp	L(15bytes)	/* 8..14 ruled out, so %ecx is 15 here */
   1656 
   1657 	ALIGN (4)
   1658 L(more16bytes):	/* Tail dispatch for count >= 16. */
   1659 	cmp	$24, %ecx
   1660 	jae	L(more24bytes)	/* unsigned: 24 and above is handled by the next ladder */
   1661 	cmp	$16, %ecx
   1662 	je	L(16bytes)
   1663 	cmp	$17, %ecx
   1664 	je	L(17bytes)
   1665 	cmp	$18, %ecx
   1666 	je	L(18bytes)
   1667 	cmp	$19, %ecx
   1668 	je	L(19bytes)
   1669 	cmp	$20, %ecx
   1670 	je	L(20bytes)
   1671 	cmp	$21, %ecx
   1672 	je	L(21bytes)
   1673 	cmp	$22, %ecx
   1674 	je	L(22bytes)
   1675 	jmp	L(23bytes)	/* 16..22 ruled out, so %ecx is 23 here */
   1676 
   1677 	ALIGN (4)
   1678 L(more24bytes):	/* Tail dispatch for count >= 24. */
   1679 	cmp	$32, %ecx
   1680 	jae	L(more32bytes)	/* unsigned: 32 and above is handled by the next ladder */
   1681 	cmp	$24, %ecx
   1682 	je	L(24bytes)
   1683 	cmp	$25, %ecx
   1684 	je	L(25bytes)
   1685 	cmp	$26, %ecx
   1686 	je	L(26bytes)
   1687 	cmp	$27, %ecx
   1688 	je	L(27bytes)
   1689 	cmp	$28, %ecx
   1690 	je	L(28bytes)
   1691 	cmp	$29, %ecx
   1692 	je	L(29bytes)
   1693 	cmp	$30, %ecx
   1694 	je	L(30bytes)
   1695 	jmp	L(31bytes)	/* 24..30 ruled out, so %ecx is 31 here */
   1696 
   1697 	ALIGN (4)
   1698 L(more32bytes):	/* Tail dispatch for count >= 32. */
   1699 	cmp	$40, %ecx
   1700 	jae	L(more40bytes)	/* unsigned: 40 and above is handled by the next ladder */
   1701 	cmp	$32, %ecx
   1702 	je	L(32bytes)
   1703 	cmp	$33, %ecx
   1704 	je	L(33bytes)
   1705 	cmp	$34, %ecx
   1706 	je	L(34bytes)
   1707 	cmp	$35, %ecx
   1708 	je	L(35bytes)
   1709 	cmp	$36, %ecx
   1710 	je	L(36bytes)
   1711 	cmp	$37, %ecx
   1712 	je	L(37bytes)
   1713 	cmp	$38, %ecx
   1714 	je	L(38bytes)
   1715 	jmp	L(39bytes)	/* 32..38 ruled out, so %ecx is 39 here */
   1716 
   1717 	ALIGN (4)
   1718 L(more40bytes):	/* Tail dispatch for count >= 40; there is no upper-bound check in this ladder. */
   1719 	cmp	$40, %ecx
   1720 	je	L(40bytes)
   1721 	cmp	$41, %ecx
   1722 	je	L(41bytes)
   1723 	cmp	$42, %ecx
   1724 	je	L(42bytes)
   1725 	cmp	$43, %ecx
   1726 	je	L(43bytes)
   1727 	cmp	$44, %ecx
   1728 	je	L(44bytes)
   1729 	cmp	$45, %ecx
   1730 	je	L(45bytes)
   1731 	cmp	$46, %ecx
   1732 	je	L(46bytes)
   1733 	jmp	L(47bytes)	/* any other value lands here; NOTE(review): assumes callers guarantee count <= 47 - confirm */
   1734 
   1735 	ALIGN (4)
   1736 L(less48bytes):	/* Tail entry: %ecx = bytes left, %eax/%edx = end pointers (the L(<n>bytes) handlers read at -n from them). */
   1737 	cmp	$8, %ecx
   1738 	jae	L(more8bytes)	/* unsigned: 8 and above goes through the dispatch ladders */
   1739 	cmp	$2, %ecx
   1740 	je	L(2bytes)
   1741 	cmp	$3, %ecx
   1742 	je	L(3bytes)
   1743 	cmp	$4, %ecx
   1744 	je	L(4bytes)
   1745 	cmp	$5, %ecx
   1746 	je	L(5bytes)
   1747 	cmp	$6, %ecx
   1748 	je	L(6bytes)
   1749 	jmp	L(7bytes)	/* taken for 7 and also for 0 and 1; NOTE(review): presumably safe because the 7 bytes before the end were already compared or such counts are filtered earlier - confirm */
   1750 
   1751 
   1752 	ALIGN (4)
   1753 L(44bytes):	/* Tail chain for counts 44,40,...,4: compare one dword per step, counted back from the end pointers %eax/%edx, falling through label to label. */
   1754 	mov	-44(%eax), %ecx	/* dword from the %eax-side buffer */
   1755 	mov	-44(%edx), %ebx	/* dword from the %edx-side buffer */
   1756 	cmp	%ebx, %ecx
   1757 	jne	L(find_diff)	/* L(find_diff) picks the first differing byte out of %ecx/%ebx */
   1758 L(40bytes):
   1759 	mov	-40(%eax), %ecx
   1760 	mov	-40(%edx), %ebx
   1761 	cmp	%ebx, %ecx
   1762 	jne	L(find_diff)
   1763 L(36bytes):
   1764 	mov	-36(%eax), %ecx
   1765 	mov	-36(%edx), %ebx
   1766 	cmp	%ebx, %ecx
   1767 	jne	L(find_diff)
   1768 L(32bytes):
   1769 	mov	-32(%eax), %ecx
   1770 	mov	-32(%edx), %ebx
   1771 	cmp	%ebx, %ecx
   1772 	jne	L(find_diff)
   1773 L(28bytes):
   1774 	mov	-28(%eax), %ecx
   1775 	mov	-28(%edx), %ebx
   1776 	cmp	%ebx, %ecx
   1777 	jne	L(find_diff)
   1778 L(24bytes):
   1779 	mov	-24(%eax), %ecx
   1780 	mov	-24(%edx), %ebx
   1781 	cmp	%ebx, %ecx
   1782 	jne	L(find_diff)
   1783 L(20bytes):
   1784 	mov	-20(%eax), %ecx
   1785 	mov	-20(%edx), %ebx
   1786 	cmp	%ebx, %ecx
   1787 	jne	L(find_diff)
   1788 L(16bytes):
   1789 	mov	-16(%eax), %ecx
   1790 	mov	-16(%edx), %ebx
   1791 	cmp	%ebx, %ecx
   1792 	jne	L(find_diff)
   1793 L(12bytes):
   1794 	mov	-12(%eax), %ecx
   1795 	mov	-12(%edx), %ebx
   1796 	cmp	%ebx, %ecx
   1797 	jne	L(find_diff)
   1798 L(8bytes):
   1799 	mov	-8(%eax), %ecx
   1800 	mov	-8(%edx), %ebx
   1801 	cmp	%ebx, %ecx
   1802 	jne	L(find_diff)
   1803 L(4bytes):
   1804 	mov	-4(%eax), %ecx
   1805 	mov	-4(%edx), %ebx
   1806 	cmp	%ebx, %ecx
   1807 	mov	$0, %eax	/* preload the "equal" result; mov (unlike xor) keeps the cmp flags for the jne */
   1808 	jne	L(find_diff)
   1809 	POP (%ebx)	/* all bytes equal: restore %ebx and return 0 */
   1810 	ret
   1811 	CFI_PUSH (%ebx)	/* unwind info: code below is entered with %ebx still pushed (macro defined outside this chunk) */
   1812 
   1813 	ALIGN (4)
   1814 L(45bytes):	/* Tail chain for counts 45,41,...,5: dword steps counted back from the end pointers %eax/%edx, then the single last byte. */
   1815 	mov	-45(%eax), %ecx	/* dword from the %eax-side buffer */
   1816 	mov	-45(%edx), %ebx	/* dword from the %edx-side buffer */
   1817 	cmp	%ebx, %ecx
   1818 	jne	L(find_diff)	/* L(find_diff) picks the first differing byte out of %ecx/%ebx */
   1819 L(41bytes):
   1820 	mov	-41(%eax), %ecx
   1821 	mov	-41(%edx), %ebx
   1822 	cmp	%ebx, %ecx
   1823 	jne	L(find_diff)
   1824 L(37bytes):
   1825 	mov	-37(%eax), %ecx
   1826 	mov	-37(%edx), %ebx
   1827 	cmp	%ebx, %ecx
   1828 	jne	L(find_diff)
   1829 L(33bytes):
   1830 	mov	-33(%eax), %ecx
   1831 	mov	-33(%edx), %ebx
   1832 	cmp	%ebx, %ecx
   1833 	jne	L(find_diff)
   1834 L(29bytes):
   1835 	mov	-29(%eax), %ecx
   1836 	mov	-29(%edx), %ebx
   1837 	cmp	%ebx, %ecx
   1838 	jne	L(find_diff)
   1839 L(25bytes):
   1840 	mov	-25(%eax), %ecx
   1841 	mov	-25(%edx), %ebx
   1842 	cmp	%ebx, %ecx
   1843 	jne	L(find_diff)
   1844 L(21bytes):
   1845 	mov	-21(%eax), %ecx
   1846 	mov	-21(%edx), %ebx
   1847 	cmp	%ebx, %ecx
   1848 	jne	L(find_diff)
   1849 L(17bytes):
   1850 	mov	-17(%eax), %ecx
   1851 	mov	-17(%edx), %ebx
   1852 	cmp	%ebx, %ecx
   1853 	jne	L(find_diff)
   1854 L(13bytes):
   1855 	mov	-13(%eax), %ecx
   1856 	mov	-13(%edx), %ebx
   1857 	cmp	%ebx, %ecx
   1858 	jne	L(find_diff)
   1859 L(9bytes):
   1860 	mov	-9(%eax), %ecx
   1861 	mov	-9(%edx), %ebx
   1862 	cmp	%ebx, %ecx
   1863 	jne	L(find_diff)
   1864 L(5bytes):
   1865 	mov	-5(%eax), %ecx
   1866 	mov	-5(%edx), %ebx
   1867 	cmp	%ebx, %ecx
   1868 	jne	L(find_diff)
   1869 	movzbl	-1(%eax), %ecx	/* last byte of the %eax-side buffer */
   1870 	cmp	-1(%edx), %cl	/* against the last byte of the %edx-side buffer */
   1871 	mov	$0, %eax	/* preload the "equal" result; mov keeps the cmp flags */
   1872 	jne	L(end)	/* L(end) turns the still-live cmp flags into +1 / -1 */
   1873 	POP (%ebx)	/* all bytes equal: restore %ebx and return 0 */
   1874 	ret
   1875 	CFI_PUSH (%ebx)	/* unwind info: code below is entered with %ebx still pushed (macro defined outside this chunk) */
   1876 
   1877 	ALIGN (4)
   1878 L(46bytes):	/* Tail chain for counts 46,42,...,2: dword steps counted back from the end pointers %eax/%edx, then the last two bytes. */
   1879 	mov	-46(%eax), %ecx	/* dword from the %eax-side buffer */
   1880 	mov	-46(%edx), %ebx	/* dword from the %edx-side buffer */
   1881 	cmp	%ebx, %ecx
   1882 	jne	L(find_diff)	/* L(find_diff) picks the first differing byte out of %ecx/%ebx */
   1883 L(42bytes):
   1884 	mov	-42(%eax), %ecx
   1885 	mov	-42(%edx), %ebx
   1886 	cmp	%ebx, %ecx
   1887 	jne	L(find_diff)
   1888 L(38bytes):
   1889 	mov	-38(%eax), %ecx
   1890 	mov	-38(%edx), %ebx
   1891 	cmp	%ebx, %ecx
   1892 	jne	L(find_diff)
   1893 L(34bytes):
   1894 	mov	-34(%eax), %ecx
   1895 	mov	-34(%edx), %ebx
   1896 	cmp	%ebx, %ecx
   1897 	jne	L(find_diff)
   1898 L(30bytes):
   1899 	mov	-30(%eax), %ecx
   1900 	mov	-30(%edx), %ebx
   1901 	cmp	%ebx, %ecx
   1902 	jne	L(find_diff)
   1903 L(26bytes):
   1904 	mov	-26(%eax), %ecx
   1905 	mov	-26(%edx), %ebx
   1906 	cmp	%ebx, %ecx
   1907 	jne	L(find_diff)
   1908 L(22bytes):
   1909 	mov	-22(%eax), %ecx
   1910 	mov	-22(%edx), %ebx
   1911 	cmp	%ebx, %ecx
   1912 	jne	L(find_diff)
   1913 L(18bytes):
   1914 	mov	-18(%eax), %ecx
   1915 	mov	-18(%edx), %ebx
   1916 	cmp	%ebx, %ecx
   1917 	jne	L(find_diff)
   1918 L(14bytes):
   1919 	mov	-14(%eax), %ecx
   1920 	mov	-14(%edx), %ebx
   1921 	cmp	%ebx, %ecx
   1922 	jne	L(find_diff)
   1923 L(10bytes):
   1924 	mov	-10(%eax), %ecx
   1925 	mov	-10(%edx), %ebx
   1926 	cmp	%ebx, %ecx
   1927 	jne	L(find_diff)
   1928 L(6bytes):
   1929 	mov	-6(%eax), %ecx
   1930 	mov	-6(%edx), %ebx
   1931 	cmp	%ebx, %ecx
   1932 	jne	L(find_diff)
   1933 L(2bytes):
   1934 	movzwl	-2(%eax), %ecx	/* last two bytes of each buffer, zero-extended */
   1935 	movzwl	-2(%edx), %ebx
   1936 	cmp	%bl, %cl	/* lower-address byte first */
   1937 	jne	L(end)	/* L(end) turns the still-live cmp flags into +1 / -1 */
   1938 	cmp	%bh, %ch	/* then the final byte */
   1939 	mov	$0, %eax	/* preload the "equal" result; mov keeps the cmp flags */
   1940 	jne	L(end)
   1941 	POP (%ebx)	/* all bytes equal: restore %ebx and return 0 */
   1942 	ret
   1943 	CFI_PUSH (%ebx)	/* unwind info: code below is entered with %ebx still pushed (macro defined outside this chunk) */
   1944 
   1945 	ALIGN (4)
   1946 L(47bytes):	/* Tail chain for counts 47,43,...,3: dword steps counted back from the end pointers %eax/%edx, then the last three bytes. */
   1947 	movl	-47(%eax), %ecx	/* dword from the %eax-side buffer */
   1948 	movl	-47(%edx), %ebx	/* dword from the %edx-side buffer */
   1949 	cmp	%ebx, %ecx
   1950 	jne	L(find_diff)	/* L(find_diff) picks the first differing byte out of %ecx/%ebx */
   1951 L(43bytes):
   1952 	movl	-43(%eax), %ecx
   1953 	movl	-43(%edx), %ebx
   1954 	cmp	%ebx, %ecx
   1955 	jne	L(find_diff)
   1956 L(39bytes):
   1957 	movl	-39(%eax), %ecx
   1958 	movl	-39(%edx), %ebx
   1959 	cmp	%ebx, %ecx
   1960 	jne	L(find_diff)
   1961 L(35bytes):
   1962 	movl	-35(%eax), %ecx
   1963 	movl	-35(%edx), %ebx
   1964 	cmp	%ebx, %ecx
   1965 	jne	L(find_diff)
   1966 L(31bytes):
   1967 	movl	-31(%eax), %ecx
   1968 	movl	-31(%edx), %ebx
   1969 	cmp	%ebx, %ecx
   1970 	jne	L(find_diff)
   1971 L(27bytes):
   1972 	movl	-27(%eax), %ecx
   1973 	movl	-27(%edx), %ebx
   1974 	cmp	%ebx, %ecx
   1975 	jne	L(find_diff)
   1976 L(23bytes):
   1977 	movl	-23(%eax), %ecx
   1978 	movl	-23(%edx), %ebx
   1979 	cmp	%ebx, %ecx
   1980 	jne	L(find_diff)
   1981 L(19bytes):
   1982 	movl	-19(%eax), %ecx
   1983 	movl	-19(%edx), %ebx
   1984 	cmp	%ebx, %ecx
   1985 	jne	L(find_diff)
   1986 L(15bytes):
   1987 	movl	-15(%eax), %ecx
   1988 	movl	-15(%edx), %ebx
   1989 	cmp	%ebx, %ecx
   1990 	jne	L(find_diff)
   1991 L(11bytes):
   1992 	movl	-11(%eax), %ecx
   1993 	movl	-11(%edx), %ebx
   1994 	cmp	%ebx, %ecx
   1995 	jne	L(find_diff)
   1996 L(7bytes):
   1997 	movl	-7(%eax), %ecx
   1998 	movl	-7(%edx), %ebx
   1999 	cmp	%ebx, %ecx
   2000 	jne	L(find_diff)
   2001 L(3bytes):
   2002 	movzwl	-3(%eax), %ecx	/* bytes -3 and -2 of each buffer, zero-extended */
   2003 	movzwl	-3(%edx), %ebx
   2004 	cmpb	%bl, %cl	/* lower-address byte first */
   2005 	jne	L(end)	/* L(end) turns the still-live cmp flags into +1 / -1 */
   2006 	cmp	%bx, %cx	/* low bytes are equal here, so this is decided by the second byte */
   2007 	jne	L(end)
   2008 	movzbl	-1(%eax), %eax	/* final byte; overwrites the %eax end pointer, which is no longer needed */
   2009 	cmpb	-1(%edx), %al
   2010 	mov	$0, %eax	/* preload the "equal" result; mov keeps the cmp flags */
   2011 	jne	L(end)
   2012 	POP (%ebx)	/* all bytes equal: restore %ebx and return 0 */
   2013 	ret
   2014 	CFI_PUSH (%ebx)	/* unwind info: code below is entered with %ebx still pushed (macro defined outside this chunk) */
   2015 
   2016 	ALIGN (4)
   2017 L(find_diff):	/* In: %ecx / %ebx = two unequal dwords. Compares them byte by byte in memory (little-endian) order and leaves the deciding flags for L(end). */
   2018 	cmpb	%bl, %cl	/* byte 0 */
   2019 	jne	L(end)
   2020 	cmp	%bx, %cx	/* byte 0 is equal, so the 16-bit compare is decided by byte 1 */
   2021 	jne	L(end)
   2022 	shr	$16,%ecx	/* bring bytes 2 and 3 down into the low word */
   2023 	shr	$16,%ebx
   2024 	cmp	%bl, %cl	/* byte 2 */
   2025 	jne	L(end)
   2026 	cmp	%bx, %cx	/* byte 3 decides; falls through with its flags */
   2027 L(end):	/* In: flags of an unsigned compare (%eax-side value minus %edx-side value) that came out unequal. */
   2028 	POP (%ebx)	/* POP macro defined outside this chunk; the ja below relies on it leaving EFLAGS untouched */
   2029 	mov	$1, %eax	/* mov does not modify flags either */
   2030 	ja	L(bigger)	/* unsigned above: return +1 */
   2031 	neg	%eax	/* otherwise return -1 */
   2032 L(bigger):
   2033 	ret
   2034 
   2035 END (MEMCMP)	/* closes the function opened outside this chunk (END macro defined elsewhere) */
   2036