Home | History | Annotate | Download | only in string
      1 /*
      2 Copyright (c) 2010, Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     /* Symbol name under which the routine below is emitted (see ENTRY (MEMCMP)).
        The #ifndef guard lets a build predefine MEMCMP to pick another name.  */
     31 #ifndef MEMCMP
     32 # define MEMCMP		ssse3_memcmp3_new
     33 #endif
     34 
     /* L(label) pastes ".L" in front of LABEL, so L(zero) becomes .Lzero.
        All branch targets inside the routine are written through this macro.  */
     35 #ifndef L
     36 # define L(label)	.L##label
     37 #endif
     38 
     /* ALIGN(n) expands to ".p2align n", i.e. the argument is a power-of-two
        exponent: ALIGN (4) requests 16-byte alignment.  */
     39 #ifndef ALIGN
     40 # define ALIGN(n)	.p2align n
     41 #endif
     42 
     /* Fallback wrapper: map cfi_startproc onto the assembler's .cfi_startproc
        directive unless the name has already been defined.  */
     43 #ifndef cfi_startproc
     44 # define cfi_startproc			.cfi_startproc
     45 #endif
     46 
     /* Fallback wrapper: map cfi_endproc onto the assembler's .cfi_endproc
        directive unless the name has already been defined.  */
     47 #ifndef cfi_endproc
     48 # define cfi_endproc			.cfi_endproc
     49 #endif
     50 
     /* Fallback wrapper for .cfi_rel_offset: records that REG's previous value
        is saved at OFF relative to the current CFA register.  */
     51 #ifndef cfi_rel_offset
     52 # define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
     53 #endif
     54 
     /* Fallback wrapper for .cfi_restore: REG is no longer saved on the stack.
        Note the expansion passes the register wrapped in parentheses.  */
     55 #ifndef cfi_restore
     56 # define cfi_restore(reg)		.cfi_restore (reg)
     57 #endif
     58 
     /* Fallback wrapper for .cfi_adjust_cfa_offset: adds OFF (which may be
        negative) to the current CFA offset.  */
     59 #ifndef cfi_adjust_cfa_offset
     60 # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
     61 #endif
     62 
     /* ENTRY (name): open a function.  Marks NAME as a function-typed, global
        symbol, aligns the entry point to 2^4 = 16 bytes, emits the label and
        starts a CFI region.  Must be paired with END (name).  Only defined
        here when no ENTRY macro already exists.  */
     63 #ifndef ENTRY
     64 # define ENTRY(name)			\
     65 	.type name,  @function; 	\
     66 	.globl name;			\
     67 	.p2align 4;			\
     68 name:					\
     69 	cfi_startproc
     70 #endif
     71 
     /* END (name): close the function opened by ENTRY (name).  Ends the CFI
        region and sets the symbol size to the distance from NAME to the
        current location.  */
     72 #ifndef END
     73 # define END(name)			\
     74 	cfi_endproc;			\
     75 	.size name, .-name
     76 #endif
     77 
     /* CFI_PUSH (REG): unwind annotations describing a 4-byte push of REG.
        The CFA offset grows by 4 and REG is recorded as saved at offset 0
        from the current CFA register.  Emits no instructions; it is also
        used on its own to re-establish CFI state after an unconditional
        jmp or ret.  */
     78 #define CFI_PUSH(REG)						\
     79   cfi_adjust_cfa_offset (4);					\
     80   cfi_rel_offset (REG, 0)
     81 
     /* CFI_POP (REG): unwind annotations describing a 4-byte pop of REG.
        The CFA offset shrinks by 4 and REG is marked as restored.  Emits no
        instructions; the inverse of CFI_PUSH.  */
     82 #define CFI_POP(REG)						\
     83   cfi_adjust_cfa_offset (-4);					\
     84   cfi_restore (REG)
     85 
     /* PUSH / POP: the real pushl / popl instruction followed by the matching
        CFI annotation, so unwind info stays in step with the stack.  */
     86 #define PUSH(REG)	pushl REG; CFI_PUSH (REG)
     87 #define POP(REG)	popl REG; CFI_POP (REG)
     88 
     /* Offsets of the three stack arguments from %esp as it is on entry,
        before any register has been pushed (the routine reads them first):
          PARMS = 4   first argument slot (offset 0 presumably holds the
                      return address -- standard i386 stack-argument layout)
          BLK1  = 4   pointer to the first block
          BLK2  = 8   pointer to the second block
          LEN   = 12  number of bytes to compare
        The expansions are unparenthesised sums; they are only meant to be
        used as a displacement, e.g. LEN(%esp).  */
     89 #define PARMS		4
     90 #define BLK1		PARMS
     91 #define BLK2		BLK1+4
     92 #define LEN		BLK2+4
     /* RETURN_END: pop the three saved registers and return.  The pop order
        (%edi, %esi, %ebx) is the reverse of the push order used by the
        routine (%ebx, %esi, %edi).

        RETURN: same instructions, for a return that is followed by more code
        of the same function.  The POPs leave the CFI state as "nothing
        saved", so the three CFI_PUSH annotations after the ret re-describe
        the frame for the code that follows (which is presumably entered with
        all three registers still on the stack).  They emit no instructions.

        The CFI_PUSH order must mirror the real push order -- %ebx, then
        %esi, then %edi -- otherwise the unwinder is told that %esi and %edi
        live in each other's stack slots.  */
     93 #define RETURN_END	POP (%edi); POP (%esi); POP (%ebx); ret
     94 #define RETURN		RETURN_END; CFI_PUSH (%ebx); CFI_PUSH (%esi); \
     95 			CFI_PUSH (%edi)
     96 
     /* Place the routine in its own section named .text.ssse3; flags "ax" mark
        it allocatable and executable, @progbits marks it as holding program
        contents.  */
     97 	.section .text.ssse3,"ax",@progbits
     98 ENTRY (MEMCMP)
     99 	movl	LEN(%esp), %ecx
    100 	movl	BLK1(%esp), %eax
    101 	cmp	$48, %ecx
    102 	movl	BLK2(%esp), %edx
    103 	jae	L(48bytesormore)
    104 	cmp	$1, %ecx
    105 	jbe	L(less1bytes)
    106 	PUSH (%ebx)
    107 	add	%ecx, %edx
    108 	add	%ecx, %eax
    109 	jmp	L(less48bytes)
    110 
    111 	CFI_POP (%ebx)
    112 	ALIGN (4)
    113 L(less1bytes):
    114 	jb	L(zero)
    115 	movb	(%eax), %cl
    116 	cmp	(%edx), %cl
    117 	je	L(zero)
    118 	mov	$1, %eax
    119 	ja	L(1bytesend)
    120 	neg	%eax
    121 L(1bytesend):
    122 	ret
    123 
    124 	ALIGN (4)
    125 L(zero):
    126 	mov	$0, %eax
    127 	ret
    128 
    129 	ALIGN (4)
    130 L(48bytesormore):
    131 	PUSH (%ebx)
    132 	PUSH (%esi)
    133 	PUSH (%edi)
    134 	movdqu    (%eax), %xmm3
    135 	movdqu    (%edx), %xmm0
    136 	movl	%eax, %edi
    137 	movl	%edx, %esi
    138 	pcmpeqb   %xmm0, %xmm3
    139 	pmovmskb  %xmm3, %edx
    140 	lea	16(%edi), %edi
    141 
    142 	sub      $0xffff, %edx
    143 	lea	16(%esi), %esi
    144 	jnz	  L(less16bytes)
    145 	mov	%edi, %edx
    146 	and	$0xf, %edx
    147 	xor	%edx, %edi
    148 	sub	%edx, %esi
    149 	add	%edx, %ecx
    150 	mov	%esi, %edx
    151 	and	$0xf, %edx
    152 	jz	L(shr_0)
    153 	xor	%edx, %esi
    154 
    155 	cmp	$8, %edx
    156 	jae	L(next_unaligned_table)
    157 	cmp	$0, %edx
    158 	je	L(shr_0)
    159 	cmp	$1, %edx
    160 	je	L(shr_1)
    161 	cmp	$2, %edx
    162 	je	L(shr_2)
    163 	cmp	$3, %edx
    164 	je	L(shr_3)
    165 	cmp	$4, %edx
    166 	je	L(shr_4)
    167 	cmp	$5, %edx
    168 	je	L(shr_5)
    169 	cmp	$6, %edx
    170 	je	L(shr_6)
    171 	jmp	L(shr_7)
    172 
    173 	ALIGN (4)
    174 L(next_unaligned_table):
    175 	cmp	$8, %edx
    176 	je	L(shr_8)
    177 	cmp	$9, %edx
    178 	je	L(shr_9)
    179 	cmp	$10, %edx
    180 	je	L(shr_10)
    181 	cmp	$11, %edx
    182 	je	L(shr_11)
    183 	cmp	$12, %edx
    184 	je	L(shr_12)
    185 	cmp	$13, %edx
    186 	je	L(shr_13)
    187 	cmp	$14, %edx
    188 	je	L(shr_14)
    189 	jmp	L(shr_15)
    190 
    191 	ALIGN (4)
    192 L(shr_0):
    193 	cmp	$80, %ecx
    194 	jae	L(shr_0_gobble)
    195 	lea	-48(%ecx), %ecx
    196 	xor	%eax, %eax
    197 	movaps	(%esi), %xmm1
    198 	pcmpeqb	(%edi), %xmm1
    199 	movaps	16(%esi), %xmm2
    200 	pcmpeqb	16(%edi), %xmm2
    201 	pand	%xmm1, %xmm2
    202 	pmovmskb %xmm2, %edx
    203 	add	$32, %edi
    204 	add	$32, %esi
    205 	sub	$0xffff, %edx
    206 	jnz	L(exit)
    207 
    208 	lea	(%ecx, %edi,1), %eax
    209 	lea	(%ecx, %esi,1), %edx
    210 	POP (%edi)
    211 	POP (%esi)
    212 	jmp	L(less48bytes)
    213 
    214 	CFI_PUSH (%esi)
    215 	CFI_PUSH (%edi)
    216 	ALIGN (4)
    217 L(shr_0_gobble):
    218 	lea	-48(%ecx), %ecx
    219 	movdqa	(%esi), %xmm0
    220 	xor	%eax, %eax
    221 	pcmpeqb	(%edi), %xmm0
    222 	sub	$32, %ecx
    223 	movdqa	16(%esi), %xmm2
    224 	pcmpeqb	16(%edi), %xmm2
    225 L(shr_0_gobble_loop):
    226 	pand	%xmm0, %xmm2
    227 	sub	$32, %ecx
    228 	pmovmskb %xmm2, %edx
    229 	movdqa	%xmm0, %xmm1
    230 	movdqa	32(%esi), %xmm0
    231 	movdqa	48(%esi), %xmm2
    232 	sbb	$0xffff, %edx
    233 	pcmpeqb	32(%edi), %xmm0
    234 	pcmpeqb	48(%edi), %xmm2
    235 	lea	32(%edi), %edi
    236 	lea	32(%esi), %esi
    237 	jz	L(shr_0_gobble_loop)
    238 
    239 	pand	%xmm0, %xmm2
    240 	cmp	$0, %ecx
    241 	jge	L(shr_0_gobble_loop_next)
    242 	inc	%edx
    243 	add	$32, %ecx
    244 L(shr_0_gobble_loop_next):
    245 	test	%edx, %edx
    246 	jnz	L(exit)
    247 
    248 	pmovmskb %xmm2, %edx
    249 	movdqa	%xmm0, %xmm1
    250 	lea	32(%edi), %edi
    251 	lea	32(%esi), %esi
    252 	sub	$0xffff, %edx
    253 	jnz	L(exit)
    254 	lea	(%ecx, %edi,1), %eax
    255 	lea	(%ecx, %esi,1), %edx
    256 	POP (%edi)
    257 	POP (%esi)
    258 	jmp	L(less48bytes)
    259 
    260 	CFI_PUSH (%esi)
    261 	CFI_PUSH (%edi)
    262 	ALIGN (4)
    263 L(shr_1):
    264 	cmp	$80, %ecx
    265 	lea	-48(%ecx), %ecx
    266 	mov	%edx, %eax
    267 	jae	L(shr_1_gobble)
    268 
    269 	movdqa	16(%esi), %xmm1
    270 	movdqa	%xmm1, %xmm2
    271 	palignr	$1,(%esi), %xmm1
    272 	pcmpeqb	(%edi), %xmm1
    273 
    274 	movdqa	32(%esi), %xmm3
    275 	palignr	$1,%xmm2, %xmm3
    276 	pcmpeqb	16(%edi), %xmm3
    277 
    278 	pand	%xmm1, %xmm3
    279 	pmovmskb %xmm3, %edx
    280 	lea	32(%edi), %edi
    281 	lea	32(%esi), %esi
    282 	sub	$0xffff, %edx
    283 	jnz	L(exit)
    284 	lea	(%ecx, %edi,1), %eax
    285 	lea	1(%ecx, %esi,1), %edx
    286 	POP (%edi)
    287 	POP (%esi)
    288 	jmp	L(less48bytes)
    289 
    290 	CFI_PUSH (%esi)
    291 	CFI_PUSH (%edi)
    292 	ALIGN (4)
    293 L(shr_1_gobble):
    294 	sub	$32, %ecx
    295 	movdqa	16(%esi), %xmm0
    296 	palignr	$1,(%esi), %xmm0
    297 	pcmpeqb	(%edi), %xmm0
    298 
    299 	movdqa	32(%esi), %xmm3
    300 	palignr	$1,16(%esi), %xmm3
    301 	pcmpeqb	16(%edi), %xmm3
    302 
    303 L(shr_1_gobble_loop):
    304 	pand	%xmm0, %xmm3
    305 	sub	$32, %ecx
    306 	pmovmskb %xmm3, %edx
    307 	movdqa	%xmm0, %xmm1
    308 
    309 	movdqa	64(%esi), %xmm3
    310 	palignr	$1,48(%esi), %xmm3
    311 	sbb	$0xffff, %edx
    312 	movdqa	48(%esi), %xmm0
    313 	palignr	$1,32(%esi), %xmm0
    314 	pcmpeqb	32(%edi), %xmm0
    315 	lea	32(%esi), %esi
    316 	pcmpeqb	48(%edi), %xmm3
    317 
    318 	lea	32(%edi), %edi
    319 	jz	L(shr_1_gobble_loop)
    320 	pand	%xmm0, %xmm3
    321 
    322 	cmp	$0, %ecx
    323 	jge	L(shr_1_gobble_next)
    324 	inc	%edx
    325 	add	$32, %ecx
    326 L(shr_1_gobble_next):
    327 	test	%edx, %edx
    328 	jnz	L(exit)
    329 
    330 	pmovmskb %xmm3, %edx
    331 	movdqa	%xmm0, %xmm1
    332 	lea	32(%edi), %edi
    333 	lea	32(%esi), %esi
    334 	sub	$0xffff, %edx
    335 	jnz	L(exit)
    336 
    337 	lea	(%ecx, %edi,1), %eax
    338 	lea	1(%ecx, %esi,1), %edx
    339 	POP (%edi)
    340 	POP (%esi)
    341 	jmp	L(less48bytes)
    342 
    343 	CFI_PUSH (%esi)
    344 	CFI_PUSH (%edi)
    345 	ALIGN (4)
    346 L(shr_2):
    347 	cmp	$80, %ecx
    348 	lea	-48(%ecx), %ecx
    349 	mov	%edx, %eax
    350 	jae	L(shr_2_gobble)
    351 
    352 	movdqa	16(%esi), %xmm1
    353 	movdqa	%xmm1, %xmm2
    354 	palignr	$2,(%esi), %xmm1
    355 	pcmpeqb	(%edi), %xmm1
    356 
    357 	movdqa	32(%esi), %xmm3
    358 	palignr	$2,%xmm2, %xmm3
    359 	pcmpeqb	16(%edi), %xmm3
    360 
    361 	pand	%xmm1, %xmm3
    362 	pmovmskb %xmm3, %edx
    363 	lea	32(%edi), %edi
    364 	lea	32(%esi), %esi
    365 	sub	$0xffff, %edx
    366 	jnz	L(exit)
    367 	lea	(%ecx, %edi,1), %eax
    368 	lea	2(%ecx, %esi,1), %edx
    369 	POP (%edi)
    370 	POP (%esi)
    371 	jmp	L(less48bytes)
    372 
    373 	CFI_PUSH (%esi)
    374 	CFI_PUSH (%edi)
    375 	ALIGN (4)
    376 L(shr_2_gobble):
    377 	sub	$32, %ecx
    378 	movdqa	16(%esi), %xmm0
    379 	palignr	$2,(%esi), %xmm0
    380 	pcmpeqb	(%edi), %xmm0
    381 
    382 	movdqa	32(%esi), %xmm3
    383 	palignr	$2,16(%esi), %xmm3
    384 	pcmpeqb	16(%edi), %xmm3
    385 
    386 L(shr_2_gobble_loop):
    387 	pand	%xmm0, %xmm3
    388 	sub	$32, %ecx
    389 	pmovmskb %xmm3, %edx
    390 	movdqa	%xmm0, %xmm1
    391 
    392 	movdqa	64(%esi), %xmm3
    393 	palignr	$2,48(%esi), %xmm3
    394 	sbb	$0xffff, %edx
    395 	movdqa	48(%esi), %xmm0
    396 	palignr	$2,32(%esi), %xmm0
    397 	pcmpeqb	32(%edi), %xmm0
    398 	lea	32(%esi), %esi
    399 	pcmpeqb	48(%edi), %xmm3
    400 
    401 	lea	32(%edi), %edi
    402 	jz	L(shr_2_gobble_loop)
    403 	pand	%xmm0, %xmm3
    404 
    405 	cmp	$0, %ecx
    406 	jge	L(shr_2_gobble_next)
    407 	inc	%edx
    408 	add	$32, %ecx
    409 L(shr_2_gobble_next):
    410 	test	%edx, %edx
    411 	jnz	L(exit)
    412 
    413 	pmovmskb %xmm3, %edx
    414 	movdqa	%xmm0, %xmm1
    415 	lea	32(%edi), %edi
    416 	lea	32(%esi), %esi
    417 	sub	$0xffff, %edx
    418 	jnz	L(exit)
    419 
    420 	lea	(%ecx, %edi,1), %eax
    421 	lea	2(%ecx, %esi,1), %edx
    422 	POP (%edi)
    423 	POP (%esi)
    424 	jmp	L(less48bytes)
    425 
    426 	CFI_PUSH (%esi)
    427 	CFI_PUSH (%edi)
    428 	ALIGN (4)
    429 L(shr_3):
    430 	cmp	$80, %ecx
    431 	lea	-48(%ecx), %ecx
    432 	mov	%edx, %eax
    433 	jae	L(shr_3_gobble)
    434 
    435 	movdqa	16(%esi), %xmm1
    436 	movdqa	%xmm1, %xmm2
    437 	palignr	$3,(%esi), %xmm1
    438 	pcmpeqb	(%edi), %xmm1
    439 
    440 	movdqa	32(%esi), %xmm3
    441 	palignr	$3,%xmm2, %xmm3
    442 	pcmpeqb	16(%edi), %xmm3
    443 
    444 	pand	%xmm1, %xmm3
    445 	pmovmskb %xmm3, %edx
    446 	lea	32(%edi), %edi
    447 	lea	32(%esi), %esi
    448 	sub	$0xffff, %edx
    449 	jnz	L(exit)
    450 	lea	(%ecx, %edi,1), %eax
    451 	lea	3(%ecx, %esi,1), %edx
    452 	POP (%edi)
    453 	POP (%esi)
    454 	jmp	L(less48bytes)
    455 
    456 	CFI_PUSH (%esi)
    457 	CFI_PUSH (%edi)
    458 	ALIGN (4)
    459 L(shr_3_gobble):
    460 	sub	$32, %ecx
    461 	movdqa	16(%esi), %xmm0
    462 	palignr	$3,(%esi), %xmm0
    463 	pcmpeqb	(%edi), %xmm0
    464 
    465 	movdqa	32(%esi), %xmm3
    466 	palignr	$3,16(%esi), %xmm3
    467 	pcmpeqb	16(%edi), %xmm3
    468 
    469 L(shr_3_gobble_loop):
    470 	pand	%xmm0, %xmm3
    471 	sub	$32, %ecx
    472 	pmovmskb %xmm3, %edx
    473 	movdqa	%xmm0, %xmm1
    474 
    475 	movdqa	64(%esi), %xmm3
    476 	palignr	$3,48(%esi), %xmm3
    477 	sbb	$0xffff, %edx
    478 	movdqa	48(%esi), %xmm0
    479 	palignr	$3,32(%esi), %xmm0
    480 	pcmpeqb	32(%edi), %xmm0
    481 	lea	32(%esi), %esi
    482 	pcmpeqb	48(%edi), %xmm3
    483 
    484 	lea	32(%edi), %edi
    485 	jz	L(shr_3_gobble_loop)
    486 	pand	%xmm0, %xmm3
    487 
    488 	cmp	$0, %ecx
    489 	jge	L(shr_3_gobble_next)
    490 	inc	%edx
    491 	add	$32, %ecx
    492 L(shr_3_gobble_next):
    493 	test	%edx, %edx
    494 	jnz	L(exit)
    495 
    496 	pmovmskb %xmm3, %edx
    497 	movdqa	%xmm0, %xmm1
    498 	lea	32(%edi), %edi
    499 	lea	32(%esi), %esi
    500 	sub	$0xffff, %edx
    501 	jnz	L(exit)
    502 
    503 	lea	(%ecx, %edi,1), %eax
    504 	lea	3(%ecx, %esi,1), %edx
    505 	POP (%edi)
    506 	POP (%esi)
    507 	jmp	L(less48bytes)
    508 
    509 	CFI_PUSH (%esi)
    510 	CFI_PUSH (%edi)
    511 	ALIGN (4)
    512 L(shr_4):
    513 	cmp	$80, %ecx
    514 	lea	-48(%ecx), %ecx
    515 	mov	%edx, %eax
    516 	jae	L(shr_4_gobble)
    517 
    518 	movdqa	16(%esi), %xmm1
    519 	movdqa	%xmm1, %xmm2
    520 	palignr	$4,(%esi), %xmm1
    521 	pcmpeqb	(%edi), %xmm1
    522 
    523 	movdqa	32(%esi), %xmm3
    524 	palignr	$4,%xmm2, %xmm3
    525 	pcmpeqb	16(%edi), %xmm3
    526 
    527 	pand	%xmm1, %xmm3
    528 	pmovmskb %xmm3, %edx
    529 	lea	32(%edi), %edi
    530 	lea	32(%esi), %esi
    531 	sub	$0xffff, %edx
    532 	jnz	L(exit)
    533 	lea	(%ecx, %edi,1), %eax
    534 	lea	4(%ecx, %esi,1), %edx
    535 	POP (%edi)
    536 	POP (%esi)
    537 	jmp	L(less48bytes)
    538 
    539 	CFI_PUSH (%esi)
    540 	CFI_PUSH (%edi)
    541 	ALIGN (4)
    542 L(shr_4_gobble):
    543 	sub	$32, %ecx
    544 	movdqa	16(%esi), %xmm0
    545 	palignr	$4,(%esi), %xmm0
    546 	pcmpeqb	(%edi), %xmm0
    547 
    548 	movdqa	32(%esi), %xmm3
    549 	palignr	$4,16(%esi), %xmm3
    550 	pcmpeqb	16(%edi), %xmm3
    551 
    552 L(shr_4_gobble_loop):
    553 	pand	%xmm0, %xmm3
    554 	sub	$32, %ecx
    555 	pmovmskb %xmm3, %edx
    556 	movdqa	%xmm0, %xmm1
    557 
    558 	movdqa	64(%esi), %xmm3
    559 	palignr	$4,48(%esi), %xmm3
    560 	sbb	$0xffff, %edx
    561 	movdqa	48(%esi), %xmm0
    562 	palignr	$4,32(%esi), %xmm0
    563 	pcmpeqb	32(%edi), %xmm0
    564 	lea	32(%esi), %esi
    565 	pcmpeqb	48(%edi), %xmm3
    566 
    567 	lea	32(%edi), %edi
    568 	jz	L(shr_4_gobble_loop)
    569 	pand	%xmm0, %xmm3
    570 
    571 	cmp	$0, %ecx
    572 	jge	L(shr_4_gobble_next)
    573 	inc	%edx
    574 	add	$32, %ecx
    575 L(shr_4_gobble_next):
    576 	test	%edx, %edx
    577 	jnz	L(exit)
    578 
    579 	pmovmskb %xmm3, %edx
    580 	movdqa	%xmm0, %xmm1
    581 	lea	32(%edi), %edi
    582 	lea	32(%esi), %esi
    583 	sub	$0xffff, %edx
    584 	jnz	L(exit)
    585 
    586 	lea	(%ecx, %edi,1), %eax
    587 	lea	4(%ecx, %esi,1), %edx
    588 	POP (%edi)
    589 	POP (%esi)
    590 	jmp	L(less48bytes)
    591 
    592 	CFI_PUSH (%esi)
    593 	CFI_PUSH (%edi)
    594 	ALIGN (4)
    595 L(shr_5):
    596 	cmp	$80, %ecx
    597 	lea	-48(%ecx), %ecx
    598 	mov	%edx, %eax
    599 	jae	L(shr_5_gobble)
    600 
    601 	movdqa	16(%esi), %xmm1
    602 	movdqa	%xmm1, %xmm2
    603 	palignr	$5,(%esi), %xmm1
    604 	pcmpeqb	(%edi), %xmm1
    605 
    606 	movdqa	32(%esi), %xmm3
    607 	palignr	$5,%xmm2, %xmm3
    608 	pcmpeqb	16(%edi), %xmm3
    609 
    610 	pand	%xmm1, %xmm3
    611 	pmovmskb %xmm3, %edx
    612 	lea	32(%edi), %edi
    613 	lea	32(%esi), %esi
    614 	sub	$0xffff, %edx
    615 	jnz	L(exit)
    616 	lea	(%ecx, %edi,1), %eax
    617 	lea	5(%ecx, %esi,1), %edx
    618 	POP (%edi)
    619 	POP (%esi)
    620 	jmp	L(less48bytes)
    621 
    622 	CFI_PUSH (%esi)
    623 	CFI_PUSH (%edi)
    624 	ALIGN (4)
    625 L(shr_5_gobble):
    626 	sub	$32, %ecx
    627 	movdqa	16(%esi), %xmm0
    628 	palignr	$5,(%esi), %xmm0
    629 	pcmpeqb	(%edi), %xmm0
    630 
    631 	movdqa	32(%esi), %xmm3
    632 	palignr	$5,16(%esi), %xmm3
    633 	pcmpeqb	16(%edi), %xmm3
    634 
    635 L(shr_5_gobble_loop):
    636 	pand	%xmm0, %xmm3
    637 	sub	$32, %ecx
    638 	pmovmskb %xmm3, %edx
    639 	movdqa	%xmm0, %xmm1
    640 
    641 	movdqa	64(%esi), %xmm3
    642 	palignr	$5,48(%esi), %xmm3
    643 	sbb	$0xffff, %edx
    644 	movdqa	48(%esi), %xmm0
    645 	palignr	$5,32(%esi), %xmm0
    646 	pcmpeqb	32(%edi), %xmm0
    647 	lea	32(%esi), %esi
    648 	pcmpeqb	48(%edi), %xmm3
    649 
    650 	lea	32(%edi), %edi
    651 	jz	L(shr_5_gobble_loop)
    652 	pand	%xmm0, %xmm3
    653 
    654 	cmp	$0, %ecx
    655 	jge	L(shr_5_gobble_next)
    656 	inc	%edx
    657 	add	$32, %ecx
    658 L(shr_5_gobble_next):
    659 	test	%edx, %edx
    660 	jnz	L(exit)
    661 
    662 	pmovmskb %xmm3, %edx
    663 	movdqa	%xmm0, %xmm1
    664 	lea	32(%edi), %edi
    665 	lea	32(%esi), %esi
    666 	sub	$0xffff, %edx
    667 	jnz	L(exit)
    668 
    669 	lea	(%ecx, %edi,1), %eax
    670 	lea	5(%ecx, %esi,1), %edx
    671 	POP (%edi)
    672 	POP (%esi)
    673 	jmp	L(less48bytes)
    674 
    675 	CFI_PUSH (%esi)
    676 	CFI_PUSH (%edi)
    677 	ALIGN (4)
    678 L(shr_6):
    679 	cmp	$80, %ecx
    680 	lea	-48(%ecx), %ecx
    681 	mov	%edx, %eax
    682 	jae	L(shr_6_gobble)
    683 
    684 	movdqa	16(%esi), %xmm1
    685 	movdqa	%xmm1, %xmm2
    686 	palignr	$6,(%esi), %xmm1
    687 	pcmpeqb	(%edi), %xmm1
    688 
    689 	movdqa	32(%esi), %xmm3
    690 	palignr	$6,%xmm2, %xmm3
    691 	pcmpeqb	16(%edi), %xmm3
    692 
    693 	pand	%xmm1, %xmm3
    694 	pmovmskb %xmm3, %edx
    695 	lea	32(%edi), %edi
    696 	lea	32(%esi), %esi
    697 	sub	$0xffff, %edx
    698 	jnz	L(exit)
    699 	lea	(%ecx, %edi,1), %eax
    700 	lea	6(%ecx, %esi,1), %edx
    701 	POP (%edi)
    702 	POP (%esi)
    703 	jmp	L(less48bytes)
    704 
    705 	CFI_PUSH (%esi)
    706 	CFI_PUSH (%edi)
    707 	ALIGN (4)
    708 L(shr_6_gobble):
    709 	sub	$32, %ecx
    710 	movdqa	16(%esi), %xmm0
    711 	palignr	$6,(%esi), %xmm0
    712 	pcmpeqb	(%edi), %xmm0
    713 
    714 	movdqa	32(%esi), %xmm3
    715 	palignr	$6,16(%esi), %xmm3
    716 	pcmpeqb	16(%edi), %xmm3
    717 
    718 L(shr_6_gobble_loop):
    719 	pand	%xmm0, %xmm3
    720 	sub	$32, %ecx
    721 	pmovmskb %xmm3, %edx
    722 	movdqa	%xmm0, %xmm1
    723 
    724 	movdqa	64(%esi), %xmm3
    725 	palignr	$6,48(%esi), %xmm3
    726 	sbb	$0xffff, %edx
    727 	movdqa	48(%esi), %xmm0
    728 	palignr	$6,32(%esi), %xmm0
    729 	pcmpeqb	32(%edi), %xmm0
    730 	lea	32(%esi), %esi
    731 	pcmpeqb	48(%edi), %xmm3
    732 
    733 	lea	32(%edi), %edi
    734 	jz	L(shr_6_gobble_loop)
    735 	pand	%xmm0, %xmm3
    736 
    737 	cmp	$0, %ecx
    738 	jge	L(shr_6_gobble_next)
    739 	inc	%edx
    740 	add	$32, %ecx
    741 L(shr_6_gobble_next):
    742 	test	%edx, %edx
    743 	jnz	L(exit)
    744 
    745 	pmovmskb %xmm3, %edx
    746 	movdqa	%xmm0, %xmm1
    747 	lea	32(%edi), %edi
    748 	lea	32(%esi), %esi
    749 	sub	$0xffff, %edx
    750 	jnz	L(exit)
    751 
    752 	lea	(%ecx, %edi,1), %eax
    753 	lea	6(%ecx, %esi,1), %edx
    754 	POP (%edi)
    755 	POP (%esi)
    756 	jmp	L(less48bytes)
    757 
    758 	CFI_PUSH (%esi)
    759 	CFI_PUSH (%edi)
    760 	ALIGN (4)
    761 L(shr_7):
    762 	cmp	$80, %ecx
    763 	lea	-48(%ecx), %ecx
    764 	mov	%edx, %eax
    765 	jae	L(shr_7_gobble)
    766 
    767 	movdqa	16(%esi), %xmm1
    768 	movdqa	%xmm1, %xmm2
    769 	palignr	$7,(%esi), %xmm1
    770 	pcmpeqb	(%edi), %xmm1
    771 
    772 	movdqa	32(%esi), %xmm3
    773 	palignr	$7,%xmm2, %xmm3
    774 	pcmpeqb	16(%edi), %xmm3
    775 
    776 	pand	%xmm1, %xmm3
    777 	pmovmskb %xmm3, %edx
    778 	lea	32(%edi), %edi
    779 	lea	32(%esi), %esi
    780 	sub	$0xffff, %edx
    781 	jnz	L(exit)
    782 	lea	(%ecx, %edi,1), %eax
    783 	lea	7(%ecx, %esi,1), %edx
    784 	POP (%edi)
    785 	POP (%esi)
    786 	jmp	L(less48bytes)
    787 
    788 	CFI_PUSH (%esi)
    789 	CFI_PUSH (%edi)
    790 	ALIGN (4)
    791 L(shr_7_gobble):
    792 	sub	$32, %ecx
    793 	movdqa	16(%esi), %xmm0
    794 	palignr	$7,(%esi), %xmm0
    795 	pcmpeqb	(%edi), %xmm0
    796 
    797 	movdqa	32(%esi), %xmm3
    798 	palignr	$7,16(%esi), %xmm3
    799 	pcmpeqb	16(%edi), %xmm3
    800 
    801 L(shr_7_gobble_loop):
    802 	pand	%xmm0, %xmm3
    803 	sub	$32, %ecx
    804 	pmovmskb %xmm3, %edx
    805 	movdqa	%xmm0, %xmm1
    806 
    807 	movdqa	64(%esi), %xmm3
    808 	palignr	$7,48(%esi), %xmm3
    809 	sbb	$0xffff, %edx
    810 	movdqa	48(%esi), %xmm0
    811 	palignr	$7,32(%esi), %xmm0
    812 	pcmpeqb	32(%edi), %xmm0
    813 	lea	32(%esi), %esi
    814 	pcmpeqb	48(%edi), %xmm3
    815 
    816 	lea	32(%edi), %edi
    817 	jz	L(shr_7_gobble_loop)
    818 	pand	%xmm0, %xmm3
    819 
    820 	cmp	$0, %ecx
    821 	jge	L(shr_7_gobble_next)
    822 	inc	%edx
    823 	add	$32, %ecx
    824 L(shr_7_gobble_next):
    825 	test	%edx, %edx
    826 	jnz	L(exit)
    827 
    828 	pmovmskb %xmm3, %edx
    829 	movdqa	%xmm0, %xmm1
    830 	lea	32(%edi), %edi
    831 	lea	32(%esi), %esi
    832 	sub	$0xffff, %edx
    833 	jnz	L(exit)
    834 
    835 	lea	(%ecx, %edi,1), %eax
    836 	lea	7(%ecx, %esi,1), %edx
    837 	POP (%edi)
    838 	POP (%esi)
    839 	jmp	L(less48bytes)
    840 
    841 	CFI_PUSH (%esi)
    842 	CFI_PUSH (%edi)
    843 	ALIGN (4)
    844 L(shr_8):
    845 	cmp	$80, %ecx
    846 	lea	-48(%ecx), %ecx
    847 	mov	%edx, %eax
    848 	jae	L(shr_8_gobble)
    849 
    850 	movdqa	16(%esi), %xmm1
    851 	movdqa	%xmm1, %xmm2
    852 	palignr	$8,(%esi), %xmm1
    853 	pcmpeqb	(%edi), %xmm1
    854 
    855 	movdqa	32(%esi), %xmm3
    856 	palignr	$8,%xmm2, %xmm3
    857 	pcmpeqb	16(%edi), %xmm3
    858 
    859 	pand	%xmm1, %xmm3
    860 	pmovmskb %xmm3, %edx
    861 	lea	32(%edi), %edi
    862 	lea	32(%esi), %esi
    863 	sub	$0xffff, %edx
    864 	jnz	L(exit)
    865 	lea	(%ecx, %edi,1), %eax
    866 	lea	8(%ecx, %esi,1), %edx
    867 	POP (%edi)
    868 	POP (%esi)
    869 	jmp	L(less48bytes)
    870 
    871 	CFI_PUSH (%esi)
    872 	CFI_PUSH (%edi)
    873 	ALIGN (4)
    874 L(shr_8_gobble):
    875 	sub	$32, %ecx
    876 	movdqa	16(%esi), %xmm0
    877 	palignr	$8,(%esi), %xmm0
    878 	pcmpeqb	(%edi), %xmm0
    879 
    880 	movdqa	32(%esi), %xmm3
    881 	palignr	$8,16(%esi), %xmm3
    882 	pcmpeqb	16(%edi), %xmm3
    883 
    884 L(shr_8_gobble_loop):
    885 	pand	%xmm0, %xmm3
    886 	sub	$32, %ecx
    887 	pmovmskb %xmm3, %edx
    888 	movdqa	%xmm0, %xmm1
    889 
    890 	movdqa	64(%esi), %xmm3
    891 	palignr	$8,48(%esi), %xmm3
    892 	sbb	$0xffff, %edx
    893 	movdqa	48(%esi), %xmm0
    894 	palignr	$8,32(%esi), %xmm0
    895 	pcmpeqb	32(%edi), %xmm0
    896 	lea	32(%esi), %esi
    897 	pcmpeqb	48(%edi), %xmm3
    898 
    899 	lea	32(%edi), %edi
    900 	jz	L(shr_8_gobble_loop)
    901 	pand	%xmm0, %xmm3
    902 
    903 	cmp	$0, %ecx
    904 	jge	L(shr_8_gobble_next)
    905 	inc	%edx
    906 	add	$32, %ecx
    907 L(shr_8_gobble_next):
    908 	test	%edx, %edx
    909 	jnz	L(exit)
    910 
    911 	pmovmskb %xmm3, %edx
    912 	movdqa	%xmm0, %xmm1
    913 	lea	32(%edi), %edi
    914 	lea	32(%esi), %esi
    915 	sub	$0xffff, %edx
    916 	jnz	L(exit)
    917 
    918 	lea	(%ecx, %edi,1), %eax
    919 	lea	8(%ecx, %esi,1), %edx
    920 	POP (%edi)
    921 	POP (%esi)
    922 	jmp	L(less48bytes)
    923 
    924 	CFI_PUSH (%esi)
    925 	CFI_PUSH (%edi)
    926 	ALIGN (4)
    927 L(shr_9):
    928 	cmp	$80, %ecx
    929 	lea	-48(%ecx), %ecx
    930 	mov	%edx, %eax
    931 	jae	L(shr_9_gobble)
    932 
    933 	movdqa	16(%esi), %xmm1
    934 	movdqa	%xmm1, %xmm2
    935 	palignr	$9,(%esi), %xmm1
    936 	pcmpeqb	(%edi), %xmm1
    937 
    938 	movdqa	32(%esi), %xmm3
    939 	palignr	$9,%xmm2, %xmm3
    940 	pcmpeqb	16(%edi), %xmm3
    941 
    942 	pand	%xmm1, %xmm3
    943 	pmovmskb %xmm3, %edx
    944 	lea	32(%edi), %edi
    945 	lea	32(%esi), %esi
    946 	sub	$0xffff, %edx
    947 	jnz	L(exit)
    948 	lea	(%ecx, %edi,1), %eax
    949 	lea	9(%ecx, %esi,1), %edx
    950 	POP (%edi)
    951 	POP (%esi)
    952 	jmp	L(less48bytes)
    953 
    954 	CFI_PUSH (%esi)
    955 	CFI_PUSH (%edi)
    956 	ALIGN (4)
    957 L(shr_9_gobble):
    958 	sub	$32, %ecx
    959 	movdqa	16(%esi), %xmm0
    960 	palignr	$9,(%esi), %xmm0
    961 	pcmpeqb	(%edi), %xmm0
    962 
    963 	movdqa	32(%esi), %xmm3
    964 	palignr	$9,16(%esi), %xmm3
    965 	pcmpeqb	16(%edi), %xmm3
    966 
    967 L(shr_9_gobble_loop):
    968 	pand	%xmm0, %xmm3
    969 	sub	$32, %ecx
    970 	pmovmskb %xmm3, %edx
    971 	movdqa	%xmm0, %xmm1
    972 
    973 	movdqa	64(%esi), %xmm3
    974 	palignr	$9,48(%esi), %xmm3
    975 	sbb	$0xffff, %edx
    976 	movdqa	48(%esi), %xmm0
    977 	palignr	$9,32(%esi), %xmm0
    978 	pcmpeqb	32(%edi), %xmm0
    979 	lea	32(%esi), %esi
    980 	pcmpeqb	48(%edi), %xmm3
    981 
    982 	lea	32(%edi), %edi
    983 	jz	L(shr_9_gobble_loop)
    984 	pand	%xmm0, %xmm3
    985 
    986 	cmp	$0, %ecx
    987 	jge	L(shr_9_gobble_next)
    988 	inc	%edx
    989 	add	$32, %ecx
    990 L(shr_9_gobble_next):
    991 	test	%edx, %edx
    992 	jnz	L(exit)
    993 
    994 	pmovmskb %xmm3, %edx
    995 	movdqa	%xmm0, %xmm1
    996 	lea	32(%edi), %edi
    997 	lea	32(%esi), %esi
    998 	sub	$0xffff, %edx
    999 	jnz	L(exit)
   1000 
   1001 	lea	(%ecx, %edi,1), %eax
   1002 	lea	9(%ecx, %esi,1), %edx
   1003 	POP (%edi)
   1004 	POP (%esi)
   1005 	jmp	L(less48bytes)
   1006 
   1007 	CFI_PUSH (%esi)
   1008 	CFI_PUSH (%edi)
   1009 	ALIGN (4)
   1010 L(shr_10):
   1011 	cmp	$80, %ecx
   1012 	lea	-48(%ecx), %ecx
   1013 	mov	%edx, %eax
   1014 	jae	L(shr_10_gobble)
   1015 
   1016 	movdqa	16(%esi), %xmm1
   1017 	movdqa	%xmm1, %xmm2
   1018 	palignr	$10, (%esi), %xmm1
   1019 	pcmpeqb	(%edi), %xmm1
   1020 
   1021 	movdqa	32(%esi), %xmm3
   1022 	palignr	$10,%xmm2, %xmm3
   1023 	pcmpeqb	16(%edi), %xmm3
   1024 
   1025 	pand	%xmm1, %xmm3
   1026 	pmovmskb %xmm3, %edx
   1027 	lea	32(%edi), %edi
   1028 	lea	32(%esi), %esi
   1029 	sub	$0xffff, %edx
   1030 	jnz	L(exit)
   1031 	lea	(%ecx, %edi,1), %eax
   1032 	lea	10(%ecx, %esi,1), %edx
   1033 	POP (%edi)
   1034 	POP (%esi)
   1035 	jmp	L(less48bytes)
   1036 
   1037 	CFI_PUSH (%esi)
   1038 	CFI_PUSH (%edi)
   1039 	ALIGN (4)
   1040 L(shr_10_gobble):
   1041 	sub	$32, %ecx
   1042 	movdqa	16(%esi), %xmm0
   1043 	palignr	$10, (%esi), %xmm0
   1044 	pcmpeqb	(%edi), %xmm0
   1045 
   1046 	movdqa	32(%esi), %xmm3
   1047 	palignr	$10, 16(%esi), %xmm3
   1048 	pcmpeqb	16(%edi), %xmm3
   1049 
   1050 L(shr_10_gobble_loop):
   1051 	pand	%xmm0, %xmm3
   1052 	sub	$32, %ecx
   1053 	pmovmskb %xmm3, %edx
   1054 	movdqa	%xmm0, %xmm1
   1055 
   1056 	movdqa	64(%esi), %xmm3
   1057 	palignr	$10,48(%esi), %xmm3
   1058 	sbb	$0xffff, %edx
   1059 	movdqa	48(%esi), %xmm0
   1060 	palignr	$10,32(%esi), %xmm0
   1061 	pcmpeqb	32(%edi), %xmm0
   1062 	lea	32(%esi), %esi
   1063 	pcmpeqb	48(%edi), %xmm3
   1064 
   1065 	lea	32(%edi), %edi
   1066 	jz	L(shr_10_gobble_loop)
   1067 	pand	%xmm0, %xmm3
   1068 
   1069 	cmp	$0, %ecx
   1070 	jge	L(shr_10_gobble_next)
   1071 	inc	%edx
   1072 	add	$32, %ecx
   1073 L(shr_10_gobble_next):
   1074 	test	%edx, %edx
   1075 	jnz	L(exit)
   1076 
   1077 	pmovmskb %xmm3, %edx
   1078 	movdqa	%xmm0, %xmm1
   1079 	lea	32(%edi), %edi
   1080 	lea	32(%esi), %esi
   1081 	sub	$0xffff, %edx
   1082 	jnz	L(exit)
   1083 
   1084 	lea	(%ecx, %edi,1), %eax
   1085 	lea	10(%ecx, %esi,1), %edx
   1086 	POP (%edi)
   1087 	POP (%esi)
   1088 	jmp	L(less48bytes)
   1089 
   1090 	CFI_PUSH (%esi)
   1091 	CFI_PUSH (%edi)
   1092 	ALIGN (4)
   1093 L(shr_11):
   1094 	cmp	$80, %ecx
   1095 	lea	-48(%ecx), %ecx
   1096 	mov	%edx, %eax
   1097 	jae	L(shr_11_gobble)
   1098 
   1099 	movdqa	16(%esi), %xmm1
   1100 	movdqa	%xmm1, %xmm2
   1101 	palignr	$11, (%esi), %xmm1
   1102 	pcmpeqb	(%edi), %xmm1
   1103 
   1104 	movdqa	32(%esi), %xmm3
   1105 	palignr	$11, %xmm2, %xmm3
   1106 	pcmpeqb	16(%edi), %xmm3
   1107 
   1108 	pand	%xmm1, %xmm3
   1109 	pmovmskb %xmm3, %edx
   1110 	lea	32(%edi), %edi
   1111 	lea	32(%esi), %esi
   1112 	sub	$0xffff, %edx
   1113 	jnz	L(exit)
   1114 	lea	(%ecx, %edi,1), %eax
   1115 	lea	11(%ecx, %esi,1), %edx
   1116 	POP (%edi)
   1117 	POP (%esi)
   1118 	jmp	L(less48bytes)
   1119 
   1120 	CFI_PUSH (%esi)
   1121 	CFI_PUSH (%edi)
   1122 	ALIGN (4)
   1123 L(shr_11_gobble):
   1124 	sub	$32, %ecx
   1125 	movdqa	16(%esi), %xmm0
   1126 	palignr	$11, (%esi), %xmm0
   1127 	pcmpeqb	(%edi), %xmm0
   1128 
   1129 	movdqa	32(%esi), %xmm3
   1130 	palignr	$11, 16(%esi), %xmm3
   1131 	pcmpeqb	16(%edi), %xmm3
   1132 
   1133 L(shr_11_gobble_loop):
   1134 	pand	%xmm0, %xmm3
   1135 	sub	$32, %ecx
   1136 	pmovmskb %xmm3, %edx
   1137 	movdqa	%xmm0, %xmm1
   1138 
   1139 	movdqa	64(%esi), %xmm3
   1140 	palignr	$11,48(%esi), %xmm3
   1141 	sbb	$0xffff, %edx
   1142 	movdqa	48(%esi), %xmm0
   1143 	palignr	$11,32(%esi), %xmm0
   1144 	pcmpeqb	32(%edi), %xmm0
   1145 	lea	32(%esi), %esi
   1146 	pcmpeqb	48(%edi), %xmm3
   1147 
   1148 	lea	32(%edi), %edi
   1149 	jz	L(shr_11_gobble_loop)
   1150 	pand	%xmm0, %xmm3
   1151 
   1152 	cmp	$0, %ecx
   1153 	jge	L(shr_11_gobble_next)
   1154 	inc	%edx
   1155 	add	$32, %ecx
   1156 L(shr_11_gobble_next):
   1157 	test	%edx, %edx
   1158 	jnz	L(exit)
   1159 
   1160 	pmovmskb %xmm3, %edx
   1161 	movdqa	%xmm0, %xmm1
   1162 	lea	32(%edi), %edi
   1163 	lea	32(%esi), %esi
   1164 	sub	$0xffff, %edx
   1165 	jnz	L(exit)
   1166 
   1167 	lea	(%ecx, %edi,1), %eax
   1168 	lea	11(%ecx, %esi,1), %edx
   1169 	POP (%edi)
   1170 	POP (%esi)
   1171 	jmp	L(less48bytes)
   1172 
   1173 	CFI_PUSH (%esi)
   1174 	CFI_PUSH (%edi)
   1175 	ALIGN (4)
   1176 L(shr_12):
   1177 	cmp	$80, %ecx
   1178 	lea	-48(%ecx), %ecx
   1179 	mov	%edx, %eax
   1180 	jae	L(shr_12_gobble)
   1181 
   1182 	movdqa	16(%esi), %xmm1
   1183 	movdqa	%xmm1, %xmm2
   1184 	palignr	$12, (%esi), %xmm1
   1185 	pcmpeqb	(%edi), %xmm1
   1186 
   1187 	movdqa	32(%esi), %xmm3
   1188 	palignr	$12, %xmm2, %xmm3
   1189 	pcmpeqb	16(%edi), %xmm3
   1190 
   1191 	pand	%xmm1, %xmm3
   1192 	pmovmskb %xmm3, %edx
   1193 	lea	32(%edi), %edi
   1194 	lea	32(%esi), %esi
   1195 	sub	$0xffff, %edx
   1196 	jnz	L(exit)
   1197 	lea	(%ecx, %edi,1), %eax
   1198 	lea	12(%ecx, %esi,1), %edx
   1199 	POP (%edi)
   1200 	POP (%esi)
   1201 	jmp	L(less48bytes)
   1202 
   1203 	CFI_PUSH (%esi)
   1204 	CFI_PUSH (%edi)
   1205 	ALIGN (4)
        /*
         * L(shr_12_gobble): looping form of L(shr_12).  Each iteration tests
         * the 32-byte window whose compare results are already in
         * %xmm0 / %xmm3 while computing the results for the next window.
         *
         * Register roles as used below:
         *   %xmm0 / %xmm3  pcmpeqb results for the low / high 16 bytes
         *   %xmm1          copy of %xmm0 for the window under test (read by
         *                  L(exit))
         *   %ecx           running length counter, reduced by 32 per window
         *   %edx           movemask test value; zero means "all equal"
         *
         * The loop depends on flag flow: CF from "sub $32, %ecx" feeds the
         * sbb, and ZF from the sbb feeds the jz.  The instructions between
         * them (SSE ops and lea) do not modify EFLAGS, so their order must
         * not be changed to anything that does.
         *
         * NOTE(review): CFI_PUSH / POP are macros defined outside this chunk.
         */
   1206 L(shr_12_gobble):
   1207 	sub	$32, %ecx
   1208 	movdqa	16(%esi), %xmm0
   1209 	palignr	$12, (%esi), %xmm0	/* 16 bytes starting at esi + 12 */
   1210 	pcmpeqb	(%edi), %xmm0
   1211 
   1212 	movdqa	32(%esi), %xmm3
   1213 	palignr	$12, 16(%esi), %xmm3	/* 16 bytes starting at esi + 28 */
   1214 	pcmpeqb	16(%edi), %xmm3
   1215 
   1216 L(shr_12_gobble_loop):
   1217 	pand	%xmm0, %xmm3		/* fold both halves of the current window */
   1218 	sub	$32, %ecx		/* CF = borrow, consumed by the sbb below */
   1219 	pmovmskb %xmm3, %edx
   1220 	movdqa	%xmm0, %xmm1		/* first-half result, kept for L(exit) */
   1221 
   1222 	movdqa	64(%esi), %xmm3		/* start on the next window, high half */
   1223 	palignr	$12,48(%esi), %xmm3
   1224 	sbb	$0xffff, %edx		/* mask - 0xffff - CF: zero only if all 32 bytes matched and no borrow */
   1225 	movdqa	48(%esi), %xmm0		/* next window, low half */
   1226 	palignr	$12,32(%esi), %xmm0
   1227 	pcmpeqb	32(%edi), %xmm0
   1228 	lea	32(%esi), %esi
   1229 	pcmpeqb	48(%edi), %xmm3
   1230 
   1231 	lea	32(%edi), %edi
   1232 	jz	L(shr_12_gobble_loop)	/* still ZF from the sbb */
   1233 	pand	%xmm0, %xmm3		/* fold the already-computed next window */
   1234 
   1235 	cmp	$0, %ecx
   1236 	jge	L(shr_12_gobble_next)	/* counter not negative: edx needs no fix-up */
   1237 	inc	%edx			/* cancel the extra 1 the sbb borrow subtracted */
   1238 	add	$32, %ecx		/* undo the last counter step */
   1239 L(shr_12_gobble_next):
   1240 	test	%edx, %edx
   1241 	jnz	L(exit)			/* mismatch in the window just tested */
   1242 
   1243 	pmovmskb %xmm3, %edx		/* now test the pre-computed next window */
   1244 	movdqa	%xmm0, %xmm1
   1245 	lea	32(%edi), %edi
   1246 	lea	32(%esi), %esi
   1247 	sub	$0xffff, %edx		/* zero iff all 32 bytes matched */
   1248 	jnz	L(exit)
   1249 
   1250 	lea	(%ecx, %edi,1), %eax	/* end pointers for the tail compare */
   1251 	lea	12(%ecx, %esi,1), %edx	/* +12 re-applies the data offset within esi */
   1252 	POP (%edi)
   1253 	POP (%esi)
   1254 	jmp	L(less48bytes)
   1255 
   1256 	CFI_PUSH (%esi)
   1257 	CFI_PUSH (%edi)
   1258 	ALIGN (4)
        /*
         * L(shr_13): compare one 32-byte window when the %esi-side data
         * starts 13 bytes past the address held in %esi.
         *
         * In:   %edi, %esi  must be 16-byte aligned here (movdqa loads and
         *                   pcmpeqb memory operands are used on them).
         *       %ecx        length counter; >= 80 diverts to the gobble loop.
         *       %edx        copied to %eax, which L(exit) later adds to %esi
         *                   (presumably the shift amount 13 -- confirm
         *                   against the dispatch code, not visible here).
         * Out:  mismatch  -> L(exit) with %xmm1 = pcmpeqb result for the
         *                    first 16 bytes, %edx = (32-byte mask - 0xffff),
         *                    %edi/%esi advanced by 32.
         *       all equal -> L(less48bytes) with %eax = %edi + %ecx,
         *                    %edx = %esi + 13 + %ecx (pointers already
         *                    advanced by 32) and %ecx = entry %ecx - 48.
         *
         * NOTE(review): CFI_PUSH / POP are macros defined outside this chunk;
         * the CFI_PUSH pair above presumably only re-declares %esi/%edi as
         * saved for the unwinder after the POPs on the preceding path.
         */
   1259 L(shr_13):
   1260 	cmp	$80, %ecx		/* result is consumed by the jae below */
   1261 	lea	-48(%ecx), %ecx		/* ecx -= 48; lea leaves the cmp flags intact */
   1262 	mov	%edx, %eax		/* keep entry edx for L(exit) */
   1263 	jae	L(shr_13_gobble)	/* original ecx >= 80 (unsigned) */
   1264 
   1265 	movdqa	16(%esi), %xmm1
   1266 	movdqa	%xmm1, %xmm2		/* keep the aligned chunk for the second palignr */
   1267 	palignr	$13, (%esi), %xmm1	/* xmm1 = 16 bytes starting at esi + 13 */
   1268 	pcmpeqb	(%edi), %xmm1		/* 0xff in every byte lane that matches */
   1269 
   1270 	movdqa	32(%esi), %xmm3
   1271 	palignr	$13, %xmm2, %xmm3	/* xmm3 = 16 bytes starting at esi + 29 */
   1272 	pcmpeqb	16(%edi), %xmm3
   1273 
   1274 	pand	%xmm1, %xmm3		/* lane stays 0xff only if it matched in both halves */
   1275 	pmovmskb %xmm3, %edx
   1276 	lea	32(%edi), %edi		/* advance first: L(exit) indexes backwards */
   1277 	lea	32(%esi), %esi
   1278 	sub	$0xffff, %edx		/* zero iff all 32 bytes matched */
   1279 	jnz	L(exit)
   1280 	lea	(%ecx, %edi,1), %eax	/* end pointers for the tail compare */
   1281 	lea	13(%ecx, %esi,1), %edx	/* +13 re-applies the data offset within esi */
   1282 	POP (%edi)
   1283 	POP (%esi)
   1284 	jmp	L(less48bytes)
   1285 
   1286 	CFI_PUSH (%esi)
   1287 	CFI_PUSH (%edi)
   1288 	ALIGN (4)
        /*
         * L(shr_13_gobble): looping form of L(shr_13).  Each iteration tests
         * the 32-byte window whose compare results are already in
         * %xmm0 / %xmm3 while computing the results for the next window.
         *
         * Register roles as used below:
         *   %xmm0 / %xmm3  pcmpeqb results for the low / high 16 bytes
         *   %xmm1          copy of %xmm0 for the window under test (read by
         *                  L(exit))
         *   %ecx           running length counter, reduced by 32 per window
         *   %edx           movemask test value; zero means "all equal"
         *
         * The loop depends on flag flow: CF from "sub $32, %ecx" feeds the
         * sbb, and ZF from the sbb feeds the jz.  The instructions between
         * them (SSE ops and lea) do not modify EFLAGS, so their order must
         * not be changed to anything that does.
         *
         * NOTE(review): CFI_PUSH / POP are macros defined outside this chunk.
         */
   1289 L(shr_13_gobble):
   1290 	sub	$32, %ecx
   1291 	movdqa	16(%esi), %xmm0
   1292 	palignr	$13, (%esi), %xmm0	/* 16 bytes starting at esi + 13 */
   1293 	pcmpeqb	(%edi), %xmm0
   1294 
   1295 	movdqa	32(%esi), %xmm3
   1296 	palignr	$13, 16(%esi), %xmm3	/* 16 bytes starting at esi + 29 */
   1297 	pcmpeqb	16(%edi), %xmm3
   1298 
   1299 L(shr_13_gobble_loop):
   1300 	pand	%xmm0, %xmm3		/* fold both halves of the current window */
   1301 	sub	$32, %ecx		/* CF = borrow, consumed by the sbb below */
   1302 	pmovmskb %xmm3, %edx
   1303 	movdqa	%xmm0, %xmm1		/* first-half result, kept for L(exit) */
   1304 
   1305 	movdqa	64(%esi), %xmm3		/* start on the next window, high half */
   1306 	palignr	$13,48(%esi), %xmm3
   1307 	sbb	$0xffff, %edx		/* mask - 0xffff - CF: zero only if all 32 bytes matched and no borrow */
   1308 	movdqa	48(%esi), %xmm0		/* next window, low half */
   1309 	palignr	$13,32(%esi), %xmm0
   1310 	pcmpeqb	32(%edi), %xmm0
   1311 	lea	32(%esi), %esi
   1312 	pcmpeqb	48(%edi), %xmm3
   1313 
   1314 	lea	32(%edi), %edi
   1315 	jz	L(shr_13_gobble_loop)	/* still ZF from the sbb */
   1316 	pand	%xmm0, %xmm3		/* fold the already-computed next window */
   1317 
   1318 	cmp	$0, %ecx
   1319 	jge	L(shr_13_gobble_next)	/* counter not negative: edx needs no fix-up */
   1320 	inc	%edx			/* cancel the extra 1 the sbb borrow subtracted */
   1321 	add	$32, %ecx		/* undo the last counter step */
   1322 L(shr_13_gobble_next):
   1323 	test	%edx, %edx
   1324 	jnz	L(exit)			/* mismatch in the window just tested */
   1325 
   1326 	pmovmskb %xmm3, %edx		/* now test the pre-computed next window */
   1327 	movdqa	%xmm0, %xmm1
   1328 	lea	32(%edi), %edi
   1329 	lea	32(%esi), %esi
   1330 	sub	$0xffff, %edx		/* zero iff all 32 bytes matched */
   1331 	jnz	L(exit)
   1332 
   1333 	lea	(%ecx, %edi,1), %eax	/* end pointers for the tail compare */
   1334 	lea	13(%ecx, %esi,1), %edx	/* +13 re-applies the data offset within esi */
   1335 	POP (%edi)
   1336 	POP (%esi)
   1337 	jmp	L(less48bytes)
   1338 
   1339 	CFI_PUSH (%esi)
   1340 	CFI_PUSH (%edi)
   1341 	ALIGN (4)
        /*
         * L(shr_14): compare one 32-byte window when the %esi-side data
         * starts 14 bytes past the address held in %esi.
         *
         * In:   %edi, %esi  must be 16-byte aligned here (movdqa loads and
         *                   pcmpeqb memory operands are used on them).
         *       %ecx        length counter; >= 80 diverts to the gobble loop.
         *       %edx        copied to %eax, which L(exit) later adds to %esi
         *                   (presumably the shift amount 14 -- confirm
         *                   against the dispatch code, not visible here).
         * Out:  mismatch  -> L(exit) with %xmm1 = pcmpeqb result for the
         *                    first 16 bytes, %edx = (32-byte mask - 0xffff),
         *                    %edi/%esi advanced by 32.
         *       all equal -> L(less48bytes) with %eax = %edi + %ecx,
         *                    %edx = %esi + 14 + %ecx (pointers already
         *                    advanced by 32) and %ecx = entry %ecx - 48.
         *
         * NOTE(review): CFI_PUSH / POP are macros defined outside this chunk;
         * the CFI_PUSH pair above presumably only re-declares %esi/%edi as
         * saved for the unwinder after the POPs on the preceding path.
         */
   1342 L(shr_14):
   1343 	cmp	$80, %ecx		/* result is consumed by the jae below */
   1344 	lea	-48(%ecx), %ecx		/* ecx -= 48; lea leaves the cmp flags intact */
   1345 	mov	%edx, %eax		/* keep entry edx for L(exit) */
   1346 	jae	L(shr_14_gobble)	/* original ecx >= 80 (unsigned) */
   1347 
   1348 	movdqa	16(%esi), %xmm1
   1349 	movdqa	%xmm1, %xmm2		/* keep the aligned chunk for the second palignr */
   1350 	palignr	$14, (%esi), %xmm1	/* xmm1 = 16 bytes starting at esi + 14 */
   1351 	pcmpeqb	(%edi), %xmm1		/* 0xff in every byte lane that matches */
   1352 
   1353 	movdqa	32(%esi), %xmm3
   1354 	palignr	$14, %xmm2, %xmm3	/* xmm3 = 16 bytes starting at esi + 30 */
   1355 	pcmpeqb	16(%edi), %xmm3
   1356 
   1357 	pand	%xmm1, %xmm3		/* lane stays 0xff only if it matched in both halves */
   1358 	pmovmskb %xmm3, %edx
   1359 	lea	32(%edi), %edi		/* advance first: L(exit) indexes backwards */
   1360 	lea	32(%esi), %esi
   1361 	sub	$0xffff, %edx		/* zero iff all 32 bytes matched */
   1362 	jnz	L(exit)
   1363 	lea	(%ecx, %edi,1), %eax	/* end pointers for the tail compare */
   1364 	lea	14(%ecx, %esi,1), %edx	/* +14 re-applies the data offset within esi */
   1365 	POP (%edi)
   1366 	POP (%esi)
   1367 	jmp	L(less48bytes)
   1368 
   1369 	CFI_PUSH (%esi)
   1370 	CFI_PUSH (%edi)
   1371 	ALIGN (4)
        /*
         * L(shr_14_gobble): looping form of L(shr_14).  Each iteration tests
         * the 32-byte window whose compare results are already in
         * %xmm0 / %xmm3 while computing the results for the next window.
         *
         * Register roles as used below:
         *   %xmm0 / %xmm3  pcmpeqb results for the low / high 16 bytes
         *   %xmm1          copy of %xmm0 for the window under test (read by
         *                  L(exit))
         *   %ecx           running length counter, reduced by 32 per window
         *   %edx           movemask test value; zero means "all equal"
         *
         * The loop depends on flag flow: CF from "sub $32, %ecx" feeds the
         * sbb, and ZF from the sbb feeds the jz.  The instructions between
         * them (SSE ops and lea) do not modify EFLAGS, so their order must
         * not be changed to anything that does.
         *
         * NOTE(review): CFI_PUSH / POP are macros defined outside this chunk.
         */
   1372 L(shr_14_gobble):
   1373 	sub	$32, %ecx
   1374 	movdqa	16(%esi), %xmm0
   1375 	palignr	$14, (%esi), %xmm0	/* 16 bytes starting at esi + 14 */
   1376 	pcmpeqb	(%edi), %xmm0
   1377 
   1378 	movdqa	32(%esi), %xmm3
   1379 	palignr	$14, 16(%esi), %xmm3	/* 16 bytes starting at esi + 30 */
   1380 	pcmpeqb	16(%edi), %xmm3
   1381 
   1382 L(shr_14_gobble_loop):
   1383 	pand	%xmm0, %xmm3		/* fold both halves of the current window */
   1384 	sub	$32, %ecx		/* CF = borrow, consumed by the sbb below */
   1385 	pmovmskb %xmm3, %edx
   1386 	movdqa	%xmm0, %xmm1		/* first-half result, kept for L(exit) */
   1387 
   1388 	movdqa	64(%esi), %xmm3		/* start on the next window, high half */
   1389 	palignr	$14,48(%esi), %xmm3
   1390 	sbb	$0xffff, %edx		/* mask - 0xffff - CF: zero only if all 32 bytes matched and no borrow */
   1391 	movdqa	48(%esi), %xmm0		/* next window, low half */
   1392 	palignr	$14,32(%esi), %xmm0
   1393 	pcmpeqb	32(%edi), %xmm0
   1394 	lea	32(%esi), %esi
   1395 	pcmpeqb	48(%edi), %xmm3
   1396 
   1397 	lea	32(%edi), %edi
   1398 	jz	L(shr_14_gobble_loop)	/* still ZF from the sbb */
   1399 	pand	%xmm0, %xmm3		/* fold the already-computed next window */
   1400 
   1401 	cmp	$0, %ecx
   1402 	jge	L(shr_14_gobble_next)	/* counter not negative: edx needs no fix-up */
   1403 	inc	%edx			/* cancel the extra 1 the sbb borrow subtracted */
   1404 	add	$32, %ecx		/* undo the last counter step */
   1405 L(shr_14_gobble_next):
   1406 	test	%edx, %edx
   1407 	jnz	L(exit)			/* mismatch in the window just tested */
   1408 
   1409 	pmovmskb %xmm3, %edx		/* now test the pre-computed next window */
   1410 	movdqa	%xmm0, %xmm1
   1411 	lea	32(%edi), %edi
   1412 	lea	32(%esi), %esi
   1413 	sub	$0xffff, %edx		/* zero iff all 32 bytes matched */
   1414 	jnz	L(exit)
   1415 
   1416 	lea	(%ecx, %edi,1), %eax	/* end pointers for the tail compare */
   1417 	lea	14(%ecx, %esi,1), %edx	/* +14 re-applies the data offset within esi */
   1418 	POP (%edi)
   1419 	POP (%esi)
   1420 	jmp	L(less48bytes)
   1421 
   1422 	CFI_PUSH (%esi)
   1423 	CFI_PUSH (%edi)
   1424 	ALIGN (4)
        /*
         * L(shr_15): compare one 32-byte window when the %esi-side data
         * starts 15 bytes past the address held in %esi.
         *
         * In:   %edi, %esi  must be 16-byte aligned here (movdqa loads and
         *                   pcmpeqb memory operands are used on them).
         *       %ecx        length counter; >= 80 diverts to the gobble loop.
         *       %edx        copied to %eax, which L(exit) later adds to %esi
         *                   (presumably the shift amount 15 -- confirm
         *                   against the dispatch code, not visible here).
         * Out:  mismatch  -> L(exit) with %xmm1 = pcmpeqb result for the
         *                    first 16 bytes, %edx = (32-byte mask - 0xffff),
         *                    %edi/%esi advanced by 32.
         *       all equal -> L(less48bytes) with %eax = %edi + %ecx,
         *                    %edx = %esi + 15 + %ecx (pointers already
         *                    advanced by 32) and %ecx = entry %ecx - 48.
         *
         * NOTE(review): CFI_PUSH / POP are macros defined outside this chunk;
         * the CFI_PUSH pair above presumably only re-declares %esi/%edi as
         * saved for the unwinder after the POPs on the preceding path.
         */
   1425 L(shr_15):
   1426 	cmp	$80, %ecx		/* result is consumed by the jae below */
   1427 	lea	-48(%ecx), %ecx		/* ecx -= 48; lea leaves the cmp flags intact */
   1428 	mov	%edx, %eax		/* keep entry edx for L(exit) */
   1429 	jae	L(shr_15_gobble)	/* original ecx >= 80 (unsigned) */
   1430 
   1431 	movdqa	16(%esi), %xmm1
   1432 	movdqa	%xmm1, %xmm2		/* keep the aligned chunk for the second palignr */
   1433 	palignr	$15, (%esi), %xmm1	/* xmm1 = 16 bytes starting at esi + 15 */
   1434 	pcmpeqb	(%edi), %xmm1		/* 0xff in every byte lane that matches */
   1435 
   1436 	movdqa	32(%esi), %xmm3
   1437 	palignr	$15, %xmm2, %xmm3	/* xmm3 = 16 bytes starting at esi + 31 */
   1438 	pcmpeqb	16(%edi), %xmm3
   1439 
   1440 	pand	%xmm1, %xmm3		/* lane stays 0xff only if it matched in both halves */
   1441 	pmovmskb %xmm3, %edx
   1442 	lea	32(%edi), %edi		/* advance first: L(exit) indexes backwards */
   1443 	lea	32(%esi), %esi
   1444 	sub	$0xffff, %edx		/* zero iff all 32 bytes matched */
   1445 	jnz	L(exit)
   1446 	lea	(%ecx, %edi,1), %eax	/* end pointers for the tail compare */
   1447 	lea	15(%ecx, %esi,1), %edx	/* +15 re-applies the data offset within esi */
   1448 	POP (%edi)
   1449 	POP (%esi)
   1450 	jmp	L(less48bytes)
   1451 
   1452 	CFI_PUSH (%esi)
   1453 	CFI_PUSH (%edi)
   1454 	ALIGN (4)
        /*
         * L(shr_15_gobble): looping form of L(shr_15).  Each iteration tests
         * the 32-byte window whose compare results are already in
         * %xmm0 / %xmm3 while computing the results for the next window.
         *
         * Register roles as used below:
         *   %xmm0 / %xmm3  pcmpeqb results for the low / high 16 bytes
         *   %xmm1          copy of %xmm0 for the window under test (read by
         *                  L(exit))
         *   %ecx           running length counter, reduced by 32 per window
         *   %edx           movemask test value; zero means "all equal"
         *
         * The loop depends on flag flow: CF from "sub $32, %ecx" feeds the
         * sbb, and ZF from the sbb feeds the jz.  The instructions between
         * them (SSE ops and lea) do not modify EFLAGS, so their order must
         * not be changed to anything that does.
         *
         * NOTE(review): CFI_PUSH / POP are macros defined outside this chunk.
         */
   1455 L(shr_15_gobble):
   1456 	sub	$32, %ecx
   1457 	movdqa	16(%esi), %xmm0
   1458 	palignr	$15, (%esi), %xmm0	/* 16 bytes starting at esi + 15 */
   1459 	pcmpeqb	(%edi), %xmm0
   1460 
   1461 	movdqa	32(%esi), %xmm3
   1462 	palignr	$15, 16(%esi), %xmm3	/* 16 bytes starting at esi + 31 */
   1463 	pcmpeqb	16(%edi), %xmm3
   1464 
   1465 L(shr_15_gobble_loop):
   1466 	pand	%xmm0, %xmm3		/* fold both halves of the current window */
   1467 	sub	$32, %ecx		/* CF = borrow, consumed by the sbb below */
   1468 	pmovmskb %xmm3, %edx
   1469 	movdqa	%xmm0, %xmm1		/* first-half result, kept for L(exit) */
   1470 
   1471 	movdqa	64(%esi), %xmm3		/* start on the next window, high half */
   1472 	palignr	$15,48(%esi), %xmm3
   1473 	sbb	$0xffff, %edx		/* mask - 0xffff - CF: zero only if all 32 bytes matched and no borrow */
   1474 	movdqa	48(%esi), %xmm0		/* next window, low half */
   1475 	palignr	$15,32(%esi), %xmm0
   1476 	pcmpeqb	32(%edi), %xmm0
   1477 	lea	32(%esi), %esi
   1478 	pcmpeqb	48(%edi), %xmm3
   1479 
   1480 	lea	32(%edi), %edi
   1481 	jz	L(shr_15_gobble_loop)	/* still ZF from the sbb */
   1482 	pand	%xmm0, %xmm3		/* fold the already-computed next window */
   1483 
   1484 	cmp	$0, %ecx
   1485 	jge	L(shr_15_gobble_next)	/* counter not negative: edx needs no fix-up */
   1486 	inc	%edx			/* cancel the extra 1 the sbb borrow subtracted */
   1487 	add	$32, %ecx		/* undo the last counter step */
   1488 L(shr_15_gobble_next):
   1489 	test	%edx, %edx
   1490 	jnz	L(exit)			/* mismatch in the window just tested */
   1491 
   1492 	pmovmskb %xmm3, %edx		/* now test the pre-computed next window */
   1493 	movdqa	%xmm0, %xmm1
   1494 	lea	32(%edi), %edi
   1495 	lea	32(%esi), %esi
   1496 	sub	$0xffff, %edx		/* zero iff all 32 bytes matched */
   1497 	jnz	L(exit)
   1498 
   1499 	lea	(%ecx, %edi,1), %eax	/* end pointers for the tail compare */
   1500 	lea	15(%ecx, %esi,1), %edx	/* +15 re-applies the data offset within esi */
   1501 	POP (%edi)
   1502 	POP (%esi)
   1503 	jmp	L(less48bytes)
   1504 
   1505 	CFI_PUSH (%esi)
   1506 	CFI_PUSH (%edi)
   1507 	ALIGN (4)
        /*
         * L(exit): a 32-byte window contained a mismatch.  Find the first
         * differing byte and return (byte via %edi) - (byte via %esi).
         *
         * In:   %edi, %esi  already advanced to the end of the 32-byte window
         *       %xmm1       pcmpeqb result for the window's first 16 bytes
         *       %edx        (movemask of the whole window) - 0xffff, nonzero
         *       %eax        amount added to %esi before bytes are read
         *                   (saved from %edx by the shr_N entry code)
         * Uses: %ebx as scratch.
         *
         * Bit trick: for a 16-bit mask m, m - 0xffff equals m + 1 in the low
         * 16 bits, so the lowest set bit of %dx is the lowest lane whose
         * compare bit was 0, i.e. the first mismatching byte.
         *
         * L(less16bytes) is also a plain label; entry points other than the
         * fall-through below are outside this chunk.
         *
         * NOTE(review): CFI_PUSH, RETURN and RETURN_END are macros defined
         * outside this chunk; RETURN presumably restores the saved registers
         * and returns with the result in %eax -- confirm.
         */
   1508 L(exit):
   1509 	pmovmskb %xmm1, %ebx
   1510 	sub	$0xffff, %ebx		/* zero iff the first 16 bytes all matched */
   1511 	jz	L(first16bytes)		/* mismatch is in the second 16: edx already describes it */
   1512 	lea	-16(%esi), %esi		/* mismatch is in the first 16: step back one chunk */
   1513 	lea	-16(%edi), %edi
   1514 	mov	%ebx, %edx		/* and use the first-half test value */
   1515 L(first16bytes):
   1516 	add	%eax, %esi		/* apply the saved offset to the esi side */
   1517 L(less16bytes):
   1518 	test	%dl, %dl
   1519 	jz	L(next_24_bytes)	/* nothing flagged in lanes 0..7 */
   1520 
   1521 	test	$0x01, %dl
   1522 	jnz	L(Byte16)
   1523 
   1524 	test	$0x02, %dl
   1525 	jnz	L(Byte17)
   1526 
   1527 	test	$0x04, %dl
   1528 	jnz	L(Byte18)
   1529 
   1530 	test	$0x08, %dl
   1531 	jnz	L(Byte19)
   1532 
   1533 	test	$0x10, %dl
   1534 	jnz	L(Byte20)
   1535 
   1536 	test	$0x20, %dl
   1537 	jnz	L(Byte21)
   1538 
   1539 	test	$0x40, %dl
   1540 	jnz	L(Byte22)
        	/* %dl is nonzero and bits 0..6 are clear, so bit 7 is set: fall through. */
   1541 L(Byte23):
   1542 	movzbl	 -9(%edi), %eax
   1543 	movzbl	 -9(%esi), %edx
   1544 	sub	%edx, %eax
   1545 	RETURN
   1546 
        	/*
        	 * L(Byte16)..L(Byte22): each returns the difference of the two
        	 * zero-extended bytes at the same negative offset from %edi / %esi
        	 * (-16 for Byte16 up to -10 for Byte22).
        	 */
   1547 	ALIGN (4)
   1548 L(Byte16):
   1549 	movzbl	 -16(%edi), %eax
   1550 	movzbl	 -16(%esi), %edx
   1551 	sub	%edx, %eax
   1552 	RETURN
   1553 
   1554 	ALIGN (4)
   1555 L(Byte17):
   1556 	movzbl	 -15(%edi), %eax
   1557 	movzbl	 -15(%esi), %edx
   1558 	sub	%edx, %eax
   1559 	RETURN
   1560 
   1561 	ALIGN (4)
   1562 L(Byte18):
   1563 	movzbl	 -14(%edi), %eax
   1564 	movzbl	 -14(%esi), %edx
   1565 	sub	%edx, %eax
   1566 	RETURN
   1567 
   1568 	ALIGN (4)
   1569 L(Byte19):
   1570 	movzbl	 -13(%edi), %eax
   1571 	movzbl	 -13(%esi), %edx
   1572 	sub	%edx, %eax
   1573 	RETURN
   1574 
   1575 	ALIGN (4)
   1576 L(Byte20):
   1577 	movzbl	 -12(%edi), %eax
   1578 	movzbl	 -12(%esi), %edx
   1579 	sub	%edx, %eax
   1580 	RETURN
   1581 
   1582 	ALIGN (4)
   1583 L(Byte21):
   1584 	movzbl	 -11(%edi), %eax
   1585 	movzbl	 -11(%esi), %edx
   1586 	sub	%edx, %eax
   1587 	RETURN
   1588 
   1589 	ALIGN (4)
   1590 L(Byte22):
   1591 	movzbl	 -10(%edi), %eax
   1592 	movzbl	 -10(%esi), %edx
   1593 	sub	%edx, %eax
   1594 	RETURN
   1595 
        	/*
        	 * L(next_24_bytes): the mismatch is in lanes 8..15.  Both pointers
        	 * are moved forward by 8 so the Byte16..Byte22 handlers above can
        	 * be reused for lanes 8..14; the bits are read from %dh.
        	 */
   1596 	ALIGN (4)
   1597 L(next_24_bytes):
   1598 	lea	8(%edi), %edi
   1599 	lea	8(%esi), %esi
   1600 	test	$0x01, %dh
   1601 	jnz	L(Byte16)
   1602 
   1603 	test	$0x02, %dh
   1604 	jnz	L(Byte17)
   1605 
   1606 	test	$0x04, %dh
   1607 	jnz	L(Byte18)
   1608 
   1609 	test	$0x08, %dh
   1610 	jnz	L(Byte19)
   1611 
   1612 	test	$0x10, %dh
   1613 	jnz	L(Byte20)
   1614 
   1615 	test	$0x20, %dh
   1616 	jnz	L(Byte21)
   1617 
   1618 	test	$0x40, %dh
   1619 	jnz	L(Byte22)
   1620 
        	/* None of bits 0..6 of %dh matched: handle lane 15 (offset -9 after the +8 above). */
   1621 	ALIGN (4)
   1622 L(Byte31):
   1623 	movzbl	 -9(%edi), %eax
   1624 	movzbl	 -9(%esi), %edx
   1625 	sub	%edx, %eax
   1626 	RETURN_END
   1627 	CFI_PUSH (%ebx)
   1628 
   1629 	ALIGN (4)
        /*
         * L(more8bytes): dispatch on the remaining length in %ecx.
         * Reached from L(less48bytes) when %ecx >= 8 (unsigned).
         * %ecx >= 16 is forwarded to L(more16bytes); 8..14 are matched
         * explicitly and everything else left (15) jumps to L(15bytes).
         * Only flags are modified here.
         *
         * NOTE(review): CFI_PUSH is a macro defined outside this chunk;
         * presumably it only records that %ebx is saved on the stack for the
         * code that follows -- confirm.
         */
   1630 L(more8bytes):
   1631 	cmp	$16, %ecx
   1632 	jae	L(more16bytes)
   1633 	cmp	$8, %ecx
   1634 	je	L(8bytes)
   1635 	cmp	$9, %ecx
   1636 	je	L(9bytes)
   1637 	cmp	$10, %ecx
   1638 	je	L(10bytes)
   1639 	cmp	$11, %ecx
   1640 	je	L(11bytes)
   1641 	cmp	$12, %ecx
   1642 	je	L(12bytes)
   1643 	cmp	$13, %ecx
   1644 	je	L(13bytes)
   1645 	cmp	$14, %ecx
   1646 	je	L(14bytes)
   1647 	jmp	L(15bytes)		/* only 15 is left in the 8..15 range */
   1648 
   1649 	ALIGN (4)
        /*
         * L(more16bytes): dispatch on the remaining length in %ecx, which is
         * >= 16 when reached from L(more8bytes).  %ecx >= 24 is forwarded to
         * L(more24bytes); 16..22 are matched explicitly and everything else
         * left (23) jumps to L(23bytes).  Only flags are modified here.
         */
   1650 L(more16bytes):
   1651 	cmp	$24, %ecx
   1652 	jae	L(more24bytes)
   1653 	cmp	$16, %ecx
   1654 	je	L(16bytes)
   1655 	cmp	$17, %ecx
   1656 	je	L(17bytes)
   1657 	cmp	$18, %ecx
   1658 	je	L(18bytes)
   1659 	cmp	$19, %ecx
   1660 	je	L(19bytes)
   1661 	cmp	$20, %ecx
   1662 	je	L(20bytes)
   1663 	cmp	$21, %ecx
   1664 	je	L(21bytes)
   1665 	cmp	$22, %ecx
   1666 	je	L(22bytes)
   1667 	jmp	L(23bytes)		/* only 23 is left in the 16..23 range */
   1668 
   1669 	ALIGN (4)
        /*
         * L(more24bytes): dispatch on the remaining length in %ecx, which is
         * >= 24 when reached from L(more16bytes).  %ecx >= 32 is forwarded to
         * L(more32bytes); 24..30 are matched explicitly and everything else
         * left (31) jumps to L(31bytes).  Only flags are modified here.
         */
   1670 L(more24bytes):
   1671 	cmp	$32, %ecx
   1672 	jae	L(more32bytes)
   1673 	cmp	$24, %ecx
   1674 	je	L(24bytes)
   1675 	cmp	$25, %ecx
   1676 	je	L(25bytes)
   1677 	cmp	$26, %ecx
   1678 	je	L(26bytes)
   1679 	cmp	$27, %ecx
   1680 	je	L(27bytes)
   1681 	cmp	$28, %ecx
   1682 	je	L(28bytes)
   1683 	cmp	$29, %ecx
   1684 	je	L(29bytes)
   1685 	cmp	$30, %ecx
   1686 	je	L(30bytes)
   1687 	jmp	L(31bytes)		/* only 31 is left in the 24..31 range */
   1688 
   1689 	ALIGN (4)
        /*
         * L(more32bytes): dispatch on the remaining length in %ecx, which is
         * >= 32 when reached from L(more24bytes).  %ecx >= 40 is forwarded to
         * L(more40bytes); 32..38 are matched explicitly and everything else
         * left (39) jumps to L(39bytes).  Only flags are modified here.
         */
   1690 L(more32bytes):
   1691 	cmp	$40, %ecx
   1692 	jae	L(more40bytes)
   1693 	cmp	$32, %ecx
   1694 	je	L(32bytes)
   1695 	cmp	$33, %ecx
   1696 	je	L(33bytes)
   1697 	cmp	$34, %ecx
   1698 	je	L(34bytes)
   1699 	cmp	$35, %ecx
   1700 	je	L(35bytes)
   1701 	cmp	$36, %ecx
   1702 	je	L(36bytes)
   1703 	cmp	$37, %ecx
   1704 	je	L(37bytes)
   1705 	cmp	$38, %ecx
   1706 	je	L(38bytes)
   1707 	jmp	L(39bytes)		/* only 39 is left in the 32..39 range */
   1708 
   1709 	ALIGN (4)
        /*
         * L(more40bytes): last stage of the length dispatch; %ecx >= 40 when
         * reached from L(more32bytes).  40..46 are matched explicitly and
         * any other value jumps to L(47bytes).  There is no upper-bound
         * check here.  Only flags are modified here.
         *
         * NOTE(review): presumably every path into the dispatch guarantees
         * %ecx <= 47 -- confirm against the callers of L(less48bytes).
         */
   1710 L(more40bytes):
   1711 	cmp	$40, %ecx
   1712 	je	L(40bytes)
   1713 	cmp	$41, %ecx
   1714 	je	L(41bytes)
   1715 	cmp	$42, %ecx
   1716 	je	L(42bytes)
   1717 	cmp	$43, %ecx
   1718 	je	L(43bytes)
   1719 	cmp	$44, %ecx
   1720 	je	L(44bytes)
   1721 	cmp	$45, %ecx
   1722 	je	L(45bytes)
   1723 	cmp	$46, %ecx
   1724 	je	L(46bytes)
   1725 	jmp	L(47bytes)		/* default case */
   1726 
   1727 	ALIGN (4)
        /*
         * L(less48bytes): entry of the tail compare.  Dispatches on the
         * remaining byte count in %ecx to one of the L(<n>bytes) chains.
         * Those chains read at negative offsets from %eax and %edx, so on
         * entry both registers point just past the last byte to compare.
         *
         * %ecx >= 8 goes to L(more8bytes); 2..6 are matched explicitly and
         * any other value below 8 jumps to L(7bytes).
         *
         * NOTE(review): that default also catches %ecx == 0 and %ecx == 1,
         * for which L(7bytes) reads the 7 bytes before the end pointers.
         * Presumably paths that can arrive with such a remainder have
         * already compared those preceding bytes equal -- confirm against
         * the entry code, which is outside this chunk.
         */
   1728 L(less48bytes):
   1729 	cmp	$8, %ecx
   1730 	jae	L(more8bytes)
   1731 	cmp	$2, %ecx
   1732 	je	L(2bytes)
   1733 	cmp	$3, %ecx
   1734 	je	L(3bytes)
   1735 	cmp	$4, %ecx
   1736 	je	L(4bytes)
   1737 	cmp	$5, %ecx
   1738 	je	L(5bytes)
   1739 	cmp	$6, %ecx
   1740 	je	L(6bytes)
   1741 	jmp	L(7bytes)		/* default for every remaining value below 8 */
   1742 
   1743 
   1744 	ALIGN (4)
        /*
         * L(44bytes) .. L(4bytes): tail compare for remaining lengths that
         * are a multiple of 4.  Each label compares one 32-bit word and
         * falls through to the next shorter length.
         *
         * In:   %eax, %edx  point just past the last byte of each buffer
         *                   (all accesses use negative offsets).
         * Out:  all words equal -> %eax = 0, %ebx popped, ret.
         *       first unequal   -> L(find_diff) with %ecx = word read via
         *                          %eax and %ebx = word read via %edx.
         * Uses: %ecx and %ebx as scratch.
         *
         * NOTE(review): POP is a macro defined outside this chunk.
         */
   1745 L(44bytes):
   1746 	mov	-44(%eax), %ecx
   1747 	mov	-44(%edx), %ebx
   1748 	cmp	%ebx, %ecx
   1749 	jne	L(find_diff)
   1750 L(40bytes):
   1751 	mov	-40(%eax), %ecx
   1752 	mov	-40(%edx), %ebx
   1753 	cmp	%ebx, %ecx
   1754 	jne	L(find_diff)
   1755 L(36bytes):
   1756 	mov	-36(%eax), %ecx
   1757 	mov	-36(%edx), %ebx
   1758 	cmp	%ebx, %ecx
   1759 	jne	L(find_diff)
   1760 L(32bytes):
   1761 	mov	-32(%eax), %ecx
   1762 	mov	-32(%edx), %ebx
   1763 	cmp	%ebx, %ecx
   1764 	jne	L(find_diff)
   1765 L(28bytes):
   1766 	mov	-28(%eax), %ecx
   1767 	mov	-28(%edx), %ebx
   1768 	cmp	%ebx, %ecx
   1769 	jne	L(find_diff)
   1770 L(24bytes):
   1771 	mov	-24(%eax), %ecx
   1772 	mov	-24(%edx), %ebx
   1773 	cmp	%ebx, %ecx
   1774 	jne	L(find_diff)
   1775 L(20bytes):
   1776 	mov	-20(%eax), %ecx
   1777 	mov	-20(%edx), %ebx
   1778 	cmp	%ebx, %ecx
   1779 	jne	L(find_diff)
   1780 L(16bytes):
   1781 	mov	-16(%eax), %ecx
   1782 	mov	-16(%edx), %ebx
   1783 	cmp	%ebx, %ecx
   1784 	jne	L(find_diff)
   1785 L(12bytes):
   1786 	mov	-12(%eax), %ecx
   1787 	mov	-12(%edx), %ebx
   1788 	cmp	%ebx, %ecx
   1789 	jne	L(find_diff)
   1790 L(8bytes):
   1791 	mov	-8(%eax), %ecx
   1792 	mov	-8(%edx), %ebx
   1793 	cmp	%ebx, %ecx
   1794 	jne	L(find_diff)
   1795 L(4bytes):
   1796 	mov	-4(%eax), %ecx
   1797 	mov	-4(%edx), %ebx
   1798 	cmp	%ebx, %ecx
   1799 	mov	$0, %eax		/* result for the equal case; mov (unlike xor) keeps the cmp flags for the jne */
   1800 	jne	L(find_diff)
   1801 	POP (%ebx)
   1802 	ret
   1803 	CFI_PUSH (%ebx)
   1804 
   1805 	ALIGN (4)
        /*
         * L(45bytes) .. L(5bytes): tail compare for remaining lengths of the
         * form 4k + 1.  Each label compares one 32-bit word and falls
         * through to the next shorter length; after L(5bytes) the single
         * last byte at offset -1 is compared.
         *
         * In:   %eax, %edx  point just past the last byte of each buffer
         *                   (all accesses use negative offsets).
         * Out:  everything equal -> %eax = 0, %ebx popped, ret.
         *       unequal word     -> L(find_diff) with %ecx = word read via
         *                           %eax and %ebx = word read via %edx.
         *       unequal last byte-> L(end) with the flags of the byte cmp.
         * Uses: %ecx and %ebx as scratch.
         *
         * NOTE(review): CFI_PUSH / POP are macros defined outside this chunk.
         */
   1806 L(45bytes):
   1807 	mov	-45(%eax), %ecx
   1808 	mov	-45(%edx), %ebx
   1809 	cmp	%ebx, %ecx
   1810 	jne	L(find_diff)
   1811 L(41bytes):
   1812 	mov	-41(%eax), %ecx
   1813 	mov	-41(%edx), %ebx
   1814 	cmp	%ebx, %ecx
   1815 	jne	L(find_diff)
   1816 L(37bytes):
   1817 	mov	-37(%eax), %ecx
   1818 	mov	-37(%edx), %ebx
   1819 	cmp	%ebx, %ecx
   1820 	jne	L(find_diff)
   1821 L(33bytes):
   1822 	mov	-33(%eax), %ecx
   1823 	mov	-33(%edx), %ebx
   1824 	cmp	%ebx, %ecx
   1825 	jne	L(find_diff)
   1826 L(29bytes):
   1827 	mov	-29(%eax), %ecx
   1828 	mov	-29(%edx), %ebx
   1829 	cmp	%ebx, %ecx
   1830 	jne	L(find_diff)
   1831 L(25bytes):
   1832 	mov	-25(%eax), %ecx
   1833 	mov	-25(%edx), %ebx
   1834 	cmp	%ebx, %ecx
   1835 	jne	L(find_diff)
   1836 L(21bytes):
   1837 	mov	-21(%eax), %ecx
   1838 	mov	-21(%edx), %ebx
   1839 	cmp	%ebx, %ecx
   1840 	jne	L(find_diff)
   1841 L(17bytes):
   1842 	mov	-17(%eax), %ecx
   1843 	mov	-17(%edx), %ebx
   1844 	cmp	%ebx, %ecx
   1845 	jne	L(find_diff)
   1846 L(13bytes):
   1847 	mov	-13(%eax), %ecx
   1848 	mov	-13(%edx), %ebx
   1849 	cmp	%ebx, %ecx
   1850 	jne	L(find_diff)
   1851 L(9bytes):
   1852 	mov	-9(%eax), %ecx
   1853 	mov	-9(%edx), %ebx
   1854 	cmp	%ebx, %ecx
   1855 	jne	L(find_diff)
   1856 L(5bytes):
   1857 	mov	-5(%eax), %ecx
   1858 	mov	-5(%edx), %ebx
   1859 	cmp	%ebx, %ecx
   1860 	jne	L(find_diff)
   1861 	movzbl	-1(%eax), %ecx		/* the one remaining byte */
   1862 	cmp	-1(%edx), %cl
   1863 	mov	$0, %eax		/* result for the equal case; mov keeps the cmp flags for the jne */
   1864 	jne	L(end)
   1865 	POP (%ebx)
   1866 	ret
   1867 	CFI_PUSH (%ebx)
   1868 
   1869 	ALIGN (4)
        /*
         * L(46bytes) .. L(2bytes): tail compare for remaining lengths of the
         * form 4k + 2.  Each label down to L(6bytes) compares one 32-bit
         * word and falls through; L(2bytes) loads the last two bytes as one
         * 16-bit word and compares them one byte at a time, lower address
         * first (%bl/%cl, then %bh/%ch).
         *
         * In:   %eax, %edx  point just past the last byte of each buffer
         *                   (all accesses use negative offsets).
         * Out:  everything equal -> %eax = 0, %ebx popped, ret.
         *       unequal word     -> L(find_diff) with %ecx = word read via
         *                           %eax and %ebx = word read via %edx.
         *       unequal byte     -> L(end) with the flags of the byte cmp.
         * Uses: %ecx and %ebx as scratch.
         *
         * NOTE(review): CFI_PUSH / POP are macros defined outside this chunk.
         */
   1870 L(46bytes):
   1871 	mov	-46(%eax), %ecx
   1872 	mov	-46(%edx), %ebx
   1873 	cmp	%ebx, %ecx
   1874 	jne	L(find_diff)
   1875 L(42bytes):
   1876 	mov	-42(%eax), %ecx
   1877 	mov	-42(%edx), %ebx
   1878 	cmp	%ebx, %ecx
   1879 	jne	L(find_diff)
   1880 L(38bytes):
   1881 	mov	-38(%eax), %ecx
   1882 	mov	-38(%edx), %ebx
   1883 	cmp	%ebx, %ecx
   1884 	jne	L(find_diff)
   1885 L(34bytes):
   1886 	mov	-34(%eax), %ecx
   1887 	mov	-34(%edx), %ebx
   1888 	cmp	%ebx, %ecx
   1889 	jne	L(find_diff)
   1890 L(30bytes):
   1891 	mov	-30(%eax), %ecx
   1892 	mov	-30(%edx), %ebx
   1893 	cmp	%ebx, %ecx
   1894 	jne	L(find_diff)
   1895 L(26bytes):
   1896 	mov	-26(%eax), %ecx
   1897 	mov	-26(%edx), %ebx
   1898 	cmp	%ebx, %ecx
   1899 	jne	L(find_diff)
   1900 L(22bytes):
   1901 	mov	-22(%eax), %ecx
   1902 	mov	-22(%edx), %ebx
   1903 	cmp	%ebx, %ecx
   1904 	jne	L(find_diff)
   1905 L(18bytes):
   1906 	mov	-18(%eax), %ecx
   1907 	mov	-18(%edx), %ebx
   1908 	cmp	%ebx, %ecx
   1909 	jne	L(find_diff)
   1910 L(14bytes):
   1911 	mov	-14(%eax), %ecx
   1912 	mov	-14(%edx), %ebx
   1913 	cmp	%ebx, %ecx
   1914 	jne	L(find_diff)
   1915 L(10bytes):
   1916 	mov	-10(%eax), %ecx
   1917 	mov	-10(%edx), %ebx
   1918 	cmp	%ebx, %ecx
   1919 	jne	L(find_diff)
   1920 L(6bytes):
   1921 	mov	-6(%eax), %ecx
   1922 	mov	-6(%edx), %ebx
   1923 	cmp	%ebx, %ecx
   1924 	jne	L(find_diff)
   1925 L(2bytes):
   1926 	movzwl	-2(%eax), %ecx
   1927 	movzwl	-2(%edx), %ebx
   1928 	cmp	%bl, %cl		/* byte at offset -2 */
   1929 	jne	L(end)
   1930 	cmp	%bh, %ch		/* byte at offset -1 */
   1931 	mov	$0, %eax		/* result for the equal case; mov keeps the cmp flags for the jne */
   1932 	jne	L(end)
   1933 	POP (%ebx)
   1934 	ret
   1935 	CFI_PUSH (%ebx)
   1936 
   1937 	ALIGN (4)
        /*
         * L(47bytes) .. L(3bytes): tail compare for remaining lengths of the
         * form 4k + 3.  Each label down to L(7bytes) compares one 32-bit
         * word and falls through; L(3bytes) handles the last three bytes:
         * a 16-bit load at offset -3 (compared low byte first, then as a
         * whole word, which is decided by its second byte once the first
         * matched) followed by the single byte at offset -1.
         *
         * In:   %eax, %edx  point just past the last byte of each buffer
         *                   (all accesses use negative offsets).
         * Out:  everything equal -> %eax = 0, %ebx popped, ret.
         *       unequal word     -> L(find_diff) with %ecx = word read via
         *                           %eax and %ebx = word read via %edx.
         *       unequal byte     -> L(end) with the flags of that cmp.
         * Uses: %ecx and %ebx as scratch.
         *
         * NOTE(review): CFI_PUSH / POP are macros defined outside this chunk.
         */
   1938 L(47bytes):
   1939 	movl	-47(%eax), %ecx
   1940 	movl	-47(%edx), %ebx
   1941 	cmp	%ebx, %ecx
   1942 	jne	L(find_diff)
   1943 L(43bytes):
   1944 	movl	-43(%eax), %ecx
   1945 	movl	-43(%edx), %ebx
   1946 	cmp	%ebx, %ecx
   1947 	jne	L(find_diff)
   1948 L(39bytes):
   1949 	movl	-39(%eax), %ecx
   1950 	movl	-39(%edx), %ebx
   1951 	cmp	%ebx, %ecx
   1952 	jne	L(find_diff)
   1953 L(35bytes):
   1954 	movl	-35(%eax), %ecx
   1955 	movl	-35(%edx), %ebx
   1956 	cmp	%ebx, %ecx
   1957 	jne	L(find_diff)
   1958 L(31bytes):
   1959 	movl	-31(%eax), %ecx
   1960 	movl	-31(%edx), %ebx
   1961 	cmp	%ebx, %ecx
   1962 	jne	L(find_diff)
   1963 L(27bytes):
   1964 	movl	-27(%eax), %ecx
   1965 	movl	-27(%edx), %ebx
   1966 	cmp	%ebx, %ecx
   1967 	jne	L(find_diff)
   1968 L(23bytes):
   1969 	movl	-23(%eax), %ecx
   1970 	movl	-23(%edx), %ebx
   1971 	cmp	%ebx, %ecx
   1972 	jne	L(find_diff)
   1973 L(19bytes):
   1974 	movl	-19(%eax), %ecx
   1975 	movl	-19(%edx), %ebx
   1976 	cmp	%ebx, %ecx
   1977 	jne	L(find_diff)
   1978 L(15bytes):
   1979 	movl	-15(%eax), %ecx
   1980 	movl	-15(%edx), %ebx
   1981 	cmp	%ebx, %ecx
   1982 	jne	L(find_diff)
   1983 L(11bytes):
   1984 	movl	-11(%eax), %ecx
   1985 	movl	-11(%edx), %ebx
   1986 	cmp	%ebx, %ecx
   1987 	jne	L(find_diff)
   1988 L(7bytes):
   1989 	movl	-7(%eax), %ecx
   1990 	movl	-7(%edx), %ebx
   1991 	cmp	%ebx, %ecx
   1992 	jne	L(find_diff)
   1993 L(3bytes):
   1994 	movzwl	-3(%eax), %ecx
   1995 	movzwl	-3(%edx), %ebx
   1996 	cmpb	%bl, %cl		/* byte at offset -3 */
   1997 	jne	L(end)
   1998 	cmp	%bx, %cx		/* low bytes equal, so this is decided by the byte at offset -2 */
   1999 	jne	L(end)
   2000 	movzbl	-1(%eax), %eax		/* overwrites the %eax pointer; it is not needed again */
   2001 	cmpb	-1(%edx), %al
   2002 	mov	$0, %eax		/* result for the equal case; mov keeps the cmp flags for the jne */
   2003 	jne	L(end)
   2004 	POP (%ebx)
   2005 	ret
   2006 	CFI_PUSH (%ebx)
   2007 
   2008 	ALIGN (4)
        /*
         * L(find_diff): two 32-bit words differ; find the first differing
         * byte in memory order and return +1 or -1.
         *
         * In:   %ecx = word loaded via %eax, %ebx = word loaded via %edx
         *       (as set up by the L(<n>bytes) chains).
         * The words are examined lowest byte first.  Once the low bytes of a
         * 16-bit half are known equal, comparing the whole half is decided
         * by its upper byte.
         *
         * L(end): entered with the flags of an unsigned compare of
         * (%eax-side value) against (%edx-side value).  Returns 1 in %eax
         * when the %eax side is above, otherwise -1.  The mov does not
         * modify flags, so the ja still tests that compare.
         *
         * NOTE(review): CFI_PUSH / POP are macros defined outside this chunk;
         * the code relies on POP (%ebx) leaving EFLAGS untouched, as a plain
         * pop plus unwind annotations would -- confirm against the macro.
         */
   2009 L(find_diff):
   2010 	cmpb	%bl, %cl		/* byte 0 */
   2011 	jne	L(end)
   2012 	cmp	%bx, %cx		/* byte 1 (byte 0 is equal) */
   2013 	jne	L(end)
   2014 	shr	$16,%ecx		/* bring bytes 2 and 3 into the low half */
   2015 	shr	$16,%ebx
   2016 	cmp	%bl, %cl		/* byte 2 */
   2017 	jne	L(end)
   2018 	cmp	%bx, %cx		/* byte 3: falls through with these flags */
   2019 L(end):
   2020 	POP (%ebx)
   2021 	mov	$1, %eax
   2022 	ja	L(bigger)		/* unsigned "above" from the last compare */
   2023 	neg	%eax			/* otherwise return -1 */
   2024 L(bigger):
   2025 	ret
   2026 
   2027 END (MEMCMP)
   2028