      1 /*
      2 Copyright (c) 2010, Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     31 #ifndef L
     32 # define L(label)	.L##label
     33 #endif
     34 
     35 #ifndef cfi_startproc
     36 # define cfi_startproc			.cfi_startproc
     37 #endif
     38 
     39 #ifndef cfi_endproc
     40 # define cfi_endproc			.cfi_endproc
     41 #endif
     42 
     43 #ifndef cfi_rel_offset
     44 # define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
     45 #endif
     46 
     47 #ifndef cfi_restore
     48 # define cfi_restore(reg)		.cfi_restore reg
     49 #endif
     50 
     51 #ifndef cfi_adjust_cfa_offset
     52 # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
     53 #endif
     54 
     55 #ifndef cfi_remember_state
     56 # define cfi_remember_state		.cfi_remember_state
     57 #endif
     58 
     59 #ifndef cfi_restore_state
     60 # define cfi_restore_state		.cfi_restore_state
     61 #endif
     62 
     63 #ifndef ENTRY
     64 # define ENTRY(name)			\
     65 	.type name,  @function; 	\
     66 	.globl name;			\
     67 	.p2align 4;			\
     68 name:					\
     69 	cfi_startproc
     70 #endif
     71 
     72 #ifndef END
     73 # define END(name)			\
     74 	cfi_endproc;			\
     75 	.size name, .-name
     76 #endif
     77 
     78 #define CFI_PUSH(REG)						\
     79   cfi_adjust_cfa_offset (4);					\
     80   cfi_rel_offset (REG, 0)
     81 
     82 #define CFI_POP(REG)						\
     83   cfi_adjust_cfa_offset (-4);					\
     84   cfi_restore (REG)
     85 
     86 #define PUSH(REG)	pushl REG; CFI_PUSH (REG)
     87 #define POP(REG)	popl REG; CFI_POP (REG)
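
/*
 * Note: PUSH/POP wrap pushl/popl with the matching CFI annotations so the
 * unwind information stays correct at every point where a callee-saved
 * register is spilled or reloaded.
 */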
     88 
     89 #ifndef USE_AS_STRNCMP
     90 # define STR1		4
     91 # define STR2		STR1+4
     92 # define RETURN		ret
     93 
     94 # define UPDATE_STRNCMP_COUNTER
     95 #else
     96 # define STR1		8
     97 # define STR2		STR1+4
     98 # define CNT		STR2+4
     99 # define RETURN		POP (%ebp); ret; CFI_PUSH (%ebp)
    100 
    101 # define UPDATE_STRNCMP_COUNTER				\
     102 	/* calculate how many bytes are left to compare */	\
    103 	mov	$16, %esi;				\
    104 	sub	%ecx, %esi;				\
    105 	cmpl	%esi, %ebp;				\
    106 	jbe	L(more8byteseq);			\
    107 	sub	%esi, %ebp
    108 #endif
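
/*
 * UPDATE_STRNCMP_COUNTER (strncmp build only): after the first, possibly
 * partial, 16-byte chunk %ecx holds the offset of the %eax string within
 * its aligned block, so 16 - %ecx bytes have effectively been consumed.
 * Roughly (a sketch of the macro above; %ebp carries the remaining count):
 *
 *   left = 16 - ecx;
 *   if (count <= left)        // byte budget exhausted with no mismatch
 *       goto more8byteseq;    // strings compare equal within count
 *   count -= left;
 */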
    109 
    110 #ifndef STRCMP
    111 # define STRCMP strcmp
    112 #endif
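
/*
 * Entry interface used by this file:
 *   strcmp:  STR1 = 4(%esp), STR2 = 8(%esp)
 *   strncmp: %ebp is pushed first, so STR1 = 8(%esp), STR2 = 12(%esp),
 *            CNT = 16(%esp)
 * Throughout the routine %edx walks STR1 and %eax walks STR2.
 *
 * A rough C model of the result (a sketch of the semantics only, not of the
 * SSSE3 algorithm below; the exact nonzero magnitude may differ):
 *
 *   int ref_strcmp(const char *s1, const char *s2)
 *   {
 *       const unsigned char *p1 = (const unsigned char *)s1;
 *       const unsigned char *p2 = (const unsigned char *)s2;
 *       while (*p1 && *p1 == *p2)
 *           p1++, p2++;
 *       return *p1 - *p2;    // same sign as this routine's return value
 *   }
 */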
    113 
    114 	.section .text.ssse3,"ax",@progbits
    115 ENTRY (STRCMP)
    116 #ifdef USE_AS_STRNCMP
    117 	PUSH	(%ebp)
    118 #endif
    119 	movl	STR1(%esp), %edx
    120 	movl	STR2(%esp), %eax
    121 #ifdef USE_AS_STRNCMP
    122 	movl	CNT(%esp), %ebp
    123 	cmpl	$16, %ebp
    124 	jb	L(less16bytes_sncmp)
    125 	jmp	L(more16bytes)
    126 #endif
    127 
    128 	movzbl	(%eax), %ecx
    129 	cmpb	%cl, (%edx)
    130 	jne	L(neq)
    131 	cmpl	$0, %ecx
    132 	je	L(eq)
    133 
    134 	movzbl	1(%eax), %ecx
    135 	cmpb	%cl, 1(%edx)
    136 	jne	L(neq)
    137 	cmpl	$0, %ecx
    138 	je	L(eq)
    139 
    140 	movzbl	2(%eax), %ecx
    141 	cmpb	%cl, 2(%edx)
    142 	jne	L(neq)
    143 	cmpl	$0, %ecx
    144 	je	L(eq)
    145 
    146 	movzbl	3(%eax), %ecx
    147 	cmpb	%cl, 3(%edx)
    148 	jne	L(neq)
    149 	cmpl	$0, %ecx
    150 	je	L(eq)
    151 
    152 	movzbl	4(%eax), %ecx
    153 	cmpb	%cl, 4(%edx)
    154 	jne	L(neq)
    155 	cmpl	$0, %ecx
    156 	je	L(eq)
    157 
    158 	movzbl	5(%eax), %ecx
    159 	cmpb	%cl, 5(%edx)
    160 	jne	L(neq)
    161 	cmpl	$0, %ecx
    162 	je	L(eq)
    163 
    164 	movzbl	6(%eax), %ecx
    165 	cmpb	%cl, 6(%edx)
    166 	jne	L(neq)
    167 	cmpl	$0, %ecx
    168 	je	L(eq)
    169 
    170 	movzbl	7(%eax), %ecx
    171 	cmpb	%cl, 7(%edx)
    172 	jne	L(neq)
    173 	cmpl	$0, %ecx
    174 	je	L(eq)
    175 
    176 	add	$8, %edx
    177 	add	$8, %eax
    178 #ifdef USE_AS_STRNCMP
    179 	cmpl	$8, %ebp
    180 	lea	-8(%ebp), %ebp
    181 	je	L(eq)
    182 L(more16bytes):
    183 #endif
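/*
 * Try one 16-byte SSE compare at the current positions before dropping
 * into the alignment-specific loops.  The 16 bytes of unaligned loads
 * below are only safe when neither pointer is within 16 bytes of the end
 * of a 4K page; otherwise take the L(crosspage) path.  Roughly:
 *
 *   if (((uintptr_t)p & 0xfff) > 0xff0)   // p + 15 would cross a page
 *       goto crosspage;
 */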
    184 	movl	%edx, %ecx
    185 	and	$0xfff, %ecx
    186 	cmpl	$0xff0, %ecx
    187 	ja	L(crosspage)
    188 	mov	%eax, %ecx
    189 	and	$0xfff, %ecx
    190 	cmpl	$0xff0, %ecx
    191 	ja	L(crosspage)
    192 	pxor	%xmm0, %xmm0
    193 	movlpd	(%eax), %xmm1
    194 	movlpd	(%edx), %xmm2
    195 	movhpd	8(%eax), %xmm1
    196 	movhpd	8(%edx), %xmm2
    197 	pcmpeqb	%xmm1, %xmm0
    198 	pcmpeqb	%xmm2, %xmm1
    199 	psubb	%xmm0, %xmm1
    200 	pmovmskb %xmm1, %ecx
    201 	sub	$0xffff, %ecx
    202 	jnz	L(less16bytes)
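/*
 * The pcmpeqb/psubb/pmovmskb sequence above builds a per-byte mask:
 *   xmm0 = (xmm1 == 0)       0xff where the %eax string has a NUL
 *   xmm1 = (xmm1 == xmm2)    0xff where the two strings match
 *   xmm1 - xmm0              0xff only for "match and not NUL"
 * so the 16-bit pmovmskb result is 0xffff exactly when all 16 bytes match
 * and none terminates the string.  After "sub $0xffff" the value is zero
 * in the all-good case; otherwise its lowest set bit marks the first byte
 * that mismatched or hit the terminator, which L(less16bytes) decodes.
 */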
    203 #ifdef USE_AS_STRNCMP
    204 	cmpl	$16, %ebp
    205 	lea	-16(%ebp), %ebp
    206 	jbe	L(eq)
    207 #endif
    208 	add	$16, %eax
    209 	add	$16, %edx
    210 
    211 L(crosspage):
    212 
    213 	PUSH	(%ebx)
    214 	PUSH	(%edi)
    215 	PUSH	(%esi)
    216 #ifdef USE_AS_STRNCMP
    217 	cfi_remember_state
    218 #endif
    219 
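/*
 * Page-cross / unaligned path: compute each pointer's offset within its
 * 16-byte block (%ecx for the %eax string, %edi for the %edx string),
 * round both pointers down to 16-byte alignment, and note in bit 0x20 of
 * %ebx whether the two were exchanged so they can be swapped back before
 * the result is produced.  The difference between the two offsets then
 * selects one of the L(ashr_N) loops below; N is also recorded in the low
 * five bits of %ebx (0x10 for L(ashr_0)) and is the byte count fed to
 * palignr to realign the %edx stream against the aligned %eax loads.
 */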
    220 	movl	%edx, %edi
    221 	movl	%eax, %ecx
    222 	and	$0xf, %ecx
    223 	and	$0xf, %edi
    224 	xor	%ecx, %eax
    225 	xor	%edi, %edx
    226 	xor	%ebx, %ebx
    227 	cmpl	%edi, %ecx
    228 	je	L(ashr_0)
    229 	ja	L(bigger)
    230 	or	$0x20, %ebx
    231 	xchg	%edx, %eax
    232 	xchg	%ecx, %edi
    233 L(bigger):
    234 	lea	15(%edi), %edi
    235 	sub	%ecx, %edi
    236 	cmpl	$8, %edi
    237 	jle	L(ashr_less_8)
    238 	cmpl	$14, %edi
    239 	je	L(ashr_15)
    240 	cmpl	$13, %edi
    241 	je	L(ashr_14)
    242 	cmpl	$12, %edi
    243 	je	L(ashr_13)
    244 	cmpl	$11, %edi
    245 	je	L(ashr_12)
    246 	cmpl	$10, %edi
    247 	je	L(ashr_11)
    248 	cmpl	$9, %edi
    249 	je	L(ashr_10)
    250 L(ashr_less_8):
    251 	je	L(ashr_9)
    252 	cmpl	$7, %edi
    253 	je	L(ashr_8)
    254 	cmpl	$6, %edi
    255 	je	L(ashr_7)
    256 	cmpl	$5, %edi
    257 	je	L(ashr_6)
    258 	cmpl	$4, %edi
    259 	je	L(ashr_5)
    260 	cmpl	$3, %edi
    261 	je	L(ashr_4)
    262 	cmpl	$2, %edi
    263 	je	L(ashr_3)
    264 	cmpl	$1, %edi
    265 	je	L(ashr_2)
    266 	cmpl	$0, %edi
    267 	je	L(ashr_1)
    268 
    269 /*
    270  * The following cases will be handled by ashr_0
     271  *  ecx(offset of %eax)  edi(offset of %edx)  relative offset  corresponding case
    272  *        n(0~15)            n(0~15)           15(15+ n-n)         ashr_0
    273  */
    274 	.p2align 4
    275 L(ashr_0):
    276 	mov	$0xffff, %esi
    277 	movdqa	(%eax), %xmm1
    278 	pxor	%xmm0, %xmm0
    279 	pcmpeqb	%xmm1, %xmm0
    280 	pcmpeqb	(%edx), %xmm1
    281 	psubb	%xmm0, %xmm1
    282 	pmovmskb %xmm1, %edi
    283 	shr	%cl, %esi
    284 	shr	%cl, %edi
    285 	sub	%edi, %esi
    286 	mov	%ecx, %edi
    287 	jne	L(less32bytes)
    288 	UPDATE_STRNCMP_COUNTER
    289 	mov	$0x10, %ebx
    290 	mov	$0x10, %ecx
    291 	pxor	%xmm0, %xmm0
    292 	.p2align 4
    293 L(loop_ashr_0):
    294 	movdqa	(%eax, %ecx), %xmm1
    295 	movdqa	(%edx, %ecx), %xmm2
    296 
    297 	pcmpeqb	%xmm1, %xmm0
    298 	pcmpeqb	%xmm2, %xmm1
    299 	psubb	%xmm0, %xmm1
    300 	pmovmskb %xmm1, %esi
    301 	sub	$0xffff, %esi
    302 	jnz	L(exit)
    303 #ifdef USE_AS_STRNCMP
    304 	cmpl	$16, %ebp
    305 	lea	-16(%ebp), %ebp
    306 	jbe	L(more8byteseq)
    307 #endif
    308 	add	$16, %ecx
    309 	jmp	L(loop_ashr_0)
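
/*
 * L(ashr_1) through L(ashr_15) below all follow the same pattern,
 * differing only in the shift amount N:
 *  - the head compares the first 16 bytes using pslldq/shr masks;
 *  - the main "gobble" loop loads both strings 16 bytes at a time from
 *    their aligned blocks and uses palignr $N to realign the %edx data
 *    before the compare;
 *  - %edi counts up towards the end of the 4K page holding the %edx data;
 *    once it goes positive the loop detours to L(nibble_ashr_N), which
 *    checks whether the terminator already sits in the carried-over block
 *    before loading from the next page;
 *  - L(ashr_N_exittail) funnels into the common L(aftertail) code.
 */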
    310 
    311 /*
    312  * The following cases will be handled by ashr_1
     313  * ecx(offset of %eax)  edi(offset of %edx)   relative offset   	corresponding case
    314  *        n(15)            n -15            0(15 +(n-15) - n)         ashr_1
    315  */
    316 	.p2align 4
    317 L(ashr_1):
    318 	mov	$0xffff, %esi
    319 	pxor	%xmm0, %xmm0
    320 	movdqa	(%edx), %xmm2
    321 	movdqa	(%eax), %xmm1
    322 	pcmpeqb	%xmm1, %xmm0
    323 	pslldq	$15, %xmm2
    324 	pcmpeqb	%xmm1, %xmm2
    325 	psubb	%xmm0, %xmm2
    326 	pmovmskb %xmm2, %edi
    327 	shr	%cl, %esi
    328 	shr	%cl, %edi
    329 	sub	%edi, %esi
    330 	lea	-15(%ecx), %edi
    331 	jnz	L(less32bytes)
    332 
    333 	UPDATE_STRNCMP_COUNTER
    334 
    335 	movdqa	(%edx), %xmm3
    336 	pxor	%xmm0, %xmm0
    337 	mov	$16, %ecx
    338 	or	$1, %ebx
    339 	lea	1(%edx), %edi
    340 	and	$0xfff, %edi
    341 	sub	$0x1000, %edi
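
/*
 * %edi is the page-crossing counter for this loop: it starts at
 * ((edx + 1) & 0xfff) - 0x1000 and grows by 16 per block, so it becomes
 * positive just before the shifted %edx stream would run off the end of
 * its 4K page.  Roughly (an approximate sketch; the real loop below is
 * unrolled two blocks deep):
 *
 *   dist = ((uintptr_t)(s1 + shift) & 0xfff) - 0x1000;   // negative
 *   for (;;) {
 *       dist += 16;
 *       if (dist > 0)          // next block may touch the next page
 *           check_tail();      // L(nibble_ashr_1)
 *       compare_16_bytes();
 *   }
 */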
    342 
    343 	.p2align 4
    344 L(loop_ashr_1):
    345 	add	$16, %edi
    346 	jg	L(nibble_ashr_1)
    347 
    348 L(gobble_ashr_1):
    349 	movdqa	(%eax, %ecx), %xmm1
    350 	movdqa	(%edx, %ecx), %xmm2
    351 	movdqa	%xmm2, %xmm4
    352 
    353 	palignr	$1, %xmm3, %xmm2
    354 
    355 	pcmpeqb	%xmm1, %xmm0
    356 	pcmpeqb	%xmm2, %xmm1
    357 	psubb	%xmm0, %xmm1
    358 	pmovmskb %xmm1, %esi
    359 	sub	$0xffff, %esi
    360 	jnz	L(exit)
    361 #ifdef USE_AS_STRNCMP
    362 	cmpl	$16, %ebp
    363 	lea	-16(%ebp), %ebp
    364 	jbe	L(more8byteseq)
    365 #endif
    366 
    367 	add	$16, %ecx
    368 	movdqa	%xmm4, %xmm3
    369 
    370 	add	$16, %edi
    371 	jg	L(nibble_ashr_1)
    372 
    373 	movdqa	(%eax, %ecx), %xmm1
    374 	movdqa	(%edx, %ecx), %xmm2
    375 	movdqa	%xmm2, %xmm4
    376 
    377 	palignr	$1, %xmm3, %xmm2
    378 
    379 	pcmpeqb	%xmm1, %xmm0
    380 	pcmpeqb	%xmm2, %xmm1
    381 	psubb	%xmm0, %xmm1
    382 	pmovmskb %xmm1, %esi
    383 	sub	$0xffff, %esi
    384 	jnz	L(exit)
    385 
    386 #ifdef USE_AS_STRNCMP
    387 	cmpl	$16, %ebp
    388 	lea	-16(%ebp), %ebp
    389 	jbe	L(more8byteseq)
    390 #endif
    391 	add	$16, %ecx
    392 	movdqa	%xmm4, %xmm3
    393 	jmp	L(loop_ashr_1)
    394 
    395 	.p2align 4
    396 L(nibble_ashr_1):
    397 	pcmpeqb	%xmm3, %xmm0
    398 	pmovmskb %xmm0, %esi
    399 	test	$0xfffe, %esi
    400 	jnz	L(ashr_1_exittail)
    401 
    402 #ifdef USE_AS_STRNCMP
    403 	cmpl	$15, %ebp
    404 	jbe	L(ashr_1_exittail)
    405 #endif
    406 	pxor	%xmm0, %xmm0
    407 	sub	$0x1000, %edi
    408 	jmp	L(gobble_ashr_1)
    409 
    410 	.p2align 4
    411 L(ashr_1_exittail):
    412 	movdqa	(%eax, %ecx), %xmm1
    413 	psrldq	$1, %xmm0
    414 	psrldq	$1, %xmm3
    415 	jmp	L(aftertail)
    416 
    417 /*
    418  * The following cases will be handled by ashr_2
     419  * ecx(offset of %eax)  edi(offset of %edx)   relative offset   	corresponding case
    420  *        n(14~15)            n -14            1(15 +(n-14) - n)         ashr_2
    421  */
    422 	.p2align 4
    423 L(ashr_2):
    424 	mov	$0xffff, %esi
    425 	pxor	%xmm0, %xmm0
    426 	movdqa	(%edx), %xmm2
    427 	movdqa	(%eax), %xmm1
    428 	pcmpeqb	%xmm1, %xmm0
    429 	pslldq	$14, %xmm2
    430 	pcmpeqb	%xmm1, %xmm2
    431 	psubb	%xmm0, %xmm2
    432 	pmovmskb %xmm2, %edi
    433 	shr	%cl, %esi
    434 	shr	%cl, %edi
    435 	sub	%edi, %esi
    436 	lea	-14(%ecx), %edi
    437 	jnz	L(less32bytes)
    438 
    439 	UPDATE_STRNCMP_COUNTER
    440 
    441 	movdqa	(%edx), %xmm3
    442 	pxor	%xmm0, %xmm0
    443 	mov	$16, %ecx
    444 	or	$2, %ebx
    445 	lea	2(%edx), %edi
    446 	and	$0xfff, %edi
    447 	sub	$0x1000, %edi
    448 
    449 	.p2align 4
    450 L(loop_ashr_2):
    451 	add	$16, %edi
    452 	jg	L(nibble_ashr_2)
    453 
    454 L(gobble_ashr_2):
    455 	movdqa	(%eax, %ecx), %xmm1
    456 	movdqa	(%edx, %ecx), %xmm2
    457 	movdqa	%xmm2, %xmm4
    458 
    459 	palignr	$2, %xmm3, %xmm2
    460 
    461 	pcmpeqb	%xmm1, %xmm0
    462 	pcmpeqb	%xmm2, %xmm1
    463 	psubb	%xmm0, %xmm1
    464 	pmovmskb %xmm1, %esi
    465 	sub	$0xffff, %esi
    466 	jnz	L(exit)
    467 
    468 #ifdef USE_AS_STRNCMP
    469 	cmpl	$16, %ebp
    470 	lea	-16(%ebp), %ebp
    471 	jbe	L(more8byteseq)
    472 #endif
    473 	add	$16, %ecx
    474 	movdqa	%xmm4, %xmm3
    475 
    476 	add	$16, %edi
    477 	jg	L(nibble_ashr_2)
    478 
    479 	movdqa	(%eax, %ecx), %xmm1
    480 	movdqa	(%edx, %ecx), %xmm2
    481 	movdqa	%xmm2, %xmm4
    482 
    483 	palignr	$2, %xmm3, %xmm2
    484 
    485 	pcmpeqb	%xmm1, %xmm0
    486 	pcmpeqb	%xmm2, %xmm1
    487 	psubb	%xmm0, %xmm1
    488 	pmovmskb %xmm1, %esi
    489 	sub	$0xffff, %esi
    490 	jnz	L(exit)
    491 
    492 #ifdef USE_AS_STRNCMP
    493 	cmpl	$16, %ebp
    494 	lea	-16(%ebp), %ebp
    495 	jbe	L(more8byteseq)
    496 #endif
    497 	add	$16, %ecx
    498 	movdqa	%xmm4, %xmm3
    499 	jmp	L(loop_ashr_2)
    500 
    501 	.p2align 4
    502 L(nibble_ashr_2):
    503 	pcmpeqb	%xmm3, %xmm0
    504 	pmovmskb %xmm0, %esi
    505 	test	$0xfffc, %esi
    506 	jnz	L(ashr_2_exittail)
    507 
    508 #ifdef USE_AS_STRNCMP
    509 	cmpl	$14, %ebp
    510 	jbe	L(ashr_2_exittail)
    511 #endif
    512 
    513 	pxor	%xmm0, %xmm0
    514 	sub	$0x1000, %edi
    515 	jmp	L(gobble_ashr_2)
    516 
    517 	.p2align 4
    518 L(ashr_2_exittail):
    519 	movdqa	(%eax, %ecx), %xmm1
    520 	psrldq	$2, %xmm0
    521 	psrldq	$2, %xmm3
    522 	jmp	L(aftertail)
    523 
    524 /*
    525  * The following cases will be handled by ashr_3
     526  * ecx(offset of %eax)  edi(offset of %edx)   relative offset   	corresponding case
    527  *        n(13~15)            n -13            2(15 +(n-13) - n)         ashr_3
    528  */
    529 	.p2align 4
    530 L(ashr_3):
    531 	mov	$0xffff, %esi
    532 	pxor	%xmm0, %xmm0
    533 	movdqa	(%edx), %xmm2
    534 	movdqa	(%eax), %xmm1
    535 	pcmpeqb	%xmm1, %xmm0
    536 	pslldq	$13, %xmm2
    537 	pcmpeqb	%xmm1, %xmm2
    538 	psubb	%xmm0, %xmm2
    539 	pmovmskb %xmm2, %edi
    540 	shr	%cl, %esi
    541 	shr	%cl, %edi
    542 	sub	%edi, %esi
    543 	lea	-13(%ecx), %edi
    544 	jnz	L(less32bytes)
    545 
    546 	UPDATE_STRNCMP_COUNTER
    547 
    548 	movdqa	(%edx), %xmm3
    549 	pxor	%xmm0, %xmm0
    550 	mov	$16, %ecx
    551 	or	$3, %ebx
    552 	lea	3(%edx), %edi
    553 	and	$0xfff, %edi
    554 	sub	$0x1000, %edi
    555 
    556 	.p2align 4
    557 L(loop_ashr_3):
    558 	add	$16, %edi
    559 	jg	L(nibble_ashr_3)
    560 
    561 L(gobble_ashr_3):
    562 	movdqa	(%eax, %ecx), %xmm1
    563 	movdqa	(%edx, %ecx), %xmm2
    564 	movdqa	%xmm2, %xmm4
    565 
    566 	palignr	$3, %xmm3, %xmm2
    567 
    568 	pcmpeqb	%xmm1, %xmm0
    569 	pcmpeqb	%xmm2, %xmm1
    570 	psubb	%xmm0, %xmm1
    571 	pmovmskb %xmm1, %esi
    572 	sub	$0xffff, %esi
    573 	jnz	L(exit)
    574 
    575 #ifdef USE_AS_STRNCMP
    576 	cmpl	$16, %ebp
    577 	lea	-16(%ebp), %ebp
    578 	jbe	L(more8byteseq)
    579 #endif
    580 	add	$16, %ecx
    581 	movdqa	%xmm4, %xmm3
    582 
    583 	add	$16, %edi
    584 	jg	L(nibble_ashr_3)
    585 
    586 	movdqa	(%eax, %ecx), %xmm1
    587 	movdqa	(%edx, %ecx), %xmm2
    588 	movdqa	%xmm2, %xmm4
    589 
    590 	palignr	$3, %xmm3, %xmm2
    591 
    592 	pcmpeqb	%xmm1, %xmm0
    593 	pcmpeqb	%xmm2, %xmm1
    594 	psubb	%xmm0, %xmm1
    595 	pmovmskb %xmm1, %esi
    596 	sub	$0xffff, %esi
    597 	jnz	L(exit)
    598 
    599 #ifdef USE_AS_STRNCMP
    600 	cmpl	$16, %ebp
    601 	lea	-16(%ebp), %ebp
    602 	jbe	L(more8byteseq)
    603 #endif
    604 	add	$16, %ecx
    605 	movdqa	%xmm4, %xmm3
    606 	jmp	L(loop_ashr_3)
    607 
    608 	.p2align 4
    609 L(nibble_ashr_3):
    610 	pcmpeqb	%xmm3, %xmm0
    611 	pmovmskb %xmm0, %esi
    612 	test	$0xfff8, %esi
    613 	jnz	L(ashr_3_exittail)
    614 
    615 #ifdef USE_AS_STRNCMP
    616 	cmpl	$13, %ebp
    617 	jbe	L(ashr_3_exittail)
    618 #endif
    619 	pxor	%xmm0, %xmm0
    620 	sub	$0x1000, %edi
    621 	jmp	L(gobble_ashr_3)
    622 
    623 	.p2align 4
    624 L(ashr_3_exittail):
    625 	movdqa	(%eax, %ecx), %xmm1
    626 	psrldq	$3, %xmm0
    627 	psrldq	$3, %xmm3
    628 	jmp	L(aftertail)
    629 
    630 /*
    631  * The following cases will be handled by ashr_4
     632  * ecx(offset of %eax)  edi(offset of %edx)   relative offset   	corresponding case
    633  *        n(12~15)            n -12            3(15 +(n-12) - n)         ashr_4
    634  */
    635 	.p2align 4
    636 L(ashr_4):
    637 	mov	$0xffff, %esi
    638 	pxor	%xmm0, %xmm0
    639 	movdqa	(%edx), %xmm2
    640 	movdqa	(%eax), %xmm1
    641 	pcmpeqb	%xmm1, %xmm0
    642 	pslldq	$12, %xmm2
    643 	pcmpeqb	%xmm1, %xmm2
    644 	psubb	%xmm0, %xmm2
    645 	pmovmskb %xmm2, %edi
    646 	shr	%cl, %esi
    647 	shr	%cl, %edi
    648 	sub	%edi, %esi
    649 	lea	-12(%ecx), %edi
    650 	jnz	L(less32bytes)
    651 
    652 	UPDATE_STRNCMP_COUNTER
    653 
    654 	movdqa	(%edx), %xmm3
    655 	pxor	%xmm0, %xmm0
    656 	mov	$16, %ecx
    657 	or	$4, %ebx
    658 	lea	4(%edx), %edi
    659 	and	$0xfff, %edi
    660 	sub	$0x1000, %edi
    661 
    662 	.p2align 4
    663 L(loop_ashr_4):
    664 	add	$16, %edi
    665 	jg	L(nibble_ashr_4)
    666 
    667 L(gobble_ashr_4):
    668 	movdqa	(%eax, %ecx), %xmm1
    669 	movdqa	(%edx, %ecx), %xmm2
    670 	movdqa	%xmm2, %xmm4
    671 
    672 	palignr	$4, %xmm3, %xmm2
    673 
    674 	pcmpeqb	%xmm1, %xmm0
    675 	pcmpeqb	%xmm2, %xmm1
    676 	psubb	%xmm0, %xmm1
    677 	pmovmskb %xmm1, %esi
    678 	sub	$0xffff, %esi
    679 	jnz	L(exit)
    680 
    681 #ifdef USE_AS_STRNCMP
    682 	cmpl	$16, %ebp
    683 	lea	-16(%ebp), %ebp
    684 	jbe	L(more8byteseq)
    685 #endif
    686 
    687 	add	$16, %ecx
    688 	movdqa	%xmm4, %xmm3
    689 
    690 	add	$16, %edi
    691 	jg	L(nibble_ashr_4)
    692 
    693 	movdqa	(%eax, %ecx), %xmm1
    694 	movdqa	(%edx, %ecx), %xmm2
    695 	movdqa	%xmm2, %xmm4
    696 
    697 	palignr	$4, %xmm3, %xmm2
    698 
    699 	pcmpeqb	%xmm1, %xmm0
    700 	pcmpeqb	%xmm2, %xmm1
    701 	psubb	%xmm0, %xmm1
    702 	pmovmskb %xmm1, %esi
    703 	sub	$0xffff, %esi
    704 	jnz	L(exit)
    705 
    706 #ifdef USE_AS_STRNCMP
    707 	cmpl	$16, %ebp
    708 	lea	-16(%ebp), %ebp
    709 	jbe	L(more8byteseq)
    710 #endif
    711 
    712 	add	$16, %ecx
    713 	movdqa	%xmm4, %xmm3
    714 	jmp	L(loop_ashr_4)
    715 
    716 	.p2align 4
    717 L(nibble_ashr_4):
    718 	pcmpeqb	%xmm3, %xmm0
    719 	pmovmskb %xmm0, %esi
    720 	test	$0xfff0, %esi
    721 	jnz	L(ashr_4_exittail)
    722 
    723 #ifdef USE_AS_STRNCMP
    724 	cmpl	$12, %ebp
    725 	jbe	L(ashr_4_exittail)
    726 #endif
    727 
    728 	pxor	%xmm0, %xmm0
    729 	sub	$0x1000, %edi
    730 	jmp	L(gobble_ashr_4)
    731 
    732 	.p2align 4
    733 L(ashr_4_exittail):
    734 	movdqa	(%eax, %ecx), %xmm1
    735 	psrldq	$4, %xmm0
    736 	psrldq	$4, %xmm3
    737 	jmp	L(aftertail)
    738 
    739 /*
    740  * The following cases will be handled by ashr_5
     741  * ecx(offset of %eax)  edi(offset of %edx)   relative offset   	corresponding case
    742  *        n(11~15)            n -11            4(15 +(n-11) - n)         ashr_5
    743  */
    744 	.p2align 4
    745 L(ashr_5):
    746 	mov	$0xffff, %esi
    747 	pxor	%xmm0, %xmm0
    748 	movdqa	(%edx), %xmm2
    749 	movdqa	(%eax), %xmm1
    750 	pcmpeqb	%xmm1, %xmm0
    751 	pslldq	$11, %xmm2
    752 	pcmpeqb	%xmm1, %xmm2
    753 	psubb	%xmm0, %xmm2
    754 	pmovmskb %xmm2, %edi
    755 	shr	%cl, %esi
    756 	shr	%cl, %edi
    757 	sub	%edi, %esi
    758 	lea	-11(%ecx), %edi
    759 	jnz	L(less32bytes)
    760 
    761 	UPDATE_STRNCMP_COUNTER
    762 
    763 	movdqa	(%edx), %xmm3
    764 	pxor	%xmm0, %xmm0
    765 	mov	$16, %ecx
    766 	or	$5, %ebx
    767 	lea	5(%edx), %edi
    768 	and	$0xfff, %edi
    769 	sub	$0x1000, %edi
    770 
    771 	.p2align 4
    772 L(loop_ashr_5):
    773 	add	$16, %edi
    774 	jg	L(nibble_ashr_5)
    775 
    776 L(gobble_ashr_5):
    777 	movdqa	(%eax, %ecx), %xmm1
    778 	movdqa	(%edx, %ecx), %xmm2
    779 	movdqa	%xmm2, %xmm4
    780 
    781 	palignr	$5, %xmm3, %xmm2
    782 
    783 	pcmpeqb	%xmm1, %xmm0
    784 	pcmpeqb	%xmm2, %xmm1
    785 	psubb	%xmm0, %xmm1
    786 	pmovmskb %xmm1, %esi
    787 	sub	$0xffff, %esi
    788 	jnz	L(exit)
    789 
    790 #ifdef USE_AS_STRNCMP
    791 	cmpl	$16, %ebp
    792 	lea	-16(%ebp), %ebp
    793 	jbe	L(more8byteseq)
    794 #endif
    795 	add	$16, %ecx
    796 	movdqa	%xmm4, %xmm3
    797 
    798 	add	$16, %edi
    799 	jg	L(nibble_ashr_5)
    800 
    801 	movdqa	(%eax, %ecx), %xmm1
    802 	movdqa	(%edx, %ecx), %xmm2
    803 	movdqa	%xmm2, %xmm4
    804 
    805 	palignr	$5, %xmm3, %xmm2
    806 
    807 	pcmpeqb	%xmm1, %xmm0
    808 	pcmpeqb	%xmm2, %xmm1
    809 	psubb	%xmm0, %xmm1
    810 	pmovmskb %xmm1, %esi
    811 	sub	$0xffff, %esi
    812 	jnz	L(exit)
    813 
    814 #ifdef USE_AS_STRNCMP
    815 	cmpl	$16, %ebp
    816 	lea	-16(%ebp), %ebp
    817 	jbe	L(more8byteseq)
    818 #endif
    819 	add	$16, %ecx
    820 	movdqa	%xmm4, %xmm3
    821 	jmp	L(loop_ashr_5)
    822 
    823 	.p2align 4
    824 L(nibble_ashr_5):
    825 	pcmpeqb	%xmm3, %xmm0
    826 	pmovmskb %xmm0, %esi
    827 	test	$0xffe0, %esi
    828 	jnz	L(ashr_5_exittail)
    829 
    830 #ifdef USE_AS_STRNCMP
    831 	cmpl	$11, %ebp
    832 	jbe	L(ashr_5_exittail)
    833 #endif
    834 	pxor	%xmm0, %xmm0
    835 	sub	$0x1000, %edi
    836 	jmp	L(gobble_ashr_5)
    837 
    838 	.p2align 4
    839 L(ashr_5_exittail):
    840 	movdqa	(%eax, %ecx), %xmm1
    841 	psrldq	$5, %xmm0
    842 	psrldq	$5, %xmm3
    843 	jmp	L(aftertail)
    844 
    845 /*
    846  * The following cases will be handled by ashr_6
     847  * ecx(offset of %eax)  edi(offset of %edx)   relative offset   	corresponding case
    848  *        n(10~15)            n -10            5(15 +(n-10) - n)         ashr_6
    849  */
    850 
    851 	.p2align 4
    852 L(ashr_6):
    853 	mov	$0xffff, %esi
    854 	pxor	%xmm0, %xmm0
    855 	movdqa	(%edx), %xmm2
    856 	movdqa	(%eax), %xmm1
    857 	pcmpeqb	%xmm1, %xmm0
    858 	pslldq	$10, %xmm2
    859 	pcmpeqb	%xmm1, %xmm2
    860 	psubb	%xmm0, %xmm2
    861 	pmovmskb %xmm2, %edi
    862 	shr	%cl, %esi
    863 	shr	%cl, %edi
    864 	sub	%edi, %esi
    865 	lea	-10(%ecx), %edi
    866 	jnz	L(less32bytes)
    867 
    868 	UPDATE_STRNCMP_COUNTER
    869 
    870 	movdqa	(%edx), %xmm3
    871 	pxor	%xmm0, %xmm0
    872 	mov	$16, %ecx
    873 	or	$6, %ebx
    874 	lea	6(%edx), %edi
    875 	and	$0xfff, %edi
    876 	sub	$0x1000, %edi
    877 
    878 	.p2align 4
    879 L(loop_ashr_6):
    880 	add	$16, %edi
    881 	jg	L(nibble_ashr_6)
    882 
    883 L(gobble_ashr_6):
    884 	movdqa	(%eax, %ecx), %xmm1
    885 	movdqa	(%edx, %ecx), %xmm2
    886 	movdqa	%xmm2, %xmm4
    887 
    888 	palignr	$6, %xmm3, %xmm2
    889 
    890 	pcmpeqb	%xmm1, %xmm0
    891 	pcmpeqb	%xmm2, %xmm1
    892 	psubb	%xmm0, %xmm1
    893 	pmovmskb %xmm1, %esi
    894 	sub	$0xffff, %esi
    895 	jnz	L(exit)
    896 
    897 #ifdef USE_AS_STRNCMP
    898 	cmpl	$16, %ebp
    899 	lea	-16(%ebp), %ebp
    900 	jbe	L(more8byteseq)
    901 #endif
    902 
    903 	add	$16, %ecx
    904 	movdqa	%xmm4, %xmm3
    905 
    906 	add	$16, %edi
    907 	jg	L(nibble_ashr_6)
    908 
    909 	movdqa	(%eax, %ecx), %xmm1
    910 	movdqa	(%edx, %ecx), %xmm2
    911 	movdqa	%xmm2, %xmm4
    912 
    913 	palignr	$6, %xmm3, %xmm2
    914 
    915 	pcmpeqb	%xmm1, %xmm0
    916 	pcmpeqb	%xmm2, %xmm1
    917 	psubb	%xmm0, %xmm1
    918 	pmovmskb %xmm1, %esi
    919 	sub	$0xffff, %esi
    920 	jnz	L(exit)
    921 #ifdef USE_AS_STRNCMP
    922 	cmpl	$16, %ebp
    923 	lea	-16(%ebp), %ebp
    924 	jbe	L(more8byteseq)
    925 #endif
    926 
    927 	add	$16, %ecx
    928 	movdqa	%xmm4, %xmm3
    929 	jmp	L(loop_ashr_6)
    930 
    931 	.p2align 4
    932 L(nibble_ashr_6):
    933 	pcmpeqb	%xmm3, %xmm0
    934 	pmovmskb %xmm0, %esi
    935 	test	$0xffc0, %esi
    936 	jnz	L(ashr_6_exittail)
    937 
    938 #ifdef USE_AS_STRNCMP
    939 	cmpl	$10, %ebp
    940 	jbe	L(ashr_6_exittail)
    941 #endif
    942 	pxor	%xmm0, %xmm0
    943 	sub	$0x1000, %edi
    944 	jmp	L(gobble_ashr_6)
    945 
    946 	.p2align 4
    947 L(ashr_6_exittail):
    948 	movdqa	(%eax, %ecx), %xmm1
    949 	psrldq	$6, %xmm0
    950 	psrldq	$6, %xmm3
    951 	jmp	L(aftertail)
    952 
    953 /*
    954  * The following cases will be handled by ashr_7
     955  * ecx(offset of %eax)  edi(offset of %edx)   relative offset   	corresponding case
    956  *        n(9~15)            n - 9            6(15 +(n-9) - n)         ashr_7
    957  */
    958 
    959 	.p2align 4
    960 L(ashr_7):
    961 	mov	$0xffff, %esi
    962 	pxor	%xmm0, %xmm0
    963 	movdqa	(%edx), %xmm2
    964 	movdqa	(%eax), %xmm1
    965 	pcmpeqb	%xmm1, %xmm0
    966 	pslldq	$9, %xmm2
    967 	pcmpeqb	%xmm1, %xmm2
    968 	psubb	%xmm0, %xmm2
    969 	pmovmskb %xmm2, %edi
    970 	shr	%cl, %esi
    971 	shr	%cl, %edi
    972 	sub	%edi, %esi
    973 	lea	-9(%ecx), %edi
    974 	jnz	L(less32bytes)
    975 
    976 	UPDATE_STRNCMP_COUNTER
    977 
    978 	movdqa	(%edx), %xmm3
    979 	pxor	%xmm0, %xmm0
    980 	mov	$16, %ecx
    981 	or	$7, %ebx
    982 	lea	8(%edx), %edi
    983 	and	$0xfff, %edi
    984 	sub	$0x1000, %edi
    985 
    986 	.p2align 4
    987 L(loop_ashr_7):
    988 	add	$16, %edi
    989 	jg	L(nibble_ashr_7)
    990 
    991 L(gobble_ashr_7):
    992 	movdqa	(%eax, %ecx), %xmm1
    993 	movdqa	(%edx, %ecx), %xmm2
    994 	movdqa	%xmm2, %xmm4
    995 
    996 	palignr	$7, %xmm3, %xmm2
    997 
    998 	pcmpeqb	%xmm1, %xmm0
    999 	pcmpeqb	%xmm2, %xmm1
   1000 	psubb	%xmm0, %xmm1
   1001 	pmovmskb %xmm1, %esi
   1002 	sub	$0xffff, %esi
   1003 	jnz	L(exit)
   1004 
   1005 #ifdef USE_AS_STRNCMP
   1006 	cmpl	$16, %ebp
   1007 	lea	-16(%ebp), %ebp
   1008 	jbe	L(more8byteseq)
   1009 #endif
   1010 
   1011 	add	$16, %ecx
   1012 	movdqa	%xmm4, %xmm3
   1013 
   1014 	add	$16, %edi
   1015 	jg	L(nibble_ashr_7)
   1016 
   1017 	movdqa	(%eax, %ecx), %xmm1
   1018 	movdqa	(%edx, %ecx), %xmm2
   1019 	movdqa	%xmm2, %xmm4
   1020 
   1021 	palignr	$7, %xmm3, %xmm2
   1022 
   1023 	pcmpeqb	%xmm1, %xmm0
   1024 	pcmpeqb	%xmm2, %xmm1
   1025 	psubb	%xmm0, %xmm1
   1026 	pmovmskb %xmm1, %esi
   1027 	sub	$0xffff, %esi
   1028 	jnz	L(exit)
   1029 
   1030 #ifdef USE_AS_STRNCMP
   1031 	cmpl	$16, %ebp
   1032 	lea	-16(%ebp), %ebp
   1033 	jbe	L(more8byteseq)
   1034 #endif
   1035 
   1036 	add	$16, %ecx
   1037 	movdqa	%xmm4, %xmm3
   1038 	jmp	L(loop_ashr_7)
   1039 
   1040 	.p2align 4
   1041 L(nibble_ashr_7):
   1042 	pcmpeqb	%xmm3, %xmm0
   1043 	pmovmskb %xmm0, %esi
   1044 	test	$0xff80, %esi
   1045 	jnz	L(ashr_7_exittail)
   1046 
   1047 #ifdef USE_AS_STRNCMP
   1048 	cmpl	$9, %ebp
   1049 	jbe	L(ashr_7_exittail)
   1050 #endif
    1051 	pxor	%xmm0, %xmm0
   1053 	sub	$0x1000, %edi
   1054 	jmp	L(gobble_ashr_7)
   1055 
   1056 	.p2align 4
   1057 L(ashr_7_exittail):
   1058 	movdqa	(%eax, %ecx), %xmm1
   1059 	psrldq	$7, %xmm0
   1060 	psrldq	$7, %xmm3
   1061 	jmp	L(aftertail)
   1062 
   1063 /*
   1064  * The following cases will be handled by ashr_8
    1065  * ecx(offset of %eax)  edi(offset of %edx)   relative offset   	corresponding case
   1066  *        n(8~15)            n - 8            7(15 +(n-8) - n)         ashr_8
   1067  */
   1068 	.p2align 4
   1069 L(ashr_8):
   1070 	mov	$0xffff, %esi
   1071 	pxor	%xmm0, %xmm0
   1072 	movdqa	(%edx), %xmm2
   1073 	movdqa	(%eax), %xmm1
   1074 	pcmpeqb	%xmm1, %xmm0
   1075 	pslldq	$8, %xmm2
   1076 	pcmpeqb	%xmm1, %xmm2
   1077 	psubb	%xmm0, %xmm2
   1078 	pmovmskb %xmm2, %edi
   1079 	shr	%cl, %esi
   1080 	shr	%cl, %edi
   1081 	sub	%edi, %esi
   1082 	lea	-8(%ecx), %edi
   1083 	jnz	L(less32bytes)
   1084 
   1085 	UPDATE_STRNCMP_COUNTER
   1086 
   1087 	movdqa	(%edx), %xmm3
   1088 	pxor	%xmm0, %xmm0
   1089 	mov	$16, %ecx
   1090 	or	$8, %ebx
   1091 	lea	8(%edx), %edi
   1092 	and	$0xfff, %edi
   1093 	sub	$0x1000, %edi
   1094 
   1095 	.p2align 4
   1096 L(loop_ashr_8):
   1097 	add	$16, %edi
   1098 	jg	L(nibble_ashr_8)
   1099 
   1100 L(gobble_ashr_8):
   1101 	movdqa	(%eax, %ecx), %xmm1
   1102 	movdqa	(%edx, %ecx), %xmm2
   1103 	movdqa	%xmm2, %xmm4
   1104 
   1105 	palignr	$8, %xmm3, %xmm2
   1106 
   1107 	pcmpeqb	%xmm1, %xmm0
   1108 	pcmpeqb	%xmm2, %xmm1
   1109 	psubb	%xmm0, %xmm1
   1110 	pmovmskb %xmm1, %esi
   1111 	sub	$0xffff, %esi
   1112 	jnz	L(exit)
   1113 
   1114 #ifdef USE_AS_STRNCMP
   1115 	cmpl	$16, %ebp
   1116 	lea	-16(%ebp), %ebp
   1117 	jbe	L(more8byteseq)
   1118 #endif
   1119 	add	$16, %ecx
   1120 	movdqa	%xmm4, %xmm3
   1121 
   1122 	add	$16, %edi
   1123 	jg	L(nibble_ashr_8)
   1124 
   1125 	movdqa	(%eax, %ecx), %xmm1
   1126 	movdqa	(%edx, %ecx), %xmm2
   1127 	movdqa	%xmm2, %xmm4
   1128 
   1129 	palignr	$8, %xmm3, %xmm2
   1130 
   1131 	pcmpeqb	%xmm1, %xmm0
   1132 	pcmpeqb	%xmm2, %xmm1
   1133 	psubb	%xmm0, %xmm1
   1134 	pmovmskb %xmm1, %esi
   1135 	sub	$0xffff, %esi
   1136 	jnz	L(exit)
   1137 
   1138 #ifdef USE_AS_STRNCMP
   1139 	cmpl	$16, %ebp
   1140 	lea	-16(%ebp), %ebp
   1141 	jbe	L(more8byteseq)
   1142 #endif
   1143 	add	$16, %ecx
   1144 	movdqa	%xmm4, %xmm3
   1145 	jmp	L(loop_ashr_8)
   1146 
   1147 	.p2align 4
   1148 L(nibble_ashr_8):
   1149 	pcmpeqb	%xmm3, %xmm0
   1150 	pmovmskb %xmm0, %esi
   1151 	test	$0xff00, %esi
   1152 	jnz	L(ashr_8_exittail)
   1153 
   1154 #ifdef USE_AS_STRNCMP
   1155 	cmpl	$8, %ebp
   1156 	jbe	L(ashr_8_exittail)
   1157 #endif
    1158 	pxor	%xmm0, %xmm0
   1160 	sub	$0x1000, %edi
   1161 	jmp	L(gobble_ashr_8)
   1162 
   1163 	.p2align 4
   1164 L(ashr_8_exittail):
   1165 	movdqa	(%eax, %ecx), %xmm1
   1166 	psrldq	$8, %xmm0
   1167 	psrldq	$8, %xmm3
   1168 	jmp	L(aftertail)
   1169 
   1170 /*
   1171  * The following cases will be handled by ashr_9
    1172  * ecx(offset of %eax)  edi(offset of %edx)   relative offset   	corresponding case
   1173  *        n(7~15)            n - 7            8(15 +(n-7) - n)         ashr_9
   1174  */
   1175 	.p2align 4
   1176 L(ashr_9):
   1177 	mov	$0xffff, %esi
   1178 	pxor	%xmm0, %xmm0
   1179 	movdqa	(%edx), %xmm2
   1180 	movdqa	(%eax), %xmm1
   1181 	pcmpeqb	%xmm1, %xmm0
   1182 	pslldq	$7, %xmm2
   1183 	pcmpeqb	%xmm1, %xmm2
   1184 	psubb	%xmm0, %xmm2
   1185 	pmovmskb %xmm2, %edi
   1186 	shr	%cl, %esi
   1187 	shr	%cl, %edi
   1188 	sub	%edi, %esi
   1189 	lea	-7(%ecx), %edi
   1190 	jnz	L(less32bytes)
   1191 
   1192 	UPDATE_STRNCMP_COUNTER
   1193 
   1194 	movdqa	(%edx), %xmm3
   1195 	pxor	%xmm0, %xmm0
   1196 	mov	$16, %ecx
   1197 	or	$9, %ebx
   1198 	lea	9(%edx), %edi
   1199 	and	$0xfff, %edi
   1200 	sub	$0x1000, %edi
   1201 
   1202 	.p2align 4
   1203 L(loop_ashr_9):
   1204 	add	$16, %edi
   1205 	jg	L(nibble_ashr_9)
   1206 
   1207 L(gobble_ashr_9):
   1208 	movdqa	(%eax, %ecx), %xmm1
   1209 	movdqa	(%edx, %ecx), %xmm2
   1210 	movdqa	%xmm2, %xmm4
   1211 
   1212 	palignr	$9, %xmm3, %xmm2
   1213 
   1214 	pcmpeqb	%xmm1, %xmm0
   1215 	pcmpeqb	%xmm2, %xmm1
   1216 	psubb	%xmm0, %xmm1
   1217 	pmovmskb %xmm1, %esi
   1218 	sub	$0xffff, %esi
   1219 	jnz	L(exit)
   1220 
   1221 #ifdef USE_AS_STRNCMP
   1222 	cmpl	$16, %ebp
   1223 	lea	-16(%ebp), %ebp
   1224 	jbe	L(more8byteseq)
   1225 #endif
   1226 	add	$16, %ecx
   1227 	movdqa	%xmm4, %xmm3
   1228 
   1229 	add	$16, %edi
   1230 	jg	L(nibble_ashr_9)
   1231 
   1232 	movdqa	(%eax, %ecx), %xmm1
   1233 	movdqa	(%edx, %ecx), %xmm2
   1234 	movdqa	%xmm2, %xmm4
   1235 
   1236 	palignr	$9, %xmm3, %xmm2
   1237 
   1238 	pcmpeqb	%xmm1, %xmm0
   1239 	pcmpeqb	%xmm2, %xmm1
   1240 	psubb	%xmm0, %xmm1
   1241 	pmovmskb %xmm1, %esi
   1242 	sub	$0xffff, %esi
   1243 	jnz	L(exit)
   1244 
   1245 #ifdef USE_AS_STRNCMP
   1246 	cmpl	$16, %ebp
   1247 	lea	-16(%ebp), %ebp
   1248 	jbe	L(more8byteseq)
   1249 #endif
   1250 	add	$16, %ecx
   1251 	movdqa	%xmm4, %xmm3
   1252 	jmp	L(loop_ashr_9)
   1253 
   1254 	.p2align 4
   1255 L(nibble_ashr_9):
   1256 	pcmpeqb	%xmm3, %xmm0
   1257 	pmovmskb %xmm0, %esi
   1258 	test	$0xfe00, %esi
   1259 	jnz	L(ashr_9_exittail)
   1260 
   1261 #ifdef USE_AS_STRNCMP
   1262 	cmpl	$7, %ebp
   1263 	jbe	L(ashr_9_exittail)
   1264 #endif
   1265 	pxor	%xmm0, %xmm0
   1266 	sub	$0x1000, %edi
   1267 	jmp	L(gobble_ashr_9)
   1268 
   1269 	.p2align 4
   1270 L(ashr_9_exittail):
   1271 	movdqa	(%eax, %ecx), %xmm1
   1272 	psrldq	$9, %xmm0
   1273 	psrldq	$9, %xmm3
   1274 	jmp	L(aftertail)
   1275 
   1276 /*
   1277  * The following cases will be handled by ashr_10
    1278  * ecx(offset of %eax)  edi(offset of %edx)   relative offset   	corresponding case
   1279  *        n(6~15)            n - 6            9(15 +(n-6) - n)         ashr_10
   1280  */
   1281 	.p2align 4
   1282 L(ashr_10):
   1283 	mov	$0xffff, %esi
   1284 	pxor	%xmm0, %xmm0
   1285 	movdqa	(%edx), %xmm2
   1286 	movdqa	(%eax), %xmm1
   1287 	pcmpeqb	%xmm1, %xmm0
   1288 	pslldq	$6, %xmm2
   1289 	pcmpeqb	%xmm1, %xmm2
   1290 	psubb	%xmm0, %xmm2
   1291 	pmovmskb %xmm2, %edi
   1292 	shr	%cl, %esi
   1293 	shr	%cl, %edi
   1294 	sub	%edi, %esi
   1295 	lea	-6(%ecx), %edi
   1296 	jnz	L(less32bytes)
   1297 
   1298 	UPDATE_STRNCMP_COUNTER
   1299 
   1300 	movdqa	(%edx), %xmm3
   1301 	pxor	%xmm0, %xmm0
   1302 	mov	$16, %ecx
   1303 	or	$10, %ebx
   1304 	lea	10(%edx), %edi
   1305 	and	$0xfff, %edi
   1306 	sub	$0x1000, %edi
   1307 
   1308 	.p2align 4
   1309 L(loop_ashr_10):
   1310 	add	$16, %edi
   1311 	jg	L(nibble_ashr_10)
   1312 
   1313 L(gobble_ashr_10):
   1314 	movdqa	(%eax, %ecx), %xmm1
   1315 	movdqa	(%edx, %ecx), %xmm2
   1316 	movdqa	%xmm2, %xmm4
   1317 
   1318 	palignr	$10, %xmm3, %xmm2
   1319 
   1320 	pcmpeqb	%xmm1, %xmm0
   1321 	pcmpeqb	%xmm2, %xmm1
   1322 	psubb	%xmm0, %xmm1
   1323 	pmovmskb %xmm1, %esi
   1324 	sub	$0xffff, %esi
   1325 	jnz	L(exit)
   1326 
   1327 #ifdef USE_AS_STRNCMP
   1328 	cmpl	$16, %ebp
   1329 	lea	-16(%ebp), %ebp
   1330 	jbe	L(more8byteseq)
   1331 #endif
   1332 	add	$16, %ecx
   1333 	movdqa	%xmm4, %xmm3
   1334 
   1335 	add	$16, %edi
   1336 	jg	L(nibble_ashr_10)
   1337 
   1338 	movdqa	(%eax, %ecx), %xmm1
   1339 	movdqa	(%edx, %ecx), %xmm2
   1340 	movdqa	%xmm2, %xmm4
   1341 
   1342 	palignr	$10, %xmm3, %xmm2
   1343 
   1344 	pcmpeqb	%xmm1, %xmm0
   1345 	pcmpeqb	%xmm2, %xmm1
   1346 	psubb	%xmm0, %xmm1
   1347 	pmovmskb %xmm1, %esi
   1348 	sub	$0xffff, %esi
   1349 	jnz	L(exit)
   1350 
   1351 #ifdef USE_AS_STRNCMP
   1352 	cmpl	$16, %ebp
   1353 	lea	-16(%ebp), %ebp
   1354 	jbe	L(more8byteseq)
   1355 #endif
   1356 	add	$16, %ecx
   1357 	movdqa	%xmm4, %xmm3
   1358 	jmp	L(loop_ashr_10)
   1359 
   1360 	.p2align 4
   1361 L(nibble_ashr_10):
   1362 	pcmpeqb	%xmm3, %xmm0
   1363 	pmovmskb %xmm0, %esi
   1364 	test	$0xfc00, %esi
   1365 	jnz	L(ashr_10_exittail)
   1366 
   1367 #ifdef USE_AS_STRNCMP
   1368 	cmpl	$6, %ebp
   1369 	jbe	L(ashr_10_exittail)
   1370 #endif
   1371 	pxor	%xmm0, %xmm0
   1372 	sub	$0x1000, %edi
   1373 	jmp	L(gobble_ashr_10)
   1374 
   1375 	.p2align 4
   1376 L(ashr_10_exittail):
   1377 	movdqa	(%eax, %ecx), %xmm1
   1378 	psrldq	$10, %xmm0
   1379 	psrldq	$10, %xmm3
   1380 	jmp	L(aftertail)
   1381 
   1382 /*
   1383  * The following cases will be handled by ashr_11
    1384  * ecx(offset of %eax)  edi(offset of %edx)   relative offset   	corresponding case
   1385  *        n(5~15)            n - 5            10(15 +(n-5) - n)         ashr_11
   1386  */
   1387 	.p2align 4
   1388 L(ashr_11):
   1389 	mov	$0xffff, %esi
   1390 	pxor	%xmm0, %xmm0
   1391 	movdqa	(%edx), %xmm2
   1392 	movdqa	(%eax), %xmm1
   1393 	pcmpeqb	%xmm1, %xmm0
   1394 	pslldq	$5, %xmm2
   1395 	pcmpeqb	%xmm1, %xmm2
   1396 	psubb	%xmm0, %xmm2
   1397 	pmovmskb %xmm2, %edi
   1398 	shr	%cl, %esi
   1399 	shr	%cl, %edi
   1400 	sub	%edi, %esi
   1401 	lea	-5(%ecx), %edi
   1402 	jnz	L(less32bytes)
   1403 
   1404 	UPDATE_STRNCMP_COUNTER
   1405 
   1406 	movdqa	(%edx), %xmm3
   1407 	pxor	%xmm0, %xmm0
   1408 	mov	$16, %ecx
   1409 	or	$11, %ebx
   1410 	lea	11(%edx), %edi
   1411 	and	$0xfff, %edi
   1412 	sub	$0x1000, %edi
   1413 
   1414 	.p2align 4
   1415 L(loop_ashr_11):
   1416 	add	$16, %edi
   1417 	jg	L(nibble_ashr_11)
   1418 
   1419 L(gobble_ashr_11):
   1420 	movdqa	(%eax, %ecx), %xmm1
   1421 	movdqa	(%edx, %ecx), %xmm2
   1422 	movdqa	%xmm2, %xmm4
   1423 
   1424 	palignr	$11, %xmm3, %xmm2
   1425 
   1426 	pcmpeqb	%xmm1, %xmm0
   1427 	pcmpeqb	%xmm2, %xmm1
   1428 	psubb	%xmm0, %xmm1
   1429 	pmovmskb %xmm1, %esi
   1430 	sub	$0xffff, %esi
   1431 	jnz	L(exit)
   1432 
   1433 #ifdef USE_AS_STRNCMP
   1434 	cmpl	$16, %ebp
   1435 	lea	-16(%ebp), %ebp
   1436 	jbe	L(more8byteseq)
   1437 #endif
   1438 	add	$16, %ecx
   1439 	movdqa	%xmm4, %xmm3
   1440 
   1441 	add	$16, %edi
   1442 	jg	L(nibble_ashr_11)
   1443 
   1444 	movdqa	(%eax, %ecx), %xmm1
   1445 	movdqa	(%edx, %ecx), %xmm2
   1446 	movdqa	%xmm2, %xmm4
   1447 
   1448 	palignr	$11, %xmm3, %xmm2
   1449 
   1450 	pcmpeqb	%xmm1, %xmm0
   1451 	pcmpeqb	%xmm2, %xmm1
   1452 	psubb	%xmm0, %xmm1
   1453 	pmovmskb %xmm1, %esi
   1454 	sub	$0xffff, %esi
   1455 	jnz	L(exit)
   1456 
   1457 #ifdef USE_AS_STRNCMP
   1458 	cmpl	$16, %ebp
   1459 	lea	-16(%ebp), %ebp
   1460 	jbe	L(more8byteseq)
   1461 #endif
   1462 	add	$16, %ecx
   1463 	movdqa	%xmm4, %xmm3
   1464 	jmp	L(loop_ashr_11)
   1465 
   1466 	.p2align 4
   1467 L(nibble_ashr_11):
   1468 	pcmpeqb	%xmm3, %xmm0
   1469 	pmovmskb %xmm0, %esi
   1470 	test	$0xf800, %esi
   1471 	jnz	L(ashr_11_exittail)
   1472 
   1473 #ifdef USE_AS_STRNCMP
   1474 	cmpl	$5, %ebp
   1475 	jbe	L(ashr_11_exittail)
   1476 #endif
   1477 	pxor	%xmm0, %xmm0
   1478 	sub	$0x1000, %edi
   1479 	jmp	L(gobble_ashr_11)
   1480 
   1481 	.p2align 4
   1482 L(ashr_11_exittail):
   1483 	movdqa	(%eax, %ecx), %xmm1
   1484 	psrldq	$11, %xmm0
   1485 	psrldq	$11, %xmm3
   1486 	jmp	L(aftertail)
   1487 
   1488 /*
   1489  * The following cases will be handled by ashr_12
    1490  * ecx(offset of %eax)  edi(offset of %edx)   relative offset   	corresponding case
   1491  *        n(4~15)            n - 4            11(15 +(n-4) - n)         ashr_12
   1492  */
   1493 	.p2align 4
   1494 L(ashr_12):
   1495 	mov	$0xffff, %esi
   1496 	pxor	%xmm0, %xmm0
   1497 	movdqa	(%edx), %xmm2
   1498 	movdqa	(%eax), %xmm1
   1499 	pcmpeqb	%xmm1, %xmm0
   1500 	pslldq	$4, %xmm2
   1501 	pcmpeqb	%xmm1, %xmm2
   1502 	psubb	%xmm0, %xmm2
   1503 	pmovmskb %xmm2, %edi
   1504 	shr	%cl, %esi
   1505 	shr	%cl, %edi
   1506 	sub	%edi, %esi
   1507 	lea	-4(%ecx), %edi
   1508 	jnz	L(less32bytes)
   1509 
   1510 	UPDATE_STRNCMP_COUNTER
   1511 
   1512 	movdqa	(%edx), %xmm3
   1513 	pxor	%xmm0, %xmm0
   1514 	mov	$16, %ecx
   1515 	or	$12, %ebx
   1516 	lea	12(%edx), %edi
   1517 	and	$0xfff, %edi
   1518 	sub	$0x1000, %edi
   1519 
   1520 	.p2align 4
   1521 L(loop_ashr_12):
   1522 	add	$16, %edi
   1523 	jg	L(nibble_ashr_12)
   1524 
   1525 L(gobble_ashr_12):
   1526 	movdqa	(%eax, %ecx), %xmm1
   1527 	movdqa	(%edx, %ecx), %xmm2
   1528 	movdqa	%xmm2, %xmm4
   1529 
   1530 	palignr	$12, %xmm3, %xmm2
   1531 
   1532 	pcmpeqb	%xmm1, %xmm0
   1533 	pcmpeqb	%xmm2, %xmm1
   1534 	psubb	%xmm0, %xmm1
   1535 	pmovmskb %xmm1, %esi
   1536 	sub	$0xffff, %esi
   1537 	jnz	L(exit)
   1538 
   1539 #ifdef USE_AS_STRNCMP
   1540 	cmpl	$16, %ebp
   1541 	lea	-16(%ebp), %ebp
   1542 	jbe	L(more8byteseq)
   1543 #endif
   1544 
   1545 	add	$16, %ecx
   1546 	movdqa	%xmm4, %xmm3
   1547 
   1548 	add	$16, %edi
   1549 	jg	L(nibble_ashr_12)
   1550 
   1551 	movdqa	(%eax, %ecx), %xmm1
   1552 	movdqa	(%edx, %ecx), %xmm2
   1553 	movdqa	%xmm2, %xmm4
   1554 
   1555 	palignr	$12, %xmm3, %xmm2
   1556 
   1557 	pcmpeqb	%xmm1, %xmm0
   1558 	pcmpeqb	%xmm2, %xmm1
   1559 	psubb	%xmm0, %xmm1
   1560 	pmovmskb %xmm1, %esi
   1561 	sub	$0xffff, %esi
   1562 	jnz	L(exit)
   1563 
   1564 #ifdef USE_AS_STRNCMP
   1565 	cmpl	$16, %ebp
   1566 	lea	-16(%ebp), %ebp
   1567 	jbe	L(more8byteseq)
   1568 #endif
   1569 	add	$16, %ecx
   1570 	movdqa	%xmm4, %xmm3
   1571 	jmp	L(loop_ashr_12)
   1572 
   1573 	.p2align 4
   1574 L(nibble_ashr_12):
   1575 	pcmpeqb	%xmm3, %xmm0
   1576 	pmovmskb %xmm0, %esi
   1577 	test	$0xf000, %esi
   1578 	jnz	L(ashr_12_exittail)
   1579 
   1580 #ifdef USE_AS_STRNCMP
   1581 	cmpl	$4, %ebp
   1582 	jbe	L(ashr_12_exittail)
   1583 #endif
   1584 	pxor	%xmm0, %xmm0
   1585 	sub	$0x1000, %edi
   1586 	jmp	L(gobble_ashr_12)
   1587 
   1588 	.p2align 4
   1589 L(ashr_12_exittail):
   1590 	movdqa	(%eax, %ecx), %xmm1
   1591 	psrldq	$12, %xmm0
   1592 	psrldq	$12, %xmm3
   1593 	jmp	L(aftertail)
   1594 
   1595 /*
   1596  * The following cases will be handled by ashr_13
    1597  * ecx(offset of %eax)  edi(offset of %edx)   relative offset   	corresponding case
   1598  *        n(3~15)            n - 3            12(15 +(n-3) - n)         ashr_13
   1599  */
   1600 	.p2align 4
   1601 L(ashr_13):
   1602 	mov	$0xffff, %esi
   1603 	pxor	%xmm0, %xmm0
   1604 	movdqa	(%edx), %xmm2
   1605 	movdqa	(%eax), %xmm1
   1606 	pcmpeqb	%xmm1, %xmm0
   1607 	pslldq	$3, %xmm2
   1608 	pcmpeqb	%xmm1, %xmm2
   1609 	psubb	%xmm0, %xmm2
   1610 	pmovmskb %xmm2, %edi
   1611 	shr	%cl, %esi
   1612 	shr	%cl, %edi
   1613 	sub	%edi, %esi
   1614 	lea	-3(%ecx), %edi
   1615 	jnz	L(less32bytes)
   1616 
   1617 	UPDATE_STRNCMP_COUNTER
   1618 
   1619 	movdqa	(%edx), %xmm3
   1620 	pxor	%xmm0, %xmm0
   1621 	mov	$16, %ecx
   1622 	or	$13, %ebx
   1623 	lea	13(%edx), %edi
   1624 	and	$0xfff, %edi
   1625 	sub	$0x1000, %edi
   1626 
   1627 	.p2align 4
   1628 L(loop_ashr_13):
   1629 	add	$16, %edi
   1630 	jg	L(nibble_ashr_13)
   1631 
   1632 L(gobble_ashr_13):
   1633 	movdqa	(%eax, %ecx), %xmm1
   1634 	movdqa	(%edx, %ecx), %xmm2
   1635 	movdqa	%xmm2, %xmm4
   1636 
   1637 	palignr	$13, %xmm3, %xmm2
   1638 
   1639 	pcmpeqb	%xmm1, %xmm0
   1640 	pcmpeqb	%xmm2, %xmm1
   1641 	psubb	%xmm0, %xmm1
   1642 	pmovmskb %xmm1, %esi
   1643 	sub	$0xffff, %esi
   1644 	jnz	L(exit)
   1645 
   1646 #ifdef USE_AS_STRNCMP
   1647 	cmpl	$16, %ebp
   1648 	lea	-16(%ebp), %ebp
   1649 	jbe	L(more8byteseq)
   1650 #endif
   1651 	add	$16, %ecx
   1652 	movdqa	%xmm4, %xmm3
   1653 
   1654 	add	$16, %edi
   1655 	jg	L(nibble_ashr_13)
   1656 
   1657 	movdqa	(%eax, %ecx), %xmm1
   1658 	movdqa	(%edx, %ecx), %xmm2
   1659 	movdqa	%xmm2, %xmm4
   1660 
   1661 	palignr	$13, %xmm3, %xmm2
   1662 
   1663 	pcmpeqb	%xmm1, %xmm0
   1664 	pcmpeqb	%xmm2, %xmm1
   1665 	psubb	%xmm0, %xmm1
   1666 	pmovmskb %xmm1, %esi
   1667 	sub	$0xffff, %esi
   1668 	jnz	L(exit)
   1669 
   1670 #ifdef USE_AS_STRNCMP
   1671 	cmpl	$16, %ebp
   1672 	lea	-16(%ebp), %ebp
   1673 	jbe	L(more8byteseq)
   1674 #endif
   1675 	add	$16, %ecx
   1676 	movdqa	%xmm4, %xmm3
   1677 	jmp	L(loop_ashr_13)
   1678 
   1679 	.p2align 4
   1680 L(nibble_ashr_13):
   1681 	pcmpeqb	%xmm3, %xmm0
   1682 	pmovmskb %xmm0, %esi
   1683 	test	$0xe000, %esi
   1684 	jnz	L(ashr_13_exittail)
   1685 
   1686 #ifdef USE_AS_STRNCMP
   1687 	cmpl	$3, %ebp
   1688 	jbe	L(ashr_13_exittail)
   1689 #endif
   1690 	pxor	%xmm0, %xmm0
   1691 	sub	$0x1000, %edi
   1692 	jmp	L(gobble_ashr_13)
   1693 
   1694 	.p2align 4
   1695 L(ashr_13_exittail):
   1696 	movdqa	(%eax, %ecx), %xmm1
   1697 	psrldq	$13, %xmm0
   1698 	psrldq	$13, %xmm3
   1699 	jmp	L(aftertail)
   1700 
   1701 /*
   1702  * The following cases will be handled by ashr_14
    1703  * ecx(offset of %eax)  edi(offset of %edx)   relative offset   	corresponding case
   1704  *        n(2~15)            n - 2            13(15 +(n-2) - n)         ashr_14
   1705  */
   1706 	.p2align 4
   1707 L(ashr_14):
   1708 	mov	$0xffff, %esi
   1709 	pxor	%xmm0, %xmm0
   1710 	movdqa	(%edx), %xmm2
   1711 	movdqa	(%eax), %xmm1
   1712 	pcmpeqb	%xmm1, %xmm0
   1713 	pslldq	$2, %xmm2
   1714 	pcmpeqb	%xmm1, %xmm2
   1715 	psubb	%xmm0, %xmm2
   1716 	pmovmskb %xmm2, %edi
   1717 	shr	%cl, %esi
   1718 	shr	%cl, %edi
   1719 	sub	%edi, %esi
   1720 	lea	-2(%ecx), %edi
   1721 	jnz	L(less32bytes)
   1722 
   1723 	UPDATE_STRNCMP_COUNTER
   1724 
   1725 	movdqa	(%edx), %xmm3
   1726 	pxor	%xmm0, %xmm0
   1727 	mov	$16, %ecx
   1728 	or	$14, %ebx
   1729 	lea	14(%edx), %edi
   1730 	and	$0xfff, %edi
   1731 	sub	$0x1000, %edi
   1732 
   1733 	.p2align 4
   1734 L(loop_ashr_14):
   1735 	add	$16, %edi
   1736 	jg	L(nibble_ashr_14)
   1737 
   1738 L(gobble_ashr_14):
   1739 	movdqa	(%eax, %ecx), %xmm1
   1740 	movdqa	(%edx, %ecx), %xmm2
   1741 	movdqa	%xmm2, %xmm4
   1742 
   1743 	palignr	$14, %xmm3, %xmm2
   1744 
   1745 	pcmpeqb	%xmm1, %xmm0
   1746 	pcmpeqb	%xmm2, %xmm1
   1747 	psubb	%xmm0, %xmm1
   1748 	pmovmskb %xmm1, %esi
   1749 	sub	$0xffff, %esi
   1750 	jnz	L(exit)
   1751 
   1752 #ifdef USE_AS_STRNCMP
   1753 	cmpl	$16, %ebp
   1754 	lea	-16(%ebp), %ebp
   1755 	jbe	L(more8byteseq)
   1756 #endif
   1757 	add	$16, %ecx
   1758 	movdqa	%xmm4, %xmm3
   1759 
   1760 	add	$16, %edi
   1761 	jg	L(nibble_ashr_14)
   1762 
   1763 	movdqa	(%eax, %ecx), %xmm1
   1764 	movdqa	(%edx, %ecx), %xmm2
   1765 	movdqa	%xmm2, %xmm4
   1766 
   1767 	palignr	$14, %xmm3, %xmm2
   1768 
   1769 	pcmpeqb	%xmm1, %xmm0
   1770 	pcmpeqb	%xmm2, %xmm1
   1771 	psubb	%xmm0, %xmm1
   1772 	pmovmskb %xmm1, %esi
   1773 	sub	$0xffff, %esi
   1774 	jnz	L(exit)
   1775 
   1776 #ifdef USE_AS_STRNCMP
   1777 	cmpl	$16, %ebp
   1778 	lea	-16(%ebp), %ebp
   1779 	jbe	L(more8byteseq)
   1780 #endif
   1781 	add	$16, %ecx
   1782 	movdqa	%xmm4, %xmm3
   1783 	jmp	L(loop_ashr_14)
   1784 
   1785 	.p2align 4
   1786 L(nibble_ashr_14):
   1787 	pcmpeqb	%xmm3, %xmm0
   1788 	pmovmskb %xmm0, %esi
   1789 	test	$0xc000, %esi
   1790 	jnz	L(ashr_14_exittail)
   1791 
   1792 #ifdef USE_AS_STRNCMP
   1793 	cmpl	$2, %ebp
   1794 	jbe	L(ashr_14_exittail)
   1795 #endif
   1796 	pxor	%xmm0, %xmm0
   1797 	sub	$0x1000, %edi
   1798 	jmp	L(gobble_ashr_14)
   1799 
   1800 	.p2align 4
   1801 L(ashr_14_exittail):
   1802 	movdqa	(%eax, %ecx), %xmm1
   1803 	psrldq	$14, %xmm0
   1804 	psrldq	$14, %xmm3
   1805 	jmp	L(aftertail)
   1806 
   1807 /*
    1808  * The following cases will be handled by ashr_15
    1809  * ecx(offset of %eax)  edi(offset of %edx)   relative offset   	corresponding case
   1810  *        n(1~15)            n - 1            14(15 +(n-1) - n)         ashr_15
   1811  */
   1812 
   1813 	.p2align 4
   1814 L(ashr_15):
   1815 	mov	$0xffff, %esi
   1816 	pxor	%xmm0, %xmm0
   1817 	movdqa	(%edx), %xmm2
   1818 	movdqa	(%eax), %xmm1
   1819 	pcmpeqb	%xmm1, %xmm0
   1820 	pslldq	$1, %xmm2
   1821 	pcmpeqb	%xmm1, %xmm2
   1822 	psubb	%xmm0, %xmm2
   1823 	pmovmskb %xmm2, %edi
   1824 	shr	%cl, %esi
   1825 	shr	%cl, %edi
   1826 	sub	%edi, %esi
   1827 	lea	-1(%ecx), %edi
   1828 	jnz	L(less32bytes)
   1829 
   1830 	UPDATE_STRNCMP_COUNTER
   1831 
   1832 	movdqa	(%edx), %xmm3
   1833 	pxor	%xmm0, %xmm0
   1834 	mov	$16, %ecx
   1835 	or	$15, %ebx
   1836 	lea	15(%edx), %edi
   1837 	and	$0xfff, %edi
   1838 	sub	$0x1000, %edi
   1839 
   1840 	.p2align 4
   1841 L(loop_ashr_15):
   1842 	add	$16, %edi
   1843 	jg	L(nibble_ashr_15)
   1844 
   1845 L(gobble_ashr_15):
   1846 	movdqa	(%eax, %ecx), %xmm1
   1847 	movdqa	(%edx, %ecx), %xmm2
   1848 	movdqa	%xmm2, %xmm4
   1849 
   1850 	palignr	$15, %xmm3, %xmm2
   1851 
   1852 	pcmpeqb	%xmm1, %xmm0
   1853 	pcmpeqb	%xmm2, %xmm1
   1854 	psubb	%xmm0, %xmm1
   1855 	pmovmskb %xmm1, %esi
   1856 	sub	$0xffff, %esi
   1857 	jnz	L(exit)
   1858 
   1859 #ifdef USE_AS_STRNCMP
   1860 	cmpl	$16, %ebp
   1861 	lea	-16(%ebp), %ebp
   1862 	jbe	L(more8byteseq)
   1863 #endif
   1864 	add	$16, %ecx
   1865 	movdqa	%xmm4, %xmm3
   1866 
   1867 	add	$16, %edi
   1868 	jg	L(nibble_ashr_15)
   1869 
   1870 	movdqa	(%eax, %ecx), %xmm1
   1871 	movdqa	(%edx, %ecx), %xmm2
   1872 	movdqa	%xmm2, %xmm4
   1873 
   1874 	palignr	$15, %xmm3, %xmm2
   1875 
   1876 	pcmpeqb	%xmm1, %xmm0
   1877 	pcmpeqb	%xmm2, %xmm1
   1878 	psubb	%xmm0, %xmm1
   1879 	pmovmskb %xmm1, %esi
   1880 	sub	$0xffff, %esi
   1881 	jnz	L(exit)
   1882 
   1883 #ifdef USE_AS_STRNCMP
   1884 	cmpl	$16, %ebp
   1885 	lea	-16(%ebp), %ebp
   1886 	jbe	L(more8byteseq)
   1887 #endif
   1888 	add	$16, %ecx
   1889 	movdqa	%xmm4, %xmm3
   1890 	jmp	L(loop_ashr_15)
   1891 
   1892 	.p2align 4
   1893 L(nibble_ashr_15):
   1894 	pcmpeqb	%xmm3, %xmm0
   1895 	pmovmskb %xmm0, %esi
   1896 	test	$0x8000, %esi
   1897 	jnz	L(ashr_15_exittail)
   1898 
   1899 #ifdef USE_AS_STRNCMP
   1900 	cmpl	$1, %ebp
   1901 	jbe	L(ashr_15_exittail)
   1902 #endif
   1903 	pxor	%xmm0, %xmm0
   1904 	sub	$0x1000, %edi
   1905 	jmp	L(gobble_ashr_15)
   1906 
   1907 	.p2align 4
   1908 L(ashr_15_exittail):
   1909 	movdqa	(%eax, %ecx), %xmm1
   1910 	psrldq	$15, %xmm0
   1911 	psrldq	$15, %xmm3
   1912 	jmp	L(aftertail)
   1913 
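/*
 * Common tail for all L(ashr_N) paths.  %esi ends up holding the compare
 * mask, %ecx the byte index reached in the aligned %eax stream, and %ebx
 * the shift amount (low five bits) plus the swap flag (bit 0x20).
 * L(exit)/L(less32bytes) turn these back into byte addresses in the
 * original strings, swap the pointers back if needed, restore the
 * callee-saved registers and fall into L(less16bytes), which locates the
 * exact differing byte.
 */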
   1914 	.p2align 4
   1915 L(aftertail):
   1916 	pcmpeqb	%xmm3, %xmm1
   1917 	psubb	%xmm0, %xmm1
   1918 	pmovmskb %xmm1, %esi
   1919 	not	%esi
   1920 L(exit):
   1921 	mov	%ebx, %edi
   1922 	and	$0x1f, %edi
   1923 	lea	-16(%edi, %ecx), %edi
   1924 L(less32bytes):
   1925 	add	%edi, %edx
   1926 	add	%ecx, %eax
   1927 	test	$0x20, %ebx
   1928 	jz	L(ret2)
   1929 	xchg	%eax, %edx
   1930 
   1931 	.p2align 4
   1932 L(ret2):
   1933 	mov	%esi, %ecx
   1934 	POP	(%esi)
   1935 	POP	(%edi)
   1936 	POP	(%ebx)
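/*
 * %ecx now holds the compare mask for the 16 bytes at %eax/%edx; its
 * lowest set bit marks the first position that mismatched or hit a NUL.
 * The low byte (%cl) covers bytes 0-7 and the high byte (%ch) bytes 8-15,
 * handled by L(2next_8_bytes) after advancing both pointers by 8.  Each
 * L(ByteK) reloads the two bytes (after checking the remaining count in
 * the strncmp build) and returns their difference.
 */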
   1937 L(less16bytes):
   1938 	test	%cl, %cl
   1939 	jz	L(2next_8_bytes)
   1940 
   1941 	test	$0x01, %cl
   1942 	jnz	L(Byte0)
   1943 
   1944 	test	$0x02, %cl
   1945 	jnz	L(Byte1)
   1946 
   1947 	test	$0x04, %cl
   1948 	jnz	L(Byte2)
   1949 
   1950 	test	$0x08, %cl
   1951 	jnz	L(Byte3)
   1952 
   1953 	test	$0x10, %cl
   1954 	jnz	L(Byte4)
   1955 
   1956 	test	$0x20, %cl
   1957 	jnz	L(Byte5)
   1958 
   1959 	test	$0x40, %cl
   1960 	jnz	L(Byte6)
   1961 #ifdef USE_AS_STRNCMP
   1962 	cmpl	$7, %ebp
   1963 	jbe	L(eq)
   1964 #endif
   1965 
   1966 	movzbl	7(%eax), %ecx
   1967 	movzbl	7(%edx), %eax
   1968 
   1969 	sub	%ecx, %eax
   1970 	RETURN
   1971 
   1972 	.p2align 4
   1973 L(Byte0):
   1974 #ifdef USE_AS_STRNCMP
   1975 	cmpl	$0, %ebp
   1976 	jbe	L(eq)
   1977 #endif
   1978 	movzbl	(%eax), %ecx
   1979 	movzbl	(%edx), %eax
   1980 
   1981 	sub	%ecx, %eax
   1982 	RETURN
   1983 
   1984 	.p2align 4
   1985 L(Byte1):
   1986 #ifdef USE_AS_STRNCMP
   1987 	cmpl	$1, %ebp
   1988 	jbe	L(eq)
   1989 #endif
   1990 	movzbl	1(%eax), %ecx
   1991 	movzbl	1(%edx), %eax
   1992 
   1993 	sub	%ecx, %eax
   1994 	RETURN
   1995 
   1996 	.p2align 4
   1997 L(Byte2):
   1998 #ifdef USE_AS_STRNCMP
   1999 	cmpl	$2, %ebp
   2000 	jbe	L(eq)
   2001 #endif
   2002 	movzbl	2(%eax), %ecx
   2003 	movzbl	2(%edx), %eax
   2004 
   2005 	sub	%ecx, %eax
   2006 	RETURN
   2007 
   2008 	.p2align 4
   2009 L(Byte3):
   2010 #ifdef USE_AS_STRNCMP
   2011 	cmpl	$3, %ebp
   2012 	jbe	L(eq)
   2013 #endif
   2014 	movzbl	3(%eax), %ecx
   2015 	movzbl	3(%edx), %eax
   2016 
   2017 	sub	%ecx, %eax
   2018 	RETURN
   2019 
   2020 	.p2align 4
   2021 L(Byte4):
   2022 #ifdef USE_AS_STRNCMP
   2023 	cmpl	$4, %ebp
   2024 	jbe	L(eq)
   2025 #endif
   2026 	movzbl	4(%eax), %ecx
   2027 	movzbl	4(%edx), %eax
   2028 
   2029 	sub	%ecx, %eax
   2030 	RETURN
   2031 
   2032 	.p2align 4
   2033 L(Byte5):
   2034 #ifdef USE_AS_STRNCMP
   2035 	cmpl	$5, %ebp
   2036 	jbe	L(eq)
   2037 #endif
   2038 	movzbl	5(%eax), %ecx
   2039 	movzbl	5(%edx), %eax
   2040 
   2041 	sub	%ecx, %eax
   2042 	RETURN
   2043 
   2044 	.p2align 4
   2045 L(Byte6):
   2046 #ifdef USE_AS_STRNCMP
   2047 	cmpl	$6, %ebp
   2048 	jbe	L(eq)
   2049 #endif
   2050 	movzbl	6(%eax), %ecx
   2051 	movzbl	6(%edx), %eax
   2052 
   2053 	sub	%ecx, %eax
   2054 	RETURN
   2055 
   2056 	.p2align 4
   2057 L(2next_8_bytes):
   2058 	add	$8, %eax
   2059 	add	$8, %edx
   2060 #ifdef USE_AS_STRNCMP
   2061 	cmpl	$8, %ebp
   2062 	lea	-8(%ebp), %ebp
   2063 	jbe	L(eq)
   2064 #endif
   2065 
   2066 	test	$0x01, %ch
   2067 	jnz	L(Byte0)
   2068 
   2069 	test	$0x02, %ch
   2070 	jnz	L(Byte1)
   2071 
   2072 	test	$0x04, %ch
   2073 	jnz	L(Byte2)
   2074 
   2075 	test	$0x08, %ch
   2076 	jnz	L(Byte3)
   2077 
   2078 	test	$0x10, %ch
   2079 	jnz	L(Byte4)
   2080 
   2081 	test	$0x20, %ch
   2082 	jnz	L(Byte5)
   2083 
   2084 	test	$0x40, %ch
   2085 	jnz	L(Byte6)
   2086 
   2087 #ifdef USE_AS_STRNCMP
   2088 	cmpl	$7, %ebp
   2089 	jbe	L(eq)
   2090 #endif
   2091 	movzbl	7(%eax), %ecx
   2092 	movzbl	7(%edx), %eax
   2093 
   2094 	sub	%ecx, %eax
   2095 	RETURN
   2096 
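/*
 * L(neq) is reached from the byte-at-a-time paths with the flags from
 * "cmpb %cl, k(%edx)" still live: an unsigned "above" means the STR1 byte
 * is greater, so return +1, otherwise -1.
 */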
   2097 	.p2align 4
   2098 L(neq):
   2099 	mov	$1, %eax
   2100 	ja	L(neq_bigger)
   2101 	neg	%eax
   2102 L(neq_bigger):
   2103 	RETURN
   2104 
   2105 #ifdef USE_AS_STRNCMP
   2106 	cfi_restore_state
   2107 	.p2align 4
   2108 L(more8byteseq):
   2109 	POP	(%esi)
   2110 	POP	(%edi)
   2111 	POP	(%ebx)
   2112 #endif
   2113 
   2114 L(eq):
   2115 
   2116 #ifdef USE_AS_STRNCMP
   2117 	POP	(%ebp)
   2118 #endif
   2119 	xorl	%eax, %eax
   2120 	ret
   2121 
   2122 #ifdef USE_AS_STRNCMP
   2123 	CFI_PUSH (%ebp)
   2124 
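/*
 * strncmp with n < 16: a fully unrolled byte-by-byte compare that stops at
 * the first mismatch, at a NUL, or after n bytes, whichever comes first.
 */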
   2125 	.p2align 4
   2126 L(less16bytes_sncmp):
   2127 	test	%ebp, %ebp
   2128 	jz	L(eq)
   2129 
   2130 	movzbl	(%eax), %ecx
   2131 	cmpb	%cl, (%edx)
   2132 	jne	L(neq)
   2133 	test	%cl, %cl
   2134 	je	L(eq)
   2135 
   2136 	cmpl	$1, %ebp
   2137 	je	L(eq)
   2138 
   2139 	movzbl	1(%eax), %ecx
   2140 	cmpb	%cl, 1(%edx)
   2141 	jne	L(neq)
   2142 	test	%cl, %cl
   2143 	je	L(eq)
   2144 
   2145 	cmpl	$2, %ebp
   2146 	je	L(eq)
   2147 
   2148 	movzbl	2(%eax), %ecx
   2149 	cmpb	%cl, 2(%edx)
   2150 	jne	L(neq)
   2151 	test	%cl, %cl
   2152 	je	L(eq)
   2153 
   2154 	cmpl	$3, %ebp
   2155 	je	L(eq)
   2156 
   2157 	movzbl	3(%eax), %ecx
   2158 	cmpb	%cl, 3(%edx)
   2159 	jne	L(neq)
   2160 	test	%cl, %cl
   2161 	je	L(eq)
   2162 
   2163 	cmpl	$4, %ebp
   2164 	je	L(eq)
   2165 
   2166 	movzbl	4(%eax), %ecx
   2167 	cmpb	%cl, 4(%edx)
   2168 	jne	L(neq)
   2169 	test	%cl, %cl
   2170 	je	L(eq)
   2171 
   2172 	cmpl	$5, %ebp
   2173 	je	L(eq)
   2174 
   2175 	movzbl	5(%eax), %ecx
   2176 	cmpb	%cl, 5(%edx)
   2177 	jne	L(neq)
   2178 	test	%cl, %cl
   2179 	je	L(eq)
   2180 
   2181 	cmpl	$6, %ebp
   2182 	je	L(eq)
   2183 
   2184 	movzbl	6(%eax), %ecx
   2185 	cmpb	%cl, 6(%edx)
   2186 	jne	L(neq)
   2187 	test	%cl, %cl
   2188 	je	L(eq)
   2189 
   2190 	cmpl	$7, %ebp
   2191 	je	L(eq)
   2192 
   2193 	movzbl	7(%eax), %ecx
   2194 	cmpb	%cl, 7(%edx)
   2195 	jne	L(neq)
   2196 	test	%cl, %cl
   2197 	je	L(eq)
   2198 
   2199 
   2200 	cmpl	$8, %ebp
   2201 	je	L(eq)
   2202 
   2203 	movzbl	8(%eax), %ecx
   2204 	cmpb	%cl, 8(%edx)
   2205 	jne	L(neq)
   2206 	test	%cl, %cl
   2207 	je	L(eq)
   2208 
   2209 	cmpl	$9, %ebp
   2210 	je	L(eq)
   2211 
   2212 	movzbl	9(%eax), %ecx
   2213 	cmpb	%cl, 9(%edx)
   2214 	jne	L(neq)
   2215 	test	%cl, %cl
   2216 	je	L(eq)
   2217 
   2218 	cmpl	$10, %ebp
   2219 	je	L(eq)
   2220 
   2221 	movzbl	10(%eax), %ecx
   2222 	cmpb	%cl, 10(%edx)
   2223 	jne	L(neq)
   2224 	test	%cl, %cl
   2225 	je	L(eq)
   2226 
   2227 	cmpl	$11, %ebp
   2228 	je	L(eq)
   2229 
   2230 	movzbl	11(%eax), %ecx
   2231 	cmpb	%cl, 11(%edx)
   2232 	jne	L(neq)
   2233 	test	%cl, %cl
   2234 	je	L(eq)
   2235 
   2236 
   2237 	cmpl	$12, %ebp
   2238 	je	L(eq)
   2239 
   2240 	movzbl	12(%eax), %ecx
   2241 	cmpb	%cl, 12(%edx)
   2242 	jne	L(neq)
   2243 	test	%cl, %cl
   2244 	je	L(eq)
   2245 
   2246 	cmpl	$13, %ebp
   2247 	je	L(eq)
   2248 
   2249 	movzbl	13(%eax), %ecx
   2250 	cmpb	%cl, 13(%edx)
   2251 	jne	L(neq)
   2252 	test	%cl, %cl
   2253 	je	L(eq)
   2254 
   2255 	cmpl	$14, %ebp
   2256 	je	L(eq)
   2257 
   2258 	movzbl	14(%eax), %ecx
   2259 	cmpb	%cl, 14(%edx)
   2260 	jne	L(neq)
   2261 	test	%cl, %cl
   2262 	je	L(eq)
   2263 
   2264 	cmpl	$15, %ebp
   2265 	je	L(eq)
   2266 
   2267 	movzbl	15(%eax), %ecx
   2268 	cmpb	%cl, 15(%edx)
   2269 	jne	L(neq)
   2270 	test	%cl, %cl
   2271 	je	L(eq)
   2272 
   2273 	POP	(%ebp)
   2274 	xor	%eax, %eax
   2275 	ret
   2276 #endif
   2277 
   2278 END (STRCMP)
   2279