      1 /*
      2 Copyright (c) 2010, Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     31 #ifndef L
     32 # define L(label)	.L##label
     33 #endif
     34 
     35 #ifndef cfi_startproc
     36 # define cfi_startproc			.cfi_startproc
     37 #endif
     38 
     39 #ifndef cfi_endproc
     40 # define cfi_endproc			.cfi_endproc
     41 #endif
     42 
     43 #ifndef cfi_rel_offset
     44 # define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
     45 #endif
     46 
     47 #ifndef cfi_restore
     48 # define cfi_restore(reg)		.cfi_restore reg
     49 #endif
     50 
     51 #ifndef cfi_adjust_cfa_offset
     52 # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
     53 #endif
     54 
     55 #ifndef cfi_remember_state
     56 # define cfi_remember_state		.cfi_remember_state
     57 #endif
     58 
     59 #ifndef cfi_restore_state
     60 # define cfi_restore_state		.cfi_restore_state
     61 #endif
     62 
     63 #ifndef ENTRY
     64 # define ENTRY(name)			\
     65 	.type name,  @function; 	\
     66 	.globl name;			\
     67 	.p2align 4;			\
     68 name:					\
     69 	cfi_startproc
     70 #endif
     71 
     72 #ifndef END
     73 # define END(name)			\
     74 	cfi_endproc;			\
     75 	.size name, .-name
     76 #endif
     77 
     78 #define CFI_PUSH(REG)						\
     79   cfi_adjust_cfa_offset (4);					\
     80   cfi_rel_offset (REG, 0)
     81 
     82 #define CFI_POP(REG)						\
     83   cfi_adjust_cfa_offset (-4);					\
     84   cfi_restore (REG)
     85 
     86 #define PUSH(REG)	pushl REG; CFI_PUSH (REG)
     87 #define POP(REG)	popl REG; CFI_POP (REG)
     88 
     89 #ifndef USE_AS_STRNCMP
     90 # define STR1		4
     91 # define STR2		STR1+4
     92 # define RETURN		ret
     93 
     94 # define UPDATE_STRNCMP_COUNTER
     95 #else
     96 # define STR1		8
     97 # define STR2		STR1+4
     98 # define CNT		STR2+4
     99 # define RETURN		POP (%ebp); ret; CFI_PUSH (%ebp)
    100 
    101 # define UPDATE_STRNCMP_COUNTER				\
     102 	/* calculate the number of bytes left to compare */	\
    103 	mov	$16, %esi;				\
    104 	sub	%ecx, %esi;				\
    105 	cmpl	%esi, %ebp;				\
    106 	jbe	L(more8byteseq);			\
    107 	sub	%esi, %ebp
    108 #endif
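/*
 * What UPDATE_STRNCMP_COUNTER does, sketched in C (hedged: the names
 * below are illustrative, only %ebp and %ecx exist in the code).
 * After the first, partially overlapping 16-byte compare, 16 - %ecx
 * bytes of the strncmp limit have already been covered, so the
 * remaining count in %ebp is either exhausted (return "equal") or
 * reduced by that amount before entering the main loop:
 *
 *	size_t head = 16 - ecx;    // bytes covered by the head compare
 *	if (n <= head)
 *		return 0;          // limit reached without a difference
 *	n -= head;                 // bytes left for the 16-byte loop
 */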
    109 
    110 #ifndef STRCMP
    111 # define STRCMP strcmp
    112 #endif
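/*
 * Reference semantics, as a hedged C sketch (not part of the build):
 * this file implements strcmp, or strncmp when USE_AS_STRNCMP is
 * defined.  Under the IA-32 cdecl convention the arguments live on
 * the stack, which is what the STR1/STR2/CNT offsets above index (the
 * strncmp build pushes %ebp first, hence the extra 4 bytes).  The
 * assembly returns a value with the same sign as the naive version:
 *
 *	int strcmp_ref(const char *s1, const char *s2)
 *	{
 *		while (*s1 != '\0' && *s1 == *s2) {
 *			s1++;
 *			s2++;
 *		}
 *		return (unsigned char)*s1 - (unsigned char)*s2;
 *	}
 */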
    113 
    114 	.section .text.ssse3,"ax",@progbits
    115 ENTRY (STRCMP)
    116 #ifdef USE_AS_STRNCMP
    117 	PUSH	(%ebp)
    118 	cfi_remember_state
    119 #endif
    120 	movl	STR1(%esp), %edx
    121 	movl	STR2(%esp), %eax
    122 #ifdef USE_AS_STRNCMP
    123 	movl	CNT(%esp), %ebp
    124 	cmpl	$16, %ebp
    125 	jb	L(less16bytes_sncmp)
    126 	jmp	L(more16bytes)
    127 #endif
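/*
 * Plain strcmp falls through to the byte-by-byte checks below for the
 * first 8 characters, so short or early-differing strings never pay
 * the SSE setup cost.  The strncmp build never reaches this code: it
 * already branched above to L(less16bytes_sncmp) or L(more16bytes).
 */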
    128 
    129 	movzbl	(%eax), %ecx
    130 	cmpb	%cl, (%edx)
    131 	jne	L(neq)
    132 	cmpl	$0, %ecx
    133 	je	L(eq)
    134 
    135 	movzbl	1(%eax), %ecx
    136 	cmpb	%cl, 1(%edx)
    137 	jne	L(neq)
    138 	cmpl	$0, %ecx
    139 	je	L(eq)
    140 
    141 	movzbl	2(%eax), %ecx
    142 	cmpb	%cl, 2(%edx)
    143 	jne	L(neq)
    144 	cmpl	$0, %ecx
    145 	je	L(eq)
    146 
    147 	movzbl	3(%eax), %ecx
    148 	cmpb	%cl, 3(%edx)
    149 	jne	L(neq)
    150 	cmpl	$0, %ecx
    151 	je	L(eq)
    152 
    153 	movzbl	4(%eax), %ecx
    154 	cmpb	%cl, 4(%edx)
    155 	jne	L(neq)
    156 	cmpl	$0, %ecx
    157 	je	L(eq)
    158 
    159 	movzbl	5(%eax), %ecx
    160 	cmpb	%cl, 5(%edx)
    161 	jne	L(neq)
    162 	cmpl	$0, %ecx
    163 	je	L(eq)
    164 
    165 	movzbl	6(%eax), %ecx
    166 	cmpb	%cl, 6(%edx)
    167 	jne	L(neq)
    168 	cmpl	$0, %ecx
    169 	je	L(eq)
    170 
    171 	movzbl	7(%eax), %ecx
    172 	cmpb	%cl, 7(%edx)
    173 	jne	L(neq)
    174 	cmpl	$0, %ecx
    175 	je	L(eq)
    176 
    177 	add	$8, %edx
    178 	add	$8, %eax
    179 #ifdef USE_AS_STRNCMP
    180 	cmpl	$8, %ebp
    181 	lea	-8(%ebp), %ebp
    182 	je	L(eq)
    183 L(more16bytes):
    184 #endif
    185 	movl	%edx, %ecx
    186 	and	$0xfff, %ecx
    187 	cmpl	$0xff0, %ecx
    188 	ja	L(crosspage)
    189 	mov	%eax, %ecx
    190 	and	$0xfff, %ecx
    191 	cmpl	$0xff0, %ecx
    192 	ja	L(crosspage)
    193 	pxor	%xmm0, %xmm0
    194 	movlpd	(%eax), %xmm1
    195 	movlpd	(%edx), %xmm2
    196 	movhpd	8(%eax), %xmm1
    197 	movhpd	8(%edx), %xmm2
    198 	pcmpeqb	%xmm1, %xmm0
    199 	pcmpeqb	%xmm2, %xmm1
    200 	psubb	%xmm0, %xmm1
    201 	pmovmskb %xmm1, %ecx
    202 	sub	$0xffff, %ecx
    203 	jnz	L(less16bytes)
    204 #ifdef USE_AS_STRNCMP
    205 	cmpl	$16, %ebp
    206 	lea	-16(%ebp), %ebp
    207 	jbe	L(eq)
    208 #endif
    209 	add	$16, %eax
    210 	add	$16, %edx
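/*
 * The 16-byte check above folds "is there a NUL?" and "is there a
 * mismatch?" into a single bit mask.  A hedged C intrinsics sketch of
 * the same idea (function and variable names are invented here, they
 * are not part of this file):
 *
 *	#include <emmintrin.h>
 *
 *	static int diff_or_nul_mask(__m128i s1, __m128i s2)
 *	{
 *		__m128i zero   = _mm_setzero_si128();
 *		__m128i is_nul = _mm_cmpeq_epi8(s1, zero);    // 0xff where s1 byte == 0
 *		__m128i is_eq  = _mm_cmpeq_epi8(s1, s2);      // 0xff where the bytes match
 *		__m128i good   = _mm_sub_epi8(is_eq, is_nul); // 0xff only for match-and-not-NUL
 *		return 0xffff - _mm_movemask_epi8(good);      // 0 means "keep going"
 *	}
 *
 * A non-zero result plays the role of the "sub $0xffff, ...; jnz"
 * tests in this file; the assembly keeps the subtraction result and
 * later scans it from bit 0 upwards to locate the first differing or
 * terminating byte.
 */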
    211 
    212 L(crosspage):
    213 
    214 	PUSH	(%ebx)
    215 	PUSH	(%edi)
    216 	PUSH	(%esi)
    217 
    218 	movl	%edx, %edi
    219 	movl	%eax, %ecx
    220 	and	$0xf, %ecx
    221 	and	$0xf, %edi
    222 	xor	%ecx, %eax
    223 	xor	%edi, %edx
    224 	xor	%ebx, %ebx
    225 	cmpl	%edi, %ecx
    226 	je	L(ashr_0)
    227 	ja	L(bigger)
    228 	or	$0x20, %ebx
    229 	xchg	%edx, %eax
    230 	xchg	%ecx, %edi
    231 L(bigger):
    232 	lea	15(%edi), %edi
    233 	sub	%ecx, %edi
    234 	cmpl	$8, %edi
    235 	jle	L(ashr_less_8)
    236 	cmpl	$14, %edi
    237 	je	L(ashr_15)
    238 	cmpl	$13, %edi
    239 	je	L(ashr_14)
    240 	cmpl	$12, %edi
    241 	je	L(ashr_13)
    242 	cmpl	$11, %edi
    243 	je	L(ashr_12)
    244 	cmpl	$10, %edi
    245 	je	L(ashr_11)
    246 	cmpl	$9, %edi
    247 	je	L(ashr_10)
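	/* L(ashr_less_8) is only reached through the "jle" above, so the
	   flags still hold the result of "cmpl $8, %edi"; the "je" below
	   therefore selects L(ashr_9) for a relative offset of exactly 8.  */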
    248 L(ashr_less_8):
    249 	je	L(ashr_9)
    250 	cmpl	$7, %edi
    251 	je	L(ashr_8)
    252 	cmpl	$6, %edi
    253 	je	L(ashr_7)
    254 	cmpl	$5, %edi
    255 	je	L(ashr_6)
    256 	cmpl	$4, %edi
    257 	je	L(ashr_5)
    258 	cmpl	$3, %edi
    259 	je	L(ashr_4)
    260 	cmpl	$2, %edi
    261 	je	L(ashr_3)
    262 	cmpl	$1, %edi
    263 	je	L(ashr_2)
    264 	cmpl	$0, %edi
    265 	je	L(ashr_1)
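/*
 * Dispatch logic above, as a hedged C sketch (helper names invented):
 * both pointers are rounded down to a 16-byte boundary, the string
 * with the larger in-block offset is kept in %eax (swapping the two
 * and recording the swap in bit 0x20 of %ebx when necessary), and the
 * distance between the two offsets picks one of the L(ashr_N) loops,
 * each of which realigns the %edx stream with "palignr $N".
 *
 *	unsigned off1 = (uintptr_t)p1 & 15;   // offset of the %eax string
 *	unsigned off2 = (uintptr_t)p2 & 15;   // offset of the %edx string
 *	p1 -= off1;  p2 -= off2;              // aligned bases
 *	int swapped = 0;
 *	if (off1 == off2)
 *		goto ashr_0;                  // same alignment, no realign needed
 *	if (off1 < off2) {                    // keep the larger offset in p1/off1
 *		swap_ptrs(&p1, &p2);  swap_uints(&off1, &off2);  swapped = 1;
 *	}
 *	unsigned rel = 15 + off2 - off1;      // 0..14, handled by ashr_(rel + 1)
 */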
    266 
    267 /*
    268  * The following cases will be handled by ashr_0
     269  * ecx (offset of %eax string)   offset of %edx string   relative offset   corresponding case
    270  *        n(0~15)            n(0~15)           15(15+ n-n)         ashr_0
    271  */
    272 	.p2align 4
    273 L(ashr_0):
    274 	mov	$0xffff, %esi
    275 	movdqa	(%eax), %xmm1
    276 	pxor	%xmm0, %xmm0
    277 	pcmpeqb	%xmm1, %xmm0
    278 	pcmpeqb	(%edx), %xmm1
    279 	psubb	%xmm0, %xmm1
    280 	pmovmskb %xmm1, %edi
    281 	shr	%cl, %esi
    282 	shr	%cl, %edi
    283 	sub	%edi, %esi
    284 	mov	%ecx, %edi
    285 	jne	L(less32bytes)
    286 	UPDATE_STRNCMP_COUNTER
    287 	mov	$0x10, %ebx
    288 	mov	$0x10, %ecx
    289 	pxor	%xmm0, %xmm0
    290 	.p2align 4
    291 L(loop_ashr_0):
    292 	movdqa	(%eax, %ecx), %xmm1
    293 	movdqa	(%edx, %ecx), %xmm2
    294 
    295 	pcmpeqb	%xmm1, %xmm0
    296 	pcmpeqb	%xmm2, %xmm1
    297 	psubb	%xmm0, %xmm1
    298 	pmovmskb %xmm1, %esi
    299 	sub	$0xffff, %esi
    300 	jnz	L(exit)
    301 #ifdef USE_AS_STRNCMP
    302 	cmpl	$16, %ebp
    303 	lea	-16(%ebp), %ebp
    304 	jbe	L(more8byteseq)
    305 #endif
    306 	add	$16, %ecx
    307 	jmp	L(loop_ashr_0)
    308 
    309 /*
    310  * The following cases will be handled by ashr_1
     311  * ecx (offset of %eax string)   offset of %edx string   relative offset   corresponding case
    312  *        n(15)            n -15            0(15 +(n-15) - n)         ashr_1
    313  */
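/*
 * Main loop shape for all of the misaligned L(ashr_N) cases, as a
 * hedged C intrinsics sketch (variable names invented; it reuses the
 * diff_or_nul_mask() idea sketched earlier).  Both streams are read
 * with aligned 16-byte loads; the previous and current block of the
 * %edx stream are stitched together with palignr so that they line up
 * with the block just loaded from the %eax stream:
 *
 *	#include <tmmintrin.h>
 *
 *	__m128i prev = _mm_load_si128((const __m128i *)p2);   // aligned block 0
 *	for (size_t i = 16; ; i += 16) {
 *		__m128i a   = _mm_load_si128((const __m128i *)(p1 + i));
 *		__m128i cur = _mm_load_si128((const __m128i *)(p2 + i));
 *		// bytes prev[1..15] followed by cur[0]: the second string
 *		// shifted into place (shift 1 here, i.e. the ashr_1 case)
 *		__m128i b   = _mm_alignr_epi8(cur, prev, 1);
 *		if (diff_or_nul_mask(a, b))
 *			break;                 // exit path works out the result
 *		prev = cur;
 *	}
 */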
    314 	.p2align 4
    315 L(ashr_1):
    316 	mov	$0xffff, %esi
    317 	pxor	%xmm0, %xmm0
    318 	movdqa	(%edx), %xmm2
    319 	movdqa	(%eax), %xmm1
    320 	pcmpeqb	%xmm1, %xmm0
    321 	pslldq	$15, %xmm2
    322 	pcmpeqb	%xmm1, %xmm2
    323 	psubb	%xmm0, %xmm2
    324 	pmovmskb %xmm2, %edi
    325 	shr	%cl, %esi
    326 	shr	%cl, %edi
    327 	sub	%edi, %esi
    328 	lea	-15(%ecx), %edi
    329 	jnz	L(less32bytes)
    330 
    331 	UPDATE_STRNCMP_COUNTER
    332 
    333 	movdqa	(%edx), %xmm3
    334 	pxor	%xmm0, %xmm0
    335 	mov	$16, %ecx
    336 	or	$1, %ebx
    337 	lea	1(%edx), %edi
    338 	and	$0xfff, %edi
    339 	sub	$0x1000, %edi
    340 
    341 	.p2align 4
    342 L(loop_ashr_1):
    343 	add	$16, %edi
    344 	jg	L(nibble_ashr_1)
    345 
    346 L(gobble_ashr_1):
    347 	movdqa	(%eax, %ecx), %xmm1
    348 	movdqa	(%edx, %ecx), %xmm2
    349 	movdqa	%xmm2, %xmm4
    350 
    351 	palignr	$1, %xmm3, %xmm2
    352 
    353 	pcmpeqb	%xmm1, %xmm0
    354 	pcmpeqb	%xmm2, %xmm1
    355 	psubb	%xmm0, %xmm1
    356 	pmovmskb %xmm1, %esi
    357 	sub	$0xffff, %esi
    358 	jnz	L(exit)
    359 #ifdef USE_AS_STRNCMP
    360 	cmpl	$16, %ebp
    361 	lea	-16(%ebp), %ebp
    362 	jbe	L(more8byteseq)
    363 #endif
    364 
    365 	add	$16, %ecx
    366 	movdqa	%xmm4, %xmm3
    367 
    368 	add	$16, %edi
    369 	jg	L(nibble_ashr_1)
    370 
    371 	movdqa	(%eax, %ecx), %xmm1
    372 	movdqa	(%edx, %ecx), %xmm2
    373 	movdqa	%xmm2, %xmm4
    374 
    375 	palignr	$1, %xmm3, %xmm2
    376 
    377 	pcmpeqb	%xmm1, %xmm0
    378 	pcmpeqb	%xmm2, %xmm1
    379 	psubb	%xmm0, %xmm1
    380 	pmovmskb %xmm1, %esi
    381 	sub	$0xffff, %esi
    382 	jnz	L(exit)
    383 
    384 #ifdef USE_AS_STRNCMP
    385 	cmpl	$16, %ebp
    386 	lea	-16(%ebp), %ebp
    387 	jbe	L(more8byteseq)
    388 #endif
    389 	add	$16, %ecx
    390 	movdqa	%xmm4, %xmm3
    391 	jmp	L(loop_ashr_1)
    392 
    393 	.p2align 4
    394 L(nibble_ashr_1):
    395 	pcmpeqb	%xmm3, %xmm0
    396 	pmovmskb %xmm0, %esi
    397 	test	$0xfffe, %esi
    398 	jnz	L(ashr_1_exittail)
    399 
    400 #ifdef USE_AS_STRNCMP
    401 	cmpl	$15, %ebp
    402 	jbe	L(ashr_1_exittail)
    403 #endif
    404 	pxor	%xmm0, %xmm0
    405 	sub	$0x1000, %edi
    406 	jmp	L(gobble_ashr_1)
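/*
 * Page-crossing guard used by every misaligned loop, as a hedged C
 * sketch (helper names invented): %edi goes positive once the bytes
 * the next merge step would need could spill onto a new page.  Before
 * that happens, the L(nibble_ashr_1) path above checks whether a NUL
 * already terminates the string in the not-yet-consumed bytes of the
 * saved block; if not, the string provably continues onto the next
 * page, so further aligned loads are safe and the counter is re-armed.
 *
 *	int run = (int)(((uintptr_t)p2 + 1) & 0xfff) - 0x1000;   // shift is 1 here
 *	for (;;) {
 *		run += 16;
 *		if (run > 0) {
 *			// "test $0xfffe" above: any NUL in prev[1..15]?
 *			if (nul_in_tail(prev, 1))
 *				return compare_tail_bytes();
 *			run -= 0x1000;          // safe, re-arm for the next page
 *		}
 *		// ... one palignr step as sketched before L(ashr_1) ...
 *	}
 */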
    407 
    408 	.p2align 4
    409 L(ashr_1_exittail):
    410 	movdqa	(%eax, %ecx), %xmm1
    411 	psrldq	$1, %xmm0
    412 	psrldq	$1, %xmm3
    413 	jmp	L(aftertail)
    414 
    415 /*
    416  * The following cases will be handled by ashr_2
     417  * ecx (offset of %eax string)   offset of %edx string   relative offset   corresponding case
    418  *        n(14~15)            n -14            1(15 +(n-14) - n)         ashr_2
    419  */
    420 	.p2align 4
    421 L(ashr_2):
    422 	mov	$0xffff, %esi
    423 	pxor	%xmm0, %xmm0
    424 	movdqa	(%edx), %xmm2
    425 	movdqa	(%eax), %xmm1
    426 	pcmpeqb	%xmm1, %xmm0
    427 	pslldq	$14, %xmm2
    428 	pcmpeqb	%xmm1, %xmm2
    429 	psubb	%xmm0, %xmm2
    430 	pmovmskb %xmm2, %edi
    431 	shr	%cl, %esi
    432 	shr	%cl, %edi
    433 	sub	%edi, %esi
    434 	lea	-14(%ecx), %edi
    435 	jnz	L(less32bytes)
    436 
    437 	UPDATE_STRNCMP_COUNTER
    438 
    439 	movdqa	(%edx), %xmm3
    440 	pxor	%xmm0, %xmm0
    441 	mov	$16, %ecx
    442 	or	$2, %ebx
    443 	lea	2(%edx), %edi
    444 	and	$0xfff, %edi
    445 	sub	$0x1000, %edi
    446 
    447 	.p2align 4
    448 L(loop_ashr_2):
    449 	add	$16, %edi
    450 	jg	L(nibble_ashr_2)
    451 
    452 L(gobble_ashr_2):
    453 	movdqa	(%eax, %ecx), %xmm1
    454 	movdqa	(%edx, %ecx), %xmm2
    455 	movdqa	%xmm2, %xmm4
    456 
    457 	palignr	$2, %xmm3, %xmm2
    458 
    459 	pcmpeqb	%xmm1, %xmm0
    460 	pcmpeqb	%xmm2, %xmm1
    461 	psubb	%xmm0, %xmm1
    462 	pmovmskb %xmm1, %esi
    463 	sub	$0xffff, %esi
    464 	jnz	L(exit)
    465 
    466 #ifdef USE_AS_STRNCMP
    467 	cmpl	$16, %ebp
    468 	lea	-16(%ebp), %ebp
    469 	jbe	L(more8byteseq)
    470 #endif
    471 	add	$16, %ecx
    472 	movdqa	%xmm4, %xmm3
    473 
    474 	add	$16, %edi
    475 	jg	L(nibble_ashr_2)
    476 
    477 	movdqa	(%eax, %ecx), %xmm1
    478 	movdqa	(%edx, %ecx), %xmm2
    479 	movdqa	%xmm2, %xmm4
    480 
    481 	palignr	$2, %xmm3, %xmm2
    482 
    483 	pcmpeqb	%xmm1, %xmm0
    484 	pcmpeqb	%xmm2, %xmm1
    485 	psubb	%xmm0, %xmm1
    486 	pmovmskb %xmm1, %esi
    487 	sub	$0xffff, %esi
    488 	jnz	L(exit)
    489 
    490 #ifdef USE_AS_STRNCMP
    491 	cmpl	$16, %ebp
    492 	lea	-16(%ebp), %ebp
    493 	jbe	L(more8byteseq)
    494 #endif
    495 	add	$16, %ecx
    496 	movdqa	%xmm4, %xmm3
    497 	jmp	L(loop_ashr_2)
    498 
    499 	.p2align 4
    500 L(nibble_ashr_2):
    501 	pcmpeqb	%xmm3, %xmm0
    502 	pmovmskb %xmm0, %esi
    503 	test	$0xfffc, %esi
    504 	jnz	L(ashr_2_exittail)
    505 
    506 #ifdef USE_AS_STRNCMP
    507 	cmpl	$14, %ebp
    508 	jbe	L(ashr_2_exittail)
    509 #endif
    510 
    511 	pxor	%xmm0, %xmm0
    512 	sub	$0x1000, %edi
    513 	jmp	L(gobble_ashr_2)
    514 
    515 	.p2align 4
    516 L(ashr_2_exittail):
    517 	movdqa	(%eax, %ecx), %xmm1
    518 	psrldq	$2, %xmm0
    519 	psrldq	$2, %xmm3
    520 	jmp	L(aftertail)
    521 
    522 /*
    523  * The following cases will be handled by ashr_3
     524  * ecx (offset of %eax string)   offset of %edx string   relative offset   corresponding case
    525  *        n(13~15)            n -13            2(15 +(n-13) - n)         ashr_3
    526  */
    527 	.p2align 4
    528 L(ashr_3):
    529 	mov	$0xffff, %esi
    530 	pxor	%xmm0, %xmm0
    531 	movdqa	(%edx), %xmm2
    532 	movdqa	(%eax), %xmm1
    533 	pcmpeqb	%xmm1, %xmm0
    534 	pslldq	$13, %xmm2
    535 	pcmpeqb	%xmm1, %xmm2
    536 	psubb	%xmm0, %xmm2
    537 	pmovmskb %xmm2, %edi
    538 	shr	%cl, %esi
    539 	shr	%cl, %edi
    540 	sub	%edi, %esi
    541 	lea	-13(%ecx), %edi
    542 	jnz	L(less32bytes)
    543 
    544 	UPDATE_STRNCMP_COUNTER
    545 
    546 	movdqa	(%edx), %xmm3
    547 	pxor	%xmm0, %xmm0
    548 	mov	$16, %ecx
    549 	or	$3, %ebx
    550 	lea	3(%edx), %edi
    551 	and	$0xfff, %edi
    552 	sub	$0x1000, %edi
    553 
    554 	.p2align 4
    555 L(loop_ashr_3):
    556 	add	$16, %edi
    557 	jg	L(nibble_ashr_3)
    558 
    559 L(gobble_ashr_3):
    560 	movdqa	(%eax, %ecx), %xmm1
    561 	movdqa	(%edx, %ecx), %xmm2
    562 	movdqa	%xmm2, %xmm4
    563 
    564 	palignr	$3, %xmm3, %xmm2
    565 
    566 	pcmpeqb	%xmm1, %xmm0
    567 	pcmpeqb	%xmm2, %xmm1
    568 	psubb	%xmm0, %xmm1
    569 	pmovmskb %xmm1, %esi
    570 	sub	$0xffff, %esi
    571 	jnz	L(exit)
    572 
    573 #ifdef USE_AS_STRNCMP
    574 	cmpl	$16, %ebp
    575 	lea	-16(%ebp), %ebp
    576 	jbe	L(more8byteseq)
    577 #endif
    578 	add	$16, %ecx
    579 	movdqa	%xmm4, %xmm3
    580 
    581 	add	$16, %edi
    582 	jg	L(nibble_ashr_3)
    583 
    584 	movdqa	(%eax, %ecx), %xmm1
    585 	movdqa	(%edx, %ecx), %xmm2
    586 	movdqa	%xmm2, %xmm4
    587 
    588 	palignr	$3, %xmm3, %xmm2
    589 
    590 	pcmpeqb	%xmm1, %xmm0
    591 	pcmpeqb	%xmm2, %xmm1
    592 	psubb	%xmm0, %xmm1
    593 	pmovmskb %xmm1, %esi
    594 	sub	$0xffff, %esi
    595 	jnz	L(exit)
    596 
    597 #ifdef USE_AS_STRNCMP
    598 	cmpl	$16, %ebp
    599 	lea	-16(%ebp), %ebp
    600 	jbe	L(more8byteseq)
    601 #endif
    602 	add	$16, %ecx
    603 	movdqa	%xmm4, %xmm3
    604 	jmp	L(loop_ashr_3)
    605 
    606 	.p2align 4
    607 L(nibble_ashr_3):
    608 	pcmpeqb	%xmm3, %xmm0
    609 	pmovmskb %xmm0, %esi
    610 	test	$0xfff8, %esi
    611 	jnz	L(ashr_3_exittail)
    612 
    613 #ifdef USE_AS_STRNCMP
    614 	cmpl	$13, %ebp
    615 	jbe	L(ashr_3_exittail)
    616 #endif
    617 	pxor	%xmm0, %xmm0
    618 	sub	$0x1000, %edi
    619 	jmp	L(gobble_ashr_3)
    620 
    621 	.p2align 4
    622 L(ashr_3_exittail):
    623 	movdqa	(%eax, %ecx), %xmm1
    624 	psrldq	$3, %xmm0
    625 	psrldq	$3, %xmm3
    626 	jmp	L(aftertail)
    627 
    628 /*
    629  * The following cases will be handled by ashr_4
     630  * ecx (offset of %eax string)   offset of %edx string   relative offset   corresponding case
    631  *        n(12~15)            n -12            3(15 +(n-12) - n)         ashr_4
    632  */
    633 	.p2align 4
    634 L(ashr_4):
    635 	mov	$0xffff, %esi
    636 	pxor	%xmm0, %xmm0
    637 	movdqa	(%edx), %xmm2
    638 	movdqa	(%eax), %xmm1
    639 	pcmpeqb	%xmm1, %xmm0
    640 	pslldq	$12, %xmm2
    641 	pcmpeqb	%xmm1, %xmm2
    642 	psubb	%xmm0, %xmm2
    643 	pmovmskb %xmm2, %edi
    644 	shr	%cl, %esi
    645 	shr	%cl, %edi
    646 	sub	%edi, %esi
    647 	lea	-12(%ecx), %edi
    648 	jnz	L(less32bytes)
    649 
    650 	UPDATE_STRNCMP_COUNTER
    651 
    652 	movdqa	(%edx), %xmm3
    653 	pxor	%xmm0, %xmm0
    654 	mov	$16, %ecx
    655 	or	$4, %ebx
    656 	lea	4(%edx), %edi
    657 	and	$0xfff, %edi
    658 	sub	$0x1000, %edi
    659 
    660 	.p2align 4
    661 L(loop_ashr_4):
    662 	add	$16, %edi
    663 	jg	L(nibble_ashr_4)
    664 
    665 L(gobble_ashr_4):
    666 	movdqa	(%eax, %ecx), %xmm1
    667 	movdqa	(%edx, %ecx), %xmm2
    668 	movdqa	%xmm2, %xmm4
    669 
    670 	palignr	$4, %xmm3, %xmm2
    671 
    672 	pcmpeqb	%xmm1, %xmm0
    673 	pcmpeqb	%xmm2, %xmm1
    674 	psubb	%xmm0, %xmm1
    675 	pmovmskb %xmm1, %esi
    676 	sub	$0xffff, %esi
    677 	jnz	L(exit)
    678 
    679 #ifdef USE_AS_STRNCMP
    680 	cmpl	$16, %ebp
    681 	lea	-16(%ebp), %ebp
    682 	jbe	L(more8byteseq)
    683 #endif
    684 
    685 	add	$16, %ecx
    686 	movdqa	%xmm4, %xmm3
    687 
    688 	add	$16, %edi
    689 	jg	L(nibble_ashr_4)
    690 
    691 	movdqa	(%eax, %ecx), %xmm1
    692 	movdqa	(%edx, %ecx), %xmm2
    693 	movdqa	%xmm2, %xmm4
    694 
    695 	palignr	$4, %xmm3, %xmm2
    696 
    697 	pcmpeqb	%xmm1, %xmm0
    698 	pcmpeqb	%xmm2, %xmm1
    699 	psubb	%xmm0, %xmm1
    700 	pmovmskb %xmm1, %esi
    701 	sub	$0xffff, %esi
    702 	jnz	L(exit)
    703 
    704 #ifdef USE_AS_STRNCMP
    705 	cmpl	$16, %ebp
    706 	lea	-16(%ebp), %ebp
    707 	jbe	L(more8byteseq)
    708 #endif
    709 
    710 	add	$16, %ecx
    711 	movdqa	%xmm4, %xmm3
    712 	jmp	L(loop_ashr_4)
    713 
    714 	.p2align 4
    715 L(nibble_ashr_4):
    716 	pcmpeqb	%xmm3, %xmm0
    717 	pmovmskb %xmm0, %esi
    718 	test	$0xfff0, %esi
    719 	jnz	L(ashr_4_exittail)
    720 
    721 #ifdef USE_AS_STRNCMP
    722 	cmpl	$12, %ebp
    723 	jbe	L(ashr_4_exittail)
    724 #endif
    725 
    726 	pxor	%xmm0, %xmm0
    727 	sub	$0x1000, %edi
    728 	jmp	L(gobble_ashr_4)
    729 
    730 	.p2align 4
    731 L(ashr_4_exittail):
    732 	movdqa	(%eax, %ecx), %xmm1
    733 	psrldq	$4, %xmm0
    734 	psrldq	$4, %xmm3
    735 	jmp	L(aftertail)
    736 
    737 /*
    738  * The following cases will be handled by ashr_5
     739  * ecx (offset of %eax string)   offset of %edx string   relative offset   corresponding case
    740  *        n(11~15)            n -11            4(15 +(n-11) - n)         ashr_5
    741  */
    742 	.p2align 4
    743 L(ashr_5):
    744 	mov	$0xffff, %esi
    745 	pxor	%xmm0, %xmm0
    746 	movdqa	(%edx), %xmm2
    747 	movdqa	(%eax), %xmm1
    748 	pcmpeqb	%xmm1, %xmm0
    749 	pslldq	$11, %xmm2
    750 	pcmpeqb	%xmm1, %xmm2
    751 	psubb	%xmm0, %xmm2
    752 	pmovmskb %xmm2, %edi
    753 	shr	%cl, %esi
    754 	shr	%cl, %edi
    755 	sub	%edi, %esi
    756 	lea	-11(%ecx), %edi
    757 	jnz	L(less32bytes)
    758 
    759 	UPDATE_STRNCMP_COUNTER
    760 
    761 	movdqa	(%edx), %xmm3
    762 	pxor	%xmm0, %xmm0
    763 	mov	$16, %ecx
    764 	or	$5, %ebx
    765 	lea	5(%edx), %edi
    766 	and	$0xfff, %edi
    767 	sub	$0x1000, %edi
    768 
    769 	.p2align 4
    770 L(loop_ashr_5):
    771 	add	$16, %edi
    772 	jg	L(nibble_ashr_5)
    773 
    774 L(gobble_ashr_5):
    775 	movdqa	(%eax, %ecx), %xmm1
    776 	movdqa	(%edx, %ecx), %xmm2
    777 	movdqa	%xmm2, %xmm4
    778 
    779 	palignr	$5, %xmm3, %xmm2
    780 
    781 	pcmpeqb	%xmm1, %xmm0
    782 	pcmpeqb	%xmm2, %xmm1
    783 	psubb	%xmm0, %xmm1
    784 	pmovmskb %xmm1, %esi
    785 	sub	$0xffff, %esi
    786 	jnz	L(exit)
    787 
    788 #ifdef USE_AS_STRNCMP
    789 	cmpl	$16, %ebp
    790 	lea	-16(%ebp), %ebp
    791 	jbe	L(more8byteseq)
    792 #endif
    793 	add	$16, %ecx
    794 	movdqa	%xmm4, %xmm3
    795 
    796 	add	$16, %edi
    797 	jg	L(nibble_ashr_5)
    798 
    799 	movdqa	(%eax, %ecx), %xmm1
    800 	movdqa	(%edx, %ecx), %xmm2
    801 	movdqa	%xmm2, %xmm4
    802 
    803 	palignr	$5, %xmm3, %xmm2
    804 
    805 	pcmpeqb	%xmm1, %xmm0
    806 	pcmpeqb	%xmm2, %xmm1
    807 	psubb	%xmm0, %xmm1
    808 	pmovmskb %xmm1, %esi
    809 	sub	$0xffff, %esi
    810 	jnz	L(exit)
    811 
    812 #ifdef USE_AS_STRNCMP
    813 	cmpl	$16, %ebp
    814 	lea	-16(%ebp), %ebp
    815 	jbe	L(more8byteseq)
    816 #endif
    817 	add	$16, %ecx
    818 	movdqa	%xmm4, %xmm3
    819 	jmp	L(loop_ashr_5)
    820 
    821 	.p2align 4
    822 L(nibble_ashr_5):
    823 	pcmpeqb	%xmm3, %xmm0
    824 	pmovmskb %xmm0, %esi
    825 	test	$0xffe0, %esi
    826 	jnz	L(ashr_5_exittail)
    827 
    828 #ifdef USE_AS_STRNCMP
    829 	cmpl	$11, %ebp
    830 	jbe	L(ashr_5_exittail)
    831 #endif
    832 	pxor	%xmm0, %xmm0
    833 	sub	$0x1000, %edi
    834 	jmp	L(gobble_ashr_5)
    835 
    836 	.p2align 4
    837 L(ashr_5_exittail):
    838 	movdqa	(%eax, %ecx), %xmm1
    839 	psrldq	$5, %xmm0
    840 	psrldq	$5, %xmm3
    841 	jmp	L(aftertail)
    842 
    843 /*
    844  * The following cases will be handled by ashr_6
     845  * ecx (offset of %eax string)   offset of %edx string   relative offset   corresponding case
    846  *        n(10~15)            n -10            5(15 +(n-10) - n)         ashr_6
    847  */
    848 
    849 	.p2align 4
    850 L(ashr_6):
    851 	mov	$0xffff, %esi
    852 	pxor	%xmm0, %xmm0
    853 	movdqa	(%edx), %xmm2
    854 	movdqa	(%eax), %xmm1
    855 	pcmpeqb	%xmm1, %xmm0
    856 	pslldq	$10, %xmm2
    857 	pcmpeqb	%xmm1, %xmm2
    858 	psubb	%xmm0, %xmm2
    859 	pmovmskb %xmm2, %edi
    860 	shr	%cl, %esi
    861 	shr	%cl, %edi
    862 	sub	%edi, %esi
    863 	lea	-10(%ecx), %edi
    864 	jnz	L(less32bytes)
    865 
    866 	UPDATE_STRNCMP_COUNTER
    867 
    868 	movdqa	(%edx), %xmm3
    869 	pxor	%xmm0, %xmm0
    870 	mov	$16, %ecx
    871 	or	$6, %ebx
    872 	lea	6(%edx), %edi
    873 	and	$0xfff, %edi
    874 	sub	$0x1000, %edi
    875 
    876 	.p2align 4
    877 L(loop_ashr_6):
    878 	add	$16, %edi
    879 	jg	L(nibble_ashr_6)
    880 
    881 L(gobble_ashr_6):
    882 	movdqa	(%eax, %ecx), %xmm1
    883 	movdqa	(%edx, %ecx), %xmm2
    884 	movdqa	%xmm2, %xmm4
    885 
    886 	palignr	$6, %xmm3, %xmm2
    887 
    888 	pcmpeqb	%xmm1, %xmm0
    889 	pcmpeqb	%xmm2, %xmm1
    890 	psubb	%xmm0, %xmm1
    891 	pmovmskb %xmm1, %esi
    892 	sub	$0xffff, %esi
    893 	jnz	L(exit)
    894 
    895 #ifdef USE_AS_STRNCMP
    896 	cmpl	$16, %ebp
    897 	lea	-16(%ebp), %ebp
    898 	jbe	L(more8byteseq)
    899 #endif
    900 
    901 	add	$16, %ecx
    902 	movdqa	%xmm4, %xmm3
    903 
    904 	add	$16, %edi
    905 	jg	L(nibble_ashr_6)
    906 
    907 	movdqa	(%eax, %ecx), %xmm1
    908 	movdqa	(%edx, %ecx), %xmm2
    909 	movdqa	%xmm2, %xmm4
    910 
    911 	palignr	$6, %xmm3, %xmm2
    912 
    913 	pcmpeqb	%xmm1, %xmm0
    914 	pcmpeqb	%xmm2, %xmm1
    915 	psubb	%xmm0, %xmm1
    916 	pmovmskb %xmm1, %esi
    917 	sub	$0xffff, %esi
    918 	jnz	L(exit)
    919 #ifdef USE_AS_STRNCMP
    920 	cmpl	$16, %ebp
    921 	lea	-16(%ebp), %ebp
    922 	jbe	L(more8byteseq)
    923 #endif
    924 
    925 	add	$16, %ecx
    926 	movdqa	%xmm4, %xmm3
    927 	jmp	L(loop_ashr_6)
    928 
    929 	.p2align 4
    930 L(nibble_ashr_6):
    931 	pcmpeqb	%xmm3, %xmm0
    932 	pmovmskb %xmm0, %esi
    933 	test	$0xffc0, %esi
    934 	jnz	L(ashr_6_exittail)
    935 
    936 #ifdef USE_AS_STRNCMP
    937 	cmpl	$10, %ebp
    938 	jbe	L(ashr_6_exittail)
    939 #endif
    940 	pxor	%xmm0, %xmm0
    941 	sub	$0x1000, %edi
    942 	jmp	L(gobble_ashr_6)
    943 
    944 	.p2align 4
    945 L(ashr_6_exittail):
    946 	movdqa	(%eax, %ecx), %xmm1
    947 	psrldq	$6, %xmm0
    948 	psrldq	$6, %xmm3
    949 	jmp	L(aftertail)
    950 
    951 /*
    952  * The following cases will be handled by ashr_7
     953  * ecx (offset of %eax string)   offset of %edx string   relative offset   corresponding case
    954  *        n(9~15)            n - 9            6(15 +(n-9) - n)         ashr_7
    955  */
    956 
    957 	.p2align 4
    958 L(ashr_7):
    959 	mov	$0xffff, %esi
    960 	pxor	%xmm0, %xmm0
    961 	movdqa	(%edx), %xmm2
    962 	movdqa	(%eax), %xmm1
    963 	pcmpeqb	%xmm1, %xmm0
    964 	pslldq	$9, %xmm2
    965 	pcmpeqb	%xmm1, %xmm2
    966 	psubb	%xmm0, %xmm2
    967 	pmovmskb %xmm2, %edi
    968 	shr	%cl, %esi
    969 	shr	%cl, %edi
    970 	sub	%edi, %esi
    971 	lea	-9(%ecx), %edi
    972 	jnz	L(less32bytes)
    973 
    974 	UPDATE_STRNCMP_COUNTER
    975 
    976 	movdqa	(%edx), %xmm3
    977 	pxor	%xmm0, %xmm0
    978 	mov	$16, %ecx
    979 	or	$7, %ebx
    980 	lea	8(%edx), %edi
    981 	and	$0xfff, %edi
    982 	sub	$0x1000, %edi
    983 
    984 	.p2align 4
    985 L(loop_ashr_7):
    986 	add	$16, %edi
    987 	jg	L(nibble_ashr_7)
    988 
    989 L(gobble_ashr_7):
    990 	movdqa	(%eax, %ecx), %xmm1
    991 	movdqa	(%edx, %ecx), %xmm2
    992 	movdqa	%xmm2, %xmm4
    993 
    994 	palignr	$7, %xmm3, %xmm2
    995 
    996 	pcmpeqb	%xmm1, %xmm0
    997 	pcmpeqb	%xmm2, %xmm1
    998 	psubb	%xmm0, %xmm1
    999 	pmovmskb %xmm1, %esi
   1000 	sub	$0xffff, %esi
   1001 	jnz	L(exit)
   1002 
   1003 #ifdef USE_AS_STRNCMP
   1004 	cmpl	$16, %ebp
   1005 	lea	-16(%ebp), %ebp
   1006 	jbe	L(more8byteseq)
   1007 #endif
   1008 
   1009 	add	$16, %ecx
   1010 	movdqa	%xmm4, %xmm3
   1011 
   1012 	add	$16, %edi
   1013 	jg	L(nibble_ashr_7)
   1014 
   1015 	movdqa	(%eax, %ecx), %xmm1
   1016 	movdqa	(%edx, %ecx), %xmm2
   1017 	movdqa	%xmm2, %xmm4
   1018 
   1019 	palignr	$7, %xmm3, %xmm2
   1020 
   1021 	pcmpeqb	%xmm1, %xmm0
   1022 	pcmpeqb	%xmm2, %xmm1
   1023 	psubb	%xmm0, %xmm1
   1024 	pmovmskb %xmm1, %esi
   1025 	sub	$0xffff, %esi
   1026 	jnz	L(exit)
   1027 
   1028 #ifdef USE_AS_STRNCMP
   1029 	cmpl	$16, %ebp
   1030 	lea	-16(%ebp), %ebp
   1031 	jbe	L(more8byteseq)
   1032 #endif
   1033 
   1034 	add	$16, %ecx
   1035 	movdqa	%xmm4, %xmm3
   1036 	jmp	L(loop_ashr_7)
   1037 
   1038 	.p2align 4
   1039 L(nibble_ashr_7):
   1040 	pcmpeqb	%xmm3, %xmm0
   1041 	pmovmskb %xmm0, %esi
   1042 	test	$0xff80, %esi
   1043 	jnz	L(ashr_7_exittail)
   1044 
   1045 #ifdef USE_AS_STRNCMP
   1046 	cmpl	$9, %ebp
   1047 	jbe	L(ashr_7_exittail)
   1048 #endif
    1049 	pxor	%xmm0, %xmm0
   1051 	sub	$0x1000, %edi
   1052 	jmp	L(gobble_ashr_7)
   1053 
   1054 	.p2align 4
   1055 L(ashr_7_exittail):
   1056 	movdqa	(%eax, %ecx), %xmm1
   1057 	psrldq	$7, %xmm0
   1058 	psrldq	$7, %xmm3
   1059 	jmp	L(aftertail)
   1060 
   1061 /*
   1062  * The following cases will be handled by ashr_8
    1063  * ecx (offset of %eax string)   offset of %edx string   relative offset   corresponding case
   1064  *        n(8~15)            n - 8            7(15 +(n-8) - n)         ashr_8
   1065  */
   1066 	.p2align 4
   1067 L(ashr_8):
   1068 	mov	$0xffff, %esi
   1069 	pxor	%xmm0, %xmm0
   1070 	movdqa	(%edx), %xmm2
   1071 	movdqa	(%eax), %xmm1
   1072 	pcmpeqb	%xmm1, %xmm0
   1073 	pslldq	$8, %xmm2
   1074 	pcmpeqb	%xmm1, %xmm2
   1075 	psubb	%xmm0, %xmm2
   1076 	pmovmskb %xmm2, %edi
   1077 	shr	%cl, %esi
   1078 	shr	%cl, %edi
   1079 	sub	%edi, %esi
   1080 	lea	-8(%ecx), %edi
   1081 	jnz	L(less32bytes)
   1082 
   1083 	UPDATE_STRNCMP_COUNTER
   1084 
   1085 	movdqa	(%edx), %xmm3
   1086 	pxor	%xmm0, %xmm0
   1087 	mov	$16, %ecx
   1088 	or	$8, %ebx
   1089 	lea	8(%edx), %edi
   1090 	and	$0xfff, %edi
   1091 	sub	$0x1000, %edi
   1092 
   1093 	.p2align 4
   1094 L(loop_ashr_8):
   1095 	add	$16, %edi
   1096 	jg	L(nibble_ashr_8)
   1097 
   1098 L(gobble_ashr_8):
   1099 	movdqa	(%eax, %ecx), %xmm1
   1100 	movdqa	(%edx, %ecx), %xmm2
   1101 	movdqa	%xmm2, %xmm4
   1102 
   1103 	palignr	$8, %xmm3, %xmm2
   1104 
   1105 	pcmpeqb	%xmm1, %xmm0
   1106 	pcmpeqb	%xmm2, %xmm1
   1107 	psubb	%xmm0, %xmm1
   1108 	pmovmskb %xmm1, %esi
   1109 	sub	$0xffff, %esi
   1110 	jnz	L(exit)
   1111 
   1112 #ifdef USE_AS_STRNCMP
   1113 	cmpl	$16, %ebp
   1114 	lea	-16(%ebp), %ebp
   1115 	jbe	L(more8byteseq)
   1116 #endif
   1117 	add	$16, %ecx
   1118 	movdqa	%xmm4, %xmm3
   1119 
   1120 	add	$16, %edi
   1121 	jg	L(nibble_ashr_8)
   1122 
   1123 	movdqa	(%eax, %ecx), %xmm1
   1124 	movdqa	(%edx, %ecx), %xmm2
   1125 	movdqa	%xmm2, %xmm4
   1126 
   1127 	palignr	$8, %xmm3, %xmm2
   1128 
   1129 	pcmpeqb	%xmm1, %xmm0
   1130 	pcmpeqb	%xmm2, %xmm1
   1131 	psubb	%xmm0, %xmm1
   1132 	pmovmskb %xmm1, %esi
   1133 	sub	$0xffff, %esi
   1134 	jnz	L(exit)
   1135 
   1136 #ifdef USE_AS_STRNCMP
   1137 	cmpl	$16, %ebp
   1138 	lea	-16(%ebp), %ebp
   1139 	jbe	L(more8byteseq)
   1140 #endif
   1141 	add	$16, %ecx
   1142 	movdqa	%xmm4, %xmm3
   1143 	jmp	L(loop_ashr_8)
   1144 
   1145 	.p2align 4
   1146 L(nibble_ashr_8):
   1147 	pcmpeqb	%xmm3, %xmm0
   1148 	pmovmskb %xmm0, %esi
   1149 	test	$0xff00, %esi
   1150 	jnz	L(ashr_8_exittail)
   1151 
   1152 #ifdef USE_AS_STRNCMP
   1153 	cmpl	$8, %ebp
   1154 	jbe	L(ashr_8_exittail)
   1155 #endif
    1156 	pxor	%xmm0, %xmm0
   1158 	sub	$0x1000, %edi
   1159 	jmp	L(gobble_ashr_8)
   1160 
   1161 	.p2align 4
   1162 L(ashr_8_exittail):
   1163 	movdqa	(%eax, %ecx), %xmm1
   1164 	psrldq	$8, %xmm0
   1165 	psrldq	$8, %xmm3
   1166 	jmp	L(aftertail)
   1167 
   1168 /*
   1169  * The following cases will be handled by ashr_9
    1170  * ecx (offset of %eax string)   offset of %edx string   relative offset   corresponding case
   1171  *        n(7~15)            n - 7            8(15 +(n-7) - n)         ashr_9
   1172  */
   1173 	.p2align 4
   1174 L(ashr_9):
   1175 	mov	$0xffff, %esi
   1176 	pxor	%xmm0, %xmm0
   1177 	movdqa	(%edx), %xmm2
   1178 	movdqa	(%eax), %xmm1
   1179 	pcmpeqb	%xmm1, %xmm0
   1180 	pslldq	$7, %xmm2
   1181 	pcmpeqb	%xmm1, %xmm2
   1182 	psubb	%xmm0, %xmm2
   1183 	pmovmskb %xmm2, %edi
   1184 	shr	%cl, %esi
   1185 	shr	%cl, %edi
   1186 	sub	%edi, %esi
   1187 	lea	-7(%ecx), %edi
   1188 	jnz	L(less32bytes)
   1189 
   1190 	UPDATE_STRNCMP_COUNTER
   1191 
   1192 	movdqa	(%edx), %xmm3
   1193 	pxor	%xmm0, %xmm0
   1194 	mov	$16, %ecx
   1195 	or	$9, %ebx
   1196 	lea	9(%edx), %edi
   1197 	and	$0xfff, %edi
   1198 	sub	$0x1000, %edi
   1199 
   1200 	.p2align 4
   1201 L(loop_ashr_9):
   1202 	add	$16, %edi
   1203 	jg	L(nibble_ashr_9)
   1204 
   1205 L(gobble_ashr_9):
   1206 	movdqa	(%eax, %ecx), %xmm1
   1207 	movdqa	(%edx, %ecx), %xmm2
   1208 	movdqa	%xmm2, %xmm4
   1209 
   1210 	palignr	$9, %xmm3, %xmm2
   1211 
   1212 	pcmpeqb	%xmm1, %xmm0
   1213 	pcmpeqb	%xmm2, %xmm1
   1214 	psubb	%xmm0, %xmm1
   1215 	pmovmskb %xmm1, %esi
   1216 	sub	$0xffff, %esi
   1217 	jnz	L(exit)
   1218 
   1219 #ifdef USE_AS_STRNCMP
   1220 	cmpl	$16, %ebp
   1221 	lea	-16(%ebp), %ebp
   1222 	jbe	L(more8byteseq)
   1223 #endif
   1224 	add	$16, %ecx
   1225 	movdqa	%xmm4, %xmm3
   1226 
   1227 	add	$16, %edi
   1228 	jg	L(nibble_ashr_9)
   1229 
   1230 	movdqa	(%eax, %ecx), %xmm1
   1231 	movdqa	(%edx, %ecx), %xmm2
   1232 	movdqa	%xmm2, %xmm4
   1233 
   1234 	palignr	$9, %xmm3, %xmm2
   1235 
   1236 	pcmpeqb	%xmm1, %xmm0
   1237 	pcmpeqb	%xmm2, %xmm1
   1238 	psubb	%xmm0, %xmm1
   1239 	pmovmskb %xmm1, %esi
   1240 	sub	$0xffff, %esi
   1241 	jnz	L(exit)
   1242 
   1243 #ifdef USE_AS_STRNCMP
   1244 	cmpl	$16, %ebp
   1245 	lea	-16(%ebp), %ebp
   1246 	jbe	L(more8byteseq)
   1247 #endif
   1248 	add	$16, %ecx
   1249 	movdqa	%xmm4, %xmm3
   1250 	jmp	L(loop_ashr_9)
   1251 
   1252 	.p2align 4
   1253 L(nibble_ashr_9):
   1254 	pcmpeqb	%xmm3, %xmm0
   1255 	pmovmskb %xmm0, %esi
   1256 	test	$0xfe00, %esi
   1257 	jnz	L(ashr_9_exittail)
   1258 
   1259 #ifdef USE_AS_STRNCMP
   1260 	cmpl	$7, %ebp
   1261 	jbe	L(ashr_9_exittail)
   1262 #endif
   1263 	pxor	%xmm0, %xmm0
   1264 	sub	$0x1000, %edi
   1265 	jmp	L(gobble_ashr_9)
   1266 
   1267 	.p2align 4
   1268 L(ashr_9_exittail):
   1269 	movdqa	(%eax, %ecx), %xmm1
   1270 	psrldq	$9, %xmm0
   1271 	psrldq	$9, %xmm3
   1272 	jmp	L(aftertail)
   1273 
   1274 /*
   1275  * The following cases will be handled by ashr_10
    1276  * ecx (offset of %eax string)   offset of %edx string   relative offset   corresponding case
   1277  *        n(6~15)            n - 6            9(15 +(n-6) - n)         ashr_10
   1278  */
   1279 	.p2align 4
   1280 L(ashr_10):
   1281 	mov	$0xffff, %esi
   1282 	pxor	%xmm0, %xmm0
   1283 	movdqa	(%edx), %xmm2
   1284 	movdqa	(%eax), %xmm1
   1285 	pcmpeqb	%xmm1, %xmm0
   1286 	pslldq	$6, %xmm2
   1287 	pcmpeqb	%xmm1, %xmm2
   1288 	psubb	%xmm0, %xmm2
   1289 	pmovmskb %xmm2, %edi
   1290 	shr	%cl, %esi
   1291 	shr	%cl, %edi
   1292 	sub	%edi, %esi
   1293 	lea	-6(%ecx), %edi
   1294 	jnz	L(less32bytes)
   1295 
   1296 	UPDATE_STRNCMP_COUNTER
   1297 
   1298 	movdqa	(%edx), %xmm3
   1299 	pxor	%xmm0, %xmm0
   1300 	mov	$16, %ecx
   1301 	or	$10, %ebx
   1302 	lea	10(%edx), %edi
   1303 	and	$0xfff, %edi
   1304 	sub	$0x1000, %edi
   1305 
   1306 	.p2align 4
   1307 L(loop_ashr_10):
   1308 	add	$16, %edi
   1309 	jg	L(nibble_ashr_10)
   1310 
   1311 L(gobble_ashr_10):
   1312 	movdqa	(%eax, %ecx), %xmm1
   1313 	movdqa	(%edx, %ecx), %xmm2
   1314 	movdqa	%xmm2, %xmm4
   1315 
   1316 	palignr	$10, %xmm3, %xmm2
   1317 
   1318 	pcmpeqb	%xmm1, %xmm0
   1319 	pcmpeqb	%xmm2, %xmm1
   1320 	psubb	%xmm0, %xmm1
   1321 	pmovmskb %xmm1, %esi
   1322 	sub	$0xffff, %esi
   1323 	jnz	L(exit)
   1324 
   1325 #ifdef USE_AS_STRNCMP
   1326 	cmpl	$16, %ebp
   1327 	lea	-16(%ebp), %ebp
   1328 	jbe	L(more8byteseq)
   1329 #endif
   1330 	add	$16, %ecx
   1331 	movdqa	%xmm4, %xmm3
   1332 
   1333 	add	$16, %edi
   1334 	jg	L(nibble_ashr_10)
   1335 
   1336 	movdqa	(%eax, %ecx), %xmm1
   1337 	movdqa	(%edx, %ecx), %xmm2
   1338 	movdqa	%xmm2, %xmm4
   1339 
   1340 	palignr	$10, %xmm3, %xmm2
   1341 
   1342 	pcmpeqb	%xmm1, %xmm0
   1343 	pcmpeqb	%xmm2, %xmm1
   1344 	psubb	%xmm0, %xmm1
   1345 	pmovmskb %xmm1, %esi
   1346 	sub	$0xffff, %esi
   1347 	jnz	L(exit)
   1348 
   1349 #ifdef USE_AS_STRNCMP
   1350 	cmpl	$16, %ebp
   1351 	lea	-16(%ebp), %ebp
   1352 	jbe	L(more8byteseq)
   1353 #endif
   1354 	add	$16, %ecx
   1355 	movdqa	%xmm4, %xmm3
   1356 	jmp	L(loop_ashr_10)
   1357 
   1358 	.p2align 4
   1359 L(nibble_ashr_10):
   1360 	pcmpeqb	%xmm3, %xmm0
   1361 	pmovmskb %xmm0, %esi
   1362 	test	$0xfc00, %esi
   1363 	jnz	L(ashr_10_exittail)
   1364 
   1365 #ifdef USE_AS_STRNCMP
   1366 	cmpl	$6, %ebp
   1367 	jbe	L(ashr_10_exittail)
   1368 #endif
   1369 	pxor	%xmm0, %xmm0
   1370 	sub	$0x1000, %edi
   1371 	jmp	L(gobble_ashr_10)
   1372 
   1373 	.p2align 4
   1374 L(ashr_10_exittail):
   1375 	movdqa	(%eax, %ecx), %xmm1
   1376 	psrldq	$10, %xmm0
   1377 	psrldq	$10, %xmm3
   1378 	jmp	L(aftertail)
   1379 
   1380 /*
   1381  * The following cases will be handled by ashr_11
    1382  * ecx (offset of %eax string)   offset of %edx string   relative offset   corresponding case
   1383  *        n(5~15)            n - 5            10(15 +(n-5) - n)         ashr_11
   1384  */
   1385 	.p2align 4
   1386 L(ashr_11):
   1387 	mov	$0xffff, %esi
   1388 	pxor	%xmm0, %xmm0
   1389 	movdqa	(%edx), %xmm2
   1390 	movdqa	(%eax), %xmm1
   1391 	pcmpeqb	%xmm1, %xmm0
   1392 	pslldq	$5, %xmm2
   1393 	pcmpeqb	%xmm1, %xmm2
   1394 	psubb	%xmm0, %xmm2
   1395 	pmovmskb %xmm2, %edi
   1396 	shr	%cl, %esi
   1397 	shr	%cl, %edi
   1398 	sub	%edi, %esi
   1399 	lea	-5(%ecx), %edi
   1400 	jnz	L(less32bytes)
   1401 
   1402 	UPDATE_STRNCMP_COUNTER
   1403 
   1404 	movdqa	(%edx), %xmm3
   1405 	pxor	%xmm0, %xmm0
   1406 	mov	$16, %ecx
   1407 	or	$11, %ebx
   1408 	lea	11(%edx), %edi
   1409 	and	$0xfff, %edi
   1410 	sub	$0x1000, %edi
   1411 
   1412 	.p2align 4
   1413 L(loop_ashr_11):
   1414 	add	$16, %edi
   1415 	jg	L(nibble_ashr_11)
   1416 
   1417 L(gobble_ashr_11):
   1418 	movdqa	(%eax, %ecx), %xmm1
   1419 	movdqa	(%edx, %ecx), %xmm2
   1420 	movdqa	%xmm2, %xmm4
   1421 
   1422 	palignr	$11, %xmm3, %xmm2
   1423 
   1424 	pcmpeqb	%xmm1, %xmm0
   1425 	pcmpeqb	%xmm2, %xmm1
   1426 	psubb	%xmm0, %xmm1
   1427 	pmovmskb %xmm1, %esi
   1428 	sub	$0xffff, %esi
   1429 	jnz	L(exit)
   1430 
   1431 #ifdef USE_AS_STRNCMP
   1432 	cmpl	$16, %ebp
   1433 	lea	-16(%ebp), %ebp
   1434 	jbe	L(more8byteseq)
   1435 #endif
   1436 	add	$16, %ecx
   1437 	movdqa	%xmm4, %xmm3
   1438 
   1439 	add	$16, %edi
   1440 	jg	L(nibble_ashr_11)
   1441 
   1442 	movdqa	(%eax, %ecx), %xmm1
   1443 	movdqa	(%edx, %ecx), %xmm2
   1444 	movdqa	%xmm2, %xmm4
   1445 
   1446 	palignr	$11, %xmm3, %xmm2
   1447 
   1448 	pcmpeqb	%xmm1, %xmm0
   1449 	pcmpeqb	%xmm2, %xmm1
   1450 	psubb	%xmm0, %xmm1
   1451 	pmovmskb %xmm1, %esi
   1452 	sub	$0xffff, %esi
   1453 	jnz	L(exit)
   1454 
   1455 #ifdef USE_AS_STRNCMP
   1456 	cmpl	$16, %ebp
   1457 	lea	-16(%ebp), %ebp
   1458 	jbe	L(more8byteseq)
   1459 #endif
   1460 	add	$16, %ecx
   1461 	movdqa	%xmm4, %xmm3
   1462 	jmp	L(loop_ashr_11)
   1463 
   1464 	.p2align 4
   1465 L(nibble_ashr_11):
   1466 	pcmpeqb	%xmm3, %xmm0
   1467 	pmovmskb %xmm0, %esi
   1468 	test	$0xf800, %esi
   1469 	jnz	L(ashr_11_exittail)
   1470 
   1471 #ifdef USE_AS_STRNCMP
   1472 	cmpl	$5, %ebp
   1473 	jbe	L(ashr_11_exittail)
   1474 #endif
   1475 	pxor	%xmm0, %xmm0
   1476 	sub	$0x1000, %edi
   1477 	jmp	L(gobble_ashr_11)
   1478 
   1479 	.p2align 4
   1480 L(ashr_11_exittail):
   1481 	movdqa	(%eax, %ecx), %xmm1
   1482 	psrldq	$11, %xmm0
   1483 	psrldq	$11, %xmm3
   1484 	jmp	L(aftertail)
   1485 
   1486 /*
   1487  * The following cases will be handled by ashr_12
    1488  * ecx (offset of %eax string)   offset of %edx string   relative offset   corresponding case
   1489  *        n(4~15)            n - 4            11(15 +(n-4) - n)         ashr_12
   1490  */
   1491 	.p2align 4
   1492 L(ashr_12):
   1493 	mov	$0xffff, %esi
   1494 	pxor	%xmm0, %xmm0
   1495 	movdqa	(%edx), %xmm2
   1496 	movdqa	(%eax), %xmm1
   1497 	pcmpeqb	%xmm1, %xmm0
   1498 	pslldq	$4, %xmm2
   1499 	pcmpeqb	%xmm1, %xmm2
   1500 	psubb	%xmm0, %xmm2
   1501 	pmovmskb %xmm2, %edi
   1502 	shr	%cl, %esi
   1503 	shr	%cl, %edi
   1504 	sub	%edi, %esi
   1505 	lea	-4(%ecx), %edi
   1506 	jnz	L(less32bytes)
   1507 
   1508 	UPDATE_STRNCMP_COUNTER
   1509 
   1510 	movdqa	(%edx), %xmm3
   1511 	pxor	%xmm0, %xmm0
   1512 	mov	$16, %ecx
   1513 	or	$12, %ebx
   1514 	lea	12(%edx), %edi
   1515 	and	$0xfff, %edi
   1516 	sub	$0x1000, %edi
   1517 
   1518 	.p2align 4
   1519 L(loop_ashr_12):
   1520 	add	$16, %edi
   1521 	jg	L(nibble_ashr_12)
   1522 
   1523 L(gobble_ashr_12):
   1524 	movdqa	(%eax, %ecx), %xmm1
   1525 	movdqa	(%edx, %ecx), %xmm2
   1526 	movdqa	%xmm2, %xmm4
   1527 
   1528 	palignr	$12, %xmm3, %xmm2
   1529 
   1530 	pcmpeqb	%xmm1, %xmm0
   1531 	pcmpeqb	%xmm2, %xmm1
   1532 	psubb	%xmm0, %xmm1
   1533 	pmovmskb %xmm1, %esi
   1534 	sub	$0xffff, %esi
   1535 	jnz	L(exit)
   1536 
   1537 #ifdef USE_AS_STRNCMP
   1538 	cmpl	$16, %ebp
   1539 	lea	-16(%ebp), %ebp
   1540 	jbe	L(more8byteseq)
   1541 #endif
   1542 
   1543 	add	$16, %ecx
   1544 	movdqa	%xmm4, %xmm3
   1545 
   1546 	add	$16, %edi
   1547 	jg	L(nibble_ashr_12)
   1548 
   1549 	movdqa	(%eax, %ecx), %xmm1
   1550 	movdqa	(%edx, %ecx), %xmm2
   1551 	movdqa	%xmm2, %xmm4
   1552 
   1553 	palignr	$12, %xmm3, %xmm2
   1554 
   1555 	pcmpeqb	%xmm1, %xmm0
   1556 	pcmpeqb	%xmm2, %xmm1
   1557 	psubb	%xmm0, %xmm1
   1558 	pmovmskb %xmm1, %esi
   1559 	sub	$0xffff, %esi
   1560 	jnz	L(exit)
   1561 
   1562 #ifdef USE_AS_STRNCMP
   1563 	cmpl	$16, %ebp
   1564 	lea	-16(%ebp), %ebp
   1565 	jbe	L(more8byteseq)
   1566 #endif
   1567 	add	$16, %ecx
   1568 	movdqa	%xmm4, %xmm3
   1569 	jmp	L(loop_ashr_12)
   1570 
   1571 	.p2align 4
   1572 L(nibble_ashr_12):
   1573 	pcmpeqb	%xmm3, %xmm0
   1574 	pmovmskb %xmm0, %esi
   1575 	test	$0xf000, %esi
   1576 	jnz	L(ashr_12_exittail)
   1577 
   1578 #ifdef USE_AS_STRNCMP
   1579 	cmpl	$4, %ebp
   1580 	jbe	L(ashr_12_exittail)
   1581 #endif
   1582 	pxor	%xmm0, %xmm0
   1583 	sub	$0x1000, %edi
   1584 	jmp	L(gobble_ashr_12)
   1585 
   1586 	.p2align 4
   1587 L(ashr_12_exittail):
   1588 	movdqa	(%eax, %ecx), %xmm1
   1589 	psrldq	$12, %xmm0
   1590 	psrldq	$12, %xmm3
   1591 	jmp	L(aftertail)
   1592 
   1593 /*
   1594  * The following cases will be handled by ashr_13
    1595  * ecx (offset of %eax string)   offset of %edx string   relative offset   corresponding case
   1596  *        n(3~15)            n - 3            12(15 +(n-3) - n)         ashr_13
   1597  */
   1598 	.p2align 4
   1599 L(ashr_13):
   1600 	mov	$0xffff, %esi
   1601 	pxor	%xmm0, %xmm0
   1602 	movdqa	(%edx), %xmm2
   1603 	movdqa	(%eax), %xmm1
   1604 	pcmpeqb	%xmm1, %xmm0
   1605 	pslldq	$3, %xmm2
   1606 	pcmpeqb	%xmm1, %xmm2
   1607 	psubb	%xmm0, %xmm2
   1608 	pmovmskb %xmm2, %edi
   1609 	shr	%cl, %esi
   1610 	shr	%cl, %edi
   1611 	sub	%edi, %esi
   1612 	lea	-3(%ecx), %edi
   1613 	jnz	L(less32bytes)
   1614 
   1615 	UPDATE_STRNCMP_COUNTER
   1616 
   1617 	movdqa	(%edx), %xmm3
   1618 	pxor	%xmm0, %xmm0
   1619 	mov	$16, %ecx
   1620 	or	$13, %ebx
   1621 	lea	13(%edx), %edi
   1622 	and	$0xfff, %edi
   1623 	sub	$0x1000, %edi
   1624 
   1625 	.p2align 4
   1626 L(loop_ashr_13):
   1627 	add	$16, %edi
   1628 	jg	L(nibble_ashr_13)
   1629 
   1630 L(gobble_ashr_13):
   1631 	movdqa	(%eax, %ecx), %xmm1
   1632 	movdqa	(%edx, %ecx), %xmm2
   1633 	movdqa	%xmm2, %xmm4
   1634 
   1635 	palignr	$13, %xmm3, %xmm2
   1636 
   1637 	pcmpeqb	%xmm1, %xmm0
   1638 	pcmpeqb	%xmm2, %xmm1
   1639 	psubb	%xmm0, %xmm1
   1640 	pmovmskb %xmm1, %esi
   1641 	sub	$0xffff, %esi
   1642 	jnz	L(exit)
   1643 
   1644 #ifdef USE_AS_STRNCMP
   1645 	cmpl	$16, %ebp
   1646 	lea	-16(%ebp), %ebp
   1647 	jbe	L(more8byteseq)
   1648 #endif
   1649 	add	$16, %ecx
   1650 	movdqa	%xmm4, %xmm3
   1651 
   1652 	add	$16, %edi
   1653 	jg	L(nibble_ashr_13)
   1654 
   1655 	movdqa	(%eax, %ecx), %xmm1
   1656 	movdqa	(%edx, %ecx), %xmm2
   1657 	movdqa	%xmm2, %xmm4
   1658 
   1659 	palignr	$13, %xmm3, %xmm2
   1660 
   1661 	pcmpeqb	%xmm1, %xmm0
   1662 	pcmpeqb	%xmm2, %xmm1
   1663 	psubb	%xmm0, %xmm1
   1664 	pmovmskb %xmm1, %esi
   1665 	sub	$0xffff, %esi
   1666 	jnz	L(exit)
   1667 
   1668 #ifdef USE_AS_STRNCMP
   1669 	cmpl	$16, %ebp
   1670 	lea	-16(%ebp), %ebp
   1671 	jbe	L(more8byteseq)
   1672 #endif
   1673 	add	$16, %ecx
   1674 	movdqa	%xmm4, %xmm3
   1675 	jmp	L(loop_ashr_13)
   1676 
   1677 	.p2align 4
   1678 L(nibble_ashr_13):
   1679 	pcmpeqb	%xmm3, %xmm0
   1680 	pmovmskb %xmm0, %esi
   1681 	test	$0xe000, %esi
   1682 	jnz	L(ashr_13_exittail)
   1683 
   1684 #ifdef USE_AS_STRNCMP
   1685 	cmpl	$3, %ebp
   1686 	jbe	L(ashr_13_exittail)
   1687 #endif
   1688 	pxor	%xmm0, %xmm0
   1689 	sub	$0x1000, %edi
   1690 	jmp	L(gobble_ashr_13)
   1691 
   1692 	.p2align 4
   1693 L(ashr_13_exittail):
   1694 	movdqa	(%eax, %ecx), %xmm1
   1695 	psrldq	$13, %xmm0
   1696 	psrldq	$13, %xmm3
   1697 	jmp	L(aftertail)
   1698 
   1699 /*
   1700  * The following cases will be handled by ashr_14
    1701  * ecx (offset of %eax string)   offset of %edx string   relative offset   corresponding case
   1702  *        n(2~15)            n - 2            13(15 +(n-2) - n)         ashr_14
   1703  */
   1704 	.p2align 4
   1705 L(ashr_14):
   1706 	mov	$0xffff, %esi
   1707 	pxor	%xmm0, %xmm0
   1708 	movdqa	(%edx), %xmm2
   1709 	movdqa	(%eax), %xmm1
   1710 	pcmpeqb	%xmm1, %xmm0
   1711 	pslldq	$2, %xmm2
   1712 	pcmpeqb	%xmm1, %xmm2
   1713 	psubb	%xmm0, %xmm2
   1714 	pmovmskb %xmm2, %edi
   1715 	shr	%cl, %esi
   1716 	shr	%cl, %edi
   1717 	sub	%edi, %esi
   1718 	lea	-2(%ecx), %edi
   1719 	jnz	L(less32bytes)
   1720 
   1721 	UPDATE_STRNCMP_COUNTER
   1722 
   1723 	movdqa	(%edx), %xmm3
   1724 	pxor	%xmm0, %xmm0
   1725 	mov	$16, %ecx
   1726 	or	$14, %ebx
   1727 	lea	14(%edx), %edi
   1728 	and	$0xfff, %edi
   1729 	sub	$0x1000, %edi
   1730 
   1731 	.p2align 4
   1732 L(loop_ashr_14):
   1733 	add	$16, %edi
   1734 	jg	L(nibble_ashr_14)
   1735 
   1736 L(gobble_ashr_14):
   1737 	movdqa	(%eax, %ecx), %xmm1
   1738 	movdqa	(%edx, %ecx), %xmm2
   1739 	movdqa	%xmm2, %xmm4
   1740 
   1741 	palignr	$14, %xmm3, %xmm2
   1742 
   1743 	pcmpeqb	%xmm1, %xmm0
   1744 	pcmpeqb	%xmm2, %xmm1
   1745 	psubb	%xmm0, %xmm1
   1746 	pmovmskb %xmm1, %esi
   1747 	sub	$0xffff, %esi
   1748 	jnz	L(exit)
   1749 
   1750 #ifdef USE_AS_STRNCMP
   1751 	cmpl	$16, %ebp
   1752 	lea	-16(%ebp), %ebp
   1753 	jbe	L(more8byteseq)
   1754 #endif
   1755 	add	$16, %ecx
   1756 	movdqa	%xmm4, %xmm3
   1757 
   1758 	add	$16, %edi
   1759 	jg	L(nibble_ashr_14)
   1760 
   1761 	movdqa	(%eax, %ecx), %xmm1
   1762 	movdqa	(%edx, %ecx), %xmm2
   1763 	movdqa	%xmm2, %xmm4
   1764 
   1765 	palignr	$14, %xmm3, %xmm2
   1766 
   1767 	pcmpeqb	%xmm1, %xmm0
   1768 	pcmpeqb	%xmm2, %xmm1
   1769 	psubb	%xmm0, %xmm1
   1770 	pmovmskb %xmm1, %esi
   1771 	sub	$0xffff, %esi
   1772 	jnz	L(exit)
   1773 
   1774 #ifdef USE_AS_STRNCMP
   1775 	cmpl	$16, %ebp
   1776 	lea	-16(%ebp), %ebp
   1777 	jbe	L(more8byteseq)
   1778 #endif
   1779 	add	$16, %ecx
   1780 	movdqa	%xmm4, %xmm3
   1781 	jmp	L(loop_ashr_14)
   1782 
   1783 	.p2align 4
   1784 L(nibble_ashr_14):
   1785 	pcmpeqb	%xmm3, %xmm0
   1786 	pmovmskb %xmm0, %esi
   1787 	test	$0xc000, %esi
   1788 	jnz	L(ashr_14_exittail)
   1789 
   1790 #ifdef USE_AS_STRNCMP
   1791 	cmpl	$2, %ebp
   1792 	jbe	L(ashr_14_exittail)
   1793 #endif
   1794 	pxor	%xmm0, %xmm0
   1795 	sub	$0x1000, %edi
   1796 	jmp	L(gobble_ashr_14)
   1797 
   1798 	.p2align 4
   1799 L(ashr_14_exittail):
   1800 	movdqa	(%eax, %ecx), %xmm1
   1801 	psrldq	$14, %xmm0
   1802 	psrldq	$14, %xmm3
   1803 	jmp	L(aftertail)
   1804 
   1805 /*
    1806  * The following cases will be handled by ashr_15
    1807  * ecx (offset of %eax string)   offset of %edx string   relative offset   corresponding case
   1808  *        n(1~15)            n - 1            14(15 +(n-1) - n)         ashr_15
   1809  */
   1810 
   1811 	.p2align 4
   1812 L(ashr_15):
   1813 	mov	$0xffff, %esi
   1814 	pxor	%xmm0, %xmm0
   1815 	movdqa	(%edx), %xmm2
   1816 	movdqa	(%eax), %xmm1
   1817 	pcmpeqb	%xmm1, %xmm0
   1818 	pslldq	$1, %xmm2
   1819 	pcmpeqb	%xmm1, %xmm2
   1820 	psubb	%xmm0, %xmm2
   1821 	pmovmskb %xmm2, %edi
   1822 	shr	%cl, %esi
   1823 	shr	%cl, %edi
   1824 	sub	%edi, %esi
   1825 	lea	-1(%ecx), %edi
   1826 	jnz	L(less32bytes)
   1827 
   1828 	UPDATE_STRNCMP_COUNTER
   1829 
   1830 	movdqa	(%edx), %xmm3
   1831 	pxor	%xmm0, %xmm0
   1832 	mov	$16, %ecx
   1833 	or	$15, %ebx
   1834 	lea	15(%edx), %edi
   1835 	and	$0xfff, %edi
   1836 	sub	$0x1000, %edi
   1837 
   1838 	.p2align 4
   1839 L(loop_ashr_15):
   1840 	add	$16, %edi
   1841 	jg	L(nibble_ashr_15)
   1842 
   1843 L(gobble_ashr_15):
   1844 	movdqa	(%eax, %ecx), %xmm1
   1845 	movdqa	(%edx, %ecx), %xmm2
   1846 	movdqa	%xmm2, %xmm4
   1847 
   1848 	palignr	$15, %xmm3, %xmm2
   1849 
   1850 	pcmpeqb	%xmm1, %xmm0
   1851 	pcmpeqb	%xmm2, %xmm1
   1852 	psubb	%xmm0, %xmm1
   1853 	pmovmskb %xmm1, %esi
   1854 	sub	$0xffff, %esi
   1855 	jnz	L(exit)
   1856 
   1857 #ifdef USE_AS_STRNCMP
   1858 	cmpl	$16, %ebp
   1859 	lea	-16(%ebp), %ebp
   1860 	jbe	L(more8byteseq)
   1861 #endif
   1862 	add	$16, %ecx
   1863 	movdqa	%xmm4, %xmm3
   1864 
   1865 	add	$16, %edi
   1866 	jg	L(nibble_ashr_15)
   1867 
   1868 	movdqa	(%eax, %ecx), %xmm1
   1869 	movdqa	(%edx, %ecx), %xmm2
   1870 	movdqa	%xmm2, %xmm4
   1871 
   1872 	palignr	$15, %xmm3, %xmm2
   1873 
   1874 	pcmpeqb	%xmm1, %xmm0
   1875 	pcmpeqb	%xmm2, %xmm1
   1876 	psubb	%xmm0, %xmm1
   1877 	pmovmskb %xmm1, %esi
   1878 	sub	$0xffff, %esi
   1879 	jnz	L(exit)
   1880 
   1881 #ifdef USE_AS_STRNCMP
   1882 	cmpl	$16, %ebp
   1883 	lea	-16(%ebp), %ebp
   1884 	jbe	L(more8byteseq)
   1885 #endif
   1886 	add	$16, %ecx
   1887 	movdqa	%xmm4, %xmm3
   1888 	jmp	L(loop_ashr_15)
   1889 
   1890 	.p2align 4
   1891 L(nibble_ashr_15):
   1892 	pcmpeqb	%xmm3, %xmm0
   1893 	pmovmskb %xmm0, %esi
   1894 	test	$0x8000, %esi
   1895 	jnz	L(ashr_15_exittail)
   1896 
   1897 #ifdef USE_AS_STRNCMP
   1898 	cmpl	$1, %ebp
   1899 	jbe	L(ashr_15_exittail)
   1900 #endif
   1901 	pxor	%xmm0, %xmm0
   1902 	sub	$0x1000, %edi
   1903 	jmp	L(gobble_ashr_15)
   1904 
   1905 	.p2align 4
   1906 L(ashr_15_exittail):
   1907 	movdqa	(%eax, %ecx), %xmm1
   1908 	psrldq	$15, %xmm0
   1909 	psrldq	$15, %xmm3
   1910 	jmp	L(aftertail)
   1911 
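/*
 * Result extraction, as a hedged C sketch (names invented): the mask
 * produced by the last vector compare has its lowest set bit at the
 * first byte that differs or is a NUL.  L(exit)/L(less32bytes) rewind
 * %eax and %edx to the start of that 16-byte window (by different
 * amounts, since the %edx stream was realigned) and undo the swap
 * recorded in bit 0x20 of %ebx; L(less16bytes) then walks the mask
 * from bit 0 upwards and returns the difference of that byte pair
 * (the strncmp build first checks the byte is still within the limit):
 *
 *	s1 += pos;  s2 += pos;                 // start of the final window
 *	for (int k = 0; k < 16; k++)
 *		if (mask & (1u << k))
 *			return (unsigned char)s1[k] - (unsigned char)s2[k];
 */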
   1912 	.p2align 4
   1913 L(aftertail):
   1914 	pcmpeqb	%xmm3, %xmm1
   1915 	psubb	%xmm0, %xmm1
   1916 	pmovmskb %xmm1, %esi
   1917 	not	%esi
   1918 L(exit):
   1919 	mov	%ebx, %edi
   1920 	and	$0x1f, %edi
   1921 	lea	-16(%edi, %ecx), %edi
   1922 L(less32bytes):
   1923 	add	%edi, %edx
   1924 	add	%ecx, %eax
   1925 	test	$0x20, %ebx
   1926 	jz	L(ret2)
   1927 	xchg	%eax, %edx
   1928 
   1929 	.p2align 4
   1930 L(ret2):
   1931 	mov	%esi, %ecx
   1932 	POP	(%esi)
   1933 	POP	(%edi)
   1934 	POP	(%ebx)
   1935 L(less16bytes):
   1936 	test	%cl, %cl
   1937 	jz	L(2next_8_bytes)
   1938 
   1939 	test	$0x01, %cl
   1940 	jnz	L(Byte0)
   1941 
   1942 	test	$0x02, %cl
   1943 	jnz	L(Byte1)
   1944 
   1945 	test	$0x04, %cl
   1946 	jnz	L(Byte2)
   1947 
   1948 	test	$0x08, %cl
   1949 	jnz	L(Byte3)
   1950 
   1951 	test	$0x10, %cl
   1952 	jnz	L(Byte4)
   1953 
   1954 	test	$0x20, %cl
   1955 	jnz	L(Byte5)
   1956 
   1957 	test	$0x40, %cl
   1958 	jnz	L(Byte6)
   1959 #ifdef USE_AS_STRNCMP
   1960 	cmpl	$7, %ebp
   1961 	jbe	L(eq)
   1962 #endif
   1963 
   1964 	movzbl	7(%eax), %ecx
   1965 	movzbl	7(%edx), %eax
   1966 
   1967 	sub	%ecx, %eax
   1968 	RETURN
   1969 
   1970 	.p2align 4
   1971 L(Byte0):
   1972 #ifdef USE_AS_STRNCMP
   1973 	cmpl	$0, %ebp
   1974 	jbe	L(eq)
   1975 #endif
   1976 	movzbl	(%eax), %ecx
   1977 	movzbl	(%edx), %eax
   1978 
   1979 	sub	%ecx, %eax
   1980 	RETURN
   1981 
   1982 	.p2align 4
   1983 L(Byte1):
   1984 #ifdef USE_AS_STRNCMP
   1985 	cmpl	$1, %ebp
   1986 	jbe	L(eq)
   1987 #endif
   1988 	movzbl	1(%eax), %ecx
   1989 	movzbl	1(%edx), %eax
   1990 
   1991 	sub	%ecx, %eax
   1992 	RETURN
   1993 
   1994 	.p2align 4
   1995 L(Byte2):
   1996 #ifdef USE_AS_STRNCMP
   1997 	cmpl	$2, %ebp
   1998 	jbe	L(eq)
   1999 #endif
   2000 	movzbl	2(%eax), %ecx
   2001 	movzbl	2(%edx), %eax
   2002 
   2003 	sub	%ecx, %eax
   2004 	RETURN
   2005 
   2006 	.p2align 4
   2007 L(Byte3):
   2008 #ifdef USE_AS_STRNCMP
   2009 	cmpl	$3, %ebp
   2010 	jbe	L(eq)
   2011 #endif
   2012 	movzbl	3(%eax), %ecx
   2013 	movzbl	3(%edx), %eax
   2014 
   2015 	sub	%ecx, %eax
   2016 	RETURN
   2017 
   2018 	.p2align 4
   2019 L(Byte4):
   2020 #ifdef USE_AS_STRNCMP
   2021 	cmpl	$4, %ebp
   2022 	jbe	L(eq)
   2023 #endif
   2024 	movzbl	4(%eax), %ecx
   2025 	movzbl	4(%edx), %eax
   2026 
   2027 	sub	%ecx, %eax
   2028 	RETURN
   2029 
   2030 	.p2align 4
   2031 L(Byte5):
   2032 #ifdef USE_AS_STRNCMP
   2033 	cmpl	$5, %ebp
   2034 	jbe	L(eq)
   2035 #endif
   2036 	movzbl	5(%eax), %ecx
   2037 	movzbl	5(%edx), %eax
   2038 
   2039 	sub	%ecx, %eax
   2040 	RETURN
   2041 
   2042 	.p2align 4
   2043 L(Byte6):
   2044 #ifdef USE_AS_STRNCMP
   2045 	cmpl	$6, %ebp
   2046 	jbe	L(eq)
   2047 #endif
   2048 	movzbl	6(%eax), %ecx
   2049 	movzbl	6(%edx), %eax
   2050 
   2051 	sub	%ecx, %eax
   2052 	RETURN
   2053 
   2054 	.p2align 4
   2055 L(2next_8_bytes):
   2056 	add	$8, %eax
   2057 	add	$8, %edx
   2058 #ifdef USE_AS_STRNCMP
   2059 	cmpl	$8, %ebp
   2060 	lea	-8(%ebp), %ebp
   2061 	jbe	L(eq)
   2062 #endif
   2063 
   2064 	test	$0x01, %ch
   2065 	jnz	L(Byte0)
   2066 
   2067 	test	$0x02, %ch
   2068 	jnz	L(Byte1)
   2069 
   2070 	test	$0x04, %ch
   2071 	jnz	L(Byte2)
   2072 
   2073 	test	$0x08, %ch
   2074 	jnz	L(Byte3)
   2075 
   2076 	test	$0x10, %ch
   2077 	jnz	L(Byte4)
   2078 
   2079 	test	$0x20, %ch
   2080 	jnz	L(Byte5)
   2081 
   2082 	test	$0x40, %ch
   2083 	jnz	L(Byte6)
   2084 
   2085 #ifdef USE_AS_STRNCMP
   2086 	cmpl	$7, %ebp
   2087 	jbe	L(eq)
   2088 #endif
   2089 	movzbl	7(%eax), %ecx
   2090 	movzbl	7(%edx), %eax
   2091 
   2092 	sub	%ecx, %eax
   2093 	RETURN
   2094 
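/*
 * L(neq) is entered straight after a "cmpb %cl, k(%edx)" in the scalar
 * paths, so the flags still describe that unsigned byte comparison:
 * "ja" taken means the first string's byte is the larger one.  In C
 * terms (a sketch of the return value only):
 *
 *	return ((unsigned char)s1[k] > (unsigned char)s2[k]) ? 1 : -1;
 */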
   2095 	.p2align 4
   2096 L(neq):
   2097 	mov	$1, %eax
   2098 	ja	L(neq_bigger)
   2099 	neg	%eax
   2100 L(neq_bigger):
   2101 	RETURN
   2102 
   2103 #ifdef USE_AS_STRNCMP
   2104 	.p2align 4
   2105 L(more8byteseq):
   2106 	POP	(%esi)
   2107 	POP	(%edi)
   2108 	POP	(%ebx)
   2109 #endif
   2110 
   2111 L(eq):
   2112 
   2113 #ifdef USE_AS_STRNCMP
   2114 	POP	(%ebp)
   2115 #endif
   2116 	xorl	%eax, %eax
   2117 	ret
   2118 
   2119 #ifdef USE_AS_STRNCMP
   2120 	cfi_restore_state
   2121 
   2122 	.p2align 4
   2123 L(less16bytes_sncmp):
   2124 	test	%ebp, %ebp
   2125 	jz	L(eq)
   2126 
   2127 	movzbl	(%eax), %ecx
   2128 	cmpb	%cl, (%edx)
   2129 	jne	L(neq)
   2130 	test	%cl, %cl
   2131 	je	L(eq)
   2132 
   2133 	cmpl	$1, %ebp
   2134 	je	L(eq)
   2135 
   2136 	movzbl	1(%eax), %ecx
   2137 	cmpb	%cl, 1(%edx)
   2138 	jne	L(neq)
   2139 	test	%cl, %cl
   2140 	je	L(eq)
   2141 
   2142 	cmpl	$2, %ebp
   2143 	je	L(eq)
   2144 
   2145 	movzbl	2(%eax), %ecx
   2146 	cmpb	%cl, 2(%edx)
   2147 	jne	L(neq)
   2148 	test	%cl, %cl
   2149 	je	L(eq)
   2150 
   2151 	cmpl	$3, %ebp
   2152 	je	L(eq)
   2153 
   2154 	movzbl	3(%eax), %ecx
   2155 	cmpb	%cl, 3(%edx)
   2156 	jne	L(neq)
   2157 	test	%cl, %cl
   2158 	je	L(eq)
   2159 
   2160 	cmpl	$4, %ebp
   2161 	je	L(eq)
   2162 
   2163 	movzbl	4(%eax), %ecx
   2164 	cmpb	%cl, 4(%edx)
   2165 	jne	L(neq)
   2166 	test	%cl, %cl
   2167 	je	L(eq)
   2168 
   2169 	cmpl	$5, %ebp
   2170 	je	L(eq)
   2171 
   2172 	movzbl	5(%eax), %ecx
   2173 	cmpb	%cl, 5(%edx)
   2174 	jne	L(neq)
   2175 	test	%cl, %cl
   2176 	je	L(eq)
   2177 
   2178 	cmpl	$6, %ebp
   2179 	je	L(eq)
   2180 
   2181 	movzbl	6(%eax), %ecx
   2182 	cmpb	%cl, 6(%edx)
   2183 	jne	L(neq)
   2184 	test	%cl, %cl
   2185 	je	L(eq)
   2186 
   2187 	cmpl	$7, %ebp
   2188 	je	L(eq)
   2189 
   2190 	movzbl	7(%eax), %ecx
   2191 	cmpb	%cl, 7(%edx)
   2192 	jne	L(neq)
   2193 	test	%cl, %cl
   2194 	je	L(eq)
   2195 
   2196 
   2197 	cmpl	$8, %ebp
   2198 	je	L(eq)
   2199 
   2200 	movzbl	8(%eax), %ecx
   2201 	cmpb	%cl, 8(%edx)
   2202 	jne	L(neq)
   2203 	test	%cl, %cl
   2204 	je	L(eq)
   2205 
   2206 	cmpl	$9, %ebp
   2207 	je	L(eq)
   2208 
   2209 	movzbl	9(%eax), %ecx
   2210 	cmpb	%cl, 9(%edx)
   2211 	jne	L(neq)
   2212 	test	%cl, %cl
   2213 	je	L(eq)
   2214 
   2215 	cmpl	$10, %ebp
   2216 	je	L(eq)
   2217 
   2218 	movzbl	10(%eax), %ecx
   2219 	cmpb	%cl, 10(%edx)
   2220 	jne	L(neq)
   2221 	test	%cl, %cl
   2222 	je	L(eq)
   2223 
   2224 	cmpl	$11, %ebp
   2225 	je	L(eq)
   2226 
   2227 	movzbl	11(%eax), %ecx
   2228 	cmpb	%cl, 11(%edx)
   2229 	jne	L(neq)
   2230 	test	%cl, %cl
   2231 	je	L(eq)
   2232 
   2233 
   2234 	cmpl	$12, %ebp
   2235 	je	L(eq)
   2236 
   2237 	movzbl	12(%eax), %ecx
   2238 	cmpb	%cl, 12(%edx)
   2239 	jne	L(neq)
   2240 	test	%cl, %cl
   2241 	je	L(eq)
   2242 
   2243 	cmpl	$13, %ebp
   2244 	je	L(eq)
   2245 
   2246 	movzbl	13(%eax), %ecx
   2247 	cmpb	%cl, 13(%edx)
   2248 	jne	L(neq)
   2249 	test	%cl, %cl
   2250 	je	L(eq)
   2251 
   2252 	cmpl	$14, %ebp
   2253 	je	L(eq)
   2254 
   2255 	movzbl	14(%eax), %ecx
   2256 	cmpb	%cl, 14(%edx)
   2257 	jne	L(neq)
   2258 	test	%cl, %cl
   2259 	je	L(eq)
   2260 
   2261 	cmpl	$15, %ebp
   2262 	je	L(eq)
   2263 
   2264 	movzbl	15(%eax), %ecx
   2265 	cmpb	%cl, 15(%edx)
   2266 	jne	L(neq)
   2267 	test	%cl, %cl
   2268 	je	L(eq)
   2269 
   2270 	POP	(%ebp)
   2271 	xor	%eax, %eax
   2272 	ret
   2273 #endif
   2274 
   2275 END (STRCMP)
   2276