      1 /*
      2 Copyright (c) 2010, Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     31 #ifndef L
     32 # define L(label)	.L##label
     33 #endif
     34 
     35 #ifndef cfi_startproc
     36 # define cfi_startproc			.cfi_startproc
     37 #endif
     38 
     39 #ifndef cfi_endproc
     40 # define cfi_endproc			.cfi_endproc
     41 #endif
     42 
     43 #ifndef cfi_rel_offset
     44 # define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
     45 #endif
     46 
     47 #ifndef cfi_restore
     48 # define cfi_restore(reg)		.cfi_restore (reg)
     49 #endif
     50 
     51 #ifndef cfi_adjust_cfa_offset
     52 # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
     53 #endif
     54 
     55 #ifndef ENTRY
     56 # define ENTRY(name)			\
     57 	.type name,  @function; 	\
     58 	.globl name;			\
     59 	.p2align 4;			\
     60 name:					\
     61 	cfi_startproc
     62 #endif
     63 
     64 #ifndef END
     65 # define END(name)			\
     66 	cfi_endproc;			\
     67 	.size name, .-name
     68 #endif
     69 
     70 #define CFI_PUSH(REG)						\
     71   cfi_adjust_cfa_offset (4);					\
     72   cfi_rel_offset (REG, 0)
     73 
     74 #define CFI_POP(REG)						\
     75   cfi_adjust_cfa_offset (-4);					\
     76   cfi_restore (REG)
     77 
     78 #define PUSH(REG)	pushl REG; CFI_PUSH (REG)
     79 #define POP(REG)	popl REG; CFI_POP (REG)
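/* PUSH/POP wrap pushl/popl with the matching CFI directives so that the
   DWARF unwind information stays accurate while callee-saved registers
   are spilled and restored.  */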
     80 
     81 #ifndef USE_AS_STRNCMP
     82 # define STR1		4
     83 # define STR2		STR1+4
     84 # define RETURN		ret
     85 
     86 # define UPDATE_STRNCMP_COUNTER
     87 #else
     88 # define STR1		8
     89 # define STR2		STR1+4
     90 # define CNT		STR2+4
     91 # define RETURN		POP (%ebp); ret; CFI_PUSH (%ebp)
     92 
     93 # define UPDATE_STRNCMP_COUNTER				\
     94 	/* calculate left number to compare */		\
     95 	mov	$16, %esi;				\
     96 	sub	%ecx, %esi;				\
     97 	cmp	%esi, %ebp;				\
     98 	jbe	L(more8byteseq);			\
     99 	sub	%esi, %ebp
    100 #endif
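/* STR1, STR2 (and CNT for the strncmp build) are the %esp-relative offsets
   of the arguments; they are 4 bytes larger when USE_AS_STRNCMP is defined
   because %ebp is pushed on entry.  UPDATE_STRNCMP_COUNTER charges the
   16 - %ecx bytes already covered by the initial unaligned compare against
   the remaining count in %ebp and branches to L(more8byteseq) (equal within
   the first n bytes) once the count is exhausted.  */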
    101 
    102 	.section .text.ssse3,"ax",@progbits
    103 ENTRY (ssse3_strcmp_latest)
    104 #ifdef USE_AS_STRNCMP
    105 	PUSH	(%ebp)
    106 #endif
    107 	movl	STR1(%esp), %edx
    108 	movl	STR2(%esp), %eax
    109 #ifdef USE_AS_STRNCMP
    110 	movl	CNT(%esp), %ebp
    111 	cmp	$16, %ebp
    112 	jb	L(less16bytes_sncmp)
    113 	jmp	L(more16bytes)
    114 #endif
    115 
    116 	movzbl	(%eax), %ecx
    117 	cmpb	%cl, (%edx)
    118 	jne	L(neq)
    119 	cmpl	$0, %ecx
    120 	je	L(eq)
    121 
    122 	movzbl	1(%eax), %ecx
    123 	cmpb	%cl, 1(%edx)
    124 	jne	L(neq)
    125 	cmpl	$0, %ecx
    126 	je	L(eq)
    127 
    128 	movzbl	2(%eax), %ecx
    129 	cmpb	%cl, 2(%edx)
    130 	jne	L(neq)
    131 	cmpl	$0, %ecx
    132 	je	L(eq)
    133 
    134 	movzbl	3(%eax), %ecx
    135 	cmpb	%cl, 3(%edx)
    136 	jne	L(neq)
    137 	cmpl	$0, %ecx
    138 	je	L(eq)
    139 
    140 	movzbl	4(%eax), %ecx
    141 	cmpb	%cl, 4(%edx)
    142 	jne	L(neq)
    143 	cmpl	$0, %ecx
    144 	je	L(eq)
    145 
    146 	movzbl	5(%eax), %ecx
    147 	cmpb	%cl, 5(%edx)
    148 	jne	L(neq)
    149 	cmpl	$0, %ecx
    150 	je	L(eq)
    151 
    152 	movzbl	6(%eax), %ecx
    153 	cmpb	%cl, 6(%edx)
    154 	jne	L(neq)
    155 	cmpl	$0, %ecx
    156 	je	L(eq)
    157 
    158 	movzbl	7(%eax), %ecx
    159 	cmpb	%cl, 7(%edx)
    160 	jne	L(neq)
    161 	cmpl	$0, %ecx
    162 	je	L(eq)
    163 
    164 	add	$8, %edx
    165 	add	$8, %eax
    166 #ifdef USE_AS_STRNCMP
    167 	cmp	$8, %ebp
    168 	lea	-8(%ebp), %ebp
    169 	je	L(eq)
    170 L(more16bytes):
    171 #endif
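/* If either pointer is within 16 bytes of the end of a 4K page, the
   unaligned 16-byte loads below could touch the following page, so take
   the careful L(crosspage) path instead.  */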
    172 	movl	%edx, %ecx
    173 	and	$0xfff, %ecx
    174 	cmp	$0xff0, %ecx
    175 	ja	L(crosspage)
    176 	mov	%eax, %ecx
    177 	and	$0xfff, %ecx
    178 	cmp	$0xff0, %ecx
    179 	ja	L(crosspage)
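/* Compare one 16-byte block of each string: %xmm0 gets 0xff at the NUL
   bytes of the %eax string, and after the pcmpeqb/psubb pair %xmm1 holds
   0xff only where the bytes are equal and non-zero, so the mask in %ecx is
   0xffff exactly when the whole block matches with no terminator.  */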
    180 	pxor	%xmm0, %xmm0
    181 	movlpd	(%eax), %xmm1
    182 	movlpd	(%edx), %xmm2
    183 	movhpd	8(%eax), %xmm1
    184 	movhpd	8(%edx), %xmm2
    185 	pcmpeqb	%xmm1, %xmm0
    186 	pcmpeqb	%xmm2, %xmm1
    187 	psubb	%xmm0, %xmm1
    188 	pmovmskb %xmm1, %ecx
    189 	sub	$0xffff, %ecx
    190 	jnz	L(less16bytes)
    191 #ifdef USE_AS_STRNCMP
    192 	cmp	$16, %ebp
    193 	lea	-16(%ebp), %ebp
    194 	jbe	L(eq)
    195 #endif
    196 	add	$16, %eax
    197 	add	$16, %edx
    198 
    199 L(crosspage):
    200 
    201 	PUSH	(%ebx)
    202 	PUSH	(%edi)
    203 	PUSH	(%esi)
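/* Slow path for the remaining bytes: align both pointers down to a 16-byte
   boundary, keep their offsets within the block, and dispatch on the
   relative offset to one of the L(ashr_N) loops below.  The low bits of
   %ebx record the shift amount and bit 0x20 records that the operands were
   swapped, so L(exit)/L(less32bytes) can undo both when forming the
   result.  */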
    204 
    205 	movl	%edx, %edi
    206 	movl	%eax, %ecx
    207 	and	$0xf, %ecx
    208 	and	$0xf, %edi
    209 	xor	%ecx, %eax
    210 	xor	%edi, %edx
    211 	xor	%ebx, %ebx
    212 	cmp	%edi, %ecx
    213 	je	L(ashr_0)
    214 	ja	L(bigger)
    215 	or	$0x20, %ebx
    216 	xchg	%edx, %eax
    217 	xchg	%ecx, %edi
    218 L(bigger):
    219 	lea	15(%edi), %edi
    220 	sub	%ecx, %edi
    221 	cmp	$8, %edi
    222 	jle	L(ashr_less_8)
    223 	cmp	$14, %edi
    224 	je	L(ashr_15)
    225 	cmp	$13, %edi
    226 	je	L(ashr_14)
    227 	cmp	$12, %edi
    228 	je	L(ashr_13)
    229 	cmp	$11, %edi
    230 	je	L(ashr_12)
    231 	cmp	$10, %edi
    232 	je	L(ashr_11)
    233 	cmp	$9, %edi
    234 	je	L(ashr_10)
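	/* Values 0..8 of %edi arrive here via "jle L(ashr_less_8)" with the
	   flags of "cmp $8, %edi" still live, so the je below handles
	   %edi == 8 (ashr_9).  */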
    235 L(ashr_less_8):
    236 	je	L(ashr_9)
    237 	cmp	$7, %edi
    238 	je	L(ashr_8)
    239 	cmp	$6, %edi
    240 	je	L(ashr_7)
    241 	cmp	$5, %edi
    242 	je	L(ashr_6)
    243 	cmp	$4, %edi
    244 	je	L(ashr_5)
    245 	cmp	$3, %edi
    246 	je	L(ashr_4)
    247 	cmp	$2, %edi
    248 	je	L(ashr_3)
    249 	cmp	$1, %edi
    250 	je	L(ashr_2)
    251 	cmp	$0, %edi
    252 	je	L(ashr_1)
    253 
    254 /*
    255  * The following cases will be handled by ashr_0
    256  *  ecx(offset of esi)  eax(offset of edi)  relative offset  corresponding case
    257  *        n(0~15)            n(0~15)           15(15+ n-n)         ashr_0
    258  */
    259 	.p2align 4
    260 L(ashr_0):
    261 	mov	$0xffff, %esi
    262 	movdqa	(%eax), %xmm1
    263 	pxor	%xmm0, %xmm0
    264 	pcmpeqb	%xmm1, %xmm0
    265 	pcmpeqb	(%edx), %xmm1
    266 	psubb	%xmm0, %xmm1
    267 	pmovmskb %xmm1, %edi
    268 	shr	%cl, %esi
    269 	shr	%cl, %edi
    270 	sub	%edi, %esi
    271 	mov	%ecx, %edi
    272 	jne	L(less32bytes)
    273 	UPDATE_STRNCMP_COUNTER
    274 	mov	$0x10, %ebx
    275 	mov	$0x10, %ecx
    276 	pxor	%xmm0, %xmm0
    277 	.p2align 4
    278 L(loop_ashr_0):
    279 	movdqa	(%eax, %ecx), %xmm1
    280 	movdqa	(%edx, %ecx), %xmm2
    281 
    282 	pcmpeqb	%xmm1, %xmm0
    283 	pcmpeqb	%xmm2, %xmm1
    284 	psubb	%xmm0, %xmm1
    285 	pmovmskb %xmm1, %esi
    286 	sub	$0xffff, %esi
    287 	jnz	L(exit)
    288 #ifdef USE_AS_STRNCMP
    289 	cmp	$16, %ebp
    290 	lea	-16(%ebp), %ebp
    291 	jbe	L(more8byteseq)
    292 #endif
    293 	add	$16, %ecx
    294 	jmp	L(loop_ashr_0)
    295 
    296 /*
    297  * The following cases will be handled by ashr_1
    298  * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
    299  *        n(15)            n -15            0(15 +(n-15) - n)         ashr_1
    300  */
    301 	.p2align 4
    302 L(ashr_1):
    303 	mov	$0xffff, %esi
    304 	pxor	%xmm0, %xmm0
    305 	movdqa	(%edx), %xmm2
    306 	movdqa	(%eax), %xmm1
    307 	pcmpeqb	%xmm1, %xmm0
    308 	pslldq	$15, %xmm2
    309 	pcmpeqb	%xmm1, %xmm2
    310 	psubb	%xmm0, %xmm2
    311 	pmovmskb %xmm2, %edi
    312 	shr	%cl, %esi
    313 	shr	%cl, %edi
    314 	sub	%edi, %esi
    315 	lea	-15(%ecx), %edi
    316 	jnz	L(less32bytes)
    317 
    318 	UPDATE_STRNCMP_COUNTER
    319 
    320 	movdqa	(%edx), %xmm3
    321 	pxor	%xmm0, %xmm0
    322 	mov	$16, %ecx
    323 	or	$1, %ebx
    324 	lea	1(%edx), %edi
    325 	and	$0xfff, %edi
    326 	sub	$0x1000, %edi
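	/* %edi is the (negative) distance from %edx + 1 to the next 4K page
	   boundary; the loop diverts to L(nibble_ashr_1) before a 16-byte
	   load on the %edx side would cross into the next page.  */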
    327 
    328 	.p2align 4
    329 L(loop_ashr_1):
    330 	add	$16, %edi
    331 	jg	L(nibble_ashr_1)
    332 
    333 L(gobble_ashr_1):
    334 	movdqa	(%eax, %ecx), %xmm1
    335 	movdqa	(%edx, %ecx), %xmm2
    336 	movdqa	%xmm2, %xmm4
    337 
    338 	palignr	$1, %xmm3, %xmm2
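	/* %xmm2 now holds the 16 bytes of the %edx string that line up with
	   the aligned block just loaded from %eax: the tail of the previous
	   %edx block (%xmm3) followed by the start of the current one.  */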
    339 
    340 	pcmpeqb	%xmm1, %xmm0
    341 	pcmpeqb	%xmm2, %xmm1
    342 	psubb	%xmm0, %xmm1
    343 	pmovmskb %xmm1, %esi
    344 	sub	$0xffff, %esi
    345 	jnz	L(exit)
    346 #ifdef USE_AS_STRNCMP
    347 	cmp	$16, %ebp
    348 	lea	-16(%ebp), %ebp
    349 	jbe	L(more8byteseq)
    350 #endif
    351 
    352 	add	$16, %ecx
    353 	movdqa	%xmm4, %xmm3
    354 
    355 	add	$16, %edi
    356 	jg	L(nibble_ashr_1)
    357 
    358 	movdqa	(%eax, %ecx), %xmm1
    359 	movdqa	(%edx, %ecx), %xmm2
    360 	movdqa	%xmm2, %xmm4
    361 
    362 	palignr	$1, %xmm3, %xmm2
    363 
    364 	pcmpeqb	%xmm1, %xmm0
    365 	pcmpeqb	%xmm2, %xmm1
    366 	psubb	%xmm0, %xmm1
    367 	pmovmskb %xmm1, %esi
    368 	sub	$0xffff, %esi
    369 	jnz	L(exit)
    370 
    371 #ifdef USE_AS_STRNCMP
    372 	cmp	$16, %ebp
    373 	lea	-16(%ebp), %ebp
    374 	jbe	L(more8byteseq)
    375 #endif
    376 	add	$16, %ecx
    377 	movdqa	%xmm4, %xmm3
    378 	jmp	L(loop_ashr_1)
    379 
    380 	.p2align 4
    381 L(nibble_ashr_1):
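	/* About to cross a page on the %edx side: if the not yet consumed
	   bytes of the saved block (%xmm3) contain a NUL (or, for strncmp,
	   the count is nearly exhausted), finish via L(ashr_1_exittail)
	   instead of touching the next page.  */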
    382 	pcmpeqb	%xmm3, %xmm0
    383 	pmovmskb %xmm0, %esi
    384 	test	$0xfffe, %esi
    385 	jnz	L(ashr_1_exittail)
    386 
    387 #ifdef USE_AS_STRNCMP
    388 	cmp	$15, %ebp
    389 	jbe	L(ashr_1_exittail)
    390 #endif
    391 	pxor	%xmm0, %xmm0
    392 	sub	$0x1000, %edi
    393 	jmp	L(gobble_ashr_1)
    394 
    395 	.p2align 4
    396 L(ashr_1_exittail):
    397 	movdqa	(%eax, %ecx), %xmm1
    398 	psrldq	$1, %xmm0
    399 	psrldq	$1, %xmm3
    400 	jmp	L(aftertail)
    401 
    402 /*
    403  * The following cases will be handled by ashr_2
    404  * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
    405  *        n(14~15)            n -14            1(15 +(n-14) - n)         ashr_2
    406  */
    407 	.p2align 4
    408 L(ashr_2):
    409 	mov	$0xffff, %esi
    410 	pxor	%xmm0, %xmm0
    411 	movdqa	(%edx), %xmm2
    412 	movdqa	(%eax), %xmm1
    413 	pcmpeqb	%xmm1, %xmm0
    414 	pslldq	$14, %xmm2
    415 	pcmpeqb	%xmm1, %xmm2
    416 	psubb	%xmm0, %xmm2
    417 	pmovmskb %xmm2, %edi
    418 	shr	%cl, %esi
    419 	shr	%cl, %edi
    420 	sub	%edi, %esi
    421 	lea	-14(%ecx), %edi
    422 	jnz	L(less32bytes)
    423 
    424 	UPDATE_STRNCMP_COUNTER
    425 
    426 	movdqa	(%edx), %xmm3
    427 	pxor	%xmm0, %xmm0
    428 	mov	$16, %ecx
    429 	or	$2, %ebx
    430 	lea	2(%edx), %edi
    431 	and	$0xfff, %edi
    432 	sub	$0x1000, %edi
    433 
    434 	.p2align 4
    435 L(loop_ashr_2):
    436 	add	$16, %edi
    437 	jg	L(nibble_ashr_2)
    438 
    439 L(gobble_ashr_2):
    440 	movdqa	(%eax, %ecx), %xmm1
    441 	movdqa	(%edx, %ecx), %xmm2
    442 	movdqa	%xmm2, %xmm4
    443 
    444 	palignr	$2, %xmm3, %xmm2
    445 
    446 	pcmpeqb	%xmm1, %xmm0
    447 	pcmpeqb	%xmm2, %xmm1
    448 	psubb	%xmm0, %xmm1
    449 	pmovmskb %xmm1, %esi
    450 	sub	$0xffff, %esi
    451 	jnz	L(exit)
    452 
    453 #ifdef USE_AS_STRNCMP
    454 	cmp	$16, %ebp
    455 	lea	-16(%ebp), %ebp
    456 	jbe	L(more8byteseq)
    457 #endif
    458 	add	$16, %ecx
    459 	movdqa	%xmm4, %xmm3
    460 
    461 	add	$16, %edi
    462 	jg	L(nibble_ashr_2)
    463 
    464 	movdqa	(%eax, %ecx), %xmm1
    465 	movdqa	(%edx, %ecx), %xmm2
    466 	movdqa	%xmm2, %xmm4
    467 
    468 	palignr	$2, %xmm3, %xmm2
    469 
    470 	pcmpeqb	%xmm1, %xmm0
    471 	pcmpeqb	%xmm2, %xmm1
    472 	psubb	%xmm0, %xmm1
    473 	pmovmskb %xmm1, %esi
    474 	sub	$0xffff, %esi
    475 	jnz	L(exit)
    476 
    477 #ifdef USE_AS_STRNCMP
    478 	cmp	$16, %ebp
    479 	lea	-16(%ebp), %ebp
    480 	jbe	L(more8byteseq)
    481 #endif
    482 	add	$16, %ecx
    483 	movdqa	%xmm4, %xmm3
    484 	jmp	L(loop_ashr_2)
    485 
    486 	.p2align 4
    487 L(nibble_ashr_2):
    488 	pcmpeqb	%xmm3, %xmm0
    489 	pmovmskb %xmm0, %esi
    490 	test	$0xfffc, %esi
    491 	jnz	L(ashr_2_exittail)
    492 
    493 #ifdef USE_AS_STRNCMP
    494 	cmp	$14, %ebp
    495 	jbe	L(ashr_2_exittail)
    496 #endif
    497 
    498 	pxor	%xmm0, %xmm0
    499 	sub	$0x1000, %edi
    500 	jmp	L(gobble_ashr_2)
    501 
    502 	.p2align 4
    503 L(ashr_2_exittail):
    504 	movdqa	(%eax, %ecx), %xmm1
    505 	psrldq	$2, %xmm0
    506 	psrldq	$2, %xmm3
    507 	jmp	L(aftertail)
    508 
    509 /*
    510  * The following cases will be handled by ashr_3
    511  * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
    512  *        n(13~15)            n -13            2(15 +(n-13) - n)         ashr_3
    513  */
    514 	.p2align 4
    515 L(ashr_3):
    516 	mov	$0xffff, %esi
    517 	pxor	%xmm0, %xmm0
    518 	movdqa	(%edx), %xmm2
    519 	movdqa	(%eax), %xmm1
    520 	pcmpeqb	%xmm1, %xmm0
    521 	pslldq	$13, %xmm2
    522 	pcmpeqb	%xmm1, %xmm2
    523 	psubb	%xmm0, %xmm2
    524 	pmovmskb %xmm2, %edi
    525 	shr	%cl, %esi
    526 	shr	%cl, %edi
    527 	sub	%edi, %esi
    528 	lea	-13(%ecx), %edi
    529 	jnz	L(less32bytes)
    530 
    531 	UPDATE_STRNCMP_COUNTER
    532 
    533 	movdqa	(%edx), %xmm3
    534 	pxor	%xmm0, %xmm0
    535 	mov	$16, %ecx
    536 	or	$3, %ebx
    537 	lea	3(%edx), %edi
    538 	and	$0xfff, %edi
    539 	sub	$0x1000, %edi
    540 
    541 	.p2align 4
    542 L(loop_ashr_3):
    543 	add	$16, %edi
    544 	jg	L(nibble_ashr_3)
    545 
    546 L(gobble_ashr_3):
    547 	movdqa	(%eax, %ecx), %xmm1
    548 	movdqa	(%edx, %ecx), %xmm2
    549 	movdqa	%xmm2, %xmm4
    550 
    551 	palignr	$3, %xmm3, %xmm2
    552 
    553 	pcmpeqb	%xmm1, %xmm0
    554 	pcmpeqb	%xmm2, %xmm1
    555 	psubb	%xmm0, %xmm1
    556 	pmovmskb %xmm1, %esi
    557 	sub	$0xffff, %esi
    558 	jnz	L(exit)
    559 
    560 #ifdef USE_AS_STRNCMP
    561 	cmp	$16, %ebp
    562 	lea	-16(%ebp), %ebp
    563 	jbe	L(more8byteseq)
    564 #endif
    565 	add	$16, %ecx
    566 	movdqa	%xmm4, %xmm3
    567 
    568 	add	$16, %edi
    569 	jg	L(nibble_ashr_3)
    570 
    571 	movdqa	(%eax, %ecx), %xmm1
    572 	movdqa	(%edx, %ecx), %xmm2
    573 	movdqa	%xmm2, %xmm4
    574 
    575 	palignr	$3, %xmm3, %xmm2
    576 
    577 	pcmpeqb	%xmm1, %xmm0
    578 	pcmpeqb	%xmm2, %xmm1
    579 	psubb	%xmm0, %xmm1
    580 	pmovmskb %xmm1, %esi
    581 	sub	$0xffff, %esi
    582 	jnz	L(exit)
    583 
    584 #ifdef USE_AS_STRNCMP
    585 	cmp	$16, %ebp
    586 	lea	-16(%ebp), %ebp
    587 	jbe	L(more8byteseq)
    588 #endif
    589 	add	$16, %ecx
    590 	movdqa	%xmm4, %xmm3
    591 	jmp	L(loop_ashr_3)
    592 
    593 	.p2align 4
    594 L(nibble_ashr_3):
    595 	pcmpeqb	%xmm3, %xmm0
    596 	pmovmskb %xmm0, %esi
    597 	test	$0xfff8, %esi
    598 	jnz	L(ashr_3_exittail)
    599 
    600 #ifdef USE_AS_STRNCMP
    601 	cmp	$13, %ebp
    602 	jbe	L(ashr_3_exittail)
    603 #endif
    604 	pxor	%xmm0, %xmm0
    605 	sub	$0x1000, %edi
    606 	jmp	L(gobble_ashr_3)
    607 
    608 	.p2align 4
    609 L(ashr_3_exittail):
    610 	movdqa	(%eax, %ecx), %xmm1
    611 	psrldq	$3, %xmm0
    612 	psrldq	$3, %xmm3
    613 	jmp	L(aftertail)
    614 
    615 /*
    616  * The following cases will be handled by ashr_4
    617  * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
    618  *        n(12~15)            n -12            3(15 +(n-12) - n)         ashr_4
    619  */
    620 	.p2align 4
    621 L(ashr_4):
    622 	mov	$0xffff, %esi
    623 	pxor	%xmm0, %xmm0
    624 	movdqa	(%edx), %xmm2
    625 	movdqa	(%eax), %xmm1
    626 	pcmpeqb	%xmm1, %xmm0
    627 	pslldq	$12, %xmm2
    628 	pcmpeqb	%xmm1, %xmm2
    629 	psubb	%xmm0, %xmm2
    630 	pmovmskb %xmm2, %edi
    631 	shr	%cl, %esi
    632 	shr	%cl, %edi
    633 	sub	%edi, %esi
    634 	lea	-12(%ecx), %edi
    635 	jnz	L(less32bytes)
    636 
    637 	UPDATE_STRNCMP_COUNTER
    638 
    639 	movdqa	(%edx), %xmm3
    640 	pxor	%xmm0, %xmm0
    641 	mov	$16, %ecx
    642 	or	$4, %ebx
    643 	lea	4(%edx), %edi
    644 	and	$0xfff, %edi
    645 	sub	$0x1000, %edi
    646 
    647 	.p2align 4
    648 L(loop_ashr_4):
    649 	add	$16, %edi
    650 	jg	L(nibble_ashr_4)
    651 
    652 L(gobble_ashr_4):
    653 	movdqa	(%eax, %ecx), %xmm1
    654 	movdqa	(%edx, %ecx), %xmm2
    655 	movdqa	%xmm2, %xmm4
    656 
    657 	palignr	$4, %xmm3, %xmm2
    658 
    659 	pcmpeqb	%xmm1, %xmm0
    660 	pcmpeqb	%xmm2, %xmm1
    661 	psubb	%xmm0, %xmm1
    662 	pmovmskb %xmm1, %esi
    663 	sub	$0xffff, %esi
    664 	jnz	L(exit)
    665 
    666 #ifdef USE_AS_STRNCMP
    667 	cmp	$16, %ebp
    668 	lea	-16(%ebp), %ebp
    669 	jbe	L(more8byteseq)
    670 #endif
    671 
    672 	add	$16, %ecx
    673 	movdqa	%xmm4, %xmm3
    674 
    675 	add	$16, %edi
    676 	jg	L(nibble_ashr_4)
    677 
    678 	movdqa	(%eax, %ecx), %xmm1
    679 	movdqa	(%edx, %ecx), %xmm2
    680 	movdqa	%xmm2, %xmm4
    681 
    682 	palignr	$4, %xmm3, %xmm2
    683 
    684 	pcmpeqb	%xmm1, %xmm0
    685 	pcmpeqb	%xmm2, %xmm1
    686 	psubb	%xmm0, %xmm1
    687 	pmovmskb %xmm1, %esi
    688 	sub	$0xffff, %esi
    689 	jnz	L(exit)
    690 
    691 #ifdef USE_AS_STRNCMP
    692 	cmp	$16, %ebp
    693 	lea	-16(%ebp), %ebp
    694 	jbe	L(more8byteseq)
    695 #endif
    696 
    697 	add	$16, %ecx
    698 	movdqa	%xmm4, %xmm3
    699 	jmp	L(loop_ashr_4)
    700 
    701 	.p2align 4
    702 L(nibble_ashr_4):
    703 	pcmpeqb	%xmm3, %xmm0
    704 	pmovmskb %xmm0, %esi
    705 	test	$0xfff0, %esi
    706 	jnz	L(ashr_4_exittail)
    707 
    708 #ifdef USE_AS_STRNCMP
    709 	cmp	$12, %ebp
    710 	jbe	L(ashr_4_exittail)
    711 #endif
    712 
    713 	pxor	%xmm0, %xmm0
    714 	sub	$0x1000, %edi
    715 	jmp	L(gobble_ashr_4)
    716 
    717 	.p2align 4
    718 L(ashr_4_exittail):
    719 	movdqa	(%eax, %ecx), %xmm1
    720 	psrldq	$4, %xmm0
    721 	psrldq	$4, %xmm3
    722 	jmp	L(aftertail)
    723 
    724 /*
    725  * The following cases will be handled by ashr_5
    726  * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
    727  *        n(11~15)            n -11            4(15 +(n-11) - n)         ashr_5
    728  */
    729 	.p2align 4
    730 L(ashr_5):
    731 	mov	$0xffff, %esi
    732 	pxor	%xmm0, %xmm0
    733 	movdqa	(%edx), %xmm2
    734 	movdqa	(%eax), %xmm1
    735 	pcmpeqb	%xmm1, %xmm0
    736 	pslldq	$11, %xmm2
    737 	pcmpeqb	%xmm1, %xmm2
    738 	psubb	%xmm0, %xmm2
    739 	pmovmskb %xmm2, %edi
    740 	shr	%cl, %esi
    741 	shr	%cl, %edi
    742 	sub	%edi, %esi
    743 	lea	-11(%ecx), %edi
    744 	jnz	L(less32bytes)
    745 
    746 	UPDATE_STRNCMP_COUNTER
    747 
    748 	movdqa	(%edx), %xmm3
    749 	pxor	%xmm0, %xmm0
    750 	mov	$16, %ecx
    751 	or	$5, %ebx
    752 	lea	5(%edx), %edi
    753 	and	$0xfff, %edi
    754 	sub	$0x1000, %edi
    755 
    756 	.p2align 4
    757 L(loop_ashr_5):
    758 	add	$16, %edi
    759 	jg	L(nibble_ashr_5)
    760 
    761 L(gobble_ashr_5):
    762 	movdqa	(%eax, %ecx), %xmm1
    763 	movdqa	(%edx, %ecx), %xmm2
    764 	movdqa	%xmm2, %xmm4
    765 
    766 	palignr	$5, %xmm3, %xmm2
    767 
    768 	pcmpeqb	%xmm1, %xmm0
    769 	pcmpeqb	%xmm2, %xmm1
    770 	psubb	%xmm0, %xmm1
    771 	pmovmskb %xmm1, %esi
    772 	sub	$0xffff, %esi
    773 	jnz	L(exit)
    774 
    775 #ifdef USE_AS_STRNCMP
    776 	cmp	$16, %ebp
    777 	lea	-16(%ebp), %ebp
    778 	jbe	L(more8byteseq)
    779 #endif
    780 	add	$16, %ecx
    781 	movdqa	%xmm4, %xmm3
    782 
    783 	add	$16, %edi
    784 	jg	L(nibble_ashr_5)
    785 
    786 	movdqa	(%eax, %ecx), %xmm1
    787 	movdqa	(%edx, %ecx), %xmm2
    788 	movdqa	%xmm2, %xmm4
    789 
    790 	palignr	$5, %xmm3, %xmm2
    791 
    792 	pcmpeqb	%xmm1, %xmm0
    793 	pcmpeqb	%xmm2, %xmm1
    794 	psubb	%xmm0, %xmm1
    795 	pmovmskb %xmm1, %esi
    796 	sub	$0xffff, %esi
    797 	jnz	L(exit)
    798 
    799 #ifdef USE_AS_STRNCMP
    800 	cmp	$16, %ebp
    801 	lea	-16(%ebp), %ebp
    802 	jbe	L(more8byteseq)
    803 #endif
    804 	add	$16, %ecx
    805 	movdqa	%xmm4, %xmm3
    806 	jmp	L(loop_ashr_5)
    807 
    808 	.p2align 4
    809 L(nibble_ashr_5):
    810 	pcmpeqb	%xmm3, %xmm0
    811 	pmovmskb %xmm0, %esi
    812 	test	$0xffe0, %esi
    813 	jnz	L(ashr_5_exittail)
    814 
    815 #ifdef USE_AS_STRNCMP
    816 	cmp	$11, %ebp
    817 	jbe	L(ashr_5_exittail)
    818 #endif
    819 	pxor	%xmm0, %xmm0
    820 	sub	$0x1000, %edi
    821 	jmp	L(gobble_ashr_5)
    822 
    823 	.p2align 4
    824 L(ashr_5_exittail):
    825 	movdqa	(%eax, %ecx), %xmm1
    826 	psrldq	$5, %xmm0
    827 	psrldq	$5, %xmm3
    828 	jmp	L(aftertail)
    829 
    830 /*
    831  * The following cases will be handled by ashr_6
    832  * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
    833  *        n(10~15)            n -10            5(15 +(n-10) - n)         ashr_6
    834  */
    835 
    836 	.p2align 4
    837 L(ashr_6):
    838 	mov	$0xffff, %esi
    839 	pxor	%xmm0, %xmm0
    840 	movdqa	(%edx), %xmm2
    841 	movdqa	(%eax), %xmm1
    842 	pcmpeqb	%xmm1, %xmm0
    843 	pslldq	$10, %xmm2
    844 	pcmpeqb	%xmm1, %xmm2
    845 	psubb	%xmm0, %xmm2
    846 	pmovmskb %xmm2, %edi
    847 	shr	%cl, %esi
    848 	shr	%cl, %edi
    849 	sub	%edi, %esi
    850 	lea	-10(%ecx), %edi
    851 	jnz	L(less32bytes)
    852 
    853 	UPDATE_STRNCMP_COUNTER
    854 
    855 	movdqa	(%edx), %xmm3
    856 	pxor	%xmm0, %xmm0
    857 	mov	$16, %ecx
    858 	or	$6, %ebx
    859 	lea	6(%edx), %edi
    860 	and	$0xfff, %edi
    861 	sub	$0x1000, %edi
    862 
    863 	.p2align 4
    864 L(loop_ashr_6):
    865 	add	$16, %edi
    866 	jg	L(nibble_ashr_6)
    867 
    868 L(gobble_ashr_6):
    869 	movdqa	(%eax, %ecx), %xmm1
    870 	movdqa	(%edx, %ecx), %xmm2
    871 	movdqa	%xmm2, %xmm4
    872 
    873 	palignr	$6, %xmm3, %xmm2
    874 
    875 	pcmpeqb	%xmm1, %xmm0
    876 	pcmpeqb	%xmm2, %xmm1
    877 	psubb	%xmm0, %xmm1
    878 	pmovmskb %xmm1, %esi
    879 	sub	$0xffff, %esi
    880 	jnz	L(exit)
    881 
    882 #ifdef USE_AS_STRNCMP
    883 	cmp	$16, %ebp
    884 	lea	-16(%ebp), %ebp
    885 	jbe	L(more8byteseq)
    886 #endif
    887 
    888 	add	$16, %ecx
    889 	movdqa	%xmm4, %xmm3
    890 
    891 	add	$16, %edi
    892 	jg	L(nibble_ashr_6)
    893 
    894 	movdqa	(%eax, %ecx), %xmm1
    895 	movdqa	(%edx, %ecx), %xmm2
    896 	movdqa	%xmm2, %xmm4
    897 
    898 	palignr	$6, %xmm3, %xmm2
    899 
    900 	pcmpeqb	%xmm1, %xmm0
    901 	pcmpeqb	%xmm2, %xmm1
    902 	psubb	%xmm0, %xmm1
    903 	pmovmskb %xmm1, %esi
    904 	sub	$0xffff, %esi
    905 	jnz	L(exit)
    906 #ifdef USE_AS_STRNCMP
    907 	cmp	$16, %ebp
    908 	lea	-16(%ebp), %ebp
    909 	jbe	L(more8byteseq)
    910 #endif
    911 
    912 	add	$16, %ecx
    913 	movdqa	%xmm4, %xmm3
    914 	jmp	L(loop_ashr_6)
    915 
    916 	.p2align 4
    917 L(nibble_ashr_6):
    918 	pcmpeqb	%xmm3, %xmm0
    919 	pmovmskb %xmm0, %esi
    920 	test	$0xffc0, %esi
    921 	jnz	L(ashr_6_exittail)
    922 
    923 #ifdef USE_AS_STRNCMP
    924 	cmp	$10, %ebp
    925 	jbe	L(ashr_6_exittail)
    926 #endif
    927 	pxor	%xmm0, %xmm0
    928 	sub	$0x1000, %edi
    929 	jmp	L(gobble_ashr_6)
    930 
    931 	.p2align 4
    932 L(ashr_6_exittail):
    933 	movdqa	(%eax, %ecx), %xmm1
    934 	psrldq	$6, %xmm0
    935 	psrldq	$6, %xmm3
    936 	jmp	L(aftertail)
    937 
    938 /*
    939  * The following cases will be handled by ashr_7
    940  * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
    941  *        n(9~15)            n - 9            6(15 +(n-9) - n)         ashr_7
    942  */
    943 
    944 	.p2align 4
    945 L(ashr_7):
    946 	mov	$0xffff, %esi
    947 	pxor	%xmm0, %xmm0
    948 	movdqa	(%edx), %xmm2
    949 	movdqa	(%eax), %xmm1
    950 	pcmpeqb	%xmm1, %xmm0
    951 	pslldq	$9, %xmm2
    952 	pcmpeqb	%xmm1, %xmm2
    953 	psubb	%xmm0, %xmm2
    954 	pmovmskb %xmm2, %edi
    955 	shr	%cl, %esi
    956 	shr	%cl, %edi
    957 	sub	%edi, %esi
    958 	lea	-9(%ecx), %edi
    959 	jnz	L(less32bytes)
    960 
    961 	UPDATE_STRNCMP_COUNTER
    962 
    963 	movdqa	(%edx), %xmm3
    964 	pxor	%xmm0, %xmm0
    965 	mov	$16, %ecx
    966 	or	$7, %ebx
    967 	lea	8(%edx), %edi
    968 	and	$0xfff, %edi
    969 	sub	$0x1000, %edi
    970 
    971 	.p2align 4
    972 L(loop_ashr_7):
    973 	add	$16, %edi
    974 	jg	L(nibble_ashr_7)
    975 
    976 L(gobble_ashr_7):
    977 	movdqa	(%eax, %ecx), %xmm1
    978 	movdqa	(%edx, %ecx), %xmm2
    979 	movdqa	%xmm2, %xmm4
    980 
    981 	palignr	$7, %xmm3, %xmm2
    982 
    983 	pcmpeqb	%xmm1, %xmm0
    984 	pcmpeqb	%xmm2, %xmm1
    985 	psubb	%xmm0, %xmm1
    986 	pmovmskb %xmm1, %esi
    987 	sub	$0xffff, %esi
    988 	jnz	L(exit)
    989 
    990 #ifdef USE_AS_STRNCMP
    991 	cmp	$16, %ebp
    992 	lea	-16(%ebp), %ebp
    993 	jbe	L(more8byteseq)
    994 #endif
    995 
    996 	add	$16, %ecx
    997 	movdqa	%xmm4, %xmm3
    998 
    999 	add	$16, %edi
   1000 	jg	L(nibble_ashr_7)
   1001 
   1002 	movdqa	(%eax, %ecx), %xmm1
   1003 	movdqa	(%edx, %ecx), %xmm2
   1004 	movdqa	%xmm2, %xmm4
   1005 
   1006 	palignr	$7, %xmm3, %xmm2
   1007 
   1008 	pcmpeqb	%xmm1, %xmm0
   1009 	pcmpeqb	%xmm2, %xmm1
   1010 	psubb	%xmm0, %xmm1
   1011 	pmovmskb %xmm1, %esi
   1012 	sub	$0xffff, %esi
   1013 	jnz	L(exit)
   1014 
   1015 #ifdef USE_AS_STRNCMP
   1016 	cmp	$16, %ebp
   1017 	lea	-16(%ebp), %ebp
   1018 	jbe	L(more8byteseq)
   1019 #endif
   1020 
   1021 	add	$16, %ecx
   1022 	movdqa	%xmm4, %xmm3
   1023 	jmp	L(loop_ashr_7)
   1024 
   1025 	.p2align 4
   1026 L(nibble_ashr_7):
   1027 	pcmpeqb	%xmm3, %xmm0
   1028 	pmovmskb %xmm0, %esi
   1029 	test	$0xff80, %esi
   1030 	jnz	L(ashr_7_exittail)
   1031 
   1032 #ifdef USE_AS_STRNCMP
   1033 	cmp	$9, %ebp
   1034 	jbe	L(ashr_7_exittail)
   1035 #endif
    1036 	pxor	%xmm0, %xmm0
   1038 	sub	$0x1000, %edi
   1039 	jmp	L(gobble_ashr_7)
   1040 
   1041 	.p2align 4
   1042 L(ashr_7_exittail):
   1043 	movdqa	(%eax, %ecx), %xmm1
   1044 	psrldq	$7, %xmm0
   1045 	psrldq	$7, %xmm3
   1046 	jmp	L(aftertail)
   1047 
   1048 /*
   1049  * The following cases will be handled by ashr_8
   1050  * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
   1051  *        n(8~15)            n - 8            7(15 +(n-8) - n)         ashr_8
   1052  */
   1053 	.p2align 4
   1054 L(ashr_8):
   1055 	mov	$0xffff, %esi
   1056 	pxor	%xmm0, %xmm0
   1057 	movdqa	(%edx), %xmm2
   1058 	movdqa	(%eax), %xmm1
   1059 	pcmpeqb	%xmm1, %xmm0
   1060 	pslldq	$8, %xmm2
   1061 	pcmpeqb	%xmm1, %xmm2
   1062 	psubb	%xmm0, %xmm2
   1063 	pmovmskb %xmm2, %edi
   1064 	shr	%cl, %esi
   1065 	shr	%cl, %edi
   1066 	sub	%edi, %esi
   1067 	lea	-8(%ecx), %edi
   1068 	jnz	L(less32bytes)
   1069 
   1070 	UPDATE_STRNCMP_COUNTER
   1071 
   1072 	movdqa	(%edx), %xmm3
   1073 	pxor	%xmm0, %xmm0
   1074 	mov	$16, %ecx
   1075 	or	$8, %ebx
   1076 	lea	8(%edx), %edi
   1077 	and	$0xfff, %edi
   1078 	sub	$0x1000, %edi
   1079 
   1080 	.p2align 4
   1081 L(loop_ashr_8):
   1082 	add	$16, %edi
   1083 	jg	L(nibble_ashr_8)
   1084 
   1085 L(gobble_ashr_8):
   1086 	movdqa	(%eax, %ecx), %xmm1
   1087 	movdqa	(%edx, %ecx), %xmm2
   1088 	movdqa	%xmm2, %xmm4
   1089 
   1090 	palignr	$8, %xmm3, %xmm2
   1091 
   1092 	pcmpeqb	%xmm1, %xmm0
   1093 	pcmpeqb	%xmm2, %xmm1
   1094 	psubb	%xmm0, %xmm1
   1095 	pmovmskb %xmm1, %esi
   1096 	sub	$0xffff, %esi
   1097 	jnz	L(exit)
   1098 
   1099 #ifdef USE_AS_STRNCMP
   1100 	cmp	$16, %ebp
   1101 	lea	-16(%ebp), %ebp
   1102 	jbe	L(more8byteseq)
   1103 #endif
   1104 	add	$16, %ecx
   1105 	movdqa	%xmm4, %xmm3
   1106 
   1107 	add	$16, %edi
   1108 	jg	L(nibble_ashr_8)
   1109 
   1110 	movdqa	(%eax, %ecx), %xmm1
   1111 	movdqa	(%edx, %ecx), %xmm2
   1112 	movdqa	%xmm2, %xmm4
   1113 
   1114 	palignr	$8, %xmm3, %xmm2
   1115 
   1116 	pcmpeqb	%xmm1, %xmm0
   1117 	pcmpeqb	%xmm2, %xmm1
   1118 	psubb	%xmm0, %xmm1
   1119 	pmovmskb %xmm1, %esi
   1120 	sub	$0xffff, %esi
   1121 	jnz	L(exit)
   1122 
   1123 #ifdef USE_AS_STRNCMP
   1124 	cmp	$16, %ebp
   1125 	lea	-16(%ebp), %ebp
   1126 	jbe	L(more8byteseq)
   1127 #endif
   1128 	add	$16, %ecx
   1129 	movdqa	%xmm4, %xmm3
   1130 	jmp	L(loop_ashr_8)
   1131 
   1132 	.p2align 4
   1133 L(nibble_ashr_8):
   1134 	pcmpeqb	%xmm3, %xmm0
   1135 	pmovmskb %xmm0, %esi
   1136 	test	$0xff00, %esi
   1137 	jnz	L(ashr_8_exittail)
   1138 
   1139 #ifdef USE_AS_STRNCMP
   1140 	cmp	$8, %ebp
   1141 	jbe	L(ashr_8_exittail)
   1142 #endif
    1143 	pxor	%xmm0, %xmm0
   1145 	sub	$0x1000, %edi
   1146 	jmp	L(gobble_ashr_8)
   1147 
   1148 	.p2align 4
   1149 L(ashr_8_exittail):
   1150 	movdqa	(%eax, %ecx), %xmm1
   1151 	psrldq	$8, %xmm0
   1152 	psrldq	$8, %xmm3
   1153 	jmp	L(aftertail)
   1154 
   1155 /*
   1156  * The following cases will be handled by ashr_9
   1157  * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
   1158  *        n(7~15)            n - 7            8(15 +(n-7) - n)         ashr_9
   1159  */
   1160 	.p2align 4
   1161 L(ashr_9):
   1162 	mov	$0xffff, %esi
   1163 	pxor	%xmm0, %xmm0
   1164 	movdqa	(%edx), %xmm2
   1165 	movdqa	(%eax), %xmm1
   1166 	pcmpeqb	%xmm1, %xmm0
   1167 	pslldq	$7, %xmm2
   1168 	pcmpeqb	%xmm1, %xmm2
   1169 	psubb	%xmm0, %xmm2
   1170 	pmovmskb %xmm2, %edi
   1171 	shr	%cl, %esi
   1172 	shr	%cl, %edi
   1173 	sub	%edi, %esi
   1174 	lea	-7(%ecx), %edi
   1175 	jnz	L(less32bytes)
   1176 
   1177 	UPDATE_STRNCMP_COUNTER
   1178 
   1179 	movdqa	(%edx), %xmm3
   1180 	pxor	%xmm0, %xmm0
   1181 	mov	$16, %ecx
   1182 	or	$9, %ebx
   1183 	lea	9(%edx), %edi
   1184 	and	$0xfff, %edi
   1185 	sub	$0x1000, %edi
   1186 
   1187 	.p2align 4
   1188 L(loop_ashr_9):
   1189 	add	$16, %edi
   1190 	jg	L(nibble_ashr_9)
   1191 
   1192 L(gobble_ashr_9):
   1193 	movdqa	(%eax, %ecx), %xmm1
   1194 	movdqa	(%edx, %ecx), %xmm2
   1195 	movdqa	%xmm2, %xmm4
   1196 
   1197 	palignr	$9, %xmm3, %xmm2
   1198 
   1199 	pcmpeqb	%xmm1, %xmm0
   1200 	pcmpeqb	%xmm2, %xmm1
   1201 	psubb	%xmm0, %xmm1
   1202 	pmovmskb %xmm1, %esi
   1203 	sub	$0xffff, %esi
   1204 	jnz	L(exit)
   1205 
   1206 #ifdef USE_AS_STRNCMP
   1207 	cmp	$16, %ebp
   1208 	lea	-16(%ebp), %ebp
   1209 	jbe	L(more8byteseq)
   1210 #endif
   1211 	add	$16, %ecx
   1212 	movdqa	%xmm4, %xmm3
   1213 
   1214 	add	$16, %edi
   1215 	jg	L(nibble_ashr_9)
   1216 
   1217 	movdqa	(%eax, %ecx), %xmm1
   1218 	movdqa	(%edx, %ecx), %xmm2
   1219 	movdqa	%xmm2, %xmm4
   1220 
   1221 	palignr	$9, %xmm3, %xmm2
   1222 
   1223 	pcmpeqb	%xmm1, %xmm0
   1224 	pcmpeqb	%xmm2, %xmm1
   1225 	psubb	%xmm0, %xmm1
   1226 	pmovmskb %xmm1, %esi
   1227 	sub	$0xffff, %esi
   1228 	jnz	L(exit)
   1229 
   1230 #ifdef USE_AS_STRNCMP
   1231 	cmp	$16, %ebp
   1232 	lea	-16(%ebp), %ebp
   1233 	jbe	L(more8byteseq)
   1234 #endif
   1235 	add	$16, %ecx
   1236 	movdqa	%xmm4, %xmm3
   1237 	jmp	L(loop_ashr_9)
   1238 
   1239 	.p2align 4
   1240 L(nibble_ashr_9):
   1241 	pcmpeqb	%xmm3, %xmm0
   1242 	pmovmskb %xmm0, %esi
   1243 	test	$0xfe00, %esi
   1244 	jnz	L(ashr_9_exittail)
   1245 
   1246 #ifdef USE_AS_STRNCMP
   1247 	cmp	$7, %ebp
   1248 	jbe	L(ashr_9_exittail)
   1249 #endif
   1250 	pxor	%xmm0, %xmm0
   1251 	sub	$0x1000, %edi
   1252 	jmp	L(gobble_ashr_9)
   1253 
   1254 	.p2align 4
   1255 L(ashr_9_exittail):
   1256 	movdqa	(%eax, %ecx), %xmm1
   1257 	psrldq	$9, %xmm0
   1258 	psrldq	$9, %xmm3
   1259 	jmp	L(aftertail)
   1260 
   1261 /*
   1262  * The following cases will be handled by ashr_10
   1263  * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
   1264  *        n(6~15)            n - 6            9(15 +(n-6) - n)         ashr_10
   1265  */
   1266 	.p2align 4
   1267 L(ashr_10):
   1268 	mov	$0xffff, %esi
   1269 	pxor	%xmm0, %xmm0
   1270 	movdqa	(%edx), %xmm2
   1271 	movdqa	(%eax), %xmm1
   1272 	pcmpeqb	%xmm1, %xmm0
   1273 	pslldq	$6, %xmm2
   1274 	pcmpeqb	%xmm1, %xmm2
   1275 	psubb	%xmm0, %xmm2
   1276 	pmovmskb %xmm2, %edi
   1277 	shr	%cl, %esi
   1278 	shr	%cl, %edi
   1279 	sub	%edi, %esi
   1280 	lea	-6(%ecx), %edi
   1281 	jnz	L(less32bytes)
   1282 
   1283 	UPDATE_STRNCMP_COUNTER
   1284 
   1285 	movdqa	(%edx), %xmm3
   1286 	pxor	%xmm0, %xmm0
   1287 	mov	$16, %ecx
   1288 	or	$10, %ebx
   1289 	lea	10(%edx), %edi
   1290 	and	$0xfff, %edi
   1291 	sub	$0x1000, %edi
   1292 
   1293 	.p2align 4
   1294 L(loop_ashr_10):
   1295 	add	$16, %edi
   1296 	jg	L(nibble_ashr_10)
   1297 
   1298 L(gobble_ashr_10):
   1299 	movdqa	(%eax, %ecx), %xmm1
   1300 	movdqa	(%edx, %ecx), %xmm2
   1301 	movdqa	%xmm2, %xmm4
   1302 
   1303 	palignr	$10, %xmm3, %xmm2
   1304 
   1305 	pcmpeqb	%xmm1, %xmm0
   1306 	pcmpeqb	%xmm2, %xmm1
   1307 	psubb	%xmm0, %xmm1
   1308 	pmovmskb %xmm1, %esi
   1309 	sub	$0xffff, %esi
   1310 	jnz	L(exit)
   1311 
   1312 #ifdef USE_AS_STRNCMP
   1313 	cmp	$16, %ebp
   1314 	lea	-16(%ebp), %ebp
   1315 	jbe	L(more8byteseq)
   1316 #endif
   1317 	add	$16, %ecx
   1318 	movdqa	%xmm4, %xmm3
   1319 
   1320 	add	$16, %edi
   1321 	jg	L(nibble_ashr_10)
   1322 
   1323 	movdqa	(%eax, %ecx), %xmm1
   1324 	movdqa	(%edx, %ecx), %xmm2
   1325 	movdqa	%xmm2, %xmm4
   1326 
   1327 	palignr	$10, %xmm3, %xmm2
   1328 
   1329 	pcmpeqb	%xmm1, %xmm0
   1330 	pcmpeqb	%xmm2, %xmm1
   1331 	psubb	%xmm0, %xmm1
   1332 	pmovmskb %xmm1, %esi
   1333 	sub	$0xffff, %esi
   1334 	jnz	L(exit)
   1335 
   1336 #ifdef USE_AS_STRNCMP
   1337 	cmp	$16, %ebp
   1338 	lea	-16(%ebp), %ebp
   1339 	jbe	L(more8byteseq)
   1340 #endif
   1341 	add	$16, %ecx
   1342 	movdqa	%xmm4, %xmm3
   1343 	jmp	L(loop_ashr_10)
   1344 
   1345 	.p2align 4
   1346 L(nibble_ashr_10):
   1347 	pcmpeqb	%xmm3, %xmm0
   1348 	pmovmskb %xmm0, %esi
   1349 	test	$0xfc00, %esi
   1350 	jnz	L(ashr_10_exittail)
   1351 
   1352 #ifdef USE_AS_STRNCMP
   1353 	cmp	$6, %ebp
   1354 	jbe	L(ashr_10_exittail)
   1355 #endif
   1356 	pxor	%xmm0, %xmm0
   1357 	sub	$0x1000, %edi
   1358 	jmp	L(gobble_ashr_10)
   1359 
   1360 	.p2align 4
   1361 L(ashr_10_exittail):
   1362 	movdqa	(%eax, %ecx), %xmm1
   1363 	psrldq	$10, %xmm0
   1364 	psrldq	$10, %xmm3
   1365 	jmp	L(aftertail)
   1366 
   1367 /*
   1368  * The following cases will be handled by ashr_11
   1369  * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
   1370  *        n(5~15)            n - 5            10(15 +(n-5) - n)         ashr_11
   1371  */
   1372 	.p2align 4
   1373 L(ashr_11):
   1374 	mov	$0xffff, %esi
   1375 	pxor	%xmm0, %xmm0
   1376 	movdqa	(%edx), %xmm2
   1377 	movdqa	(%eax), %xmm1
   1378 	pcmpeqb	%xmm1, %xmm0
   1379 	pslldq	$5, %xmm2
   1380 	pcmpeqb	%xmm1, %xmm2
   1381 	psubb	%xmm0, %xmm2
   1382 	pmovmskb %xmm2, %edi
   1383 	shr	%cl, %esi
   1384 	shr	%cl, %edi
   1385 	sub	%edi, %esi
   1386 	lea	-5(%ecx), %edi
   1387 	jnz	L(less32bytes)
   1388 
   1389 	UPDATE_STRNCMP_COUNTER
   1390 
   1391 	movdqa	(%edx), %xmm3
   1392 	pxor	%xmm0, %xmm0
   1393 	mov	$16, %ecx
   1394 	or	$11, %ebx
   1395 	lea	11(%edx), %edi
   1396 	and	$0xfff, %edi
   1397 	sub	$0x1000, %edi
   1398 
   1399 	.p2align 4
   1400 L(loop_ashr_11):
   1401 	add	$16, %edi
   1402 	jg	L(nibble_ashr_11)
   1403 
   1404 L(gobble_ashr_11):
   1405 	movdqa	(%eax, %ecx), %xmm1
   1406 	movdqa	(%edx, %ecx), %xmm2
   1407 	movdqa	%xmm2, %xmm4
   1408 
   1409 	palignr	$11, %xmm3, %xmm2
   1410 
   1411 	pcmpeqb	%xmm1, %xmm0
   1412 	pcmpeqb	%xmm2, %xmm1
   1413 	psubb	%xmm0, %xmm1
   1414 	pmovmskb %xmm1, %esi
   1415 	sub	$0xffff, %esi
   1416 	jnz	L(exit)
   1417 
   1418 #ifdef USE_AS_STRNCMP
   1419 	cmp	$16, %ebp
   1420 	lea	-16(%ebp), %ebp
   1421 	jbe	L(more8byteseq)
   1422 #endif
   1423 	add	$16, %ecx
   1424 	movdqa	%xmm4, %xmm3
   1425 
   1426 	add	$16, %edi
   1427 	jg	L(nibble_ashr_11)
   1428 
   1429 	movdqa	(%eax, %ecx), %xmm1
   1430 	movdqa	(%edx, %ecx), %xmm2
   1431 	movdqa	%xmm2, %xmm4
   1432 
   1433 	palignr	$11, %xmm3, %xmm2
   1434 
   1435 	pcmpeqb	%xmm1, %xmm0
   1436 	pcmpeqb	%xmm2, %xmm1
   1437 	psubb	%xmm0, %xmm1
   1438 	pmovmskb %xmm1, %esi
   1439 	sub	$0xffff, %esi
   1440 	jnz	L(exit)
   1441 
   1442 #ifdef USE_AS_STRNCMP
   1443 	cmp	$16, %ebp
   1444 	lea	-16(%ebp), %ebp
   1445 	jbe	L(more8byteseq)
   1446 #endif
   1447 	add	$16, %ecx
   1448 	movdqa	%xmm4, %xmm3
   1449 	jmp	L(loop_ashr_11)
   1450 
   1451 	.p2align 4
   1452 L(nibble_ashr_11):
   1453 	pcmpeqb	%xmm3, %xmm0
   1454 	pmovmskb %xmm0, %esi
   1455 	test	$0xf800, %esi
   1456 	jnz	L(ashr_11_exittail)
   1457 
   1458 #ifdef USE_AS_STRNCMP
   1459 	cmp	$5, %ebp
   1460 	jbe	L(ashr_11_exittail)
   1461 #endif
   1462 	pxor	%xmm0, %xmm0
   1463 	sub	$0x1000, %edi
   1464 	jmp	L(gobble_ashr_11)
   1465 
   1466 	.p2align 4
   1467 L(ashr_11_exittail):
   1468 	movdqa	(%eax, %ecx), %xmm1
   1469 	psrldq	$11, %xmm0
   1470 	psrldq	$11, %xmm3
   1471 	jmp	L(aftertail)
   1472 
   1473 /*
   1474  * The following cases will be handled by ashr_12
   1475  * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
   1476  *        n(4~15)            n - 4            11(15 +(n-4) - n)         ashr_12
   1477  */
   1478 	.p2align 4
   1479 L(ashr_12):
   1480 	mov	$0xffff, %esi
   1481 	pxor	%xmm0, %xmm0
   1482 	movdqa	(%edx), %xmm2
   1483 	movdqa	(%eax), %xmm1
   1484 	pcmpeqb	%xmm1, %xmm0
   1485 	pslldq	$4, %xmm2
   1486 	pcmpeqb	%xmm1, %xmm2
   1487 	psubb	%xmm0, %xmm2
   1488 	pmovmskb %xmm2, %edi
   1489 	shr	%cl, %esi
   1490 	shr	%cl, %edi
   1491 	sub	%edi, %esi
   1492 	lea	-4(%ecx), %edi
   1493 	jnz	L(less32bytes)
   1494 
   1495 	UPDATE_STRNCMP_COUNTER
   1496 
   1497 	movdqa	(%edx), %xmm3
   1498 	pxor	%xmm0, %xmm0
   1499 	mov	$16, %ecx
   1500 	or	$12, %ebx
   1501 	lea	12(%edx), %edi
   1502 	and	$0xfff, %edi
   1503 	sub	$0x1000, %edi
   1504 
   1505 	.p2align 4
   1506 L(loop_ashr_12):
   1507 	add	$16, %edi
   1508 	jg	L(nibble_ashr_12)
   1509 
   1510 L(gobble_ashr_12):
   1511 	movdqa	(%eax, %ecx), %xmm1
   1512 	movdqa	(%edx, %ecx), %xmm2
   1513 	movdqa	%xmm2, %xmm4
   1514 
   1515 	palignr	$12, %xmm3, %xmm2
   1516 
   1517 	pcmpeqb	%xmm1, %xmm0
   1518 	pcmpeqb	%xmm2, %xmm1
   1519 	psubb	%xmm0, %xmm1
   1520 	pmovmskb %xmm1, %esi
   1521 	sub	$0xffff, %esi
   1522 	jnz	L(exit)
    1523 
    1524 #ifdef USE_AS_STRNCMP
    1525 	cmp	$16, %ebp
    1526 	lea	-16(%ebp), %ebp
    1527 	jbe	L(more8byteseq)
    1528 #endif
    1529 	add	$16, %ecx
    1530 	movdqa	%xmm4, %xmm3
    1531 
    1532 	add	$16, %edi
    1533 	jg	L(nibble_ashr_12)
    1534 
   1535 	movdqa	(%eax, %ecx), %xmm1
   1536 	movdqa	(%edx, %ecx), %xmm2
   1537 	movdqa	%xmm2, %xmm4
   1538 
   1539 	palignr	$12, %xmm3, %xmm2
   1540 
   1541 	pcmpeqb	%xmm1, %xmm0
   1542 	pcmpeqb	%xmm2, %xmm1
   1543 	psubb	%xmm0, %xmm1
   1544 	pmovmskb %xmm1, %esi
   1545 	sub	$0xffff, %esi
   1546 	jnz	L(exit)
   1547 
   1548 #ifdef USE_AS_STRNCMP
   1549 	cmp	$16, %ebp
   1550 	lea	-16(%ebp), %ebp
   1551 	jbe	L(more8byteseq)
   1552 #endif
   1553 	add	$16, %ecx
   1554 	movdqa	%xmm4, %xmm3
   1555 	jmp	L(loop_ashr_12)
   1556 
   1557 	.p2align 4
   1558 L(nibble_ashr_12):
   1559 	pcmpeqb	%xmm3, %xmm0
   1560 	pmovmskb %xmm0, %esi
   1561 	test	$0xf000, %esi
   1562 	jnz	L(ashr_12_exittail)
   1563 
   1564 #ifdef USE_AS_STRNCMP
   1565 	cmp	$4, %ebp
   1566 	jbe	L(ashr_12_exittail)
   1567 #endif
   1568 	pxor	%xmm0, %xmm0
   1569 	sub	$0x1000, %edi
   1570 	jmp	L(gobble_ashr_12)
   1571 
   1572 	.p2align 4
   1573 L(ashr_12_exittail):
   1574 	movdqa	(%eax, %ecx), %xmm1
   1575 	psrldq	$12, %xmm0
   1576 	psrldq	$12, %xmm3
   1577 	jmp	L(aftertail)
   1578 
   1579 /*
   1580  * The following cases will be handled by ashr_13
   1581  * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
   1582  *        n(3~15)            n - 3            12(15 +(n-3) - n)         ashr_13
   1583  */
   1584 	.p2align 4
   1585 L(ashr_13):
   1586 	mov	$0xffff, %esi
   1587 	pxor	%xmm0, %xmm0
   1588 	movdqa	(%edx), %xmm2
   1589 	movdqa	(%eax), %xmm1
   1590 	pcmpeqb	%xmm1, %xmm0
   1591 	pslldq	$3, %xmm2
   1592 	pcmpeqb	%xmm1, %xmm2
   1593 	psubb	%xmm0, %xmm2
   1594 	pmovmskb %xmm2, %edi
   1595 	shr	%cl, %esi
   1596 	shr	%cl, %edi
   1597 	sub	%edi, %esi
   1598 	lea	-3(%ecx), %edi
   1599 	jnz	L(less32bytes)
   1600 
   1601 	UPDATE_STRNCMP_COUNTER
   1602 
   1603 	movdqa	(%edx), %xmm3
   1604 	pxor	%xmm0, %xmm0
   1605 	mov	$16, %ecx
   1606 	or	$13, %ebx
   1607 	lea	13(%edx), %edi
   1608 	and	$0xfff, %edi
   1609 	sub	$0x1000, %edi
   1610 
   1611 	.p2align 4
   1612 L(loop_ashr_13):
   1613 	add	$16, %edi
   1614 	jg	L(nibble_ashr_13)
   1615 
   1616 L(gobble_ashr_13):
   1617 	movdqa	(%eax, %ecx), %xmm1
   1618 	movdqa	(%edx, %ecx), %xmm2
   1619 	movdqa	%xmm2, %xmm4
   1620 
   1621 	palignr	$13, %xmm3, %xmm2
   1622 
   1623 	pcmpeqb	%xmm1, %xmm0
   1624 	pcmpeqb	%xmm2, %xmm1
   1625 	psubb	%xmm0, %xmm1
   1626 	pmovmskb %xmm1, %esi
   1627 	sub	$0xffff, %esi
   1628 	jnz	L(exit)
   1629 
   1630 #ifdef USE_AS_STRNCMP
   1631 	cmp	$16, %ebp
   1632 	lea	-16(%ebp), %ebp
   1633 	jbe	L(more8byteseq)
   1634 #endif
   1635 	add	$16, %ecx
   1636 	movdqa	%xmm4, %xmm3
   1637 
   1638 	add	$16, %edi
   1639 	jg	L(nibble_ashr_13)
   1640 
   1641 	movdqa	(%eax, %ecx), %xmm1
   1642 	movdqa	(%edx, %ecx), %xmm2
   1643 	movdqa	%xmm2, %xmm4
   1644 
   1645 	palignr	$13, %xmm3, %xmm2
   1646 
   1647 	pcmpeqb	%xmm1, %xmm0
   1648 	pcmpeqb	%xmm2, %xmm1
   1649 	psubb	%xmm0, %xmm1
   1650 	pmovmskb %xmm1, %esi
   1651 	sub	$0xffff, %esi
   1652 	jnz	L(exit)
   1653 
   1654 #ifdef USE_AS_STRNCMP
   1655 	cmp	$16, %ebp
   1656 	lea	-16(%ebp), %ebp
   1657 	jbe	L(more8byteseq)
   1658 #endif
   1659 	add	$16, %ecx
   1660 	movdqa	%xmm4, %xmm3
   1661 	jmp	L(loop_ashr_13)
   1662 
   1663 	.p2align 4
   1664 L(nibble_ashr_13):
   1665 	pcmpeqb	%xmm3, %xmm0
   1666 	pmovmskb %xmm0, %esi
   1667 	test	$0xe000, %esi
   1668 	jnz	L(ashr_13_exittail)
   1669 
   1670 #ifdef USE_AS_STRNCMP
   1671 	cmp	$3, %ebp
   1672 	jbe	L(ashr_13_exittail)
   1673 #endif
   1674 	pxor	%xmm0, %xmm0
   1675 	sub	$0x1000, %edi
   1676 	jmp	L(gobble_ashr_13)
   1677 
   1678 	.p2align 4
   1679 L(ashr_13_exittail):
   1680 	movdqa	(%eax, %ecx), %xmm1
   1681 	psrldq	$13, %xmm0
   1682 	psrldq	$13, %xmm3
   1683 	jmp	L(aftertail)
   1684 
   1685 /*
   1686  * The following cases will be handled by ashr_14
   1687  * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
   1688  *        n(2~15)            n - 2            13(15 +(n-2) - n)         ashr_14
   1689  */
   1690 	.p2align 4
   1691 L(ashr_14):
   1692 	mov	$0xffff, %esi
   1693 	pxor	%xmm0, %xmm0
   1694 	movdqa	(%edx), %xmm2
   1695 	movdqa	(%eax), %xmm1
   1696 	pcmpeqb	%xmm1, %xmm0
   1697 	pslldq	$2, %xmm2
   1698 	pcmpeqb	%xmm1, %xmm2
   1699 	psubb	%xmm0, %xmm2
   1700 	pmovmskb %xmm2, %edi
   1701 	shr	%cl, %esi
   1702 	shr	%cl, %edi
   1703 	sub	%edi, %esi
   1704 	lea	-2(%ecx), %edi
   1705 	jnz	L(less32bytes)
   1706 
   1707 	UPDATE_STRNCMP_COUNTER
   1708 
   1709 	movdqa	(%edx), %xmm3
   1710 	pxor	%xmm0, %xmm0
   1711 	mov	$16, %ecx
   1712 	or	$14, %ebx
   1713 	lea	14(%edx), %edi
   1714 	and	$0xfff, %edi
   1715 	sub	$0x1000, %edi
   1716 
   1717 	.p2align 4
   1718 L(loop_ashr_14):
   1719 	add	$16, %edi
   1720 	jg	L(nibble_ashr_14)
   1721 
   1722 L(gobble_ashr_14):
   1723 	movdqa	(%eax, %ecx), %xmm1
   1724 	movdqa	(%edx, %ecx), %xmm2
   1725 	movdqa	%xmm2, %xmm4
   1726 
   1727 	palignr	$14, %xmm3, %xmm2
   1728 
   1729 	pcmpeqb	%xmm1, %xmm0
   1730 	pcmpeqb	%xmm2, %xmm1
   1731 	psubb	%xmm0, %xmm1
   1732 	pmovmskb %xmm1, %esi
   1733 	sub	$0xffff, %esi
   1734 	jnz	L(exit)
   1735 
   1736 #ifdef USE_AS_STRNCMP
   1737 	cmp	$16, %ebp
   1738 	lea	-16(%ebp), %ebp
   1739 	jbe	L(more8byteseq)
   1740 #endif
   1741 	add	$16, %ecx
   1742 	movdqa	%xmm4, %xmm3
   1743 
   1744 	add	$16, %edi
   1745 	jg	L(nibble_ashr_14)
   1746 
   1747 	movdqa	(%eax, %ecx), %xmm1
   1748 	movdqa	(%edx, %ecx), %xmm2
   1749 	movdqa	%xmm2, %xmm4
   1750 
   1751 	palignr	$14, %xmm3, %xmm2
   1752 
   1753 	pcmpeqb	%xmm1, %xmm0
   1754 	pcmpeqb	%xmm2, %xmm1
   1755 	psubb	%xmm0, %xmm1
   1756 	pmovmskb %xmm1, %esi
   1757 	sub	$0xffff, %esi
   1758 	jnz	L(exit)
   1759 
   1760 #ifdef USE_AS_STRNCMP
   1761 	cmp	$16, %ebp
   1762 	lea	-16(%ebp), %ebp
   1763 	jbe	L(more8byteseq)
   1764 #endif
   1765 	add	$16, %ecx
   1766 	movdqa	%xmm4, %xmm3
   1767 	jmp	L(loop_ashr_14)
   1768 
   1769 	.p2align 4
   1770 L(nibble_ashr_14):
   1771 	pcmpeqb	%xmm3, %xmm0
   1772 	pmovmskb %xmm0, %esi
   1773 	test	$0xc000, %esi
   1774 	jnz	L(ashr_14_exittail)
   1775 
   1776 #ifdef USE_AS_STRNCMP
   1777 	cmp	$2, %ebp
   1778 	jbe	L(ashr_14_exittail)
   1779 #endif
   1780 	pxor	%xmm0, %xmm0
   1781 	sub	$0x1000, %edi
   1782 	jmp	L(gobble_ashr_14)
   1783 
   1784 	.p2align 4
   1785 L(ashr_14_exittail):
   1786 	movdqa	(%eax, %ecx), %xmm1
   1787 	psrldq	$14, %xmm0
   1788 	psrldq	$14, %xmm3
   1789 	jmp	L(aftertail)
   1790 
   1791 /*
    1792  * The following cases will be handled by ashr_15
   1793  * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
   1794  *        n(1~15)            n - 1            14(15 +(n-1) - n)         ashr_15
   1795  */
   1796 
   1797 	.p2align 4
   1798 L(ashr_15):
   1799 	mov	$0xffff, %esi
   1800 	pxor	%xmm0, %xmm0
   1801 	movdqa	(%edx), %xmm2
   1802 	movdqa	(%eax), %xmm1
   1803 	pcmpeqb	%xmm1, %xmm0
   1804 	pslldq	$1, %xmm2
   1805 	pcmpeqb	%xmm1, %xmm2
   1806 	psubb	%xmm0, %xmm2
   1807 	pmovmskb %xmm2, %edi
   1808 	shr	%cl, %esi
   1809 	shr	%cl, %edi
   1810 	sub	%edi, %esi
   1811 	lea	-1(%ecx), %edi
   1812 	jnz	L(less32bytes)
   1813 
   1814 	UPDATE_STRNCMP_COUNTER
   1815 
   1816 	movdqa	(%edx), %xmm3
   1817 	pxor	%xmm0, %xmm0
   1818 	mov	$16, %ecx
   1819 	or	$15, %ebx
   1820 	lea	15(%edx), %edi
   1821 	and	$0xfff, %edi
   1822 	sub	$0x1000, %edi
   1823 
   1824 	.p2align 4
   1825 L(loop_ashr_15):
   1826 	add	$16, %edi
   1827 	jg	L(nibble_ashr_15)
   1828 
   1829 L(gobble_ashr_15):
   1830 	movdqa	(%eax, %ecx), %xmm1
   1831 	movdqa	(%edx, %ecx), %xmm2
   1832 	movdqa	%xmm2, %xmm4
   1833 
   1834 	palignr	$15, %xmm3, %xmm2
   1835 
   1836 	pcmpeqb	%xmm1, %xmm0
   1837 	pcmpeqb	%xmm2, %xmm1
   1838 	psubb	%xmm0, %xmm1
   1839 	pmovmskb %xmm1, %esi
   1840 	sub	$0xffff, %esi
   1841 	jnz	L(exit)
   1842 
   1843 #ifdef USE_AS_STRNCMP
   1844 	cmp	$16, %ebp
   1845 	lea	-16(%ebp), %ebp
   1846 	jbe	L(more8byteseq)
   1847 #endif
   1848 	add	$16, %ecx
   1849 	movdqa	%xmm4, %xmm3
   1850 
   1851 	add	$16, %edi
   1852 	jg	L(nibble_ashr_15)
   1853 
   1854 	movdqa	(%eax, %ecx), %xmm1
   1855 	movdqa	(%edx, %ecx), %xmm2
   1856 	movdqa	%xmm2, %xmm4
   1857 
   1858 	palignr	$15, %xmm3, %xmm2
   1859 
   1860 	pcmpeqb	%xmm1, %xmm0
   1861 	pcmpeqb	%xmm2, %xmm1
   1862 	psubb	%xmm0, %xmm1
   1863 	pmovmskb %xmm1, %esi
   1864 	sub	$0xffff, %esi
   1865 	jnz	L(exit)
   1866 
   1867 #ifdef USE_AS_STRNCMP
   1868 	cmp	$16, %ebp
   1869 	lea	-16(%ebp), %ebp
   1870 	jbe	L(more8byteseq)
   1871 #endif
   1872 	add	$16, %ecx
   1873 	movdqa	%xmm4, %xmm3
   1874 	jmp	L(loop_ashr_15)
   1875 
   1876 	.p2align 4
   1877 L(nibble_ashr_15):
   1878 	pcmpeqb	%xmm3, %xmm0
   1879 	pmovmskb %xmm0, %esi
   1880 	test	$0x8000, %esi
   1881 	jnz	L(ashr_15_exittail)
   1882 
   1883 #ifdef USE_AS_STRNCMP
   1884 	cmp	$1, %ebp
   1885 	jbe	L(ashr_15_exittail)
   1886 #endif
   1887 	pxor	%xmm0, %xmm0
   1888 	sub	$0x1000, %edi
   1889 	jmp	L(gobble_ashr_15)
   1890 
   1891 	.p2align 4
   1892 L(ashr_15_exittail):
   1893 	movdqa	(%eax, %ecx), %xmm1
   1894 	psrldq	$15, %xmm0
   1895 	psrldq	$15, %xmm3
   1896 	jmp	L(aftertail)
   1897 
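/* Common tail: the low 16 bits of %esi have their lowest set bit at the
   first byte that differs or ends the string.  Recover the byte offsets of
   both strings from %ecx plus the shift recorded in %ebx, undo the operand
   swap if bit 0x20 of %ebx is set, then fall into L(less16bytes) to turn
   the mask into a return value.  */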
   1898 	.p2align 4
   1899 L(aftertail):
   1900 	pcmpeqb	%xmm3, %xmm1
   1901 	psubb	%xmm0, %xmm1
   1902 	pmovmskb %xmm1, %esi
   1903 	not	%esi
   1904 L(exit):
   1905 	mov	%ebx, %edi
   1906 	and	$0x1f, %edi
   1907 	lea	-16(%edi, %ecx), %edi
   1908 L(less32bytes):
   1909 	add	%edi, %edx
   1910 	add	%ecx, %eax
   1911 	test	$0x20, %ebx
   1912 	jz	L(ret2)
   1913 	xchg	%eax, %edx
   1914 
   1915 	.p2align 4
   1916 L(ret2):
   1917 	mov	%esi, %ecx
   1918 	POP	(%esi)
   1919 	POP	(%edi)
   1920 	POP	(%ebx)
   1921 L(less16bytes):
   1922 	test	%cl, %cl
   1923 	jz	L(2next_8_bytes)
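	/* %cl covers bytes 0-7 of the block and %ch bytes 8-15; find the
	   lowest set bit to locate the first differing byte.  */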
   1924 
   1925 	test	$0x01, %cl
   1926 	jnz	L(Byte0)
   1927 
   1928 	test	$0x02, %cl
   1929 	jnz	L(Byte1)
   1930 
   1931 	test	$0x04, %cl
   1932 	jnz	L(Byte2)
   1933 
   1934 	test	$0x08, %cl
   1935 	jnz	L(Byte3)
   1936 
   1937 	test	$0x10, %cl
   1938 	jnz	L(Byte4)
   1939 
   1940 	test	$0x20, %cl
   1941 	jnz	L(Byte5)
   1942 
   1943 	test	$0x40, %cl
   1944 	jnz	L(Byte6)
   1945 #ifdef USE_AS_STRNCMP
   1946 	cmp	$7, %ebp
   1947 	jbe	L(eq)
   1948 #endif
   1949 
   1950 	movzx	7(%eax), %ecx
   1951 	movzx	7(%edx), %eax
   1952 
   1953 	sub	%ecx, %eax
   1954 	RETURN
   1955 
   1956 	.p2align 4
   1957 L(Byte0):
   1958 #ifdef USE_AS_STRNCMP
   1959 	cmp	$0, %ebp
   1960 	jbe	L(eq)
   1961 #endif
   1962 	movzx	(%eax), %ecx
   1963 	movzx	(%edx), %eax
   1964 
   1965 	sub	%ecx, %eax
   1966 	RETURN
   1967 
   1968 	.p2align 4
   1969 L(Byte1):
   1970 #ifdef USE_AS_STRNCMP
   1971 	cmp	$1, %ebp
   1972 	jbe	L(eq)
   1973 #endif
   1974 	movzx	1(%eax), %ecx
   1975 	movzx	1(%edx), %eax
   1976 
   1977 	sub	%ecx, %eax
   1978 	RETURN
   1979 
   1980 	.p2align 4
   1981 L(Byte2):
   1982 #ifdef USE_AS_STRNCMP
   1983 	cmp	$2, %ebp
   1984 	jbe	L(eq)
   1985 #endif
   1986 	movzx	2(%eax), %ecx
   1987 	movzx	2(%edx), %eax
   1988 
   1989 	sub	%ecx, %eax
   1990 	RETURN
   1991 
   1992 	.p2align 4
   1993 L(Byte3):
   1994 #ifdef USE_AS_STRNCMP
   1995 	cmp	$3, %ebp
   1996 	jbe	L(eq)
   1997 #endif
   1998 	movzx	3(%eax), %ecx
   1999 	movzx	3(%edx), %eax
   2000 
   2001 	sub	%ecx, %eax
   2002 	RETURN
   2003 
   2004 	.p2align 4
   2005 L(Byte4):
   2006 #ifdef USE_AS_STRNCMP
   2007 	cmp	$4, %ebp
   2008 	jbe	L(eq)
   2009 #endif
   2010 	movzx	4(%eax), %ecx
   2011 	movzx	4(%edx), %eax
   2012 
   2013 	sub	%ecx, %eax
   2014 	RETURN
   2015 
   2016 	.p2align 4
   2017 L(Byte5):
   2018 #ifdef USE_AS_STRNCMP
   2019 	cmp	$5, %ebp
   2020 	jbe	L(eq)
   2021 #endif
   2022 	movzx	5(%eax), %ecx
   2023 	movzx	5(%edx), %eax
   2024 
   2025 	sub	%ecx, %eax
   2026 	RETURN
   2027 
   2028 	.p2align 4
   2029 L(Byte6):
   2030 #ifdef USE_AS_STRNCMP
   2031 	cmp	$6, %ebp
   2032 	jbe	L(eq)
   2033 #endif
   2034 	movzx	6(%eax), %ecx
   2035 	movzx	6(%edx), %eax
   2036 
   2037 	sub	%ecx, %eax
   2038 	RETURN
   2039 
   2040 	.p2align 4
   2041 L(2next_8_bytes):
   2042 	add	$8, %eax
   2043 	add	$8, %edx
   2044 #ifdef USE_AS_STRNCMP
   2045 	cmp	$8, %ebp
   2046 	lea	-8(%ebp), %ebp
   2047 	jbe	L(eq)
   2048 #endif
   2049 
   2050 	test	$0x01, %ch
   2051 	jnz	L(Byte0)
   2052 
   2053 	test	$0x02, %ch
   2054 	jnz	L(Byte1)
   2055 
   2056 	test	$0x04, %ch
   2057 	jnz	L(Byte2)
   2058 
   2059 	test	$0x08, %ch
   2060 	jnz	L(Byte3)
   2061 
   2062 	test	$0x10, %ch
   2063 	jnz	L(Byte4)
   2064 
   2065 	test	$0x20, %ch
   2066 	jnz	L(Byte5)
   2067 
   2068 	test	$0x40, %ch
   2069 	jnz	L(Byte6)
   2070 
   2071 #ifdef USE_AS_STRNCMP
   2072 	cmp	$7, %ebp
   2073 	jbe	L(eq)
   2074 #endif
   2075 	movzx	7(%eax), %ecx
   2076 	movzx	7(%edx), %eax
   2077 
   2078 	sub	%ecx, %eax
   2079 	RETURN
   2080 
   2081 	.p2align 4
   2082 L(neq):
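	/* The flags are still those of the byte comparison that branched
	   here: return +1 if the byte from the first string (%edx) is the
	   larger one, otherwise -1.  */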
   2083 	mov	$1, %eax
   2084 	ja	L(neq_bigger)
   2085 	neg	%eax
   2086 L(neq_bigger):
   2087 	RETURN
   2088 
   2089 #ifdef USE_AS_STRNCMP
   2090 	CFI_PUSH (%ebx)
   2091 	CFI_PUSH (%edi)
   2092 	CFI_PUSH (%esi)
   2093 
   2094 	.p2align 4
   2095 L(more8byteseq):
   2096 	POP	(%esi)
   2097 	POP	(%edi)
   2098 	POP	(%ebx)
   2099 #endif
   2100 
   2101 L(eq):
   2102 
   2103 #ifdef USE_AS_STRNCMP
   2104 	POP	(%ebp)
   2105 #endif
   2106 	xorl	%eax, %eax
   2107 	ret
   2108 
   2109 #ifdef USE_AS_STRNCMP
   2110 	CFI_PUSH (%ebp)
   2111 
   2112 	.p2align 4
   2113 L(less16bytes_sncmp):
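	/* strncmp with n < 16: simple byte-by-byte compare that stops at the
	   first difference, at a NUL, or after n bytes.  */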
   2114 	test	%ebp, %ebp
   2115 	jz	L(eq)
   2116 
   2117 	movzbl	(%eax), %ecx
   2118 	cmpb	%cl, (%edx)
   2119 	jne	L(neq)
   2120 	test	%cl, %cl
   2121 	je	L(eq)
   2122 
   2123 	cmp	$1, %ebp
   2124 	je	L(eq)
   2125 
   2126 	movzbl	1(%eax), %ecx
   2127 	cmpb	%cl, 1(%edx)
   2128 	jne	L(neq)
   2129 	test	%cl, %cl
   2130 	je	L(eq)
   2131 
   2132 	cmp	$2, %ebp
   2133 	je	L(eq)
   2134 
   2135 	movzbl	2(%eax), %ecx
   2136 	cmpb	%cl, 2(%edx)
   2137 	jne	L(neq)
   2138 	test	%cl, %cl
   2139 	je	L(eq)
   2140 
   2141 	cmp	$3, %ebp
   2142 	je	L(eq)
   2143 
   2144 	movzbl	3(%eax), %ecx
   2145 	cmpb	%cl, 3(%edx)
   2146 	jne	L(neq)
   2147 	test	%cl, %cl
   2148 	je	L(eq)
   2149 
   2150 	cmp	$4, %ebp
   2151 	je	L(eq)
   2152 
   2153 	movzbl	4(%eax), %ecx
   2154 	cmpb	%cl, 4(%edx)
   2155 	jne	L(neq)
   2156 	test	%cl, %cl
   2157 	je	L(eq)
   2158 
   2159 	cmp	$5, %ebp
   2160 	je	L(eq)
   2161 
   2162 	movzbl	5(%eax), %ecx
   2163 	cmpb	%cl, 5(%edx)
   2164 	jne	L(neq)
   2165 	test	%cl, %cl
   2166 	je	L(eq)
   2167 
   2168 	cmp	$6, %ebp
   2169 	je	L(eq)
   2170 
   2171 	movzbl	6(%eax), %ecx
   2172 	cmpb	%cl, 6(%edx)
   2173 	jne	L(neq)
   2174 	test	%cl, %cl
   2175 	je	L(eq)
   2176 
   2177 	cmp	$7, %ebp
   2178 	je	L(eq)
   2179 
   2180 	movzbl	7(%eax), %ecx
   2181 	cmpb	%cl, 7(%edx)
   2182 	jne	L(neq)
   2183 	test	%cl, %cl
   2184 	je	L(eq)
   2185 
   2186 
   2187 	cmp	$8, %ebp
   2188 	je	L(eq)
   2189 
   2190 	movzbl	8(%eax), %ecx
   2191 	cmpb	%cl, 8(%edx)
   2192 	jne	L(neq)
   2193 	test	%cl, %cl
   2194 	je	L(eq)
   2195 
   2196 	cmp	$9, %ebp
   2197 	je	L(eq)
   2198 
   2199 	movzbl	9(%eax), %ecx
   2200 	cmpb	%cl, 9(%edx)
   2201 	jne	L(neq)
   2202 	test	%cl, %cl
   2203 	je	L(eq)
   2204 
   2205 	cmp	$10, %ebp
   2206 	je	L(eq)
   2207 
   2208 	movzbl	10(%eax), %ecx
   2209 	cmpb	%cl, 10(%edx)
   2210 	jne	L(neq)
   2211 	test	%cl, %cl
   2212 	je	L(eq)
   2213 
   2214 	cmp	$11, %ebp
   2215 	je	L(eq)
   2216 
   2217 	movzbl	11(%eax), %ecx
   2218 	cmpb	%cl, 11(%edx)
   2219 	jne	L(neq)
   2220 	test	%cl, %cl
   2221 	je	L(eq)
   2222 
   2223 
   2224 	cmp	$12, %ebp
   2225 	je	L(eq)
   2226 
   2227 	movzbl	12(%eax), %ecx
   2228 	cmpb	%cl, 12(%edx)
   2229 	jne	L(neq)
   2230 	test	%cl, %cl
   2231 	je	L(eq)
   2232 
   2233 	cmp	$13, %ebp
   2234 	je	L(eq)
   2235 
   2236 	movzbl	13(%eax), %ecx
   2237 	cmpb	%cl, 13(%edx)
   2238 	jne	L(neq)
   2239 	test	%cl, %cl
   2240 	je	L(eq)
   2241 
   2242 	cmp	$14, %ebp
   2243 	je	L(eq)
   2244 
   2245 	movzbl	14(%eax), %ecx
   2246 	cmpb	%cl, 14(%edx)
   2247 	jne	L(neq)
   2248 	test	%cl, %cl
   2249 	je	L(eq)
   2250 
   2251 	cmp	$15, %ebp
   2252 	je	L(eq)
   2253 
   2254 	movzbl	15(%eax), %ecx
   2255 	cmpb	%cl, 15(%edx)
   2256 	jne	L(neq)
   2257 	test	%cl, %cl
   2258 	je	L(eq)
   2259 
   2260 	POP	(%ebp)
   2261 	xor	%eax, %eax
   2262 	ret
   2263 #endif
   2264 
   2265 END (ssse3_strcmp_latest)
   2266