Home | History | Annotate | Download | only in string
      1 /*
      2 Copyright (c) 2011, Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     31 #ifndef L
        /* L(label) pastes ".L" in front of LABEL.  With GNU as, names that
           start with ".L" are local labels that are not exported.  */
     32 # define L(label)	.L##label
     33 #endif
     34 
     35 #ifndef cfi_startproc
        /* The cfi_* names below are fallbacks: each is defined only when it
           is not already defined, and forwards its arguments unchanged to
           the assembler directive of the same name with a leading dot.  */
     36 # define cfi_startproc	.cfi_startproc
     37 #endif
     38 
     39 #ifndef cfi_endproc
     40 # define cfi_endproc	.cfi_endproc
     41 #endif
     42 
     43 #ifndef cfi_rel_offset
     44 # define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
     45 #endif
     46 
     47 #ifndef cfi_restore
     48 # define cfi_restore(reg)	.cfi_restore reg
     49 #endif
     50 
     51 #ifndef cfi_adjust_cfa_offset
     52 # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
     53 #endif
     54 
     55 #ifndef ENTRY
        /* ENTRY(name): mark NAME as a function-type, global symbol, align it
           to 16 bytes (.p2align 4), emit the label itself and open a CFI
           region.  Defined only if no ENTRY macro exists already.  */
     56 # define ENTRY(name)	\
     57 	.type name,  @function;	\
     58 	.globl name;	\
     59 	.p2align 4;	\
     60 name:	\
     61 	cfi_startproc
     62 #endif
     63 
     64 #ifndef END
        /* END(name): close the CFI region opened by ENTRY and set the symbol
           size to the distance from NAME to the current location.  */
     65 # define END(name)	\
     66 	cfi_endproc;	\
     67 	.size name,	.-name
     68 #endif
     69 
        /* CFI_PUSH(REG) / CFI_POP(REG) emit only unwind annotations, no
           instructions: CFI_PUSH moves the CFA offset by +4 and records REG
           as saved at offset 0 from the stack pointer; CFI_POP moves it by -4
           and marks REG as restored.  */
     70 #define CFI_PUSH(REG)	\
     71 	cfi_adjust_cfa_offset (4);	\
     72 	cfi_rel_offset (REG, 0)
     73 
     74 #define CFI_POP(REG)	\
     75 	cfi_adjust_cfa_offset (-4);	\
     76 	cfi_restore (REG)
     77 
        /* PUSH(REG) / POP(REG): the real pushl/popl followed by the matching
           unwind annotation from above.  */
     78 #define PUSH(REG) pushl REG; CFI_PUSH (REG)
     79 #define POP(REG) popl REG; CFI_POP (REG)
     80 
     81 #define PARMS  4	/* %esp-relative offset of the first argument at entry (offset 0 is presumably the return address) */
     82 #define STR1  PARMS	/* 1st argument: loaded into %ecx, used as the buffer start address */
     83 #define STR2  STR1+4	/* 2nd argument: loaded into %xmm1, the byte to search for */
     84 #define LEN   STR2+4	/* 3rd argument: loaded into %edx, the number of bytes to search */
     85 
     86 	.text
        /*
         * memrchr: search a buffer backwards for a byte value using SSE2.
         *
         * Arguments are read from the stack relative to %esp at entry:
         *   STR1(%esp)  start address of the buffer             -> %ecx
         *   STR2(%esp)  byte to look for (only its low 8 bits
         *               end up in the comparison pattern)       -> %xmm1
         *   LEN(%esp)   number of bytes to search               -> %edx
         *
         * Result in %eax: address of the highest-addressed byte inside
         * [start, start + length) that equals the search byte, or 0 when
         * there is none or when the length is 0.
         *
         * Registers written: %eax, %ecx, %edx, %xmm0-%xmm4 and the flags.
         * %edi is used only on the unaligned short-buffer path and is
         * pushed/popped around that use.
         *
         * Strategy: buffers of at most 16 bytes go to length_less16.
         * Otherwise the last 16 bytes are tested with an unaligned load and
         * the rest is scanned downwards with aligned loads.
         */
     87 ENTRY (memrchr)
     88 	mov	STR1(%esp), %ecx	/* ecx = buffer start */
     89 	movd	STR2(%esp), %xmm1	/* low dword of xmm1 = search value */
     90 	mov	LEN(%esp), %edx	/* edx = length */
     91 
     92 	test	%edx, %edx
     93 	jz	L(return_null)	/* length 0: nothing to search */
     94 	sub	$16, %edx	/* edx = length - 16 */
     95 	jbe	L(length_less16)	/* length <= 16 (unsigned) is handled separately */
     96 
     97 	punpcklbw %xmm1, %xmm1	/* two byte-interleaves copy byte 0 into bytes 0..3 */
     98 	add	%edx, %ecx	/* ecx = start + length - 16, the last 16 bytes */
     99 	punpcklbw %xmm1, %xmm1
    100 
    101 	movdqu	(%ecx), %xmm0	/* unaligned load of the final 16 bytes */
    102 	pshufd	$0, %xmm1, %xmm1	/* broadcast dword 0: search byte now in all 16 lanes */
    103 	pcmpeqb	%xmm1, %xmm0
    104 
    105 	pmovmskb %xmm0, %eax	/* bit i of eax set <=> byte at ecx + i matched */
    106 	test	%eax, %eax
    107 	jnz	L(exit_dispatch)
    108 
        /* No match in the tail.  Move ecx 64 bytes down; from here on
           edx == (ecx + 64) - start, i.e. the bytes still unsearched.  */
    109 	sub	$64, %ecx
    110 	mov	%ecx, %eax
    111 	and	$15, %eax	/* eax = ecx modulo 16 */
    112 	jz	L(loop_prolog)	/* already 16-byte aligned */
    113 
    114 	add	$16, %ecx
    115 	add	$16, %edx
    116 	and	$-16, %ecx	/* round ecx up to the next 16-byte boundary */
    117 	sub	%eax, %edx	/* adjust edx by the same amount to keep the invariant */
    118 
    119 	.p2align 4
    120 /* Loop start on aligned string.  */
        /* On entry ecx is 16-byte aligned and edx == (ecx + 64) - start.
           Two unrolled passes follow.  Each pass tests the 64 bytes at ecx
           one 16-byte chunk at a time, highest chunk first, so the first
           chunk that reports a match contains the highest-addressed match.
           After both passes ecx is rounded to a 64-byte boundary for the
           main loop.  */
    121 L(loop_prolog):
    122 	sub	$64, %edx	/* edx = ecx - start */
    123 	jbe	L(exit_loop)	/* ecx <= start: finish with the bounds-aware tail code */
    124 
    125 	movdqa	48(%ecx), %xmm0
    126 	pcmpeqb	%xmm1, %xmm0
    127 	pmovmskb %xmm0, %eax
    128 	test	%eax, %eax
    129 	jnz	L(matches48)
    130 
    131 	movdqa	32(%ecx), %xmm2
    132 	pcmpeqb	%xmm1, %xmm2
    133 	pmovmskb %xmm2, %eax
    134 	test	%eax, %eax
    135 	jnz	L(matches32)
    136 
    137 	movdqa	16(%ecx), %xmm3
    138 	pcmpeqb	%xmm1, %xmm3
    139 	pmovmskb %xmm3, %eax
    140 	test	%eax, %eax
    141 	jnz	L(matches16)
    142 
    143 	movdqa	(%ecx), %xmm4
    144 	pcmpeqb	%xmm1, %xmm4
    145 	pmovmskb %xmm4, %eax
    146 	test	%eax, %eax
    147 	jnz	L(exit_dispatch)	/* chunk at offset 0: ecx already points at it */
    148 
        /* Second pass: step the window down by another 64 bytes.  */
    149 	sub	$64, %ecx
    150 	sub	$64, %edx	/* edx = ecx - start */
    151 	jbe	L(exit_loop)
    152 
    153 	movdqa	48(%ecx), %xmm0
    154 	pcmpeqb	%xmm1, %xmm0
    155 	pmovmskb %xmm0, %eax
    156 	test	%eax, %eax
    157 	jnz	L(matches48)
    158 
    159 	movdqa	32(%ecx), %xmm2
    160 	pcmpeqb	%xmm1, %xmm2
    161 	pmovmskb %xmm2, %eax
    162 	test	%eax, %eax
    163 	jnz	L(matches32)
    164 
    165 	movdqa	16(%ecx), %xmm3
    166 	pcmpeqb	%xmm1, %xmm3
    167 	pmovmskb %xmm3, %eax
    168 	test	%eax, %eax
    169 	jnz	L(matches16)
    170 
    171 	movdqa	(%ecx), %xmm3
    172 	pcmpeqb	%xmm1, %xmm3
    173 	pmovmskb %xmm3, %eax
    174 	test	%eax, %eax
    175 	jnz	L(exit_dispatch)
    176 
        /* Bring ecx to a 64-byte boundary before entering align64_loop.  */
    177 	mov	%ecx, %eax
    178 	and	$63, %eax	/* eax = ecx modulo 64 */
    179 	test	%eax, %eax
    180 	jz	L(align64_loop)	/* already 64-byte aligned */
    181 
    182 	add	$64, %ecx
    183 	add	$64, %edx
    184 	and	$-64, %ecx	/* round ecx up to the next 64-byte boundary */
    185 	sub	%eax, %edx	/* adjust edx by the same amount (edx = ecx - start) */
    186 
    187 	.p2align 4
        /* Main loop.  Each iteration steps ecx down by 64 bytes and tests all
           four 16-byte chunks of the window together.  It is left through
           exit_loop once ecx <= start, or falls out below when a chunk
           matched.  */
    188 L(align64_loop):
    189 	sub	$64, %ecx
    190 	sub	$64, %edx	/* edx = ecx - start */
    191 	jbe	L(exit_loop)
    192 
    193 	movdqa	(%ecx), %xmm0
    194 	movdqa	16(%ecx), %xmm2
    195 	movdqa	32(%ecx), %xmm3
    196 	movdqa	48(%ecx), %xmm4
    197 
    198 	pcmpeqb	%xmm1, %xmm0
    199 	pcmpeqb	%xmm1, %xmm2
    200 	pcmpeqb	%xmm1, %xmm3
    201 	pcmpeqb	%xmm1, %xmm4
    202 
        /* Fold the four compare results into xmm2.  Each byte is 0x00 or
           0xff, so the byte-wise unsigned maximum acts as a logical OR.
           xmm3 and xmm4 keep their individual results; xmm0 and xmm2 do
           not.  */
    203 	pmaxub	%xmm3, %xmm0
    204 	pmaxub	%xmm4, %xmm2
    205 	pmaxub	%xmm0, %xmm2
    206 	pmovmskb %xmm2, %eax
    207 
    208 	test	%eax, %eax
    209 	jz	L(align64_loop)	/* no match anywhere in these 64 bytes */
    210 
        /* At least one chunk matched; check them from highest to lowest.  */
    211 	pmovmskb %xmm4, %eax
    212 	test	%eax, %eax
    213 	jnz	L(matches48)
    214 
    215 	pmovmskb %xmm3, %eax
    216 	test	%eax, %eax
    217 	jnz	L(matches32)
    218 
    219 	movdqa	16(%ecx), %xmm2	/* reload: xmm2 was overwritten by the fold above */
    220 
    221 	pcmpeqb	%xmm1, %xmm2
    222 	pcmpeqb	(%ecx), %xmm1	/* overwrites the pattern in xmm1; every path from here returns */
    223 
    224 	pmovmskb %xmm2, %eax
    225 	test	%eax, %eax
    226 	jnz	L(matches16)
    227 
        /* The match is in the chunk at offset 0.  Same decoding as
           exit_dispatch: locate the highest set bit of the 16-bit mask.  */
    228 	pmovmskb %xmm1, %eax
    229 	test	%ah, %ah
    230 	jnz	L(exit_dispatch_high)	/* a bit in 15..8 is set */
    231 	mov	%al, %dl	/* dl is scratch; the count in edx is not used again on this path */
    232 	and	$15 << 4, %dl
    233 	jnz	L(exit_dispatch_8)	/* a bit in 7..4 is set */
    234 	test	$0x08, %al
    235 	jnz	L(exit_4)
    236 	test	$0x04, %al
    237 	jnz	L(exit_3)
    238 	test	$0x02, %al
    239 	jnz	L(exit_2)
    240 	mov	%ecx, %eax	/* only bit 0 is left: the match is at ecx itself */
    241 	ret
    242 
    243 	.p2align 4
        /* Last window.  After the add below edx == (ecx + 64) - start: the
           number of bytes of the 64-byte window at ecx that lie at or above
           the buffer start, counted down from the top of the window.  Chunks
           that are completely inside the buffer use the unchecked matchesNN
           exits; a chunk that may straddle the start uses the bounds-checked
           matchesNN_1 exits.  */
    244 L(exit_loop):
    245 	add	$64, %edx
    246 	cmp	$32, %edx
    247 	jbe	L(exit_loop_32)	/* at most 32 bytes left: only the top two chunks matter */
    248 
    249 	movdqa	48(%ecx), %xmm0
    250 	pcmpeqb	%xmm1, %xmm0
    251 	pmovmskb %xmm0, %eax
    252 	test	%eax, %eax
    253 	jnz	L(matches48)
    254 
    255 	movdqa	32(%ecx), %xmm2
    256 	pcmpeqb	%xmm1, %xmm2
    257 	pmovmskb %xmm2, %eax
    258 	test	%eax, %eax
    259 	jnz	L(matches32)
    260 
    261 	movdqa	16(%ecx), %xmm3
    262 	pcmpeqb	%xmm1, %xmm3
    263 	pmovmskb %xmm3, %eax
    264 	test	%eax, %eax
    265 	jnz	L(matches16_1)	/* this chunk may start below the buffer: checked exit */
    266 	cmp	$48, %edx
    267 	jbe	L(return_null)	/* chunk at offset 0 lies entirely below the buffer start */
    268 
    269 	pcmpeqb	(%ecx), %xmm1	/* overwrites the pattern; no further compares follow */
    270 	pmovmskb %xmm1, %eax
    271 	test	%eax, %eax
    272 	jnz	L(matches0_1)
    273 	xor	%eax, %eax	/* not found: return 0 */
    274 	ret
    275 
    276 	.p2align 4
        /* Same as above when at most 32 bytes of the window are inside the
           buffer: only the chunks at offsets 48 and 32 can hold valid
           bytes.  */
    277 L(exit_loop_32):
    278 	movdqa	48(%ecx), %xmm0
    279 	pcmpeqb	%xmm1, %xmm0
    280 	pmovmskb %xmm0, %eax
    281 	test	%eax, %eax
    282 	jnz	L(matches48_1)
    283 	cmp	$16, %edx
    284 	jbe	L(return_null)	/* chunk at offset 32 lies entirely below the buffer start */
    285 
    286 	pcmpeqb	32(%ecx), %xmm1	/* overwrites the pattern; no further compares follow */
    287 	pmovmskb %xmm1, %eax
    288 	test	%eax, %eax
    289 	jnz	L(matches32_1)
    290 	xor	%eax, %eax	/* not found: return 0 */
    291 	ret
    292 
    293 	.p2align 4
        /* Unchecked exits.  On entry eax holds the non-zero 16-bit match mask
           of the chunk at NN(ecx).  ecx is advanced to that chunk and the
           highest set mask bit is decoded: first the high byte (bits 15..8),
           then the upper nibble of the low byte (bits 7..4), then bits 3, 2
           and 1; bit 0 is the fall-through case.  The result is the chunk
           address plus the bit index.  dl is used as scratch.  */
    294 L(matches16):
    295 	lea	16(%ecx), %ecx
    296 	test	%ah, %ah
    297 	jnz	L(exit_dispatch_high)
    298 	mov	%al, %dl
    299 	and	$15 << 4, %dl
    300 	jnz	L(exit_dispatch_8)
    301 	test	$0x08, %al
    302 	jnz	L(exit_4)
    303 	test	$0x04, %al
    304 	jnz	L(exit_3)
    305 	test	$0x02, %al
    306 	jnz	L(exit_2)
    307 	mov	%ecx, %eax	/* bit 0: match at the chunk base */
    308 	ret
    309 
    310 	.p2align 4
    311 L(matches32):
    312 	lea	32(%ecx), %ecx
    313 	test	%ah, %ah
    314 	jnz	L(exit_dispatch_high)
    315 	mov	%al, %dl
    316 	and	$15 << 4, %dl
    317 	jnz	L(exit_dispatch_8)
    318 	test	$0x08, %al
    319 	jnz	L(exit_4)
    320 	test	$0x04, %al
    321 	jnz	L(exit_3)
    322 	test	$0x02, %al
    323 	jnz	L(exit_2)
    324 	mov	%ecx, %eax	/* bit 0: match at the chunk base */
    325 	ret
    326 
    327 	.p2align 4
    328 L(matches48):
    329 	lea	48(%ecx), %ecx	/* then fall through into exit_dispatch */
    330 
    331 	.p2align 4
        /* exit_dispatch: ecx = chunk address, eax = its non-zero match mask.  */
    332 L(exit_dispatch):
    333 	test	%ah, %ah
    334 	jnz	L(exit_dispatch_high)
    335 	mov	%al, %dl
    336 	and	$15 << 4, %dl
    337 	jnz	L(exit_dispatch_8)
    338 	test	$0x08, %al
    339 	jnz	L(exit_4)
    340 	test	$0x04, %al
    341 	jnz	L(exit_3)
    342 	test	$0x02, %al
    343 	jnz	L(exit_2)
    344 	mov	%ecx, %eax	/* bit 0: match at the chunk base */
    345 	ret
    346 
    347 	.p2align 4
        /* Reached when some bit in 7..4 of al is set.  Bits 7, 6 and 5 are
           tested explicitly; bit 4 is the fall-through case.  */
    348 L(exit_dispatch_8):
    349 	test	$0x80, %al
    350 	jnz	L(exit_8)
    351 	test	$0x40, %al
    352 	jnz	L(exit_7)
    353 	test	$0x20, %al
    354 	jnz	L(exit_6)
    355 	lea	4(%ecx), %eax	/* bit 4 */
    356 	ret
    357 
    358 	.p2align 4
        /* Reached when ah (mask bits 15..8) is non-zero.  dh is scratch.  */
    359 L(exit_dispatch_high):
    360 	mov	%ah, %dh
    361 	and	$15 << 4, %dh
    362 	jnz	L(exit_dispatch_high_8)	/* a bit in 15..12 is set */
    363 	test	$0x08, %ah
    364 	jnz	L(exit_12)
    365 	test	$0x04, %ah
    366 	jnz	L(exit_11)
    367 	test	$0x02, %ah
    368 	jnz	L(exit_10)
    369 	lea	8(%ecx), %eax	/* bit 8 */
    370 	ret
    371 
    372 	.p2align 4
        /* Reached when some bit in 15..12 is set; bit 12 is the
           fall-through case.  */
    373 L(exit_dispatch_high_8):
    374 	test	$0x80, %ah
    375 	jnz	L(exit_16)
    376 	test	$0x40, %ah
    377 	jnz	L(exit_15)
    378 	test	$0x20, %ah
    379 	jnz	L(exit_14)
    380 	lea	12(%ecx), %eax	/* bit 12 */
    381 	ret
    382 
    383 	.p2align 4
        /* exit_N returns ecx + (N - 1): N is the 1-based position of the
           matching byte inside the 16-byte chunk at ecx.  */
    384 L(exit_2):
    385 	lea	1(%ecx), %eax
    386 	ret
    387 
    388 	.p2align 4
    389 L(exit_3):
    390 	lea	2(%ecx), %eax
    391 	ret
    392 
    393 	.p2align 4
    394 L(exit_4):
    395 	lea	3(%ecx), %eax
    396 	ret
    397 
    398 	.p2align 4
    399 L(exit_6):
    400 	lea	5(%ecx), %eax
    401 	ret
    402 
    403 	.p2align 4
    404 L(exit_7):
    405 	lea	6(%ecx), %eax
    406 	ret
    407 
    408 	.p2align 4
    409 L(exit_8):
    410 	lea	7(%ecx), %eax
    411 	ret
    412 
    413 	.p2align 4
    414 L(exit_10):
    415 	lea	9(%ecx), %eax
    416 	ret
    417 
    418 	.p2align 4
    419 L(exit_11):
    420 	lea	10(%ecx), %eax
    421 	ret
    422 
    423 	.p2align 4
    424 L(exit_12):
    425 	lea	11(%ecx), %eax
    426 	ret
    427 
    428 	.p2align 4
    429 L(exit_14):
    430 	lea	13(%ecx), %eax
    431 	ret
    432 
    433 	.p2align 4
    434 L(exit_15):
    435 	lea	14(%ecx), %eax
    436 	ret
    437 
    438 	.p2align 4
    439 L(exit_16):
    440 	lea	15(%ecx), %eax
    441 	ret
    442 
    443 	.p2align 4
        /* Bounds-checked exits, used when the matching chunk may begin below
           the buffer start.  On entry eax is the chunk's non-zero match mask
           and edx == (window base + 64) - start.  Each entry point rebases
           edx to (chunk address - start), which is <= 0 when the chunk
           reaches below the start, and points ecx at the chunk.  The highest
           set mask bit i is then decoded as in exit_dispatch.  The final
           "add $i, %edx; jl" rejects the match when chunk + i lies below the
           start.  Because i is the highest match, a rejected i means no
           match is inside the buffer.  ah serves as scratch here since edx
           is still needed.  */
    444 L(matches0_1):
    445 	lea	-64(%edx), %edx	/* edx = ecx - start */
    446 
    447 	test	%ah, %ah
    448 	jnz	L(exit_dispatch_1_high)
    449 	mov	%al, %ah
    450 	and	$15 << 4, %ah
    451 	jnz	L(exit_dispatch_1_8)
    452 	test	$0x08, %al
    453 	jnz	L(exit_1_4)
    454 	test	$0x04, %al
    455 	jnz	L(exit_1_3)
    456 	test	$0x02, %al
    457 	jnz	L(exit_1_2)
    458 
    459 	add	$0, %edx	/* bit 0: sets the flags from edx without changing it */
    460 	jl	L(return_null)	/* ecx itself is below the buffer start */
    461 	mov	%ecx, %eax
    462 	ret
    463 
    464 	.p2align 4
    465 L(matches16_1):
    466 	lea	-48(%edx), %edx	/* edx = (ecx + 16) - start */
    467 	lea	16(%ecx), %ecx
    468 
    469 	test	%ah, %ah
    470 	jnz	L(exit_dispatch_1_high)
    471 	mov	%al, %ah
    472 	and	$15 << 4, %ah
    473 	jnz	L(exit_dispatch_1_8)
    474 	test	$0x08, %al
    475 	jnz	L(exit_1_4)
    476 	test	$0x04, %al
    477 	jnz	L(exit_1_3)
    478 	test	$0x02, %al
    479 	jnz	L(exit_1_2)
    480 
    481 	add	$0, %edx
    482 	jl	L(return_null)
    483 	mov	%ecx, %eax
    484 	ret
    485 
    486 	.p2align 4
    487 L(matches32_1):
    488 	lea	-32(%edx), %edx	/* edx = (ecx + 32) - start */
    489 	lea	32(%ecx), %ecx
    490 
    491 	test	%ah, %ah
    492 	jnz	L(exit_dispatch_1_high)
    493 	mov	%al, %ah
    494 	and	$15 << 4, %ah
    495 	jnz	L(exit_dispatch_1_8)
    496 	test	$0x08, %al
    497 	jnz	L(exit_1_4)
    498 	test	$0x04, %al
    499 	jnz	L(exit_1_3)
    500 	test	$0x02, %al
    501 	jnz	L(exit_1_2)
    502 
    503 	add	$0, %edx
    504 	jl	L(return_null)
    505 	mov	%ecx, %eax
    506 	ret
    507 
    508 	.p2align 4
    509 L(matches48_1):
    510 	lea	-16(%edx), %edx	/* edx = (ecx + 48) - start */
    511 	lea	48(%ecx), %ecx	/* then fall through into exit_dispatch_1 */
    512 
    513 	.p2align 4
    514 L(exit_dispatch_1):
    515 	test	%ah, %ah
    516 	jnz	L(exit_dispatch_1_high)
    517 	mov	%al, %ah
    518 	and	$15 << 4, %ah
    519 	jnz	L(exit_dispatch_1_8)
    520 	test	$0x08, %al
    521 	jnz	L(exit_1_4)
    522 	test	$0x04, %al
    523 	jnz	L(exit_1_3)
    524 	test	$0x02, %al
    525 	jnz	L(exit_1_2)
    526 
    527 	add	$0, %edx
    528 	jl	L(return_null)
    529 	mov	%ecx, %eax
    530 	ret
    531 
    532 	.p2align 4
        /* Bounds-checked counterparts of exit_dispatch_8, exit_dispatch_high,
           exit_dispatch_high_8 and exit_N.  In all of them edx holds
           (chunk address - start) and ecx the chunk address; "add $i, %edx"
           followed by "jl" returns 0 when the matching byte chunk + i lies
           below the buffer start, otherwise chunk + i is returned.  */
    533 L(exit_dispatch_1_8):
    534 	test	$0x80, %al
    535 	jnz	L(exit_1_8)
    536 	test	$0x40, %al
    537 	jnz	L(exit_1_7)
    538 	test	$0x20, %al
    539 	jnz	L(exit_1_6)
    540 
    541 	add	$4, %edx	/* bit 4 */
    542 	jl	L(return_null)
    543 	lea	4(%ecx), %eax
    544 	ret
    545 
    546 	.p2align 4
        /* ah (mask bits 15..8) is non-zero; al is free to use as scratch
           because the low-byte bits no longer matter.  */
    547 L(exit_dispatch_1_high):
    548 	mov	%ah, %al
    549 	and	$15 << 4, %al
    550 	jnz	L(exit_dispatch_1_high_8)
    551 	test	$0x08, %ah
    552 	jnz	L(exit_1_12)
    553 	test	$0x04, %ah
    554 	jnz	L(exit_1_11)
    555 	test	$0x02, %ah
    556 	jnz	L(exit_1_10)
    557 
    558 	add	$8, %edx	/* bit 8 */
    559 	jl	L(return_null)
    560 	lea	8(%ecx), %eax
    561 	ret
    562 
    563 	.p2align 4
    564 L(exit_dispatch_1_high_8):
    565 	test	$0x80, %ah
    566 	jnz	L(exit_1_16)
    567 	test	$0x40, %ah
    568 	jnz	L(exit_1_15)
    569 	test	$0x20, %ah
    570 	jnz	L(exit_1_14)
    571 
    572 	add	$12, %edx	/* bit 12 */
    573 	jl	L(return_null)
    574 	lea	12(%ecx), %eax
    575 	ret
    576 
    577 	.p2align 4
        /* exit_1_N: return ecx + (N - 1) if that address is not below the
           buffer start, else 0.  */
    578 L(exit_1_2):
    579 	add	$1, %edx
    580 	jl	L(return_null)
    581 	lea	1(%ecx), %eax
    582 	ret
    583 
    584 	.p2align 4
    585 L(exit_1_3):
    586 	add	$2, %edx
    587 	jl	L(return_null)
    588 	lea	2(%ecx), %eax
    589 	ret
    590 
    591 	.p2align 4
    592 L(exit_1_4):
    593 	add	$3, %edx
    594 	jl	L(return_null)
    595 	lea	3(%ecx), %eax
    596 	ret
    597 
    598 	.p2align 4
    599 L(exit_1_6):
    600 	add	$5, %edx
    601 	jl	L(return_null)
    602 	lea	5(%ecx), %eax
    603 	ret
    604 
    605 	.p2align 4
    606 L(exit_1_7):
    607 	add	$6, %edx
    608 	jl	L(return_null)
    609 	lea	6(%ecx), %eax
    610 	ret
    611 
    612 	.p2align 4
    613 L(exit_1_8):
    614 	add	$7, %edx
    615 	jl	L(return_null)
    616 	lea	7(%ecx), %eax
    617 	ret
    618 
    619 	.p2align 4
    620 L(exit_1_10):
    621 	add	$9, %edx
    622 	jl	L(return_null)
    623 	lea	9(%ecx), %eax
    624 	ret
    625 
    626 	.p2align 4
    627 L(exit_1_11):
    628 	add	$10, %edx
    629 	jl	L(return_null)
    630 	lea	10(%ecx), %eax
    631 	ret
    632 
    633 	.p2align 4
    634 L(exit_1_12):
    635 	add	$11, %edx
    636 	jl	L(return_null)
    637 	lea	11(%ecx), %eax
    638 	ret
    639 
    640 	.p2align 4
    641 L(exit_1_14):
    642 	add	$13, %edx
    643 	jl	L(return_null)
    644 	lea	13(%ecx), %eax
    645 	ret
    646 
    647 	.p2align 4
    648 L(exit_1_15):
    649 	add	$14, %edx
    650 	jl	L(return_null)
    651 	lea	14(%ecx), %eax
    652 	ret
    653 
    654 	.p2align 4
    655 L(exit_1_16):
    656 	add	$15, %edx
    657 	jl	L(return_null)
    658 	lea	15(%ecx), %eax
    659 	ret
    660 
    661 	.p2align 4
        /* Common "not found" exit for paths that have nothing pushed on the
           stack: return 0.  */
    662 L(return_null):
    663 	xor	%eax, %eax
    664 	ret
    665 
    666 	.p2align 4
        /* Short buffer (length <= 16) whose start is 16-byte aligned.
           On entry: eax = buffer start, edx = length, xmm1 = search byte in
           all lanes, ecx = 0.  The full aligned 16-byte chunk is compared
           and the mask bits at or beyond the length are discarded.  */
    667 L(length_less16_offset0):
    668 	mov	%dl, %cl	/* cl = length, used as the shift count */
    669 	pcmpeqb	(%eax), %xmm1
    670 
    671 	mov	$1, %edx
    672 	sal	%cl, %edx
    673 	sub	$1, %edx	/* edx = (1 << length) - 1: one bit per in-range byte */
    674 
    675 	mov	%eax, %ecx	/* ecx = chunk address, as exit_dispatch expects */
    676 	pmovmskb %xmm1, %eax
    677 
    678 	and	%edx, %eax	/* drop matches past the end of the buffer */
    679 	test	%eax, %eax
    680 	jnz	L(exit_dispatch)
    681 
    682 	xor	%eax, %eax	/* not found: return 0 */
    683 	ret
    684 
    685 	.p2align 4
        /* Short buffer: length is 1..16.  Entered from the top of the
           function with edx = length - 16, ecx = buffer start and the search
           value still only in the low dword of xmm1.  Only aligned 16-byte
           chunks are read, and mask bits belonging to bytes outside the
           buffer are shifted or masked away.  */
    686 L(length_less16):
    687 	punpcklbw %xmm1, %xmm1
    688 	add	$16, %edx	/* edx = length again */
    689 	punpcklbw %xmm1, %xmm1
    690 
    691 	mov	%ecx, %eax	/* eax = buffer start */
    692 	pshufd	$0, %xmm1, %xmm1	/* search byte replicated in all 16 lanes */
    693 
    694 	and	$15, %ecx	/* ecx = offset of the start inside its aligned chunk */
    695 	jz	L(length_less16_offset0)
    696 
    697 	PUSH	(%edi)	/* edi is used as a temporary below */
    698 
    699 	mov	%cl, %dh
    700 	add	%dl, %dh	/* dh = offset + length */
    701 	and	$-16, %eax	/* eax = aligned chunk containing the start */
    702 
    703 	sub	$16, %dh	/* dh = number of buffer bytes in the next chunk, if positive */
    704 	ja	L(length_less16_part2)	/* buffer continues into the following chunk */
    705 
        /* The whole buffer sits inside one aligned chunk.  */
    706 	pcmpeqb	(%eax), %xmm1
    707 	pmovmskb %xmm1, %edi
    708 
    709 	sar	%cl, %edi	/* drop mask bits for bytes before the start; bit 0 <-> start */
    710 	add	%ecx, %eax	/* eax = buffer start */
    711 	mov	%dl, %cl	/* cl = length */
    712 
    713 	mov	$1, %edx
    714 	sal	%cl, %edx
    715 	sub	$1, %edx	/* edx = (1 << length) - 1 */
    716 
    717 	and	%edx, %edi	/* drop mask bits for bytes past the end */
    718 	test	%edi, %edi
    719 	jz	L(ret_null)
    720 
    721 	bsr	%edi, %edi	/* index of the highest match relative to the start */
    722 	add	%edi, %eax
    723 	POP	(%edi)
    724 	ret
    725 
        /* The POP above recorded edi as restored; the code below is reached
           with edi still pushed, so the unwind annotation is put back.  */
    726 	CFI_PUSH     (%edi)
    727 
    728 	.p2align 4
        /* The buffer spans two aligned chunks.  eax = lower chunk,
           cl = offset of the start in it, dh = number of buffer bytes in the
           upper chunk.  The upper chunk is checked first because it holds the
           higher addresses.  */
    729 L(length_less16_part2):
    730 	movdqa	16(%eax), %xmm2
    731 	pcmpeqb	%xmm1, %xmm2
    732 	pmovmskb %xmm2, %edi
    733 
    734 	mov	%cl, %ch	/* keep the start offset in ch */
    735 
    736 	mov	%dh, %cl	/* cl = byte count in the upper chunk */
    737 	mov	$1, %edx
    738 	sal	%cl, %edx
    739 	sub	$1, %edx	/* edx = (1 << count) - 1 */
    740 
    741 	and	%edx, %edi	/* drop mask bits for bytes past the end */
    742 
    743 	test	%edi, %edi
    744 	jnz	L(length_less16_part2_return)
    745 
        /* Nothing in the upper part: test the lower chunk from the start
           offset up to its end.  */
    746 	pcmpeqb	(%eax), %xmm1
    747 	pmovmskb %xmm1, %edi
    748 
    749 	mov	%ch, %cl	/* cl = start offset */
    750 	sar	%cl, %edi	/* drop mask bits for bytes before the start */
    751 	test	%edi, %edi
    752 	jz	L(ret_null)
    753 
    754 	bsr	%edi, %edi	/* index of the highest match relative to the start */
    755 	add	%edi, %eax
    756 	xor	%ch, %ch	/* clear ch so that ecx is the plain start offset again */
    757 	add	%ecx, %eax	/* eax = lower chunk + offset + index */
    758 	POP	(%edi)
    759 	ret
    760 
        /* Put back the "edi pushed" unwind annotation for the code below.  */
    761 	CFI_PUSH     (%edi)
    762 
    763 	.p2align 4
        /* Match found in the upper chunk: return lower chunk + 16 + index of
           the highest set mask bit.  */
    764 L(length_less16_part2_return):
    765 	bsr	%edi, %edi
    766 	lea	16(%eax, %edi), %eax
    767 	POP	(%edi)
    768 	ret
    769 
        /* Put back the "edi pushed" unwind annotation for the code below.  */
    770 	CFI_PUSH     (%edi)
    771 
    772 	.p2align 4
        /* "Not found" exit for the paths that pushed edi: return 0 after
           restoring it.  */
    773 L(ret_null):
    774 	xor	%eax, %eax
    775 	POP	(%edi)
    776 	ret
    777 
    778 END (memrchr)
    779