Home | History | Annotate | Download | only in string
      1 /*
      2 Copyright (c) 2011 Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     /* L(name) spells a label as ".L" followed by NAME.  GNU as treats
   ".L"-prefixed symbols as assembler-local (non-exported).  The
   definition is skipped when something earlier already supplied L.  */
#if !defined L
# define L(name)	.L##name
#endif
     34 
     /* Wrappers that forward to the assembler's .cfi_* directives.  Each one
   is defined only when no earlier definition exists, so a file that
   includes this one may substitute its own versions.  */
#if !defined cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#if !defined cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

/* Record that register R is saved at offset DISPL from the current CFA
   base register.  */
#if !defined cfi_rel_offset
# define cfi_rel_offset(r, displ)	.cfi_rel_offset r, displ
#endif

/* Record that register R holds its entry value again.  */
#if !defined cfi_restore
# define cfi_restore(r)	.cfi_restore r
#endif

/* Adjust the recorded CFA offset by DELTA bytes.  */
#if !defined cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(delta)	.cfi_adjust_cfa_offset delta
#endif
     54 
     /* ENTRY(sym) opens a function: exports SYM, marks it as a function
   symbol, aligns the entry point to 16 bytes (.p2align 4), emits the
   label and starts the CFI region.  Pair every use with END(sym).  */
#if !defined ENTRY
# define ENTRY(sym)	\
	.globl sym;	\
	.type sym, @function;	\
	.p2align 4;	\
sym:	\
	cfi_startproc
#endif
     63 
     /* END(sym) closes a function opened with ENTRY(sym): ends the CFI
   region and records the symbol size as the distance from SYM to the
   current location.  */
#if !defined END
# define END(sym)	\
	cfi_endproc;	\
	.size sym, .-sym
#endif
     69 
     /* Unwind bookkeeping for one 4-byte register push: the CFA offset
   grows by 4 and SAVED is recorded as stored at offset 0.  */
#define CFI_PUSH(SAVED)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (SAVED, 0)

/* Unwind bookkeeping for the matching pop: the CFA offset shrinks by 4
   and SAVED is marked as restored.  */
#define CFI_POP(SAVED)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (SAVED)
     77 
     /* pushl / popl of a 32-bit register together with its CFI annotation.  */
#define PUSH(R)	\
	pushl R;	\
	CFI_PUSH (R)
#define POP(R)	\
	popl R;	\
	CFI_POP (R)
     80 
     /* ENTRANCE saves %esi then %edi on the stack.
   RETURN pops them in the reverse order and executes ret.  The two
   CFI_PUSH annotations after the ret re-establish the "both registers
   saved" unwind state for whatever code follows the expansion, because
   RETURN is expanded in the middle of the function body.  */
#define ENTRANCE	\
	PUSH (%esi);	\
	PUSH (%edi)
#define RETURN	\
	POP (%edi);	\
	POP (%esi);	\
	ret;	\
	CFI_PUSH (%esi);	\
	CFI_PUSH (%edi);
     /* Stack offsets of the two pointer arguments, used as STR1(%esp) and
   STR2(%esp) at function entry before anything is pushed.  PARMS is 4,
   i.e. the slot after the 4-byte return address under the i386 cdecl
   convention; the second argument sits 4 bytes above the first.  */
#define PARMS	4
#define STR1	PARMS
#define STR2	STR1+4
     86 
     /* wcscmp for i386 using SSE2 (AT&T syntax, run through cpp).  */

/*
 * int wcscmp (const wchar_t *s1, const wchar_t *s2)
 *
 * In:    STR1(%esp) = s1, STR2(%esp) = s2, read from the stack on entry.
 * Out:   %eax =  0  when a NUL element is reached with no difference,
 *               1  when the first differing element of s1 is greater
 *                  (signed 32-bit comparison, see the jg in L(nequal)),
 *              -1  otherwise.
 * Uses:  %eax, %ecx, %edx, flags, %xmm0-%xmm5.  %esi and %edi are saved
 *        by ENTRANCE and restored by RETURN.
 *
 * Outline
 *   1. The first four elements are compared one at a time through
 *      %edx (s1) and %eax (s2).  These paths leave through L(neq) / L(eq)
 *      with a bare ret, because nothing has been pushed yet.
 *   2. ENTRANCE saves %esi/%edi, both pointers advance by 16 bytes, and
 *      from here on %edi walks s1 and %esi walks s2.  %xmm0 is cleared and
 *      stays all-zero on every path that keeps looping.
 *   3. Each pointer is classified as either 16-byte aligned ("00") or by
 *      which quarter of a 64-byte window it lies in (0, 16, 32, 48).
 *      L(continue_A_B) is the loop for one such pair of positions.
 *      Sixteen-byte SSE compares are used where, for that pair, the load
 *      stays inside the current 64-byte window; the remaining elements are
 *      compared one at a time.
 *      NOTE(review): the purpose appears to be never letting a 16-byte
 *      load cross a 64-byte (and hence page) boundary past the terminator;
 *      this is inferred from the offsets, please confirm.
 *   4. When an SSE compare reports a mismatch or a NUL, one of the
 *      L(less4_double_words*) blocks finds the first such element among
 *      the four and produces the result.
 */

/* Element compare used before ENTRANCE: s1 element at off(%edx), s2
   element at off(%eax).  Leaves through L(neq) on a difference and
   through L(eq) when the equal elements are NUL.  */
#define CMP1_ENTRY(off)	\
	mov	off(%eax), %ecx;	\
	cmp	%ecx, off(%edx);	\
	jne	L(neq);	\
	test	%ecx, %ecx;	\
	jz	L(eq)

/* Element compare used after ENTRANCE: s1 element at off(%edi), s2
   element at off(%esi).  Leaves through L(nequal) on a difference and
   through L(equal) when the equal elements are NUL.  */
#define CMP1(off)	\
	mov	off(%esi), %ecx;	\
	cmp	%ecx, off(%edi);	\
	jne	L(nequal);	\
	test	%ecx, %ecx;	\
	jz	L(equal)

/* Element compare without a NUL test, for use after a 16-byte NUL scan
   of one string came back empty.  Clobbers %eax.  */
#define CMP1_NONNUL(off)	\
	mov	off(%edi), %eax;	\
	cmp	off(%esi), %eax;	\
	jne	L(nequal)

/* Final element compare of a tail block: a difference goes to L(nequal),
   otherwise return with %eax as set by the caller of the macro (0).  */
#define LAST_CMP(off)	\
	mov	off(%esi), %ecx;	\
	cmp	%ecx, off(%edi);	\
	jne	L(nequal);	\
	RETURN

/* Four-element SSE compare, both operands loaded with movdqu.
   xa receives the s1 chunk, xb the s2 chunk.  %xmm0 (all-zero on entry)
   becomes the per-element NUL mask of the s1 chunk, xa the per-element
   equality mask; psubb leaves byte 0xff only where the elements are
   equal and non-NUL, so pmovmskb gives 0xffff exactly when all four
   elements match and none is NUL.  In that case %xmm0 is all-zero again
   and execution falls through; otherwise %edx = mask - 0xffff and
   control goes to L(miss).  */
#define CMP4_UU(off, xa, xb, miss)	\
	movdqu	off(%edi), xa;	\
	movdqu	off(%esi), xb;	\
	pcmpeqd	xa, %xmm0;	\
	pcmpeqd	xb, xa;	\
	psubb	%xmm0, xa;	\
	pmovmskb xa, %edx;	\
	sub	$0xffff, %edx;	\
	jnz	L(miss)

/* Same test when %edi (s1) is 16-byte aligned: the s2 chunk is loaded
   into %xmm2 and the s1 chunk is used directly as a memory operand.
   The NUL scan is done on the s2 chunk.  */
#define CMP4_S1A(off, miss)	\
	movdqu	off(%esi), %xmm2;	\
	pcmpeqd	%xmm2, %xmm0;	\
	pcmpeqd	off(%edi), %xmm2;	\
	psubb	%xmm0, %xmm2;	\
	pmovmskb %xmm2, %edx;	\
	sub	$0xffff, %edx;	\
	jnz	L(miss)

/* Same test when %esi (s2) is 16-byte aligned: the s1 chunk is loaded
   into %xmm1 and the s2 chunk is used directly as a memory operand.  */
#define CMP4_S2A(off, miss)	\
	movdqu	off(%edi), %xmm1;	\
	pcmpeqd	%xmm1, %xmm0;	\
	pcmpeqd	off(%esi), %xmm1;	\
	psubb	%xmm0, %xmm1;	\
	pmovmskb %xmm1, %edx;	\
	sub	$0xffff, %edx;	\
	jnz	L(miss)

/* Same test when both pointers are 16-byte aligned: aligned load of the
   s1 chunk into xr, s2 chunk as memory operand.  */
#define CMP4_AA(off, xr, miss)	\
	movdqa	off(%edi), xr;	\
	pcmpeqd	xr, %xmm0;	\
	pcmpeqd	off(%esi), xr;	\
	psubb	%xmm0, xr;	\
	pmovmskb xr, %edx;	\
	sub	$0xffff, %edx;	\
	jnz	L(miss)

/* Second-level dispatch on the s2 pointer.  On entry %ch holds the low
   byte of the s2 pointer and %eax its offset within a 64-byte window.
   Jumps to al16 when s2 is 16-byte aligned, to lt16 / lt32 / lt48 when
   the offset is below 16 / 32 / 48, and falls through otherwise.  */
#define SPLIT_ON_S2(al16, lt16, lt32, lt48)	\
	and	$15, %ch;	\
	jz	L(al16);	\
	cmp	$16, %eax;	\
	jb	L(lt16);	\
	cmp	$32, %eax;	\
	jb	L(lt32);	\
	cmp	$48, %eax;	\
	jb	L(lt48)

	.text
ENTRY (wcscmp)
	mov	STR1(%esp), %edx		/* %edx = s1 */
	mov	STR2(%esp), %eax		/* %eax = s2 */

	/* Elements 0..3, one at a time, before any register is saved.  */
	CMP1_ENTRY(0)
	CMP1_ENTRY(4)
	CMP1_ENTRY(8)
	CMP1_ENTRY(12)

	ENTRANCE
	add	$16, %eax			/* skip the 16 bytes just compared */
	add	$16, %edx

	mov	%eax, %esi			/* %esi walks s2 */
	mov	%edx, %edi			/* %edi walks s1 */
	pxor	%xmm0, %xmm0			/* all-zero reference for the NUL scans */
	mov	%al, %ch			/* %ch = low byte of the s2 pointer */
	mov	%dl, %cl			/* %cl = low byte of the s1 pointer */
	and	$63, %eax			/* %eax = s2 offset in its 64-byte window */
	and	$63, %edx			/* %edx = s1 offset in its 64-byte window */

	/* First-level dispatch on the s1 pointer.  */
	and	$15, %cl
	jz	L(continue_00)			/* s1 is 16-byte aligned */
	cmp	$16, %edx
	jb	L(continue_0)
	cmp	$32, %edx
	jb	L(continue_16)
	cmp	$48, %edx
	jb	L(continue_32)

L(continue_48):
	SPLIT_ON_S2(continue_48_00, continue_0_48, continue_16_48, continue_32_48)

	.p2align 4
L(continue_48_48):
	CMP1(0)
	CMP1(4)
	CMP1(8)
	CMP1(12)

	CMP4_UU(16, %xmm1, %xmm2, less4_double_words_16)
	CMP4_UU(32, %xmm1, %xmm2, less4_double_words_32)
	CMP4_UU(48, %xmm1, %xmm2, less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_48_48)

L(continue_0):
	SPLIT_ON_S2(continue_0_00, continue_0_0, continue_0_16, continue_0_32)

	.p2align 4
L(continue_0_48):
	CMP1(0)
	CMP1(4)
	CMP1(8)
	CMP1(12)

	CMP4_UU(16, %xmm1, %xmm2, less4_double_words_16)
	CMP4_UU(32, %xmm1, %xmm2, less4_double_words_32)

	CMP1(48)
	CMP1(52)
	CMP1(56)
	CMP1(60)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_0_48)

	.p2align 4
L(continue_00):
	SPLIT_ON_S2(continue_00_00, continue_00_0, continue_00_16, continue_00_32)

	.p2align 4
L(continue_00_48):
	/* Scan the aligned s1 chunk for a NUL first; if there is one, finish
	   element by element in L(less4_double_words1).  */
	pcmpeqd	(%edi), %xmm0
	mov	(%edi), %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(less4_double_words1)

	/* No NUL in these four s1 elements: equality checks are enough.  */
	cmp	(%esi), %eax
	jne	L(nequal)
	CMP1_NONNUL(4)
	CMP1_NONNUL(8)
	CMP1_NONNUL(12)

	CMP4_S1A(16, less4_double_words_16)
	CMP4_S1A(32, less4_double_words_32)
	CMP4_S1A(48, less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_32):
	SPLIT_ON_S2(continue_32_00, continue_0_32, continue_16_32, continue_32_32)

	.p2align 4
L(continue_32_48):
	CMP1(0)
	CMP1(4)
	CMP1(8)
	CMP1(12)
	CMP1(16)
	CMP1(20)
	CMP1(24)
	CMP1(28)

	CMP4_UU(32, %xmm1, %xmm2, less4_double_words_32)
	CMP4_UU(48, %xmm1, %xmm2, less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_32_48)

	.p2align 4
L(continue_16):
	SPLIT_ON_S2(continue_16_00, continue_0_16, continue_16_16, continue_16_32)

	.p2align 4
L(continue_16_48):
	CMP1(0)
	CMP1(4)
	CMP1(8)
	CMP1(12)

	CMP4_UU(16, %xmm1, %xmm2, less4_double_words_16)

	CMP1(32)
	CMP1(36)
	CMP1(40)
	CMP1(44)

	CMP4_UU(48, %xmm1, %xmm2, less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_16_48)

	/* Both pointers 16-byte aligned: 64 bytes per iteration, all SSE.  */
	.p2align 4
L(continue_00_00):
	CMP4_AA(0, %xmm1, less4_double_words)
	CMP4_AA(16, %xmm3, less4_double_words_16)
	CMP4_AA(32, %xmm5, less4_double_words_32)
	CMP4_AA(48, %xmm1, less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_00_00)

	/* The blocks below compare 16, 32 or 48 bytes and then join one of
	   the steady-state loops above.  */
	.p2align 4
L(continue_00_32):
	CMP4_S1A(0, less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_00_16):
	CMP4_S1A(0, less4_double_words)
	CMP4_S1A(16, less4_double_words_16)

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_00_0):
	CMP4_S1A(0, less4_double_words)
	CMP4_S1A(16, less4_double_words_16)
	CMP4_S1A(32, less4_double_words_32)

	add	$48, %esi
	add	$48, %edi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_48_00):
	/* Scan the aligned s2 chunk for a NUL first; if there is one, finish
	   element by element in L(less4_double_words1).  */
	pcmpeqd	(%esi), %xmm0
	mov	(%edi), %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(less4_double_words1)

	/* No NUL in these four s2 elements: equality checks are enough.  */
	cmp	(%esi), %eax
	jne	L(nequal)
	CMP1_NONNUL(4)
	CMP1_NONNUL(8)
	CMP1_NONNUL(12)

	CMP4_S2A(16, less4_double_words_16)
	CMP4_S2A(32, less4_double_words_32)
	CMP4_S2A(48, less4_double_words_48)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_48_00)

	.p2align 4
L(continue_32_00):
	CMP4_S2A(0, less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_48_00)

	.p2align 4
L(continue_16_00):
	CMP4_S2A(0, less4_double_words)
	CMP4_S2A(16, less4_double_words_16)

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_48_00)

	.p2align 4
L(continue_0_00):
	CMP4_S2A(0, less4_double_words)
	CMP4_S2A(16, less4_double_words_16)
	CMP4_S2A(32, less4_double_words_32)

	add	$48, %esi
	add	$48, %edi
	jmp	L(continue_48_00)

	.p2align 4
L(continue_32_32):
	CMP4_UU(0, %xmm1, %xmm2, less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_48_48)

	.p2align 4
L(continue_16_16):
	CMP4_UU(0, %xmm1, %xmm2, less4_double_words)
	CMP4_UU(16, %xmm3, %xmm4, less4_double_words_16)

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_48_48)

	.p2align 4
L(continue_0_0):
	CMP4_UU(0, %xmm1, %xmm2, less4_double_words)
	CMP4_UU(16, %xmm3, %xmm4, less4_double_words_16)
	CMP4_UU(32, %xmm1, %xmm2, less4_double_words_32)

	add	$48, %esi
	add	$48, %edi
	jmp	L(continue_48_48)

	.p2align 4
L(continue_0_16):
	CMP4_UU(0, %xmm1, %xmm2, less4_double_words)
	CMP4_UU(16, %xmm1, %xmm2, less4_double_words_16)

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_32_48)

	.p2align 4
L(continue_0_32):
	CMP4_UU(0, %xmm1, %xmm2, less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_16_48)

	.p2align 4
L(continue_16_32):
	CMP4_UU(0, %xmm1, %xmm2, less4_double_words)

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_32_48)

	/* Reached from L(continue_00_48) / L(continue_48_00) with %eax holding
	   the s1 element at (%edi) and a NUL known to be somewhere in the
	   aligned four-element chunk.  If the first three elements are equal
	   and non-NUL, the fourth decides: different -> L(nequal), else 0.  */
	.p2align 4
L(less4_double_words1):
	cmp	(%esi), %eax
	jne	L(nequal)
	test	%eax, %eax
	jz	L(equal)

	CMP1(4)
	CMP1(8)

	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

	/* Tail blocks.  On entry %edx = mask - 0xffff, where each element owns
	   four mask bits that are set only if that element matched and was not
	   NUL.  %dl == 0 means elements 0 and 1 were both fine, so the event is
	   in element 2 or 3; the low nibble of %dl (respectively %dh) being
	   zero means the lower element of that pair was fine.  The selected
	   element is then compared: a difference goes to L(nequal), otherwise
	   it was the terminator and 0 (already in %eax) is returned.  */
	.p2align 4
L(less4_double_words):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words)
	and	$15, %dl
	jz	L(second_double_word)
	LAST_CMP(0)

	.p2align 4
L(second_double_word):
	LAST_CMP(4)

	.p2align 4
L(next_two_double_words):
	and	$15, %dh
	jz	L(fourth_double_word)
	LAST_CMP(8)

	.p2align 4
L(fourth_double_word):
	LAST_CMP(12)

	.p2align 4
L(less4_double_words_16):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words_16)
	and	$15, %dl
	jz	L(second_double_word_16)
	LAST_CMP(16)

	.p2align 4
L(second_double_word_16):
	LAST_CMP(20)

	.p2align 4
L(next_two_double_words_16):
	and	$15, %dh
	jz	L(fourth_double_word_16)
	LAST_CMP(24)

	.p2align 4
L(fourth_double_word_16):
	LAST_CMP(28)

	.p2align 4
L(less4_double_words_32):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words_32)
	and	$15, %dl
	jz	L(second_double_word_32)
	LAST_CMP(32)

	.p2align 4
L(second_double_word_32):
	LAST_CMP(36)

	.p2align 4
L(next_two_double_words_32):
	and	$15, %dh
	jz	L(fourth_double_word_32)
	LAST_CMP(40)

	.p2align 4
L(fourth_double_word_32):
	LAST_CMP(44)

	.p2align 4
L(less4_double_words_48):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words_48)
	and	$15, %dl
	jz	L(second_double_word_48)
	LAST_CMP(48)

	.p2align 4
L(second_double_word_48):
	LAST_CMP(52)

	.p2align 4
L(next_two_double_words_48):
	and	$15, %dh
	jz	L(fourth_double_word_48)
	LAST_CMP(56)

	.p2align 4
L(fourth_double_word_48):
	LAST_CMP(60)

	/* Elements differ.  The flags still come from the cmp that computed
	   (s1 element) - (s2 element); mov does not modify them.  */
	.p2align 4
L(nequal):
	mov	$1, %eax
	jg	L(return)			/* s1 element greater (signed): return 1 */
	neg	%eax				/* otherwise return -1 */
	RETURN

	.p2align 4
L(return):
	RETURN

	.p2align 4
L(equal):
	xorl	%eax, %eax
	RETURN

	/* The exits below are reached only from the code before ENTRANCE,
	   so the unwind state is switched back to "nothing pushed".  */
	CFI_POP (%edi)
	CFI_POP (%esi)

	.p2align 4
L(neq):
	mov	$1, %eax
	jg	L(neq_bigger)			/* s1 element greater (signed): return 1 */
	neg	%eax				/* otherwise return -1 */

L(neq_bigger):
	ret

	.p2align 4
L(eq):
	xorl	%eax, %eax
	ret

END (wcscmp)

/* The helper macros above are private to this routine.  */
#undef CMP1_ENTRY
#undef CMP1
#undef CMP1_NONNUL
#undef LAST_CMP
#undef CMP4_UU
#undef CMP4_S1A
#undef CMP4_S2A
#undef CMP4_AA
#undef SPLIT_ON_S2
   1062 
   1063