Home | History | Annotate | Download | only in string
      1 /*
      2 Copyright (c) 2011, Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     31 #ifndef USE_AS_STRCAT
     32 
     33 # ifndef L	/* L(x) expands to the assembler-local label .Lx */
     34 #  define L(label)	.L##label
     35 # endif
     36 /* Fallbacks: map each cfi_* macro onto the .cfi_* directive of the same name. */
     37 # ifndef cfi_startproc
     38 #  define cfi_startproc	.cfi_startproc
     39 # endif
     40 
     41 # ifndef cfi_endproc
     42 #  define cfi_endproc	.cfi_endproc
     43 # endif
     44 
     45 # ifndef cfi_rel_offset
     46 #  define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
     47 # endif
     48 
     49 # ifndef cfi_restore
     50 #  define cfi_restore(reg)	.cfi_restore reg
     51 # endif
     52 
     53 # ifndef cfi_adjust_cfa_offset
     54 #  define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
     55 # endif
     56 /* ENTRY(name): emit name as a global function label aligned to 16 bytes and open its CFI region. */
     57 # ifndef ENTRY
     58 #  define ENTRY(name)	\
     59 	.type name, @function;	\
     60 	.globl name;	\
     61 	.p2align 4;	\
     62 name:	\
     63 	cfi_startproc
     64 # endif
     65 /* END(name): close the CFI region and set the symbol size to the bytes emitted since name. */
     66 # ifndef END
     67 #  define END(name)	\
     68 	cfi_endproc;	\
     69 	.size name, .-name
     70 # endif
     71 /* CFI bookkeeping for one 4-byte push (CFI_PUSH) or pop (CFI_POP) of REG. */
     72 # define CFI_PUSH(REG)	\
     73 	cfi_adjust_cfa_offset (4);	\
     74 	cfi_rel_offset (REG, 0)
     75 
     76 # define CFI_POP(REG)	\
     77 	cfi_adjust_cfa_offset (-4);	\
     78 	cfi_restore (REG)
     79 /* pushl/popl combined with the matching CFI annotation. */
     80 # define PUSH(REG)	pushl REG; CFI_PUSH (REG)
     81 # define POP(REG)	popl REG; CFI_POP (REG)
     82 /* Name of the emitted function: strcpy unless STRCPY is already defined. */
     83 # ifndef STRCPY
     84 #  define STRCPY  strcpy
     85 # endif
     86 /* PARMS = %esp offset of the first argument once ENTRANCE has run; RETURN/RETURN1 undo the pushes and return. */
     87 # ifdef USE_AS_STRNCPY
     88 #  define PARMS  8	/* return address plus the %ebx saved by ENTRANCE */
     89 #  define ENTRANCE PUSH (%ebx)
     90 #  define RETURN  POP (%ebx); ret; CFI_PUSH (%ebx);	/* CFI_PUSH after ret re-applies the annotation for the code that follows */
     91 #  define RETURN1  POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
     92 # else
     93 #  define PARMS  4	/* return address only */
     94 #  define ENTRANCE
     95 #  define RETURN  ret
     96 #  define RETURN1  POP (%edi); ret; CFI_PUSH (%edi)
     97 # endif
     98 /* SAVE_RESULT / SAVE_RESULT_TAIL: load the return value into %eax. */
     99 # ifdef USE_AS_STPCPY
    100 #  define SAVE_RESULT(n)  lea	n(%edx), %eax	/* stpcpy build: address n bytes past %edx */
    101 #  define SAVE_RESULT_TAIL(n)  lea	n(%edx), %eax
    102 # else
    103 #  define SAVE_RESULT(n)  movl	%edi, %eax	/* n unused: value kept in %edi */
    104 #  define SAVE_RESULT_TAIL(n)  movl	%edx, %eax	/* n unused: %edx itself */
    105 # endif
    106 /* %esp offsets of the arguments: STR1 is loaded into %edx, STR2 into %ecx, LEN into %ebx (strncpy build). */
    107 # define STR1  PARMS
    108 # define STR2  STR1+4
    109 # define LEN  STR2+4
    110 
    111 /* In this code the following instructions are used for copying:
    112 	movb	- 1 byte
    113 	movw	- 2 bytes
    114 	movl	- 4 bytes
    115 	movlpd	- 8 bytes
    116 	movaps	- 16 bytes - requires 16-byte alignment
    117 	of	source and destination addresses.
    118 */
    119 
    120 .text
    121 ENTRY (STRCPY)	/* SSSE3 string copy; USE_AS_* macros select the strncpy/stpcpy/strlcpy/strcat variants */
    122 	ENTRANCE
    123 	mov	STR1(%esp), %edx	/* edx = first argument: where bytes are stored */
    124 	mov	STR2(%esp), %ecx	/* ecx = second argument: where bytes are read */
    125 # ifdef USE_AS_STRNCPY
    126 	movl	LEN(%esp), %ebx	/* ebx = third argument: byte limit */
    127 	cmp	$8, %ebx
    128 	jbe	L(StrncpyExit8Bytes)	/* unsigned limit <= 8 */
    129 # endif
    130 	cmpb	$0, (%ecx)	/* source bytes 0..7: a NUL at index k branches to ExitTail(k+1) */
    131 	jz	L(ExitTail1)
    132 	cmpb	$0, 1(%ecx)
    133 	jz	L(ExitTail2)
    134 	cmpb	$0, 2(%ecx)
    135 	jz	L(ExitTail3)
    136 	cmpb	$0, 3(%ecx)
    137 	jz	L(ExitTail4)
    138 	cmpb	$0, 4(%ecx)
    139 	jz	L(ExitTail5)
    140 	cmpb	$0, 5(%ecx)
    141 	jz	L(ExitTail6)
    142 	cmpb	$0, 6(%ecx)
    143 	jz	L(ExitTail7)
    144 	cmpb	$0, 7(%ecx)
    145 	jz	L(ExitTail8)
    146 # ifdef USE_AS_STRNCPY
    147 	cmp	$16, %ebx
    148 	jb	L(StrncpyExit15Bytes)	/* no NUL in bytes 0..7 and limit < 16 */
    149 # endif
    150 	cmpb	$0, 8(%ecx)	/* source bytes 8..14, same scheme */
    151 	jz	L(ExitTail9)
    152 	cmpb	$0, 9(%ecx)
    153 	jz	L(ExitTail10)
    154 	cmpb	$0, 10(%ecx)
    155 	jz	L(ExitTail11)
    156 	cmpb	$0, 11(%ecx)
    157 	jz	L(ExitTail12)
    158 	cmpb	$0, 12(%ecx)
    159 	jz	L(ExitTail13)
    160 	cmpb	$0, 13(%ecx)
    161 	jz	L(ExitTail14)
    162 	cmpb	$0, 14(%ecx)
    163 	jz	L(ExitTail15)
    164 # if defined USE_AS_STRNCPY && !defined USE_AS_STRLCPY
    165 	cmp	$16, %ebx
    166 	je	L(ExitTail16)	/* limit is exactly 16: byte 15 is not examined */
    167 # endif
    168 	cmpb	$0, 15(%ecx)
    169 	jz	L(ExitTail16)
    170 /* From here on: no NUL in source bytes 0..15. */
    171 # if defined USE_AS_STRNCPY && defined USE_AS_STRLCPY
    172 	cmp	$16, %ebx
    173 	je	L(StrlcpyExitTail16)
    174 # endif
    175 
    176 	PUSH	(%edi)
    177 # ifndef USE_AS_STRLCPY
    178 	mov	%edx, %edi	/* edi = initial edx; SAVE_RESULT reads it in non-stpcpy builds */
    179 # else
    180 	mov	%ecx, %edi	/* strlcpy build: edi = initial ecx instead */
    181 # endif
    182 #endif	/* closes #ifndef USE_AS_STRCAT: everything above is omitted when USE_AS_STRCAT is defined */
    183 	PUSH	(%esi)
    184 #ifdef USE_AS_STRNCPY
    185 	mov	%ecx, %esi
    186 	sub	$16, %ebx
    187 	and	$0xf, %esi	/* esi = ecx & 0xf */
    188 
    189 /* ebx = limit - 16 + (ecx & 0xf) */
    190 
    191 	add	%esi, %ebx
    192 #endif
    193 	lea	16(%ecx), %esi
    194 	and	$-16, %esi	/* esi = first 16-byte boundary above ecx */
    195 	pxor	%xmm0, %xmm0	/* xmm0 = 0: the pattern pcmpeqb matches NUL bytes against */
    196 	movlpd	(%ecx), %xmm1	/* copy source bytes 0..7 */
    197 	movlpd	%xmm1, (%edx)
    198 
    199 	pcmpeqb	(%esi), %xmm0	/* flag NUL bytes in the aligned 16 bytes at esi */
    200 	movlpd	8(%ecx), %xmm1	/* copy source bytes 8..15 */
    201 	movlpd	%xmm1, 8(%edx)
    202 
    203 	pmovmskb %xmm0, %eax	/* eax = one bit per flagged byte */
    204 	sub	%ecx, %esi	/* esi = distance from ecx to that boundary (1..16) */
    205 
    206 #ifdef USE_AS_STRNCPY
    207 	sub	$16, %ebx
    208 	jbe	L(CopyFrom1To16BytesCase2OrCase3)	/* taken when ebx was <= 16 before the subtraction */
    209 #endif
    210 	test	%eax, %eax
    211 	jnz	L(CopyFrom1To16Bytes)	/* a NUL was flagged in the chunk at the boundary */
    212 
    213 	mov	%edx, %eax
    214 	lea	16(%edx), %edx
    215 	and	$-16, %edx	/* edx = first 16-byte boundary above the old edx */
    216 	sub	%edx, %eax	/* eax = old edx - new edx, i.e. -16..-1 */
    217 
    218 #ifdef USE_AS_STRNCPY
    219 	add	%eax, %esi
    220 	lea	-1(%esi), %esi
    221 	and	$1<<31, %esi	/* keep only the sign bit of (esi + eax - 1) */
    222 	test	%esi, %esi
    223 	jnz	L(ContinueCopy)	/* negative: leave ebx as is */
    224 	lea	16(%ebx), %ebx	/* otherwise add 16 back to ebx */
    225 
    226 L(ContinueCopy):
    227 #endif
    228 	sub	%eax, %ecx	/* eax is negative: ecx advances by as many bytes as edx did */
    229 	mov	%ecx, %eax
    230 	and	$0xf, %eax	/* eax = ecx & 0xf; edx is 16-byte aligned at this point */
    231 	mov	$0, %esi	/* esi = 0; mov leaves the flags of the and above intact for the jz below */
    232 
    233 /* case: ecx_offset == edx_offset */
    234 
    235 	jz	L(Align16Both)
    236 /* Otherwise dispatch on eax = 1..15 to the matching L(ShlN) path. */
    237 	cmp	$8, %eax
    238 	jae	L(ShlHigh8)
    239 	cmp	$1, %eax
    240 	je	L(Shl1)
    241 	cmp	$2, %eax
    242 	je	L(Shl2)
    243 	cmp	$3, %eax
    244 	je	L(Shl3)
    245 	cmp	$4, %eax
    246 	je	L(Shl4)
    247 	cmp	$5, %eax
    248 	je	L(Shl5)
    249 	cmp	$6, %eax
    250 	je	L(Shl6)
    251 	jmp	L(Shl7)
    252 
    253 L(ShlHigh8):
    254 	je	L(Shl8)	/* flags still come from the cmp $8 above (jae does not change them) */
    255 	cmp	$9, %eax
    256 	je	L(Shl9)
    257 	cmp	$10, %eax
    258 	je	L(Shl10)
    259 	cmp	$11, %eax
    260 	je	L(Shl11)
    261 	cmp	$12, %eax
    262 	je	L(Shl12)
    263 	cmp	$13, %eax
    264 	je	L(Shl13)
    265 	cmp	$14, %eax
    266 	je	L(Shl14)
    267 	jmp	L(Shl15)
    268 
    269 L(Align16Both):	/* ecx and edx both 16-byte aligned, esi = 0: unrolled 16-byte steps ahead of the 64-byte loop */
    270 	movaps	(%ecx), %xmm1
    271 	movaps	16(%ecx), %xmm2
    272 	movaps	%xmm1, (%edx)
    273 	pcmpeqb	%xmm2, %xmm0	/* flag NUL bytes of the chunk that is loaded but not yet stored */
    274 	pmovmskb %xmm0, %eax
    275 	lea	16(%esi), %esi	/* esi = offset of that pending chunk */
    276 #ifdef USE_AS_STRNCPY
    277 	sub	$16, %ebx
    278 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
    279 #endif
    280 	test	%eax, %eax
    281 	jnz	L(CopyFrom1To16Bytes)
    282 /* Each following step: load the next chunk, store the pending one, test the new chunk for a NUL. */
    283 	movaps	16(%ecx, %esi), %xmm3
    284 	movaps	%xmm2, (%edx, %esi)
    285 	pcmpeqb	%xmm3, %xmm0
    286 	pmovmskb %xmm0, %eax
    287 	lea	16(%esi), %esi
    288 #ifdef USE_AS_STRNCPY
    289 	sub	$16, %ebx
    290 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
    291 #endif
    292 	test	%eax, %eax
    293 	jnz	L(CopyFrom1To16Bytes)
    294 
    295 	movaps	16(%ecx, %esi), %xmm4
    296 	movaps	%xmm3, (%edx, %esi)
    297 	pcmpeqb	%xmm4, %xmm0
    298 	pmovmskb %xmm0, %eax
    299 	lea	16(%esi), %esi
    300 #ifdef USE_AS_STRNCPY
    301 	sub	$16, %ebx
    302 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
    303 #endif
    304 	test	%eax, %eax
    305 	jnz	L(CopyFrom1To16Bytes)
    306 
    307 	movaps	16(%ecx, %esi), %xmm1
    308 	movaps	%xmm4, (%edx, %esi)
    309 	pcmpeqb	%xmm1, %xmm0
    310 	pmovmskb %xmm0, %eax
    311 	lea	16(%esi), %esi
    312 #ifdef USE_AS_STRNCPY
    313 	sub	$16, %ebx
    314 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
    315 #endif
    316 	test	%eax, %eax
    317 	jnz	L(CopyFrom1To16Bytes)
    318 
    319 	movaps	16(%ecx, %esi), %xmm2
    320 	movaps	%xmm1, (%edx, %esi)
    321 	pcmpeqb	%xmm2, %xmm0
    322 	pmovmskb %xmm0, %eax
    323 	lea	16(%esi), %esi
    324 #ifdef USE_AS_STRNCPY
    325 	sub	$16, %ebx
    326 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
    327 #endif
    328 	test	%eax, %eax
    329 	jnz	L(CopyFrom1To16Bytes)
    330 
    331 	movaps	16(%ecx, %esi), %xmm3
    332 	movaps	%xmm2, (%edx, %esi)
    333 	pcmpeqb	%xmm3, %xmm0
    334 	pmovmskb %xmm0, %eax
    335 	lea	16(%esi), %esi
    336 #ifdef USE_AS_STRNCPY
    337 	sub	$16, %ebx
    338 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
    339 #endif
    340 	test	%eax, %eax
    341 	jnz	L(CopyFrom1To16Bytes)
    342 /* No NUL so far: store the pending chunk, then round ecx down to a 64-byte boundary for the loop. */
    343 	movaps	%xmm3, (%edx, %esi)
    344 	mov	%ecx, %eax
    345 	lea	16(%ecx, %esi), %ecx
    346 	and	$-0x40, %ecx	/* ecx = (ecx + esi + 16) rounded down to a multiple of 64 */
    347 	sub	%ecx, %eax	/* eax = old ecx - new ecx */
    348 	sub	%eax, %edx	/* edx moves by the same delta as ecx */
    349 #ifdef USE_AS_STRNCPY
    350 	lea	112(%ebx, %eax), %ebx	/* ebx = ebx + 112 + eax */
    351 #endif
    352 	mov	$-0x40, %esi	/* esi = -64; L(Aligned64Leave) adds 16 to it per chunk it skips */
    353 
    354 L(Aligned64Loop):	/* 64 bytes per iteration with aligned loads and stores */
    355 	movaps	(%ecx), %xmm2
    356 	movaps	32(%ecx), %xmm3
    357 	movaps	%xmm2, %xmm4	/* xmm4..xmm7 keep the four chunks in address order */
    358 	movaps	16(%ecx), %xmm5
    359 	movaps	%xmm3, %xmm6
    360 	movaps	48(%ecx), %xmm7
    361 	pminub	%xmm5, %xmm2
    362 	pminub	%xmm7, %xmm3
    363 	pminub	%xmm2, %xmm3	/* xmm3 = bytewise unsigned minimum of all four chunks */
    364 	lea	64(%edx), %edx
    365 	pcmpeqb	%xmm0, %xmm3	/* xmm0 is expected to be all-zero here: zero bytes of the minimum get flagged */
    366 	lea	64(%ecx), %ecx
    367 	pmovmskb %xmm3, %eax
    368 #ifdef USE_AS_STRNCPY
    369 	sub	$64, %ebx
    370 	jbe	L(StrncpyLeaveCase2OrCase3)
    371 #endif
    372 	test	%eax, %eax
    373 	jnz	L(Aligned64Leave)	/* at least one of the four chunks contains a NUL */
    374 	movaps	%xmm4, -64(%edx)	/* pointers were already advanced, hence the negative offsets */
    375 	movaps	%xmm5, -48(%edx)
    376 	movaps	%xmm6, -32(%edx)
    377 	movaps	%xmm7, -16(%edx)
    378 	jmp	L(Aligned64Loop)
    379 
    380 L(Aligned64Leave):	/* test xmm4..xmm7 in order, storing each chunk that has no NUL */
    381 #ifdef USE_AS_STRNCPY
    382 	lea	48(%ebx), %ebx	/* ebx += 48 here, then -16 for every chunk skipped below */
    383 #endif
    384 	pcmpeqb	%xmm4, %xmm0
    385 	pmovmskb %xmm0, %eax
    386 	test	%eax, %eax
    387 	jnz	L(CopyFrom1To16Bytes)	/* NUL in the first chunk */
    388 
    389 	pcmpeqb	%xmm5, %xmm0
    390 #ifdef USE_AS_STRNCPY
    391 	lea	-16(%ebx), %ebx
    392 #endif
    393 	pmovmskb %xmm0, %eax
    394 	movaps	%xmm4, -64(%edx)
    395 	lea	16(%esi), %esi
    396 	test	%eax, %eax
    397 	jnz	L(CopyFrom1To16Bytes)	/* NUL in the second chunk */
    398 
    399 	pcmpeqb	%xmm6, %xmm0
    400 #ifdef USE_AS_STRNCPY
    401 	lea	-16(%ebx), %ebx
    402 #endif
    403 	pmovmskb %xmm0, %eax
    404 	movaps	%xmm5, -48(%edx)
    405 	lea	16(%esi), %esi
    406 	test	%eax, %eax
    407 	jnz	L(CopyFrom1To16Bytes)	/* NUL in the third chunk */
    408 
    409 	movaps	%xmm6, -32(%edx)
    410 	pcmpeqb	%xmm7, %xmm0
    411 #ifdef USE_AS_STRNCPY
    412 	lea	-16(%ebx), %ebx
    413 #endif
    414 	pmovmskb %xmm0, %eax
    415 	lea	16(%esi), %esi
    416 	jmp	L(CopyFrom1To16Bytes)	/* unconditional: when reached from the loop, only the fourth chunk is left */
    417 
    418 	.p2align 4
    419 L(Shl1):	/* dispatch target for (ecx & 0xf) == 1: aligned loads relative to ecx-1, realigned with palignr $1 */
    420 	movaps	-1(%ecx), %xmm1	/* xmm1 = aligned chunk starting at ecx-1 */
    421 	movaps	15(%ecx), %xmm2	/* xmm2 = next aligned chunk */
    422 L(Shl1Start):	/* also the target of the jnz in L(Shl1LoopStart) */
    423 	pcmpeqb	%xmm2, %xmm0	/* flag NUL bytes of xmm2 */
    424 	pmovmskb %xmm0, %eax
    425 	movaps	%xmm2, %xmm3	/* xmm3 = unshifted copy of the current chunk */
    426 #ifdef USE_AS_STRNCPY
    427 	sub	$16, %ebx
    428 	jbe	L(StrncpyExit1Case2OrCase3)
    429 #endif
    430 	test	%eax, %eax
    431 	jnz	L(Shl1LoopExit)
    432 /* No NUL in xmm2: palignr leaves the 16 bytes that start at ecx in xmm2. */
    433 	palignr	$1, %xmm1, %xmm2
    434 	movaps	%xmm3, %xmm1	/* current chunk becomes the previous one */
    435 	movaps	%xmm2, (%edx)
    436 	movaps	31(%ecx), %xmm2
    437 
    438 	pcmpeqb	%xmm2, %xmm0
    439 	lea	16(%edx), %edx
    440 	pmovmskb %xmm0, %eax
    441 	lea	16(%ecx), %ecx
    442 	movaps	%xmm2, %xmm3
    443 #ifdef USE_AS_STRNCPY
    444 	sub	$16, %ebx
    445 	jbe	L(StrncpyExit1Case2OrCase3)
    446 #endif
    447 	test	%eax, %eax
    448 	jnz	L(Shl1LoopExit)
    449 
    450 	palignr	$1, %xmm1, %xmm2
    451 	movaps	%xmm2, (%edx)
    452 	movaps	31(%ecx), %xmm2
    453 	movaps	%xmm3, %xmm1
    454 
    455 	pcmpeqb	%xmm2, %xmm0
    456 	lea	16(%edx), %edx
    457 	pmovmskb %xmm0, %eax
    458 	lea	16(%ecx), %ecx
    459 	movaps	%xmm2, %xmm3
    460 #ifdef USE_AS_STRNCPY
    461 	sub	$16, %ebx
    462 	jbe	L(StrncpyExit1Case2OrCase3)
    463 #endif
    464 	test	%eax, %eax
    465 	jnz	L(Shl1LoopExit)
    466 
    467 	palignr	$1, %xmm1, %xmm2
    468 	movaps	%xmm2, (%edx)
    469 	movaps	31(%ecx), %xmm2
    470 
    471 	pcmpeqb	%xmm2, %xmm0
    472 	lea	16(%edx), %edx
    473 	pmovmskb %xmm0, %eax
    474 	lea	16(%ecx), %ecx
    475 #ifdef USE_AS_STRNCPY
    476 	sub	$16, %ebx
    477 	jbe	L(StrncpyExit1Case2OrCase3)
    478 #endif
    479 	test	%eax, %eax
    480 	jnz	L(Shl1LoopExit)
    481 
    482 	palignr	$1, %xmm3, %xmm2	/* xmm3 was not refreshed in the last step, so it is the previous chunk */
    483 	movaps	%xmm2, (%edx)
    484 	lea	31(%ecx), %ecx
    485 	lea	16(%edx), %edx
    486 /* Re-base for the 64-byte loop: both pointers step back eax bytes so that ecx+15 is 64-byte aligned. */
    487 	mov	%ecx, %eax
    488 	and	$-0x40, %ecx
    489 	sub	%ecx, %eax	/* eax = (ecx + 31) & 63 */
    490 	lea	-15(%ecx), %ecx
    491 	sub	%eax, %edx
    492 #ifdef USE_AS_STRNCPY
    493 	add	%eax, %ebx	/* ebx grows by the same eax bytes */
    494 #endif
    495 	movaps	-1(%ecx), %xmm1	/* xmm1 = aligned chunk preceding the first loop chunk */
    496 
    497 L(Shl1LoopStart):	/* 64 bytes per iteration: four aligned loads, realigned with palignr $1 */
    498 	movaps	15(%ecx), %xmm2
    499 	movaps	31(%ecx), %xmm3
    500 	movaps	%xmm3, %xmm6
    501 	movaps	47(%ecx), %xmm4
    502 	movaps	%xmm4, %xmm7
    503 	movaps	63(%ecx), %xmm5
    504 	pminub	%xmm2, %xmm6
    505 	pminub	%xmm5, %xmm7
    506 	pminub	%xmm6, %xmm7	/* xmm7 = bytewise unsigned minimum of the four chunks */
    507 	pcmpeqb	%xmm0, %xmm7
    508 	pmovmskb %xmm7, %eax
    509 	movaps	%xmm5, %xmm7	/* unshifted last chunk: becomes xmm1 of the next iteration */
    510 	palignr	$1, %xmm4, %xmm5
    511 	palignr	$1, %xmm3, %xmm4
    512 	test	%eax, %eax
    513 	jnz	L(Shl1Start)	/* NUL in these 64 bytes: redo them 16 at a time; xmm1 and xmm2 are still unshifted */
    514 #ifdef USE_AS_STRNCPY
    515 	sub	$64, %ebx
    516 	jbe	L(StrncpyLeave1)
    517 #endif
    518 	palignr	$1, %xmm2, %xmm3
    519 	lea	64(%ecx), %ecx
    520 	palignr	$1, %xmm1, %xmm2
    521 	movaps	%xmm7, %xmm1
    522 	movaps	%xmm5, 48(%edx)
    523 	movaps	%xmm4, 32(%edx)
    524 	movaps	%xmm3, 16(%edx)
    525 	movaps	%xmm2, (%edx)
    526 	lea	64(%edx), %edx
    527 	jmp	L(Shl1LoopStart)
    528 
    529 L(Shl1LoopExit):	/* the chunk at ecx+15 holds a NUL: copy the 15 bytes in front of it with two overlapping 8-byte moves */
    530 	movlpd	(%ecx), %xmm0
    531 	movlpd	%xmm0, (%edx)
    532 	movlpd	7(%ecx), %xmm0
    533 	movlpd	%xmm0, 7(%edx)
    534 	mov	$15, %esi	/* esi = number of bytes just copied */
    535 	jmp	L(CopyFrom1To16Bytes)
    536 
    537 	.p2align 4
    538 L(Shl2):	/* dispatch target for (ecx & 0xf) == 2: aligned loads relative to ecx-2, realigned with palignr $2 */
    539 	movaps	-2(%ecx), %xmm1	/* xmm1 = aligned chunk starting at ecx-2 */
    540 	movaps	14(%ecx), %xmm2	/* xmm2 = next aligned chunk */
    541 L(Shl2Start):	/* also the target of the jnz in L(Shl2LoopStart) */
    542 	pcmpeqb	%xmm2, %xmm0	/* flag NUL bytes of xmm2 */
    543 	pmovmskb %xmm0, %eax
    544 	movaps	%xmm2, %xmm3	/* xmm3 = unshifted copy of the current chunk */
    545 #ifdef USE_AS_STRNCPY
    546 	sub	$16, %ebx
    547 	jbe	L(StrncpyExit2Case2OrCase3)
    548 #endif
    549 	test	%eax, %eax
    550 	jnz	L(Shl2LoopExit)
    551 /* No NUL in xmm2: palignr leaves the 16 bytes that start at ecx in xmm2. */
    552 	palignr	$2, %xmm1, %xmm2
    553 	movaps	%xmm3, %xmm1	/* current chunk becomes the previous one */
    554 	movaps	%xmm2, (%edx)
    555 	movaps	30(%ecx), %xmm2
    556 
    557 	pcmpeqb	%xmm2, %xmm0
    558 	lea	16(%edx), %edx
    559 	pmovmskb %xmm0, %eax
    560 	lea	16(%ecx), %ecx
    561 	movaps	%xmm2, %xmm3
    562 #ifdef USE_AS_STRNCPY
    563 	sub	$16, %ebx
    564 	jbe	L(StrncpyExit2Case2OrCase3)
    565 #endif
    566 	test	%eax, %eax
    567 	jnz	L(Shl2LoopExit)
    568 
    569 	palignr	$2, %xmm1, %xmm2
    570 	movaps	%xmm2, (%edx)
    571 	movaps	30(%ecx), %xmm2
    572 	movaps	%xmm3, %xmm1
    573 
    574 	pcmpeqb	%xmm2, %xmm0
    575 	lea	16(%edx), %edx
    576 	pmovmskb %xmm0, %eax
    577 	lea	16(%ecx), %ecx
    578 	movaps	%xmm2, %xmm3
    579 #ifdef USE_AS_STRNCPY
    580 	sub	$16, %ebx
    581 	jbe	L(StrncpyExit2Case2OrCase3)
    582 #endif
    583 	test	%eax, %eax
    584 	jnz	L(Shl2LoopExit)
    585 
    586 	palignr	$2, %xmm1, %xmm2
    587 	movaps	%xmm2, (%edx)
    588 	movaps	30(%ecx), %xmm2
    589 
    590 	pcmpeqb	%xmm2, %xmm0
    591 	lea	16(%edx), %edx
    592 	pmovmskb %xmm0, %eax
    593 	lea	16(%ecx), %ecx
    594 #ifdef USE_AS_STRNCPY
    595 	sub	$16, %ebx
    596 	jbe	L(StrncpyExit2Case2OrCase3)
    597 #endif
    598 	test	%eax, %eax
    599 	jnz	L(Shl2LoopExit)
    600 
    601 	palignr	$2, %xmm3, %xmm2	/* xmm3 was not refreshed in the last step, so it is the previous chunk */
    602 	movaps	%xmm2, (%edx)
    603 	lea	30(%ecx), %ecx
    604 	lea	16(%edx), %edx
    605 /* Re-base for the 64-byte loop: both pointers step back eax bytes so that ecx+14 is 64-byte aligned. */
    606 	mov	%ecx, %eax
    607 	and	$-0x40, %ecx
    608 	sub	%ecx, %eax	/* eax = (ecx + 30) & 63 */
    609 	lea	-14(%ecx), %ecx
    610 	sub	%eax, %edx
    611 #ifdef USE_AS_STRNCPY
    612 	add	%eax, %ebx	/* ebx grows by the same eax bytes */
    613 #endif
    614 	movaps	-2(%ecx), %xmm1	/* xmm1 = aligned chunk preceding the first loop chunk */
    615 
    616 L(Shl2LoopStart):	/* 64 bytes per iteration: four aligned loads, realigned with palignr $2 */
    617 	movaps	14(%ecx), %xmm2
    618 	movaps	30(%ecx), %xmm3
    619 	movaps	%xmm3, %xmm6
    620 	movaps	46(%ecx), %xmm4
    621 	movaps	%xmm4, %xmm7
    622 	movaps	62(%ecx), %xmm5
    623 	pminub	%xmm2, %xmm6
    624 	pminub	%xmm5, %xmm7
    625 	pminub	%xmm6, %xmm7	/* xmm7 = bytewise unsigned minimum of the four chunks */
    626 	pcmpeqb	%xmm0, %xmm7
    627 	pmovmskb %xmm7, %eax
    628 	movaps	%xmm5, %xmm7	/* unshifted last chunk: becomes xmm1 of the next iteration */
    629 	palignr	$2, %xmm4, %xmm5
    630 	palignr	$2, %xmm3, %xmm4
    631 	test	%eax, %eax
    632 	jnz	L(Shl2Start)	/* NUL in these 64 bytes: redo them 16 at a time; xmm1 and xmm2 are still unshifted */
    633 #ifdef USE_AS_STRNCPY
    634 	sub	$64, %ebx
    635 	jbe	L(StrncpyLeave2)
    636 #endif
    637 	palignr	$2, %xmm2, %xmm3
    638 	lea	64(%ecx), %ecx
    639 	palignr	$2, %xmm1, %xmm2
    640 	movaps	%xmm7, %xmm1
    641 	movaps	%xmm5, 48(%edx)
    642 	movaps	%xmm4, 32(%edx)
    643 	movaps	%xmm3, 16(%edx)
    644 	movaps	%xmm2, (%edx)
    645 	lea	64(%edx), %edx
    646 	jmp	L(Shl2LoopStart)
    647 
    648 L(Shl2LoopExit):	/* the chunk at ecx+14 holds a NUL: copy the 14 bytes in front of it with two overlapping 8-byte moves */
    649 	movlpd	(%ecx), %xmm0
    650 	movlpd	6(%ecx), %xmm1
    651 	movlpd	%xmm0, (%edx)
    652 	movlpd	%xmm1, 6(%edx)
    653 	mov	$14, %esi	/* esi = number of bytes just copied */
    654 	jmp	L(CopyFrom1To16Bytes)
    655 
    656 	.p2align 4
    657 L(Shl3):	/* dispatch target for (ecx & 0xf) == 3: aligned loads relative to ecx-3, realigned with palignr $3 */
    658 	movaps	-3(%ecx), %xmm1	/* xmm1 = aligned chunk starting at ecx-3 */
    659 	movaps	13(%ecx), %xmm2	/* xmm2 = next aligned chunk */
    660 L(Shl3Start):	/* also the target of the jnz in L(Shl3LoopStart) */
    661 	pcmpeqb	%xmm2, %xmm0	/* flag NUL bytes of xmm2 */
    662 	pmovmskb %xmm0, %eax
    663 	movaps	%xmm2, %xmm3	/* xmm3 = unshifted copy of the current chunk */
    664 #ifdef USE_AS_STRNCPY
    665 	sub	$16, %ebx
    666 	jbe	L(StrncpyExit3Case2OrCase3)
    667 #endif
    668 	test	%eax, %eax
    669 	jnz	L(Shl3LoopExit)
    670 /* No NUL in xmm2: palignr leaves the 16 bytes that start at ecx in xmm2. */
    671 	palignr	$3, %xmm1, %xmm2
    672 	movaps	%xmm3, %xmm1	/* current chunk becomes the previous one */
    673 	movaps	%xmm2, (%edx)
    674 	movaps	29(%ecx), %xmm2
    675 
    676 	pcmpeqb	%xmm2, %xmm0
    677 	lea	16(%edx), %edx
    678 	pmovmskb %xmm0, %eax
    679 	lea	16(%ecx), %ecx
    680 	movaps	%xmm2, %xmm3
    681 #ifdef USE_AS_STRNCPY
    682 	sub	$16, %ebx
    683 	jbe	L(StrncpyExit3Case2OrCase3)
    684 #endif
    685 	test	%eax, %eax
    686 	jnz	L(Shl3LoopExit)
    687 
    688 	palignr	$3, %xmm1, %xmm2
    689 	movaps	%xmm2, (%edx)
    690 	movaps	29(%ecx), %xmm2
    691 	movaps	%xmm3, %xmm1
    692 
    693 	pcmpeqb	%xmm2, %xmm0
    694 	lea	16(%edx), %edx
    695 	pmovmskb %xmm0, %eax
    696 	lea	16(%ecx), %ecx
    697 	movaps	%xmm2, %xmm3
    698 #ifdef USE_AS_STRNCPY
    699 	sub	$16, %ebx
    700 	jbe	L(StrncpyExit3Case2OrCase3)
    701 #endif
    702 	test	%eax, %eax
    703 	jnz	L(Shl3LoopExit)
    704 
    705 	palignr	$3, %xmm1, %xmm2
    706 	movaps	%xmm2, (%edx)
    707 	movaps	29(%ecx), %xmm2
    708 
    709 	pcmpeqb	%xmm2, %xmm0
    710 	lea	16(%edx), %edx
    711 	pmovmskb %xmm0, %eax
    712 	lea	16(%ecx), %ecx
    713 #ifdef USE_AS_STRNCPY
    714 	sub	$16, %ebx
    715 	jbe	L(StrncpyExit3Case2OrCase3)
    716 #endif
    717 	test	%eax, %eax
    718 	jnz	L(Shl3LoopExit)
    719 
    720 	palignr	$3, %xmm3, %xmm2	/* xmm3 was not refreshed in the last step, so it is the previous chunk */
    721 	movaps	%xmm2, (%edx)
    722 	lea	29(%ecx), %ecx
    723 	lea	16(%edx), %edx
    724 /* Re-base for the 64-byte loop: both pointers step back eax bytes so that ecx+13 is 64-byte aligned. */
    725 	mov	%ecx, %eax
    726 	and	$-0x40, %ecx
    727 	sub	%ecx, %eax	/* eax = (ecx + 29) & 63 */
    728 	lea	-13(%ecx), %ecx
    729 	sub	%eax, %edx
    730 #ifdef USE_AS_STRNCPY
    731 	add	%eax, %ebx	/* ebx grows by the same eax bytes */
    732 #endif
    733 	movaps	-3(%ecx), %xmm1	/* xmm1 = aligned chunk preceding the first loop chunk */
    734 
    735 L(Shl3LoopStart):	/* 64 bytes per iteration: four aligned loads, realigned with palignr $3 */
    736 	movaps	13(%ecx), %xmm2
    737 	movaps	29(%ecx), %xmm3
    738 	movaps	%xmm3, %xmm6
    739 	movaps	45(%ecx), %xmm4
    740 	movaps	%xmm4, %xmm7
    741 	movaps	61(%ecx), %xmm5
    742 	pminub	%xmm2, %xmm6
    743 	pminub	%xmm5, %xmm7
    744 	pminub	%xmm6, %xmm7	/* xmm7 = bytewise unsigned minimum of the four chunks */
    745 	pcmpeqb	%xmm0, %xmm7
    746 	pmovmskb %xmm7, %eax
    747 	movaps	%xmm5, %xmm7	/* unshifted last chunk: becomes xmm1 of the next iteration */
    748 	palignr	$3, %xmm4, %xmm5
    749 	palignr	$3, %xmm3, %xmm4
    750 	test	%eax, %eax
    751 	jnz	L(Shl3Start)	/* NUL in these 64 bytes: redo them 16 at a time; xmm1 and xmm2 are still unshifted */
    752 #ifdef USE_AS_STRNCPY
    753 	sub	$64, %ebx
    754 	jbe	L(StrncpyLeave3)
    755 #endif
    756 	palignr	$3, %xmm2, %xmm3
    757 	lea	64(%ecx), %ecx
    758 	palignr	$3, %xmm1, %xmm2
    759 	movaps	%xmm7, %xmm1
    760 	movaps	%xmm5, 48(%edx)
    761 	movaps	%xmm4, 32(%edx)
    762 	movaps	%xmm3, 16(%edx)
    763 	movaps	%xmm2, (%edx)
    764 	lea	64(%edx), %edx
    765 	jmp	L(Shl3LoopStart)
    766 
    767 L(Shl3LoopExit):	/* the chunk at ecx+13 holds a NUL: copy the 13 bytes in front of it with two overlapping 8-byte moves */
    768 	movlpd	(%ecx), %xmm0
    769 	movlpd	5(%ecx), %xmm1
    770 	movlpd	%xmm0, (%edx)
    771 	movlpd	%xmm1, 5(%edx)
    772 	mov	$13, %esi	/* esi = number of bytes just copied */
    773 	jmp	L(CopyFrom1To16Bytes)
    774 
    775 	.p2align 4
    776 L(Shl4):	/* dispatch target for (ecx & 0xf) == 4: aligned loads relative to ecx-4, realigned with palignr $4 */
    777 	movaps	-4(%ecx), %xmm1	/* xmm1 = aligned chunk starting at ecx-4 */
    778 	movaps	12(%ecx), %xmm2	/* xmm2 = next aligned chunk */
    779 L(Shl4Start):	/* also the target of the jnz in L(Shl4LoopStart) */
    780 	pcmpeqb	%xmm2, %xmm0	/* flag NUL bytes of xmm2 */
    781 	pmovmskb %xmm0, %eax
    782 	movaps	%xmm2, %xmm3	/* xmm3 = unshifted copy of the current chunk */
    783 #ifdef USE_AS_STRNCPY
    784 	sub	$16, %ebx
    785 	jbe	L(StrncpyExit4Case2OrCase3)
    786 #endif
    787 	test	%eax, %eax
    788 	jnz	L(Shl4LoopExit)
    789 /* No NUL in xmm2: palignr leaves the 16 bytes that start at ecx in xmm2. */
    790 	palignr	$4, %xmm1, %xmm2
    791 	movaps	%xmm3, %xmm1	/* current chunk becomes the previous one */
    792 	movaps	%xmm2, (%edx)
    793 	movaps	28(%ecx), %xmm2
    794 
    795 	pcmpeqb	%xmm2, %xmm0
    796 	lea	16(%edx), %edx
    797 	pmovmskb %xmm0, %eax
    798 	lea	16(%ecx), %ecx
    799 	movaps	%xmm2, %xmm3
    800 #ifdef USE_AS_STRNCPY
    801 	sub	$16, %ebx
    802 	jbe	L(StrncpyExit4Case2OrCase3)
    803 #endif
    804 	test	%eax, %eax
    805 	jnz	L(Shl4LoopExit)
    806 
    807 	palignr	$4, %xmm1, %xmm2
    808 	movaps	%xmm2, (%edx)
    809 	movaps	28(%ecx), %xmm2
    810 	movaps	%xmm3, %xmm1
    811 
    812 	pcmpeqb	%xmm2, %xmm0
    813 	lea	16(%edx), %edx
    814 	pmovmskb %xmm0, %eax
    815 	lea	16(%ecx), %ecx
    816 	movaps	%xmm2, %xmm3
    817 #ifdef USE_AS_STRNCPY
    818 	sub	$16, %ebx
    819 	jbe	L(StrncpyExit4Case2OrCase3)
    820 #endif
    821 	test	%eax, %eax
    822 	jnz	L(Shl4LoopExit)
    823 
    824 	palignr	$4, %xmm1, %xmm2
    825 	movaps	%xmm2, (%edx)
    826 	movaps	28(%ecx), %xmm2
    827 
    828 	pcmpeqb	%xmm2, %xmm0
    829 	lea	16(%edx), %edx
    830 	pmovmskb %xmm0, %eax
    831 	lea	16(%ecx), %ecx
    832 #ifdef USE_AS_STRNCPY
    833 	sub	$16, %ebx
    834 	jbe	L(StrncpyExit4Case2OrCase3)
    835 #endif
    836 	test	%eax, %eax
    837 	jnz	L(Shl4LoopExit)
    838 
    839 	palignr	$4, %xmm3, %xmm2	/* xmm3 was not refreshed in the last step, so it is the previous chunk */
    840 	movaps	%xmm2, (%edx)
    841 	lea	28(%ecx), %ecx
    842 	lea	16(%edx), %edx
    843 /* Re-base for the 64-byte loop: both pointers step back eax bytes so that ecx+12 is 64-byte aligned. */
    844 	mov	%ecx, %eax
    845 	and	$-0x40, %ecx
    846 	sub	%ecx, %eax	/* eax = (ecx + 28) & 63 */
    847 	lea	-12(%ecx), %ecx
    848 	sub	%eax, %edx
    849 #ifdef USE_AS_STRNCPY
    850 	add	%eax, %ebx	/* ebx grows by the same eax bytes */
    851 #endif
    852 	movaps	-4(%ecx), %xmm1	/* xmm1 = aligned chunk preceding the first loop chunk */
    853 
    854 L(Shl4LoopStart):	/* 64 bytes per iteration: four aligned loads, realigned with palignr $4 */
    855 	movaps	12(%ecx), %xmm2
    856 	movaps	28(%ecx), %xmm3
    857 	movaps	%xmm3, %xmm6
    858 	movaps	44(%ecx), %xmm4
    859 	movaps	%xmm4, %xmm7
    860 	movaps	60(%ecx), %xmm5
    861 	pminub	%xmm2, %xmm6
    862 	pminub	%xmm5, %xmm7
    863 	pminub	%xmm6, %xmm7	/* xmm7 = bytewise unsigned minimum of the four chunks */
    864 	pcmpeqb	%xmm0, %xmm7
    865 	pmovmskb %xmm7, %eax
    866 	movaps	%xmm5, %xmm7	/* unshifted last chunk: becomes xmm1 of the next iteration */
    867 	palignr	$4, %xmm4, %xmm5
    868 	palignr	$4, %xmm3, %xmm4
    869 	test	%eax, %eax
    870 	jnz	L(Shl4Start)	/* NUL in these 64 bytes: redo them 16 at a time; xmm1 and xmm2 are still unshifted */
    871 #ifdef USE_AS_STRNCPY
    872 	sub	$64, %ebx
    873 	jbe	L(StrncpyLeave4)
    874 #endif
    875 	palignr	$4, %xmm2, %xmm3
    876 	lea	64(%ecx), %ecx
    877 	palignr	$4, %xmm1, %xmm2
    878 	movaps	%xmm7, %xmm1
    879 	movaps	%xmm5, 48(%edx)
    880 	movaps	%xmm4, 32(%edx)
    881 	movaps	%xmm3, 16(%edx)
    882 	movaps	%xmm2, (%edx)
    883 	lea	64(%edx), %edx
    884 	jmp	L(Shl4LoopStart)
    885 
    886 L(Shl4LoopExit):	/* the chunk at ecx+12 holds a NUL: copy the 12 bytes in front of it as 8 + 4 bytes */
    887 	movlpd	(%ecx), %xmm0
    888 	movl	8(%ecx), %esi	/* esi serves as a temporary; it is overwritten below */
    889 	movlpd	%xmm0, (%edx)
    890 	movl	%esi, 8(%edx)
    891 	mov	$12, %esi	/* esi = number of bytes just copied */
    892 	jmp	L(CopyFrom1To16Bytes)
    893 
    894 	.p2align 4
    895 L(Shl5):	/* dispatch target for (ecx & 0xf) == 5: aligned loads relative to ecx-5, realigned with palignr $5 */
    896 	movaps	-5(%ecx), %xmm1	/* xmm1 = aligned chunk starting at ecx-5 */
    897 	movaps	11(%ecx), %xmm2	/* xmm2 = next aligned chunk */
    898 L(Shl5Start):	/* also the target of the jnz in L(Shl5LoopStart) */
    899 	pcmpeqb	%xmm2, %xmm0	/* flag NUL bytes of xmm2 */
    900 	pmovmskb %xmm0, %eax
    901 	movaps	%xmm2, %xmm3	/* xmm3 = unshifted copy of the current chunk */
    902 #ifdef USE_AS_STRNCPY
    903 	sub	$16, %ebx
    904 	jbe	L(StrncpyExit5Case2OrCase3)
    905 #endif
    906 	test	%eax, %eax
    907 	jnz	L(Shl5LoopExit)
    908 /* No NUL in xmm2: palignr leaves the 16 bytes that start at ecx in xmm2. */
    909 	palignr	$5, %xmm1, %xmm2
    910 	movaps	%xmm3, %xmm1	/* current chunk becomes the previous one */
    911 	movaps	%xmm2, (%edx)
    912 	movaps	27(%ecx), %xmm2
    913 
    914 	pcmpeqb	%xmm2, %xmm0
    915 	lea	16(%edx), %edx
    916 	pmovmskb %xmm0, %eax
    917 	lea	16(%ecx), %ecx
    918 	movaps	%xmm2, %xmm3
    919 #ifdef USE_AS_STRNCPY
    920 	sub	$16, %ebx
    921 	jbe	L(StrncpyExit5Case2OrCase3)
    922 #endif
    923 	test	%eax, %eax
    924 	jnz	L(Shl5LoopExit)
    925 
    926 	palignr	$5, %xmm1, %xmm2
    927 	movaps	%xmm2, (%edx)
    928 	movaps	27(%ecx), %xmm2
    929 	movaps	%xmm3, %xmm1
    930 
    931 	pcmpeqb	%xmm2, %xmm0
    932 	lea	16(%edx), %edx
    933 	pmovmskb %xmm0, %eax
    934 	lea	16(%ecx), %ecx
    935 	movaps	%xmm2, %xmm3
    936 #ifdef USE_AS_STRNCPY
    937 	sub	$16, %ebx
    938 	jbe	L(StrncpyExit5Case2OrCase3)
    939 #endif
    940 	test	%eax, %eax
    941 	jnz	L(Shl5LoopExit)
    942 
    943 	palignr	$5, %xmm1, %xmm2
    944 	movaps	%xmm2, (%edx)
    945 	movaps	27(%ecx), %xmm2
    946 
    947 	pcmpeqb	%xmm2, %xmm0
    948 	lea	16(%edx), %edx
    949 	pmovmskb %xmm0, %eax
    950 	lea	16(%ecx), %ecx
    951 #ifdef USE_AS_STRNCPY
    952 	sub	$16, %ebx
    953 	jbe	L(StrncpyExit5Case2OrCase3)
    954 #endif
    955 	test	%eax, %eax
    956 	jnz	L(Shl5LoopExit)
    957 
    958 	palignr	$5, %xmm3, %xmm2	/* xmm3 was not refreshed in the last step, so it is the previous chunk */
    959 	movaps	%xmm2, (%edx)
    960 	lea	27(%ecx), %ecx
    961 	lea	16(%edx), %edx
    962 /* Re-base for the 64-byte loop: both pointers step back eax bytes so that ecx+11 is 64-byte aligned. */
    963 	mov	%ecx, %eax
    964 	and	$-0x40, %ecx
    965 	sub	%ecx, %eax	/* eax = (ecx + 27) & 63 */
    966 	lea	-11(%ecx), %ecx
    967 	sub	%eax, %edx
    968 #ifdef USE_AS_STRNCPY
    969 	add	%eax, %ebx	/* ebx grows by the same eax bytes */
    970 #endif
    971 	movaps	-5(%ecx), %xmm1	/* xmm1 = aligned chunk preceding the first loop chunk */
    972 
    973 L(Shl5LoopStart):	/* 64 bytes per iteration: four aligned loads, realigned with palignr $5 */
    974 	movaps	11(%ecx), %xmm2
    975 	movaps	27(%ecx), %xmm3
    976 	movaps	%xmm3, %xmm6
    977 	movaps	43(%ecx), %xmm4
    978 	movaps	%xmm4, %xmm7
    979 	movaps	59(%ecx), %xmm5
    980 	pminub	%xmm2, %xmm6
    981 	pminub	%xmm5, %xmm7
    982 	pminub	%xmm6, %xmm7	/* xmm7 = bytewise unsigned minimum of the four chunks */
    983 	pcmpeqb	%xmm0, %xmm7
    984 	pmovmskb %xmm7, %eax
    985 	movaps	%xmm5, %xmm7	/* unshifted last chunk: becomes xmm1 of the next iteration */
    986 	palignr	$5, %xmm4, %xmm5
    987 	palignr	$5, %xmm3, %xmm4
    988 	test	%eax, %eax
    989 	jnz	L(Shl5Start)	/* NUL in these 64 bytes: redo them 16 at a time; xmm1 and xmm2 are still unshifted */
    990 #ifdef USE_AS_STRNCPY
    991 	sub	$64, %ebx
    992 	jbe	L(StrncpyLeave5)
    993 #endif
    994 	palignr	$5, %xmm2, %xmm3
    995 	lea	64(%ecx), %ecx
    996 	palignr	$5, %xmm1, %xmm2
    997 	movaps	%xmm7, %xmm1
    998 	movaps	%xmm5, 48(%edx)
    999 	movaps	%xmm4, 32(%edx)
   1000 	movaps	%xmm3, 16(%edx)
   1001 	movaps	%xmm2, (%edx)
   1002 	lea	64(%edx), %edx
   1003 	jmp	L(Shl5LoopStart)
   1004 
   1005 L(Shl5LoopExit):	/* the chunk at ecx+11 holds a NUL: copy the 11 bytes in front of it as 8 bytes plus an overlapping 4 */
   1006 	movlpd	(%ecx), %xmm0
   1007 	movl	7(%ecx), %esi	/* esi serves as a temporary; it is overwritten below */
   1008 	movlpd	%xmm0, (%edx)
   1009 	movl	%esi, 7(%edx)
   1010 	mov	$11, %esi	/* esi = number of bytes just copied */
   1011 	jmp	L(CopyFrom1To16Bytes)
   1012 
   1013 	.p2align 4
   1014 L(Shl6):	/* dispatch target for (ecx & 0xf) == 6: aligned loads relative to ecx-6, realigned with palignr $6 */
   1015 	movaps	-6(%ecx), %xmm1	/* xmm1 = aligned chunk starting at ecx-6 */
   1016 	movaps	10(%ecx), %xmm2	/* xmm2 = next aligned chunk */
   1017 L(Shl6Start):	/* also the target of the jnz in L(Shl6LoopStart) */
   1018 	pcmpeqb	%xmm2, %xmm0	/* flag NUL bytes of xmm2 */
   1019 	pmovmskb %xmm0, %eax
   1020 	movaps	%xmm2, %xmm3	/* xmm3 = unshifted copy of the current chunk */
   1021 #ifdef USE_AS_STRNCPY
   1022 	sub	$16, %ebx
   1023 	jbe	L(StrncpyExit6Case2OrCase3)
   1024 #endif
   1025 	test	%eax, %eax
   1026 	jnz	L(Shl6LoopExit)
   1027 /* No NUL in xmm2: palignr leaves the 16 bytes that start at ecx in xmm2. */
   1028 	palignr	$6, %xmm1, %xmm2
   1029 	movaps	%xmm3, %xmm1	/* current chunk becomes the previous one */
   1030 	movaps	%xmm2, (%edx)
   1031 	movaps	26(%ecx), %xmm2
   1032 
   1033 	pcmpeqb	%xmm2, %xmm0
   1034 	lea	16(%edx), %edx
   1035 	pmovmskb %xmm0, %eax
   1036 	lea	16(%ecx), %ecx
   1037 	movaps	%xmm2, %xmm3
   1038 #ifdef USE_AS_STRNCPY
   1039 	sub	$16, %ebx
   1040 	jbe	L(StrncpyExit6Case2OrCase3)
   1041 #endif
   1042 	test	%eax, %eax
   1043 	jnz	L(Shl6LoopExit)
   1044 
   1045 	palignr	$6, %xmm1, %xmm2
   1046 	movaps	%xmm2, (%edx)
   1047 	movaps	26(%ecx), %xmm2
   1048 	movaps	%xmm3, %xmm1
   1049 
   1050 	pcmpeqb	%xmm2, %xmm0
   1051 	lea	16(%edx), %edx
   1052 	pmovmskb %xmm0, %eax
   1053 	lea	16(%ecx), %ecx
   1054 	movaps	%xmm2, %xmm3
   1055 #ifdef USE_AS_STRNCPY
   1056 	sub	$16, %ebx
   1057 	jbe	L(StrncpyExit6Case2OrCase3)
   1058 #endif
   1059 	test	%eax, %eax
   1060 	jnz	L(Shl6LoopExit)
   1061 
   1062 	palignr	$6, %xmm1, %xmm2
   1063 	movaps	%xmm2, (%edx)
   1064 	movaps	26(%ecx), %xmm2
   1065 
   1066 	pcmpeqb	%xmm2, %xmm0
   1067 	lea	16(%edx), %edx
   1068 	pmovmskb %xmm0, %eax
   1069 	lea	16(%ecx), %ecx
   1070 #ifdef USE_AS_STRNCPY
   1071 	sub	$16, %ebx
   1072 	jbe	L(StrncpyExit6Case2OrCase3)
   1073 #endif
   1074 	test	%eax, %eax
   1075 	jnz	L(Shl6LoopExit)
   1076 
   1077 	palignr	$6, %xmm3, %xmm2	/* xmm3 was not refreshed in the last step, so it is the previous chunk */
   1078 	movaps	%xmm2, (%edx)
   1079 	lea	26(%ecx), %ecx
   1080 	lea	16(%edx), %edx
   1081 /* Re-base for the 64-byte loop: both pointers step back eax bytes so that ecx+10 is 64-byte aligned. */
   1082 	mov	%ecx, %eax
   1083 	and	$-0x40, %ecx
   1084 	sub	%ecx, %eax	/* eax = (ecx + 26) & 63 */
   1085 	lea	-10(%ecx), %ecx
   1086 	sub	%eax, %edx
   1087 #ifdef USE_AS_STRNCPY
   1088 	add	%eax, %ebx	/* ebx grows by the same eax bytes */
   1089 #endif
   1090 	movaps	-6(%ecx), %xmm1	/* xmm1 = aligned chunk preceding the first loop chunk */
   1091 
   1092 L(Shl6LoopStart):	/* 64 bytes per iteration: four aligned loads, realigned with palignr $6 */
   1093 	movaps	10(%ecx), %xmm2
   1094 	movaps	26(%ecx), %xmm3
   1095 	movaps	%xmm3, %xmm6
   1096 	movaps	42(%ecx), %xmm4
   1097 	movaps	%xmm4, %xmm7
   1098 	movaps	58(%ecx), %xmm5
   1099 	pminub	%xmm2, %xmm6
   1100 	pminub	%xmm5, %xmm7
   1101 	pminub	%xmm6, %xmm7	/* xmm7 = bytewise unsigned minimum of the four chunks */
   1102 	pcmpeqb	%xmm0, %xmm7
   1103 	pmovmskb %xmm7, %eax
   1104 	movaps	%xmm5, %xmm7	/* unshifted last chunk: becomes xmm1 of the next iteration */
   1105 	palignr	$6, %xmm4, %xmm5
   1106 	palignr	$6, %xmm3, %xmm4
   1107 	test	%eax, %eax
   1108 	jnz	L(Shl6Start)	/* NUL in these 64 bytes: redo them 16 at a time; xmm1 and xmm2 are still unshifted */
   1109 #ifdef USE_AS_STRNCPY
   1110 	sub	$64, %ebx
   1111 	jbe	L(StrncpyLeave6)
   1112 #endif
   1113 	palignr	$6, %xmm2, %xmm3
   1114 	lea	64(%ecx), %ecx
   1115 	palignr	$6, %xmm1, %xmm2
   1116 	movaps	%xmm7, %xmm1
   1117 	movaps	%xmm5, 48(%edx)
   1118 	movaps	%xmm4, 32(%edx)
   1119 	movaps	%xmm3, 16(%edx)
   1120 	movaps	%xmm2, (%edx)
   1121 	lea	64(%edx), %edx
   1122 	jmp	L(Shl6LoopStart)
   1123 
   1124 L(Shl6LoopExit):	/* the chunk at ecx+10 holds a NUL: copy the 10 bytes in front of it as 8 bytes plus an overlapping 4 */
   1125 	movlpd	(%ecx), %xmm0
   1126 	movl	6(%ecx), %esi	/* esi serves as a temporary; it is overwritten below */
   1127 	movlpd	%xmm0, (%edx)
   1128 	movl	%esi, 6(%edx)
   1129 	mov	$10, %esi	/* esi = number of bytes just copied */
   1130 	jmp	L(CopyFrom1To16Bytes)
   1131 
   1132 	.p2align 4
        /*
         * Shl7: copy path for a source cursor %ecx that lies 7 bytes above a
         * 16-byte boundary (the movaps loads at -7(%ecx) and 9(%ecx) need
         * aligned addresses); the destination cursor %edx is written with
         * movaps as well.  Each 16-byte output block is stitched together
         * from two neighbouring aligned source blocks with "palignr $7".
         *
         * Register use visible here:
         *   %eax   pmovmskb mask of the block just tested
         *   %ebx   remaining count (USE_AS_STRNCPY builds only)
         *   %xmm0  comparand for the terminator test -- presumably all-zero
         *          on entry (set up outside this chunk; TODO confirm)
         *   %xmm1  previous aligned source block, %xmm2 the current one
         *
         * Flow: four unrolled 16-byte steps, then the cursors are moved back
         * to a 64-byte source boundary and Shl7LoopStart processes 64 bytes
         * per iteration.  A hit in the mask leaves through Shl7LoopExit; the
         * StrncpyExit7Case2OrCase3 and StrncpyLeave7 targets are defined
         * outside this chunk.
         */
   1133 L(Shl7):
   1134 	movaps	-7(%ecx), %xmm1	/* aligned block whose top 9 bytes start at (%ecx) */
   1135 	movaps	9(%ecx), %xmm2	/* the aligned block after it */
   1136 L(Shl7Start):
   1137 	pcmpeqb	%xmm2, %xmm0	/* flag bytes of %xmm2 that equal %xmm0's */
   1138 	pmovmskb %xmm0, %eax	/* %eax = bit mask of flagged bytes */
   1139 	movaps	%xmm2, %xmm3	/* keep the block unshifted */
   1140 #ifdef USE_AS_STRNCPY
   1141 	sub	$16, %ebx	/* charge this block against the count */
   1142 	jbe	L(StrncpyExit7Case2OrCase3)	/* taken when %ebx was <= 16 */
   1143 #endif
   1144 	test	%eax, %eax
   1145 	jnz	L(Shl7LoopExit)	/* mask non-zero: finish in the tail code */
   1146 
   1147 	palignr	$7, %xmm1, %xmm2	/* %xmm2 = 16 source bytes starting at (%ecx) */
   1148 	movaps	%xmm3, %xmm1	/* current block becomes the previous one */
   1149 	movaps	%xmm2, (%edx)
   1150 	movaps	25(%ecx), %xmm2
   1151 	/* -- 16-byte step 2 -- */
   1152 	pcmpeqb	%xmm2, %xmm0
   1153 	lea	16(%edx), %edx
   1154 	pmovmskb %xmm0, %eax
   1155 	lea	16(%ecx), %ecx
   1156 	movaps	%xmm2, %xmm3
   1157 #ifdef USE_AS_STRNCPY
   1158 	sub	$16, %ebx
   1159 	jbe	L(StrncpyExit7Case2OrCase3)
   1160 #endif
   1161 	test	%eax, %eax
   1162 	jnz	L(Shl7LoopExit)
   1163 
   1164 	palignr	$7, %xmm1, %xmm2
   1165 	movaps	%xmm2, (%edx)
   1166 	movaps	25(%ecx), %xmm2
   1167 	movaps	%xmm3, %xmm1
   1168 	/* -- 16-byte step 3 -- */
   1169 	pcmpeqb	%xmm2, %xmm0
   1170 	lea	16(%edx), %edx
   1171 	pmovmskb %xmm0, %eax
   1172 	lea	16(%ecx), %ecx
   1173 	movaps	%xmm2, %xmm3
   1174 #ifdef USE_AS_STRNCPY
   1175 	sub	$16, %ebx
   1176 	jbe	L(StrncpyExit7Case2OrCase3)
   1177 #endif
   1178 	test	%eax, %eax
   1179 	jnz	L(Shl7LoopExit)
   1180 
   1181 	palignr	$7, %xmm1, %xmm2
   1182 	movaps	%xmm2, (%edx)
   1183 	movaps	25(%ecx), %xmm2
   1184 	/* -- 16-byte step 4 (%xmm3 is not refreshed; it keeps step 3's block) -- */
   1185 	pcmpeqb	%xmm2, %xmm0
   1186 	lea	16(%edx), %edx
   1187 	pmovmskb %xmm0, %eax
   1188 	lea	16(%ecx), %ecx
   1189 #ifdef USE_AS_STRNCPY
   1190 	sub	$16, %ebx
   1191 	jbe	L(StrncpyExit7Case2OrCase3)
   1192 #endif
   1193 	test	%eax, %eax
   1194 	jnz	L(Shl7LoopExit)
   1195 
   1196 	palignr	$7, %xmm3, %xmm2	/* previous block is taken from %xmm3 here */
   1197 	movaps	%xmm2, (%edx)
   1198 	lea	25(%ecx), %ecx	/* %ecx = address of the next aligned source block */
   1199 	lea	16(%edx), %edx
   1200 	/* -- move both cursors back to a 64-byte source boundary -- */
   1201 	mov	%ecx, %eax
   1202 	and	$-0x40, %ecx	/* round the source address down to a multiple of 64 */
   1203 	sub	%ecx, %eax	/* %eax = bytes dropped by the rounding */
   1204 	lea	-9(%ecx), %ecx	/* re-bias so that 9(%ecx) is the 64-byte-aligned block */
   1205 	sub	%eax, %edx	/* destination goes back by the same amount */
   1206 #ifdef USE_AS_STRNCPY
   1207 	add	%eax, %ebx	/* and the count gets those bytes back */
   1208 #endif
   1209 	movaps	-7(%ecx), %xmm1	/* block preceding the first loop block */
   1210 
   1211 L(Shl7LoopStart):
   1212 	movaps	9(%ecx), %xmm2	/* four consecutive aligned source blocks */
   1213 	movaps	25(%ecx), %xmm3
   1214 	movaps	%xmm3, %xmm6
   1215 	movaps	41(%ecx), %xmm4
   1216 	movaps	%xmm4, %xmm7
   1217 	movaps	57(%ecx), %xmm5
   1218 	pminub	%xmm2, %xmm6	/* bytewise minimum of all four blocks ... */
   1219 	pminub	%xmm5, %xmm7
   1220 	pminub	%xmm6, %xmm7	/* ... ends up in %xmm7 */
   1221 	pcmpeqb	%xmm0, %xmm7	/* compare the minimum against %xmm0 */
   1222 	pmovmskb %xmm7, %eax	/* with %xmm0 == 0: non-zero iff one of the 64 bytes is 0 */
   1223 	movaps	%xmm5, %xmm7	/* unshifted last block, next iteration's %xmm1 */
   1224 	palignr	$7, %xmm4, %xmm5
   1225 	palignr	$7, %xmm3, %xmm4
   1226 	test	%eax, %eax
   1227 	jnz	L(Shl7Start)	/* hit: redo this group in 16-byte steps (%xmm1/%xmm2 are set up) */
   1228 #ifdef USE_AS_STRNCPY
   1229 	sub	$64, %ebx
   1230 	jbe	L(StrncpyLeave7)	/* taken when %ebx was <= 64 */
   1231 #endif
   1232 	palignr	$7, %xmm2, %xmm3
   1233 	lea	64(%ecx), %ecx
   1234 	palignr	$7, %xmm1, %xmm2
   1235 	movaps	%xmm7, %xmm1
   1236 	movaps	%xmm5, 48(%edx)	/* store the four realigned blocks */
   1237 	movaps	%xmm4, 32(%edx)
   1238 	movaps	%xmm3, 16(%edx)
   1239 	movaps	%xmm2, (%edx)
   1240 	lea	64(%edx), %edx
   1241 	jmp	L(Shl7LoopStart)
   1242 
   1243 L(Shl7LoopExit):
   1244 	movlpd	(%ecx), %xmm0	/* bytes 0..7 in front of the tested block */
   1245 	movl	5(%ecx), %esi	/* bytes 5..8 (overlaps; 9 bytes in total) */
   1246 	movlpd	%xmm0, (%edx)
   1247 	movl	%esi, 5(%edx)
   1248 	mov	$9, %esi	/* number of bytes copied above */
   1249 	jmp	L(CopyFrom1To16Bytes)
   1250 
   1251 	.p2align 4
        /*
         * Shl8: copy path for a source cursor %ecx that lies 8 bytes above a
         * 16-byte boundary (the movaps loads at -8(%ecx) and 8(%ecx) need
         * aligned addresses); the destination cursor %edx is written with
         * movaps as well.  Each 16-byte output block is stitched together
         * from two neighbouring aligned source blocks with "palignr $8".
         *
         * Register use visible here:
         *   %eax   pmovmskb mask of the block just tested
         *   %ebx   remaining count (USE_AS_STRNCPY builds only)
         *   %xmm0  comparand for the terminator test -- presumably all-zero
         *          on entry (set up outside this chunk; TODO confirm)
         *   %xmm1  previous aligned source block, %xmm2 the current one
         *
         * Flow: four unrolled 16-byte steps, then the cursors are moved back
         * to a 64-byte source boundary and Shl8LoopStart processes 64 bytes
         * per iteration.  A hit in the mask leaves through Shl8LoopExit; the
         * StrncpyExit8Case2OrCase3 and StrncpyLeave8 targets are defined
         * outside this chunk.
         */
   1252 L(Shl8):
   1253 	movaps	-8(%ecx), %xmm1	/* aligned block whose top 8 bytes start at (%ecx) */
   1254 	movaps	8(%ecx), %xmm2	/* the aligned block after it */
   1255 L(Shl8Start):
   1256 	pcmpeqb	%xmm2, %xmm0	/* flag bytes of %xmm2 that equal %xmm0's */
   1257 	pmovmskb %xmm0, %eax	/* %eax = bit mask of flagged bytes */
   1258 	movaps	%xmm2, %xmm3	/* keep the block unshifted */
   1259 #ifdef USE_AS_STRNCPY
   1260 	sub	$16, %ebx	/* charge this block against the count */
   1261 	jbe	L(StrncpyExit8Case2OrCase3)	/* taken when %ebx was <= 16 */
   1262 #endif
   1263 	test	%eax, %eax
   1264 	jnz	L(Shl8LoopExit)	/* mask non-zero: finish in the tail code */
   1265 
   1266 	palignr	$8, %xmm1, %xmm2	/* %xmm2 = 16 source bytes starting at (%ecx) */
   1267 	movaps	%xmm3, %xmm1	/* current block becomes the previous one */
   1268 	movaps	%xmm2, (%edx)
   1269 	movaps	24(%ecx), %xmm2
   1270 	/* -- 16-byte step 2 -- */
   1271 	pcmpeqb	%xmm2, %xmm0
   1272 	lea	16(%edx), %edx
   1273 	pmovmskb %xmm0, %eax
   1274 	lea	16(%ecx), %ecx
   1275 	movaps	%xmm2, %xmm3
   1276 #ifdef USE_AS_STRNCPY
   1277 	sub	$16, %ebx
   1278 	jbe	L(StrncpyExit8Case2OrCase3)
   1279 #endif
   1280 	test	%eax, %eax
   1281 	jnz	L(Shl8LoopExit)
   1282 
   1283 	palignr	$8, %xmm1, %xmm2
   1284 	movaps	%xmm2, (%edx)
   1285 	movaps	24(%ecx), %xmm2
   1286 	movaps	%xmm3, %xmm1
   1287 	/* -- 16-byte step 3 -- */
   1288 	pcmpeqb	%xmm2, %xmm0
   1289 	lea	16(%edx), %edx
   1290 	pmovmskb %xmm0, %eax
   1291 	lea	16(%ecx), %ecx
   1292 	movaps	%xmm2, %xmm3
   1293 #ifdef USE_AS_STRNCPY
   1294 	sub	$16, %ebx
   1295 	jbe	L(StrncpyExit8Case2OrCase3)
   1296 #endif
   1297 	test	%eax, %eax
   1298 	jnz	L(Shl8LoopExit)
   1299 
   1300 	palignr	$8, %xmm1, %xmm2
   1301 	movaps	%xmm2, (%edx)
   1302 	movaps	24(%ecx), %xmm2
   1303 	/* -- 16-byte step 4 (%xmm3 is not refreshed; it keeps step 3's block) -- */
   1304 	pcmpeqb	%xmm2, %xmm0
   1305 	lea	16(%edx), %edx
   1306 	pmovmskb %xmm0, %eax
   1307 	lea	16(%ecx), %ecx
   1308 #ifdef USE_AS_STRNCPY
   1309 	sub	$16, %ebx
   1310 	jbe	L(StrncpyExit8Case2OrCase3)
   1311 #endif
   1312 	test	%eax, %eax
   1313 	jnz	L(Shl8LoopExit)
   1314 
   1315 	palignr	$8, %xmm3, %xmm2	/* previous block is taken from %xmm3 here */
   1316 	movaps	%xmm2, (%edx)
   1317 	lea	24(%ecx), %ecx	/* %ecx = address of the next aligned source block */
   1318 	lea	16(%edx), %edx
   1319 	/* -- move both cursors back to a 64-byte source boundary -- */
   1320 	mov	%ecx, %eax
   1321 	and	$-0x40, %ecx	/* round the source address down to a multiple of 64 */
   1322 	sub	%ecx, %eax	/* %eax = bytes dropped by the rounding */
   1323 	lea	-8(%ecx), %ecx	/* re-bias so that 8(%ecx) is the 64-byte-aligned block */
   1324 	sub	%eax, %edx	/* destination goes back by the same amount */
   1325 #ifdef USE_AS_STRNCPY
   1326 	add	%eax, %ebx	/* and the count gets those bytes back */
   1327 #endif
   1328 	movaps	-8(%ecx), %xmm1	/* block preceding the first loop block */
   1329 
   1330 L(Shl8LoopStart):
   1331 	movaps	8(%ecx), %xmm2	/* four consecutive aligned source blocks */
   1332 	movaps	24(%ecx), %xmm3
   1333 	movaps	%xmm3, %xmm6
   1334 	movaps	40(%ecx), %xmm4
   1335 	movaps	%xmm4, %xmm7
   1336 	movaps	56(%ecx), %xmm5
   1337 	pminub	%xmm2, %xmm6	/* bytewise minimum of all four blocks ... */
   1338 	pminub	%xmm5, %xmm7
   1339 	pminub	%xmm6, %xmm7	/* ... ends up in %xmm7 */
   1340 	pcmpeqb	%xmm0, %xmm7	/* compare the minimum against %xmm0 */
   1341 	pmovmskb %xmm7, %eax	/* with %xmm0 == 0: non-zero iff one of the 64 bytes is 0 */
   1342 	movaps	%xmm5, %xmm7	/* unshifted last block, next iteration's %xmm1 */
   1343 	palignr	$8, %xmm4, %xmm5
   1344 	palignr	$8, %xmm3, %xmm4
   1345 	test	%eax, %eax
   1346 	jnz	L(Shl8Start)	/* hit: redo this group in 16-byte steps (%xmm1/%xmm2 are set up) */
   1347 #ifdef USE_AS_STRNCPY
   1348 	sub	$64, %ebx
   1349 	jbe	L(StrncpyLeave8)	/* taken when %ebx was <= 64 */
   1350 #endif
   1351 	palignr	$8, %xmm2, %xmm3
   1352 	lea	64(%ecx), %ecx
   1353 	palignr	$8, %xmm1, %xmm2
   1354 	movaps	%xmm7, %xmm1
   1355 	movaps	%xmm5, 48(%edx)	/* store the four realigned blocks */
   1356 	movaps	%xmm4, 32(%edx)
   1357 	movaps	%xmm3, 16(%edx)
   1358 	movaps	%xmm2, (%edx)
   1359 	lea	64(%edx), %edx
   1360 	jmp	L(Shl8LoopStart)
   1361 
   1362 L(Shl8LoopExit):
   1363 	movlpd	(%ecx), %xmm0	/* the 8 bytes in front of the tested block */
   1364 	movlpd	%xmm0, (%edx)
   1365 	mov	$8, %esi	/* number of bytes copied above */
   1366 	jmp	L(CopyFrom1To16Bytes)
   1367 
   1368 	.p2align 4
        /*
         * Shl9: copy path for a source cursor %ecx that lies 9 bytes above a
         * 16-byte boundary (the movaps loads at -9(%ecx) and 7(%ecx) need
         * aligned addresses); the destination cursor %edx is written with
         * movaps as well.  Each 16-byte output block is stitched together
         * from two neighbouring aligned source blocks with "palignr $9".
         *
         * Register use visible here:
         *   %eax   pmovmskb mask of the block just tested
         *   %ebx   remaining count (USE_AS_STRNCPY builds only)
         *   %xmm0  comparand for the terminator test -- presumably all-zero
         *          on entry (set up outside this chunk; TODO confirm)
         *   %xmm1  previous aligned source block, %xmm2 the current one
         *
         * Flow: four unrolled 16-byte steps, then the cursors are moved back
         * to a 64-byte source boundary and Shl9LoopStart processes 64 bytes
         * per iteration.  A hit in the mask leaves through Shl9LoopExit; the
         * StrncpyExit9Case2OrCase3 and StrncpyLeave9 targets are defined
         * outside this chunk.
         */
   1369 L(Shl9):
   1370 	movaps	-9(%ecx), %xmm1	/* aligned block whose top 7 bytes start at (%ecx) */
   1371 	movaps	7(%ecx), %xmm2	/* the aligned block after it */
   1372 L(Shl9Start):
   1373 	pcmpeqb	%xmm2, %xmm0	/* flag bytes of %xmm2 that equal %xmm0's */
   1374 	pmovmskb %xmm0, %eax	/* %eax = bit mask of flagged bytes */
   1375 	movaps	%xmm2, %xmm3	/* keep the block unshifted */
   1376 #ifdef USE_AS_STRNCPY
   1377 	sub	$16, %ebx	/* charge this block against the count */
   1378 	jbe	L(StrncpyExit9Case2OrCase3)	/* taken when %ebx was <= 16 */
   1379 #endif
   1380 	test	%eax, %eax
   1381 	jnz	L(Shl9LoopExit)	/* mask non-zero: finish in the tail code */
   1382 
   1383 	palignr	$9, %xmm1, %xmm2	/* %xmm2 = 16 source bytes starting at (%ecx) */
   1384 	movaps	%xmm3, %xmm1	/* current block becomes the previous one */
   1385 	movaps	%xmm2, (%edx)
   1386 	movaps	23(%ecx), %xmm2
   1387 	/* -- 16-byte step 2 -- */
   1388 	pcmpeqb	%xmm2, %xmm0
   1389 	lea	16(%edx), %edx
   1390 	pmovmskb %xmm0, %eax
   1391 	lea	16(%ecx), %ecx
   1392 	movaps	%xmm2, %xmm3
   1393 #ifdef USE_AS_STRNCPY
   1394 	sub	$16, %ebx
   1395 	jbe	L(StrncpyExit9Case2OrCase3)
   1396 #endif
   1397 	test	%eax, %eax
   1398 	jnz	L(Shl9LoopExit)
   1399 
   1400 	palignr	$9, %xmm1, %xmm2
   1401 	movaps	%xmm2, (%edx)
   1402 	movaps	23(%ecx), %xmm2
   1403 	movaps	%xmm3, %xmm1
   1404 	/* -- 16-byte step 3 -- */
   1405 	pcmpeqb	%xmm2, %xmm0
   1406 	lea	16(%edx), %edx
   1407 	pmovmskb %xmm0, %eax
   1408 	lea	16(%ecx), %ecx
   1409 	movaps	%xmm2, %xmm3
   1410 #ifdef USE_AS_STRNCPY
   1411 	sub	$16, %ebx
   1412 	jbe	L(StrncpyExit9Case2OrCase3)
   1413 #endif
   1414 	test	%eax, %eax
   1415 	jnz	L(Shl9LoopExit)
   1416 
   1417 	palignr	$9, %xmm1, %xmm2
   1418 	movaps	%xmm2, (%edx)
   1419 	movaps	23(%ecx), %xmm2
   1420 	/* -- 16-byte step 4 (%xmm3 is not refreshed; it keeps step 3's block) -- */
   1421 	pcmpeqb	%xmm2, %xmm0
   1422 	lea	16(%edx), %edx
   1423 	pmovmskb %xmm0, %eax
   1424 	lea	16(%ecx), %ecx
   1425 #ifdef USE_AS_STRNCPY
   1426 	sub	$16, %ebx
   1427 	jbe	L(StrncpyExit9Case2OrCase3)
   1428 #endif
   1429 	test	%eax, %eax
   1430 	jnz	L(Shl9LoopExit)
   1431 
   1432 	palignr	$9, %xmm3, %xmm2	/* previous block is taken from %xmm3 here */
   1433 	movaps	%xmm2, (%edx)
   1434 	lea	23(%ecx), %ecx	/* %ecx = address of the next aligned source block */
   1435 	lea	16(%edx), %edx
   1436 	/* -- move both cursors back to a 64-byte source boundary -- */
   1437 	mov	%ecx, %eax
   1438 	and	$-0x40, %ecx	/* round the source address down to a multiple of 64 */
   1439 	sub	%ecx, %eax	/* %eax = bytes dropped by the rounding */
   1440 	lea	-7(%ecx), %ecx	/* re-bias so that 7(%ecx) is the 64-byte-aligned block */
   1441 	sub	%eax, %edx	/* destination goes back by the same amount */
   1442 #ifdef USE_AS_STRNCPY
   1443 	add	%eax, %ebx	/* and the count gets those bytes back */
   1444 #endif
   1445 	movaps	-9(%ecx), %xmm1	/* block preceding the first loop block */
   1446 
   1447 L(Shl9LoopStart):
   1448 	movaps	7(%ecx), %xmm2	/* four consecutive aligned source blocks */
   1449 	movaps	23(%ecx), %xmm3
   1450 	movaps	%xmm3, %xmm6
   1451 	movaps	39(%ecx), %xmm4
   1452 	movaps	%xmm4, %xmm7
   1453 	movaps	55(%ecx), %xmm5
   1454 	pminub	%xmm2, %xmm6	/* bytewise minimum of all four blocks ... */
   1455 	pminub	%xmm5, %xmm7
   1456 	pminub	%xmm6, %xmm7	/* ... ends up in %xmm7 */
   1457 	pcmpeqb	%xmm0, %xmm7	/* compare the minimum against %xmm0 */
   1458 	pmovmskb %xmm7, %eax	/* with %xmm0 == 0: non-zero iff one of the 64 bytes is 0 */
   1459 	movaps	%xmm5, %xmm7	/* unshifted last block, next iteration's %xmm1 */
   1460 	palignr	$9, %xmm4, %xmm5
   1461 	palignr	$9, %xmm3, %xmm4
   1462 	test	%eax, %eax
   1463 	jnz	L(Shl9Start)	/* hit: redo this group in 16-byte steps (%xmm1/%xmm2 are set up) */
   1464 #ifdef USE_AS_STRNCPY
   1465 	sub	$64, %ebx
   1466 	jbe	L(StrncpyLeave9)	/* taken when %ebx was <= 64 */
   1467 #endif
   1468 	palignr	$9, %xmm2, %xmm3
   1469 	lea	64(%ecx), %ecx
   1470 	palignr	$9, %xmm1, %xmm2
   1471 	movaps	%xmm7, %xmm1
   1472 	movaps	%xmm5, 48(%edx)	/* store the four realigned blocks */
   1473 	movaps	%xmm4, 32(%edx)
   1474 	movaps	%xmm3, 16(%edx)
   1475 	movaps	%xmm2, (%edx)
   1476 	lea	64(%edx), %edx
   1477 	jmp	L(Shl9LoopStart)
   1478 
   1479 L(Shl9LoopExit):
   1480 	movlpd	-1(%ecx), %xmm0	/* 8-byte move starting one byte early; covers bytes 0..6 */
   1481 	movlpd	%xmm0, -1(%edx)	/* byte -1 is rewritten (presumably already copied) */
   1482 	mov	$7, %esi	/* cursors advance by 7 in CopyFrom1To16Bytes */
   1483 	jmp	L(CopyFrom1To16Bytes)
   1484 
   1485 	.p2align 4
        /*
         * Shl10: copy path for a source cursor %ecx that lies 10 bytes above
         * a 16-byte boundary (the movaps loads at -10(%ecx) and 6(%ecx) need
         * aligned addresses); the destination cursor %edx is written with
         * movaps as well.  Each 16-byte output block is stitched together
         * from two neighbouring aligned source blocks with "palignr $10".
         *
         * Register use visible here:
         *   %eax   pmovmskb mask of the block just tested
         *   %ebx   remaining count (USE_AS_STRNCPY builds only)
         *   %xmm0  comparand for the terminator test -- presumably all-zero
         *          on entry (set up outside this chunk; TODO confirm)
         *   %xmm1  previous aligned source block, %xmm2 the current one
         *
         * Flow: four unrolled 16-byte steps, then the cursors are moved back
         * to a 64-byte source boundary and Shl10LoopStart processes 64 bytes
         * per iteration.  A hit in the mask leaves through Shl10LoopExit; the
         * StrncpyExit10Case2OrCase3 and StrncpyLeave10 targets are defined
         * outside this chunk.
         */
   1486 L(Shl10):
   1487 	movaps	-10(%ecx), %xmm1	/* aligned block whose top 6 bytes start at (%ecx) */
   1488 	movaps	6(%ecx), %xmm2	/* the aligned block after it */
   1489 L(Shl10Start):
   1490 	pcmpeqb	%xmm2, %xmm0	/* flag bytes of %xmm2 that equal %xmm0's */
   1491 	pmovmskb %xmm0, %eax	/* %eax = bit mask of flagged bytes */
   1492 	movaps	%xmm2, %xmm3	/* keep the block unshifted */
   1493 #ifdef USE_AS_STRNCPY
   1494 	sub	$16, %ebx	/* charge this block against the count */
   1495 	jbe	L(StrncpyExit10Case2OrCase3)	/* taken when %ebx was <= 16 */
   1496 #endif
   1497 	test	%eax, %eax
   1498 	jnz	L(Shl10LoopExit)	/* mask non-zero: finish in the tail code */
   1499 
   1500 	palignr	$10, %xmm1, %xmm2	/* %xmm2 = 16 source bytes starting at (%ecx) */
   1501 	movaps	%xmm3, %xmm1	/* current block becomes the previous one */
   1502 	movaps	%xmm2, (%edx)
   1503 	movaps	22(%ecx), %xmm2
   1504 	/* -- 16-byte step 2 -- */
   1505 	pcmpeqb	%xmm2, %xmm0
   1506 	lea	16(%edx), %edx
   1507 	pmovmskb %xmm0, %eax
   1508 	lea	16(%ecx), %ecx
   1509 	movaps	%xmm2, %xmm3
   1510 #ifdef USE_AS_STRNCPY
   1511 	sub	$16, %ebx
   1512 	jbe	L(StrncpyExit10Case2OrCase3)
   1513 #endif
   1514 	test	%eax, %eax
   1515 	jnz	L(Shl10LoopExit)
   1516 
   1517 	palignr	$10, %xmm1, %xmm2
   1518 	movaps	%xmm2, (%edx)
   1519 	movaps	22(%ecx), %xmm2
   1520 	movaps	%xmm3, %xmm1
   1521 	/* -- 16-byte step 3 -- */
   1522 	pcmpeqb	%xmm2, %xmm0
   1523 	lea	16(%edx), %edx
   1524 	pmovmskb %xmm0, %eax
   1525 	lea	16(%ecx), %ecx
   1526 	movaps	%xmm2, %xmm3
   1527 #ifdef USE_AS_STRNCPY
   1528 	sub	$16, %ebx
   1529 	jbe	L(StrncpyExit10Case2OrCase3)
   1530 #endif
   1531 	test	%eax, %eax
   1532 	jnz	L(Shl10LoopExit)
   1533 
   1534 	palignr	$10, %xmm1, %xmm2
   1535 	movaps	%xmm2, (%edx)
   1536 	movaps	22(%ecx), %xmm2
   1537 	/* -- 16-byte step 4 (%xmm3 is not refreshed; it keeps step 3's block) -- */
   1538 	pcmpeqb	%xmm2, %xmm0
   1539 	lea	16(%edx), %edx
   1540 	pmovmskb %xmm0, %eax
   1541 	lea	16(%ecx), %ecx
   1542 #ifdef USE_AS_STRNCPY
   1543 	sub	$16, %ebx
   1544 	jbe	L(StrncpyExit10Case2OrCase3)
   1545 #endif
   1546 	test	%eax, %eax
   1547 	jnz	L(Shl10LoopExit)
   1548 
   1549 	palignr	$10, %xmm3, %xmm2	/* previous block is taken from %xmm3 here */
   1550 	movaps	%xmm2, (%edx)
   1551 	lea	22(%ecx), %ecx	/* %ecx = address of the next aligned source block */
   1552 	lea	16(%edx), %edx
   1553 	/* -- move both cursors back to a 64-byte source boundary -- */
   1554 	mov	%ecx, %eax
   1555 	and	$-0x40, %ecx	/* round the source address down to a multiple of 64 */
   1556 	sub	%ecx, %eax	/* %eax = bytes dropped by the rounding */
   1557 	lea	-6(%ecx), %ecx	/* re-bias so that 6(%ecx) is the 64-byte-aligned block */
   1558 	sub	%eax, %edx	/* destination goes back by the same amount */
   1559 #ifdef USE_AS_STRNCPY
   1560 	add	%eax, %ebx	/* and the count gets those bytes back */
   1561 #endif
   1562 	movaps	-10(%ecx), %xmm1	/* block preceding the first loop block */
   1563 
   1564 L(Shl10LoopStart):
   1565 	movaps	6(%ecx), %xmm2	/* four consecutive aligned source blocks */
   1566 	movaps	22(%ecx), %xmm3
   1567 	movaps	%xmm3, %xmm6
   1568 	movaps	38(%ecx), %xmm4
   1569 	movaps	%xmm4, %xmm7
   1570 	movaps	54(%ecx), %xmm5
   1571 	pminub	%xmm2, %xmm6	/* bytewise minimum of all four blocks ... */
   1572 	pminub	%xmm5, %xmm7
   1573 	pminub	%xmm6, %xmm7	/* ... ends up in %xmm7 */
   1574 	pcmpeqb	%xmm0, %xmm7	/* compare the minimum against %xmm0 */
   1575 	pmovmskb %xmm7, %eax	/* with %xmm0 == 0: non-zero iff one of the 64 bytes is 0 */
   1576 	movaps	%xmm5, %xmm7	/* unshifted last block, next iteration's %xmm1 */
   1577 	palignr	$10, %xmm4, %xmm5
   1578 	palignr	$10, %xmm3, %xmm4
   1579 	test	%eax, %eax
   1580 	jnz	L(Shl10Start)	/* hit: redo this group in 16-byte steps (%xmm1/%xmm2 are set up) */
   1581 #ifdef USE_AS_STRNCPY
   1582 	sub	$64, %ebx
   1583 	jbe	L(StrncpyLeave10)	/* taken when %ebx was <= 64 */
   1584 #endif
   1585 	palignr	$10, %xmm2, %xmm3
   1586 	lea	64(%ecx), %ecx
   1587 	palignr	$10, %xmm1, %xmm2
   1588 	movaps	%xmm7, %xmm1
   1589 	movaps	%xmm5, 48(%edx)	/* store the four realigned blocks */
   1590 	movaps	%xmm4, 32(%edx)
   1591 	movaps	%xmm3, 16(%edx)
   1592 	movaps	%xmm2, (%edx)
   1593 	lea	64(%edx), %edx
   1594 	jmp	L(Shl10LoopStart)
   1595 
   1596 L(Shl10LoopExit):
   1597 	movlpd	-2(%ecx), %xmm0	/* 8-byte move starting two bytes early; covers bytes 0..5 */
   1598 	movlpd	%xmm0, -2(%edx)	/* bytes -2..-1 are rewritten (presumably already copied) */
   1599 	mov	$6, %esi	/* cursors advance by 6 in CopyFrom1To16Bytes */
   1600 	jmp	L(CopyFrom1To16Bytes)
   1601 
   1602 	.p2align 4
        /*
         * Shl11: copy path for a source cursor %ecx that lies 11 bytes above
         * a 16-byte boundary (the movaps loads at -11(%ecx) and 5(%ecx) need
         * aligned addresses); the destination cursor %edx is written with
         * movaps as well.  Each 16-byte output block is stitched together
         * from two neighbouring aligned source blocks with "palignr $11".
         *
         * Register use visible here:
         *   %eax   pmovmskb mask of the block just tested
         *   %ebx   remaining count (USE_AS_STRNCPY builds only)
         *   %xmm0  comparand for the terminator test -- presumably all-zero
         *          on entry (set up outside this chunk; TODO confirm)
         *   %xmm1  previous aligned source block, %xmm2 the current one
         *
         * Flow: four unrolled 16-byte steps, then the cursors are moved back
         * to a 64-byte source boundary and Shl11LoopStart processes 64 bytes
         * per iteration.  A hit in the mask leaves through Shl11LoopExit; the
         * StrncpyExit11Case2OrCase3 and StrncpyLeave11 targets are defined
         * outside this chunk.
         */
   1603 L(Shl11):
   1604 	movaps	-11(%ecx), %xmm1	/* aligned block whose top 5 bytes start at (%ecx) */
   1605 	movaps	5(%ecx), %xmm2	/* the aligned block after it */
   1606 L(Shl11Start):
   1607 	pcmpeqb	%xmm2, %xmm0	/* flag bytes of %xmm2 that equal %xmm0's */
   1608 	pmovmskb %xmm0, %eax	/* %eax = bit mask of flagged bytes */
   1609 	movaps	%xmm2, %xmm3	/* keep the block unshifted */
   1610 #ifdef USE_AS_STRNCPY
   1611 	sub	$16, %ebx	/* charge this block against the count */
   1612 	jbe	L(StrncpyExit11Case2OrCase3)	/* taken when %ebx was <= 16 */
   1613 #endif
   1614 	test	%eax, %eax
   1615 	jnz	L(Shl11LoopExit)	/* mask non-zero: finish in the tail code */
   1616 
   1617 	palignr	$11, %xmm1, %xmm2	/* %xmm2 = 16 source bytes starting at (%ecx) */
   1618 	movaps	%xmm3, %xmm1	/* current block becomes the previous one */
   1619 	movaps	%xmm2, (%edx)
   1620 	movaps	21(%ecx), %xmm2
   1621 	/* -- 16-byte step 2 -- */
   1622 	pcmpeqb	%xmm2, %xmm0
   1623 	lea	16(%edx), %edx
   1624 	pmovmskb %xmm0, %eax
   1625 	lea	16(%ecx), %ecx
   1626 	movaps	%xmm2, %xmm3
   1627 #ifdef USE_AS_STRNCPY
   1628 	sub	$16, %ebx
   1629 	jbe	L(StrncpyExit11Case2OrCase3)
   1630 #endif
   1631 	test	%eax, %eax
   1632 	jnz	L(Shl11LoopExit)
   1633 
   1634 	palignr	$11, %xmm1, %xmm2
   1635 	movaps	%xmm2, (%edx)
   1636 	movaps	21(%ecx), %xmm2
   1637 	movaps	%xmm3, %xmm1
   1638 	/* -- 16-byte step 3 -- */
   1639 	pcmpeqb	%xmm2, %xmm0
   1640 	lea	16(%edx), %edx
   1641 	pmovmskb %xmm0, %eax
   1642 	lea	16(%ecx), %ecx
   1643 	movaps	%xmm2, %xmm3
   1644 #ifdef USE_AS_STRNCPY
   1645 	sub	$16, %ebx
   1646 	jbe	L(StrncpyExit11Case2OrCase3)
   1647 #endif
   1648 	test	%eax, %eax
   1649 	jnz	L(Shl11LoopExit)
   1650 
   1651 	palignr	$11, %xmm1, %xmm2
   1652 	movaps	%xmm2, (%edx)
   1653 	movaps	21(%ecx), %xmm2
   1654 	/* -- 16-byte step 4 (%xmm3 is not refreshed; it keeps step 3's block) -- */
   1655 	pcmpeqb	%xmm2, %xmm0
   1656 	lea	16(%edx), %edx
   1657 	pmovmskb %xmm0, %eax
   1658 	lea	16(%ecx), %ecx
   1659 #ifdef USE_AS_STRNCPY
   1660 	sub	$16, %ebx
   1661 	jbe	L(StrncpyExit11Case2OrCase3)
   1662 #endif
   1663 	test	%eax, %eax
   1664 	jnz	L(Shl11LoopExit)
   1665 
   1666 	palignr	$11, %xmm3, %xmm2	/* previous block is taken from %xmm3 here */
   1667 	movaps	%xmm2, (%edx)
   1668 	lea	21(%ecx), %ecx	/* %ecx = address of the next aligned source block */
   1669 	lea	16(%edx), %edx
   1670 	/* -- move both cursors back to a 64-byte source boundary -- */
   1671 	mov	%ecx, %eax
   1672 	and	$-0x40, %ecx	/* round the source address down to a multiple of 64 */
   1673 	sub	%ecx, %eax	/* %eax = bytes dropped by the rounding */
   1674 	lea	-5(%ecx), %ecx	/* re-bias so that 5(%ecx) is the 64-byte-aligned block */
   1675 	sub	%eax, %edx	/* destination goes back by the same amount */
   1676 #ifdef USE_AS_STRNCPY
   1677 	add	%eax, %ebx	/* and the count gets those bytes back */
   1678 #endif
   1679 	movaps	-11(%ecx), %xmm1	/* block preceding the first loop block */
   1680 
   1681 L(Shl11LoopStart):
   1682 	movaps	5(%ecx), %xmm2	/* four consecutive aligned source blocks */
   1683 	movaps	21(%ecx), %xmm3
   1684 	movaps	%xmm3, %xmm6
   1685 	movaps	37(%ecx), %xmm4
   1686 	movaps	%xmm4, %xmm7
   1687 	movaps	53(%ecx), %xmm5
   1688 	pminub	%xmm2, %xmm6	/* bytewise minimum of all four blocks ... */
   1689 	pminub	%xmm5, %xmm7
   1690 	pminub	%xmm6, %xmm7	/* ... ends up in %xmm7 */
   1691 	pcmpeqb	%xmm0, %xmm7	/* compare the minimum against %xmm0 */
   1692 	pmovmskb %xmm7, %eax	/* with %xmm0 == 0: non-zero iff one of the 64 bytes is 0 */
   1693 	movaps	%xmm5, %xmm7	/* unshifted last block, next iteration's %xmm1 */
   1694 	palignr	$11, %xmm4, %xmm5
   1695 	palignr	$11, %xmm3, %xmm4
   1696 	test	%eax, %eax
   1697 	jnz	L(Shl11Start)	/* hit: redo this group in 16-byte steps (%xmm1/%xmm2 are set up) */
   1698 #ifdef USE_AS_STRNCPY
   1699 	sub	$64, %ebx
   1700 	jbe	L(StrncpyLeave11)	/* taken when %ebx was <= 64 */
   1701 #endif
   1702 	palignr	$11, %xmm2, %xmm3
   1703 	lea	64(%ecx), %ecx
   1704 	palignr	$11, %xmm1, %xmm2
   1705 	movaps	%xmm7, %xmm1
   1706 	movaps	%xmm5, 48(%edx)	/* store the four realigned blocks */
   1707 	movaps	%xmm4, 32(%edx)
   1708 	movaps	%xmm3, 16(%edx)
   1709 	movaps	%xmm2, (%edx)
   1710 	lea	64(%edx), %edx
   1711 	jmp	L(Shl11LoopStart)
   1712 
   1713 L(Shl11LoopExit):
   1714 	movlpd	-3(%ecx), %xmm0	/* 8-byte move starting three bytes early; covers bytes 0..4 */
   1715 	movlpd	%xmm0, -3(%edx)	/* bytes -3..-1 are rewritten (presumably already copied) */
   1716 	mov	$5, %esi	/* cursors advance by 5 in CopyFrom1To16Bytes */
   1717 	jmp	L(CopyFrom1To16Bytes)
   1718 
   1719 	.p2align 4
        /*
         * Shl12: copy path for a source cursor %ecx that lies 12 bytes above
         * a 16-byte boundary (the movaps loads at -12(%ecx) and 4(%ecx) need
         * aligned addresses); the destination cursor %edx is written with
         * movaps as well.  Each 16-byte output block is stitched together
         * from two neighbouring aligned source blocks with "palignr $12".
         *
         * Register use visible here:
         *   %eax   pmovmskb mask of the block just tested
         *   %ebx   remaining count (USE_AS_STRNCPY builds only)
         *   %xmm0  comparand for the terminator test -- presumably all-zero
         *          on entry (set up outside this chunk; TODO confirm)
         *   %xmm1  previous aligned source block, %xmm2 the current one
         *
         * Flow: four unrolled 16-byte steps, then the cursors are moved back
         * to a 64-byte source boundary and Shl12LoopStart processes 64 bytes
         * per iteration.  A hit in the mask leaves through Shl12LoopExit; the
         * StrncpyExit12Case2OrCase3 and StrncpyLeave12 targets are defined
         * outside this chunk.
         */
   1720 L(Shl12):
   1721 	movaps	-12(%ecx), %xmm1	/* aligned block whose top 4 bytes start at (%ecx) */
   1722 	movaps	4(%ecx), %xmm2	/* the aligned block after it */
   1723 L(Shl12Start):
   1724 	pcmpeqb	%xmm2, %xmm0	/* flag bytes of %xmm2 that equal %xmm0's */
   1725 	pmovmskb %xmm0, %eax	/* %eax = bit mask of flagged bytes */
   1726 	movaps	%xmm2, %xmm3	/* keep the block unshifted */
   1727 #ifdef USE_AS_STRNCPY
   1728 	sub	$16, %ebx	/* charge this block against the count */
   1729 	jbe	L(StrncpyExit12Case2OrCase3)	/* taken when %ebx was <= 16 */
   1730 #endif
   1731 	test	%eax, %eax
   1732 	jnz	L(Shl12LoopExit)	/* mask non-zero: finish in the tail code */
   1733 
   1734 	palignr	$12, %xmm1, %xmm2	/* %xmm2 = 16 source bytes starting at (%ecx) */
   1735 	movaps	%xmm3, %xmm1	/* current block becomes the previous one */
   1736 	movaps	%xmm2, (%edx)
   1737 	movaps	20(%ecx), %xmm2
   1738 	/* -- 16-byte step 2 -- */
   1739 	pcmpeqb	%xmm2, %xmm0
   1740 	lea	16(%edx), %edx
   1741 	pmovmskb %xmm0, %eax
   1742 	lea	16(%ecx), %ecx
   1743 	movaps	%xmm2, %xmm3
   1744 #ifdef USE_AS_STRNCPY
   1745 	sub	$16, %ebx
   1746 	jbe	L(StrncpyExit12Case2OrCase3)
   1747 #endif
   1748 	test	%eax, %eax
   1749 	jnz	L(Shl12LoopExit)
   1750 
   1751 	palignr	$12, %xmm1, %xmm2
   1752 	movaps	%xmm2, (%edx)
   1753 	movaps	20(%ecx), %xmm2
   1754 	movaps	%xmm3, %xmm1
   1755 	/* -- 16-byte step 3 -- */
   1756 	pcmpeqb	%xmm2, %xmm0
   1757 	lea	16(%edx), %edx
   1758 	pmovmskb %xmm0, %eax
   1759 	lea	16(%ecx), %ecx
   1760 	movaps	%xmm2, %xmm3
   1761 #ifdef USE_AS_STRNCPY
   1762 	sub	$16, %ebx
   1763 	jbe	L(StrncpyExit12Case2OrCase3)
   1764 #endif
   1765 	test	%eax, %eax
   1766 	jnz	L(Shl12LoopExit)
   1767 
   1768 	palignr	$12, %xmm1, %xmm2
   1769 	movaps	%xmm2, (%edx)
   1770 	movaps	20(%ecx), %xmm2
   1771 	/* -- 16-byte step 4 (%xmm3 is not refreshed; it keeps step 3's block) -- */
   1772 	pcmpeqb	%xmm2, %xmm0
   1773 	lea	16(%edx), %edx
   1774 	pmovmskb %xmm0, %eax
   1775 	lea	16(%ecx), %ecx
   1776 #ifdef USE_AS_STRNCPY
   1777 	sub	$16, %ebx
   1778 	jbe	L(StrncpyExit12Case2OrCase3)
   1779 #endif
   1780 	test	%eax, %eax
   1781 	jnz	L(Shl12LoopExit)
   1782 
   1783 	palignr	$12, %xmm3, %xmm2	/* previous block is taken from %xmm3 here */
   1784 	movaps	%xmm2, (%edx)
   1785 	lea	20(%ecx), %ecx	/* %ecx = address of the next aligned source block */
   1786 	lea	16(%edx), %edx
   1787 	/* -- move both cursors back to a 64-byte source boundary -- */
   1788 	mov	%ecx, %eax
   1789 	and	$-0x40, %ecx	/* round the source address down to a multiple of 64 */
   1790 	sub	%ecx, %eax	/* %eax = bytes dropped by the rounding */
   1791 	lea	-4(%ecx), %ecx	/* re-bias so that 4(%ecx) is the 64-byte-aligned block */
   1792 	sub	%eax, %edx	/* destination goes back by the same amount */
   1793 #ifdef USE_AS_STRNCPY
   1794 	add	%eax, %ebx	/* and the count gets those bytes back */
   1795 #endif
   1796 	movaps	-12(%ecx), %xmm1	/* block preceding the first loop block */
   1797 
   1798 L(Shl12LoopStart):
   1799 	movaps	4(%ecx), %xmm2	/* four consecutive aligned source blocks */
   1800 	movaps	20(%ecx), %xmm3
   1801 	movaps	%xmm3, %xmm6
   1802 	movaps	36(%ecx), %xmm4
   1803 	movaps	%xmm4, %xmm7
   1804 	movaps	52(%ecx), %xmm5
   1805 	pminub	%xmm2, %xmm6	/* bytewise minimum of all four blocks ... */
   1806 	pminub	%xmm5, %xmm7
   1807 	pminub	%xmm6, %xmm7	/* ... ends up in %xmm7 */
   1808 	pcmpeqb	%xmm0, %xmm7	/* compare the minimum against %xmm0 */
   1809 	pmovmskb %xmm7, %eax	/* with %xmm0 == 0: non-zero iff one of the 64 bytes is 0 */
   1810 	movaps	%xmm5, %xmm7	/* unshifted last block, next iteration's %xmm1 */
   1811 	palignr	$12, %xmm4, %xmm5
   1812 	palignr	$12, %xmm3, %xmm4
   1813 	test	%eax, %eax
   1814 	jnz	L(Shl12Start)	/* hit: redo this group in 16-byte steps (%xmm1/%xmm2 are set up) */
   1815 #ifdef USE_AS_STRNCPY
   1816 	sub	$64, %ebx
   1817 	jbe	L(StrncpyLeave12)	/* taken when %ebx was <= 64 */
   1818 #endif
   1819 	palignr	$12, %xmm2, %xmm3
   1820 	lea	64(%ecx), %ecx
   1821 	palignr	$12, %xmm1, %xmm2
   1822 	movaps	%xmm7, %xmm1
   1823 	movaps	%xmm5, 48(%edx)	/* store the four realigned blocks */
   1824 	movaps	%xmm4, 32(%edx)
   1825 	movaps	%xmm3, 16(%edx)
   1826 	movaps	%xmm2, (%edx)
   1827 	lea	64(%edx), %edx
   1828 	jmp	L(Shl12LoopStart)
   1829 
   1830 L(Shl12LoopExit):
   1831 	movl	(%ecx), %esi	/* the 4 bytes in front of the tested block (%esi as scratch) */
   1832 	movl	%esi, (%edx)
   1833 	mov	$4, %esi	/* number of bytes copied above */
   1834 	jmp	L(CopyFrom1To16Bytes)
   1835 
   1836 	.p2align 4
        /*
         * Shl13: copy path for a source cursor %ecx that lies 13 bytes above
         * a 16-byte boundary (the movaps loads at -13(%ecx) and 3(%ecx) need
         * aligned addresses); the destination cursor %edx is written with
         * movaps as well.  Each 16-byte output block is stitched together
         * from two neighbouring aligned source blocks with "palignr $13".
         *
         * Register use visible here:
         *   %eax   pmovmskb mask of the block just tested
         *   %ebx   remaining count (USE_AS_STRNCPY builds only)
         *   %xmm0  comparand for the terminator test -- presumably all-zero
         *          on entry (set up outside this chunk; TODO confirm)
         *   %xmm1  previous aligned source block, %xmm2 the current one
         *
         * Flow: four unrolled 16-byte steps, then the cursors are moved back
         * to a 64-byte source boundary and Shl13LoopStart processes 64 bytes
         * per iteration.  A hit in the mask leaves through Shl13LoopExit; the
         * StrncpyExit13Case2OrCase3 and StrncpyLeave13 targets are defined
         * outside this chunk.
         */
   1837 L(Shl13):
   1838 	movaps	-13(%ecx), %xmm1	/* aligned block whose top 3 bytes start at (%ecx) */
   1839 	movaps	3(%ecx), %xmm2	/* the aligned block after it */
   1840 L(Shl13Start):
   1841 	pcmpeqb	%xmm2, %xmm0	/* flag bytes of %xmm2 that equal %xmm0's */
   1842 	pmovmskb %xmm0, %eax	/* %eax = bit mask of flagged bytes */
   1843 	movaps	%xmm2, %xmm3	/* keep the block unshifted */
   1844 #ifdef USE_AS_STRNCPY
   1845 	sub	$16, %ebx	/* charge this block against the count */
   1846 	jbe	L(StrncpyExit13Case2OrCase3)	/* taken when %ebx was <= 16 */
   1847 #endif
   1848 	test	%eax, %eax
   1849 	jnz	L(Shl13LoopExit)	/* mask non-zero: finish in the tail code */
   1850 
   1851 	palignr	$13, %xmm1, %xmm2	/* %xmm2 = 16 source bytes starting at (%ecx) */
   1852 	movaps	%xmm3, %xmm1	/* current block becomes the previous one */
   1853 	movaps	%xmm2, (%edx)
   1854 	movaps	19(%ecx), %xmm2
   1855 	/* -- 16-byte step 2 -- */
   1856 	pcmpeqb	%xmm2, %xmm0
   1857 	lea	16(%edx), %edx
   1858 	pmovmskb %xmm0, %eax
   1859 	lea	16(%ecx), %ecx
   1860 	movaps	%xmm2, %xmm3
   1861 #ifdef USE_AS_STRNCPY
   1862 	sub	$16, %ebx
   1863 	jbe	L(StrncpyExit13Case2OrCase3)
   1864 #endif
   1865 	test	%eax, %eax
   1866 	jnz	L(Shl13LoopExit)
   1867 
   1868 	palignr	$13, %xmm1, %xmm2
   1869 	movaps	%xmm2, (%edx)
   1870 	movaps	19(%ecx), %xmm2
   1871 	movaps	%xmm3, %xmm1
   1872 	/* -- 16-byte step 3 -- */
   1873 	pcmpeqb	%xmm2, %xmm0
   1874 	lea	16(%edx), %edx
   1875 	pmovmskb %xmm0, %eax
   1876 	lea	16(%ecx), %ecx
   1877 	movaps	%xmm2, %xmm3
   1878 #ifdef USE_AS_STRNCPY
   1879 	sub	$16, %ebx
   1880 	jbe	L(StrncpyExit13Case2OrCase3)
   1881 #endif
   1882 	test	%eax, %eax
   1883 	jnz	L(Shl13LoopExit)
   1884 
   1885 	palignr	$13, %xmm1, %xmm2
   1886 	movaps	%xmm2, (%edx)
   1887 	movaps	19(%ecx), %xmm2
   1888 	/* -- 16-byte step 4 (%xmm3 is not refreshed; it keeps step 3's block) -- */
   1889 	pcmpeqb	%xmm2, %xmm0
   1890 	lea	16(%edx), %edx
   1891 	pmovmskb %xmm0, %eax
   1892 	lea	16(%ecx), %ecx
   1893 #ifdef USE_AS_STRNCPY
   1894 	sub	$16, %ebx
   1895 	jbe	L(StrncpyExit13Case2OrCase3)
   1896 #endif
   1897 	test	%eax, %eax
   1898 	jnz	L(Shl13LoopExit)
   1899 
   1900 	palignr	$13, %xmm3, %xmm2	/* previous block is taken from %xmm3 here */
   1901 	movaps	%xmm2, (%edx)
   1902 	lea	19(%ecx), %ecx	/* %ecx = address of the next aligned source block */
   1903 	lea	16(%edx), %edx
   1904 	/* -- move both cursors back to a 64-byte source boundary -- */
   1905 	mov	%ecx, %eax
   1906 	and	$-0x40, %ecx	/* round the source address down to a multiple of 64 */
   1907 	sub	%ecx, %eax	/* %eax = bytes dropped by the rounding */
   1908 	lea	-3(%ecx), %ecx	/* re-bias so that 3(%ecx) is the 64-byte-aligned block */
   1909 	sub	%eax, %edx	/* destination goes back by the same amount */
   1910 #ifdef USE_AS_STRNCPY
   1911 	add	%eax, %ebx	/* and the count gets those bytes back */
   1912 #endif
   1913 	movaps	-13(%ecx), %xmm1	/* block preceding the first loop block */
   1914 
   1915 L(Shl13LoopStart):
   1916 	movaps	3(%ecx), %xmm2	/* four consecutive aligned source blocks */
   1917 	movaps	19(%ecx), %xmm3
   1918 	movaps	%xmm3, %xmm6
   1919 	movaps	35(%ecx), %xmm4
   1920 	movaps	%xmm4, %xmm7
   1921 	movaps	51(%ecx), %xmm5
   1922 	pminub	%xmm2, %xmm6	/* bytewise minimum of all four blocks ... */
   1923 	pminub	%xmm5, %xmm7
   1924 	pminub	%xmm6, %xmm7	/* ... ends up in %xmm7 */
   1925 	pcmpeqb	%xmm0, %xmm7	/* compare the minimum against %xmm0 */
   1926 	pmovmskb %xmm7, %eax	/* with %xmm0 == 0: non-zero iff one of the 64 bytes is 0 */
   1927 	movaps	%xmm5, %xmm7	/* unshifted last block, next iteration's %xmm1 */
   1928 	palignr	$13, %xmm4, %xmm5
   1929 	palignr	$13, %xmm3, %xmm4
   1930 	test	%eax, %eax
   1931 	jnz	L(Shl13Start)	/* hit: redo this group in 16-byte steps (%xmm1/%xmm2 are set up) */
   1932 #ifdef USE_AS_STRNCPY
   1933 	sub	$64, %ebx
   1934 	jbe	L(StrncpyLeave13)	/* taken when %ebx was <= 64 */
   1935 #endif
   1936 	palignr	$13, %xmm2, %xmm3
   1937 	lea	64(%ecx), %ecx
   1938 	palignr	$13, %xmm1, %xmm2
   1939 	movaps	%xmm7, %xmm1
   1940 	movaps	%xmm5, 48(%edx)	/* store the four realigned blocks */
   1941 	movaps	%xmm4, 32(%edx)
   1942 	movaps	%xmm3, 16(%edx)
   1943 	movaps	%xmm2, (%edx)
   1944 	lea	64(%edx), %edx
   1945 	jmp	L(Shl13LoopStart)
   1946 
   1947 L(Shl13LoopExit):
   1948 	movl	-1(%ecx), %esi	/* 4-byte move starting one byte early; covers bytes 0..2 */
   1949 	movl	%esi, -1(%edx)	/* byte -1 is rewritten (presumably already copied) */
   1950 	mov	$3, %esi	/* cursors advance by 3 in CopyFrom1To16Bytes */
   1951 	jmp	L(CopyFrom1To16Bytes)
   1952 
   1953 	.p2align 4
        /*
         * Shl14: copy path for a source cursor %ecx that lies 14 bytes above
         * a 16-byte boundary (the movaps loads at -14(%ecx) and 2(%ecx) need
         * aligned addresses); the destination cursor %edx is written with
         * movaps as well.  Each 16-byte output block is stitched together
         * from two neighbouring aligned source blocks with "palignr $14".
         *
         * Register use visible here:
         *   %eax   pmovmskb mask of the block just tested
         *   %ebx   remaining count (USE_AS_STRNCPY builds only)
         *   %xmm0  comparand for the terminator test -- presumably all-zero
         *          on entry (set up outside this chunk; TODO confirm)
         *   %xmm1  previous aligned source block, %xmm2 the current one
         *
         * Flow: four unrolled 16-byte steps, then the cursors are moved back
         * to a 64-byte source boundary and Shl14LoopStart processes 64 bytes
         * per iteration.  A hit in the mask leaves through Shl14LoopExit; the
         * StrncpyExit14Case2OrCase3 and StrncpyLeave14 targets are defined
         * outside this chunk.
         */
   1954 L(Shl14):
   1955 	movaps	-14(%ecx), %xmm1	/* aligned block whose top 2 bytes start at (%ecx) */
   1956 	movaps	2(%ecx), %xmm2	/* the aligned block after it */
   1957 L(Shl14Start):
   1958 	pcmpeqb	%xmm2, %xmm0	/* flag bytes of %xmm2 that equal %xmm0's */
   1959 	pmovmskb %xmm0, %eax	/* %eax = bit mask of flagged bytes */
   1960 	movaps	%xmm2, %xmm3	/* keep the block unshifted */
   1961 #ifdef USE_AS_STRNCPY
   1962 	sub	$16, %ebx	/* charge this block against the count */
   1963 	jbe	L(StrncpyExit14Case2OrCase3)	/* taken when %ebx was <= 16 */
   1964 #endif
   1965 	test	%eax, %eax
   1966 	jnz	L(Shl14LoopExit)	/* mask non-zero: finish in the tail code */
   1967 
   1968 	palignr	$14, %xmm1, %xmm2	/* %xmm2 = 16 source bytes starting at (%ecx) */
   1969 	movaps	%xmm3, %xmm1	/* current block becomes the previous one */
   1970 	movaps	%xmm2, (%edx)
   1971 	movaps	18(%ecx), %xmm2
   1972 	/* -- 16-byte step 2 -- */
   1973 	pcmpeqb	%xmm2, %xmm0
   1974 	lea	16(%edx), %edx
   1975 	pmovmskb %xmm0, %eax
   1976 	lea	16(%ecx), %ecx
   1977 	movaps	%xmm2, %xmm3
   1978 #ifdef USE_AS_STRNCPY
   1979 	sub	$16, %ebx
   1980 	jbe	L(StrncpyExit14Case2OrCase3)
   1981 #endif
   1982 	test	%eax, %eax
   1983 	jnz	L(Shl14LoopExit)
   1984 
   1985 	palignr	$14, %xmm1, %xmm2
   1986 	movaps	%xmm2, (%edx)
   1987 	movaps	18(%ecx), %xmm2
   1988 	movaps	%xmm3, %xmm1
   1989 	/* -- 16-byte step 3 -- */
   1990 	pcmpeqb	%xmm2, %xmm0
   1991 	lea	16(%edx), %edx
   1992 	pmovmskb %xmm0, %eax
   1993 	lea	16(%ecx), %ecx
   1994 	movaps	%xmm2, %xmm3
   1995 #ifdef USE_AS_STRNCPY
   1996 	sub	$16, %ebx
   1997 	jbe	L(StrncpyExit14Case2OrCase3)
   1998 #endif
   1999 	test	%eax, %eax
   2000 	jnz	L(Shl14LoopExit)
   2001 
   2002 	palignr	$14, %xmm1, %xmm2
   2003 	movaps	%xmm2, (%edx)
   2004 	movaps	18(%ecx), %xmm2
   2005 	/* -- 16-byte step 4 (%xmm3 is not refreshed; it keeps step 3's block) -- */
   2006 	pcmpeqb	%xmm2, %xmm0
   2007 	lea	16(%edx), %edx
   2008 	pmovmskb %xmm0, %eax
   2009 	lea	16(%ecx), %ecx
   2010 #ifdef USE_AS_STRNCPY
   2011 	sub	$16, %ebx
   2012 	jbe	L(StrncpyExit14Case2OrCase3)
   2013 #endif
   2014 	test	%eax, %eax
   2015 	jnz	L(Shl14LoopExit)
   2016 
   2017 	palignr	$14, %xmm3, %xmm2	/* previous block is taken from %xmm3 here */
   2018 	movaps	%xmm2, (%edx)
   2019 	lea	18(%ecx), %ecx	/* %ecx = address of the next aligned source block */
   2020 	lea	16(%edx), %edx
   2021 	/* -- move both cursors back to a 64-byte source boundary -- */
   2022 	mov	%ecx, %eax
   2023 	and	$-0x40, %ecx	/* round the source address down to a multiple of 64 */
   2024 	sub	%ecx, %eax	/* %eax = bytes dropped by the rounding */
   2025 	lea	-2(%ecx), %ecx	/* re-bias so that 2(%ecx) is the 64-byte-aligned block */
   2026 	sub	%eax, %edx	/* destination goes back by the same amount */
   2027 #ifdef USE_AS_STRNCPY
   2028 	add	%eax, %ebx	/* and the count gets those bytes back */
   2029 #endif
   2030 	movaps	-14(%ecx), %xmm1	/* block preceding the first loop block */
   2031 
   2032 L(Shl14LoopStart):
   2033 	movaps	2(%ecx), %xmm2	/* four consecutive aligned source blocks */
   2034 	movaps	18(%ecx), %xmm3
   2035 	movaps	%xmm3, %xmm6
   2036 	movaps	34(%ecx), %xmm4
   2037 	movaps	%xmm4, %xmm7
   2038 	movaps	50(%ecx), %xmm5
   2039 	pminub	%xmm2, %xmm6	/* bytewise minimum of all four blocks ... */
   2040 	pminub	%xmm5, %xmm7
   2041 	pminub	%xmm6, %xmm7	/* ... ends up in %xmm7 */
   2042 	pcmpeqb	%xmm0, %xmm7	/* compare the minimum against %xmm0 */
   2043 	pmovmskb %xmm7, %eax	/* with %xmm0 == 0: non-zero iff one of the 64 bytes is 0 */
   2044 	movaps	%xmm5, %xmm7	/* unshifted last block, next iteration's %xmm1 */
   2045 	palignr	$14, %xmm4, %xmm5
   2046 	palignr	$14, %xmm3, %xmm4
   2047 	test	%eax, %eax
   2048 	jnz	L(Shl14Start)	/* hit: redo this group in 16-byte steps (%xmm1/%xmm2 are set up) */
   2049 #ifdef USE_AS_STRNCPY
   2050 	sub	$64, %ebx
   2051 	jbe	L(StrncpyLeave14)	/* taken when %ebx was <= 64 */
   2052 #endif
   2053 	palignr	$14, %xmm2, %xmm3
   2054 	lea	64(%ecx), %ecx
   2055 	palignr	$14, %xmm1, %xmm2
   2056 	movaps	%xmm7, %xmm1
   2057 	movaps	%xmm5, 48(%edx)	/* store the four realigned blocks */
   2058 	movaps	%xmm4, 32(%edx)
   2059 	movaps	%xmm3, 16(%edx)
   2060 	movaps	%xmm2, (%edx)
   2061 	lea	64(%edx), %edx
   2062 	jmp	L(Shl14LoopStart)
   2063 
   2064 L(Shl14LoopExit):
   2065 	movl	-2(%ecx), %esi	/* 4-byte move starting two bytes early; covers bytes 0..1 */
   2066 	movl	%esi, -2(%edx)	/* bytes -2..-1 are rewritten (presumably already copied) */
   2067 	mov	$2, %esi	/* cursors advance by 2 in CopyFrom1To16Bytes */
   2068 	jmp	L(CopyFrom1To16Bytes)
   2069 
   2070 	.p2align 4
        /*
         * Shl15: copy path for a source cursor %ecx that lies 15 bytes above
         * a 16-byte boundary (the movaps loads at -15(%ecx) and 1(%ecx) need
         * aligned addresses); the destination cursor %edx is written with
         * movaps as well.  Each 16-byte output block is stitched together
         * from two neighbouring aligned source blocks with "palignr $15".
         *
         * Register use visible here:
         *   %eax   pmovmskb mask of the block just tested
         *   %ebx   remaining count (USE_AS_STRNCPY builds only)
         *   %xmm0  comparand for the terminator test -- presumably all-zero
         *          on entry (set up outside this chunk; TODO confirm)
         *   %xmm1  previous aligned source block, %xmm2 the current one
         *
         * Flow: four unrolled 16-byte steps, then the cursors are moved back
         * to a 64-byte source boundary and Shl15LoopStart processes 64 bytes
         * per iteration.  A hit in the mask leaves through Shl15LoopExit; the
         * StrncpyExit15Case2OrCase3 and StrncpyLeave15 targets are defined
         * outside this chunk.  Shl15LoopExit only jumps explicitly in
         * USE_AS_STRCAT / USE_AS_STRLCPY builds; in other builds execution
         * runs on into whatever code follows it.
         */
   2071 L(Shl15):
   2072 	movaps	-15(%ecx), %xmm1	/* aligned block whose top byte is (%ecx) */
   2073 	movaps	1(%ecx), %xmm2	/* the aligned block after it */
   2074 L(Shl15Start):
   2075 	pcmpeqb	%xmm2, %xmm0	/* flag bytes of %xmm2 that equal %xmm0's */
   2076 	pmovmskb %xmm0, %eax	/* %eax = bit mask of flagged bytes */
   2077 	movaps	%xmm2, %xmm3	/* keep the block unshifted */
   2078 #ifdef USE_AS_STRNCPY
   2079 	sub	$16, %ebx	/* charge this block against the count */
   2080 	jbe	L(StrncpyExit15Case2OrCase3)	/* taken when %ebx was <= 16 */
   2081 #endif
   2082 	test	%eax, %eax
   2083 	jnz	L(Shl15LoopExit)	/* mask non-zero: finish in the tail code */
   2084 
   2085 	palignr	$15, %xmm1, %xmm2	/* %xmm2 = 16 source bytes starting at (%ecx) */
   2086 	movaps	%xmm3, %xmm1	/* current block becomes the previous one */
   2087 	movaps	%xmm2, (%edx)
   2088 	movaps	17(%ecx), %xmm2
   2089 	/* -- 16-byte step 2 -- */
   2090 	pcmpeqb	%xmm2, %xmm0
   2091 	lea	16(%edx), %edx
   2092 	pmovmskb %xmm0, %eax
   2093 	lea	16(%ecx), %ecx
   2094 	movaps	%xmm2, %xmm3
   2095 #ifdef USE_AS_STRNCPY
   2096 	sub	$16, %ebx
   2097 	jbe	L(StrncpyExit15Case2OrCase3)
   2098 #endif
   2099 	test	%eax, %eax
   2100 	jnz	L(Shl15LoopExit)
   2101 
   2102 	palignr	$15, %xmm1, %xmm2
   2103 	movaps	%xmm2, (%edx)
   2104 	movaps	17(%ecx), %xmm2
   2105 	movaps	%xmm3, %xmm1
   2106 	/* -- 16-byte step 3 -- */
   2107 	pcmpeqb	%xmm2, %xmm0
   2108 	lea	16(%edx), %edx
   2109 	pmovmskb %xmm0, %eax
   2110 	lea	16(%ecx), %ecx
   2111 	movaps	%xmm2, %xmm3
   2112 #ifdef USE_AS_STRNCPY
   2113 	sub	$16, %ebx
   2114 	jbe	L(StrncpyExit15Case2OrCase3)
   2115 #endif
   2116 	test	%eax, %eax
   2117 	jnz	L(Shl15LoopExit)
   2118 
   2119 	palignr	$15, %xmm1, %xmm2
   2120 	movaps	%xmm2, (%edx)
   2121 	movaps	17(%ecx), %xmm2
   2122 	/* -- 16-byte step 4 (%xmm3 is not refreshed; it keeps step 3's block) -- */
   2123 	pcmpeqb	%xmm2, %xmm0
   2124 	lea	16(%edx), %edx
   2125 	pmovmskb %xmm0, %eax
   2126 	lea	16(%ecx), %ecx
   2127 #ifdef USE_AS_STRNCPY
   2128 	sub	$16, %ebx
   2129 	jbe	L(StrncpyExit15Case2OrCase3)
   2130 #endif
   2131 	test	%eax, %eax
   2132 	jnz	L(Shl15LoopExit)
   2133 
   2134 	palignr	$15, %xmm3, %xmm2	/* previous block is taken from %xmm3 here */
   2135 	movaps	%xmm2, (%edx)
   2136 	lea	17(%ecx), %ecx	/* %ecx = address of the next aligned source block */
   2137 	lea	16(%edx), %edx
   2138 	/* -- move both cursors back to a 64-byte source boundary -- */
   2139 	mov	%ecx, %eax
   2140 	and	$-0x40, %ecx	/* round the source address down to a multiple of 64 */
   2141 	sub	%ecx, %eax	/* %eax = bytes dropped by the rounding */
   2142 	lea	-1(%ecx), %ecx	/* re-bias so that 1(%ecx) is the 64-byte-aligned block */
   2143 	sub	%eax, %edx	/* destination goes back by the same amount */
   2144 #ifdef USE_AS_STRNCPY
   2145 	add	%eax, %ebx	/* and the count gets those bytes back */
   2146 #endif
   2147 	movaps	-15(%ecx), %xmm1	/* block preceding the first loop block */
   2148 
   2149 L(Shl15LoopStart):
   2150 	movaps	1(%ecx), %xmm2	/* four consecutive aligned source blocks */
   2151 	movaps	17(%ecx), %xmm3
   2152 	movaps	%xmm3, %xmm6
   2153 	movaps	33(%ecx), %xmm4
   2154 	movaps	%xmm4, %xmm7
   2155 	movaps	49(%ecx), %xmm5
   2156 	pminub	%xmm2, %xmm6	/* bytewise minimum of all four blocks ... */
   2157 	pminub	%xmm5, %xmm7
   2158 	pminub	%xmm6, %xmm7	/* ... ends up in %xmm7 */
   2159 	pcmpeqb	%xmm0, %xmm7	/* compare the minimum against %xmm0 */
   2160 	pmovmskb %xmm7, %eax	/* with %xmm0 == 0: non-zero iff one of the 64 bytes is 0 */
   2161 	movaps	%xmm5, %xmm7	/* unshifted last block, next iteration's %xmm1 */
   2162 	palignr	$15, %xmm4, %xmm5
   2163 	palignr	$15, %xmm3, %xmm4
   2164 	test	%eax, %eax
   2165 	jnz	L(Shl15Start)	/* hit: redo this group in 16-byte steps (%xmm1/%xmm2 are set up) */
   2166 #ifdef USE_AS_STRNCPY
   2167 	sub	$64, %ebx
   2168 	jbe	L(StrncpyLeave15)	/* taken when %ebx was <= 64 */
   2169 #endif
   2170 	palignr	$15, %xmm2, %xmm3
   2171 	lea	64(%ecx), %ecx
   2172 	palignr	$15, %xmm1, %xmm2
   2173 	movaps	%xmm7, %xmm1
   2174 	movaps	%xmm5, 48(%edx)	/* store the four realigned blocks */
   2175 	movaps	%xmm4, 32(%edx)
   2176 	movaps	%xmm3, 16(%edx)
   2177 	movaps	%xmm2, (%edx)
   2178 	lea	64(%edx), %edx
   2179 	jmp	L(Shl15LoopStart)
   2180 
   2181 L(Shl15LoopExit):
   2182 	movl	-3(%ecx), %esi	/* 4-byte move starting three bytes early; covers byte 0 */
   2183 	movl	%esi, -3(%edx)	/* bytes -3..-1 are rewritten (presumably already copied) */
   2184 	mov	$1, %esi	/* cursors advance by 1 in CopyFrom1To16Bytes */
   2185 #if defined USE_AS_STRCAT || defined USE_AS_STRLCPY
   2186 	jmp	L(CopyFrom1To16Bytes)	/* explicit jump only in these builds; others fall through */
   2187 #endif
   2188 
   2189 
   2190 #if !defined USE_AS_STRCAT && !defined USE_AS_STRLCPY
   2191 
   2192 	.p2align 4
        /*
         * CopyFrom1To16Bytes: tail dispatcher.
         * In:  %eax  16-bit byte mask produced by pcmpeqb/pmovmskb on the
         *            current 16-byte source block (bit i <-> byte i)
         *      %esi  number of bytes by which both cursors still have to be
         *            advanced (set by the ShlNLoopExit code)
         *      %ecx / %edx  source / destination cursors
         * Advances the cursors, restores %esi through the POP macro (defined
         * outside this chunk; presumably a pop plus CFI bookkeeping) and
         * branches to ExitK, where K is the 1-based position of the lowest
         * set mask bit.  Bits 0..2 are tested here; if none of them is set
         * execution falls through into Exit4.
         */
   2193 L(CopyFrom1To16Bytes):
   2194 # ifdef USE_AS_STRNCPY
   2195 	add	$16, %ebx	/* give back the 16 subtracted before the block was tested */
   2196 # endif
   2197 	add	%esi, %edx
   2198 	add	%esi, %ecx
   2199 
   2200 	POP	(%esi)
   2201 	test	%al, %al
   2202 	jz	L(ExitHigh8)	/* nothing flagged in bytes 0..7 */
   2203 
   2204 L(CopyFrom1To16BytesLess8):
   2205 	mov	%al, %ah	/* %ah is used as scratch for the low-nibble test */
   2206 	and	$15, %ah
   2207 	jz	L(ExitHigh4)	/* nothing flagged in bytes 0..3 */
   2208 
   2209 	test	$0x01, %al
   2210 	jnz	L(Exit1)
   2211 	test	$0x02, %al
   2212 	jnz	L(Exit2)
   2213 	test	$0x04, %al
   2214 	jnz	L(Exit3)
   2215 
   2216 	.p2align 4
        /*
         * Exit4: copy 4 bytes from (%ecx) to (%edx) and return.
         * SAVE_RESULT and RETURN1 are macros defined outside this chunk;
         * SAVE_RESULT presumably leaves the return value in %eax (the
         * USE_AS_STPCPY code below dereferences %eax right after it).
         * In USE_AS_STRNCPY builds a non-zero remaining count diverts to
         * StrncpyFillTailWithZero1 with %ecx = first byte after the copy.
         */
   2217 L(Exit4):
   2218 	movl	(%ecx), %eax
   2219 	movl	%eax, (%edx)
   2220 	SAVE_RESULT	(3)
   2221 # ifdef USE_AS_STRNCPY
   2222 	sub	$4, %ebx	/* count left after these 4 bytes; sets ZF for the jnz */
   2223 	lea	4(%edx), %ecx	/* lea leaves the flags from the sub untouched */
   2224 	jnz	L(StrncpyFillTailWithZero1)
   2225 #  ifdef USE_AS_STPCPY
   2226 	cmpb	$1, (%eax)	/* CF = 1 iff the byte at (%eax) is 0 */
   2227 	sbb	$-1, %eax	/* %eax += 1 - CF: step forward unless it points at a 0 byte */
   2228 #  endif
   2229 # endif
   2230 	RETURN1
   2231 
   2232 	.p2align 4
        /*
         * ExitHigh4: reached when mask bits 0..3 in %al are all clear.
         * Tests bits 4..6 and branches to Exit5/Exit6/Exit7; if none is
         * set execution falls through into the code that follows (Exit8).
         */
   2233 L(ExitHigh4):
   2234 	test	$0x10, %al
   2235 	jnz	L(Exit5)
   2236 	test	$0x20, %al
   2237 	jnz	L(Exit6)
   2238 	test	$0x40, %al
   2239 	jnz	L(Exit7)
   2240 
   2241 	.p2align 4
        /*
         * Exit8: copy 8 bytes from (%ecx) to (%edx) through %xmm0 and return.
         * SAVE_RESULT and RETURN1 are macros defined outside this chunk;
         * SAVE_RESULT presumably leaves the return value in %eax (the
         * USE_AS_STPCPY code below dereferences %eax right after it).
         * In USE_AS_STRNCPY builds a non-zero remaining count diverts to
         * StrncpyFillTailWithZero1 with %ecx = first byte after the copy.
         */
   2242 L(Exit8):
   2243 	movlpd	(%ecx), %xmm0
   2244 	movlpd	%xmm0, (%edx)
   2245 	SAVE_RESULT	(7)
   2246 # ifdef USE_AS_STRNCPY
   2247 	sub	$8, %ebx	/* count left after these 8 bytes; sets ZF for the jnz */
   2248 	lea	8(%edx), %ecx	/* lea leaves the flags from the sub untouched */
   2249 	jnz	L(StrncpyFillTailWithZero1)
   2250 #  ifdef USE_AS_STPCPY
   2251 	cmpb	$1, (%eax)	/* CF = 1 iff the byte at (%eax) is 0 */
   2252 	sbb	$-1, %eax	/* %eax += 1 - CF: step forward unless it points at a 0 byte */
   2253 #  endif
   2254 # endif
   2255 	RETURN1
   2256 
   2257 	.p2align 4
        /*
         * ExitHigh8: reached when the low mask byte (%al) is zero, so the
         * flagged byte is among bytes 8..15 (mask bits in %ah).  %al is
         * reused as scratch.  Bits 8..10 select Exit9/Exit10/Exit11; if the
         * low nibble of %ah is non-zero but none of those is set, execution
         * falls through into the code that follows (Exit12).
         */
   2258 L(ExitHigh8):
   2259 	mov	%ah, %al
   2260 	and	$15, %al
   2261 	jz	L(ExitHigh12)	/* nothing flagged in bytes 8..11 */
   2262 
   2263 	test	$0x01, %ah
   2264 	jnz	L(Exit9)
   2265 	test	$0x02, %ah
   2266 	jnz	L(Exit10)
   2267 	test	$0x04, %ah
   2268 	jnz	L(Exit11)
   2269 
   2270 	.p2align 4
        /*
         * Exit12: copy 12 bytes from (%ecx) to (%edx) (8 bytes through %xmm0
         * plus 4 bytes through %eax) and return.
         * SAVE_RESULT and RETURN1 are macros defined outside this chunk;
         * SAVE_RESULT presumably leaves the return value in %eax (the
         * USE_AS_STPCPY code below dereferences %eax right after it).
         * In USE_AS_STRNCPY builds a non-zero remaining count diverts to
         * StrncpyFillTailWithZero1 with %ecx = first byte after the copy.
         */
   2271 L(Exit12):
   2272 	movlpd	(%ecx), %xmm0	/* bytes 0..7 */
   2273 	movl	8(%ecx), %eax	/* bytes 8..11 */
   2274 	movlpd	%xmm0, (%edx)
   2275 	movl	%eax, 8(%edx)
   2276 	SAVE_RESULT	(11)
   2277 # ifdef USE_AS_STRNCPY
   2278 	sub	$12, %ebx	/* count left after these 12 bytes; sets ZF for the jnz */
   2279 	lea	12(%edx), %ecx	/* lea leaves the flags from the sub untouched */
   2280 	jnz	L(StrncpyFillTailWithZero1)
   2281 #  ifdef USE_AS_STPCPY
   2282 	cmpb	$1, (%eax)	/* CF = 1 iff the byte at (%eax) is 0 */
   2283 	sbb	$-1, %eax	/* %eax += 1 - CF: step forward unless it points at a 0 byte */
   2284 #  endif
   2285 # endif
   2286 	RETURN1
   2287 
   2288 	.p2align 4
        /*
         * ExitHigh12: reached when mask bits 8..11 (low nibble of %ah) are
         * all clear.  Tests bits 12..14 and branches to Exit13/Exit14/Exit15;
         * if none is set execution falls through into the code that follows
         * (Exit16).
         */
   2289 L(ExitHigh12):
   2290 	test	$0x10, %ah
   2291 	jnz	L(Exit13)
   2292 	test	$0x20, %ah
   2293 	jnz	L(Exit14)
   2294 	test	$0x40, %ah
   2295 	jnz	L(Exit15)
   2296 
   2297 	.p2align 4
        /*
         * Exit16: copy 16 bytes from (%ecx) to (%edx) with unaligned moves
         * (movdqu) and return.
         * SAVE_RESULT and RETURN1 are macros defined outside this chunk;
         * SAVE_RESULT presumably leaves the return value in %eax (the
         * USE_AS_STPCPY code below dereferences %eax right after it).
         * In USE_AS_STRNCPY builds a non-zero remaining count diverts to
         * StrncpyFillTailWithZero1 with %ecx = first byte after the copy.
         */
   2298 L(Exit16):
   2299 	movdqu	(%ecx), %xmm0
   2300 	movdqu	%xmm0, (%edx)
   2301 	SAVE_RESULT	(15)
   2302 # ifdef USE_AS_STRNCPY
   2303 	sub	$16, %ebx	/* count left after these 16 bytes; sets ZF for the jnz */
   2304 	lea	16(%edx), %ecx	/* lea leaves the flags from the sub untouched */
   2305 	jnz	L(StrncpyFillTailWithZero1)
   2306 #  ifdef USE_AS_STPCPY
   2307 	cmpb	$1, (%eax)	/* CF = 1 iff the byte at (%eax) is 0 */
   2308 	sbb	$-1, %eax	/* %eax += 1 - CF: step forward unless it points at a 0 byte */
   2309 #  endif
   2310 # endif
   2311 	RETURN1
   2312 
   2313 #  ifdef USE_AS_STRNCPY
   2314 
   2315 	CFI_PUSH(%esi)	/* macro defined outside this chunk; presumably CFI state only (the code before it has popped %esi) */
   2316 
   2317 	.p2align 4
        /*
         * CopyFrom1To16BytesCase2 (USE_AS_STRNCPY only): tail dispatcher for
         * a block in which the byte mask in %eax is non-zero AND the count
         * in %ebx may end inside the same block.  After the fix-ups below,
         * the code walks positions 1..8 in order and leaves through ExitK
         * at the first K for which either mask bit K-1 is set or %ebx == K.
         * When more than 8 bytes remain, the count cannot bind in the low
         * half, so the plain mask dispatcher CopyFrom1To16BytesLess8 is used.
         * A zero low mask byte continues in ExitHighCase2.
         */
   2318 L(CopyFrom1To16BytesCase2):
   2319 	add	$16, %ebx	/* give back the 16 subtracted before the block was tested */
   2320 	add	%esi, %ecx	/* advance both cursors by the bytes already handled */
   2321 	add	%esi, %edx
   2322 
   2323 	POP	(%esi)
   2324 
   2325 	test	%al, %al
   2326 	jz	L(ExitHighCase2)	/* nothing flagged in bytes 0..7 */
   2327 
   2328 	cmp	$8, %ebx
   2329 	ja	L(CopyFrom1To16BytesLess8)	/* more than 8 bytes allowed: mask alone decides */
   2330 
   2331 	test	$0x01, %al
   2332 	jnz	L(Exit1)
   2333 	cmp	$1, %ebx
   2334 	je	L(Exit1)
   2335 	test	$0x02, %al
   2336 	jnz	L(Exit2)
   2337 	cmp	$2, %ebx
   2338 	je	L(Exit2)
   2339 	test	$0x04, %al
   2340 	jnz	L(Exit3)
   2341 	cmp	$3, %ebx
   2342 	je	L(Exit3)
   2343 	test	$0x08, %al
   2344 	jnz	L(Exit4)
   2345 	cmp	$4, %ebx
   2346 	je	L(Exit4)
   2347 	test	$0x10, %al
   2348 	jnz	L(Exit5)
   2349 	cmp	$5, %ebx
   2350 	je	L(Exit5)
   2351 	test	$0x20, %al
   2352 	jnz	L(Exit6)
   2353 	cmp	$6, %ebx
   2354 	je	L(Exit6)
   2355 	test	$0x40, %al
   2356 	jnz	L(Exit7)
   2357 	cmp	$7, %ebx
   2358 	je	L(Exit7)
   2359 	jmp	L(Exit8)	/* neither bits 0..6 nor counts 1..7 matched */
   2360 
   2361 	.p2align 4
        /*
         * ExitHighCase2: continuation of CopyFrom1To16BytesCase2 for a mask
         * whose low byte is zero (nothing flagged in bytes 0..7).  If the
         * count in %ebx is 8 or less it ends before any flagged byte, so the
         * count-only dispatcher CopyFrom1To16BytesLess8Case3 takes over.
         * Otherwise positions 9..16 are walked in order and the code leaves
         * through ExitK at the first K for which mask bit K-1 (in %ah) is
         * set or %ebx == K.
         */
   2362 L(ExitHighCase2):
   2363 	cmp	$8, %ebx
   2364 	jbe	L(CopyFrom1To16BytesLess8Case3)
   2365 
   2366 	test	$0x01, %ah
   2367 	jnz	L(Exit9)
   2368 	cmp	$9, %ebx
   2369 	je	L(Exit9)
   2370 	test	$0x02, %ah
   2371 	jnz	L(Exit10)
   2372 	cmp	$10, %ebx
   2373 	je	L(Exit10)
   2374 	test	$0x04, %ah
   2375 	jnz	L(Exit11)
   2376 	cmp	$11, %ebx
   2377 	je	L(Exit11)
   2378 	test	$0x8, %ah
   2379 	jnz	L(Exit12)
   2380 	cmp	$12, %ebx
   2381 	je	L(Exit12)
   2382 	test	$0x10, %ah
   2383 	jnz	L(Exit13)
   2384 	cmp	$13, %ebx
   2385 	je	L(Exit13)
   2386 	test	$0x20, %ah
   2387 	jnz	L(Exit14)
   2388 	cmp	$14, %ebx
   2389 	je	L(Exit14)
   2390 	test	$0x40, %ah
   2391 	jnz	L(Exit15)
   2392 	cmp	$15, %ebx
   2393 	je	L(Exit15)
   2394 	jmp	L(Exit16)	/* neither bits 8..14 nor counts 9..15 matched */
   2395 
   2396 	CFI_PUSH(%esi)
   2397 
   2398 	.p2align 4
        /* CopyFrom1To16BytesCase2OrCase3: choose the final-copy path.
           A non-zero %eax mask goes to L(CopyFrom1To16BytesCase2); a zero
           mask falls through to the code that follows this block.  */
   2399 L(CopyFrom1To16BytesCase2OrCase3):
   2400 	test	%eax, %eax
   2401 	jnz	L(CopyFrom1To16BytesCase2)
   2402 
   2403 	.p2align 4
        /* CopyFrom1To16BytesCase3: finish a copy whose length is decided
           by the count in %ebx alone (the %eax mask is not consulted).
           %esi is added to the source (%ecx) and destination (%edx)
           pointers and handed to the POP macro; 16 is added back to %ebx.
           Counts above 8 go to L(ExitHigh8Case3), counts above 4 to
           L(ExitHigh4Case3); counts 1..3 use L(Exit1)..L(Exit3) and the
           remaining case is copied inline as one 4-byte move.
           L(CopyFrom1To16BytesLess8Case3) is also entered directly from
           L(ExitHighCase2).  POP, SAVE_RESULT and RETURN1 are macros
           defined outside this chunk.  */
   2404 L(CopyFrom1To16BytesCase3):
   2405 	add	$16, %ebx
   2406 	add	%esi, %edx
   2407 	add	%esi, %ecx
   2408 
   2409 	POP	(%esi)
   2410 
   2411 	cmp	$8, %ebx
   2412 	ja	L(ExitHigh8Case3)	/* unsigned: count > 8 */
   2413 
   2414 L(CopyFrom1To16BytesLess8Case3):
   2415 	cmp	$4, %ebx
   2416 	ja	L(ExitHigh4Case3)	/* unsigned: count in 5..8 */
   2417 
   2418 	cmp	$1, %ebx
   2419 	je	L(Exit1)
   2420 	cmp	$2, %ebx
   2421 	je	L(Exit2)
   2422 	cmp	$3, %ebx
   2423 	je	L(Exit3)
   2424 	movl	(%ecx), %eax		/* fall-through case: copy 4 bytes */
   2425 	movl	%eax, (%edx)
   2426 	SAVE_RESULT	(4)
   2427 	RETURN1
   2428 
   2429 	.p2align 4
        /* ExitHigh4Case3: count-only finish for counts above 4 (entered
           from L(CopyFrom1To16BytesLess8Case3)).  Counts 5..7 use
           L(Exit5)..L(Exit7); otherwise 8 bytes are copied inline from
           (%ecx) to (%edx) with one movlpd pair.  SAVE_RESULT and
           RETURN1 are macros defined outside this chunk.  */
   2430 L(ExitHigh4Case3):
   2431 	cmp	$5, %ebx
   2432 	je	L(Exit5)
   2433 	cmp	$6, %ebx
   2434 	je	L(Exit6)
   2435 	cmp	$7, %ebx
   2436 	je	L(Exit7)
   2437 	movlpd	(%ecx), %xmm0		/* fall-through case: copy 8 bytes */
   2438 	movlpd	%xmm0, (%edx)
   2439 	SAVE_RESULT	(8)
   2440 	RETURN1
   2441 
   2442 	.p2align 4
        /* ExitHigh8Case3: count-only finish for counts above 8 (entered
           from L(CopyFrom1To16BytesCase3)).  Counts above 12 go to
           L(ExitHigh12Case3); counts 9..11 use L(Exit9)..L(Exit11);
           otherwise 12 bytes are copied inline as an 8-byte plus a
           4-byte move.  SAVE_RESULT and RETURN1 are macros defined
           outside this chunk.  */
   2443 L(ExitHigh8Case3):
   2444 	cmp	$12, %ebx
   2445 	ja	L(ExitHigh12Case3)	/* unsigned: count > 12 */
   2446 
   2447 	cmp	$9, %ebx
   2448 	je	L(Exit9)
   2449 	cmp	$10, %ebx
   2450 	je	L(Exit10)
   2451 	cmp	$11, %ebx
   2452 	je	L(Exit11)
   2453 	movlpd	(%ecx), %xmm0		/* fall-through case: copy 12 bytes */
   2454 	movl	8(%ecx), %eax
   2455 	movlpd	%xmm0, (%edx)
   2456 	movl	%eax, 8(%edx)
   2457 	SAVE_RESULT	(12)
   2458 	RETURN1
   2459 
   2460 	.p2align 4
        /* ExitHigh12Case3: count-only finish for counts above 12 (entered
           from L(ExitHigh8Case3)).  Counts 13..15 use
           L(Exit13)..L(Exit15); otherwise 16 bytes are copied inline as
           two 8-byte moves.  SAVE_RESULT and RETURN1 are macros defined
           outside this chunk.  */
   2461 L(ExitHigh12Case3):
   2462 	cmp	$13, %ebx
   2463 	je	L(Exit13)
   2464 	cmp	$14, %ebx
   2465 	je	L(Exit14)
   2466 	cmp	$15, %ebx
   2467 	je	L(Exit15)
   2468 	movlpd	(%ecx), %xmm0		/* fall-through case: copy 16 bytes */
   2469 	movlpd	8(%ecx), %xmm1
   2470 	movlpd	%xmm0, (%edx)
   2471 	movlpd	%xmm1, 8(%edx)
   2472 	SAVE_RESULT	(16)
   2473 	RETURN1
   2474 
   2475 # endif
   2476 
   2477 	.p2align 4
        /* Exit1: copy 1 byte from (%ecx) to (%edx) and finish via RETURN1.
           With USE_AS_STRNCPY, 1 is subtracted from the remaining count
           in %ebx and, if any count is left, control goes to
           L(StrncpyFillTailWithZero1) with %ecx = %edx + 1.  With
           USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless the byte
           it addresses is 0.  SAVE_RESULT and RETURN1 are macros defined
           outside this chunk.  */
   2478 L(Exit1):
   2479 	movb	(%ecx), %al
   2480 	movb	%al, (%edx)
   2481 	SAVE_RESULT	(0)
   2482 # ifdef USE_AS_STRNCPY
   2483 	sub	$1, %ebx		/* sets ZF when the count is used up */
   2484 	lea	1(%edx), %ecx		/* lea leaves the flags from sub intact */
   2485 	jnz	L(StrncpyFillTailWithZero1)
   2486 #  ifdef USE_AS_STPCPY
   2487 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   2488 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   2489 #  endif
   2490 # endif
   2491 	RETURN1
   2492 
   2493 	.p2align 4
        /* Exit2: copy 2 bytes from (%ecx) to (%edx) with one 16-bit move
           and finish via RETURN1.  With USE_AS_STRNCPY, 2 is subtracted
           from the remaining count in %ebx and, if any count is left,
           control goes to L(StrncpyFillTailWithZero1) with
           %ecx = %edx + 2.  With USE_AS_STPCPY as well, cmpb/sbb add 1 to
           %eax unless the byte it addresses is 0.  SAVE_RESULT and
           RETURN1 are macros defined outside this chunk.  */
   2494 L(Exit2):
   2495 	movw	(%ecx), %ax
   2496 	movw	%ax, (%edx)
   2497 	SAVE_RESULT	(1)
   2498 # ifdef USE_AS_STRNCPY
   2499 	sub	$2, %ebx		/* sets ZF when the count is used up */
   2500 	lea	2(%edx), %ecx		/* lea leaves the flags from sub intact */
   2501 	jnz	L(StrncpyFillTailWithZero1)
   2502 #  ifdef USE_AS_STPCPY
   2503 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   2504 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   2505 #  endif
   2506 # endif
   2507 	RETURN1
   2508 
   2509 	.p2align 4
        /* Exit3: copy 3 bytes from (%ecx) to (%edx) as a 16-bit move plus
           a byte move and finish via RETURN1.  With USE_AS_STRNCPY, 3 is
           subtracted from the remaining count in %ebx and, if any count
           is left, control goes to L(StrncpyFillTailWithZero1) with
           %ecx = %edx + 3.  With USE_AS_STPCPY as well, cmpb/sbb add 1 to
           %eax unless the byte it addresses is 0.  SAVE_RESULT and
           RETURN1 are macros defined outside this chunk.  */
   2510 L(Exit3):
   2511 	movw	(%ecx), %ax
   2512 	movw	%ax, (%edx)
   2513 	movb	2(%ecx), %al
   2514 	movb	%al, 2(%edx)
   2515 	SAVE_RESULT	(2)
   2516 # ifdef USE_AS_STRNCPY
   2517 	sub	$3, %ebx		/* sets ZF when the count is used up */
   2518 	lea	3(%edx), %ecx		/* lea leaves the flags from sub intact */
   2519 	jnz	L(StrncpyFillTailWithZero1)
   2520 #  ifdef USE_AS_STPCPY
   2521 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   2522 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   2523 #  endif
   2524 # endif
   2525 	RETURN1
   2526 
   2527 	.p2align 4
        /* Exit5: copy 5 bytes from (%ecx) to (%edx) as a 32-bit move plus
           a byte move and finish via RETURN1.  With USE_AS_STRNCPY, 5 is
           subtracted from the remaining count in %ebx and, if any count
           is left, control goes to L(StrncpyFillTailWithZero1) with
           %ecx = %edx + 5.  With USE_AS_STPCPY as well, cmpb/sbb add 1 to
           %eax unless the byte it addresses is 0.  SAVE_RESULT and
           RETURN1 are macros defined outside this chunk.  */
   2528 L(Exit5):
   2529 	movl	(%ecx), %eax
   2530 	movl	%eax, (%edx)
   2531 	movb	4(%ecx), %al
   2532 	movb	%al, 4(%edx)
   2533 	SAVE_RESULT	(4)
   2534 # ifdef USE_AS_STRNCPY
   2535 	sub	$5, %ebx		/* sets ZF when the count is used up */
   2536 	lea	5(%edx), %ecx		/* lea leaves the flags from sub intact */
   2537 	jnz	L(StrncpyFillTailWithZero1)
   2538 #  ifdef USE_AS_STPCPY
   2539 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   2540 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   2541 #  endif
   2542 # endif
   2543 	RETURN1
   2544 
   2545 	.p2align 4
        /* Exit6: copy 6 bytes from (%ecx) to (%edx) as a 32-bit move plus
           a 16-bit move and finish via RETURN1.  With USE_AS_STRNCPY, 6
           is subtracted from the remaining count in %ebx and, if any
           count is left, control goes to L(StrncpyFillTailWithZero1)
           with %ecx = %edx + 6.  With USE_AS_STPCPY as well, cmpb/sbb add
           1 to %eax unless the byte it addresses is 0.  SAVE_RESULT and
           RETURN1 are macros defined outside this chunk.  */
   2546 L(Exit6):
   2547 	movl	(%ecx), %eax
   2548 	movl	%eax, (%edx)
   2549 	movw	4(%ecx), %ax
   2550 	movw	%ax, 4(%edx)
   2551 	SAVE_RESULT	(5)
   2552 # ifdef USE_AS_STRNCPY
   2553 	sub	$6, %ebx		/* sets ZF when the count is used up */
   2554 	lea	6(%edx), %ecx		/* lea leaves the flags from sub intact */
   2555 	jnz	L(StrncpyFillTailWithZero1)
   2556 #  ifdef USE_AS_STPCPY
   2557 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   2558 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   2559 #  endif
   2560 # endif
   2561 	RETURN1
   2562 
   2563 	.p2align 4
        /* Exit7: copy 7 bytes from (%ecx) to (%edx) with two 32-bit moves
           that overlap at byte 3, and finish via RETURN1.  With
           USE_AS_STRNCPY, 7 is subtracted from the remaining count in
           %ebx and, if any count is left, control goes to
           L(StrncpyFillTailWithZero1) with %ecx = %edx + 7.  With
           USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless the byte
           it addresses is 0.  SAVE_RESULT and RETURN1 are macros defined
           outside this chunk.  */
   2564 L(Exit7):
   2565 	movl	(%ecx), %eax
   2566 	movl	%eax, (%edx)
   2567 	movl	3(%ecx), %eax		/* bytes 3..6; byte 3 is written twice */
   2568 	movl	%eax, 3(%edx)
   2569 	SAVE_RESULT	(6)
   2570 # ifdef USE_AS_STRNCPY
   2571 	sub	$7, %ebx		/* sets ZF when the count is used up */
   2572 	lea	7(%edx), %ecx		/* lea leaves the flags from sub intact */
   2573 	jnz	L(StrncpyFillTailWithZero1)
   2574 #  ifdef USE_AS_STPCPY
   2575 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   2576 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   2577 #  endif
   2578 # endif
   2579 	RETURN1
   2580 
   2581 	.p2align 4
        /* Exit9: copy 9 bytes from (%ecx) to (%edx) as an 8-byte move plus
           a byte move and finish via RETURN1.  With USE_AS_STRNCPY, 9 is
           subtracted from the remaining count in %ebx and, if any count
           is left, control goes to L(StrncpyFillTailWithZero1) with
           %ecx = %edx + 9.  With USE_AS_STPCPY as well, cmpb/sbb add 1 to
           %eax unless the byte it addresses is 0.  SAVE_RESULT and
           RETURN1 are macros defined outside this chunk.  */
   2582 L(Exit9):
   2583 	movlpd	(%ecx), %xmm0
   2584 	movb	8(%ecx), %al
   2585 	movlpd	%xmm0, (%edx)
   2586 	movb	%al, 8(%edx)
   2587 	SAVE_RESULT	(8)
   2588 # ifdef USE_AS_STRNCPY
   2589 	sub	$9, %ebx		/* sets ZF when the count is used up */
   2590 	lea	9(%edx), %ecx		/* lea leaves the flags from sub intact */
   2591 	jnz	L(StrncpyFillTailWithZero1)
   2592 #  ifdef USE_AS_STPCPY
   2593 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   2594 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   2595 #  endif
   2596 # endif
   2597 	RETURN1
   2598 
   2599 	.p2align 4
        /* Exit10: copy 10 bytes from (%ecx) to (%edx) as an 8-byte move
           plus a 16-bit move and finish via RETURN1.  With
           USE_AS_STRNCPY, 10 is subtracted from the remaining count in
           %ebx and, if any count is left, control goes to
           L(StrncpyFillTailWithZero1) with %ecx = %edx + 10.  With
           USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless the byte
           it addresses is 0.  SAVE_RESULT and RETURN1 are macros defined
           outside this chunk.  */
   2600 L(Exit10):
   2601 	movlpd	(%ecx), %xmm0
   2602 	movw	8(%ecx), %ax
   2603 	movlpd	%xmm0, (%edx)
   2604 	movw	%ax, 8(%edx)
   2605 	SAVE_RESULT	(9)
   2606 # ifdef USE_AS_STRNCPY
   2607 	sub	$10, %ebx		/* sets ZF when the count is used up */
   2608 	lea	10(%edx), %ecx		/* lea leaves the flags from sub intact */
   2609 	jnz	L(StrncpyFillTailWithZero1)
   2610 #  ifdef USE_AS_STPCPY
   2611 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   2612 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   2613 #  endif
   2614 # endif
   2615 	RETURN1
   2616 
   2617 	.p2align 4
        /* Exit11: copy 11 bytes from (%ecx) to (%edx) as an 8-byte move
           plus a 32-bit move at offset 7 (byte 7 is written twice), and
           finish via RETURN1.  With USE_AS_STRNCPY, 11 is subtracted from
           the remaining count in %ebx and, if any count is left, control
           goes to L(StrncpyFillTailWithZero1) with %ecx = %edx + 11.
           With USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless the
           byte it addresses is 0.  SAVE_RESULT and RETURN1 are macros
           defined outside this chunk.  */
   2618 L(Exit11):
   2619 	movlpd	(%ecx), %xmm0
   2620 	movl	7(%ecx), %eax
   2621 	movlpd	%xmm0, (%edx)
   2622 	movl	%eax, 7(%edx)
   2623 	SAVE_RESULT	(10)
   2624 # ifdef USE_AS_STRNCPY
   2625 	sub	$11, %ebx		/* sets ZF when the count is used up */
   2626 	lea	11(%edx), %ecx		/* lea leaves the flags from sub intact */
   2627 	jnz	L(StrncpyFillTailWithZero1)
   2628 #  ifdef USE_AS_STPCPY
   2629 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   2630 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   2631 #  endif
   2632 # endif
   2633 	RETURN1
   2634 
   2635 	.p2align 4
        /* Exit13: copy 13 bytes from (%ecx) to (%edx) with two 8-byte
           moves at offsets 0 and 5 (bytes 5..7 are written twice), and
           finish via RETURN1.  With USE_AS_STRNCPY, 13 is subtracted from
           the remaining count in %ebx and, if any count is left, control
           goes to L(StrncpyFillTailWithZero1) with %ecx = %edx + 13.
           With USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless the
           byte it addresses is 0.  SAVE_RESULT and RETURN1 are macros
           defined outside this chunk.  */
   2636 L(Exit13):
   2637 	movlpd	(%ecx), %xmm0
   2638 	movlpd	5(%ecx), %xmm1
   2639 	movlpd	%xmm0, (%edx)
   2640 	movlpd	%xmm1, 5(%edx)
   2641 	SAVE_RESULT	(12)
   2642 # ifdef USE_AS_STRNCPY
   2643 	sub	$13, %ebx		/* sets ZF when the count is used up */
   2644 	lea	13(%edx), %ecx		/* lea leaves the flags from sub intact */
   2645 	jnz	L(StrncpyFillTailWithZero1)
   2646 #  ifdef USE_AS_STPCPY
   2647 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   2648 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   2649 #  endif
   2650 # endif
   2651 	RETURN1
   2652 
   2653 	.p2align 4
        /* Exit14: copy 14 bytes from (%ecx) to (%edx) with two 8-byte
           moves at offsets 0 and 6 (bytes 6..7 are written twice), and
           finish via RETURN1.  With USE_AS_STRNCPY, 14 is subtracted from
           the remaining count in %ebx and, if any count is left, control
           goes to L(StrncpyFillTailWithZero1) with %ecx = %edx + 14.
           With USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless the
           byte it addresses is 0.  SAVE_RESULT and RETURN1 are macros
           defined outside this chunk.  */
   2654 L(Exit14):
   2655 	movlpd	(%ecx), %xmm0
   2656 	movlpd	6(%ecx), %xmm1
   2657 	movlpd	%xmm0, (%edx)
   2658 	movlpd	%xmm1, 6(%edx)
   2659 	SAVE_RESULT	(13)
   2660 # ifdef USE_AS_STRNCPY
   2661 	sub	$14, %ebx		/* sets ZF when the count is used up */
   2662 	lea	14(%edx), %ecx		/* lea leaves the flags from sub intact */
   2663 	jnz	L(StrncpyFillTailWithZero1)
   2664 #  ifdef USE_AS_STPCPY
   2665 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   2666 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   2667 #  endif
   2668 # endif
   2669 	RETURN1
   2670 
   2671 	.p2align 4
        /* Exit15: copy 15 bytes from (%ecx) to (%edx) with two 8-byte
           moves at offsets 0 and 7 (byte 7 is written twice), and finish
           via RETURN1.  With USE_AS_STRNCPY, 15 is subtracted from the
           remaining count in %ebx and, if any count is left, control
           goes to L(StrncpyFillTailWithZero1) with %ecx = %edx + 15.
           With USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless the
           byte it addresses is 0.  SAVE_RESULT and RETURN1 are macros
           defined outside this chunk.  */
   2672 L(Exit15):
   2673 	movlpd	(%ecx), %xmm0
   2674 	movlpd	7(%ecx), %xmm1
   2675 	movlpd	%xmm0, (%edx)
   2676 	movlpd	%xmm1, 7(%edx)
   2677 	SAVE_RESULT	(14)
   2678 # ifdef USE_AS_STRNCPY
   2679 	sub	$15, %ebx		/* sets ZF when the count is used up */
   2680 	lea	15(%edx), %ecx		/* lea leaves the flags from sub intact */
   2681 	jnz	L(StrncpyFillTailWithZero1)
   2682 #  ifdef USE_AS_STPCPY
   2683 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   2684 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   2685 #  endif
   2686 # endif
   2687 	RETURN1
   2688 
   2689 CFI_POP	(%edi)
   2690 
   2691 # ifdef USE_AS_STRNCPY
   2692 	.p2align 4
        /* Fill0 .. Fill16: write N zero bytes at (%ecx) and finish via the
           RETURN macro (defined outside this chunk).  These labels are
           the targets of the L(FillFrom1To16Bytes) dispatcher.  The
           zeros come from %dl/%dx/%edx and from the low 8 bytes of
           %xmm0, both of which L(StrncpyFillTailWithZero) clears before
           the dispatcher runs.  Sizes that are not 1, 2, 4 or 8 use two
           stores, overlapping where needed.  */
   2693 L(Fill0):
   2694 	RETURN
   2695 
   2696 	.p2align 4
   2697 L(Fill1):
   2698 	movb	%dl, (%ecx)
   2699 	RETURN
   2700 
   2701 	.p2align 4
   2702 L(Fill2):
   2703 	movw	%dx, (%ecx)
   2704 	RETURN
   2705 
   2706 	.p2align 4
   2707 L(Fill3):
   2708 	movw	%dx, (%ecx)
   2709 	movb	%dl, 2(%ecx)
   2710 	RETURN
   2711 
   2712 	.p2align 4
   2713 L(Fill4):
   2714 	movl	%edx, (%ecx)
   2715 	RETURN
   2716 
   2717 	.p2align 4
   2718 L(Fill5):
   2719 	movl	%edx, (%ecx)
   2720 	movb	%dl, 4(%ecx)
   2721 	RETURN
   2722 
   2723 	.p2align 4
   2724 L(Fill6):
   2725 	movl	%edx, (%ecx)
   2726 	movw	%dx, 4(%ecx)
   2727 	RETURN
   2728 
   2729 	.p2align 4
   2730 L(Fill7):
   2731 	movl	%edx, (%ecx)
   2732 	movl	%edx, 3(%ecx)		/* bytes 3..6; byte 3 is written twice */
   2733 	RETURN
   2734 
   2735 	.p2align 4
   2736 L(Fill8):
   2737 	movlpd	%xmm0, (%ecx)
   2738 	RETURN
   2739 
   2740 	.p2align 4
   2741 L(Fill9):
   2742 	movlpd	%xmm0, (%ecx)
   2743 	movb	%dl, 8(%ecx)
   2744 	RETURN
   2745 
   2746 	.p2align 4
   2747 L(Fill10):
   2748 	movlpd	%xmm0, (%ecx)
   2749 	movw	%dx, 8(%ecx)
   2750 	RETURN
   2751 
   2752 	.p2align 4
   2753 L(Fill11):
   2754 	movlpd	%xmm0, (%ecx)
   2755 	movl	%edx, 7(%ecx)		/* bytes 7..10; byte 7 is written twice */
   2756 	RETURN
   2757 
   2758 	.p2align 4
   2759 L(Fill12):
   2760 	movlpd	%xmm0, (%ecx)
   2761 	movl	%edx, 8(%ecx)
   2762 	RETURN
   2763 
   2764 	.p2align 4
   2765 L(Fill13):
   2766 	movlpd	%xmm0, (%ecx)
   2767 	movlpd	%xmm0, 5(%ecx)		/* bytes 5..12 */
   2768 	RETURN
   2769 
   2770 	.p2align 4
   2771 L(Fill14):
   2772 	movlpd	%xmm0, (%ecx)
   2773 	movlpd	%xmm0, 6(%ecx)		/* bytes 6..13 */
   2774 	RETURN
   2775 
   2776 	.p2align 4
   2777 L(Fill15):
   2778 	movlpd	%xmm0, (%ecx)
   2779 	movlpd	%xmm0, 7(%ecx)		/* bytes 7..14 */
   2780 	RETURN
   2781 
   2782 	.p2align 4
   2783 L(Fill16):
   2784 	movlpd	%xmm0, (%ecx)
   2785 	movlpd	%xmm0, 8(%ecx)
   2786 	RETURN
   2787 
   2788 	.p2align 4
        /* StrncpyFillExit1 / FillFrom1To16Bytes: select the L(FillN)
           routine matching the byte count in %ebx.
           L(StrncpyFillExit1) first adds 16 to %ebx, undoing a
           subtraction of 16 made by the code that jumped here;
           L(FillFrom1To16Bytes) expects the final count directly.
           0, 16, 8 and 4 are tested explicitly and the other values are
           separated with signed comparisons, so the dispatch assumes
           %ebx is in the range 0..16 at L(FillFrom1To16Bytes).  */
   2789 L(StrncpyFillExit1):
   2790 	lea	16(%ebx), %ebx
   2791 L(FillFrom1To16Bytes):
   2792 	test	%ebx, %ebx
   2793 	jz	L(Fill0)
   2794 	cmp	$16, %ebx
   2795 	je	L(Fill16)
   2796 	cmp	$8, %ebx
   2797 	je	L(Fill8)
   2798 	jg	L(FillMore8)
   2799 	cmp	$4, %ebx
   2800 	je	L(Fill4)
   2801 	jg	L(FillMore4)
   2802 	cmp	$2, %ebx		/* here the count is below 4 and not 0 */
   2803 	jl	L(Fill1)
   2804 	je	L(Fill2)
   2805 	jg	L(Fill3)
   2806 L(FillMore8):	/* but less than 16 */
   2807 	cmp	$12, %ebx
   2808 	je	L(Fill12)
   2809 	jl	L(FillLess12)
   2810 	cmp	$14, %ebx		/* here the count is 13, 14 or 15 */
   2811 	jl	L(Fill13)
   2812 	je	L(Fill14)
   2813 	jg	L(Fill15)
   2814 L(FillMore4):	/* but less than 8 */
   2815 	cmp	$6, %ebx
   2816 	jl	L(Fill5)
   2817 	je	L(Fill6)
   2818 	jg	L(Fill7)
   2819 L(FillLess12):	/* but more than 8 */
   2820 	cmp	$10, %ebx
   2821 	jl	L(Fill9)
   2822 	je	L(Fill10)
   2823 	jmp	L(Fill11)
   2824 
   2825 	CFI_PUSH(%edi)
   2826 
   2827 	.p2align 4
        /* StrncpyFillTailWithZero1 / StrncpyFillTailWithZero: write %ebx
           zero bytes starting at (%ecx).  The "1" entry first applies the
           POP macro (defined outside this chunk) to %edi and then falls
           into the common code.
           Up to 16 bytes are handed straight to the L(FillN) dispatcher.
           For longer fills the first 16 bytes are written with unaligned
           8-byte stores, %ecx is rounded down to a 16-byte boundary (the
           count is increased by the bytes stepped back over), 64-byte
           groups are written with aligned movdqa stores, and the last
           0..63 bytes are written as optional 32- and 16-byte pieces plus
           a final L(FillN).  %xmm0 and %edx are zero whenever the
           dispatcher is reached.  */
   2828 L(StrncpyFillTailWithZero1):
   2829 	POP	(%edi)
   2830 L(StrncpyFillTailWithZero):
   2831 	pxor	%xmm0, %xmm0		/* %xmm0 = 0, the fill pattern */
   2832 	xor	%edx, %edx		/* %edx = 0, used by the L(FillN) stores */
   2833 	sub	$16, %ebx
   2834 	jbe	L(StrncpyFillExit1)	/* unsigned: at most 16 bytes to fill */
   2835 
   2836 	movlpd	%xmm0, (%ecx)		/* first 16 bytes, no alignment needed */
   2837 	movlpd	%xmm0, 8(%ecx)
   2838 
   2839 	lea	16(%ecx), %ecx
   2840 
   2841 	mov	%ecx, %edx
   2842 	and	$0xf, %edx		/* %edx = misalignment of %ecx within 16 */
   2843 	sub	%edx, %ecx		/* round %ecx down to a 16-byte boundary */
   2844 	add	%edx, %ebx		/* count grows by the bytes stepped back */
   2845 	xor	%edx, %edx		/* restore %edx = 0 for the L(FillN) stores */
   2846 	sub	$64, %ebx
   2847 	jb	L(StrncpyFillLess64)	/* unsigned: fewer than 64 bytes left */
   2848 
   2849 L(StrncpyFillLoopMovdqa):
   2850 	movdqa	%xmm0, (%ecx)
   2851 	movdqa	%xmm0, 16(%ecx)
   2852 	movdqa	%xmm0, 32(%ecx)
   2853 	movdqa	%xmm0, 48(%ecx)
   2854 	lea	64(%ecx), %ecx
   2855 	sub	$64, %ebx
   2856 	jae	L(StrncpyFillLoopMovdqa)	/* loop while no borrow occurred */
   2857 
   2858 L(StrncpyFillLess64):
   2859 	add	$32, %ebx		/* %ebx = bytes left - 32 */
   2860 	jl	L(StrncpyFillLess32)
   2861 	movdqa	%xmm0, (%ecx)
   2862 	movdqa	%xmm0, 16(%ecx)
   2863 	lea	32(%ecx), %ecx
   2864 	sub	$16, %ebx
   2865 	jl	L(StrncpyFillExit1)	/* under 16 left; the target re-adds 16 */
   2866 	movdqa	%xmm0, (%ecx)
   2867 	lea	16(%ecx), %ecx
   2868 	jmp	L(FillFrom1To16Bytes)
   2869 
   2870 L(StrncpyFillLess32):
   2871 	add	$16, %ebx		/* %ebx = bytes left - 16 */
   2872 	jl	L(StrncpyFillExit1)	/* under 16 left; the target re-adds 16 */
   2873 	movdqa	%xmm0, (%ecx)
   2874 	lea	16(%ecx), %ecx
   2875 	jmp	L(FillFrom1To16Bytes)
   2876 # endif
   2877 
   2878 	.p2align 4
        /* ExitTail1: copy 1 byte from (%ecx) to (%edx) and finish via
           RETURN.  With USE_AS_STRNCPY, 1 is subtracted from the
           remaining count in %ebx and, if any count is left, control
           goes to L(StrncpyFillTailWithZero) with %ecx = %edx + 1.  With
           USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless the byte
           it addresses is 0.  SAVE_RESULT_TAIL and RETURN are macros
           defined outside this chunk.  */
   2879 L(ExitTail1):
   2880 	movb	(%ecx), %al
   2881 	movb	%al, (%edx)
   2882 	SAVE_RESULT_TAIL (0)
   2883 # ifdef USE_AS_STRNCPY
   2884 	sub	$1, %ebx		/* sets ZF when the count is used up */
   2885 	lea	1(%edx), %ecx		/* lea leaves the flags from sub intact */
   2886 	jnz	L(StrncpyFillTailWithZero)
   2887 #  ifdef USE_AS_STPCPY
   2888 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   2889 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   2890 #  endif
   2891 # endif
   2892 	RETURN
   2893 
   2894 	.p2align 4
        /* ExitTail2: copy 2 bytes from (%ecx) to (%edx) with one 16-bit
           move and finish via RETURN.  With USE_AS_STRNCPY, 2 is
           subtracted from the remaining count in %ebx and, if any count
           is left, control goes to L(StrncpyFillTailWithZero) with
           %ecx = %edx + 2.  With USE_AS_STPCPY as well, cmpb/sbb add 1 to
           %eax unless the byte it addresses is 0.  SAVE_RESULT_TAIL and
           RETURN are macros defined outside this chunk.  */
   2895 L(ExitTail2):
   2896 	movw	(%ecx), %ax
   2897 	movw	%ax, (%edx)
   2898 	SAVE_RESULT_TAIL (1)
   2899 # ifdef USE_AS_STRNCPY
   2900 	sub	$2, %ebx		/* sets ZF when the count is used up */
   2901 	lea	2(%edx), %ecx		/* lea leaves the flags from sub intact */
   2902 	jnz	L(StrncpyFillTailWithZero)
   2903 #  ifdef USE_AS_STPCPY
   2904 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   2905 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   2906 #  endif
   2907 # endif
   2908 	RETURN
   2909 
   2910 	.p2align 4
        /* ExitTail3: copy 3 bytes from (%ecx) to (%edx) as a 16-bit move
           plus a byte move and finish via RETURN.  With USE_AS_STRNCPY,
           3 is subtracted from the remaining count in %ebx and, if any
           count is left, control goes to L(StrncpyFillTailWithZero) with
           %ecx = %edx + 3.  With USE_AS_STPCPY as well, cmpb/sbb add 1 to
           %eax unless the byte it addresses is 0.  SAVE_RESULT_TAIL and
           RETURN are macros defined outside this chunk.  */
   2911 L(ExitTail3):
   2912 	movw	(%ecx), %ax
   2913 	movw	%ax, (%edx)
   2914 	movb	2(%ecx), %al
   2915 	movb	%al, 2(%edx)
   2916 	SAVE_RESULT_TAIL (2)
   2917 # ifdef USE_AS_STRNCPY
   2918 	sub	$3, %ebx		/* sets ZF when the count is used up */
   2919 	lea	3(%edx), %ecx		/* lea leaves the flags from sub intact */
   2920 	jnz	L(StrncpyFillTailWithZero)
   2921 #  ifdef USE_AS_STPCPY
   2922 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   2923 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   2924 #  endif
   2925 # endif
   2926 	RETURN
   2927 
   2928 	.p2align 4
        /* ExitTail4: copy 4 bytes from (%ecx) to (%edx) with one 32-bit
           move and finish via RETURN.  With USE_AS_STRNCPY, 4 is
           subtracted from the remaining count in %ebx and, if any count
           is left, control goes to L(StrncpyFillTailWithZero) with
           %ecx = %edx + 4.  With USE_AS_STPCPY as well, cmpb/sbb add 1 to
           %eax unless the byte it addresses is 0.  SAVE_RESULT_TAIL and
           RETURN are macros defined outside this chunk.  */
   2929 L(ExitTail4):
   2930 	movl	(%ecx), %eax
   2931 	movl	%eax, (%edx)
   2932 	SAVE_RESULT_TAIL (3)
   2933 # ifdef USE_AS_STRNCPY
   2934 	sub	$4, %ebx		/* sets ZF when the count is used up */
   2935 	lea	4(%edx), %ecx		/* lea leaves the flags from sub intact */
   2936 	jnz	L(StrncpyFillTailWithZero)
   2937 #  ifdef USE_AS_STPCPY
   2938 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   2939 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   2940 #  endif
   2941 # endif
   2942 	RETURN
   2943 
   2944 	.p2align 4
        /* ExitTail5: copy 5 bytes from (%ecx) to (%edx) as a 32-bit move
           plus a byte move and finish via RETURN.  With USE_AS_STRNCPY,
           5 is subtracted from the remaining count in %ebx and, if any
           count is left, control goes to L(StrncpyFillTailWithZero) with
           %ecx = %edx + 5.  With USE_AS_STPCPY as well, cmpb/sbb add 1 to
           %eax unless the byte it addresses is 0.  SAVE_RESULT_TAIL and
           RETURN are macros defined outside this chunk.  */
   2945 L(ExitTail5):
   2946 	movl	(%ecx), %eax
   2947 	movl	%eax, (%edx)
   2948 	movb	4(%ecx), %al
   2949 	movb	%al, 4(%edx)
   2950 	SAVE_RESULT_TAIL (4)
   2951 # ifdef USE_AS_STRNCPY
   2952 	sub	$5, %ebx		/* sets ZF when the count is used up */
   2953 	lea	5(%edx), %ecx		/* lea leaves the flags from sub intact */
   2954 	jnz	L(StrncpyFillTailWithZero)
   2955 #  ifdef USE_AS_STPCPY
   2956 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   2957 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   2958 #  endif
   2959 # endif
   2960 	RETURN
   2961 
   2962 	.p2align 4
        /* ExitTail6: copy 6 bytes from (%ecx) to (%edx) as a 32-bit move
           plus a 16-bit move and finish via RETURN.  With
           USE_AS_STRNCPY, 6 is subtracted from the remaining count in
           %ebx and, if any count is left, control goes to
           L(StrncpyFillTailWithZero) with %ecx = %edx + 6.  With
           USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless the byte
           it addresses is 0.  SAVE_RESULT_TAIL and RETURN are macros
           defined outside this chunk.  */
   2963 L(ExitTail6):
   2964 	movl	(%ecx), %eax
   2965 	movl	%eax, (%edx)
   2966 	movw	4(%ecx), %ax
   2967 	movw	%ax, 4(%edx)
   2968 	SAVE_RESULT_TAIL (5)
   2969 # ifdef USE_AS_STRNCPY
   2970 	sub	$6, %ebx		/* sets ZF when the count is used up */
   2971 	lea	6(%edx), %ecx		/* lea leaves the flags from sub intact */
   2972 	jnz	L(StrncpyFillTailWithZero)
   2973 #  ifdef USE_AS_STPCPY
   2974 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   2975 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   2976 #  endif
   2977 # endif
   2978 	RETURN
   2979 
   2980 	.p2align 4
        /* ExitTail7: copy 7 bytes from (%ecx) to (%edx) with two 32-bit
           moves that overlap at byte 3, and finish via RETURN.  With
           USE_AS_STRNCPY, 7 is subtracted from the remaining count in
           %ebx and, if any count is left, control goes to
           L(StrncpyFillTailWithZero) with %ecx = %edx + 7.  With
           USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless the byte
           it addresses is 0.  SAVE_RESULT_TAIL and RETURN are macros
           defined outside this chunk.  */
   2981 L(ExitTail7):
   2982 	movl	(%ecx), %eax
   2983 	movl	%eax, (%edx)
   2984 	movl	3(%ecx), %eax		/* bytes 3..6; byte 3 is written twice */
   2985 	movl	%eax, 3(%edx)
   2986 	SAVE_RESULT_TAIL (6)
   2987 # ifdef USE_AS_STRNCPY
   2988 	sub	$7, %ebx		/* sets ZF when the count is used up */
   2989 	lea	7(%edx), %ecx		/* lea leaves the flags from sub intact */
   2990 	jnz	L(StrncpyFillTailWithZero)
   2991 #  ifdef USE_AS_STPCPY
   2992 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   2993 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   2994 #  endif
   2995 # endif
   2996 	RETURN
   2997 
   2998 	.p2align 4
        /* ExitTail8: copy 8 bytes from (%ecx) to (%edx) with one 8-byte
           move and finish via RETURN.  With USE_AS_STRNCPY, 8 is
           subtracted from the remaining count in %ebx and, if any count
           is left, control goes to L(StrncpyFillTailWithZero) with
           %ecx = %edx + 8.  SAVE_RESULT_TAIL and RETURN are macros
           defined outside this chunk.
           NOTE(review): unlike most other L(ExitTailN) blocks in this
           file there is no USE_AS_STPCPY cmpb/sbb adjustment of %eax
           here; presumably this label is only reached when the eighth
           byte is the terminator -- confirm against the callers, which
           are outside this chunk.  */
   2999 L(ExitTail8):
   3000 	movlpd	(%ecx), %xmm0
   3001 	movlpd	%xmm0, (%edx)
   3002 	SAVE_RESULT_TAIL (7)
   3003 # ifdef USE_AS_STRNCPY
   3004 	sub	$8, %ebx		/* sets ZF when the count is used up */
   3005 	lea	8(%edx), %ecx		/* lea leaves the flags from sub intact */
   3006 	jnz	L(StrncpyFillTailWithZero)
   3007 # endif
   3008 	RETURN
   3009 
   3010 	.p2align 4
        /* ExitTail9: copy 9 bytes from (%ecx) to (%edx) as an 8-byte move
           plus a byte move and finish via RETURN.  With USE_AS_STRNCPY,
           9 is subtracted from the remaining count in %ebx and, if any
           count is left, control goes to L(StrncpyFillTailWithZero) with
           %ecx = %edx + 9.  With USE_AS_STPCPY as well, cmpb/sbb add 1 to
           %eax unless the byte it addresses is 0.  SAVE_RESULT_TAIL and
           RETURN are macros defined outside this chunk.  */
   3011 L(ExitTail9):
   3012 	movlpd	(%ecx), %xmm0
   3013 	movb	8(%ecx), %al
   3014 	movlpd	%xmm0, (%edx)
   3015 	movb	%al, 8(%edx)
   3016 	SAVE_RESULT_TAIL (8)
   3017 # ifdef USE_AS_STRNCPY
   3018 	sub	$9, %ebx		/* sets ZF when the count is used up */
   3019 	lea	9(%edx), %ecx		/* lea leaves the flags from sub intact */
   3020 	jnz	L(StrncpyFillTailWithZero)
   3021 #  ifdef USE_AS_STPCPY
   3022 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   3023 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   3024 #  endif
   3025 # endif
   3026 	RETURN
   3027 
   3028 	.p2align 4
        /* ExitTail10: copy 10 bytes from (%ecx) to (%edx) as an 8-byte
           move plus a 16-bit move and finish via RETURN.  With
           USE_AS_STRNCPY, 10 is subtracted from the remaining count in
           %ebx and, if any count is left, control goes to
           L(StrncpyFillTailWithZero) with %ecx = %edx + 10.  With
           USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless the byte
           it addresses is 0.  SAVE_RESULT_TAIL and RETURN are macros
           defined outside this chunk.  */
   3029 L(ExitTail10):
   3030 	movlpd	(%ecx), %xmm0
   3031 	movw	8(%ecx), %ax
   3032 	movlpd	%xmm0, (%edx)
   3033 	movw	%ax, 8(%edx)
   3034 	SAVE_RESULT_TAIL (9)
   3035 # ifdef USE_AS_STRNCPY
   3036 	sub	$10, %ebx		/* sets ZF when the count is used up */
   3037 	lea	10(%edx), %ecx		/* lea leaves the flags from sub intact */
   3038 	jnz	L(StrncpyFillTailWithZero)
   3039 #  ifdef USE_AS_STPCPY
   3040 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   3041 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   3042 #  endif
   3043 # endif
   3044 	RETURN
   3045 
   3046 	.p2align 4
        /* ExitTail11: copy 11 bytes from (%ecx) to (%edx) as an 8-byte
           move plus a 32-bit move at offset 7 (byte 7 is written twice),
           and finish via RETURN.  With USE_AS_STRNCPY, 11 is subtracted
           from the remaining count in %ebx and, if any count is left,
           control goes to L(StrncpyFillTailWithZero) with
           %ecx = %edx + 11.  With USE_AS_STPCPY as well, cmpb/sbb add 1
           to %eax unless the byte it addresses is 0.  SAVE_RESULT_TAIL
           and RETURN are macros defined outside this chunk.  */
   3047 L(ExitTail11):
   3048 	movlpd	(%ecx), %xmm0
   3049 	movl	7(%ecx), %eax
   3050 	movlpd	%xmm0, (%edx)
   3051 	movl	%eax, 7(%edx)
   3052 	SAVE_RESULT_TAIL (10)
   3053 # ifdef USE_AS_STRNCPY
   3054 	sub	$11, %ebx		/* sets ZF when the count is used up */
   3055 	lea	11(%edx), %ecx		/* lea leaves the flags from sub intact */
   3056 	jnz	L(StrncpyFillTailWithZero)
   3057 #  ifdef USE_AS_STPCPY
   3058 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   3059 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   3060 #  endif
   3061 # endif
   3062 	RETURN
   3063 
   3064 	.p2align 4
        /* ExitTail12: copy 12 bytes from (%ecx) to (%edx) as an 8-byte
           move plus a 32-bit move and finish via RETURN.  With
           USE_AS_STRNCPY, 12 is subtracted from the remaining count in
           %ebx and, if any count is left, control goes to
           L(StrncpyFillTailWithZero) with %ecx = %edx + 12.  With
           USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless the byte
           it addresses is 0.  SAVE_RESULT_TAIL and RETURN are macros
           defined outside this chunk.  */
   3065 L(ExitTail12):
   3066 	movlpd	(%ecx), %xmm0
   3067 	movl	8(%ecx), %eax
   3068 	movlpd	%xmm0, (%edx)
   3069 	movl	%eax, 8(%edx)
   3070 	SAVE_RESULT_TAIL (11)
   3071 # ifdef USE_AS_STRNCPY
   3072 	sub	$12, %ebx		/* sets ZF when the count is used up */
   3073 	lea	12(%edx), %ecx		/* lea leaves the flags from sub intact */
   3074 	jnz	L(StrncpyFillTailWithZero)
   3075 #  ifdef USE_AS_STPCPY
   3076 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   3077 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   3078 #  endif
   3079 # endif
   3080 	RETURN
   3081 
   3082 	.p2align 4
        /* ExitTail13: copy 13 bytes from (%ecx) to (%edx) with two 8-byte
           moves at offsets 0 and 5 (bytes 5..7 are written twice), and
           finish via RETURN.  With USE_AS_STRNCPY, 13 is subtracted from
           the remaining count in %ebx and, if any count is left, control
           goes to L(StrncpyFillTailWithZero) with %ecx = %edx + 13.
           With USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless the
           byte it addresses is 0.  SAVE_RESULT_TAIL and RETURN are
           macros defined outside this chunk.  */
   3083 L(ExitTail13):
   3084 	movlpd	(%ecx), %xmm0
   3085 	movlpd	5(%ecx), %xmm1
   3086 	movlpd	%xmm0, (%edx)
   3087 	movlpd	%xmm1, 5(%edx)
   3088 	SAVE_RESULT_TAIL (12)
   3089 # ifdef USE_AS_STRNCPY
   3090 	sub	$13, %ebx		/* sets ZF when the count is used up */
   3091 	lea	13(%edx), %ecx		/* lea leaves the flags from sub intact */
   3092 	jnz	L(StrncpyFillTailWithZero)
   3093 #  ifdef USE_AS_STPCPY
   3094 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   3095 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   3096 #  endif
   3097 # endif
   3098 	RETURN
   3099 
   3100 	.p2align 4
        /* ExitTail14: copy 14 bytes from (%ecx) to (%edx) with two 8-byte
           moves at offsets 0 and 6 (bytes 6..7 are written twice), and
           finish via RETURN.  With USE_AS_STRNCPY, 14 is subtracted from
           the remaining count in %ebx and, if any count is left, control
           goes to L(StrncpyFillTailWithZero) with %ecx = %edx + 14.
           With USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless the
           byte it addresses is 0.  SAVE_RESULT_TAIL and RETURN are
           macros defined outside this chunk.  */
   3101 L(ExitTail14):
   3102 	movlpd	(%ecx), %xmm0
   3103 	movlpd	6(%ecx), %xmm1
   3104 	movlpd	%xmm0, (%edx)
   3105 	movlpd	%xmm1, 6(%edx)
   3106 	SAVE_RESULT_TAIL (13)
   3107 # ifdef USE_AS_STRNCPY
   3108 	sub	$14, %ebx		/* sets ZF when the count is used up */
   3109 	lea	14(%edx), %ecx		/* lea leaves the flags from sub intact */
   3110 	jnz	L(StrncpyFillTailWithZero)
   3111 # ifdef USE_AS_STPCPY
   3112 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   3113 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   3114 #  endif
   3115 # endif
   3116 	RETURN
   3117 
   3118 	.p2align 4
        /* ExitTail15: copy 15 bytes from (%ecx) to (%edx) with two 8-byte
           moves at offsets 0 and 7 (byte 7 is written twice), and finish
           via RETURN.  With USE_AS_STRNCPY, 15 is subtracted from the
           remaining count in %ebx and, if any count is left, control
           goes to L(StrncpyFillTailWithZero) with %ecx = %edx + 15.
           SAVE_RESULT_TAIL and RETURN are macros defined outside this
           chunk.
           NOTE(review): unlike most other L(ExitTailN) blocks in this
           file there is no USE_AS_STPCPY cmpb/sbb adjustment of %eax
           here; presumably this label is only reached when the
           fifteenth byte is the terminator -- confirm against the
           callers, which are outside this chunk.  */
   3119 L(ExitTail15):
   3120 	movlpd	(%ecx), %xmm0
   3121 	movlpd	7(%ecx), %xmm1
   3122 	movlpd	%xmm0, (%edx)
   3123 	movlpd	%xmm1, 7(%edx)
   3124 	SAVE_RESULT_TAIL (14)
   3125 # ifdef USE_AS_STRNCPY
   3126 	sub	$15, %ebx		/* sets ZF when the count is used up */
   3127 	lea	15(%edx), %ecx		/* lea leaves the flags from sub intact */
   3128 	jnz	L(StrncpyFillTailWithZero)
   3129 # endif
   3130 	RETURN
   3131 
   3132 	.p2align 4
        /* ExitTail16: copy 16 bytes from (%ecx) to (%edx) with one
           unaligned 16-byte move and finish via RETURN.  With
           USE_AS_STRNCPY, 16 is subtracted from the remaining count in
           %ebx and, if any count is left, control goes to
           L(StrncpyFillTailWithZero) with %ecx = %edx + 16.  With
           USE_AS_STPCPY as well, cmpb/sbb add 1 to %eax unless the byte
           it addresses is 0.  SAVE_RESULT_TAIL and RETURN are macros
           defined outside this chunk.  */
   3133 L(ExitTail16):
   3134 	movdqu	(%ecx), %xmm0
   3135 	movdqu	%xmm0, (%edx)
   3136 	SAVE_RESULT_TAIL (15)
   3137 # ifdef USE_AS_STRNCPY
   3138 	sub	$16, %ebx		/* sets ZF when the count is used up */
   3139 	lea	16(%edx), %ecx		/* lea leaves the flags from sub intact */
   3140 	jnz	L(StrncpyFillTailWithZero)
   3141 #  ifdef USE_AS_STPCPY
   3142 	cmpb	$1, (%eax)		/* CF = 1 only if the byte at (%eax) is 0 */
   3143 	sbb	$-1, %eax		/* %eax = %eax + 1 - CF */
   3144 #  endif
   3145 # endif
   3146 	RETURN
   3147 #endif
   3148 
   3149 #ifdef USE_AS_STRNCPY
   3150 # if !defined(USE_AS_STRCAT) && !defined(USE_AS_STRLCPY)
   3151 	CFI_PUSH (%esi)
   3152 	CFI_PUSH (%edi)
   3153 # endif
   3154 	.p2align 4
        /* StrncpyLeaveCase2OrCase3: choose between the two 64-byte-loop
           exit paths.  A non-zero %eax goes to L(Aligned64LeaveCase2); a
           zero %eax falls through to the code that follows this block.  */
   3155 L(StrncpyLeaveCase2OrCase3):
   3156 	test	%eax, %eax
   3157 	jnz	L(Aligned64LeaveCase2)
   3158 
   3159 L(Aligned64LeaveCase3):
        /* Count-only exit (the %eax mask is not consulted).  48 is added
           back to %ebx; while count remains, %xmm4, %xmm5 and %xmm6 are
           stored with aligned moves at -64(%edx), -48(%edx) and
           -32(%edx), with %esi advanced by 16 and %ebx reduced by 16 per
           block.  L(CopyFrom1To16BytesCase3) then copies the final
           bytes.  The contents of %xmm4..%xmm6 are loaded outside this
           chunk.  */
   3160 	add	$48, %ebx
   3161 	jle	L(CopyFrom1To16BytesCase3)	/* signed: nothing beyond this block */
   3162 	movaps	%xmm4, -64(%edx)
   3163 	lea	16(%esi), %esi
   3164 	sub	$16, %ebx
   3165 	jbe	L(CopyFrom1To16BytesCase3)
   3166 	movaps	%xmm5, -48(%edx)
   3167 	lea	16(%esi), %esi
   3168 	sub	$16, %ebx
   3169 	jbe	L(CopyFrom1To16BytesCase3)
   3170 	movaps	%xmm6, -32(%edx)
   3171 	lea	16(%esi), %esi
   3172 	lea	-16(%ebx), %ebx		/* %ebx -= 16 without touching the flags */
   3173 	jmp	L(CopyFrom1To16BytesCase3)
   3174 
   3175 L(Aligned64LeaveCase2):
        /* Exit path that checks both the byte mask and the count.  For
           each of %xmm4, %xmm5, %xmm6 in turn: build a byte-equality
           mask against %xmm0 in %eax; if the count in %ebx is used up
           go to L(CopyFrom1To16BytesCase2OrCase3); if the mask is
           non-zero go to L(CopyFrom1To16Bytes) (defined outside this
           chunk); otherwise store the block and step %esi by 16.  The
           last block, %xmm7, always goes to L(CopyFrom1To16BytesCase2).
           After a pcmpeqb whose mask was zero, %xmm0 is all zero again,
           so the later comparisons test for zero bytes.
           NOTE(review): the first comparison relies on %xmm0 being zero
           on entry -- confirm against the code that jumps here.  */
   3176 	pcmpeqb	%xmm4, %xmm0
   3177 	pmovmskb %xmm0, %eax
   3178 	add	$48, %ebx
   3179 	jle	L(CopyFrom1To16BytesCase2OrCase3)	/* signed: count ends in this block */
   3180 	test	%eax, %eax
   3181 	jnz	L(CopyFrom1To16Bytes)
   3182 
   3183 	pcmpeqb	%xmm5, %xmm0
   3184 	pmovmskb %xmm0, %eax
   3185 	movaps	%xmm4, -64(%edx)	/* previous block had no match: store it */
   3186 	lea	16(%esi), %esi
   3187 	sub	$16, %ebx
   3188 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
   3189 	test	%eax, %eax
   3190 	jnz	L(CopyFrom1To16Bytes)
   3191 
   3192 	pcmpeqb	%xmm6, %xmm0
   3193 	pmovmskb %xmm0, %eax
   3194 	movaps	%xmm5, -48(%edx)
   3195 	lea	16(%esi), %esi
   3196 	sub	$16, %ebx
   3197 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
   3198 	test	%eax, %eax
   3199 	jnz	L(CopyFrom1To16Bytes)
   3200 
   3201 	pcmpeqb	%xmm7, %xmm0
   3202 	pmovmskb %xmm0, %eax
   3203 	movaps	%xmm6, -32(%edx)
   3204 	lea	16(%esi), %esi
   3205 	lea	-16(%ebx), %ebx		/* %ebx -= 16 without touching the flags */
   3206 	jmp	L(CopyFrom1To16BytesCase2)
   3207 
   3208 /*--------------------------------------------------*/
   3209 	.p2align 4
        /* StrncpyExit1Case2OrCase3: copy 15 bytes from (%ecx) to (%edx)
           with two 8-byte moves overlapping at byte 7, set %esi = 15 as
           the pointer advance for the next stage, then go to
           L(CopyFrom1To16BytesCase2) if %eax is non-zero and to
           L(CopyFrom1To16BytesCase3) otherwise.  */
   3210 L(StrncpyExit1Case2OrCase3):
   3211 	movlpd	(%ecx), %xmm0
   3212 	movlpd	7(%ecx), %xmm1
   3213 	movlpd	%xmm0, (%edx)
   3214 	movlpd	%xmm1, 7(%edx)
   3215 	mov	$15, %esi
   3216 	test	%eax, %eax
   3217 	jnz	L(CopyFrom1To16BytesCase2)
   3218 	jmp	L(CopyFrom1To16BytesCase3)
   3219 
   3220 	.p2align 4
        /* StrncpyExit2Case2OrCase3: copy 14 bytes from (%ecx) to (%edx)
           with two 8-byte moves at offsets 0 and 6, set %esi = 14 as the
           pointer advance for the next stage, then go to
           L(CopyFrom1To16BytesCase2) if %eax is non-zero and to
           L(CopyFrom1To16BytesCase3) otherwise.  */
   3221 L(StrncpyExit2Case2OrCase3):
   3222 	movlpd	(%ecx), %xmm0
   3223 	movlpd	6(%ecx), %xmm1
   3224 	movlpd	%xmm0, (%edx)
   3225 	movlpd	%xmm1, 6(%edx)
   3226 	mov	$14, %esi
   3227 	test	%eax, %eax
   3228 	jnz	L(CopyFrom1To16BytesCase2)
   3229 	jmp	L(CopyFrom1To16BytesCase3)
   3230 
   3231 	.p2align 4
        /* StrncpyExit3Case2OrCase3: copy 13 bytes from (%ecx) to (%edx)
           with two 8-byte moves at offsets 0 and 5, set %esi = 13 as the
           pointer advance for the next stage, then go to
           L(CopyFrom1To16BytesCase2) if %eax is non-zero and to
           L(CopyFrom1To16BytesCase3) otherwise.  */
   3232 L(StrncpyExit3Case2OrCase3):
   3233 	movlpd	(%ecx), %xmm0
   3234 	movlpd	5(%ecx), %xmm1
   3235 	movlpd	%xmm0, (%edx)
   3236 	movlpd	%xmm1, 5(%edx)
   3237 	mov	$13, %esi
   3238 	test	%eax, %eax
   3239 	jnz	L(CopyFrom1To16BytesCase2)
   3240 	jmp	L(CopyFrom1To16BytesCase3)
   3241 
   3242 	.p2align 4
        /* StrncpyExit4Case2OrCase3: copy 12 bytes from (%ecx) to (%edx)
           as an 8-byte move plus a 32-bit move (using %esi as scratch),
           then set %esi = 12 as the pointer advance for the next stage
           and go to L(CopyFrom1To16BytesCase2) if %eax is non-zero and
           to L(CopyFrom1To16BytesCase3) otherwise.  */
   3243 L(StrncpyExit4Case2OrCase3):
   3244 	movlpd	(%ecx), %xmm0
   3245 	movl	8(%ecx), %esi
   3246 	movlpd	%xmm0, (%edx)
   3247 	movl	%esi, 8(%edx)
   3248 	mov	$12, %esi
   3249 	test	%eax, %eax
   3250 	jnz	L(CopyFrom1To16BytesCase2)
   3251 	jmp	L(CopyFrom1To16BytesCase3)
   3252 
   3253 	.p2align 4
        /* StrncpyExit5Case2OrCase3: copy 11 bytes from (%ecx) to (%edx)
           as an 8-byte move plus a 32-bit move at offset 7 (using %esi
           as scratch), then set %esi = 11 as the pointer advance for the
           next stage and go to L(CopyFrom1To16BytesCase2) if %eax is
           non-zero and to L(CopyFrom1To16BytesCase3) otherwise.  */
   3254 L(StrncpyExit5Case2OrCase3):
   3255 	movlpd	(%ecx), %xmm0
   3256 	movl	7(%ecx), %esi
   3257 	movlpd	%xmm0, (%edx)
   3258 	movl	%esi, 7(%edx)
   3259 	mov	$11, %esi
   3260 	test	%eax, %eax
   3261 	jnz	L(CopyFrom1To16BytesCase2)
   3262 	jmp	L(CopyFrom1To16BytesCase3)
   3263 
   3264 	.p2align 4
        /* StrncpyExit6Case2OrCase3: copy 10 bytes from (%ecx) to (%edx)
           as an 8-byte move plus a 32-bit move at offset 6 (using %esi
           as scratch), then set %esi = 10 as the pointer advance for the
           next stage and go to L(CopyFrom1To16BytesCase2) if %eax is
           non-zero and to L(CopyFrom1To16BytesCase3) otherwise.  */
   3265 L(StrncpyExit6Case2OrCase3):
   3266 	movlpd	(%ecx), %xmm0
   3267 	movl	6(%ecx), %esi
   3268 	movlpd	%xmm0, (%edx)
   3269 	movl	%esi, 6(%edx)
   3270 	mov	$10, %esi
   3271 	test	%eax, %eax
   3272 	jnz	L(CopyFrom1To16BytesCase2)
   3273 	jmp	L(CopyFrom1To16BytesCase3)
   3274 
   3275 	.p2align 4
        /* StrncpyExit7Case2OrCase3: copy 9 bytes from (%ecx) to (%edx) as
           an 8-byte move plus a 32-bit move at offset 5 (using %esi as
           scratch), then set %esi = 9 as the pointer advance for the
           next stage and go to L(CopyFrom1To16BytesCase2) if %eax is
           non-zero and to L(CopyFrom1To16BytesCase3) otherwise.  */
   3276 L(StrncpyExit7Case2OrCase3):
   3277 	movlpd	(%ecx), %xmm0
   3278 	movl	5(%ecx), %esi
   3279 	movlpd	%xmm0, (%edx)
   3280 	movl	%esi, 5(%edx)
   3281 	mov	$9, %esi
   3282 	test	%eax, %eax
   3283 	jnz	L(CopyFrom1To16BytesCase2)
   3284 	jmp	L(CopyFrom1To16BytesCase3)
   3285 
   3286 	.p2align 4
        /* StrncpyExit8Case2OrCase3: copy 8 bytes from (%ecx) to (%edx),
           set %esi = 8 as the pointer advance for the next stage, then
           go to L(CopyFrom1To16BytesCase2) if %eax is non-zero and to
           L(CopyFrom1To16BytesCase3) otherwise.  */
   3287 L(StrncpyExit8Case2OrCase3):
   3288 	movlpd	(%ecx), %xmm0
   3289 	movlpd	%xmm0, (%edx)
   3290 	mov	$8, %esi
   3291 	test	%eax, %eax
   3292 	jnz	L(CopyFrom1To16BytesCase2)
   3293 	jmp	L(CopyFrom1To16BytesCase3)
   3294 
   3295 	.p2align 4
        /* StrncpyExit9Case2OrCase3: copy 8 bytes from (%ecx) to (%edx)
           and set %esi = 7 as the pointer advance for the next stage
           (the 8-byte move covers one byte more than the advance), then
           go to L(CopyFrom1To16BytesCase2) if %eax is non-zero and to
           L(CopyFrom1To16BytesCase3) otherwise.  */
   3296 L(StrncpyExit9Case2OrCase3):
   3297 	movlpd	(%ecx), %xmm0
   3298 	movlpd	%xmm0, (%edx)
   3299 	mov	$7, %esi
   3300 	test	%eax, %eax
   3301 	jnz	L(CopyFrom1To16BytesCase2)
   3302 	jmp	L(CopyFrom1To16BytesCase3)
   3303 
   3304 	.p2align 4
        /* StrncpyExit10Case2OrCase3: copy 8 bytes starting one byte
           before (%ecx) to the same offset before (%edx), which covers
           the 6 bytes at the pointers, and set %esi = 6 as the pointer
           advance for the next stage.  Then go to
           L(CopyFrom1To16BytesCase2) if %eax is non-zero and to
           L(CopyFrom1To16BytesCase3) otherwise.  */
   3305 L(StrncpyExit10Case2OrCase3):
   3306 	movlpd	-1(%ecx), %xmm0
   3307 	movlpd	%xmm0, -1(%edx)
   3308 	mov	$6, %esi
   3309 	test	%eax, %eax
   3310 	jnz	L(CopyFrom1To16BytesCase2)
   3311 	jmp	L(CopyFrom1To16BytesCase3)
   3312 
   3313 	.p2align 4
        /* StrncpyExit11Case2OrCase3: copy 8 bytes starting two bytes
           before (%ecx) to the same offset before (%edx), which covers
           the 5 bytes at the pointers, and set %esi = 5 as the pointer
           advance for the next stage.  Then go to
           L(CopyFrom1To16BytesCase2) if %eax is non-zero and to
           L(CopyFrom1To16BytesCase3) otherwise.  */
   3314 L(StrncpyExit11Case2OrCase3):
   3315 	movlpd	-2(%ecx), %xmm0
   3316 	movlpd	%xmm0, -2(%edx)
   3317 	mov	$5, %esi
   3318 	test	%eax, %eax
   3319 	jnz	L(CopyFrom1To16BytesCase2)
   3320 	jmp	L(CopyFrom1To16BytesCase3)
   3321 
   3322 	.p2align 4
        /* StrncpyExit12Case2OrCase3: copy 4 bytes from (%ecx) to (%edx)
           (using %esi as scratch), then set %esi = 4 as the pointer
           advance for the next stage and go to
           L(CopyFrom1To16BytesCase2) if %eax is non-zero and to
           L(CopyFrom1To16BytesCase3) otherwise.  */
   3323 L(StrncpyExit12Case2OrCase3):
   3324 	movl	(%ecx), %esi
   3325 	movl	%esi, (%edx)
   3326 	mov	$4, %esi
   3327 	test	%eax, %eax
   3328 	jnz	L(CopyFrom1To16BytesCase2)
   3329 	jmp	L(CopyFrom1To16BytesCase3)
   3330 
   3331 	.p2align 4
        /* StrncpyExit13Case2OrCase3: copy 4 bytes starting one byte
           before (%ecx) to the same offset before (%edx) (using %esi as
           scratch), which covers the 3 bytes at the pointers; then set
           %esi = 3 as the pointer advance for the next stage and go to
           L(CopyFrom1To16BytesCase2) if %eax is non-zero and to
           L(CopyFrom1To16BytesCase3) otherwise.  */
   3332 L(StrncpyExit13Case2OrCase3):
   3333 	movl	-1(%ecx), %esi
   3334 	movl	%esi, -1(%edx)
   3335 	mov	$3, %esi
   3336 	test	%eax, %eax
   3337 	jnz	L(CopyFrom1To16BytesCase2)
   3338 	jmp	L(CopyFrom1To16BytesCase3)
   3339 
   3340 	.p2align 4
        /* StrncpyExit14Case2OrCase3: copy 4 bytes starting two bytes
           before (%ecx) to the same offset before (%edx) (using %esi as
           scratch), which covers the 2 bytes at the pointers; then set
           %esi = 2 as the pointer advance for the next stage and go to
           L(CopyFrom1To16BytesCase2) if %eax is non-zero and to
           L(CopyFrom1To16BytesCase3) otherwise.  */
   3341 L(StrncpyExit14Case2OrCase3):
   3342 	movl	-2(%ecx), %esi
   3343 	movl	%esi, -2(%edx)
   3344 	mov	$2, %esi
   3345 	test	%eax, %eax
   3346 	jnz	L(CopyFrom1To16BytesCase2)
   3347 	jmp	L(CopyFrom1To16BytesCase3)
   3348 
   3349 	.p2align 4
        /* StrncpyExit15Case2OrCase3: copy 4 bytes starting three bytes
           before (%ecx) to the same offset before (%edx) (using %esi as
           scratch), which covers the 1 byte at the pointers; then set
           %esi = 1 as the pointer advance for the next stage and go to
           L(CopyFrom1To16BytesCase2) if %eax is non-zero and to
           L(CopyFrom1To16BytesCase3) otherwise.  */
   3350 L(StrncpyExit15Case2OrCase3):
   3351 	movl	-3(%ecx), %esi
   3352 	movl	%esi, -3(%edx)
   3353 	mov	$1, %esi
   3354 	test	%eax, %eax
   3355 	jnz	L(CopyFrom1To16BytesCase2)
   3356 	jmp	L(CopyFrom1To16BytesCase3)
   3357 
   3358 L(StrncpyLeave1):
        /* Count-limited exit for the palignr $1 copy path.  48 is added
           back to %ebx; while count remains, up to four 16-byte blocks
           are stored with aligned moves at (%edx), 16(%edx), 32(%edx)
           and 48(%edx) -- the first two rebuilt with palignr $1, the
           last two taken from %xmm4 and %xmm5 -- and %esi grows by 16
           per block stored.  L(StrncpyExit1) then advances %ecx and
           %edx by %esi + 15, copies the 16 bytes that end at the new
           pointers, clears %esi and lets L(CopyFrom1To16BytesCase3)
           finish the copy.  The register contents on entry are set up
           outside this chunk.  */
   3359 	movaps	%xmm2, %xmm3		/* keep the unshifted block for the second palignr */
   3360 	add	$48, %ebx
   3361 	jle	L(StrncpyExit1)
   3362 	palignr	$1, %xmm1, %xmm2
   3363 	movaps	%xmm2, (%edx)
   3364 	movaps	31(%ecx), %xmm2
   3365 	lea	16(%esi), %esi
   3366 	sub	$16, %ebx
   3367 	jbe	L(StrncpyExit1)
   3368 	palignr	$1, %xmm3, %xmm2
   3369 	movaps	%xmm2, 16(%edx)
   3370 	lea	16(%esi), %esi
   3371 	sub	$16, %ebx
   3372 	jbe	L(StrncpyExit1)
   3373 	movaps	%xmm4, 32(%edx)
   3374 	lea	16(%esi), %esi
   3375 	sub	$16, %ebx
   3376 	jbe	L(StrncpyExit1)
   3377 	movaps	%xmm5, 48(%edx)
   3378 	lea	16(%esi), %esi
   3379 	lea	-16(%ebx), %ebx		/* %ebx -= 16 without touching the flags */
   3380 L(StrncpyExit1):
   3381 	lea	15(%edx, %esi), %edx
   3382 	lea	15(%ecx, %esi), %ecx
   3383 	movdqu	-16(%ecx), %xmm0
   3384 	xor	%esi, %esi		/* the next stage adds %esi to both pointers */
   3385 	movdqu	%xmm0, -16(%edx)
   3386 	jmp	L(CopyFrom1To16BytesCase3)
   3387 
   3388 L(StrncpyLeave2):
        /* Count-limited exit for the palignr $2 copy path.  48 is added
           back to %ebx; while count remains, up to four 16-byte blocks
           are stored with aligned moves at (%edx), 16(%edx), 32(%edx)
           and 48(%edx) -- the first two rebuilt with palignr $2, the
           last two taken from %xmm4 and %xmm5 -- and %esi grows by 16
           per block stored.  L(StrncpyExit2) then advances %ecx and
           %edx by %esi + 14, copies the 16 bytes that end at the new
           pointers, clears %esi and lets L(CopyFrom1To16BytesCase3)
           finish the copy.  The register contents on entry are set up
           outside this chunk.  */
   3389 	movaps	%xmm2, %xmm3		/* keep the unshifted block for the second palignr */
   3390 	add	$48, %ebx
   3391 	jle	L(StrncpyExit2)
   3392 	palignr	$2, %xmm1, %xmm2
   3393 	movaps	%xmm2, (%edx)
   3394 	movaps	30(%ecx), %xmm2
   3395 	lea	16(%esi), %esi
   3396 	sub	$16, %ebx
   3397 	jbe	L(StrncpyExit2)
   3398 	palignr	$2, %xmm3, %xmm2
   3399 	movaps	%xmm2, 16(%edx)
   3400 	lea	16(%esi), %esi
   3401 	sub	$16, %ebx
   3402 	jbe	L(StrncpyExit2)
   3403 	movaps	%xmm4, 32(%edx)
   3404 	lea	16(%esi), %esi
   3405 	sub	$16, %ebx
   3406 	jbe	L(StrncpyExit2)
   3407 	movaps	%xmm5, 48(%edx)
   3408 	lea	16(%esi), %esi
   3409 	lea	-16(%ebx), %ebx		/* %ebx -= 16 without touching the flags */
   3410 L(StrncpyExit2):
   3411 	lea	14(%edx, %esi), %edx
   3412 	lea	14(%ecx, %esi), %ecx
   3413 	movdqu	-16(%ecx), %xmm0
   3414 	xor	%esi, %esi		/* the next stage adds %esi to both pointers */
   3415 	movdqu	%xmm0, -16(%edx)
   3416 	jmp	L(CopyFrom1To16BytesCase3)
   3417 
   3418 L(StrncpyLeave3):
        /* Count-limited exit for the palignr $3 copy path.  48 is added
           back to %ebx; while count remains, up to four 16-byte blocks
           are stored with aligned moves at (%edx), 16(%edx), 32(%edx)
           and 48(%edx) -- the first two rebuilt with palignr $3, the
           last two taken from %xmm4 and %xmm5 -- and %esi grows by 16
           per block stored.  L(StrncpyExit3) then advances %ecx and
           %edx by %esi + 13, copies the 16 bytes that end at the new
           pointers, clears %esi and lets L(CopyFrom1To16BytesCase3)
           finish the copy.  The register contents on entry are set up
           outside this chunk.  */
   3419 	movaps	%xmm2, %xmm3		/* keep the unshifted block for the second palignr */
   3420 	add	$48, %ebx
   3421 	jle	L(StrncpyExit3)
   3422 	palignr	$3, %xmm1, %xmm2
   3423 	movaps	%xmm2, (%edx)
   3424 	movaps	29(%ecx), %xmm2
   3425 	lea	16(%esi), %esi
   3426 	sub	$16, %ebx
   3427 	jbe	L(StrncpyExit3)
   3428 	palignr	$3, %xmm3, %xmm2
   3429 	movaps	%xmm2, 16(%edx)
   3430 	lea	16(%esi), %esi
   3431 	sub	$16, %ebx
   3432 	jbe	L(StrncpyExit3)
   3433 	movaps	%xmm4, 32(%edx)
   3434 	lea	16(%esi), %esi
   3435 	sub	$16, %ebx
   3436 	jbe	L(StrncpyExit3)
   3437 	movaps	%xmm5, 48(%edx)
   3438 	lea	16(%esi), %esi
   3439 	lea	-16(%ebx), %ebx		/* %ebx -= 16 without touching the flags */
   3440 L(StrncpyExit3):
   3441 	lea	13(%edx, %esi), %edx
   3442 	lea	13(%ecx, %esi), %ecx
   3443 	movdqu	-16(%ecx), %xmm0
   3444 	xor	%esi, %esi		/* the next stage adds %esi to both pointers */
   3445 	movdqu	%xmm0, -16(%edx)
   3446 	jmp	L(CopyFrom1To16BytesCase3)
   3447 
   3448 L(StrncpyLeave4):
        /* Count-limited exit for the palignr $4 copy path.  48 is added
           back to %ebx; while count remains, up to four 16-byte blocks
           are stored with aligned moves at (%edx), 16(%edx), 32(%edx)
           and 48(%edx) -- the first two rebuilt with palignr $4, the
           last two taken from %xmm4 and %xmm5 -- and %esi grows by 16
           per block stored.  L(StrncpyExit4) then advances %ecx and
           %edx by %esi + 12, copies the 12 bytes that end at the new
           pointers (8 + 4, through %xmm0 and %eax), clears %esi and
           lets L(CopyFrom1To16BytesCase3) finish the copy.  The
           register contents on entry are set up outside this chunk.  */
   3449 	movaps	%xmm2, %xmm3		/* keep the unshifted block for the second palignr */
   3450 	add	$48, %ebx
   3451 	jle	L(StrncpyExit4)
   3452 	palignr	$4, %xmm1, %xmm2
   3453 	movaps	%xmm2, (%edx)
   3454 	movaps	28(%ecx), %xmm2
   3455 	lea	16(%esi), %esi
   3456 	sub	$16, %ebx
   3457 	jbe	L(StrncpyExit4)
   3458 	palignr	$4, %xmm3, %xmm2
   3459 	movaps	%xmm2, 16(%edx)
   3460 	lea	16(%esi), %esi
   3461 	sub	$16, %ebx
   3462 	jbe	L(StrncpyExit4)
   3463 	movaps	%xmm4, 32(%edx)
   3464 	lea	16(%esi), %esi
   3465 	sub	$16, %ebx
   3466 	jbe	L(StrncpyExit4)
   3467 	movaps	%xmm5, 48(%edx)
   3468 	lea	16(%esi), %esi
   3469 	lea	-16(%ebx), %ebx		/* %ebx -= 16 without touching the flags */
   3470 L(StrncpyExit4):
   3471 	lea	12(%edx, %esi), %edx
   3472 	lea	12(%ecx, %esi), %ecx
   3473 	movlpd	-12(%ecx), %xmm0
   3474 	movl	-4(%ecx), %eax
   3475 	movlpd	%xmm0, -12(%edx)
   3476 	movl	%eax, -4(%edx)
   3477 	xor	%esi, %esi		/* the next stage adds %esi to both pointers */
   3478 	jmp	L(CopyFrom1To16BytesCase3)
   3479 
   3480 L(StrncpyLeave5):
        /* Count-limited exit for the palignr $5 copy path.  48 is added
           back to %ebx; while count remains, up to four 16-byte blocks
           are stored with aligned moves at (%edx), 16(%edx), 32(%edx)
           and 48(%edx) -- the first two rebuilt with palignr $5, the
           last two taken from %xmm4 and %xmm5 -- and %esi grows by 16
           per block stored.  L(StrncpyExit5) then advances %ecx and
           %edx by %esi + 11, copies the 11 bytes that end at the new
           pointers (an 8-byte and a 4-byte move that overlap), clears
           %esi and lets L(CopyFrom1To16BytesCase3) finish the copy.
           The register contents on entry are set up outside this
           chunk.  */
   3481 	movaps	%xmm2, %xmm3		/* keep the unshifted block for the second palignr */
   3482 	add	$48, %ebx
   3483 	jle	L(StrncpyExit5)
   3484 	palignr	$5, %xmm1, %xmm2
   3485 	movaps	%xmm2, (%edx)
   3486 	movaps	27(%ecx), %xmm2
   3487 	lea	16(%esi), %esi
   3488 	sub	$16, %ebx
   3489 	jbe	L(StrncpyExit5)
   3490 	palignr	$5, %xmm3, %xmm2
   3491 	movaps	%xmm2, 16(%edx)
   3492 	lea	16(%esi), %esi
   3493 	sub	$16, %ebx
   3494 	jbe	L(StrncpyExit5)
   3495 	movaps	%xmm4, 32(%edx)
   3496 	lea	16(%esi), %esi
   3497 	sub	$16, %ebx
   3498 	jbe	L(StrncpyExit5)
   3499 	movaps	%xmm5, 48(%edx)
   3500 	lea	16(%esi), %esi
   3501 	lea	-16(%ebx), %ebx		/* %ebx -= 16 without touching the flags */
   3502 L(StrncpyExit5):
   3503 	lea	11(%edx, %esi), %edx
   3504 	lea	11(%ecx, %esi), %ecx
   3505 	movlpd	-11(%ecx), %xmm0
   3506 	movl	-4(%ecx), %eax
   3507 	movlpd	%xmm0, -11(%edx)
   3508 	movl	%eax, -4(%edx)
   3509 	xor	%esi, %esi		/* the next stage adds %esi to both pointers */
   3510 	jmp	L(CopyFrom1To16BytesCase3)
   3511 
   3512 L(StrncpyLeave6):
        /* Count-limited exit for the palignr $6 copy path.  48 is added
           back to %ebx; while count remains, up to four 16-byte blocks
           are stored with aligned moves at (%edx), 16(%edx), 32(%edx)
           and 48(%edx) -- the first two rebuilt with palignr $6, the
           last two taken from %xmm4 and %xmm5 -- and %esi grows by 16
           per block stored.  L(StrncpyExit6) then advances %ecx and
           %edx by %esi + 10, copies the 10 bytes that end at the new
           pointers (8 + 2, through %xmm0 and %ax), clears %esi and lets
           L(CopyFrom1To16BytesCase3) finish the copy.  The register
           contents on entry are set up outside this chunk.  */
   3513 	movaps	%xmm2, %xmm3		/* keep the unshifted block for the second palignr */
   3514 	add	$48, %ebx
   3515 	jle	L(StrncpyExit6)
   3516 	palignr	$6, %xmm1, %xmm2
   3517 	movaps	%xmm2, (%edx)
   3518 	movaps	26(%ecx), %xmm2
   3519 	lea	16(%esi), %esi
   3520 	sub	$16, %ebx
   3521 	jbe	L(StrncpyExit6)
   3522 	palignr	$6, %xmm3, %xmm2
   3523 	movaps	%xmm2, 16(%edx)
   3524 	lea	16(%esi), %esi
   3525 	sub	$16, %ebx
   3526 	jbe	L(StrncpyExit6)
   3527 	movaps	%xmm4, 32(%edx)
   3528 	lea	16(%esi), %esi
   3529 	sub	$16, %ebx
   3530 	jbe	L(StrncpyExit6)
   3531 	movaps	%xmm5, 48(%edx)
   3532 	lea	16(%esi), %esi
   3533 	lea	-16(%ebx), %ebx		/* %ebx -= 16 without touching the flags */
   3534 L(StrncpyExit6):
   3535 	lea	10(%edx, %esi), %edx
   3536 	lea	10(%ecx, %esi), %ecx
   3537 
   3538 	movlpd	-10(%ecx), %xmm0
   3539 	movw	-2(%ecx), %ax
   3540 	movlpd	%xmm0, -10(%edx)
   3541 	movw	%ax, -2(%edx)
   3542 	xor	%esi, %esi		/* the next stage adds %esi to both pointers */
   3543 	jmp	L(CopyFrom1To16BytesCase3)
   3544 
   3545 L(StrncpyLeave7):
        /* Count-limited exit for the palignr $7 copy path.  48 is added
           back to %ebx; while count remains, up to four 16-byte blocks
           are stored with aligned moves at (%edx), 16(%edx), 32(%edx)
           and 48(%edx) -- the first two rebuilt with palignr $7, the
           last two taken from %xmm4 and %xmm5 -- and %esi grows by 16
           per block stored.  L(StrncpyExit7) then advances %ecx and
           %edx by %esi + 9, copies the 9 bytes that end at the new
           pointers (8 + 1, through %xmm0 and %ah), clears %esi and lets
           L(CopyFrom1To16BytesCase3) finish the copy.  The register
           contents on entry are set up outside this chunk.  */
   3546 	movaps	%xmm2, %xmm3		/* keep the unshifted block for the second palignr */
   3547 	add	$48, %ebx
   3548 	jle	L(StrncpyExit7)
   3549 	palignr	$7, %xmm1, %xmm2
   3550 	movaps	%xmm2, (%edx)
   3551 	movaps	25(%ecx), %xmm2
   3552 	lea	16(%esi), %esi
   3553 	sub	$16, %ebx
   3554 	jbe	L(StrncpyExit7)
   3555 	palignr	$7, %xmm3, %xmm2
   3556 	movaps	%xmm2, 16(%edx)
   3557 	lea	16(%esi), %esi
   3558 	sub	$16, %ebx
   3559 	jbe	L(StrncpyExit7)
   3560 	movaps	%xmm4, 32(%edx)
   3561 	lea	16(%esi), %esi
   3562 	sub	$16, %ebx
   3563 	jbe	L(StrncpyExit7)
   3564 	movaps	%xmm5, 48(%edx)
   3565 	lea	16(%esi), %esi
   3566 	lea	-16(%ebx), %ebx		/* %ebx -= 16 without touching the flags */
   3567 L(StrncpyExit7):
   3568 	lea	9(%edx, %esi), %edx
   3569 	lea	9(%ecx, %esi), %ecx
   3570 
   3571 	movlpd	-9(%ecx), %xmm0
   3572 	movb	-1(%ecx), %ah
   3573 	movlpd	%xmm0, -9(%edx)
   3574 	movb	%ah, -1(%edx)
   3575 	xor	%esi, %esi		/* the next stage adds %esi to both pointers */
   3576 	jmp	L(CopyFrom1To16BytesCase3)
   3577 
   3578 L(StrncpyLeave8):
        /* Count-limited exit for the palignr $8 copy path.  48 is added
           back to %ebx; while count remains, up to four 16-byte blocks
           are stored with aligned moves at (%edx), 16(%edx), 32(%edx)
           and 48(%edx) -- the first two rebuilt with palignr $8, the
           last two taken from %xmm4 and %xmm5 -- and %esi grows by 16
           per block stored.  L(StrncpyExit8) then advances %ecx and
           %edx by %esi + 8, copies the 8 bytes that end at the new
           pointers, clears %esi and lets L(CopyFrom1To16BytesCase3)
           finish the copy.  The register contents on entry are set up
           outside this chunk.  */
   3579 	movaps	%xmm2, %xmm3		/* keep the unshifted block for the second palignr */
   3580 	add	$48, %ebx
   3581 	jle	L(StrncpyExit8)
   3582 	palignr	$8, %xmm1, %xmm2
   3583 	movaps	%xmm2, (%edx)
   3584 	movaps	24(%ecx), %xmm2
   3585 	lea	16(%esi), %esi
   3586 	sub	$16, %ebx
   3587 	jbe	L(StrncpyExit8)
   3588 	palignr	$8, %xmm3, %xmm2
   3589 	movaps	%xmm2, 16(%edx)
   3590 	lea	16(%esi), %esi
   3591 	sub	$16, %ebx
   3592 	jbe	L(StrncpyExit8)
   3593 	movaps	%xmm4, 32(%edx)
   3594 	lea	16(%esi), %esi
   3595 	sub	$16, %ebx
   3596 	jbe	L(StrncpyExit8)
   3597 	movaps	%xmm5, 48(%edx)
   3598 	lea	16(%esi), %esi
   3599 	lea	-16(%ebx), %ebx		/* %ebx -= 16 without touching the flags */
   3600 L(StrncpyExit8):
   3601 	lea	8(%edx, %esi), %edx
   3602 	lea	8(%ecx, %esi), %ecx
   3603 	movlpd	-8(%ecx), %xmm0
   3604 	movlpd	%xmm0, -8(%edx)
   3605 	xor	%esi, %esi		/* the next stage adds %esi to both pointers */
   3606 	jmp	L(CopyFrom1To16BytesCase3)
   3607 
   3608 L(StrncpyLeave9):
        /* Count-limited exit for the palignr $9 copy path.  48 is added
           back to %ebx; while count remains, up to four 16-byte blocks
           are stored with aligned moves at (%edx), 16(%edx), 32(%edx)
           and 48(%edx) -- the first two rebuilt with palignr $9, the
           last two taken from %xmm4 and %xmm5 -- and %esi grows by 16
           per block stored.  L(StrncpyExit9) then advances %ecx and
           %edx by %esi + 7, copies the 8 bytes that end at the new
           pointers (one byte more than the fixed part of the advance),
           clears %esi and lets L(CopyFrom1To16BytesCase3) finish the
           copy.  The register contents on entry are set up outside
           this chunk.  */
   3609 	movaps	%xmm2, %xmm3		/* keep the unshifted block for the second palignr */
   3610 	add	$48, %ebx
   3611 	jle	L(StrncpyExit9)
   3612 	palignr	$9, %xmm1, %xmm2
   3613 	movaps	%xmm2, (%edx)
   3614 	movaps	23(%ecx), %xmm2
   3615 	lea	16(%esi), %esi
   3616 	sub	$16, %ebx
   3617 	jbe	L(StrncpyExit9)
   3618 	palignr	$9, %xmm3, %xmm2
   3619 	movaps	%xmm2, 16(%edx)
   3620 	lea	16(%esi), %esi
   3621 	sub	$16, %ebx
   3622 	jbe	L(StrncpyExit9)
   3623 	movaps	%xmm4, 32(%edx)
   3624 	lea	16(%esi), %esi
   3625 	sub	$16, %ebx
   3626 	jbe	L(StrncpyExit9)
   3627 	movaps	%xmm5, 48(%edx)
   3628 	lea	16(%esi), %esi
   3629 	lea	-16(%ebx), %ebx		/* %ebx -= 16 without touching the flags */
   3630 L(StrncpyExit9):
   3631 	lea	7(%edx, %esi), %edx
   3632 	lea	7(%ecx, %esi), %ecx
   3633 
   3634 	movlpd	-8(%ecx), %xmm0
   3635 	movlpd	%xmm0, -8(%edx)
   3636 	xor	%esi, %esi		/* the next stage adds %esi to both pointers */
   3637 	jmp	L(CopyFrom1To16BytesCase3)
   3638 
   3639 L(StrncpyLeave10):
        /* Count-limited exit for the palignr $10 copy path.  48 is added
           back to %ebx; while count remains, up to four 16-byte blocks
           are stored with aligned moves at (%edx), 16(%edx), 32(%edx)
           and 48(%edx) -- the first two rebuilt with palignr $10, the
           last two taken from %xmm4 and %xmm5 -- and %esi grows by 16
           per block stored.  L(StrncpyExit10) then advances %ecx and
           %edx by %esi + 6, copies the 8 bytes that end at the new
           pointers (two bytes more than the fixed part of the advance),
           clears %esi and lets L(CopyFrom1To16BytesCase3) finish the
           copy.  The register contents on entry are set up outside
           this chunk.  */
   3640 	movaps	%xmm2, %xmm3		/* keep the unshifted block for the second palignr */
   3641 	add	$48, %ebx
   3642 	jle	L(StrncpyExit10)
   3643 	palignr	$10, %xmm1, %xmm2
   3644 	movaps	%xmm2, (%edx)
   3645 	movaps	22(%ecx), %xmm2
   3646 	lea	16(%esi), %esi
   3647 	sub	$16, %ebx
   3648 	jbe	L(StrncpyExit10)
   3649 	palignr	$10, %xmm3, %xmm2
   3650 	movaps	%xmm2, 16(%edx)
   3651 	lea	16(%esi), %esi
   3652 	sub	$16, %ebx
   3653 	jbe	L(StrncpyExit10)
   3654 	movaps	%xmm4, 32(%edx)
   3655 	lea	16(%esi), %esi
   3656 	sub	$16, %ebx
   3657 	jbe	L(StrncpyExit10)
   3658 	movaps	%xmm5, 48(%edx)
   3659 	lea	16(%esi), %esi
   3660 	lea	-16(%ebx), %ebx		/* %ebx -= 16 without touching the flags */
   3661 L(StrncpyExit10):
   3662 	lea	6(%edx, %esi), %edx
   3663 	lea	6(%ecx, %esi), %ecx
   3664 
   3665 	movlpd	-8(%ecx), %xmm0
   3666 	movlpd	%xmm0, -8(%edx)
   3667 	xor	%esi, %esi		/* the next stage adds %esi to both pointers */
   3668 	jmp	L(CopyFrom1To16BytesCase3)
   3669 
   /* StrncpyLeave11 / StrncpyExit11: tail handling for the variant that
      realigns data with a palignr shift of 11 bytes.
      Leave11 stores up to four further 16-byte blocks at (%edx), 16(%edx),
      32(%edx) and 48(%edx).  After each store it adds 16 to the running offset
      in %esi and takes 16 off the counter in %ebx, leaving through Exit11 as
      soon as the counter is used up.  Exit11 advances both %edx and %ecx by
      %esi + 5 and copies exactly the 5 bytes below the new positions (one
      4-byte move plus one 1-byte move), using %esi and %ah as scratch.  It
      then clears %esi and continues at CopyFrom1To16BytesCase3.
      Roles as used in this block: loads come from %ecx, stores go to %edx.
      NOTE(review): the incoming contents of %xmm1, %xmm2, %xmm4, %xmm5, %esi
      and the bias of %ebx are set up outside this block (presumably by the
      shift-11 main loop, with %ebx tracking the remaining strncpy length) -
      confirm against the caller.  */
   3670 L(StrncpyLeave11):
   3671 	movaps	%xmm2, %xmm3	/* keep xmm2: it is the low part of the second palignr */
   3672 	add	$48, %ebx
   3673 	jle	L(StrncpyExit11)	/* result <= 0 (signed): skip every block store */
   3674 	palignr	$11, %xmm1, %xmm2	/* xmm2 = bytes 11..26 of the pair xmm2:xmm1 */
   3675 	movaps	%xmm2, (%edx)	/* block 0; movaps needs a 16-byte aligned address */
   3676 	movaps	21(%ecx), %xmm2	/* next aligned 16-byte source chunk */
   3677 	lea	16(%esi), %esi	/* esi += 16, flags untouched */
   3678 	sub	$16, %ebx
   3679 	jbe	L(StrncpyExit11)	/* counter was <= 16 (unsigned): stop */
   3680 	palignr	$11, %xmm3, %xmm2	/* xmm2 = bytes 11..26 of the pair xmm2:xmm3 */
   3681 	movaps	%xmm2, 16(%edx)	/* block 1 */
   3682 	lea	16(%esi), %esi
   3683 	sub	$16, %ebx
   3684 	jbe	L(StrncpyExit11)
   3685 	movaps	%xmm4, 32(%edx)	/* block 2, stored from xmm4 as it is */
   3686 	lea	16(%esi), %esi
   3687 	sub	$16, %ebx
   3688 	jbe	L(StrncpyExit11)
   3689 	movaps	%xmm5, 48(%edx)	/* block 3, stored from xmm5 as it is */
   3690 	lea	16(%esi), %esi
   3691 	lea	-16(%ebx), %ebx	/* ebx -= 16 without changing the flags */
   3692 L(StrncpyExit11):
   3693 	lea	5(%edx, %esi), %edx	/* edx += esi + 5 */
   3694 	lea	5(%ecx, %esi), %ecx	/* ecx += esi + 5 */
   3695 	movl	-5(%ecx), %esi	/* esi is free as scratch: it is cleared below */
   3696 	movb	-1(%ecx), %ah	/* fifth byte travels through ah */
   3697 	movl	%esi, -5(%edx)
   3698 	movb	%ah, -1(%edx)
   3699 	xor	%esi, %esi	/* offset has been folded into the pointers */
   3700 	jmp	L(CopyFrom1To16BytesCase3)
   3701 
   /* StrncpyLeave12 / StrncpyExit12: tail handling for the variant that
      realigns data with a palignr shift of 12 bytes.
      Leave12 stores up to four further 16-byte blocks at (%edx), 16(%edx),
      32(%edx) and 48(%edx).  After each store it adds 16 to the running offset
      in %esi and takes 16 off the counter in %ebx, leaving through Exit12 as
      soon as the counter is used up.  Exit12 advances both %edx and %ecx by
      %esi + 4 and copies the 4 bytes below the new positions through %eax.
      It then clears %esi and continues at CopyFrom1To16BytesCase3.
      Roles as used in this block: loads come from %ecx, stores go to %edx.
      NOTE(review): the incoming contents of %xmm1, %xmm2, %xmm4, %xmm5, %esi
      and the bias of %ebx are set up outside this block (presumably by the
      shift-12 main loop, with %ebx tracking the remaining strncpy length) -
      confirm against the caller.  */
   3702 L(StrncpyLeave12):
   3703 	movaps	%xmm2, %xmm3	/* keep xmm2: it is the low part of the second palignr */
   3704 	add	$48, %ebx
   3705 	jle	L(StrncpyExit12)	/* result <= 0 (signed): skip every block store */
   3706 	palignr	$12, %xmm1, %xmm2	/* xmm2 = bytes 12..27 of the pair xmm2:xmm1 */
   3707 	movaps	%xmm2, (%edx)	/* block 0; movaps needs a 16-byte aligned address */
   3708 	movaps	20(%ecx), %xmm2	/* next aligned 16-byte source chunk */
   3709 	lea	16(%esi), %esi	/* esi += 16, flags untouched */
   3710 	sub	$16, %ebx
   3711 	jbe	L(StrncpyExit12)	/* counter was <= 16 (unsigned): stop */
   3712 	palignr	$12, %xmm3, %xmm2	/* xmm2 = bytes 12..27 of the pair xmm2:xmm3 */
   3713 	movaps	%xmm2, 16(%edx)	/* block 1 */
   3714 	lea	16(%esi), %esi
   3715 	sub	$16, %ebx
   3716 	jbe	L(StrncpyExit12)
   3717 	movaps	%xmm4, 32(%edx)	/* block 2, stored from xmm4 as it is */
   3718 	lea	16(%esi), %esi
   3719 	sub	$16, %ebx
   3720 	jbe	L(StrncpyExit12)
   3721 	movaps	%xmm5, 48(%edx)	/* block 3, stored from xmm5 as it is */
   3722 	lea	16(%esi), %esi
   3723 	lea	-16(%ebx), %ebx	/* ebx -= 16 without changing the flags */
   3724 L(StrncpyExit12):
   3725 	lea	4(%edx, %esi), %edx	/* edx += esi + 4 */
   3726 	lea	4(%ecx, %esi), %ecx	/* ecx += esi + 4 */
   3727 	movl	-4(%ecx), %eax	/* 4-byte copy through eax */
   3728 	movl	%eax, -4(%edx)
   3729 	xor	%esi, %esi	/* offset has been folded into the pointers */
   3730 	jmp	L(CopyFrom1To16BytesCase3)
   3731 
   /* StrncpyLeave13 / StrncpyExit13: tail handling for the variant that
      realigns data with a palignr shift of 13 bytes.
      Leave13 stores up to four further 16-byte blocks at (%edx), 16(%edx),
      32(%edx) and 48(%edx).  After each store it adds 16 to the running offset
      in %esi and takes 16 off the counter in %ebx, leaving through Exit13 as
      soon as the counter is used up.  Exit13 advances both %edx and %ecx by
      %esi + 3 and then copies the 4 bytes that end at the new positions, so
      the copy also covers 1 byte below the pre-advance position (presumably a
      byte that was already stored - confirm).  It then clears %esi and
      continues at CopyFrom1To16BytesCase3.
      Roles as used in this block: loads come from %ecx, stores go to %edx.
      NOTE(review): the incoming contents of %xmm1, %xmm2, %xmm4, %xmm5, %esi
      and the bias of %ebx are set up outside this block (presumably by the
      shift-13 main loop, with %ebx tracking the remaining strncpy length) -
      confirm against the caller.  */
   3732 L(StrncpyLeave13):
   3733 	movaps	%xmm2, %xmm3	/* keep xmm2: it is the low part of the second palignr */
   3734 	add	$48, %ebx
   3735 	jle	L(StrncpyExit13)	/* result <= 0 (signed): skip every block store */
   3736 	palignr	$13, %xmm1, %xmm2	/* xmm2 = bytes 13..28 of the pair xmm2:xmm1 */
   3737 	movaps	%xmm2, (%edx)	/* block 0; movaps needs a 16-byte aligned address */
   3738 	movaps	19(%ecx), %xmm2	/* next aligned 16-byte source chunk */
   3739 	lea	16(%esi), %esi	/* esi += 16, flags untouched */
   3740 	sub	$16, %ebx
   3741 	jbe	L(StrncpyExit13)	/* counter was <= 16 (unsigned): stop */
   3742 	palignr	$13, %xmm3, %xmm2	/* xmm2 = bytes 13..28 of the pair xmm2:xmm3 */
   3743 	movaps	%xmm2, 16(%edx)	/* block 1 */
   3744 	lea	16(%esi), %esi
   3745 	sub	$16, %ebx
   3746 	jbe	L(StrncpyExit13)
   3747 	movaps	%xmm4, 32(%edx)	/* block 2, stored from xmm4 as it is */
   3748 	lea	16(%esi), %esi
   3749 	sub	$16, %ebx
   3750 	jbe	L(StrncpyExit13)
   3751 	movaps	%xmm5, 48(%edx)	/* block 3, stored from xmm5 as it is */
   3752 	lea	16(%esi), %esi
   3753 	lea	-16(%ebx), %ebx	/* ebx -= 16 without changing the flags */
   3754 L(StrncpyExit13):
   3755 	lea	3(%edx, %esi), %edx	/* edx += esi + 3 */
   3756 	lea	3(%ecx, %esi), %ecx	/* ecx += esi + 3 */
   3757 
   3758 	movl	-4(%ecx), %eax	/* 4-byte copy for a 3-byte advance */
   3759 	movl	%eax, -4(%edx)
   3760 	xor	%esi, %esi	/* offset has been folded into the pointers */
   3761 	jmp	L(CopyFrom1To16BytesCase3)
   3762 
   /* StrncpyLeave14 / StrncpyExit14: tail handling for the variant that
      realigns data with a palignr shift of 14 bytes.
      Leave14 stores up to four further 16-byte blocks at (%edx), 16(%edx),
      32(%edx) and 48(%edx).  After each store it adds 16 to the running offset
      in %esi and takes 16 off the counter in %ebx, leaving through Exit14 as
      soon as the counter is used up.  Exit14 advances both %edx and %ecx by
      %esi + 2 and copies the 2 bytes below the new positions through %ax.
      It then clears %esi and continues at CopyFrom1To16BytesCase3.
      Roles as used in this block: loads come from %ecx, stores go to %edx.
      NOTE(review): the incoming contents of %xmm1, %xmm2, %xmm4, %xmm5, %esi
      and the bias of %ebx are set up outside this block (presumably by the
      shift-14 main loop, with %ebx tracking the remaining strncpy length) -
      confirm against the caller.  */
   3763 L(StrncpyLeave14):
   3764 	movaps	%xmm2, %xmm3	/* keep xmm2: it is the low part of the second palignr */
   3765 	add	$48, %ebx
   3766 	jle	L(StrncpyExit14)	/* result <= 0 (signed): skip every block store */
   3767 	palignr	$14, %xmm1, %xmm2	/* xmm2 = bytes 14..29 of the pair xmm2:xmm1 */
   3768 	movaps	%xmm2, (%edx)	/* block 0; movaps needs a 16-byte aligned address */
   3769 	movaps	18(%ecx), %xmm2	/* next aligned 16-byte source chunk */
   3770 	lea	16(%esi), %esi	/* esi += 16, flags untouched */
   3771 	sub	$16, %ebx
   3772 	jbe	L(StrncpyExit14)	/* counter was <= 16 (unsigned): stop */
   3773 	palignr	$14, %xmm3, %xmm2	/* xmm2 = bytes 14..29 of the pair xmm2:xmm3 */
   3774 	movaps	%xmm2, 16(%edx)	/* block 1 */
   3775 	lea	16(%esi), %esi
   3776 	sub	$16, %ebx
   3777 	jbe	L(StrncpyExit14)
   3778 	movaps	%xmm4, 32(%edx)	/* block 2, stored from xmm4 as it is */
   3779 	lea	16(%esi), %esi
   3780 	sub	$16, %ebx
   3781 	jbe	L(StrncpyExit14)
   3782 	movaps	%xmm5, 48(%edx)	/* block 3, stored from xmm5 as it is */
   3783 	lea	16(%esi), %esi
   3784 	lea	-16(%ebx), %ebx	/* ebx -= 16 without changing the flags */
   3785 L(StrncpyExit14):
   3786 	lea	2(%edx, %esi), %edx	/* edx += esi + 2 */
   3787 	lea	2(%ecx, %esi), %ecx	/* ecx += esi + 2 */
   3788 	movw	-2(%ecx), %ax	/* 2-byte copy through ax */
   3789 	movw	%ax, -2(%edx)
   3790 	xor	%esi, %esi	/* offset has been folded into the pointers */
   3791 	jmp	L(CopyFrom1To16BytesCase3)
   3792 
   /* StrncpyLeave15 / StrncpyExit15: tail handling for the variant that
      realigns data with a palignr shift of 15 bytes.
      Leave15 stores up to four further 16-byte blocks at (%edx), 16(%edx),
      32(%edx) and 48(%edx).  After each store it adds 16 to the running offset
      in %esi and takes 16 off the counter in %ebx, leaving through Exit15 as
      soon as the counter is used up.  Exit15 advances both %edx and %ecx by
      %esi + 1 and copies the single byte below the new positions through %ah.
      It then clears %esi and continues at CopyFrom1To16BytesCase3.
      Roles as used in this block: loads come from %ecx, stores go to %edx.
      NOTE(review): the incoming contents of %xmm1, %xmm2, %xmm4, %xmm5, %esi
      and the bias of %ebx are set up outside this block (presumably by the
      shift-15 main loop, with %ebx tracking the remaining strncpy length) -
      confirm against the caller.  */
   3793 L(StrncpyLeave15):
   3794 	movaps	%xmm2, %xmm3	/* keep xmm2: it is the low part of the second palignr */
   3795 	add	$48, %ebx
   3796 	jle	L(StrncpyExit15)	/* result <= 0 (signed): skip every block store */
   3797 	palignr	$15, %xmm1, %xmm2	/* xmm2 = bytes 15..30 of the pair xmm2:xmm1 */
   3798 	movaps	%xmm2, (%edx)	/* block 0; movaps needs a 16-byte aligned address */
   3799 	movaps	17(%ecx), %xmm2	/* next aligned 16-byte source chunk */
   3800 	lea	16(%esi), %esi	/* esi += 16, flags untouched */
   3801 	sub	$16, %ebx
   3802 	jbe	L(StrncpyExit15)	/* counter was <= 16 (unsigned): stop */
   3803 	palignr	$15, %xmm3, %xmm2	/* xmm2 = bytes 15..30 of the pair xmm2:xmm3 */
   3804 	movaps	%xmm2, 16(%edx)	/* block 1 */
   3805 	lea	16(%esi), %esi
   3806 	sub	$16, %ebx
   3807 	jbe	L(StrncpyExit15)
   3808 	movaps	%xmm4, 32(%edx)	/* block 2, stored from xmm4 as it is */
   3809 	lea	16(%esi), %esi
   3810 	sub	$16, %ebx
   3811 	jbe	L(StrncpyExit15)
   3812 	movaps	%xmm5, 48(%edx)	/* block 3, stored from xmm5 as it is */
   3813 	lea	16(%esi), %esi
   3814 	lea	-16(%ebx), %ebx	/* ebx -= 16 without changing the flags */
   3815 L(StrncpyExit15):
   3816 	lea	1(%edx, %esi), %edx	/* edx += esi + 1 */
   3817 	lea	1(%ecx, %esi), %ecx	/* ecx += esi + 1 */
   3818 	movb	-1(%ecx), %ah	/* single-byte copy through ah */
   3819 	movb	%ah, -1(%edx)
   3820 	xor	%esi, %esi	/* offset has been folded into the pointers */
   3821 	jmp	L(CopyFrom1To16BytesCase3)
   3822 #endif
   3823 
   3824 #if !defined USE_AS_STRCAT && ! defined USE_AS_STRLCPY
   3825 # ifdef USE_AS_STRNCPY
   /* NOTE(review): CFI_POP is a macro defined outside this block.  It
      presumably emits unwind (CFI) annotations recording that %esi and %edi
      are no longer saved on the stack for the code that follows - confirm
      against the macro definition.  */
   3826 	CFI_POP (%esi)
   3827 	CFI_POP (%edi)
   3828 
   /* ExitTail0: zero-length exit.  Nothing is copied; the value of %edx is
      handed back in %eax.  Reached from StrncpyExit4Bytes when %ebx is 0.  */
   3829 	.p2align 4
   3830 L(ExitTail0):
   3831 	movl	%edx, %eax	/* result = edx, unchanged */
   3832 	RETURN
   3833 
   /* StrncpyExit15Bytes: short-copy dispatcher for counts above 12.
      In: %ecx = source, %edx = destination, %ebx = byte count.
      If %ebx <= 12 (unsigned) control passes to StrncpyExit12Bytes.
      Otherwise source bytes 8..13 are tested for NUL in ascending order, with
      the count tested against 13 and 14 along the way; the first hit jumps to
      the matching ExitTailN label (defined outside this block).  If nothing
      matches, 15 bytes are copied with two overlapping 8-byte moves.
      Source bytes 0..7 are not examined here.
      NOTE(review): this presumably relies on the caller having ruled out a
      NUL in bytes 0..7 and on %ebx being at most 15 - confirm at the jump
      sites.  RETURN is a macro defined outside this block.  */
   3834 	.p2align 4
   3835 L(StrncpyExit15Bytes):
   3836 	cmp	$12, %ebx
   3837 	jbe	L(StrncpyExit12Bytes)	/* count <= 12: smaller handler */
   3838 	cmpb	$0, 8(%ecx)	/* count > 12 here, so bytes 8..11 need no count test */
   3839 	jz	L(ExitTail9)
   3840 	cmpb	$0, 9(%ecx)
   3841 	jz	L(ExitTail10)
   3842 	cmpb	$0, 10(%ecx)
   3843 	jz	L(ExitTail11)
   3844 	cmpb	$0, 11(%ecx)
   3845 	jz	L(ExitTail12)
   3846 	cmp	$13, %ebx	/* from here on the count is tested before each byte */
   3847 	je	L(ExitTail13)
   3848 	cmpb	$0, 12(%ecx)
   3849 	jz	L(ExitTail13)
   3850 	cmp	$14, %ebx
   3851 	je	L(ExitTail14)
   3852 	cmpb	$0, 13(%ecx)
   3853 	jz	L(ExitTail14)
   3854 	movlpd	(%ecx), %xmm0	/* source bytes 0..7 */
   3855 	movlpd	7(%ecx), %xmm1	/* source bytes 7..14 (byte 7 is read twice) */
   3856 	movlpd	%xmm0, (%edx)
   3857 	movlpd	%xmm1, 7(%edx)	/* 15 bytes written in total */
   3858 #  ifdef USE_AS_STPCPY
   3859 	lea	14(%edx), %eax	/* eax = address of the last byte written */
   3860 	cmpb	$1, (%eax)	/* CF = 1 exactly when that byte is 0 */
   3861 	sbb	$-1, %eax	/* eax += 1 - CF: step past the byte unless it is NUL */
   3862 #  else
   3863 	movl	%edx, %eax	/* result = destination start */
   3864 #  endif
   3865 	RETURN
   3866 
   /* StrncpyExit12Bytes: short-copy dispatcher for counts 9..12.
      In: %ecx = source, %edx = destination, %ebx = byte count.
      For N = 9, 10, 11 in turn: if %ebx == N, or source byte N-1 is NUL, jump
      to ExitTailN (defined outside this block).  If nothing matches, 12 bytes
      are copied (8 through %xmm0, 4 through %eax).
      Source bytes 0..7 are not examined here.
      NOTE(review): this presumably relies on the caller having ruled out a
      NUL in bytes 0..7 and on %ebx being in 9..12 - confirm at the jump
      sites.  SAVE_RESULT_TAIL and RETURN are macros defined outside this
      block; SAVE_RESULT_TAIL (11) presumably loads the return value into
      %eax (the STPCPY code right after it dereferences %eax) - confirm.  */
   3867 	.p2align 4
   3868 L(StrncpyExit12Bytes):
   3869 	cmp	$9, %ebx
   3870 	je	L(ExitTail9)
   3871 	cmpb	$0, 8(%ecx)
   3872 	jz	L(ExitTail9)
   3873 	cmp	$10, %ebx
   3874 	je	L(ExitTail10)
   3875 	cmpb	$0, 9(%ecx)
   3876 	jz	L(ExitTail10)
   3877 	cmp	$11, %ebx
   3878 	je	L(ExitTail11)
   3879 	cmpb	$0, 10(%ecx)
   3880 	jz	L(ExitTail11)
   3881 	movlpd	(%ecx), %xmm0	/* source bytes 0..7 */
   3882 	movl	8(%ecx), %eax	/* source bytes 8..11; eax is only scratch here */
   3883 	movlpd	%xmm0, (%edx)
   3884 	movl	%eax, 8(%edx)	/* 12 bytes written in total */
   3885 	SAVE_RESULT_TAIL (11)
   3886 #  ifdef USE_AS_STPCPY
   3887 	cmpb	$1, (%eax)	/* CF = 1 exactly when the byte at (%eax) is 0 */
   3888 	sbb	$-1, %eax	/* eax += 1 - CF: step past the byte unless it is NUL */
   3889 #  endif
   3890 	RETURN
   3891 
   /* StrncpyExit8Bytes: short-copy dispatcher for counts above 4.
      In: %ecx = source, %edx = destination, %ebx = byte count.
      If %ebx <= 4 (unsigned) control passes to StrncpyExit4Bytes.
      Otherwise source bytes 0..3 are tested for NUL.  Then, for N = 5, 6, 7
      in turn, the count is tested against N and source byte N-1 against NUL.
      The first hit jumps to the matching ExitTailN label (defined outside
      this block).  If nothing matches, 8 bytes are copied in one move.
      NOTE(review): the fall-through presumably relies on %ebx being at most 8
      on entry - confirm at the jump sites.  RETURN is a macro defined outside
      this block.  */
   3892 	.p2align 4
   3893 L(StrncpyExit8Bytes):
   3894 	cmp	$4, %ebx
   3895 	jbe	L(StrncpyExit4Bytes)	/* count <= 4: smaller handler */
   3896 	cmpb	$0, (%ecx)	/* count > 4 here, so bytes 0..3 need no count test */
   3897 	jz	L(ExitTail1)
   3898 	cmpb	$0, 1(%ecx)
   3899 	jz	L(ExitTail2)
   3900 	cmpb	$0, 2(%ecx)
   3901 	jz	L(ExitTail3)
   3902 	cmpb	$0, 3(%ecx)
   3903 	jz	L(ExitTail4)
   3904 
   3905 	cmp	$5, %ebx	/* from here on the count is tested before each byte */
   3906 	je	L(ExitTail5)
   3907 	cmpb	$0, 4(%ecx)
   3908 	jz	L(ExitTail5)
   3909 	cmp	$6, %ebx
   3910 	je	L(ExitTail6)
   3911 	cmpb	$0, 5(%ecx)
   3912 	jz	L(ExitTail6)
   3913 	cmp	$7, %ebx
   3914 	je	L(ExitTail7)
   3915 	cmpb	$0, 6(%ecx)
   3916 	jz	L(ExitTail7)
   3917 	movlpd	(%ecx), %xmm0	/* source bytes 0..7 */
   3918 	movlpd	%xmm0, (%edx)	/* 8 bytes written in total */
   3919 #  ifdef USE_AS_STPCPY
   3920 	lea	7(%edx), %eax	/* eax = address of the last byte written */
   3921 	cmpb	$1, (%eax)	/* CF = 1 exactly when that byte is 0 */
   3922 	sbb	$-1, %eax	/* eax += 1 - CF: step past the byte unless it is NUL */
   3923 #  else
   3924 	movl	%edx, %eax	/* result = destination start */
   3925 #  endif
   3926 	RETURN
   3927 
   /* StrncpyExit4Bytes: short-copy dispatcher for counts 0..4.
      In: %ecx = source, %edx = destination, %ebx = byte count.
      A count of 0 leaves through ExitTail0.  For N = 1, 2, 3 in turn: if
      %ebx == N, or source byte N-1 is NUL, jump to ExitTailN (ExitTail1..3
      are defined outside this block).  If nothing matches, 4 bytes are copied
      through %eax.
      NOTE(review): the fall-through presumably relies on %ebx being at most 4
      on entry (true for the jump from StrncpyExit8Bytes) - confirm for any
      other jump sites.  SAVE_RESULT_TAIL and RETURN are macros defined
      outside this block; SAVE_RESULT_TAIL (3) presumably loads the return
      value into %eax (the STPCPY code right after it dereferences %eax) -
      confirm.  */
   3928 	.p2align 4
   3929 L(StrncpyExit4Bytes):
   3930 	test	%ebx, %ebx
   3931 	jz	L(ExitTail0)	/* count == 0: nothing to copy */
   3932 	cmp	$1, %ebx
   3933 	je	L(ExitTail1)
   3934 	cmpb	$0, (%ecx)
   3935 	jz	L(ExitTail1)
   3936 	cmp	$2, %ebx
   3937 	je	L(ExitTail2)
   3938 	cmpb	$0, 1(%ecx)
   3939 	jz	L(ExitTail2)
   3940 	cmp	$3, %ebx
   3941 	je	L(ExitTail3)
   3942 	cmpb	$0, 2(%ecx)
   3943 	jz	L(ExitTail3)
   3944 	movl	(%ecx), %eax	/* source bytes 0..3; eax is only scratch here */
   3945 	movl	%eax, (%edx)	/* 4 bytes written in total */
   3946 	SAVE_RESULT_TAIL (3)
   3947 #  ifdef USE_AS_STPCPY
   3948 	cmpb	$1, (%eax)	/* CF = 1 exactly when the byte at (%eax) is 0 */
   3949 	sbb	$-1, %eax	/* eax += 1 - CF: step past the byte unless it is NUL */
   3950 #  endif
   3951 	RETURN
   3952 # endif
   3953 
   3954 END (STRCPY)
   3955 #endif
   3956