Home | History | Annotate | Download | only in string
      1 /*
      2 Copyright (c) 2014, Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     31 #ifndef L
     32 # define L(label)	.L##label
     33 #endif
     34 
     35 #ifndef cfi_startproc
     36 # define cfi_startproc	.cfi_startproc
     37 #endif
     38 
     39 #ifndef cfi_endproc
     40 # define cfi_endproc	.cfi_endproc
     41 #endif
     42 
     43 #ifndef cfi_rel_offset
     44 # define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
     45 #endif
     46 
     47 #ifndef cfi_restore
     48 # define cfi_restore(reg)	.cfi_restore reg
     49 #endif
     50 
     51 #ifndef cfi_adjust_cfa_offset
     52 # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
     53 #endif
     54 
     55 #ifndef ENTRY
     56 # define ENTRY(name)             \
     57 	.type name, @function;   \
     58 	.globl name;             \
     59 	.p2align 4;              \
     60 name:                            \
     61 	cfi_startproc
     62 #endif
     63 
     64 #ifndef END
     65 # define END(name)               \
     66 	cfi_endproc;             \
     67 	.size name, .-name
     68 #endif
     69 
     70 #define CFI_PUSH(REG)                  \
     71 	cfi_adjust_cfa_offset (4);     \
     72 	cfi_rel_offset (REG, 0)
     73 
     74 #define CFI_POP(REG)                   \
     75 	cfi_adjust_cfa_offset (-4);    \
     76 	cfi_restore (REG)
     77 
     78 #define PUSH(REG) pushl REG; CFI_PUSH (REG)
     79 #define POP(REG) popl REG; CFI_POP (REG)
     80 
     81 #ifndef STRCPY
     82 # define STRCPY  strcpy
     83 #endif
     84 
     85 #ifdef USE_AS_STPNCPY
     86 # define USE_AS_STRNCPY
     87 # define USE_AS_STPCPY
     88 #endif
     89 
     90 #ifdef USE_AS_STRNCPY
     91 # define PARMS  16
     92 # define ENTRANCE PUSH(%ebx); PUSH(%esi); PUSH(%edi)
     93 # define RETURN  POP(%edi); POP(%esi); POP(%ebx); ret; CFI_PUSH(%ebx); CFI_PUSH(%edi); CFI_PUSH(%edi);
     94 #else
     95 # define PARMS  12
     96 # define ENTRANCE PUSH(%esi); PUSH(%edi)
     97 # define RETURN  POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);
     98 #endif
     99 
    100 #define STR1  PARMS
    101 #define STR2  STR1+4
    102 #define LEN  STR2+4
    103 
    104 
    105 #if (defined SHARED || defined __PIC__)
    106 # define JMPTBL(I, B)	I - B
    107 
    108 /* Load an entry in a jump table into ECX and branch to it.  TABLE is a
    109 	jump	table with relative offsets.  INDEX is a register contains the
    110 	index	into the jump table.   SCALE is the scale of INDEX. */
    111 
    112 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)            \
    113 	/* We first load PC into ECX.  */                       \
    114 	call	__x86.get_pc_thunk.cx;                         \
    115 	/* Get the address of the jump table.  */               \
    116 	addl	$(TABLE - .), %ecx;                             \
    117 	/* Get the entry and convert the relative offset to the \
    118 	absolute	address.  */                            \
    119 	addl	(%ecx,INDEX,SCALE), %ecx;                       \
    120 	/* We loaded the jump table and adjuested ECX. Go.  */  \
    121 	jmp	*%ecx
    122 #else
    123 # define JMPTBL(I, B)	I
    124 
    125 /* Branch to an entry in a jump table.  TABLE is a jump table with
    126 	absolute	offsets.  INDEX is a register contains the index into the
    127 	jump	table.  SCALE is the scale of INDEX. */
    128 
    129 # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
    130 	jmp	*TABLE(,INDEX,SCALE)
    131 #endif
    132 
    133 .text
    134 ENTRY (STRCPY)
    135 	ENTRANCE
    136 	mov	STR1(%esp), %edi
    137 	mov	STR2(%esp), %esi
    138 #ifdef USE_AS_STRNCPY
    139 	movl	LEN(%esp), %ebx
    140 	test	%ebx, %ebx
    141 	jz	L(ExitZero)
    142 #endif
    143 
    144 	mov	%esi, %ecx
    145 #ifndef USE_AS_STPCPY
    146 	mov	%edi, %eax      /* save result */
    147 #endif
    148 	and	$15, %ecx
    149 	jz	L(SourceStringAlignmentZero)
    150 
    151 	and	$-16, %esi
    152 	pxor	%xmm0, %xmm0
    153 	pxor	%xmm1, %xmm1
    154 
    155 	pcmpeqb	(%esi), %xmm1
    156 #ifdef USE_AS_STRNCPY
    157 	add	%ecx, %ebx
    158 #endif
    159 	pmovmskb %xmm1, %edx
    160 	shr	%cl, %edx
    161 #ifdef USE_AS_STRNCPY
    162 #ifdef USE_AS_STPCPY
    163 	cmp	$16, %ebx
    164 	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
    165 #else
    166 	cmp	$17, %ebx
    167 	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
    168 #endif
    169 #endif
    170 	test	%edx, %edx
    171 	jnz	L(CopyFrom1To16BytesTail)
    172 
    173 	pcmpeqb	16(%esi), %xmm0
    174 	pmovmskb %xmm0, %edx
    175 #ifdef USE_AS_STRNCPY
    176 #ifdef USE_AS_STPCPY
    177 	cmp	$32, %ebx
    178 	jbe	L(CopyFrom1To32BytesCase2OrCase3)
    179 #else
    180 	cmp	$33, %ebx
    181 	jbe	L(CopyFrom1To32BytesCase2OrCase3)
    182 #endif
    183 #endif
    184 	test	%edx, %edx
    185 	jnz	L(CopyFrom1To32Bytes)
    186 
    187 	movdqu	(%esi, %ecx), %xmm1   /* copy 16 bytes */
    188 	movdqu	%xmm1, (%edi)
    189 
    190 	sub	%ecx, %edi
    191 	mov	%edi, %edx
    192 	mov	$16, %ecx
    193 	and	$15, %edx
    194 	jz	L(Align16Both)
    195 
/* If source address alignment != destination address alignment */
    197 	.p2align 4
    198 L(Unalign16Both):
    199 	movdqa	(%esi, %ecx), %xmm1
    200 	movaps	16(%esi, %ecx), %xmm2
    201 	movdqu	%xmm1, (%edi, %ecx)
    202 	pcmpeqb	%xmm2, %xmm0
    203 	pmovmskb %xmm0, %edx
    204 	add	$16, %ecx
    205 #ifdef USE_AS_STRNCPY
    206 	sub	$48, %ebx
    207 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
    208 	test	%edx, %edx
    209 	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
    210 #else
    211 	test	%edx, %edx
    212 	jnz	L(CopyFrom1To16Bytes)
    213 #endif
    214 
    215 	movaps	16(%esi, %ecx), %xmm3
    216 	movdqu	%xmm2, (%edi, %ecx)
    217 	pcmpeqb	%xmm3, %xmm0
    218 	pmovmskb %xmm0, %edx
    219 	add	$16, %ecx
    220 #ifdef USE_AS_STRNCPY
    221 	sub	$16, %ebx
    222 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
    223 	test	%edx, %edx
    224 	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
    225 #else
    226 	test	%edx, %edx
    227 	jnz	L(CopyFrom1To16Bytes)
    228 #endif
    229 
    230 	movaps	16(%esi, %ecx), %xmm4
    231 	movdqu	%xmm3, (%edi, %ecx)
    232 	pcmpeqb	%xmm4, %xmm0
    233 	pmovmskb %xmm0, %edx
    234 	add	$16, %ecx
    235 #ifdef USE_AS_STRNCPY
    236 	sub	$16, %ebx
    237 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
    238 	test	%edx, %edx
    239 	jnz	L(CopyFrom1To16BytesUnalignedXmm4)
    240 #else
    241 	test	%edx, %edx
    242 	jnz	L(CopyFrom1To16Bytes)
    243 #endif
    244 
    245 	movaps	16(%esi, %ecx), %xmm1
    246 	movdqu	%xmm4, (%edi, %ecx)
    247 	pcmpeqb	%xmm1, %xmm0
    248 	pmovmskb %xmm0, %edx
    249 	add	$16, %ecx
    250 #ifdef USE_AS_STRNCPY
    251 	sub	$16, %ebx
    252 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
    253 	test	%edx, %edx
    254 	jnz	L(CopyFrom1To16BytesUnalignedXmm1)
    255 #else
    256 	test	%edx, %edx
    257 	jnz	L(CopyFrom1To16Bytes)
    258 #endif
    259 
    260 	movaps	16(%esi, %ecx), %xmm2
    261 	movdqu	%xmm1, (%edi, %ecx)
    262 	pcmpeqb	%xmm2, %xmm0
    263 	pmovmskb %xmm0, %edx
    264 	add	$16, %ecx
    265 #ifdef USE_AS_STRNCPY
    266 	sub	$16, %ebx
    267 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
    268 	test	%edx, %edx
    269 	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
    270 #else
    271 	test	%edx, %edx
    272 	jnz	L(CopyFrom1To16Bytes)
    273 #endif
    274 
    275 	movaps	16(%esi, %ecx), %xmm3
    276 	movdqu	%xmm2, (%edi, %ecx)
    277 	pcmpeqb	%xmm3, %xmm0
    278 	pmovmskb %xmm0, %edx
    279 	add	$16, %ecx
    280 #ifdef USE_AS_STRNCPY
    281 	sub	$16, %ebx
    282 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
    283 	test	%edx, %edx
    284 	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
    285 #else
    286 	test	%edx, %edx
    287 	jnz	L(CopyFrom1To16Bytes)
    288 #endif
    289 
    290 	movdqu	%xmm3, (%edi, %ecx)
    291 	mov	%esi, %edx
    292 	lea	16(%esi, %ecx), %esi
    293 	and	$-0x40, %esi
    294 	sub	%esi, %edx
    295 	sub	%edx, %edi
    296 #ifdef USE_AS_STRNCPY
    297 	lea	64+64(%ebx, %edx), %ebx
    298 #endif
    299 L(Unaligned64Loop):
    300 	movaps	(%esi), %xmm2
    301 	movaps	%xmm2, %xmm4
    302 	movaps	16(%esi), %xmm5
    303 	movaps	32(%esi), %xmm3
    304 	movaps	%xmm3, %xmm6
    305 	movaps	48(%esi), %xmm7
    306 	pminub	%xmm5, %xmm2
    307 	pminub	%xmm7, %xmm3
    308 	pminub	%xmm2, %xmm3
    309 	pcmpeqb	%xmm0, %xmm3
    310 	pmovmskb %xmm3, %edx
    311 #ifdef USE_AS_STRNCPY
    312 	sub	$64, %ebx
    313 	jbe	L(UnalignedLeaveCase2OrCase3)
    314 #endif
    315 	test	%edx, %edx
    316 	jnz	L(Unaligned64Leave)
    317 
    318 L(Unaligned64Loop_start):
    319 	add	$64, %edi
    320 	add	$64, %esi
    321 	movdqu	%xmm4, -64(%edi)
    322 	movaps	(%esi), %xmm2
    323 	movdqa	%xmm2, %xmm4
    324 	movdqu	%xmm5, -48(%edi)
    325 	movaps	16(%esi), %xmm5
    326 	pminub	%xmm5, %xmm2
    327 	movaps	32(%esi), %xmm3
    328 	movdqu	%xmm6, -32(%edi)
    329 	movaps	%xmm3, %xmm6
    330 	movdqu	%xmm7, -16(%edi)
    331 	movaps	48(%esi), %xmm7
    332 	pminub	%xmm7, %xmm3
    333 	pminub	%xmm2, %xmm3
    334 	pcmpeqb	%xmm3, %xmm0
    335 	pmovmskb %xmm0, %edx
    336 #ifdef USE_AS_STRNCPY
    337 	sub	$64, %ebx
    338 	jbe	L(UnalignedLeaveCase2OrCase3)
    339 #endif
    340 	test	%edx, %edx
    341 	jz	L(Unaligned64Loop_start)
    342 
    343 L(Unaligned64Leave):
    344 	pxor	%xmm0, %xmm0
    345 	pxor	%xmm1, %xmm1
    346 
    347 	pcmpeqb	%xmm4, %xmm0
    348 	pcmpeqb	%xmm5, %xmm1
    349 	pmovmskb %xmm0, %edx
    350 	pmovmskb %xmm1, %ecx
    351 	test	%edx, %edx
    352 	jnz	L(CopyFrom1To16BytesUnaligned_0)
    353 	test	%ecx, %ecx
    354 	jnz	L(CopyFrom1To16BytesUnaligned_16)
    355 
    356 	pcmpeqb	%xmm6, %xmm0
    357 	pcmpeqb	%xmm7, %xmm1
    358 	pmovmskb %xmm0, %edx
    359 	pmovmskb %xmm1, %ecx
    360 	test	%edx, %edx
    361 	jnz	L(CopyFrom1To16BytesUnaligned_32)
    362 
    363 	bsf	%ecx, %edx
    364 	movdqu	%xmm4, (%edi)
    365 	movdqu	%xmm5, 16(%edi)
    366 	movdqu	%xmm6, 32(%edi)
    367 #ifdef USE_AS_STRNCPY
    368 #ifdef USE_AS_STPCPY
    369 	lea	48(%edi, %edx), %eax
    370 #endif
    371 	movdqu	%xmm7, 48(%edi)
    372 	add	$15, %ebx
    373 	sub	%edx, %ebx
    374 	lea	49(%edi, %edx), %edi
    375 	jmp	L(StrncpyFillTailWithZero)
    376 #else
    377 	add	$48, %esi
    378 	add	$48, %edi
    379 	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
    380 #endif
    381 
/* If source address alignment == destination address alignment */
    383 
    384 L(SourceStringAlignmentZero):
    385 	pxor	%xmm0, %xmm0
    386 	movdqa	(%esi), %xmm1
    387 	pcmpeqb	%xmm1, %xmm0
    388 	pmovmskb %xmm0, %edx
    389 
    390 #ifdef USE_AS_STRNCPY
    391 #ifdef USE_AS_STPCPY
    392 	cmp	$16, %ebx
    393 	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
    394 #else
    395 	cmp	$17, %ebx
    396 	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
    397 #endif
    398 #endif
    399 	test	%edx, %edx
    400 	jnz	L(CopyFrom1To16BytesTail1)
    401 
    402 	pcmpeqb	16(%esi), %xmm0
    403 	movdqu	%xmm1, (%edi)
    404 	pmovmskb %xmm0, %edx
    405 
    406 #ifdef USE_AS_STRNCPY
    407 #ifdef USE_AS_STPCPY
    408 	cmp	$32, %ebx
    409 	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
    410 #else
    411 	cmp	$33, %ebx
    412 	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
    413 #endif
    414 #endif
    415 	test	%edx, %edx
    416 	jnz	L(CopyFrom1To32Bytes1)
    417 
    418 	mov	%edi, %edx
    419 	mov	$16, %ecx
    420 	and	$15, %edx
    421 	jnz	L(Unalign16Both)
    422 
    423 L(Align16Both):
    424 	movdqa	(%esi, %ecx), %xmm1
    425 	movdqa	16(%esi, %ecx), %xmm2
    426 	movdqa	%xmm1, (%edi, %ecx)
    427 	pcmpeqb	%xmm2, %xmm0
    428 	pmovmskb %xmm0, %edx
    429 	add	$16, %ecx
    430 #ifdef USE_AS_STRNCPY
    431 	sub	$48, %ebx
    432 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
    433 	test	%edx, %edx
    434 	jnz	L(CopyFrom1To16BytesXmm2)
    435 #else
    436 	test	%edx, %edx
    437 	jnz	L(CopyFrom1To16Bytes)
    438 #endif
    439 
    440 	movdqa	16(%esi, %ecx), %xmm3
    441 	movdqa	%xmm2, (%edi, %ecx)
    442 	pcmpeqb	%xmm3, %xmm0
    443 	pmovmskb %xmm0, %edx
    444 	lea	16(%ecx), %ecx
    445 #ifdef USE_AS_STRNCPY
    446 	sub	$16, %ebx
    447 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
    448 	test	%edx, %edx
    449 	jnz	L(CopyFrom1To16BytesXmm3)
    450 #else
    451 	test	%edx, %edx
    452 	jnz	L(CopyFrom1To16Bytes)
    453 #endif
    454 
    455 	movdqa	16(%esi, %ecx), %xmm4
    456 	movdqa	%xmm3, (%edi, %ecx)
    457 	pcmpeqb	%xmm4, %xmm0
    458 	pmovmskb %xmm0, %edx
    459 	lea	16(%ecx), %ecx
    460 #ifdef USE_AS_STRNCPY
    461 	sub	$16, %ebx
    462 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
    463 	test	%edx, %edx
    464 	jnz	L(CopyFrom1To16BytesXmm4)
    465 #else
    466 	test	%edx, %edx
    467 	jnz	L(CopyFrom1To16Bytes)
    468 #endif
    469 
    470 	movdqa	16(%esi, %ecx), %xmm1
    471 	movdqa	%xmm4, (%edi, %ecx)
    472 	pcmpeqb	%xmm1, %xmm0
    473 	pmovmskb %xmm0, %edx
    474 	lea	16(%ecx), %ecx
    475 #ifdef USE_AS_STRNCPY
    476 	sub	$16, %ebx
    477 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
    478 	test	%edx, %edx
    479 	jnz	L(CopyFrom1To16BytesXmm1)
    480 #else
    481 	test	%edx, %edx
    482 	jnz	L(CopyFrom1To16Bytes)
    483 #endif
    484 
    485 	movdqa	16(%esi, %ecx), %xmm2
    486 	movdqa	%xmm1, (%edi, %ecx)
    487 	pcmpeqb	%xmm2, %xmm0
    488 	pmovmskb %xmm0, %edx
    489 	lea	16(%ecx), %ecx
    490 #ifdef USE_AS_STRNCPY
    491 	sub	$16, %ebx
    492 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
    493 	test	%edx, %edx
    494 	jnz	L(CopyFrom1To16BytesXmm2)
    495 #else
    496 	test	%edx, %edx
    497 	jnz	L(CopyFrom1To16Bytes)
    498 #endif
    499 
    500 	movdqa	16(%esi, %ecx), %xmm3
    501 	movdqa	%xmm2, (%edi, %ecx)
    502 	pcmpeqb	%xmm3, %xmm0
    503 	pmovmskb %xmm0, %edx
    504 	lea	16(%ecx), %ecx
    505 #ifdef USE_AS_STRNCPY
    506 	sub	$16, %ebx
    507 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
    508 	test	%edx, %edx
    509 	jnz	L(CopyFrom1To16BytesXmm3)
    510 #else
    511 	test	%edx, %edx
    512 	jnz	L(CopyFrom1To16Bytes)
    513 #endif
    514 
    515 	movdqa	%xmm3, (%edi, %ecx)
    516 	mov	%esi, %edx
    517 	lea	16(%esi, %ecx), %esi
    518 	and	$-0x40, %esi
    519 	sub	%esi, %edx
    520 	sub	%edx, %edi
    521 #ifdef USE_AS_STRNCPY
    522 	lea	64+64(%ebx, %edx), %ebx
    523 #endif
    524 L(Aligned64Loop):
    525 	movdqa	(%esi), %xmm2
    526 	movdqa	%xmm2, %xmm4
    527 	movaps	16(%esi), %xmm5
    528 	movdqa	32(%esi), %xmm3
    529 	movdqa	%xmm3, %xmm6
    530 	movaps	48(%esi), %xmm7
    531 	pminub	%xmm5, %xmm2
    532 	pminub	%xmm7, %xmm3
    533 	pminub	%xmm2, %xmm3
    534 	pcmpeqb	%xmm0, %xmm3
    535 	pmovmskb %xmm3, %edx
    536 #ifdef USE_AS_STRNCPY
    537 	sub	$64, %ebx
    538 	jbe	L(AlignedLeaveCase2OrCase3)
    539 #endif
    540 	test	%edx, %edx
    541 	jnz	L(Aligned64Leave)
    542 
    543 L(Aligned64Loop_start):
    544 	add	$64, %esi
    545 	add	$64, %edi
    546 	movaps	%xmm4, -64(%edi)
    547 	movdqa	(%esi), %xmm2
    548 	movdqa	%xmm2, %xmm4
    549 	movaps	%xmm5, -48(%edi)
    550 	movaps	16(%esi), %xmm5
    551 	pminub	%xmm5, %xmm2
    552 	movaps	32(%esi), %xmm3
    553 	movaps	%xmm6, -32(%edi)
    554 	movdqa	%xmm3, %xmm6
    555 	movaps	%xmm7, -16(%edi)
    556 	movaps	48(%esi), %xmm7
    557 	pminub	%xmm7, %xmm3
    558 	pminub	%xmm2, %xmm3
    559 	pcmpeqb	%xmm3, %xmm0
    560 	pmovmskb %xmm0, %edx
    561 #ifdef USE_AS_STRNCPY
    562 	sub	$64, %ebx
    563 	jbe	L(AlignedLeaveCase2OrCase3)
    564 #endif
    565 	test	%edx, %edx
    566 	jz	L(Aligned64Loop_start)
    567 
    568 L(Aligned64Leave):
    569 	pxor	%xmm0, %xmm0
    570 	pxor	%xmm1, %xmm1
    571 
    572 	pcmpeqb	%xmm4, %xmm0
    573 	pcmpeqb	%xmm5, %xmm1
    574 	pmovmskb %xmm0, %edx
    575 	pmovmskb %xmm1, %ecx
    576 	test	%edx, %edx
    577 	jnz	L(CopyFrom1To16Bytes_0)
    578 	test	%ecx, %ecx
    579 	jnz	L(CopyFrom1To16Bytes_16)
    580 
    581 	pcmpeqb	%xmm6, %xmm0
    582 	pcmpeqb	%xmm7, %xmm1
    583 	pmovmskb %xmm0, %edx
    584 	pmovmskb %xmm1, %ecx
    585 	test	%edx, %edx
    586 	jnz	L(CopyFrom1To16Bytes_32)
    587 
    588 	bsf	%ecx, %edx
    589 	movdqa	%xmm4, (%edi)
    590 	movdqa	%xmm5, 16(%edi)
    591 	movdqa	%xmm6, 32(%edi)
    592 #ifdef USE_AS_STRNCPY
    593 #ifdef USE_AS_STPCPY
    594 	lea	48(%edi, %edx), %eax
    595 #endif
    596 	movdqa	%xmm7, 48(%edi)
    597 	add	$15, %ebx
    598 	sub	%edx, %ebx
    599 	lea	49(%edi, %edx), %edi
    600 	jmp	L(StrncpyFillTailWithZero)
    601 #else
    602 	add	$48, %esi
    603 	add	$48, %edi
    604 	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
    605 #endif
    606 
    607 /*----------------------------------------------------*/
    608 
    609 /* Case1 */
    610 #ifndef USE_AS_STRNCPY
    611 	.p2align 4
    612 L(CopyFrom1To16Bytes):
    613 	add	%ecx, %edi
    614 	add	%ecx, %esi
    615 	bsf	%edx, %edx
    616 	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
    617 #endif
    618 	.p2align 4
    619 L(CopyFrom1To16BytesTail):
    620 #ifdef USE_AS_STRNCPY
    621 	sub	%ecx, %ebx
    622 #endif
    623 	add	%ecx, %esi
    624 	bsf	%edx, %edx
    625 	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
    626 
    627 	.p2align 4
    628 L(CopyFrom1To32Bytes1):
    629 	add	$16, %esi
    630 	add	$16, %edi
    631 #ifdef USE_AS_STRNCPY
    632 	sub	$16, %ebx
    633 #endif
    634 L(CopyFrom1To16BytesTail1):
    635 	bsf	%edx, %edx
    636 	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
    637 
    638 	.p2align 4
    639 L(CopyFrom1To32Bytes):
    640 #ifdef USE_AS_STRNCPY
    641 	sub	%ecx, %ebx
    642 #endif
    643 	bsf	%edx, %edx
    644 	add	%ecx, %esi
    645 	add	$16, %edx
    646 	sub	%ecx, %edx
    647 	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
    648 
    649 	.p2align 4
    650 L(CopyFrom1To16Bytes_0):
    651 	bsf	%edx, %edx
    652 #ifdef USE_AS_STRNCPY
    653 #ifdef USE_AS_STPCPY
    654 	lea	(%edi, %edx), %eax
    655 #endif
    656 	movdqa	%xmm4, (%edi)
    657 	add	$63, %ebx
    658 	sub	%edx, %ebx
    659 	lea	1(%edi, %edx), %edi
    660 	jmp	L(StrncpyFillTailWithZero)
    661 #else
    662 	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
    663 #endif
    664 
    665 	.p2align 4
    666 L(CopyFrom1To16Bytes_16):
    667 	bsf	%ecx, %edx
    668 	movdqa	%xmm4, (%edi)
    669 #ifdef USE_AS_STRNCPY
    670 #ifdef USE_AS_STPCPY
    671 	lea	16(%edi, %edx), %eax
    672 #endif
    673 	movdqa	%xmm5, 16(%edi)
    674 	add	$47, %ebx
    675 	sub	%edx, %ebx
    676 	lea	17(%edi, %edx), %edi
    677 	jmp	L(StrncpyFillTailWithZero)
    678 #else
    679 	add	$16, %esi
    680 	add	$16, %edi
    681 	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
    682 #endif
    683 
    684 	.p2align 4
    685 L(CopyFrom1To16Bytes_32):
    686 	bsf	%edx, %edx
    687 	movdqa	%xmm4, (%edi)
    688 	movdqa	%xmm5, 16(%edi)
    689 #ifdef USE_AS_STRNCPY
    690 #ifdef USE_AS_STPCPY
    691 	lea	32(%edi, %edx), %eax
    692 #endif
    693 	movdqa	%xmm6, 32(%edi)
    694 	add	$31, %ebx
    695 	sub	%edx, %ebx
    696 	lea	33(%edi, %edx), %edi
    697 	jmp	L(StrncpyFillTailWithZero)
    698 #else
    699 	add	$32, %esi
    700 	add	$32, %edi
    701 	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
    702 #endif
    703 
    704 	.p2align 4
    705 L(CopyFrom1To16BytesUnaligned_0):
    706 	bsf	%edx, %edx
    707 #ifdef USE_AS_STRNCPY
    708 #ifdef USE_AS_STPCPY
    709 	lea	(%edi, %edx), %eax
    710 #endif
    711 	movdqu	%xmm4, (%edi)
    712 	add	$63, %ebx
    713 	sub	%edx, %ebx
    714 	lea	1(%edi, %edx), %edi
    715 	jmp	L(StrncpyFillTailWithZero)
    716 #else
    717 	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
    718 #endif
    719 
    720 	.p2align 4
    721 L(CopyFrom1To16BytesUnaligned_16):
    722 	bsf	%ecx, %edx
    723 	movdqu	%xmm4, (%edi)
    724 #ifdef USE_AS_STRNCPY
    725 #ifdef USE_AS_STPCPY
    726 	lea	16(%edi, %edx), %eax
    727 #endif
    728 	movdqu	%xmm5, 16(%edi)
    729 	add	$47, %ebx
    730 	sub	%edx, %ebx
    731 	lea	17(%edi, %edx), %edi
    732 	jmp	L(StrncpyFillTailWithZero)
    733 #else
    734 	add	$16, %esi
    735 	add	$16, %edi
    736 	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
    737 #endif
    738 
    739 	.p2align 4
    740 L(CopyFrom1To16BytesUnaligned_32):
    741 	bsf	%edx, %edx
    742 	movdqu	%xmm4, (%edi)
    743 	movdqu	%xmm5, 16(%edi)
    744 #ifdef USE_AS_STRNCPY
    745 #ifdef USE_AS_STPCPY
    746 	lea	32(%edi, %edx), %eax
    747 #endif
    748 	movdqu	%xmm6, 32(%edi)
    749 	add	$31, %ebx
    750 	sub	%edx, %ebx
    751 	lea	33(%edi, %edx), %edi
    752 	jmp	L(StrncpyFillTailWithZero)
    753 #else
    754 	add	$32, %esi
    755 	add	$32, %edi
    756 	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
    757 #endif
    758 
    759 #ifdef USE_AS_STRNCPY
    760 	.p2align 4
    761 L(CopyFrom1To16BytesXmm6):
    762 	movdqa	%xmm6, (%edi, %ecx)
    763 	jmp	L(CopyFrom1To16BytesXmmExit)
    764 
    765 	.p2align 4
    766 L(CopyFrom1To16BytesXmm5):
    767 	movdqa	%xmm5, (%edi, %ecx)
    768 	jmp	L(CopyFrom1To16BytesXmmExit)
    769 
    770 	.p2align 4
    771 L(CopyFrom1To16BytesXmm4):
    772 	movdqa	%xmm4, (%edi, %ecx)
    773 	jmp	L(CopyFrom1To16BytesXmmExit)
    774 
    775 	.p2align 4
    776 L(CopyFrom1To16BytesXmm3):
    777 	movdqa	%xmm3, (%edi, %ecx)
    778 	jmp	L(CopyFrom1To16BytesXmmExit)
    779 
    780 	.p2align 4
    781 L(CopyFrom1To16BytesXmm2):
    782 	movdqa	%xmm2, (%edi, %ecx)
    783 	jmp	L(CopyFrom1To16BytesXmmExit)
    784 
    785 	.p2align 4
    786 L(CopyFrom1To16BytesXmm1):
    787 	movdqa	%xmm1, (%edi, %ecx)
    788 	jmp	L(CopyFrom1To16BytesXmmExit)
    789 
    790 	.p2align 4
    791 L(CopyFrom1To16BytesUnalignedXmm6):
    792 	movdqu	%xmm6, (%edi, %ecx)
    793 	jmp	L(CopyFrom1To16BytesXmmExit)
    794 
    795 	.p2align 4
    796 L(CopyFrom1To16BytesUnalignedXmm5):
    797 	movdqu	%xmm5, (%edi, %ecx)
    798 	jmp	L(CopyFrom1To16BytesXmmExit)
    799 
    800 	.p2align 4
    801 L(CopyFrom1To16BytesUnalignedXmm4):
    802 	movdqu	%xmm4, (%edi, %ecx)
    803 	jmp	L(CopyFrom1To16BytesXmmExit)
    804 
    805 	.p2align 4
    806 L(CopyFrom1To16BytesUnalignedXmm3):
    807 	movdqu	%xmm3, (%edi, %ecx)
    808 	jmp	L(CopyFrom1To16BytesXmmExit)
    809 
    810 	.p2align 4
    811 L(CopyFrom1To16BytesUnalignedXmm1):
    812 	movdqu	%xmm1, (%edi, %ecx)
    813 	jmp	L(CopyFrom1To16BytesXmmExit)
    814 
    815 	.p2align 4
    816 L(CopyFrom1To16BytesExit):
    817 	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
    818 
    819 /* Case2 */
    820 
    821 	.p2align 4
    822 L(CopyFrom1To16BytesCase2):
    823 	add	$16, %ebx
    824 	add	%ecx, %edi
    825 	add	%ecx, %esi
    826 	bsf	%edx, %edx
    827 	cmp	%ebx, %edx
    828 	jb	L(CopyFrom1To16BytesExit)
    829 	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
    830 
    831 	.p2align 4
    832 L(CopyFrom1To32BytesCase2):
    833 	sub	%ecx, %ebx
    834 	add	%ecx, %esi
    835 	bsf	%edx, %edx
    836 	add	$16, %edx
    837 	sub	%ecx, %edx
    838 	cmp	%ebx, %edx
    839 	jb	L(CopyFrom1To16BytesExit)
    840 	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
    841 
    842 L(CopyFrom1To16BytesTailCase2):
    843 	sub	%ecx, %ebx
    844 	add	%ecx, %esi
    845 	bsf	%edx, %edx
    846 	cmp	%ebx, %edx
    847 	jb	L(CopyFrom1To16BytesExit)
    848 	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
    849 
    850 L(CopyFrom1To16BytesTail1Case2):
    851 	bsf	%edx, %edx
    852 	cmp	%ebx, %edx
    853 	jb	L(CopyFrom1To16BytesExit)
    854 	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
    855 
    856 /* Case2 or Case3,  Case3 */
    857 
    858 	.p2align 4
    859 L(CopyFrom1To16BytesCase2OrCase3):
    860 	test	%edx, %edx
    861 	jnz	L(CopyFrom1To16BytesCase2)
    862 L(CopyFrom1To16BytesCase3):
    863 	add	$16, %ebx
    864 	add	%ecx, %edi
    865 	add	%ecx, %esi
    866 	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
    867 
    868 	.p2align 4
    869 L(CopyFrom1To32BytesCase2OrCase3):
    870 	test	%edx, %edx
    871 	jnz	L(CopyFrom1To32BytesCase2)
    872 	sub	%ecx, %ebx
    873 	add	%ecx, %esi
    874 	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
    875 
    876 	.p2align 4
    877 L(CopyFrom1To16BytesTailCase2OrCase3):
    878 	test	%edx, %edx
    879 	jnz	L(CopyFrom1To16BytesTailCase2)
    880 	sub	%ecx, %ebx
    881 	add	%ecx, %esi
    882 	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
    883 
    884 	.p2align 4
    885 L(CopyFrom1To32Bytes1Case2OrCase3):
    886 	add	$16, %edi
    887 	add	$16, %esi
    888 	sub	$16, %ebx
    889 L(CopyFrom1To16BytesTail1Case2OrCase3):
    890 	test	%edx, %edx
    891 	jnz	L(CopyFrom1To16BytesTail1Case2)
    892 	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
    893 
    894 #endif
    895 
    896 /*-----------------------------------------------------------------*/
    897 	.p2align 4
    898 L(Exit0):
    899 #ifdef USE_AS_STPCPY
    900 	mov	%edi, %eax
    901 #endif
    902 	RETURN
    903 
    904 	.p2align 4
    905 L(Exit1):
    906 	movb	%dh, (%edi)
    907 #ifdef USE_AS_STPCPY
    908 	lea	(%edi), %eax
    909 #endif
    910 #ifdef USE_AS_STRNCPY
    911 	sub	$1, %ebx
    912 	lea	1(%edi), %edi
    913 	jnz	L(StrncpyFillTailWithZero)
    914 #endif
    915 	RETURN
    916 
    917 	.p2align 4
    918 L(Exit2):
    919 	movw	(%esi), %dx
    920 	movw	%dx, (%edi)
    921 #ifdef USE_AS_STPCPY
    922 	lea	1(%edi), %eax
    923 #endif
    924 #ifdef USE_AS_STRNCPY
    925 	sub	$2, %ebx
    926 	lea	2(%edi), %edi
    927 	jnz	L(StrncpyFillTailWithZero)
    928 #endif
    929 	RETURN
    930 
    931 	.p2align 4
    932 L(Exit3):
    933 	movw	(%esi), %cx
    934 	movw	%cx, (%edi)
    935 	movb	%dh, 2(%edi)
    936 #ifdef USE_AS_STPCPY
    937 	lea	2(%edi), %eax
    938 #endif
    939 #ifdef USE_AS_STRNCPY
    940 	sub	$3, %ebx
    941 	lea	3(%edi), %edi
    942 	jnz	L(StrncpyFillTailWithZero)
    943 #endif
    944 	RETURN
    945 
    946 	.p2align 4
    947 L(Exit4):
    948 	movl	(%esi), %edx
    949 	movl	%edx, (%edi)
    950 #ifdef USE_AS_STPCPY
    951 	lea	3(%edi), %eax
    952 #endif
    953 #ifdef USE_AS_STRNCPY
    954 	sub	$4, %ebx
    955 	lea	4(%edi), %edi
    956 	jnz	L(StrncpyFillTailWithZero)
    957 #endif
    958 	RETURN
    959 
    960 	.p2align 4
    961 L(Exit5):
    962 	movl	(%esi), %ecx
    963 	movb	%dh, 4(%edi)
    964 	movl	%ecx, (%edi)
    965 #ifdef USE_AS_STPCPY
    966 	lea	4(%edi), %eax
    967 #endif
    968 #ifdef USE_AS_STRNCPY
    969 	sub	$5, %ebx
    970 	lea	5(%edi), %edi
    971 	jnz	L(StrncpyFillTailWithZero)
    972 #endif
    973 	RETURN
    974 
    975 	.p2align 4
    976 L(Exit6):
    977 	movl	(%esi), %ecx
    978 	movw	4(%esi), %dx
    979 	movl	%ecx, (%edi)
    980 	movw	%dx, 4(%edi)
    981 #ifdef USE_AS_STPCPY
    982 	lea	5(%edi), %eax
    983 #endif
    984 #ifdef USE_AS_STRNCPY
    985 	sub	$6, %ebx
    986 	lea	6(%edi), %edi
    987 	jnz	L(StrncpyFillTailWithZero)
    988 #endif
    989 	RETURN
    990 
    991 	.p2align 4
    992 L(Exit7):
    993 	movl	(%esi), %ecx
    994 	movl	3(%esi), %edx
    995 	movl	%ecx, (%edi)
    996 	movl	%edx, 3(%edi)
    997 #ifdef USE_AS_STPCPY
    998 	lea	6(%edi), %eax
    999 #endif
   1000 #ifdef USE_AS_STRNCPY
   1001 	sub	$7, %ebx
   1002 	lea	7(%edi), %edi
   1003 	jnz	L(StrncpyFillTailWithZero)
   1004 #endif
   1005 	RETURN
   1006 
   1007 	.p2align 4
   1008 L(Exit8):
   1009 	movlpd	(%esi), %xmm0
   1010 	movlpd	%xmm0, (%edi)
   1011 #ifdef USE_AS_STPCPY
   1012 	lea	7(%edi), %eax
   1013 #endif
   1014 #ifdef USE_AS_STRNCPY
   1015 	sub	$8, %ebx
   1016 	lea	8(%edi), %edi
   1017 	jnz	L(StrncpyFillTailWithZero)
   1018 #endif
   1019 	RETURN
   1020 
   1021 	.p2align 4
   1022 L(Exit9):
   1023 	movlpd	(%esi), %xmm0
   1024 	movb	%dh, 8(%edi)
   1025 	movlpd	%xmm0, (%edi)
   1026 #ifdef USE_AS_STPCPY
   1027 	lea	8(%edi), %eax
   1028 #endif
   1029 #ifdef USE_AS_STRNCPY
   1030 	sub	$9, %ebx
   1031 	lea	9(%edi), %edi
   1032 	jnz	L(StrncpyFillTailWithZero)
   1033 #endif
   1034 	RETURN
   1035 
   1036 	.p2align 4
   1037 L(Exit10):
   1038 	movlpd	(%esi), %xmm0
   1039 	movw	8(%esi), %dx
   1040 	movlpd	%xmm0, (%edi)
   1041 	movw	%dx, 8(%edi)
   1042 #ifdef USE_AS_STPCPY
   1043 	lea	9(%edi), %eax
   1044 #endif
   1045 #ifdef USE_AS_STRNCPY
   1046 	sub	$10, %ebx
   1047 	lea	10(%edi), %edi
   1048 	jnz	L(StrncpyFillTailWithZero)
   1049 #endif
   1050 	RETURN
   1051 
   1052 	.p2align 4
   1053 L(Exit11):
   1054 	movlpd	(%esi), %xmm0
   1055 	movl	7(%esi), %edx
   1056 	movlpd	%xmm0, (%edi)
   1057 	movl	%edx, 7(%edi)
   1058 #ifdef USE_AS_STPCPY
   1059 	lea	10(%edi), %eax
   1060 #endif
   1061 #ifdef USE_AS_STRNCPY
   1062 	sub	$11, %ebx
   1063 	lea	11(%edi), %edi
   1064 	jnz	L(StrncpyFillTailWithZero)
   1065 #endif
   1066 	RETURN
   1067 
   1068 	.p2align 4
   1069 L(Exit12):
   1070 	movlpd	(%esi), %xmm0
   1071 	movl	8(%esi), %edx
   1072 	movlpd	%xmm0, (%edi)
   1073 	movl	%edx, 8(%edi)
   1074 #ifdef USE_AS_STPCPY
   1075 	lea	11(%edi), %eax
   1076 #endif
   1077 #ifdef USE_AS_STRNCPY
   1078 	sub	$12, %ebx
   1079 	lea	12(%edi), %edi
   1080 	jnz	L(StrncpyFillTailWithZero)
   1081 #endif
   1082 	RETURN
   1083 
   1084 	.p2align 4
   1085 L(Exit13):
   1086 	movlpd	(%esi), %xmm0
   1087 	movlpd	5(%esi), %xmm1
   1088 	movlpd	%xmm0, (%edi)
   1089 	movlpd	%xmm1, 5(%edi)
   1090 #ifdef USE_AS_STPCPY
   1091 	lea	12(%edi), %eax
   1092 #endif
   1093 #ifdef USE_AS_STRNCPY
   1094 	sub	$13, %ebx
   1095 	lea	13(%edi), %edi
   1096 	jnz	L(StrncpyFillTailWithZero)
   1097 #endif
   1098 	RETURN
   1099 
   1100 	.p2align 4
   1101 L(Exit14):
   1102 	movlpd	(%esi), %xmm0
   1103 	movlpd	6(%esi), %xmm1
   1104 	movlpd	%xmm0, (%edi)
   1105 	movlpd	%xmm1, 6(%edi)
   1106 #ifdef USE_AS_STPCPY
   1107 	lea	13(%edi), %eax
   1108 #endif
   1109 #ifdef USE_AS_STRNCPY
   1110 	sub	$14, %ebx
   1111 	lea	14(%edi), %edi
   1112 	jnz	L(StrncpyFillTailWithZero)
   1113 #endif
   1114 	RETURN
   1115 
   1116 	.p2align 4
   1117 L(Exit15):	/* Copy 15 bytes from (%esi) to (%edi). */
   1118 	movlpd	(%esi), %xmm0	/* bytes 0..7 */
   1119 	movlpd	7(%esi), %xmm1	/* bytes 7..14; overlaps the first chunk by 1 byte */
   1120 	movlpd	%xmm0, (%edi)
   1121 	movlpd	%xmm1, 7(%edi)
   1122 #ifdef USE_AS_STPCPY
   1123 	lea	14(%edi), %eax	/* %eax = address of the last byte stored */
   1124 #endif
   1125 #ifdef USE_AS_STRNCPY
   1126 	sub	$15, %ebx	/* %ebx -= 15; ZF is set when it reaches zero */
   1127 	lea	15(%edi), %edi	/* step past the copy; lea keeps ZF from the sub */
   1128 	jnz	L(StrncpyFillTailWithZero)	/* %ebx != 0: go zero-fill %ebx more bytes */
   1129 #endif
   1130 	RETURN
   1131 
   1132 	.p2align 4
   1133 L(Exit16):	/* Copy 16 bytes from (%esi) to (%edi) with one unaligned SSE2 move. */
   1134 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1135 	movdqu	%xmm0, (%edi)
   1136 #ifdef USE_AS_STPCPY
   1137 	lea	15(%edi), %eax	/* %eax = address of the last byte stored */
   1138 #endif
   1139 #ifdef USE_AS_STRNCPY
   1140 	sub	$16, %ebx	/* %ebx -= 16; ZF is set when it reaches zero */
   1141 	lea	16(%edi), %edi	/* step past the copy; lea keeps ZF from the sub */
   1142 	jnz	L(StrncpyFillTailWithZero)	/* %ebx != 0: go zero-fill %ebx more bytes */
   1143 #endif
   1144 	RETURN
   1145 
   1146 	.p2align 4
   1147 L(Exit17):	/* Copy 16 bytes from (%esi) to (%edi) and store a zero as byte 16. */
   1148 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1149 	xor	%cl, %cl	/* byte 16 is written as a constant 0 instead of being loaded */
   1150 	movdqu	%xmm0, (%edi)
   1151 	movb	%cl, 16(%edi)
   1152 #ifdef USE_AS_STPCPY
   1153 	lea	16(%edi), %eax	/* %eax = address of the zero byte just stored */
   1154 #endif
   1155 #ifdef USE_AS_STRNCPY
   1156 	sub	$17, %ebx	/* %ebx -= 17; ZF is set when it reaches zero */
   1157 	lea	17(%edi), %edi	/* step past the copy; lea keeps ZF from the sub */
   1158 	jnz	L(StrncpyFillTailWithZero)	/* %ebx != 0: go zero-fill %ebx more bytes */
   1159 #endif
   1160 	RETURN
   1161 
   1162 	.p2align 4
   1163 L(Exit18):	/* Copy 18 bytes from (%esi) to (%edi). */
   1164 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1165 	movw	16(%esi), %cx	/* bytes 16..17 */
   1166 	movdqu	%xmm0, (%edi)
   1167 	movw	%cx, 16(%edi)
   1168 #ifdef USE_AS_STPCPY
   1169 	lea	17(%edi), %eax	/* %eax = address of the last byte stored */
   1170 #endif
   1171 #ifdef USE_AS_STRNCPY
   1172 	sub	$18, %ebx	/* %ebx -= 18; ZF is set when it reaches zero */
   1173 	lea	18(%edi), %edi	/* step past the copy; lea keeps ZF from the sub */
   1174 	jnz	L(StrncpyFillTailWithZero)	/* %ebx != 0: go zero-fill %ebx more bytes */
   1175 #endif
   1176 	RETURN
   1177 
   1178 	.p2align 4
   1179 L(Exit19):	/* Copy 19 bytes from (%esi) to (%edi). */
   1180 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1181 	movl	15(%esi), %ecx	/* bytes 15..18; re-reads byte 15 so 16+4 covers 19 */
   1182 	movdqu	%xmm0, (%edi)
   1183 	movl	%ecx, 15(%edi)
   1184 #ifdef USE_AS_STPCPY
   1185 	lea	18(%edi), %eax	/* %eax = address of the last byte stored */
   1186 #endif
   1187 #ifdef USE_AS_STRNCPY
   1188 	sub	$19, %ebx	/* %ebx -= 19; ZF is set when it reaches zero */
   1189 	lea	19(%edi), %edi	/* step past the copy; lea keeps ZF from the sub */
   1190 	jnz	L(StrncpyFillTailWithZero)	/* %ebx != 0: go zero-fill %ebx more bytes */
   1191 #endif
   1192 	RETURN
   1193 
   1194 	.p2align 4
   1195 L(Exit20):	/* Copy 20 bytes from (%esi) to (%edi). */
   1196 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1197 	movl	16(%esi), %ecx	/* bytes 16..19 */
   1198 	movdqu	%xmm0, (%edi)
   1199 	movl	%ecx, 16(%edi)
   1200 #ifdef USE_AS_STPCPY
   1201 	lea	19(%edi), %eax	/* %eax = address of the last byte stored */
   1202 #endif
   1203 #ifdef USE_AS_STRNCPY
   1204 	sub	$20, %ebx	/* %ebx -= 20; ZF is set when it reaches zero */
   1205 	lea	20(%edi), %edi	/* step past the copy; lea keeps ZF from the sub */
   1206 	jnz	L(StrncpyFillTailWithZero)	/* %ebx != 0: go zero-fill %ebx more bytes */
   1207 #endif
   1208 	RETURN
   1209 
   1210 	.p2align 4
   1211 L(Exit21):	/* Copy 20 bytes from (%esi) to (%edi) and store a zero as byte 20. */
   1212 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1213 	movl	16(%esi), %ecx	/* bytes 16..19 */
   1214 	xor	%dl, %dl	/* byte 20 is written as a constant 0 instead of being loaded */
   1215 	movdqu	%xmm0, (%edi)
   1216 	movl	%ecx, 16(%edi)
   1217 	movb	%dl, 20(%edi)
   1218 #ifdef USE_AS_STPCPY
   1219 	lea	20(%edi), %eax	/* %eax = address of the zero byte just stored */
   1220 #endif
   1221 #ifdef USE_AS_STRNCPY
   1222 	sub	$21, %ebx	/* %ebx -= 21; ZF is set when it reaches zero */
   1223 	lea	21(%edi), %edi	/* step past the copy; lea keeps ZF from the sub */
   1224 	jnz	L(StrncpyFillTailWithZero)	/* %ebx != 0: go zero-fill %ebx more bytes */
   1225 #endif
   1226 	RETURN
   1227 
   1228 	.p2align 4
   1229 L(Exit22):	/* Copy 22 bytes from (%esi) to (%edi). */
   1230 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1231 	movlpd	14(%esi), %xmm3	/* bytes 14..21; overlaps the first chunk by 2 bytes */
   1232 	movdqu	%xmm0, (%edi)
   1233 	movlpd	%xmm3, 14(%edi)
   1234 #ifdef USE_AS_STPCPY
   1235 	lea	21(%edi), %eax	/* %eax = address of the last byte stored */
   1236 #endif
   1237 #ifdef USE_AS_STRNCPY
   1238 	sub	$22, %ebx	/* %ebx -= 22; ZF is set when it reaches zero */
   1239 	lea	22(%edi), %edi	/* step past the copy; lea keeps ZF from the sub */
   1240 	jnz	L(StrncpyFillTailWithZero)	/* %ebx != 0: go zero-fill %ebx more bytes */
   1241 #endif
   1242 	RETURN
   1243 
   1244 	.p2align 4
   1245 L(Exit23):	/* Copy 23 bytes from (%esi) to (%edi). */
   1246 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1247 	movlpd	15(%esi), %xmm3	/* bytes 15..22; overlaps the first chunk by 1 byte */
   1248 	movdqu	%xmm0, (%edi)
   1249 	movlpd	%xmm3, 15(%edi)
   1250 #ifdef USE_AS_STPCPY
   1251 	lea	22(%edi), %eax	/* %eax = address of the last byte stored */
   1252 #endif
   1253 #ifdef USE_AS_STRNCPY
   1254 	sub	$23, %ebx	/* %ebx -= 23; ZF is set when it reaches zero */
   1255 	lea	23(%edi), %edi	/* step past the copy; lea keeps ZF from the sub */
   1256 	jnz	L(StrncpyFillTailWithZero)	/* %ebx != 0: go zero-fill %ebx more bytes */
   1257 #endif
   1258 	RETURN
   1259 
   1260 	.p2align 4
   1261 L(Exit24):	/* Copy 24 bytes from (%esi) to (%edi). */
   1262 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1263 	movlpd	16(%esi), %xmm2	/* bytes 16..23 */
   1264 	movdqu	%xmm0, (%edi)
   1265 	movlpd	%xmm2, 16(%edi)
   1266 #ifdef USE_AS_STPCPY
   1267 	lea	23(%edi), %eax	/* %eax = address of the last byte stored */
   1268 #endif
   1269 #ifdef USE_AS_STRNCPY
   1270 	sub	$24, %ebx	/* %ebx -= 24; ZF is set when it reaches zero */
   1271 	lea	24(%edi), %edi	/* step past the copy; lea keeps ZF from the sub */
   1272 	jnz	L(StrncpyFillTailWithZero)	/* %ebx != 0: go zero-fill %ebx more bytes */
   1273 #endif
   1274 	RETURN
   1275 
   1276 	.p2align 4
   1277 L(Exit25):	/* Copy 24 bytes from (%esi) to (%edi) and store a zero as byte 24. */
   1278 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1279 	movlpd	16(%esi), %xmm2	/* bytes 16..23 */
   1280 	xor	%cl, %cl	/* byte 24 is written as a constant 0 instead of being loaded */
   1281 	movdqu	%xmm0, (%edi)
   1282 	movlpd	%xmm2, 16(%edi)
   1283 	movb	%cl, 24(%edi)
   1284 #ifdef USE_AS_STPCPY
   1285 	lea	24(%edi), %eax	/* %eax = address of the zero byte just stored */
   1286 #endif
   1287 #ifdef USE_AS_STRNCPY
   1288 	sub	$25, %ebx	/* %ebx -= 25; ZF is set when it reaches zero */
   1289 	lea	25(%edi), %edi	/* step past the copy; lea keeps ZF from the sub */
   1290 	jnz	L(StrncpyFillTailWithZero)	/* %ebx != 0: go zero-fill %ebx more bytes */
   1291 #endif
   1292 	RETURN
   1293 
   1294 	.p2align 4
   1295 L(Exit26):	/* Copy 26 bytes from (%esi) to (%edi). */
   1296 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1297 	movlpd	16(%esi), %xmm2	/* bytes 16..23 */
   1298 	movw	24(%esi), %cx	/* bytes 24..25 */
   1299 	movdqu	%xmm0, (%edi)
   1300 	movlpd	%xmm2, 16(%edi)
   1301 	movw	%cx, 24(%edi)
   1302 #ifdef USE_AS_STPCPY
   1303 	lea	25(%edi), %eax	/* %eax = address of the last byte stored */
   1304 #endif
   1305 #ifdef USE_AS_STRNCPY
   1306 	sub	$26, %ebx	/* %ebx -= 26; ZF is set when it reaches zero */
   1307 	lea	26(%edi), %edi	/* step past the copy; lea keeps ZF from the sub */
   1308 	jnz	L(StrncpyFillTailWithZero)	/* %ebx != 0: go zero-fill %ebx more bytes */
   1309 #endif
   1310 	RETURN
   1311 
   1312 	.p2align 4
   1313 L(Exit27):	/* Copy 27 bytes from (%esi) to (%edi). */
   1314 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1315 	movlpd	16(%esi), %xmm2	/* bytes 16..23 */
   1316 	movl	23(%esi), %ecx	/* bytes 23..26; re-reads byte 23 so 24+4 covers 27 */
   1317 	movdqu	%xmm0, (%edi)
   1318 	movlpd	%xmm2, 16(%edi)
   1319 	movl	%ecx, 23(%edi)
   1320 #ifdef USE_AS_STPCPY
   1321 	lea	26(%edi), %eax	/* %eax = address of the last byte stored */
   1322 #endif
   1323 #ifdef USE_AS_STRNCPY
   1324 	sub	$27, %ebx	/* %ebx -= 27; ZF is set when it reaches zero */
   1325 	lea	27(%edi), %edi	/* step past the copy; lea keeps ZF from the sub */
   1326 	jnz	L(StrncpyFillTailWithZero)	/* %ebx != 0: go zero-fill %ebx more bytes */
   1327 #endif
   1328 	RETURN
   1329 
   1330 	.p2align 4
   1331 L(Exit28):	/* Copy 28 bytes from (%esi) to (%edi). */
   1332 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1333 	movlpd	16(%esi), %xmm2	/* bytes 16..23 */
   1334 	movl	24(%esi), %ecx	/* bytes 24..27 */
   1335 	movdqu	%xmm0, (%edi)
   1336 	movlpd	%xmm2, 16(%edi)
   1337 	movl	%ecx, 24(%edi)
   1338 #ifdef USE_AS_STPCPY
   1339 	lea	27(%edi), %eax	/* %eax = address of the last byte stored */
   1340 #endif
   1341 #ifdef USE_AS_STRNCPY
   1342 	sub	$28, %ebx	/* %ebx -= 28; ZF is set when it reaches zero */
   1343 	lea	28(%edi), %edi	/* step past the copy; lea keeps ZF from the sub */
   1344 	jnz	L(StrncpyFillTailWithZero)	/* %ebx != 0: go zero-fill %ebx more bytes */
   1345 #endif
   1346 	RETURN
   1347 
   1348 	.p2align 4
   1349 L(Exit29):	/* Copy 29 bytes from (%esi) to (%edi). */
   1350 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1351 	movdqu	13(%esi), %xmm2	/* bytes 13..28; overlaps the first chunk by 3 bytes */
   1352 	movdqu	%xmm0, (%edi)
   1353 	movdqu	%xmm2, 13(%edi)
   1354 #ifdef USE_AS_STPCPY
   1355 	lea	28(%edi), %eax	/* %eax = address of the last byte stored */
   1356 #endif
   1357 #ifdef USE_AS_STRNCPY
   1358 	sub	$29, %ebx	/* %ebx -= 29; ZF is set when it reaches zero */
   1359 	lea	29(%edi), %edi	/* step past the copy; lea keeps ZF from the sub */
   1360 	jnz	L(StrncpyFillTailWithZero)	/* %ebx != 0: go zero-fill %ebx more bytes */
   1361 #endif
   1362 	RETURN
   1363 
   1364 	.p2align 4
   1365 L(Exit30):	/* Copy 30 bytes from (%esi) to (%edi). */
   1366 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1367 	movdqu	14(%esi), %xmm2	/* bytes 14..29; overlaps the first chunk by 2 bytes */
   1368 	movdqu	%xmm0, (%edi)
   1369 	movdqu	%xmm2, 14(%edi)
   1370 #ifdef USE_AS_STPCPY
   1371 	lea	29(%edi), %eax	/* %eax = address of the last byte stored */
   1372 #endif
   1373 #ifdef USE_AS_STRNCPY
   1374 	sub	$30, %ebx	/* %ebx -= 30; ZF is set when it reaches zero */
   1375 	lea	30(%edi), %edi	/* step past the copy; lea keeps ZF from the sub */
   1376 	jnz	L(StrncpyFillTailWithZero)	/* %ebx != 0: go zero-fill %ebx more bytes */
   1377 #endif
   1378 	RETURN
   1379 
   1380 
   1381 	.p2align 4
   1382 L(Exit31):	/* Copy 31 bytes from (%esi) to (%edi). */
   1383 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1384 	movdqu	15(%esi), %xmm2	/* bytes 15..30; overlaps the first chunk by 1 byte */
   1385 	movdqu	%xmm0, (%edi)
   1386 	movdqu	%xmm2, 15(%edi)
   1387 #ifdef USE_AS_STPCPY
   1388 	lea	30(%edi), %eax	/* %eax = address of the last byte stored */
   1389 #endif
   1390 #ifdef USE_AS_STRNCPY
   1391 	sub	$31, %ebx	/* %ebx -= 31; ZF is set when it reaches zero */
   1392 	lea	31(%edi), %edi	/* step past the copy; lea keeps ZF from the sub */
   1393 	jnz	L(StrncpyFillTailWithZero)	/* %ebx != 0: go zero-fill %ebx more bytes */
   1394 #endif
   1395 	RETURN
   1396 
   1397 	.p2align 4
   1398 L(Exit32):	/* Copy 32 bytes from (%esi) to (%edi). */
   1399 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1400 	movdqu	16(%esi), %xmm2	/* bytes 16..31 */
   1401 	movdqu	%xmm0, (%edi)
   1402 	movdqu	%xmm2, 16(%edi)
   1403 #ifdef USE_AS_STPCPY
   1404 	lea	31(%edi), %eax	/* %eax = address of the last byte stored */
   1405 #endif
   1406 #ifdef USE_AS_STRNCPY
   1407 	sub	$32, %ebx	/* %ebx -= 32; ZF is set when it reaches zero */
   1408 	lea	32(%edi), %edi	/* step past the copy; lea keeps ZF from the sub */
   1409 	jnz	L(StrncpyFillTailWithZero)	/* %ebx != 0: go zero-fill %ebx more bytes */
   1410 #endif
   1411 	RETURN
   1412 
   1413 #ifdef USE_AS_STRNCPY
   1414 
   1415 	.p2align 4
   1416 L(StrncpyExit1):	/* Copy exactly 1 byte from (%esi) to (%edi); no zero fill follows. */
   1417 	movb	(%esi), %dl
   1418 	movb	%dl, (%edi)
   1419 #ifdef USE_AS_STPCPY
   1420 	lea	1(%edi), %eax	/* %eax = one past the last byte stored */
   1421 #endif
   1422 	RETURN
   1423 
   1424 	.p2align 4
   1425 L(StrncpyExit2):	/* Copy exactly 2 bytes from (%esi) to (%edi); no zero fill follows. */
   1426 	movw	(%esi), %dx
   1427 	movw	%dx, (%edi)
   1428 #ifdef USE_AS_STPCPY
   1429 	lea	2(%edi), %eax	/* %eax = one past the last byte stored */
   1430 #endif
   1431 	RETURN
   1432 	.p2align 4
   1433 L(StrncpyExit3):	/* Copy exactly 3 bytes from (%esi) to (%edi); no zero fill follows. */
   1434 	movw	(%esi), %cx	/* bytes 0..1 */
   1435 	movb	2(%esi), %dl	/* byte 2 */
   1436 	movw	%cx, (%edi)
   1437 	movb	%dl, 2(%edi)
   1438 #ifdef USE_AS_STPCPY
   1439 	lea	3(%edi), %eax	/* %eax = one past the last byte stored */
   1440 #endif
   1441 	RETURN
   1442 
   1443 	.p2align 4
   1444 L(StrncpyExit4):	/* Copy exactly 4 bytes from (%esi) to (%edi); no zero fill follows. */
   1445 	movl	(%esi), %edx
   1446 	movl	%edx, (%edi)
   1447 #ifdef USE_AS_STPCPY
   1448 	lea	4(%edi), %eax	/* %eax = one past the last byte stored */
   1449 #endif
   1450 	RETURN
   1451 
   1452 	.p2align 4
   1453 L(StrncpyExit5):	/* Copy exactly 5 bytes from (%esi) to (%edi); no zero fill follows. */
   1454 	movl	(%esi), %ecx	/* bytes 0..3 */
   1455 	movb	4(%esi), %dl	/* byte 4 */
   1456 	movl	%ecx, (%edi)
   1457 	movb	%dl, 4(%edi)
   1458 #ifdef USE_AS_STPCPY
   1459 	lea	5(%edi), %eax	/* %eax = one past the last byte stored */
   1460 #endif
   1461 	RETURN
   1462 
   1463 	.p2align 4
   1464 L(StrncpyExit6):	/* Copy exactly 6 bytes from (%esi) to (%edi); no zero fill follows. */
   1465 	movl	(%esi), %ecx	/* bytes 0..3 */
   1466 	movw	4(%esi), %dx	/* bytes 4..5 */
   1467 	movl	%ecx, (%edi)
   1468 	movw	%dx, 4(%edi)
   1469 #ifdef USE_AS_STPCPY
   1470 	lea	6(%edi), %eax	/* %eax = one past the last byte stored */
   1471 #endif
   1472 	RETURN
   1473 
   1474 	.p2align 4
   1475 L(StrncpyExit7):	/* Copy exactly 7 bytes from (%esi) to (%edi); no zero fill follows. */
   1476 	movl	(%esi), %ecx	/* bytes 0..3 */
   1477 	movl	3(%esi), %edx	/* bytes 3..6; re-reads byte 3 so 4+4 covers 7 */
   1478 	movl	%ecx, (%edi)
   1479 	movl	%edx, 3(%edi)
   1480 #ifdef USE_AS_STPCPY
   1481 	lea	7(%edi), %eax	/* %eax = one past the last byte stored */
   1482 #endif
   1483 	RETURN
   1484 
   1485 	.p2align 4
   1486 L(StrncpyExit8):	/* Copy exactly 8 bytes from (%esi) to (%edi); no zero fill follows. */
   1487 	movlpd	(%esi), %xmm0	/* 8-byte load into the low half of %xmm0 */
   1488 	movlpd	%xmm0, (%edi)
   1489 #ifdef USE_AS_STPCPY
   1490 	lea	8(%edi), %eax	/* %eax = one past the last byte stored */
   1491 #endif
   1492 	RETURN
   1493 
   1494 	.p2align 4
   1495 L(StrncpyExit9):	/* Copy exactly 9 bytes from (%esi) to (%edi); no zero fill follows. */
   1496 	movlpd	(%esi), %xmm0	/* bytes 0..7 */
   1497 	movb	8(%esi), %dl	/* byte 8 */
   1498 	movlpd	%xmm0, (%edi)
   1499 	movb	%dl, 8(%edi)
   1500 #ifdef USE_AS_STPCPY
   1501 	lea	9(%edi), %eax	/* %eax = one past the last byte stored */
   1502 #endif
   1503 	RETURN
   1504 
   1505 	.p2align 4
   1506 L(StrncpyExit10):	/* Copy exactly 10 bytes from (%esi) to (%edi); no zero fill follows. */
   1507 	movlpd	(%esi), %xmm0	/* bytes 0..7 */
   1508 	movw	8(%esi), %dx	/* bytes 8..9 */
   1509 	movlpd	%xmm0, (%edi)
   1510 	movw	%dx, 8(%edi)
   1511 #ifdef USE_AS_STPCPY
   1512 	lea	10(%edi), %eax	/* %eax = one past the last byte stored */
   1513 #endif
   1514 	RETURN
   1515 
   1516 	.p2align 4
   1517 L(StrncpyExit11):	/* Copy exactly 11 bytes from (%esi) to (%edi); no zero fill follows. */
   1518 	movlpd	(%esi), %xmm0	/* bytes 0..7 */
   1519 	movl	7(%esi), %edx	/* bytes 7..10; re-reads byte 7 so 8+4 covers 11 */
   1520 	movlpd	%xmm0, (%edi)
   1521 	movl	%edx, 7(%edi)
   1522 #ifdef USE_AS_STPCPY
   1523 	lea	11(%edi), %eax	/* %eax = one past the last byte stored */
   1524 #endif
   1525 	RETURN
   1526 
   1527 	.p2align 4
   1528 L(StrncpyExit12):	/* Copy exactly 12 bytes from (%esi) to (%edi); no zero fill follows. */
   1529 	movlpd	(%esi), %xmm0	/* bytes 0..7 */
   1530 	movl	8(%esi), %edx	/* bytes 8..11 */
   1531 	movlpd	%xmm0, (%edi)
   1532 	movl	%edx, 8(%edi)
   1533 #ifdef USE_AS_STPCPY
   1534 	lea	12(%edi), %eax	/* %eax = one past the last byte stored */
   1535 #endif
   1536 	RETURN
   1537 
   1538 	.p2align 4
   1539 L(StrncpyExit13):	/* Copy exactly 13 bytes from (%esi) to (%edi); no zero fill follows. */
   1540 	movlpd	(%esi), %xmm0	/* bytes 0..7 */
   1541 	movlpd	5(%esi), %xmm1	/* bytes 5..12; overlaps the first chunk by 3 bytes */
   1542 	movlpd	%xmm0, (%edi)
   1543 	movlpd	%xmm1, 5(%edi)
   1544 #ifdef USE_AS_STPCPY
   1545 	lea	13(%edi), %eax	/* %eax = one past the last byte stored */
   1546 #endif
   1547 	RETURN
   1548 
   1549 	.p2align 4
   1550 L(StrncpyExit14):	/* Copy exactly 14 bytes from (%esi) to (%edi); no zero fill follows. */
   1551 	movlpd	(%esi), %xmm0	/* bytes 0..7 */
   1552 	movlpd	6(%esi), %xmm1	/* bytes 6..13; overlaps the first chunk by 2 bytes */
   1553 	movlpd	%xmm0, (%edi)
   1554 	movlpd	%xmm1, 6(%edi)
   1555 #ifdef USE_AS_STPCPY
   1556 	lea	14(%edi), %eax	/* %eax = one past the last byte stored */
   1557 #endif
   1558 	RETURN
   1559 
   1560 	.p2align 4
   1561 L(StrncpyExit15):	/* Copy exactly 15 bytes from (%esi) to (%edi); no zero fill follows. */
   1562 	movlpd	(%esi), %xmm0	/* bytes 0..7 */
   1563 	movlpd	7(%esi), %xmm1	/* bytes 7..14; overlaps the first chunk by 1 byte */
   1564 	movlpd	%xmm0, (%edi)
   1565 	movlpd	%xmm1, 7(%edi)
   1566 #ifdef USE_AS_STPCPY
   1567 	lea	15(%edi), %eax	/* %eax = one past the last byte stored */
   1568 #endif
   1569 	RETURN
   1570 
   1571 	.p2align 4
   1572 L(StrncpyExit16):	/* Copy exactly 16 bytes from (%esi) to (%edi); no zero fill follows. */
   1573 	movdqu	(%esi), %xmm0	/* bytes 0..15, unaligned */
   1574 	movdqu	%xmm0, (%edi)
   1575 #ifdef USE_AS_STPCPY
   1576 	lea	16(%edi), %eax	/* %eax = one past the last byte stored */
   1577 #endif
   1578 	RETURN
   1579 
   1580 	.p2align 4
   1581 L(StrncpyExit17):	/* Copy exactly 17 bytes from (%esi) to (%edi); no zero fill follows. */
   1582 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1583 	movb	16(%esi), %cl	/* byte 16, loaded from the source (L(Exit17) stores a constant 0 instead) */
   1584 	movdqu	%xmm0, (%edi)
   1585 	movb	%cl, 16(%edi)
   1586 #ifdef USE_AS_STPCPY
   1587 	lea	17(%edi), %eax	/* %eax = one past the last byte stored */
   1588 #endif
   1589 	RETURN
   1590 
   1591 	.p2align 4
   1592 L(StrncpyExit18):	/* Copy exactly 18 bytes from (%esi) to (%edi); no zero fill follows. */
   1593 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1594 	movw	16(%esi), %cx	/* bytes 16..17 */
   1595 	movdqu	%xmm0, (%edi)
   1596 	movw	%cx, 16(%edi)
   1597 #ifdef USE_AS_STPCPY
   1598 	lea	18(%edi), %eax	/* %eax = one past the last byte stored */
   1599 #endif
   1600 	RETURN
   1601 
   1602 	.p2align 4
   1603 L(StrncpyExit19):	/* Copy exactly 19 bytes from (%esi) to (%edi); no zero fill follows. */
   1604 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1605 	movl	15(%esi), %ecx	/* bytes 15..18; re-reads byte 15 so 16+4 covers 19 */
   1606 	movdqu	%xmm0, (%edi)
   1607 	movl	%ecx, 15(%edi)
   1608 #ifdef USE_AS_STPCPY
   1609 	lea	19(%edi), %eax	/* %eax = one past the last byte stored */
   1610 #endif
   1611 	RETURN
   1612 
   1613 	.p2align 4
   1614 L(StrncpyExit20):	/* Copy exactly 20 bytes from (%esi) to (%edi); no zero fill follows. */
   1615 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1616 	movl	16(%esi), %ecx	/* bytes 16..19 */
   1617 	movdqu	%xmm0, (%edi)
   1618 	movl	%ecx, 16(%edi)
   1619 #ifdef USE_AS_STPCPY
   1620 	lea	20(%edi), %eax	/* %eax = one past the last byte stored */
   1621 #endif
   1622 	RETURN
   1623 
   1624 	.p2align 4
   1625 L(StrncpyExit21):	/* Copy exactly 21 bytes from (%esi) to (%edi); no zero fill follows. */
   1626 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1627 	movl	16(%esi), %ecx	/* bytes 16..19 */
   1628 	movb	20(%esi), %dl	/* byte 20, loaded from the source */
   1629 	movdqu	%xmm0, (%edi)
   1630 	movl	%ecx, 16(%edi)
   1631 	movb	%dl, 20(%edi)
   1632 #ifdef USE_AS_STPCPY
   1633 	lea	21(%edi), %eax	/* %eax = one past the last byte stored */
   1634 #endif
   1635 	RETURN
   1636 
   1637 	.p2align 4
   1638 L(StrncpyExit22):	/* Copy exactly 22 bytes from (%esi) to (%edi); no zero fill follows. */
   1639 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1640 	movlpd	14(%esi), %xmm3	/* bytes 14..21; overlaps the first chunk by 2 bytes */
   1641 	movdqu	%xmm0, (%edi)
   1642 	movlpd	%xmm3, 14(%edi)
   1643 #ifdef USE_AS_STPCPY
   1644 	lea	22(%edi), %eax	/* %eax = one past the last byte stored */
   1645 #endif
   1646 	RETURN
   1647 
   1648 	.p2align 4
   1649 L(StrncpyExit23):	/* Copy exactly 23 bytes from (%esi) to (%edi); no zero fill follows. */
   1650 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1651 	movlpd	15(%esi), %xmm3	/* bytes 15..22; overlaps the first chunk by 1 byte */
   1652 	movdqu	%xmm0, (%edi)
   1653 	movlpd	%xmm3, 15(%edi)
   1654 #ifdef USE_AS_STPCPY
   1655 	lea	23(%edi), %eax	/* %eax = one past the last byte stored */
   1656 #endif
   1657 	RETURN
   1658 
   1659 	.p2align 4
   1660 L(StrncpyExit24):	/* Copy exactly 24 bytes from (%esi) to (%edi); no zero fill follows. */
   1661 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1662 	movlpd	16(%esi), %xmm2	/* bytes 16..23 */
   1663 	movdqu	%xmm0, (%edi)
   1664 	movlpd	%xmm2, 16(%edi)
   1665 #ifdef USE_AS_STPCPY
   1666 	lea	24(%edi), %eax	/* %eax = one past the last byte stored */
   1667 #endif
   1668 	RETURN
   1669 
   1670 	.p2align 4
   1671 L(StrncpyExit25):	/* Copy exactly 25 bytes from (%esi) to (%edi); no zero fill follows. */
   1672 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1673 	movlpd	16(%esi), %xmm2	/* bytes 16..23 */
   1674 	movb	24(%esi), %cl	/* byte 24, loaded from the source */
   1675 	movdqu	%xmm0, (%edi)
   1676 	movlpd	%xmm2, 16(%edi)
   1677 	movb	%cl, 24(%edi)
   1678 #ifdef USE_AS_STPCPY
   1679 	lea	25(%edi), %eax	/* %eax = one past the last byte stored */
   1680 #endif
   1681 	RETURN
   1682 
   1683 	.p2align 4
   1684 L(StrncpyExit26):	/* Copy exactly 26 bytes from (%esi) to (%edi); no zero fill follows. */
   1685 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1686 	movlpd	16(%esi), %xmm2	/* bytes 16..23 */
   1687 	movw	24(%esi), %cx	/* bytes 24..25 */
   1688 	movdqu	%xmm0, (%edi)
   1689 	movlpd	%xmm2, 16(%edi)
   1690 	movw	%cx, 24(%edi)
   1691 #ifdef USE_AS_STPCPY
   1692 	lea	26(%edi), %eax	/* %eax = one past the last byte stored */
   1693 #endif
   1694 	RETURN
   1695 
   1696 	.p2align 4
   1697 L(StrncpyExit27):	/* Copy exactly 27 bytes from (%esi) to (%edi); no zero fill follows. */
   1698 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1699 	movlpd	16(%esi), %xmm2	/* bytes 16..23 */
   1700 	movl	23(%esi), %ecx	/* bytes 23..26; re-reads byte 23 so 24+4 covers 27 */
   1701 	movdqu	%xmm0, (%edi)
   1702 	movlpd	%xmm2, 16(%edi)
   1703 	movl	%ecx, 23(%edi)
   1704 #ifdef USE_AS_STPCPY
   1705 	lea	27(%edi), %eax	/* %eax = one past the last byte stored */
   1706 #endif
   1707 	RETURN
   1708 
   1709 	.p2align 4
   1710 L(StrncpyExit28):	/* Copy exactly 28 bytes from (%esi) to (%edi); no zero fill follows. */
   1711 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1712 	movlpd	16(%esi), %xmm2	/* bytes 16..23 */
   1713 	movl	24(%esi), %ecx	/* bytes 24..27 */
   1714 	movdqu	%xmm0, (%edi)
   1715 	movlpd	%xmm2, 16(%edi)
   1716 	movl	%ecx, 24(%edi)
   1717 #ifdef USE_AS_STPCPY
   1718 	lea	28(%edi), %eax	/* %eax = one past the last byte stored */
   1719 #endif
   1720 	RETURN
   1721 
   1722 	.p2align 4
   1723 L(StrncpyExit29):	/* Copy exactly 29 bytes from (%esi) to (%edi); no zero fill follows. */
   1724 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1725 	movdqu	13(%esi), %xmm2	/* bytes 13..28; overlaps the first chunk by 3 bytes */
   1726 	movdqu	%xmm0, (%edi)
   1727 	movdqu	%xmm2, 13(%edi)
   1728 #ifdef USE_AS_STPCPY
   1729 	lea	29(%edi), %eax	/* %eax = one past the last byte stored */
   1730 #endif
   1731 	RETURN
   1732 
   1733 	.p2align 4
   1734 L(StrncpyExit30):	/* Copy exactly 30 bytes from (%esi) to (%edi); no zero fill follows. */
   1735 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1736 	movdqu	14(%esi), %xmm2	/* bytes 14..29; overlaps the first chunk by 2 bytes */
   1737 	movdqu	%xmm0, (%edi)
   1738 	movdqu	%xmm2, 14(%edi)
   1739 #ifdef USE_AS_STPCPY
   1740 	lea	30(%edi), %eax	/* %eax = one past the last byte stored */
   1741 #endif
   1742 	RETURN
   1743 
   1744 	.p2align 4
   1745 L(StrncpyExit31):	/* Copy exactly 31 bytes from (%esi) to (%edi); no zero fill follows. */
   1746 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1747 	movdqu	15(%esi), %xmm2	/* bytes 15..30; overlaps the first chunk by 1 byte */
   1748 	movdqu	%xmm0, (%edi)
   1749 	movdqu	%xmm2, 15(%edi)
   1750 #ifdef USE_AS_STPCPY
   1751 	lea	31(%edi), %eax	/* %eax = one past the last byte stored */
   1752 #endif
   1753 	RETURN
   1754 
   1755 	.p2align 4
   1756 L(StrncpyExit32):	/* Copy exactly 32 bytes from (%esi) to (%edi); no zero fill follows. */
   1757 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1758 	movdqu	16(%esi), %xmm2	/* bytes 16..31 */
   1759 	movdqu	%xmm0, (%edi)
   1760 	movdqu	%xmm2, 16(%edi)
   1761 #ifdef USE_AS_STPCPY
   1762 	lea	32(%edi), %eax	/* %eax = one past the last byte stored */
   1763 #endif
   1764 	RETURN
   1765 
   1766 	.p2align 4
   1767 L(StrncpyExit33):	/* Copy exactly 33 bytes from (%esi) to (%edi); no zero fill follows. */
   1768 	movdqu	(%esi), %xmm0	/* bytes 0..15 */
   1769 	movdqu	16(%esi), %xmm2	/* bytes 16..31 */
   1770 	movb	32(%esi), %cl	/* byte 32 */
   1771 	movdqu	%xmm0, (%edi)
   1772 	movdqu	%xmm2, 16(%edi)
   1773 	movb	%cl, 32(%edi)
   1774 	RETURN	/* NOTE(review): unlike L(StrncpyExit1)..L(StrncpyExit32) there is no USE_AS_STPCPY update of %eax here - confirm this entry is never reached in stpncpy builds */
   1775 
   1776 	.p2align 4
   1777 L(Fill0):	/* FillTable entry 0: nothing left to zero. */
   1778 	RETURN
   1779 
   1780 	.p2align 4
   1781 L(Fill1):	/* FillTable entry 1: store 1 byte of %dl at (%edi). L(StrncpyFillTailWithZero) clears %edx before dispatching here. */
   1782 	movb	%dl, (%edi)
   1783 	RETURN
   1784 
   1785 	.p2align 4
   1786 L(Fill2):	/* FillTable entry 2: store 2 bytes of %dx at (%edi). L(StrncpyFillTailWithZero) clears %edx before dispatching here. */
   1787 	movw	%dx, (%edi)
   1788 	RETURN
   1789 
   1790 	.p2align 4
   1791 L(Fill3):	/* FillTable entry 3: cover (%edi)..2(%edi) with one 4-byte store of %edx (cleared by L(StrncpyFillTailWithZero)). */
   1792 	movl	%edx, -1(%edi)	/* also rewrites the byte at -1(%edi); presumably that byte is already zero (terminator or earlier fill) - confirm at the callers */
   1793 	RETURN
   1794 
   1795 	.p2align 4
   1796 L(Fill4):	/* FillTable entry 4: store 4 bytes of %edx at (%edi). L(StrncpyFillTailWithZero) clears %edx before dispatching here. */
   1797 	movl	%edx, (%edi)
   1798 	RETURN
   1799 
   1800 	.p2align 4
   1801 L(Fill5):	/* FillTable entry 5: store 5 bytes from %edx (cleared by L(StrncpyFillTailWithZero)) at (%edi). */
   1802 	movl	%edx, (%edi)	/* bytes 0..3 */
   1803 	movb	%dl, 4(%edi)	/* byte 4 */
   1804 	RETURN
   1805 
   1806 	.p2align 4
   1807 L(Fill6):	/* FillTable entry 6: store 6 bytes from %edx (cleared by L(StrncpyFillTailWithZero)) at (%edi). */
   1808 	movl	%edx, (%edi)	/* bytes 0..3 */
   1809 	movw	%dx, 4(%edi)	/* bytes 4..5 */
   1810 	RETURN
   1811 
   1812 	.p2align 4
   1813 L(Fill7):	/* FillTable entry 7: cover (%edi)..6(%edi) with one 8-byte store of %xmm0 (cleared by L(StrncpyFillTailWithZero)). */
   1814 	movlpd	%xmm0, -1(%edi)	/* also rewrites the byte at -1(%edi); presumably that byte is already zero (terminator or earlier fill) - confirm at the callers */
   1815 	RETURN
   1816 
   1817 	.p2align 4
   1818 L(Fill8):	/* FillTable entry 8: store the low 8 bytes of %xmm0 at (%edi). L(StrncpyFillTailWithZero) clears %xmm0 before dispatching here. */
   1819 	movlpd	%xmm0, (%edi)
   1820 	RETURN
   1821 
   1822 	.p2align 4
   1823 L(Fill9):	/* FillTable entry 9: store 9 bytes from %xmm0/%dl (both cleared by L(StrncpyFillTailWithZero)) at (%edi). */
   1824 	movlpd	%xmm0, (%edi)	/* bytes 0..7 */
   1825 	movb	%dl, 8(%edi)	/* byte 8 */
   1826 	RETURN
   1827 
   1828 	.p2align 4
   1829 L(Fill10):	/* FillTable entry 10: store 10 bytes from %xmm0/%dx (both cleared by L(StrncpyFillTailWithZero)) at (%edi). */
   1830 	movlpd	%xmm0, (%edi)	/* bytes 0..7 */
   1831 	movw	%dx, 8(%edi)	/* bytes 8..9 */
   1832 	RETURN
   1833 
   1834 	.p2align 4
   1835 L(Fill11):	/* FillTable entry 11: store 11 bytes from %xmm0/%edx (both cleared by L(StrncpyFillTailWithZero)) at (%edi). */
   1836 	movlpd	%xmm0, (%edi)	/* bytes 0..7 */
   1837 	movl	%edx, 7(%edi)	/* bytes 7..10; overlaps byte 7 so 8+4 covers 11 */
   1838 	RETURN
   1839 
   1840 	.p2align 4
   1841 L(Fill12):	/* FillTable entry 12: store 12 bytes from %xmm0/%edx (both cleared by L(StrncpyFillTailWithZero)) at (%edi). */
   1842 	movlpd	%xmm0, (%edi)	/* bytes 0..7 */
   1843 	movl	%edx, 8(%edi)	/* bytes 8..11 */
   1844 	RETURN
   1845 
   1846 	.p2align 4
   1847 L(Fill13):	/* FillTable entry 13: store 13 bytes from %xmm0 (cleared by L(StrncpyFillTailWithZero)) at (%edi). */
   1848 	movlpd	%xmm0, (%edi)	/* bytes 0..7 */
   1849 	movlpd	%xmm0, 5(%edi)	/* bytes 5..12; overlaps the first store by 3 bytes */
   1850 	RETURN
   1851 
   1852 	.p2align 4
   1853 L(Fill14):	/* FillTable entry 14: store 14 bytes from %xmm0 (cleared by L(StrncpyFillTailWithZero)) at (%edi). */
   1854 	movlpd	%xmm0, (%edi)	/* bytes 0..7 */
   1855 	movlpd	%xmm0, 6(%edi)	/* bytes 6..13; overlaps the first store by 2 bytes */
   1856 	RETURN
   1857 
   1858 	.p2align 4
   1859 L(Fill15):	/* FillTable entry 15: cover (%edi)..14(%edi) with one 16-byte store of %xmm0 (cleared by L(StrncpyFillTailWithZero)). */
   1860 	movdqu	%xmm0, -1(%edi)	/* also rewrites the byte at -1(%edi); presumably that byte is already zero (terminator or earlier fill) - confirm at the callers */
   1861 	RETURN
   1862 
   1863 	.p2align 4
   1864 L(Fill16):	/* FillTable entry 16: store all 16 bytes of %xmm0 at (%edi). L(StrncpyFillTailWithZero) clears %xmm0 before dispatching here. */
   1865 	movdqu	%xmm0, (%edi)
   1866 	RETURN
   1867 
   1868 	.p2align 4
   1869 L(CopyFrom1To16BytesUnalignedXmm2):	/* Store %xmm2 at (%edi + %ecx), then run on into L(CopyFrom1To16BytesXmmExit). */
   1870 	movdqu	%xmm2, (%edi, %ecx)	/* unaligned 16-byte store; no jump follows, so execution falls through the alignment padding into the next label */
   1871 
   1872 	.p2align 4
   1873 L(CopyFrom1To16BytesXmmExit):	/* Move %edi just past the byte selected by mask %edx, adjust %ebx, then fall through into L(StrncpyFillTailWithZero). */
   1874 	bsf	%edx, %edx	/* %edx = index of its lowest set bit (presumably the first NUL in a pmovmskb mask - confirm at the callers) */
   1875 	add	$15, %ebx	/* NOTE(review): the bias of 15 presumably undoes an adjustment made before the jump here - confirm */
   1876 	add	%ecx, %edi	/* %edi += offset %ecx */
   1877 #ifdef USE_AS_STPCPY
   1878 	lea	(%edi, %edx), %eax	/* %eax = address of the byte at index %edx */
   1879 #endif
   1880 	sub	%edx, %ebx	/* net effect of the two updates: %ebx = %ebx + 15 - index */
   1881 	lea	1(%edi, %edx), %edi	/* %edi = first byte after index %edx; no RETURN, so the zero-fill code below runs next */
   1882 
   1883 	.p2align 4
   1884 L(StrncpyFillTailWithZero):	/* Store %ebx zero bytes starting at (%edi): 64 bytes per loop pass, then a FillTable dispatch for the last 0..16. */
   1885 	pxor	%xmm0, %xmm0	/* 16 zero bytes for the wide stores and the Fill handlers */
   1886 	xor	%edx, %edx	/* zero for the narrow stores done by the Fill handlers */
   1887 	sub	$16, %ebx
   1888 	jbe	L(StrncpyFillExit)	/* 16 bytes or fewer (unsigned): a single table dispatch finishes the job */
   1889 
   1890 	movdqu	%xmm0, (%edi)	/* first 16 zeros; %edi may be unaligned here */
   1891 	add	$16, %edi
   1892 
   1893 	mov	%edi, %esi	/* %esi is reused as scratch from this point on */
   1894 	and	$0xf, %esi	/* %esi = %edi modulo 16 */
   1895 	sub	%esi, %edi	/* round %edi down to a 16-byte boundary (steps back over bytes already zeroed) */
   1896 	add	%esi, %ebx	/* count those re-covered bytes again */
   1897 	sub	$64, %ebx
   1898 	jb	L(StrncpyFillLess64)	/* borrow: fewer than 64 bytes were left */
   1899 
   1900 L(StrncpyFillLoopMovdqa):	/* Aligned loop: four 16-byte stores per pass. */
   1901 	movdqa	%xmm0, (%edi)
   1902 	movdqa	%xmm0, 16(%edi)
   1903 	movdqa	%xmm0, 32(%edi)
   1904 	movdqa	%xmm0, 48(%edi)
   1905 	add	$64, %edi
   1906 	sub	$64, %ebx
   1907 	jae	L(StrncpyFillLoopMovdqa)	/* no borrow: at least another 64 bytes remain */
   1908 
   1909 L(StrncpyFillLess64):	/* Here %ebx = bytes left - 64, i.e. -64..-1. */
   1910 	add	$32, %ebx	/* %ebx = bytes left - 32 */
   1911 	jl	L(StrncpyFillLess32)	/* signed test: negative means fewer than 32 left */
   1912 	movdqa	%xmm0, (%edi)
   1913 	movdqa	%xmm0, 16(%edi)
   1914 	add	$32, %edi
   1915 	sub	$16, %ebx	/* %ebx = bytes still left - 16 */
   1916 	jl	L(StrncpyFillExit)	/* fewer than 16 left */
   1917 	movdqa	%xmm0, (%edi)
   1918 	add	$16, %edi
   1919 	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)	/* %ebx is 0..15 here; macro is defined outside this section, presumably jumps via FillTable[%ebx] */
   1920 
   1921 L(StrncpyFillLess32):	/* Here %ebx = bytes left - 32, i.e. -32..-1. */
   1922 	add	$16, %ebx	/* %ebx = bytes left - 16 */
   1923 	jl	L(StrncpyFillExit)	/* fewer than 16 left */
   1924 	movdqa	%xmm0, (%edi)
   1925 	add	$16, %edi
   1926 	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)	/* %ebx is 0..15 here */
   1927 
   1928 L(StrncpyFillExit):	/* Here %ebx = bytes left - 16. */
   1929 	add	$16, %ebx	/* restore the true count, 0..16 */
   1930 	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)	/* tail handled by L(Fill0)..L(Fill16) */
   1931 
   1932 	.p2align 4
   1933 L(AlignedLeaveCase2OrCase3):	/* Split on %edx: non-zero goes to Case2, zero continues into Case3 below. */
   1934 	test	%edx, %edx	/* presumably %edx is a zero-byte mask for the pending 64-byte block - confirm at the caller */
   1935 	jnz	L(Aligned64LeaveCase2)
   1936 L(Aligned64LeaveCase3):	/* %edx == 0: store %xmm4..%xmm7 to (%edi) 16 bytes at a time while %ebx allows it. */
   1937 	lea	64(%ebx), %ecx
   1938 	and	$-16, %ecx	/* %ecx = (%ebx + 64) rounded down to a multiple of 16, left for L(CopyFrom1To16BytesCase3) (defined outside this section) */
   1939 	add	$48, %ebx
   1940 	jl	L(CopyFrom1To16BytesCase3)	/* signed: result negative, so not even the first chunk is stored here */
   1941 	movdqa	%xmm4, (%edi)	/* aligned stores: %edi must be 16-byte aligned on this path */
   1942 	sub	$16, %ebx
   1943 	jb	L(CopyFrom1To16BytesCase3)	/* borrow: remainder is handled by the Case3 code */
   1944 	movdqa	%xmm5, 16(%edi)
   1945 	sub	$16, %ebx
   1946 	jb	L(CopyFrom1To16BytesCase3)
   1947 	movdqa	%xmm6, 32(%edi)
   1948 	sub	$16, %ebx
   1949 	jb	L(CopyFrom1To16BytesCase3)
   1950 	movdqa	%xmm7, 48(%edi)
   1951 #ifdef USE_AS_STPCPY
   1952 	lea	64(%edi), %eax	/* %eax = one past the 64 bytes stored */
   1953 #endif
   1954 	RETURN
   1955 
   1956 	.p2align 4
   1957 L(Aligned64LeaveCase2):	/* Scan %xmm4..%xmm7 for a zero byte, 16 bytes at a time, storing each clean chunk to (%edi) while %ebx allows it. */
   1958 	pxor	%xmm0, %xmm0
   1959 	xor	%ecx, %ecx	/* %ecx = byte offset of the chunk being examined */
   1960 	pcmpeqb	%xmm4, %xmm0	/* 0xff in every byte position where %xmm4 holds 0 */
   1961 	pmovmskb %xmm0, %edx	/* one mask bit per byte */
   1962 	add	$48, %ebx
   1963 	jle	L(CopyFrom1To16BytesCase2OrCase3)	/* signed: %ebx <= 0 after the adjustment */
   1964 	test	%edx, %edx
   1965 	jnz	L(CopyFrom1To16BytesXmm4)	/* zero byte found in %xmm4 */
   1966 
   1967 	pcmpeqb	%xmm5, %xmm0	/* %xmm0 is all zero again here because the previous mask was 0 */
   1968 	pmovmskb %xmm0, %edx
   1969 	movdqa	%xmm4, (%edi)	/* previous chunk had no zero byte: store it (aligned) */
   1970 	add	$16, %ecx
   1971 	sub	$16, %ebx
   1972 	jbe	L(CopyFrom1To16BytesCase2OrCase3)	/* unsigned: %ebx ran out (borrow or zero) */
   1973 	test	%edx, %edx
   1974 	jnz	L(CopyFrom1To16BytesXmm5)	/* zero byte found in %xmm5 */
   1975 
   1976 	pcmpeqb	%xmm6, %xmm0
   1977 	pmovmskb %xmm0, %edx
   1978 	movdqa	%xmm5, 16(%edi)
   1979 	add	$16, %ecx
   1980 	sub	$16, %ebx
   1981 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
   1982 	test	%edx, %edx
   1983 	jnz	L(CopyFrom1To16BytesXmm6)	/* zero byte found in %xmm6 */
   1984 
   1985 	pcmpeqb	%xmm7, %xmm0
   1986 	pmovmskb %xmm0, %edx
   1987 	movdqa	%xmm6, 32(%edi)
   1988 	lea	16(%edi, %ecx), %edi	/* %ecx is 32 here, so both pointers move to the fourth chunk (+48) */
   1989 	lea	16(%esi, %ecx), %esi
   1990 	bsf	%edx, %edx	/* index of the first zero byte in %xmm7; presumably the mask is non-zero here - confirm */
   1991 	cmp	%ebx, %edx
   1992 	jb	L(CopyFrom1To16BytesExit)	/* index < %ebx (unsigned): take the L(CopyFrom1To16BytesExit) path */
   1993 	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)	/* otherwise dispatch on %ebx through ExitStrncpyTable (macro defined outside this section) */
   1994 
   1995 	.p2align 4
   1996 L(UnalignedLeaveCase2OrCase3):	/* Split on %edx: non-zero goes to Case2, zero continues into Case3 below. Same shape as the Aligned variant but with unaligned stores. */
   1997 	test	%edx, %edx	/* presumably %edx is a zero-byte mask for the pending 64-byte block - confirm at the caller */
   1998 	jnz	L(Unaligned64LeaveCase2)
   1999 L(Unaligned64LeaveCase3):	/* %edx == 0: store %xmm4..%xmm7 to (%edi) 16 bytes at a time while %ebx allows it. */
   2000 	lea	64(%ebx), %ecx
   2001 	and	$-16, %ecx	/* %ecx = (%ebx + 64) rounded down to a multiple of 16, left for L(CopyFrom1To16BytesCase3) (defined outside this section) */
   2002 	add	$48, %ebx
   2003 	jl	L(CopyFrom1To16BytesCase3)	/* signed: result negative, so not even the first chunk is stored here */
   2004 	movdqu	%xmm4, (%edi)	/* movdqu: no alignment requirement on %edi */
   2005 	sub	$16, %ebx
   2006 	jb	L(CopyFrom1To16BytesCase3)	/* borrow: remainder is handled by the Case3 code */
   2007 	movdqu	%xmm5, 16(%edi)
   2008 	sub	$16, %ebx
   2009 	jb	L(CopyFrom1To16BytesCase3)
   2010 	movdqu	%xmm6, 32(%edi)
   2011 	sub	$16, %ebx
   2012 	jb	L(CopyFrom1To16BytesCase3)
   2013 	movdqu	%xmm7, 48(%edi)
   2014 #ifdef USE_AS_STPCPY
   2015 	lea	64(%edi), %eax	/* %eax = one past the 64 bytes stored */
   2016 #endif
   2017 	RETURN
   2018 
   2019 	.p2align 4
   2020 L(Unaligned64LeaveCase2):	/* Scan %xmm4..%xmm7 for a zero byte, 16 bytes at a time, storing each clean chunk to (%edi) with unaligned stores while %ebx allows it. */
   2021 	pxor	%xmm0, %xmm0
   2022 	xor	%ecx, %ecx	/* %ecx = byte offset of the chunk being examined */
   2023 	pcmpeqb	%xmm4, %xmm0	/* 0xff in every byte position where %xmm4 holds 0 */
   2024 	pmovmskb %xmm0, %edx	/* one mask bit per byte */
   2025 	add	$48, %ebx
   2026 	jle	L(CopyFrom1To16BytesCase2OrCase3)	/* signed: %ebx <= 0 after the adjustment */
   2027 	test	%edx, %edx
   2028 	jnz	L(CopyFrom1To16BytesUnalignedXmm4)	/* zero byte found in %xmm4 */
   2029 
   2030 	pcmpeqb	%xmm5, %xmm0	/* %xmm0 is all zero again here because the previous mask was 0 */
   2031 	pmovmskb %xmm0, %edx
   2032 	movdqu	%xmm4, (%edi)	/* previous chunk had no zero byte: store it */
   2033 	add	$16, %ecx
   2034 	sub	$16, %ebx
   2035 	jbe	L(CopyFrom1To16BytesCase2OrCase3)	/* unsigned: %ebx ran out (borrow or zero) */
   2036 	test	%edx, %edx
   2037 	jnz	L(CopyFrom1To16BytesUnalignedXmm5)	/* zero byte found in %xmm5 */
   2038 
   2039 	pcmpeqb	%xmm6, %xmm0
   2040 	pmovmskb %xmm0, %edx
   2041 	movdqu	%xmm5, 16(%edi)
   2042 	add	$16, %ecx
   2043 	sub	$16, %ebx
   2044 	jbe	L(CopyFrom1To16BytesCase2OrCase3)
   2045 	test	%edx, %edx
   2046 	jnz	L(CopyFrom1To16BytesUnalignedXmm6)	/* zero byte found in %xmm6 */
   2047 
   2048 	pcmpeqb	%xmm7, %xmm0
   2049 	pmovmskb %xmm0, %edx
   2050 	movdqu	%xmm6, 32(%edi)
   2051 	lea	16(%edi, %ecx), %edi	/* %ecx is 32 here, so both pointers move to the fourth chunk (+48) */
   2052 	lea	16(%esi, %ecx), %esi
   2053 	bsf	%edx, %edx	/* index of the first zero byte in %xmm7; presumably the mask is non-zero here - confirm */
   2054 	cmp	%ebx, %edx
   2055 	jb	L(CopyFrom1To16BytesExit)	/* index < %ebx (unsigned): take the L(CopyFrom1To16BytesExit) path */
   2056 	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)	/* otherwise dispatch on %ebx through ExitStrncpyTable (macro defined outside this section) */
   2057 
   2058 	.p2align 4
   2059 L(ExitZero):	/* Return with %eax = %edi and nothing stored. */
   2060 	movl	%edi, %eax
   2061 	RETURN
   2062 #endif
   2063 
   2064 END (STRCPY)
   2065 
   2066 	.section .rodata	/* switch to .rodata first ... */
   2067 	.p2align 4	/* ... so the 16-byte alignment applies to the table itself; issued before .section it only padded the end of .text */
   2068 L(ExitTable):	/* Jump table of 32 entries: entry i (0-based) targets L(Exit<i+1>). JMPTBL is a macro defined elsewhere in this file. */
   2069 	.int	JMPTBL(L(Exit1), L(ExitTable))
   2070 	.int	JMPTBL(L(Exit2), L(ExitTable))
   2071 	.int	JMPTBL(L(Exit3), L(ExitTable))
   2072 	.int	JMPTBL(L(Exit4), L(ExitTable))
   2073 	.int	JMPTBL(L(Exit5), L(ExitTable))
   2074 	.int	JMPTBL(L(Exit6), L(ExitTable))
   2075 	.int	JMPTBL(L(Exit7), L(ExitTable))
   2076 	.int	JMPTBL(L(Exit8), L(ExitTable))
   2077 	.int	JMPTBL(L(Exit9), L(ExitTable))
   2078 	.int	JMPTBL(L(Exit10), L(ExitTable))
   2079 	.int	JMPTBL(L(Exit11), L(ExitTable))
   2080 	.int	JMPTBL(L(Exit12), L(ExitTable))
   2081 	.int	JMPTBL(L(Exit13), L(ExitTable))
   2082 	.int	JMPTBL(L(Exit14), L(ExitTable))
   2083 	.int	JMPTBL(L(Exit15), L(ExitTable))
   2084 	.int	JMPTBL(L(Exit16), L(ExitTable))
   2085 	.int	JMPTBL(L(Exit17), L(ExitTable))
   2086 	.int	JMPTBL(L(Exit18), L(ExitTable))
   2087 	.int	JMPTBL(L(Exit19), L(ExitTable))
   2088 	.int	JMPTBL(L(Exit20), L(ExitTable))
   2089 	.int	JMPTBL(L(Exit21), L(ExitTable))
   2090 	.int	JMPTBL(L(Exit22), L(ExitTable))
   2091 	.int	JMPTBL(L(Exit23), L(ExitTable))
   2092 	.int	JMPTBL(L(Exit24), L(ExitTable))
   2093 	.int	JMPTBL(L(Exit25), L(ExitTable))
   2094 	.int	JMPTBL(L(Exit26), L(ExitTable))
   2095 	.int	JMPTBL(L(Exit27), L(ExitTable))
   2096 	.int	JMPTBL(L(Exit28), L(ExitTable))
   2097 	.int	JMPTBL(L(Exit29), L(ExitTable))
   2098 	.int	JMPTBL(L(Exit30), L(ExitTable))
   2099 	.int	JMPTBL(L(Exit31), L(ExitTable))
   2100 	.int	JMPTBL(L(Exit32), L(ExitTable))
   2101 #ifdef USE_AS_STRNCPY
   2102 L(ExitStrncpyTable):	/* Jump table of 34 entries indexed by byte count: entry 0 targets L(Exit0), entry k (1..33) targets L(StrncpyExit<k>). JMPTBL is a macro defined elsewhere in this file. */
   2103 	.int	JMPTBL(L(Exit0), L(ExitStrncpyTable))
   2104 	.int	JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
   2105 	.int	JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
   2106 	.int	JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
   2107 	.int	JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
   2108 	.int	JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
   2109 	.int	JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
   2110 	.int	JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
   2111 	.int	JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
   2112 	.int	JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
   2113 	.int	JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
   2114 	.int	JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
   2115 	.int	JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
   2116 	.int	JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
   2117 	.int	JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
   2118 	.int	JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
   2119 	.int	JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
   2120 	.int	JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
   2121 	.int	JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
   2122 	.int	JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
   2123 	.int	JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
   2124 	.int	JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
   2125 	.int	JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
   2126 	.int    JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
   2127 	.int	JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
   2128 	.int	JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
   2129 	.int	JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
   2130 	.int	JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
   2131 	.int	JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
   2132 	.int	JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
   2133 	.int	JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
   2134 	.int	JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
   2135 	.int	JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
   2136 	.int	JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
   2137 
   2138 	.p2align 4
   2139 L(FillTable):	/* Jump table of 17 entries indexed by the number of zero bytes still to store: entry k (0..16) targets L(Fill<k>). */
   2140 	.int	JMPTBL(L(Fill0), L(FillTable))
   2141 	.int	JMPTBL(L(Fill1), L(FillTable))
   2142 	.int	JMPTBL(L(Fill2), L(FillTable))
   2143 	.int	JMPTBL(L(Fill3), L(FillTable))
   2144 	.int	JMPTBL(L(Fill4), L(FillTable))
   2145 	.int	JMPTBL(L(Fill5), L(FillTable))
   2146 	.int	JMPTBL(L(Fill6), L(FillTable))
   2147 	.int	JMPTBL(L(Fill7), L(FillTable))
   2148 	.int	JMPTBL(L(Fill8), L(FillTable))
   2149 	.int	JMPTBL(L(Fill9), L(FillTable))
   2150 	.int	JMPTBL(L(Fill10), L(FillTable))
   2151 	.int	JMPTBL(L(Fill11), L(FillTable))
   2152 	.int	JMPTBL(L(Fill12), L(FillTable))
   2153 	.int	JMPTBL(L(Fill13), L(FillTable))
   2154 	.int	JMPTBL(L(Fill14), L(FillTable))
   2155 	.int	JMPTBL(L(Fill15), L(FillTable))
   2156 	.int	JMPTBL(L(Fill16), L(FillTable))
   2157 #endif
   2158