Home | History | Annotate | Download | only in string
      1 /*
      2 Copyright (c) 2011, Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     31 #ifndef USE_AS_WCSCAT
     32 
        /* Helper macros for this file.  L, the cfi_* wrappers, ENTRY and END
           are defined only when the including file has not already supplied
           them; the remaining macros are defined unconditionally.  */
        /* L(label): file-local label, spelled with a ".L" prefix.  */
     33 # ifndef L
     34 #  define L(label)	.L##label
     35 # endif
     36 
        /* cfi_*: thin wrappers around the assembler's .cfi_* unwind
           directives.  */
     37 # ifndef cfi_startproc
     38 #  define cfi_startproc	.cfi_startproc
     39 # endif
     40 
     41 # ifndef cfi_endproc
     42 #  define cfi_endproc	.cfi_endproc
     43 # endif
     44 
     45 # ifndef cfi_rel_offset
     46 #  define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
     47 # endif
     48 
     49 # ifndef cfi_restore
     50 #  define cfi_restore(reg)	.cfi_restore reg
     51 # endif
     52 
     53 # ifndef cfi_adjust_cfa_offset
     54 #  define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
     55 # endif
     56 
        /* ENTRY(name): mark `name` as a global function symbol, align it to
           2^4 = 16 bytes, emit the label and open a CFI region.  */
     57 # ifndef ENTRY
     58 #  define ENTRY(name)	\
     59 	.type name, @function;	\
     60 	.globl name;	\
     61 	.p2align 4;	\
     62 name:	\
     63 	cfi_startproc
     64 # endif
     65 
        /* END(name): close the CFI region and record the symbol size.  */
     66 # ifndef END
     67 #  define END(name)	\
     68 	cfi_endproc;	\
     69 	.size name, .-name
     70 # endif
     71 
        /* CFI_PUSH / CFI_POP emit unwind directives only (no instructions):
           they describe a 4-byte push or pop of REG to the unwinder.  */
     72 # define CFI_PUSH(REG)	\
     73 	cfi_adjust_cfa_offset (4);	\
     74 	cfi_rel_offset (REG, 0)
     75 
     76 # define CFI_POP(REG)	\
     77 	cfi_adjust_cfa_offset (-4);	\
     78 	cfi_restore (REG)
     79 
        /* PUSH / POP: the real pushl/popl followed by the matching unwind
           annotation.  */
     80 # define PUSH(REG)	pushl REG; CFI_PUSH (REG)
     81 # define POP(REG)	popl REG; CFI_POP (REG)
     82 
        /* PARMS: offset from %esp of the first stack argument at function
           entry (just above the 4-byte return address).
           RETURN: restore %edi and return.  The trailing CFI_PUSH emits no
           code; it re-declares %edi as saved for the instructions that
           textually follow the ret, which are reached with %edi still on
           the stack.  */
     83 # define PARMS	4
     84 # define RETURN	POP (%edi); ret; CFI_PUSH (%edi)
     85 
        /* Stack-argument offsets.  The entry code loads STR1 as the
           destination pointer and STR2 as the source pointer.  LEN is not
           referenced by the code visible in this file.  */
     86 # define STR1	PARMS
     87 # define STR2	STR1+4
     88 # define LEN	STR2+4
     89 
        /* wcscpy entry: copy the zero-terminated string of 32-bit elements
           at STR2 (source) to STR1 (destination); the destination pointer is
           returned in %eax.
           Register roles from here on:
             %edx = destination cursor, %ecx = source cursor,
             %edi = original destination (return value on the RETURN paths),
             %esi = scratch / byte offset, %eax = scratch / zero mask,
             %xmm0 = all-zero comparand (re-zeroed by every compare that
                     finds no terminator).  */
     90 .text
     91 ENTRY (wcscpy)
     92 	mov	STR1(%esp), %edx
     93 	mov	STR2(%esp), %ecx
     94 
        /* Short strings: if one of the first four elements is the
           terminator, copy 4/8/12/16 bytes in the ExitTail stubs and return
           before any callee-saved register has been pushed.  */
     95 	cmpl	$0, (%ecx)
     96 	jz	L(ExitTail4)
     97 	cmpl	$0, 4(%ecx)
     98 	jz	L(ExitTail8)
     99 	cmpl	$0, 8(%ecx)
    100 	jz	L(ExitTail12)
    101 	cmpl	$0, 12(%ecx)
    102 	jz	L(ExitTail16)
    103 
        /* Keep the original destination in %edi for the return value.  */
    104 	PUSH	(%edi)
    105 	mov	%edx, %edi
        /* When USE_AS_WCSCAT is defined everything above is compiled out.
           NOTE(review): presumably the including file then provides the
           entry point and sets up %edx, %ecx and a pushed %edi itself --
           confirm against that file.  */
    106 #endif
    107 	PUSH	(%esi)
        /* %esi = first 16-byte-aligned address strictly above the source.  */
    108 	lea	16(%ecx), %esi
    109 
    110 	and	$-16, %esi
    111 
        /* Compare that aligned block against zero, and copy the first 16
           source bytes to the destination with unaligned moves.  */
    112 	pxor	%xmm0, %xmm0
    113 	pcmpeqd	(%esi), %xmm0
    114 	movdqu	(%ecx), %xmm1
    115 	movdqu	%xmm1, (%edx)
    116 
        /* %eax = byte mask of zero elements (4 mask bits per element);
           %esi = byte offset of the aligned block from the source start.  */
    117 	pmovmskb %xmm0, %eax
    118 	sub	%ecx, %esi
    119 
    120 	test	%eax, %eax
    121 	jnz	L(CopyFrom1To16Bytes)
    122 
        /* Round the destination up to the next 16-byte boundary and advance
           the source by the same number of bytes (at most 16, all of which
           were stored by the movdqu above).  */
    123 	mov	%edx, %eax
    124 	lea	16(%edx), %edx
    125 	and	$-16, %edx
    126 	sub	%edx, %eax
    127 
    128 	sub	%eax, %ecx
    129 	mov	%ecx, %eax
        /* %eax = source misalignment relative to the now-aligned
           destination.  %esi is cleared with mov rather than xor so the
           flags produced by this `and` survive until the jz below.  */
    130 	and	$0xf, %eax
    131 	mov	$0, %esi
    132 
        /* Dispatch on the misalignment: 0, 4, 8, otherwise 12 bytes.  */
    133 	jz	L(Align16Both)
    134 	cmp	$4, %eax
    135 	je	L(Shl4)
    136 	cmp	$8, %eax
    137 	je	L(Shl8)
    138 	jmp	L(Shl12)
    139 
        /* Source and destination are both 16-byte aligned.  %esi is the
           running byte offset (0 on entry).  Each step loads the next
           16-byte block, stores the previous one, and tests the new block
           for a zero element; on a hit, L(CopyFrom1To16Bytes) is entered
           with %esi = offset of the block holding the terminator.  */
    140 L(Align16Both):
    141 	movaps	(%ecx), %xmm1
    142 	movaps	16(%ecx), %xmm2
    143 	movaps	%xmm1, (%edx)
    144 	pcmpeqd	%xmm2, %xmm0
    145 	pmovmskb %xmm0, %eax
    146 	lea	16(%esi), %esi
    147 
    148 	test	%eax, %eax
    149 	jnz	L(CopyFrom1To16Bytes)
    150 
    151 	movaps	16(%ecx, %esi), %xmm3
    152 	movaps	%xmm2, (%edx, %esi)
    153 	pcmpeqd	%xmm3, %xmm0
    154 	pmovmskb %xmm0, %eax
    155 	lea	16(%esi), %esi
    156 
    157 	test	%eax, %eax
    158 	jnz	L(CopyFrom1To16Bytes)
    159 
    160 	movaps	16(%ecx, %esi), %xmm4
    161 	movaps	%xmm3, (%edx, %esi)
    162 	pcmpeqd	%xmm4, %xmm0
    163 	pmovmskb %xmm0, %eax
    164 	lea	16(%esi), %esi
    165 
    166 	test	%eax, %eax
    167 	jnz	L(CopyFrom1To16Bytes)
    168 
    169 	movaps	16(%ecx, %esi), %xmm1
    170 	movaps	%xmm4, (%edx, %esi)
    171 	pcmpeqd	%xmm1, %xmm0
    172 	pmovmskb %xmm0, %eax
    173 	lea	16(%esi), %esi
    174 
    175 	test	%eax, %eax
    176 	jnz	L(CopyFrom1To16Bytes)
    177 
    178 	movaps	16(%ecx, %esi), %xmm2
    179 	movaps	%xmm1, (%edx, %esi)
    180 	pcmpeqd	%xmm2, %xmm0
    181 	pmovmskb %xmm0, %eax
    182 	lea	16(%esi), %esi
    183 
    184 	test	%eax, %eax
    185 	jnz	L(CopyFrom1To16Bytes)
    186 
    187 	movaps	16(%ecx, %esi), %xmm3
    188 	movaps	%xmm2, (%edx, %esi)
    189 	pcmpeqd	%xmm3, %xmm0
    190 	pmovmskb %xmm0, %eax
    191 	lea	16(%esi), %esi
    192 
    193 	test	%eax, %eax
    194 	jnz	L(CopyFrom1To16Bytes)
    195 
        /* No terminator so far.  Store the last checked block, then round
           the source cursor down to a 64-byte boundary and move the
           destination cursor by the same amount.  Because of the round-down
           the 64-byte loop may store again up to 48 bytes that were already
           copied.  */
    196 	movaps	%xmm3, (%edx, %esi)
    197 	mov	%ecx, %eax
    198 	lea	16(%ecx, %esi), %ecx
    199 	and	$-0x40, %ecx
    200 	sub	%ecx, %eax
    201 	sub	%eax, %edx
    202 
        /* The loop advances both cursors by 64 before it tests, so the exit
           path starts from offset -64 relative to them.  */
    203 	mov	$-0x40, %esi
    204 
        /* Main aligned loop: 64 bytes per iteration.  The four 16-byte
           blocks are loaded into %xmm4 (copy of block 0), %xmm5, %xmm6 (copy
           of block 2) and %xmm7.  Their byte-wise minimum is compared with
           zero as a cheap "might contain a terminator" test.  A zero element
           in any block always triggers it; because the minimum is taken per
           byte it can also trigger with no terminator present, which
           L(Aligned64Leave) resolves by checking each block on its own.  */
    205 L(Aligned64Loop):
    206 	movaps	(%ecx), %xmm2
    207 	movaps	32(%ecx), %xmm3
    208 	movaps	%xmm2, %xmm4
    209 	movaps	16(%ecx), %xmm5
    210 	movaps	%xmm3, %xmm6
    211 	movaps	48(%ecx), %xmm7
    212 	pminub	%xmm5, %xmm2
    213 	pminub	%xmm7, %xmm3
    214 	pminub	%xmm2, %xmm3
    215 	lea	64(%edx), %edx
    216 	pcmpeqd	%xmm0, %xmm3
    217 	lea	64(%ecx), %ecx
    218 	pmovmskb %xmm3, %eax
    219 
    220 	test	%eax, %eax
    221 	jnz	L(Aligned64Leave)
        /* Clean: store the four blocks.  The cursors were already advanced,
           hence the negative displacements.  */
    222 	movaps	%xmm4, -64(%edx)
    223 	movaps	%xmm5, -48(%edx)
    224 	movaps	%xmm6, -32(%edx)
    225 	movaps	%xmm7, -16(%edx)
    226 	jmp	L(Aligned64Loop)
    227 
        /* Possible terminator in the last 64 bytes.  Check the blocks in
           order; %esi (-64 on entry) is stepped by 16 per block so that
           L(CopyFrom1To16Bytes) can rewind the cursors to the block that
           holds the terminator.  Each block is stored only after it has
           been found clean.  */
    228 L(Aligned64Leave):
    229 	pcmpeqd	%xmm4, %xmm0
    230 	pmovmskb %xmm0, %eax
    231 	test	%eax, %eax
    232 	jnz	L(CopyFrom1To16Bytes)
    233 
    234 	pcmpeqd	%xmm5, %xmm0
    235 	pmovmskb %xmm0, %eax
    236 	movaps	%xmm4, -64(%edx)
    237 	lea	16(%esi), %esi
    238 	test	%eax, %eax
    239 	jnz	L(CopyFrom1To16Bytes)
    240 
    241 	pcmpeqd	%xmm6, %xmm0
    242 	pmovmskb %xmm0, %eax
    243 	movaps	%xmm5, -48(%edx)
    244 	lea	16(%esi), %esi
    245 	test	%eax, %eax
    246 	jnz	L(CopyFrom1To16Bytes)
    247 
    248 	movaps	%xmm6, -32(%edx)
    249 	pcmpeqd	%xmm7, %xmm0
    250 	pmovmskb %xmm0, %eax
    251 	lea	16(%esi), %esi
    252 	test	%eax, %eax
    253 	jnz	L(CopyFrom1To16Bytes)
    254 
        /* False alarm from the byte-wise minimum: no block holds a zero
           element.  Reset the offset, store the last block and resume.  */
    255 	mov	$-0x40, %esi
    256 	movaps	%xmm7, -16(%edx)
    257 	jmp	L(Aligned64Loop)
    258 
        /* Source is 4 bytes past a 16-byte boundary while the destination is
           aligned.  All loads are aligned: %xmm1 holds the aligned block
           that contains the source cursor, %xmm2 the block after it.
           palignr $4 joins the two and shifts right by 4 bytes, producing
           the 16 source bytes that start at %ecx.  */
    259 	.p2align 4
    260 L(Shl4):
    261 	movaps	-4(%ecx), %xmm1
    262 	movaps	12(%ecx), %xmm2
        /* Also re-entered from L(Shl4LoopStart) to examine a suspicious
           64-byte group one block at a time.  */
    263 L(Shl4Start):
    264 	pcmpeqd	%xmm2, %xmm0
    265 	pmovmskb %xmm0, %eax
    266 	movaps	%xmm2, %xmm3
    267 
    268 	test	%eax, %eax
    269 	jnz	L(Shl4LoopExit)
    270 
        /* Four unrolled steps; %xmm1 and %xmm3 alternate as "previous
           block".  */
    271 	palignr	$4, %xmm1, %xmm2
    272 	movaps	%xmm2, (%edx)
    273 	movaps	28(%ecx), %xmm2
    274 
    275 	pcmpeqd	%xmm2, %xmm0
    276 	lea	16(%edx), %edx
    277 	pmovmskb %xmm0, %eax
    278 	lea	16(%ecx), %ecx
    279 	movaps	%xmm2, %xmm1
    280 
    281 	test	%eax, %eax
    282 	jnz	L(Shl4LoopExit)
    283 
    284 	palignr	$4, %xmm3, %xmm2
    285 	movaps	%xmm2, (%edx)
    286 	movaps	28(%ecx), %xmm2
    287 
    288 	pcmpeqd	%xmm2, %xmm0
    289 	lea	16(%edx), %edx
    290 	pmovmskb %xmm0, %eax
    291 	lea	16(%ecx), %ecx
    292 	movaps	%xmm2, %xmm3
    293 
    294 	test	%eax, %eax
    295 	jnz	L(Shl4LoopExit)
    296 
    297 	palignr	$4, %xmm1, %xmm2
    298 	movaps	%xmm2, (%edx)
    299 	movaps	28(%ecx), %xmm2
    300 
    301 	pcmpeqd	%xmm2, %xmm0
    302 	lea	16(%edx), %edx
    303 	pmovmskb %xmm0, %eax
    304 	lea	16(%ecx), %ecx
    305 
    306 	test	%eax, %eax
    307 	jnz	L(Shl4LoopExit)
    308 
    309 	palignr	$4, %xmm3, %xmm2
    310 	movaps	%xmm2, (%edx)
    311 	lea	28(%ecx), %ecx
    312 	lea	16(%edx), %edx
    313 
        /* Round the aligned load address down to a 64-byte boundary, move
           the destination by the same amount, then bias %ecx by -12 so the
           12/28/44/60(%ecx) loads below are the four aligned blocks of one
           64-byte group.  */
    314 	mov	%ecx, %eax
    315 	and	$-0x40, %ecx
    316 	sub	%ecx, %eax
    317 	lea	-12(%ecx), %ecx
    318 	sub	%eax, %edx
    319 
    320 	movaps	-4(%ecx), %xmm1
    321 
        /* 64 bytes per iteration.  The byte-wise minimum of the four blocks
           is compared with zero; on any hit (real or spurious) the group is
           re-examined block by block at L(Shl4Start), which reloads what it
           needs, so the palignr results computed before the branch are
           simply dropped.  */
    322 L(Shl4LoopStart):
    323 	movaps	12(%ecx), %xmm2
    324 	movaps	28(%ecx), %xmm3
    325 	movaps	%xmm3, %xmm6
    326 	movaps	44(%ecx), %xmm4
    327 	movaps	%xmm4, %xmm7
    328 	movaps	60(%ecx), %xmm5
    329 	pminub	%xmm2, %xmm6
    330 	pminub	%xmm5, %xmm7
    331 	pminub	%xmm6, %xmm7
    332 	pcmpeqd	%xmm0, %xmm7
    333 	pmovmskb %xmm7, %eax
    334 	movaps	%xmm5, %xmm7
    335 	palignr	$4, %xmm4, %xmm5
    336 	palignr	$4, %xmm3, %xmm4
    337 	test	%eax, %eax
    338 	jnz	L(Shl4Start)
    339 
    340 	palignr	$4, %xmm2, %xmm3
    341 	lea	64(%ecx), %ecx
    342 	palignr	$4, %xmm1, %xmm2
        /* The unshifted last block becomes "previous block" next time.  */
    343 	movaps	%xmm7, %xmm1
    344 	movaps	%xmm5, 48(%edx)
    345 	movaps	%xmm4, 32(%edx)
    346 	movaps	%xmm3, 16(%edx)
    347 	movaps	%xmm2, (%edx)
    348 	lea	64(%edx), %edx
    349 	jmp	L(Shl4LoopStart)
    350 
        /* A zero element was found in the aligned block at 12(%ecx); %eax
           holds its mask.  Copy the 12 bytes that precede that block, step
           both cursors onto it, and finish according to which element is
           the terminator.  */
    351 L(Shl4LoopExit):
    352 	movlpd	(%ecx), %xmm0
    353 	movl	8(%ecx), %esi
    354 	movlpd	%xmm0, (%edx)
    355 	movl	%esi, 8(%edx)
    356 	POP	(%esi)
    357 	add	$12, %edx
    358 	add	$12, %ecx
        /* %al == 0: the terminator is element 2 or 3.  */
    359 	test	%al, %al
    360 	jz	L(ExitHigh)
        /* Mask bit 0 set: element 0 is the terminator.  */
    361 	test	$0x01, %al
    362 	jnz	L(Exit4)
        /* Otherwise element 1 is the terminator: copy 8 bytes.  */
    363 	movlpd	(%ecx), %xmm0
    364 	movlpd	%xmm0, (%edx)
    365 	movl	%edi, %eax
    366 	RETURN
    367 
        /* Unwind annotation only: the code that follows is entered with
           %esi still pushed.  */
    368 	CFI_PUSH	(%esi)
    369 
        /* Source is 8 bytes past a 16-byte boundary while the destination is
           aligned.  Same scheme as the 4-byte variant: aligned loads of the
           previous block (%xmm1 or %xmm3) and the next block (%xmm2), joined
           and shifted right by 8 bytes with palignr $8.  */
    370 	.p2align 4
    371 L(Shl8):
    372 	movaps	-8(%ecx), %xmm1
    373 	movaps	8(%ecx), %xmm2
        /* Also re-entered from L(Shl8LoopStart) to examine a suspicious
           64-byte group one block at a time.  */
    374 L(Shl8Start):
    375 	pcmpeqd	%xmm2, %xmm0
    376 	pmovmskb %xmm0, %eax
    377 	movaps	%xmm2, %xmm3
    378 
    379 	test	%eax, %eax
    380 	jnz	L(Shl8LoopExit)
    381 
    382 	palignr	$8, %xmm1, %xmm2
    383 	movaps	%xmm2, (%edx)
    384 	movaps	24(%ecx), %xmm2
    385 
    386 	pcmpeqd	%xmm2, %xmm0
    387 	lea	16(%edx), %edx
    388 	pmovmskb %xmm0, %eax
    389 	lea	16(%ecx), %ecx
    390 	movaps	%xmm2, %xmm1
    391 
    392 	test	%eax, %eax
    393 	jnz	L(Shl8LoopExit)
    394 
    395 	palignr	$8, %xmm3, %xmm2
    396 	movaps	%xmm2, (%edx)
    397 	movaps	24(%ecx), %xmm2
    398 
    399 	pcmpeqd	%xmm2, %xmm0
    400 	lea	16(%edx), %edx
    401 	pmovmskb %xmm0, %eax
    402 	lea	16(%ecx), %ecx
    403 	movaps	%xmm2, %xmm3
    404 
    405 	test	%eax, %eax
    406 	jnz	L(Shl8LoopExit)
    407 
    408 	palignr	$8, %xmm1, %xmm2
    409 	movaps	%xmm2, (%edx)
    410 	movaps	24(%ecx), %xmm2
    411 
    412 	pcmpeqd	%xmm2, %xmm0
    413 	lea	16(%edx), %edx
    414 	pmovmskb %xmm0, %eax
    415 	lea	16(%ecx), %ecx
    416 
    417 	test	%eax, %eax
    418 	jnz	L(Shl8LoopExit)
    419 
    420 	palignr	$8, %xmm3, %xmm2
    421 	movaps	%xmm2, (%edx)
    422 	lea	24(%ecx), %ecx
    423 	lea	16(%edx), %edx
    424 
        /* Round the aligned load address down to a 64-byte boundary, move
           the destination by the same amount, then bias %ecx by -8 so the
           8/24/40/56(%ecx) loads below are the four aligned blocks of one
           64-byte group.  */
    425 	mov	%ecx, %eax
    426 	and	$-0x40, %ecx
    427 	sub	%ecx, %eax
    428 	lea	-8(%ecx), %ecx
    429 	sub	%eax, %edx
    430 
    431 	movaps	-8(%ecx), %xmm1
    432 
        /* 64 bytes per iteration; the byte-wise minimum of the four blocks
           is compared with zero, and any hit (real or spurious) is
           re-examined block by block at L(Shl8Start).  */
    433 L(Shl8LoopStart):
    434 	movaps	8(%ecx), %xmm2
    435 	movaps	24(%ecx), %xmm3
    436 	movaps	%xmm3, %xmm6
    437 	movaps	40(%ecx), %xmm4
    438 	movaps	%xmm4, %xmm7
    439 	movaps	56(%ecx), %xmm5
    440 	pminub	%xmm2, %xmm6
    441 	pminub	%xmm5, %xmm7
    442 	pminub	%xmm6, %xmm7
    443 	pcmpeqd	%xmm0, %xmm7
    444 	pmovmskb %xmm7, %eax
    445 	movaps	%xmm5, %xmm7
    446 	palignr	$8, %xmm4, %xmm5
    447 	palignr	$8, %xmm3, %xmm4
    448 	test	%eax, %eax
    449 	jnz	L(Shl8Start)
    450 
    451 	palignr	$8, %xmm2, %xmm3
    452 	lea	64(%ecx), %ecx
    453 	palignr	$8, %xmm1, %xmm2
        /* The unshifted last block becomes "previous block" next time.  */
    454 	movaps	%xmm7, %xmm1
    455 	movaps	%xmm5, 48(%edx)
    456 	movaps	%xmm4, 32(%edx)
    457 	movaps	%xmm3, 16(%edx)
    458 	movaps	%xmm2, (%edx)
    459 	lea	64(%edx), %edx
    460 	jmp	L(Shl8LoopStart)
    461 
        /* A zero element was found in the aligned block at 8(%ecx); %eax
           holds its mask.  Copy the 8 bytes that precede that block, step
           both cursors onto it, and finish according to which element is
           the terminator.  */
    462 L(Shl8LoopExit):
    463 	movlpd	(%ecx), %xmm0
    464 	movlpd	%xmm0, (%edx)
    465 	POP	(%esi)
    466 	add	$8, %edx
    467 	add	$8, %ecx
        /* %al == 0: the terminator is element 2 or 3.  */
    468 	test	%al, %al
    469 	jz	L(ExitHigh)
        /* Mask bit 0 set: element 0 is the terminator.  */
    470 	test	$0x01, %al
    471 	jnz	L(Exit4)
        /* Otherwise element 1 is the terminator: copy 8 bytes.  */
    472 	movlpd	(%ecx), %xmm0
    473 	movlpd	%xmm0, (%edx)
    474 	movl	%edi, %eax
    475 	RETURN
    476 
        /* Unwind annotation only: the code that follows is entered with
           %esi still pushed.  */
    477 	CFI_PUSH	(%esi)
    478 
        /* Source is 12 bytes past a 16-byte boundary while the destination
           is aligned.  Same scheme as the 4- and 8-byte variants, with
           palignr $12 joining the previous and next aligned blocks.  */
    479 	.p2align 4
    480 L(Shl12):
    481 	movaps	-12(%ecx), %xmm1
    482 	movaps	4(%ecx), %xmm2
        /* Also re-entered from L(Shl12LoopStart) to examine a suspicious
           64-byte group one block at a time.  */
    483 L(Shl12Start):
    484 	pcmpeqd	%xmm2, %xmm0
    485 	pmovmskb %xmm0, %eax
    486 	movaps	%xmm2, %xmm3
    487 
    488 	test	%eax, %eax
    489 	jnz	L(Shl12LoopExit)
    490 
    491 	palignr	$12, %xmm1, %xmm2
    492 	movaps	%xmm2, (%edx)
    493 	movaps	20(%ecx), %xmm2
    494 
    495 	pcmpeqd	%xmm2, %xmm0
    496 	lea	16(%edx), %edx
    497 	pmovmskb %xmm0, %eax
    498 	lea	16(%ecx), %ecx
    499 	movaps	%xmm2, %xmm1
    500 
    501 	test	%eax, %eax
    502 	jnz	L(Shl12LoopExit)
    503 
    504 	palignr	$12, %xmm3, %xmm2
    505 	movaps	%xmm2, (%edx)
    506 	movaps	20(%ecx), %xmm2
    507 
    508 	pcmpeqd	%xmm2, %xmm0
    509 	lea	16(%edx), %edx
    510 	pmovmskb %xmm0, %eax
    511 	lea	16(%ecx), %ecx
    512 	movaps	%xmm2, %xmm3
    513 
    514 	test	%eax, %eax
    515 	jnz	L(Shl12LoopExit)
    516 
    517 	palignr	$12, %xmm1, %xmm2
    518 	movaps	%xmm2, (%edx)
    519 	movaps	20(%ecx), %xmm2
    520 
    521 	pcmpeqd	%xmm2, %xmm0
    522 	lea	16(%edx), %edx
    523 	pmovmskb %xmm0, %eax
    524 	lea	16(%ecx), %ecx
    525 
    526 	test	%eax, %eax
    527 	jnz	L(Shl12LoopExit)
    528 
    529 	palignr	$12, %xmm3, %xmm2
    530 	movaps	%xmm2, (%edx)
    531 	lea	20(%ecx), %ecx
    532 	lea	16(%edx), %edx
    533 
        /* Round the aligned load address down to a 64-byte boundary, move
           the destination by the same amount, then bias %ecx by -4 so the
           4/20/36/52(%ecx) loads below are the four aligned blocks of one
           64-byte group.  */
    534 	mov	%ecx, %eax
    535 	and	$-0x40, %ecx
    536 	sub	%ecx, %eax
    537 	lea	-4(%ecx), %ecx
    538 	sub	%eax, %edx
    539 
    540 	movaps	-12(%ecx), %xmm1
    541 
        /* 64 bytes per iteration; the byte-wise minimum of the four blocks
           is compared with zero, and any hit (real or spurious) is
           re-examined block by block at L(Shl12Start).  */
    542 L(Shl12LoopStart):
    543 	movaps	4(%ecx), %xmm2
    544 	movaps	20(%ecx), %xmm3
    545 	movaps	%xmm3, %xmm6
    546 	movaps	36(%ecx), %xmm4
    547 	movaps	%xmm4, %xmm7
    548 	movaps	52(%ecx), %xmm5
    549 	pminub	%xmm2, %xmm6
    550 	pminub	%xmm5, %xmm7
    551 	pminub	%xmm6, %xmm7
    552 	pcmpeqd	%xmm0, %xmm7
    553 	pmovmskb %xmm7, %eax
    554 	movaps	%xmm5, %xmm7
    555 	palignr	$12, %xmm4, %xmm5
    556 	palignr	$12, %xmm3, %xmm4
    557 	test	%eax, %eax
    558 	jnz	L(Shl12Start)
    559 
    560 	palignr	$12, %xmm2, %xmm3
    561 	lea	64(%ecx), %ecx
    562 	palignr	$12, %xmm1, %xmm2
        /* The unshifted last block becomes "previous block" next time.  */
    563 	movaps	%xmm7, %xmm1
    564 	movaps	%xmm5, 48(%edx)
    565 	movaps	%xmm4, 32(%edx)
    566 	movaps	%xmm3, 16(%edx)
    567 	movaps	%xmm2, (%edx)
    568 	lea	64(%edx), %edx
    569 	jmp	L(Shl12LoopStart)
    570 
        /* A zero element was found in the aligned block at 4(%ecx); %eax
           holds its mask.  Copy the single element that precedes that block
           and set %esi = 4 so that the code this falls through into
           (after the alignment padding) steps both cursors onto the
           block.  */
    571 L(Shl12LoopExit):
    572 	movl	(%ecx), %esi
    573 	movl	%esi, (%edx)
    574 	mov	$4, %esi
    575 
        /* Common tail.  On entry %esi is the byte offset to add to both
           cursors so that they point at the 16-byte chunk in which %eax (one
           group of four mask bits per 32-bit element) recorded a zero
           element.  Copies everything up to and including the terminator and
           returns the original destination saved in %edi.  */
    576 	.p2align 4
    577 L(CopyFrom1To16Bytes):
    578 	add	%esi, %edx
    579 	add	%esi, %ecx
    580 
    581 	POP	(%esi)
        /* %al == 0: the terminator is element 2 or 3.  */
    582 	test	%al, %al
    583 	jz	L(ExitHigh)
        /* Mask bit 0 set: element 0 is the terminator.  */
    584 	test	$0x01, %al
    585 	jnz	L(Exit4)
        /* Element 1 is the terminator: copy 8 bytes.  */
    586 L(Exit8):
    587 	movlpd	(%ecx), %xmm0
    588 	movlpd	%xmm0, (%edx)
    589 	movl	%edi, %eax
    590 	RETURN
    591 
        /* Mask bit 8 (bit 0 of %ah) set: element 2 is the terminator, copy
           12 bytes.  Otherwise it is element 3: copy all 16 bytes.  */
    592 	.p2align 4
    593 L(ExitHigh):
    594 	test	$0x01, %ah
    595 	jnz	L(Exit12)
    596 L(Exit16):
    597 	movdqu	(%ecx), %xmm0
    598 	movdqu	%xmm0, (%edx)
    599 	movl	%edi, %eax
    600 	RETURN
    601 
        /* Element 0 is the terminator: copy 4 bytes.  */
    602 	.p2align 4
    603 L(Exit4):
    604 	movl	(%ecx), %eax
    605 	movl	%eax, (%edx)
    606 	movl	%edi, %eax
    607 	RETURN
    608 
        /* Element 2 is the terminator: copy 8 + 4 bytes.  */
    609 	.p2align 4
    610 L(Exit12):
    611 	movlpd	(%ecx), %xmm0
    612 	movlpd	%xmm0, (%edx)
    613 	movl	8(%ecx), %eax
    614 	movl	%eax, 8(%edx)
    615 	movl	%edi, %eax
    616 	RETURN
    617 
        /* Unwind annotation only: the stubs below are reached from the
           short-string checks at function entry, before %edi is pushed.  */
    618 CFI_POP	(%edi)
    619 
        /* Short-string exits.  %ecx = source and %edx = destination, both
           still at their entry values.  Each stub copies 4/8/12/16 bytes
           (terminator included) and returns the destination in %eax with a
           plain ret, since nothing was pushed.  */
    620 	.p2align 4
    621 L(ExitTail4):
    622 	movl	(%ecx), %eax
    623 	movl	%eax, (%edx)
    624 	movl	%edx, %eax
    625 	ret
    626 
    627 	.p2align 4
    628 L(ExitTail8):
    629 	movlpd	(%ecx), %xmm0
    630 	movlpd	%xmm0, (%edx)
    631 	movl	%edx, %eax
    632 	ret
    633 
    634 	.p2align 4
    635 L(ExitTail12):
    636 	movlpd	(%ecx), %xmm0
    637 	movlpd	%xmm0, (%edx)
    638 	movl	8(%ecx), %eax
    639 	movl	%eax, 8(%edx)
    640 	movl	%edx, %eax
    641 	ret
    642 
    643 	.p2align 4
    644 L(ExitTail16):
    645 	movdqu	(%ecx), %xmm0
    646 	movdqu	%xmm0, (%edx)
    647 	movl	%edx, %eax
    648 	ret
    649 
        /* The function is closed here only in the standalone wcscpy build.
           NOTE(review): when USE_AS_WCSCAT is defined the including file
           presumably emits its own END -- confirm.  */
    650 #ifndef USE_AS_WCSCAT
    651 END (wcscpy)
    652 #endif
    653