Home | History | Annotate | Download | only in string
      1 /*
      2 Copyright (c) 2011, Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     31 #ifndef L
        /* L(label) pastes ".L" in front of label, giving an assembler-local symbol name. */
     32 # define L(label)	.L##label
     33 #endif
     34 
        /* Each cfi_* wrapper below expands to the assembler directive of the same name. */
        /* Every wrapper is guarded by #ifndef, so a definition made earlier takes precedence. */
     35 #ifndef cfi_startproc
     36 # define cfi_startproc			.cfi_startproc
     37 #endif
     38 
     39 #ifndef cfi_endproc
     40 # define cfi_endproc			.cfi_endproc
     41 #endif
     42 
     43 #ifndef cfi_rel_offset
     44 # define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
     45 #endif
     46 
     47 #ifndef cfi_restore
     48 # define cfi_restore(reg)		.cfi_restore reg
     49 #endif
     50 
     51 #ifndef cfi_adjust_cfa_offset
     52 # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
     53 #endif
     54 
     55 #ifndef cfi_remember_state
     56 # define cfi_remember_state		.cfi_remember_state
     57 #endif
     58 
     59 #ifndef cfi_restore_state
     60 # define cfi_restore_state		.cfi_restore_state
     61 #endif
     62 
     63 #ifndef ENTRY
        /* ENTRY(name): mark name as a global function symbol, align to 16 bytes (.p2align 4), */
        /* emit the label and open a CFI region with cfi_startproc. */
     64 # define ENTRY(name)			\
     65 	.type name,  @function; 	\
     66 	.globl name;			\
     67 	.p2align 4;			\
     68 name:					\
     69 	cfi_startproc
     70 #endif
     71 
     72 #ifndef END
        /* END(name): close the CFI region and set the symbol size to the distance from name to here. */
     73 # define END(name)			\
     74 	cfi_endproc;			\
     75 	.size name, .-name
     76 #endif
     77 
        /* CFI_PUSH(REG): unwind bookkeeping for a 4-byte push; the CFA offset grows by 4 and */
        /* REG is recorded as saved at offset 0 from the current CFA register.  Emits no instruction. */
     78 #define CFI_PUSH(REG)			\
     79   cfi_adjust_cfa_offset (4);		\
     80   cfi_rel_offset (REG, 0)
     81 
        /* CFI_POP(REG): unwind bookkeeping for a 4-byte pop; the CFA offset shrinks by 4 and */
        /* REG is marked as restored.  Emits no instruction. */
     82 #define CFI_POP(REG)			\
     83   cfi_adjust_cfa_offset (-4);		\
     84   cfi_restore (REG)
     85 
        /* PUSH/POP: the real pushl/popl followed by the matching CFI bookkeeping. */
     86 #define PUSH(REG)	pushl REG; CFI_PUSH (REG)
     87 #define POP(REG)	popl REG; CFI_POP (REG)
     88 
     89 #ifndef STRCAT
        /* Exported symbol name; strcat unless the includer has already chosen another name. */
     90 # define STRCAT	strcat
     91 #endif
     92 
        /* Stack displacements used with %esp.  STR1 expands to 4+4 = 8 and STR2 to 12. */
        /* Both are read after the single PUSH of %edi at function entry. */
     93 #define PARMS	4
     94 #define STR1	PARMS+4
     95 #define STR2	STR1+4
     96 
     97 #ifdef USE_AS_STRNCAT
        /* LEN expands to 20; it is read after %ebx has been pushed as well, */
        /* which is why it is 8 (not 4) beyond STR2. */
     98 # define LEN	STR2+8
     99 #endif
    100 
        /* NOTE(review): defined ahead of the #include lines below; presumably tested by the included sources - confirm there. */
    101 #define USE_AS_STRCAT
    102 
    103 	.section .text.ssse3,"ax",@progbits
    104 ENTRY (STRCAT)
        /* Entry: save %edi, load the STR1 stack argument into %edi and %edx, */
        /* then run the code textually included from sse2-strlen-atom.S. */
        /* RETURN is defined as a jump to L(StrcpyAtom) before the include. */
        /* NOTE(review): presumably the included code uses RETURN where it would return, */
        /* leaving the length of the string at %edi in %eax - confirm in sse2-strlen-atom.S. */
    105 	PUSH	(%edi)
    106 	mov	STR1(%esp), %edi	/* %edi = first argument; every exit returns it in %eax */
    107 	mov	%edi, %edx
    108 
    109 #define RETURN	jmp	L(StrcpyAtom)
    110 #include "sse2-strlen-atom.S"
    111 
    112 L(StrcpyAtom):
        /* Start of the copy phase.  %ecx = STR2 argument (bytes are read from here), */
        /* %edx = %edi + %eax (bytes are written here). */
        /* NOTE(review): assumes %eax is the length of the string at %edi on arrival - confirm in sse2-strlen-atom.S. */
    113 	mov	STR2(%esp), %ecx
    114 	lea	(%edi, %eax), %edx
    115 #ifdef USE_AS_STRNCAT
        /* strncat only: save %ebx and load the LEN argument into it. */
        /* A count of 0 returns at once; counts of 1..8 (unsigned compare) use the bounded byte-wise path. */
    116 	PUSH	(%ebx)
    117 	mov	LEN(%esp), %ebx
    118 	test	%ebx, %ebx
    119 	jz	L(StrncatExit0)
    120 	cmp	$8, %ebx
    121 	jbe	L(StrncpyExit8Bytes)
    122 #endif
        /* Test source bytes 0..8 for zero one at a time.  A zero at offset k */
        /* dispatches to L(Exit<k+1>), which copies k+1 bytes including that zero byte. */
    123 	cmpb	$0, (%ecx)
    124 	jz	L(Exit1)
    125 	cmpb	$0, 1(%ecx)
    126 	jz	L(Exit2)
    127 	cmpb	$0, 2(%ecx)
    128 	jz	L(Exit3)
    129 	cmpb	$0, 3(%ecx)
    130 	jz	L(Exit4)
    131 	cmpb	$0, 4(%ecx)
    132 	jz	L(Exit5)
    133 	cmpb	$0, 5(%ecx)
    134 	jz	L(Exit6)
    135 	cmpb	$0, 6(%ecx)
    136 	jz	L(Exit7)
    137 	cmpb	$0, 7(%ecx)
    138 	jz	L(Exit8)
    139 	cmpb	$0, 8(%ecx)
    140 	jz	L(Exit9)
    141 #ifdef USE_AS_STRNCAT
        /* strncat only: bytes 0..8 are non-zero and the count is above 8; */
        /* counts below 16 (so 9..15) continue on the bounded path. */
    142 	cmp	$16, %ebx
    143 	jb	L(StrncpyExit15Bytes)
    144 #endif
        /* Same per-byte test for source offsets 9..15. */
    145 	cmpb	$0, 9(%ecx)
    146 	jz	L(Exit10)
    147 	cmpb	$0, 10(%ecx)
    148 	jz	L(Exit11)
    149 	cmpb	$0, 11(%ecx)
    150 	jz	L(Exit12)
    151 	cmpb	$0, 12(%ecx)
    152 	jz	L(Exit13)
    153 	cmpb	$0, 13(%ecx)
    154 	jz	L(Exit14)
    155 	cmpb	$0, 14(%ecx)
    156 	jz	L(Exit15)
    157 	cmpb	$0, 15(%ecx)
    158 	jz	L(Exit16)
    159 #ifdef USE_AS_STRNCAT
        /* strncat only: the first 16 source bytes are all non-zero here.  If the count */
        /* is exactly 16, copy those 16 bytes and store the terminator after them. */
    160 	cmp	$16, %ebx
    161 	je	L(StrncatExit16)
    162 
        /* RETURN1 pops the saved registers and returns.  The CFI_PUSH pair after ret emits */
        /* no instructions; it re-declares, for the code assembled after this point, the frame */
        /* state in which both registers are still on the stack.  The pair is listed in the */
        /* real push order (%edi at entry, then %ebx) so that each register is recorded at the */
        /* stack slot it actually occupies; the reverse order would swap the two save slots */
        /* in the unwind information. */
    163 # define RETURN1	POP (%ebx); POP (%edi);	ret; \
    164 	CFI_PUSH (%edi); CFI_PUSH (%ebx)
        /* NOTE(review): presumably selects the length-bounded paths in the included file - confirm in ssse3-strcpy-atom.S. */
    165 # define USE_AS_STRNCPY
    166 #else
        /* strcat: only %edi was pushed, so only %edi is restored and re-declared. */
    167 # define RETURN1	POP(%edi); ret; CFI_PUSH(%edi)
    168 #endif
        /* The main copy code is textually included here; the labels below are its exit paths. */
    169 #include "ssse3-strcpy-atom.S"
    170 
    171 	.p2align 4
    172 L(CopyFrom1To16Bytes):
        /* Exit path for a block of up to 16 bytes.  %esi is added to both pointers and then */
        /* popped, so the code that jumps here has %esi saved on the stack. */
        /* Bit k of %eax set dispatches to L(Exit<k+1>), lowest bit first. */
        /* NOTE(review): %eax is presumably a per-byte mask of zero bytes built by the included code - confirm in ssse3-strcpy-atom.S. */
    173 	add	%esi, %edx
    174 	add	%esi, %ecx
    175 
    176 	POP	(%esi)
    177 	test	%al, %al
    178 	jz	L(ExitHigh)	/* nothing in bits 0..7: look at bits 8..15 */
    179 	test	$0x01, %al
    180 	jnz	L(Exit1)
    181 	test	$0x02, %al
    182 	jnz	L(Exit2)
    183 	test	$0x04, %al
    184 	jnz	L(Exit3)
    185 	test	$0x08, %al
    186 	jnz	L(Exit4)
    187 	test	$0x10, %al
    188 	jnz	L(Exit5)
    189 	test	$0x20, %al
    190 	jnz	L(Exit6)
    191 	test	$0x40, %al
    192 	jnz	L(Exit7)
        /* %al is non-zero and bits 0..6 are clear, so bit 7 is set: copy 8 bytes. */
    193 	movlpd	(%ecx), %xmm0
    194 	movlpd	%xmm0, (%edx)
    195 	movl	%edi, %eax	/* return value = %edi */
    196 	RETURN1
    197 
    198 	.p2align 4
    199 L(ExitHigh):
        /* Reached from L(CopyFrom1To16Bytes) when %al is zero.  Bit k of %ah set */
        /* dispatches to L(Exit<k+9>), lowest bit first. */
    200 	test	$0x01, %ah
    201 	jnz	L(Exit9)
    202 	test	$0x02, %ah
    203 	jnz	L(Exit10)
    204 	test	$0x04, %ah
    205 	jnz	L(Exit11)
    206 	test	$0x08, %ah
    207 	jnz	L(Exit12)
    208 	test	$0x10, %ah
    209 	jnz	L(Exit13)
    210 	test	$0x20, %ah
    211 	jnz	L(Exit14)
    212 	test	$0x40, %ah
    213 	jnz	L(Exit15)
        /* None of bits 0..6 of %ah is set: copy all 16 bytes as two 8-byte moves. */
    214 	movlpd	(%ecx), %xmm0
    215 	movlpd	8(%ecx), %xmm1
    216 	movlpd	%xmm0, (%edx)
    217 	movlpd	%xmm1, 8(%edx)
    218 	movl	%edi, %eax	/* return value = %edi */
    219 	RETURN1
    220 
    221 	.p2align 4
    222 L(StrncatExit1):
        /* Exit stubs for 1..8 bytes.  L(Exit<N>) copies exactly N bytes from (%ecx) to (%edx) */
        /* and returns %edi in %eax.  L(StrncatExit<N>) first stores %bh at offset N of the */
        /* destination and then falls into L(Exit<N>).  Every jump to L(StrncatExit<N>) in this */
        /* file follows a compare of %ebx with N that found them equal, so %bh is zero there */
        /* and the stored byte acts as the string terminator. */
    223 	movb	%bh, 1(%edx)
    224 L(Exit1):
    225 	movb	(%ecx), %al
    226 	movb	%al, (%edx)
    227 	movl	%edi, %eax
    228 	RETURN1
    229 
    230 	.p2align 4
    231 L(StrncatExit2):
    232 	movb	%bh, 2(%edx)
    233 L(Exit2):
    234 	movw	(%ecx), %ax
    235 	movw	%ax, (%edx)
    236 	movl	%edi, %eax
    237 	RETURN1
    238 
    239 	.p2align 4
    240 L(StrncatExit3):
    241 	movb	%bh, 3(%edx)
    242 L(Exit3):
    243 	movw	(%ecx), %ax	/* bytes 0..1 */
    244 	movw	%ax, (%edx)
    245 	movb	2(%ecx), %al	/* byte 2 */
    246 	movb	%al, 2(%edx)
    247 	movl	%edi, %eax
    248 	RETURN1
    249 
    250 	.p2align 4
    251 L(StrncatExit4):
    252 	movb	%bh, 4(%edx)
    253 L(Exit4):
    254 	movl	(%ecx), %eax
    255 	movl	%eax, (%edx)
    256 	movl	%edi, %eax
    257 	RETURN1
    258 
    259 	.p2align 4
    260 L(StrncatExit5):
    261 	movb	%bh, 5(%edx)
    262 L(Exit5):
    263 	movl	(%ecx), %eax	/* bytes 0..3 */
    264 	movl	%eax, (%edx)
    265 	movb	4(%ecx), %al	/* byte 4 */
    266 	movb	%al, 4(%edx)
    267 	movl	%edi, %eax
    268 	RETURN1
    269 
    270 	.p2align 4
    271 L(StrncatExit6):
    272 	movb	%bh, 6(%edx)
    273 L(Exit6):
    274 	movl	(%ecx), %eax	/* bytes 0..3 */
    275 	movl	%eax, (%edx)
    276 	movw	4(%ecx), %ax	/* bytes 4..5 */
    277 	movw	%ax, 4(%edx)
    278 	movl	%edi, %eax
    279 	RETURN1
    280 
    281 	.p2align 4
    282 L(StrncatExit7):
    283 	movb	%bh, 7(%edx)
    284 L(Exit7):
    285 	movl	(%ecx), %eax	/* bytes 0..3 */
    286 	movl	%eax, (%edx)
    287 	movl	3(%ecx), %eax	/* bytes 3..6; byte 3 is copied twice on purpose */
    288 	movl	%eax, 3(%edx)
    289 	movl	%edi, %eax
    290 	RETURN1
    291 
    292 	.p2align 4
    293 L(StrncatExit8):
    294 	movb	%bh, 8(%edx)
    295 L(Exit8):
    296 	movlpd	(%ecx), %xmm0	/* one 8-byte move */
    297 	movlpd	%xmm0, (%edx)
    298 	movl	%edi, %eax
    299 	RETURN1
    300 
    301 	.p2align 4
    302 L(StrncatExit9):
        /* Exit stubs for 9..16 bytes.  L(Exit<N>) copies exactly N bytes from (%ecx) to (%edx) */
        /* and returns %edi in %eax; the first 8 bytes always go through %xmm0 and the tail uses */
        /* a second, possibly overlapping, move.  L(StrncatExit<N>) first stores %bh at offset N */
        /* of the destination and then falls into L(Exit<N>).  Every jump to L(StrncatExit<N>) in */
        /* this file follows a compare of %ebx with N that found them equal, so %bh is zero there. */
    303 	movb	%bh, 9(%edx)
    304 L(Exit9):
    305 	movlpd	(%ecx), %xmm0
    306 	movlpd	%xmm0, (%edx)
    307 	movb	8(%ecx), %al	/* byte 8 */
    308 	movb	%al, 8(%edx)
    309 	movl	%edi, %eax
    310 	RETURN1
    311 
    312 	.p2align 4
    313 L(StrncatExit10):
    314 	movb	%bh, 10(%edx)
    315 L(Exit10):
    316 	movlpd	(%ecx), %xmm0
    317 	movlpd	%xmm0, (%edx)
    318 	movw	8(%ecx), %ax	/* bytes 8..9 */
    319 	movw	%ax, 8(%edx)
    320 	movl	%edi, %eax
    321 	RETURN1
    322 
    323 	.p2align 4
    324 L(StrncatExit11):
    325 	movb	%bh, 11(%edx)
    326 L(Exit11):
    327 	movlpd	(%ecx), %xmm0
    328 	movlpd	%xmm0, (%edx)
    329 	movl	7(%ecx), %eax	/* bytes 7..10; byte 7 is copied twice on purpose */
    330 	movl	%eax, 7(%edx)
    331 	movl	%edi, %eax
    332 	RETURN1
    333 
    334 	.p2align 4
    335 L(StrncatExit12):
    336 	movb	%bh, 12(%edx)
    337 L(Exit12):
    338 	movlpd	(%ecx), %xmm0
    339 	movlpd	%xmm0, (%edx)
    340 	movl	8(%ecx), %eax	/* bytes 8..11 */
    341 	movl	%eax, 8(%edx)
    342 	movl	%edi, %eax
    343 	RETURN1
    344 
    345 	.p2align 4
    346 L(StrncatExit13):
    347 	movb	%bh, 13(%edx)
    348 L(Exit13):
    349 	movlpd	(%ecx), %xmm0
    350 	movlpd	%xmm0, (%edx)
    351 	movlpd	5(%ecx), %xmm0	/* bytes 5..12, overlapping bytes 5..7 */
    352 	movlpd	%xmm0, 5(%edx)
    353 	movl	%edi, %eax
    354 	RETURN1
    355 
    356 	.p2align 4
    357 L(StrncatExit14):
    358 	movb	%bh, 14(%edx)
    359 L(Exit14):
    360 	movlpd	(%ecx), %xmm0
    361 	movlpd	%xmm0, (%edx)
    362 	movlpd	6(%ecx), %xmm0	/* bytes 6..13, overlapping bytes 6..7 */
    363 	movlpd	%xmm0, 6(%edx)
    364 	movl	%edi, %eax
    365 	RETURN1
    366 
    367 	.p2align 4
    368 L(StrncatExit15):
    369 	movb	%bh, 15(%edx)
    370 L(Exit15):
    371 	movlpd	(%ecx), %xmm0
    372 	movlpd	%xmm0, (%edx)
    373 	movlpd	7(%ecx), %xmm0	/* bytes 7..14, overlapping byte 7 */
    374 	movlpd	%xmm0, 7(%edx)
    375 	movl	%edi, %eax
    376 	RETURN1
    377 
    378 	.p2align 4
    379 L(StrncatExit16):
    380 	movb	%bh, 16(%edx)
    381 L(Exit16):
    382 	movlpd	(%ecx), %xmm0	/* bytes 0..7 */
    383 	movlpd	8(%ecx), %xmm1	/* bytes 8..15 */
    384 	movlpd	%xmm0, (%edx)
    385 	movlpd	%xmm1, 8(%edx)
    386 	movl	%edi, %eax
    387 	RETURN1
    388 
    389 #ifdef USE_AS_STRNCPY
        /* Length-bounded exit paths; USE_AS_STRNCPY is defined in this file only for the strncat build. */
    390 
        /* Unwind bookkeeping only: the code below is entered with %esi on the stack (it is popped below). */
    391 	CFI_PUSH(%esi)
    392 
    393 	.p2align 4
    394 L(CopyFrom1To16BytesCase2):
        /* Reached with a non-zero mask in %eax (see L(CopyFrom1To16BytesCase2OrCase3)). */
        /* For each offset in turn, a set mask bit wins first; otherwise the count in %ebx */
        /* reaching that offset ends the copy through L(StrncatExit<N>). */
        /* NOTE(review): the add of 16 presumably undoes a subtraction made in the included code, */
        /* leaving the remaining count in %ebx - confirm in ssse3-strcpy-atom.S. */
    395 	add	$16, %ebx
    396 	add	%esi, %ecx
    397 	lea	(%esi, %edx), %esi	/* park the adjusted destination in %esi; %edx is scratch next */
        /* Flag trick: %dh becomes bit 15 of (%ebx - 9), which is set when that difference is */
        /* a small negative number (%ebx below 9), OR-ed with %al.  ZF therefore ends up set */
        /* only when that bit is clear and %al is zero.  The lea and the pop that */
        /* follow do not modify flags, so jz still sees the result of the or. */
    398 	lea	-9(%ebx), %edx
    399 	and	$1<<7, %dh
    400 	or	%al, %dh
    401 	lea	(%esi), %edx	/* %edx = adjusted destination again */
    402 	POP	(%esi)
    403 	jz	L(ExitHighCase2)
    404 
    405 	test	$0x01, %al
    406 	jnz	L(Exit1)
    407 	cmp	$1, %ebx
    408 	je	L(StrncatExit1)
    409 	test	$0x02, %al
    410 	jnz	L(Exit2)
    411 	cmp	$2, %ebx
    412 	je	L(StrncatExit2)
    413 	test	$0x04, %al
    414 	jnz	L(Exit3)
    415 	cmp	$3, %ebx
    416 	je	L(StrncatExit3)
    417 	test	$0x08, %al
    418 	jnz	L(Exit4)
    419 	cmp	$4, %ebx
    420 	je	L(StrncatExit4)
    421 	test	$0x10, %al
    422 	jnz	L(Exit5)
    423 	cmp	$5, %ebx
    424 	je	L(StrncatExit5)
    425 	test	$0x20, %al
    426 	jnz	L(Exit6)
    427 	cmp	$6, %ebx
    428 	je	L(StrncatExit6)
    429 	test	$0x40, %al
    430 	jnz	L(Exit7)
    431 	cmp	$7, %ebx
    432 	je	L(StrncatExit7)
        /* Fall-through: copy 8 bytes, then store a zero byte at offset 7 or 8. */
    433 	movlpd	(%ecx), %xmm0
    434 	movlpd	%xmm0, (%edx)
    435 	lea	7(%edx), %eax
    436 	cmpb	$1, (%eax)	/* carry is set only when destination byte 7 is zero */
    437 	sbb	$-1, %eax	/* %eax += 1 - carry: stay on byte 7 if it is zero, else move to byte 8 */
    438 	xor	%cl, %cl	/* zero byte to store; %ecx is not read again on this path */
    439 	movb	%cl, (%eax)
    440 	movl	%edi, %eax	/* return value = %edi */
    441 	RETURN1
    442 
    443 	.p2align 4
    444 L(ExitHighCase2):
        /* Continuation of L(CopyFrom1To16BytesCase2) for offsets 8..15: a set bit k of %ah */
        /* dispatches to L(Exit<k+9>); otherwise %ebx equal to that length dispatches to */
        /* L(StrncatExit<N>). */
    445 	test	$0x01, %ah
    446 	jnz	L(Exit9)
    447 	cmp	$9, %ebx
    448 	je	L(StrncatExit9)
    449 	test	$0x02, %ah
    450 	jnz	L(Exit10)
    451 	cmp	$10, %ebx
    452 	je	L(StrncatExit10)
    453 	test	$0x04, %ah
    454 	jnz	L(Exit11)
    455 	cmp	$11, %ebx
    456 	je	L(StrncatExit11)
    457 	test	$0x8, %ah
    458 	jnz	L(Exit12)
    459 	cmp	$12, %ebx
    460 	je	L(StrncatExit12)
    461 	test	$0x10, %ah
    462 	jnz	L(Exit13)
    463 	cmp	$13, %ebx
    464 	je	L(StrncatExit13)
    465 	test	$0x20, %ah
    466 	jnz	L(Exit14)
    467 	cmp	$14, %ebx
    468 	je	L(StrncatExit14)
    469 	test	$0x40, %ah
    470 	jnz	L(Exit15)
    471 	cmp	$15, %ebx
    472 	je	L(StrncatExit15)
        /* Fall-through: copy 16 bytes.  No separate terminator is stored on this path. */
        /* NOTE(review): presumably bit 7 of %ah is set here, so source byte 15 is the zero byte */
        /* and the copy already carries it - confirm against the code that builds %eax. */
    473 	movlpd	(%ecx), %xmm0
    474 	movlpd	%xmm0, (%edx)
    475 	movlpd	8(%ecx), %xmm1
    476 	movlpd	%xmm1, 8(%edx)
    477 	movl	%edi, %eax	/* return value = %edi */
    478 	RETURN1
    479 
    480 	CFI_PUSH(%esi)
        /* The CFI_PUSH above is unwind bookkeeping only: the code below is entered with %esi on the stack. */
    481 
    482 L(CopyFrom1To16BytesCase2OrCase3):
        /* Non-zero mask in %eax: handle as Case2; otherwise fall through to Case3. */
    483 	test	%eax, %eax
    484 	jnz	L(CopyFrom1To16BytesCase2)
    485 
    486 	.p2align 4
    487 L(CopyFrom1To16BytesCase3):
        /* Case3 never looks at the mask; the exit is chosen purely from the count in %ebx. */
        /* NOTE(review): the add of 16 presumably undoes a subtraction made in the included code - confirm in ssse3-strcpy-atom.S. */
    488 	add	$16, %ebx
    489 	add	%esi, %edx
    490 	add	%esi, %ecx
    491 
    492 	POP	(%esi)
    493 
    494 	cmp	$8, %ebx
    495 	ja	L(ExitHighCase3)	/* unsigned: counts above 8 */
    496 	cmp	$1, %ebx
    497 	je	L(StrncatExit1)
    498 	cmp	$2, %ebx
    499 	je	L(StrncatExit2)
    500 	cmp	$3, %ebx
    501 	je	L(StrncatExit3)
    502 	cmp	$4, %ebx
    503 	je	L(StrncatExit4)
    504 	cmp	$5, %ebx
    505 	je	L(StrncatExit5)
    506 	cmp	$6, %ebx
    507 	je	L(StrncatExit6)
    508 	cmp	$7, %ebx
    509 	je	L(StrncatExit7)
        /* Fall-through: copy 8 bytes and store %bh at destination offset 8. */
        /* %ebx is at most 8 on this path, so %bh is zero. */
    510 	movlpd	(%ecx), %xmm0
    511 	movlpd	%xmm0, (%edx)
    512 	movb	%bh, 8(%edx)
    513 	movl	%edi, %eax	/* return value = %edi */
    514 	RETURN1
    515 
    516 	.p2align 4
    517 L(ExitHighCase3):
        /* Reached from L(CopyFrom1To16BytesCase3) when %ebx is above 8. */
        /* Counts 9..15 dispatch to the matching L(StrncatExit<N>). */
    518 	cmp	$9, %ebx
    519 	je	L(StrncatExit9)
    520 	cmp	$10, %ebx
    521 	je	L(StrncatExit10)
    522 	cmp	$11, %ebx
    523 	je	L(StrncatExit11)
    524 	cmp	$12, %ebx
    525 	je	L(StrncatExit12)
    526 	cmp	$13, %ebx
    527 	je	L(StrncatExit13)
    528 	cmp	$14, %ebx
    529 	je	L(StrncatExit14)
    530 	cmp	$15, %ebx
    531 	je	L(StrncatExit15)
        /* Fall-through: copy 16 bytes and store %bh at destination offset 16. */
        /* NOTE(review): this assumes %ebx is 16 here, which makes %bh zero - confirm the range of %ebx with the callers. */
    532 	movlpd	(%ecx), %xmm0
    533 	movlpd	%xmm0, (%edx)
    534 	movlpd	8(%ecx), %xmm1
    535 	movlpd	%xmm1, 8(%edx)
    536 	movb	%bh, 16(%edx)
    537 	movl	%edi, %eax	/* return value = %edi */
    538 	RETURN1
    539 
    540 	.p2align 4
    541 L(StrncatExit0):
        /* Reached from L(StrcpyAtom) when the LEN argument is zero: nothing is copied. */
    542 	movl	%edi, %eax	/* return value = %edi */
    543 	RETURN1
    544 
    545 	.p2align 4
    546 L(StrncpyExit15Bytes):
        /* Reached from L(StrcpyAtom) with source bytes 0..8 already known to be non-zero and */
        /* the count in %ebx between 9 and 15.  For each further offset the count is checked */
        /* first (L(StrncatExit<N>)), then the next source byte is tested for zero (L(Exit<N+1>)). */
    547 	cmp	$9, %ebx
    548 	je	L(StrncatExit9)
    549 	cmpb	$0, 9(%ecx)
    550 	jz	L(Exit10)
    551 	cmp	$10, %ebx
    552 	je	L(StrncatExit10)
    553 	cmpb	$0, 10(%ecx)
    554 	jz	L(Exit11)
    555 	cmp	$11, %ebx
    556 	je	L(StrncatExit11)
    557 	cmpb	$0, 11(%ecx)
    558 	jz	L(Exit12)
    559 	cmp	$12, %ebx
    560 	je	L(StrncatExit12)
    561 	cmpb	$0, 12(%ecx)
    562 	jz	L(Exit13)
    563 	cmp	$13, %ebx
    564 	je	L(StrncatExit13)
    565 	cmpb	$0, 13(%ecx)
    566 	jz	L(Exit14)
    567 	cmp	$14, %ebx
    568 	je	L(StrncatExit14)
        /* Fall-through: the count is 15.  Copy 15 bytes as two overlapping 8-byte moves, */
        /* then store the terminator at offset 14 if that byte is zero, otherwise at offset 15. */
    569 	movlpd	(%ecx), %xmm0	/* bytes 0..7 */
    570 	movlpd	%xmm0, (%edx)
    571 	movlpd	7(%ecx), %xmm0	/* bytes 7..14 */
    572 	movlpd	%xmm0, 7(%edx)
    573 	lea	14(%edx), %eax
    574 	cmpb	$1, (%eax)	/* carry is set only when destination byte 14 is zero */
    575 	sbb	$-1, %eax	/* %eax += 1 - carry */
    576 	movb	%bh, (%eax)	/* %bh is zero because %ebx is below 16 */
    577 	movl	%edi, %eax	/* return value = %edi */
    578 	RETURN1
    579 
    580 	.p2align 4
    581 L(StrncpyExit8Bytes):
        /* Reached from L(StrcpyAtom) with the count in %ebx between 1 and 8.  For each offset */
        /* the source byte is tested for zero first (L(Exit<N>)), then the count is checked */
        /* (L(StrncatExit<N>)). */
    582 	cmpb	$0, (%ecx)
    583 	jz	L(Exit1)
    584 	cmp	$1, %ebx
    585 	je	L(StrncatExit1)
    586 	cmpb	$0, 1(%ecx)
    587 	jz	L(Exit2)
    588 	cmp	$2, %ebx
    589 	je	L(StrncatExit2)
    590 	cmpb	$0, 2(%ecx)
    591 	jz	L(Exit3)
    592 	cmp	$3, %ebx
    593 	je	L(StrncatExit3)
    594 	cmpb	$0, 3(%ecx)
    595 	jz	L(Exit4)
    596 	cmp	$4, %ebx
    597 	je	L(StrncatExit4)
    598 	cmpb	$0, 4(%ecx)
    599 	jz	L(Exit5)
    600 	cmp	$5, %ebx
    601 	je	L(StrncatExit5)
    602 	cmpb	$0, 5(%ecx)
    603 	jz	L(Exit6)
    604 	cmp	$6, %ebx
    605 	je	L(StrncatExit6)
    606 	cmpb	$0, 6(%ecx)
    607 	jz	L(Exit7)
    608 	cmp	$7, %ebx
    609 	je	L(StrncatExit7)
        /* Fall-through: the count is 8.  Copy 8 bytes, then store the terminator at offset 7 */
        /* if that byte is zero, otherwise at offset 8. */
    610 	movlpd	(%ecx), %xmm0
    611 	movlpd	%xmm0, (%edx)
    612 	lea	7(%edx), %eax
    613 	cmpb	$1, (%eax)	/* carry is set only when destination byte 7 is zero */
    614 	sbb	$-1, %eax	/* %eax += 1 - carry */
    615 	movb	%bh, (%eax)	/* %bh is zero because %ebx is at most 8 */
    616 	movl	%edi, %eax	/* return value = %edi */
    617 	RETURN1
    618 
    619 #endif
    620 END (STRCAT)
    621