/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

     31 #ifndef USE_AS_STRCAT
     32 
     33 #ifndef STRLEN
     34 # define STRLEN		strlen
     35 #endif
     36 
     37 #ifndef L
     38 # define L(label)	.L##label
     39 #endif
     40 
     41 #ifndef cfi_startproc
     42 # define cfi_startproc			.cfi_startproc
     43 #endif
     44 
     45 #ifndef cfi_endproc
     46 # define cfi_endproc			.cfi_endproc
     47 #endif
     48 
     49 #ifndef ENTRY
     50 # define ENTRY(name)			\
     51 	.type name,  @function; 	\
     52 	.globl name;			\
     53 	.p2align 4;			\
     54 name:					\
     55 	cfi_startproc
     56 #endif
     57 
     58 #ifndef END
     59 # define END(name)			\
     60 	cfi_endproc;			\
     61 	.size name, .-name
     62 #endif
     63 #define RETURN ret
     64 	.section .text.sse2,"ax",@progbits
     65 ENTRY (STRLEN)
     66 /* end ifndef USE_AS_STRCAT */
     67 #endif
     68 	xor	%rax, %rax
     69 	mov	%edi, %ecx
     70 	and	$0x3f, %ecx
     71 	pxor	%xmm0, %xmm0
     72 	cmp	$0x30, %ecx
     73 	ja	L(next)
     74 	movdqu	(%rdi), %xmm1
     75 	pcmpeqb	%xmm1, %xmm0
     76 	pmovmskb %xmm0, %edx
     77 	test	%edx, %edx
     78 	jnz	L(exit_less16)
     79 	mov	%rdi, %rax
     80 	and	$-16, %rax
     81 	jmp	L(align16_start)
     82 L(next):
     83 	mov	%rdi, %rax
     84 	and	$-16, %rax
     85 	pcmpeqb	(%rax), %xmm0
     86 	mov	$-1, %r10d
     87 	sub	%rax, %rcx
     88 	shl	%cl, %r10d
     89 	pmovmskb %xmm0, %edx
     90 	and	%r10d, %edx
     91 	jnz	L(exit)
     92 L(align16_start):
     93 	pxor	%xmm0, %xmm0
     94 	pxor	%xmm1, %xmm1
     95 	pxor	%xmm2, %xmm2
     96 	pxor	%xmm3, %xmm3
     97 	pcmpeqb	16(%rax), %xmm0
     98 	pmovmskb %xmm0, %edx
     99 	test	%edx, %edx
    100 	jnz	L(exit16)
    101 
    102 	pcmpeqb	32(%rax), %xmm1
    103 	pmovmskb %xmm1, %edx
    104 	test	%edx, %edx
    105 	jnz	L(exit32)
    106 
    107 	pcmpeqb	48(%rax), %xmm2
    108 	pmovmskb %xmm2, %edx
    109 	test	%edx, %edx
    110 	jnz	L(exit48)
    111 
    112 	pcmpeqb	64(%rax), %xmm3
    113 	pmovmskb %xmm3, %edx
    114 	test	%edx, %edx
    115 	jnz	L(exit64)
    116 
    117 	pcmpeqb	80(%rax), %xmm0
    118 	add	$64, %rax
    119 	pmovmskb %xmm0, %edx
    120 	test	%edx, %edx
    121 	jnz	L(exit16)
    122 
    123 	pcmpeqb	32(%rax), %xmm1
    124 	pmovmskb %xmm1, %edx
    125 	test	%edx, %edx
    126 	jnz	L(exit32)
    127 
    128 	pcmpeqb	48(%rax), %xmm2
    129 	pmovmskb %xmm2, %edx
    130 	test	%edx, %edx
    131 	jnz	L(exit48)
    132 
    133 	pcmpeqb	64(%rax), %xmm3
    134 	pmovmskb %xmm3, %edx
    135 	test	%edx, %edx
    136 	jnz	L(exit64)
    137 
    138 	pcmpeqb	80(%rax), %xmm0
    139 	add	$64, %rax
    140 	pmovmskb %xmm0, %edx
    141 	test	%edx, %edx
    142 	jnz	L(exit16)
    143 
    144 	pcmpeqb	32(%rax), %xmm1
    145 	pmovmskb %xmm1, %edx
    146 	test	%edx, %edx
    147 	jnz	L(exit32)
    148 
    149 	pcmpeqb	48(%rax), %xmm2
    150 	pmovmskb %xmm2, %edx
    151 	test	%edx, %edx
    152 	jnz	L(exit48)
    153 
    154 	pcmpeqb	64(%rax), %xmm3
    155 	pmovmskb %xmm3, %edx
    156 	test	%edx, %edx
    157 	jnz	L(exit64)
    158 
    159 	pcmpeqb	80(%rax), %xmm0
    160 	add	$64, %rax
    161 	pmovmskb %xmm0, %edx
    162 	test	%edx, %edx
    163 	jnz	L(exit16)
    164 
    165 	pcmpeqb	32(%rax), %xmm1
    166 	pmovmskb %xmm1, %edx
    167 	test	%edx, %edx
    168 	jnz	L(exit32)
    169 
    170 	pcmpeqb	48(%rax), %xmm2
    171 	pmovmskb %xmm2, %edx
    172 	test	%edx, %edx
    173 	jnz	L(exit48)
    174 
    175 	pcmpeqb	64(%rax), %xmm3
    176 	pmovmskb %xmm3, %edx
    177 	test	%edx, %edx
    178 	jnz	L(exit64)
    179 
    180 
    181 	test	$0x3f, %rax
    182 	jz	L(align64_loop)
    183 
    184 	pcmpeqb	80(%rax), %xmm0
    185 	add	$80, %rax
    186 	pmovmskb %xmm0, %edx
    187 	test	%edx, %edx
    188 	jnz	L(exit)
    189 
    190 	test	$0x3f, %rax
    191 	jz	L(align64_loop)
    192 
    193 	pcmpeqb	16(%rax), %xmm1
    194 	add	$16, %rax
    195 	pmovmskb %xmm1, %edx
    196 	test	%edx, %edx
    197 	jnz	L(exit)
    198 
    199 	test	$0x3f, %rax
    200 	jz	L(align64_loop)
    201 
    202 	pcmpeqb	16(%rax), %xmm2
    203 	add	$16, %rax
    204 	pmovmskb %xmm2, %edx
    205 	test	%edx, %edx
    206 	jnz	L(exit)
    207 
    208 	test	$0x3f, %rax
    209 	jz	L(align64_loop)
    210 
    211 	pcmpeqb	16(%rax), %xmm3
    212 	add	$16, %rax
    213 	pmovmskb %xmm3, %edx
    214 	test	%edx, %edx
    215 	jnz	L(exit)
    216 
    217 	add	$16, %rax
    218 	.p2align 4
    219 	L(align64_loop):
    220 	movaps	(%rax),	%xmm4
    221 	pminub	16(%rax), 	%xmm4
    222 	movaps	32(%rax), 	%xmm5
    223 	pminub	48(%rax), 	%xmm5
    224 	add	$64, 	%rax
    225 	pminub	%xmm4,	%xmm5
    226 	pcmpeqb	%xmm0,	%xmm5
    227 	pmovmskb %xmm5,	%edx
    228 	test	%edx,	%edx
    229 	jz	L(align64_loop)
    230 
    231 
    232 	pcmpeqb	-64(%rax), %xmm0
    233 	sub	$80, 	%rax
    234 	pmovmskb %xmm0, %edx
    235 	test	%edx, %edx
    236 	jnz	L(exit16)
    237 
    238 	pcmpeqb	32(%rax), %xmm1
    239 	pmovmskb %xmm1, %edx
    240 	test	%edx, %edx
    241 	jnz	L(exit32)
    242 
    243 	pcmpeqb	48(%rax), %xmm2
    244 	pmovmskb %xmm2, %edx
    245 	test	%edx, %edx
    246 	jnz	L(exit48)
    247 
    248 	pcmpeqb	64(%rax), %xmm3
    249 	pmovmskb %xmm3, %edx
    250 	sub	%rdi, %rax
    251 	bsf	%rdx, %rdx
    252 	add	%rdx, %rax
    253 	add	$64, %rax
    254 	RETURN
    255 
    256 	.p2align 4
    257 L(exit):
    258 	sub	%rdi, %rax
    259 L(exit_less16):
    260 	bsf	%rdx, %rdx
    261 	add	%rdx, %rax
    262 	RETURN
    263 	.p2align 4
    264 L(exit16):
    265 	sub	%rdi, %rax
    266 	bsf	%rdx, %rdx
    267 	add	%rdx, %rax
    268 	add	$16, %rax
    269 	RETURN
    270 	.p2align 4
    271 L(exit32):
    272 	sub	%rdi, %rax
    273 	bsf	%rdx, %rdx
    274 	add	%rdx, %rax
    275 	add	$32, %rax
    276 	RETURN
    277 	.p2align 4
    278 L(exit48):
    279 	sub	%rdi, %rax
    280 	bsf	%rdx, %rdx
    281 	add	%rdx, %rax
    282 	add	$48, %rax
    283 	RETURN
    284 	.p2align 4
    285 L(exit64):
    286 	sub	%rdi, %rax
    287 	bsf	%rdx, %rdx
    288 	add	%rdx, %rax
    289 	add	$64, %rax
    290 #ifndef USE_AS_STRCAT
    291 	RETURN
    292 
    293 END (STRLEN)
    294 #endif
    295