/*
Copyright (c) 2011 Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

     31 #ifndef USE_AS_WCSCAT
     32 
     33 # ifndef L
     34 #  define L(label)	.L##label
     35 # endif
     36 
     37 # ifndef cfi_startproc
     38 #  define cfi_startproc	.cfi_startproc
     39 # endif
     40 
     41 # ifndef cfi_endproc
     42 #  define cfi_endproc	.cfi_endproc
     43 # endif
     44 
     45 # ifndef ENTRY
     46 #  define ENTRY(name)	\
     47 	.type name,  @function;	\
     48 	.globl name;	\
     49 	.p2align 4;	\
     50 name:	\
     51 	cfi_startproc
     52 # endif
     53 
     54 # ifndef END
     55 #  define END(name)	\
     56 	cfi_endproc;	\
     57 	.size name, .-name
     58 # endif
     59 
     60 # define PARMS	4
     61 # define STR	PARMS
     62 # define RETURN ret
     63 
     64 	.text
     65 ENTRY (wcslen)
     66 	mov	STR(%esp), %edx
     67 #endif
     68 	cmpl	$0, (%edx)
     69 	jz	L(exit_tail0)
     70 	cmpl	$0, 4(%edx)
     71 	jz	L(exit_tail1)
     72 	cmpl	$0, 8(%edx)
     73 	jz	L(exit_tail2)
     74 	cmpl	$0, 12(%edx)
     75 	jz	L(exit_tail3)
     76 	cmpl	$0, 16(%edx)
     77 	jz	L(exit_tail4)
     78 	cmpl	$0, 20(%edx)
     79 	jz	L(exit_tail5)
     80 	cmpl	$0, 24(%edx)
     81 	jz	L(exit_tail6)
     82 	cmpl	$0, 28(%edx)
     83 	jz	L(exit_tail7)
     84 
     85 	pxor	%xmm0, %xmm0
     86 
     87 	lea	32(%edx), %eax
     88 	lea	-16(%eax), %ecx
     89 	and	$-16, %eax
     90 
     91 	pcmpeqd	(%eax), %xmm0
     92 	pmovmskb %xmm0, %edx
     93 	pxor	%xmm1, %xmm1
     94 	lea	16(%eax), %eax
     95 	test	%edx, %edx
     96 	jnz	L(exit)
     97 
     98 	pcmpeqd	(%eax), %xmm1
     99 	pmovmskb %xmm1, %edx
    100 	pxor	%xmm2, %xmm2
    101 	lea	16(%eax), %eax
    102 	test	%edx, %edx
    103 	jnz	L(exit)
    104 
    105 	pcmpeqd	(%eax), %xmm2
    106 	pmovmskb %xmm2, %edx
    107 	pxor	%xmm3, %xmm3
    108 	lea	16(%eax), %eax
    109 	test	%edx, %edx
    110 	jnz	L(exit)
    111 
    112 	pcmpeqd	(%eax), %xmm3
    113 	pmovmskb %xmm3, %edx
    114 	lea	16(%eax), %eax
    115 	test	%edx, %edx
    116 	jnz	L(exit)
    117 
    118 	pcmpeqd	(%eax), %xmm0
    119 	pmovmskb %xmm0, %edx
    120 	lea	16(%eax), %eax
    121 	test	%edx, %edx
    122 	jnz	L(exit)
    123 
    124 	pcmpeqd	(%eax), %xmm1
    125 	pmovmskb %xmm1, %edx
    126 	lea	16(%eax), %eax
    127 	test	%edx, %edx
    128 	jnz	L(exit)
    129 
    130 	pcmpeqd	(%eax), %xmm2
    131 	pmovmskb %xmm2, %edx
    132 	lea	16(%eax), %eax
    133 	test	%edx, %edx
    134 	jnz	L(exit)
    135 
    136 	pcmpeqd	(%eax), %xmm3
    137 	pmovmskb %xmm3, %edx
    138 	lea	16(%eax), %eax
    139 	test	%edx, %edx
    140 	jnz	L(exit)
    141 
    142 	pcmpeqd	(%eax), %xmm0
    143 	pmovmskb %xmm0, %edx
    144 	lea	16(%eax), %eax
    145 	test	%edx, %edx
    146 	jnz	L(exit)
    147 
    148 	pcmpeqd	(%eax), %xmm1
    149 	pmovmskb %xmm1, %edx
    150 	lea	16(%eax), %eax
    151 	test	%edx, %edx
    152 	jnz	L(exit)
    153 
    154 	pcmpeqd	(%eax), %xmm2
    155 	pmovmskb %xmm2, %edx
    156 	lea	16(%eax), %eax
    157 	test	%edx, %edx
    158 	jnz	L(exit)
    159 
    160 	pcmpeqd	(%eax), %xmm3
    161 	pmovmskb %xmm3, %edx
    162 	lea	16(%eax), %eax
    163 	test	%edx, %edx
    164 	jnz	L(exit)
    165 
    166 	pcmpeqd	(%eax), %xmm0
    167 	pmovmskb %xmm0, %edx
    168 	lea	16(%eax), %eax
    169 	test	%edx, %edx
    170 	jnz	L(exit)
    171 
    172 	pcmpeqd	(%eax), %xmm1
    173 	pmovmskb %xmm1, %edx
    174 	lea	16(%eax), %eax
    175 	test	%edx, %edx
    176 	jnz	L(exit)
    177 
    178 	pcmpeqd	(%eax), %xmm2
    179 	pmovmskb %xmm2, %edx
    180 	lea	16(%eax), %eax
    181 	test	%edx, %edx
    182 	jnz	L(exit)
    183 
    184 	pcmpeqd	(%eax), %xmm3
    185 	pmovmskb %xmm3, %edx
    186 	lea	16(%eax), %eax
    187 	test	%edx, %edx
    188 	jnz	L(exit)
    189 
    190 	and	$-0x40, %eax
    191 
    192 	.p2align 4
    193 L(aligned_64_loop):
    194 	movaps	(%eax), %xmm0
    195 	movaps	16(%eax), %xmm1
    196 	movaps	32(%eax), %xmm2
    197 	movaps	48(%eax), %xmm6
    198 
    199 	pminub	%xmm1, %xmm0
    200 	pminub	%xmm6, %xmm2
    201 	pminub	%xmm0, %xmm2
    202 	pcmpeqd	%xmm3, %xmm2
    203 	pmovmskb %xmm2, %edx
    204 	lea	64(%eax), %eax
    205 	test	%edx, %edx
    206 	jz	L(aligned_64_loop)
    207 
    208 	pcmpeqd	-64(%eax), %xmm3
    209 	pmovmskb %xmm3, %edx
    210 	lea	48(%ecx), %ecx
    211 	test	%edx, %edx
    212 	jnz	L(exit)
    213 
    214 	pcmpeqd	%xmm1, %xmm3
    215 	pmovmskb %xmm3, %edx
    216 	lea	-16(%ecx), %ecx
    217 	test	%edx, %edx
    218 	jnz	L(exit)
    219 
    220 	pcmpeqd	-32(%eax), %xmm3
    221 	pmovmskb %xmm3, %edx
    222 	lea	-16(%ecx), %ecx
    223 	test	%edx, %edx
    224 	jnz	L(exit)
    225 
    226 	pcmpeqd	%xmm6, %xmm3
    227 	pmovmskb %xmm3, %edx
    228 	lea	-16(%ecx), %ecx
    229 	test	%edx, %edx
    230 	jnz	L(exit)
    231 
    232 	jmp	L(aligned_64_loop)
    233 
    234 	.p2align 4
    235 L(exit):
    236 	sub	%ecx, %eax
    237 	shr	$2, %eax
    238 	test	%dl, %dl
    239 	jz	L(exit_high)
    240 
    241 	mov	%dl, %cl
    242 	and	$15, %cl
    243 	jz	L(exit_1)
    244 	RETURN
    245 
    246 	.p2align 4
    247 L(exit_high):
    248 	mov	%dh, %ch
    249 	and	$15, %ch
    250 	jz	L(exit_3)
    251 	add	$2, %eax
    252 	RETURN
    253 
    254 	.p2align 4
    255 L(exit_1):
    256 	add	$1, %eax
    257 	RETURN
    258 
    259 	.p2align 4
    260 L(exit_3):
    261 	add	$3, %eax
    262 	RETURN
    263 
    264 	.p2align 4
    265 L(exit_tail0):
    266 	xor	%eax, %eax
    267 	RETURN
    268 
    269 	.p2align 4
    270 L(exit_tail1):
    271 	mov	$1, %eax
    272 	RETURN
    273 
    274 	.p2align 4
    275 L(exit_tail2):
    276 	mov	$2, %eax
    277 	RETURN
    278 
    279 	.p2align 4
    280 L(exit_tail3):
    281 	mov	$3, %eax
    282 	RETURN
    283 
    284 	.p2align 4
    285 L(exit_tail4):
    286 	mov	$4, %eax
    287 	RETURN
    288 
    289 	.p2align 4
    290 L(exit_tail5):
    291 	mov	$5, %eax
    292 	RETURN
    293 
    294 	.p2align 4
    295 L(exit_tail6):
    296 	mov	$6, %eax
    297 	RETURN
    298 
    299 	.p2align 4
    300 L(exit_tail7):
    301 	mov	$7, %eax
    302 #ifndef USE_AS_WCSCAT
    303 	RETURN
    304 
    305 END (wcslen)
    306 #endif
    307