/*
Copyright (c) 2011 Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

     31 #ifndef L
     32 # define L(label)	.L##label
     33 #endif
     34 
     35 #ifndef cfi_startproc
     36 # define cfi_startproc	.cfi_startproc
     37 #endif
     38 
     39 #ifndef cfi_endproc
     40 # define cfi_endproc	.cfi_endproc
     41 #endif
     42 
     43 #ifndef cfi_rel_offset
     44 # define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
     45 #endif
     46 
     47 #ifndef cfi_restore
     48 # define cfi_restore(reg)	.cfi_restore reg
     49 #endif
     50 
     51 #ifndef cfi_adjust_cfa_offset
     52 # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
     53 #endif
     54 
     55 #ifndef ENTRY
     56 # define ENTRY(name)	\
     57 	.type name, @function;	\
     58 	.globl name;	\
     59 	.p2align 4;	\
     60 name:	\
     61 	cfi_startproc
     62 #endif
     63 
     64 #ifndef END
     65 # define END(name)	\
     66 	cfi_endproc;	\
     67 	.size name, .-name
     68 #endif
     69 
     70 #define CFI_PUSH(REG)	\
     71 	cfi_adjust_cfa_offset (4);	\
     72 	cfi_rel_offset (REG, 0)
     73 
     74 #define CFI_POP(REG)	\
     75 	cfi_adjust_cfa_offset (-4);	\
     76 	cfi_restore (REG)
     77 
     78 #define PUSH(REG)	pushl REG;	CFI_PUSH (REG)
     79 #define POP(REG)	popl REG;	CFI_POP (REG)
     80 
     81 #define PARMS  8
     82 #define ENTRANCE PUSH(%edi);
     83 #define RETURN  POP(%edi);	ret;	CFI_PUSH(%edi);
     84 
     85 #define STR1  PARMS
     86 #define STR2  STR1+4
     87 
     88 	.text
     89 ENTRY (wcsrchr)
     90 
     91 	ENTRANCE
     92 	mov	STR1(%esp), %ecx
     93 	movd	STR2(%esp), %xmm1
     94 
     95 	mov	%ecx, %edi
     96 	punpckldq %xmm1, %xmm1
     97 	pxor	%xmm2, %xmm2
     98 	punpckldq %xmm1, %xmm1
     99 
    100 /* ECX has OFFSET. */
    101 	and	$63, %ecx
    102 	cmp	$48, %ecx
    103 	ja	L(crosscache)
    104 
    105 /* unaligned string. */
    106 	movdqu	(%edi), %xmm0
    107 	pcmpeqd	%xmm0, %xmm2
    108 	pcmpeqd	%xmm1, %xmm0
    109 /* Find where NULL is.  */
    110 	pmovmskb %xmm2, %ecx
    111 /* Check if there is a match.  */
    112 	pmovmskb %xmm0, %eax
    113 	add	$16, %edi
    114 
    115 	test	%eax, %eax
    116 	jnz	L(unaligned_match1)
    117 
    118 	test	%ecx, %ecx
    119 	jnz	L(return_null)
    120 
    121 	and	$-16, %edi
    122 
    123 	PUSH	(%esi)
    124 
    125 	xor	%edx, %edx
    126 	jmp	L(loop)
    127 
    128 	CFI_POP	(%esi)
    129 
    130 	.p2align 4
    131 L(unaligned_match1):
    132 	test	%ecx, %ecx
    133 	jnz	L(prolog_find_zero_1)
    134 
    135 	PUSH	(%esi)
    136 
    137 /* Save current match */
    138 	mov	%eax, %edx
    139 	mov	%edi, %esi
    140 	and	$-16, %edi
    141 	jmp	L(loop)
    142 
    143 	CFI_POP	(%esi)
    144 
    145 	.p2align 4
    146 L(crosscache):
    147 /* Hancle unaligned string.  */
    148 	and	$15, %ecx
    149 	and	$-16, %edi
    150 	pxor	%xmm3, %xmm3
    151 	movdqa	(%edi), %xmm0
    152 	pcmpeqd	%xmm0, %xmm3
    153 	pcmpeqd	%xmm1, %xmm0
    154 /* Find where NULL is.  */
    155 	pmovmskb %xmm3, %edx
    156 /* Check if there is a match.  */
    157 	pmovmskb %xmm0, %eax
    158 /* Remove the leading bytes.  */
    159 	shr	%cl, %edx
    160 	shr	%cl, %eax
    161 	add	$16, %edi
    162 
    163 	test	%eax, %eax
    164 	jnz	L(unaligned_match)
    165 
    166 	test	%edx, %edx
    167 	jnz	L(return_null)
    168 
    169 	PUSH	(%esi)
    170 
    171 	xor	%edx, %edx
    172 	jmp	L(loop)
    173 
    174 	CFI_POP	(%esi)
    175 
    176 	.p2align 4
    177 L(unaligned_match):
    178 	test	%edx, %edx
    179 	jnz	L(prolog_find_zero)
    180 
    181 	PUSH	(%esi)
    182 
    183 	mov	%eax, %edx
    184 	lea	(%edi, %ecx), %esi
    185 
    186 /* Loop start on aligned string.  */
    187 	.p2align 4
    188 L(loop):
    189 	movdqa	(%edi), %xmm0
    190 	pcmpeqd	%xmm0, %xmm2
    191 	add	$16, %edi
    192 	pcmpeqd	%xmm1, %xmm0
    193 	pmovmskb %xmm2, %ecx
    194 	pmovmskb %xmm0, %eax
    195 	or	%eax, %ecx
    196 	jnz	L(matches)
    197 
    198 	movdqa	(%edi), %xmm3
    199 	pcmpeqd	%xmm3, %xmm2
    200 	add	$16, %edi
    201 	pcmpeqd	%xmm1, %xmm3
    202 	pmovmskb %xmm2, %ecx
    203 	pmovmskb %xmm3, %eax
    204 	or	%eax, %ecx
    205 	jnz	L(matches)
    206 
    207 	movdqa	(%edi), %xmm4
    208 	pcmpeqd	%xmm4, %xmm2
    209 	add	$16, %edi
    210 	pcmpeqd	%xmm1, %xmm4
    211 	pmovmskb %xmm2, %ecx
    212 	pmovmskb %xmm4, %eax
    213 	or	%eax, %ecx
    214 	jnz	L(matches)
    215 
    216 	movdqa	(%edi), %xmm5
    217 	pcmpeqd	%xmm5, %xmm2
    218 	add	$16, %edi
    219 	pcmpeqd	%xmm1, %xmm5
    220 	pmovmskb %xmm2, %ecx
    221 	pmovmskb %xmm5, %eax
    222 	or	%eax, %ecx
    223 	jz	L(loop)
    224 
    225 	.p2align 4
    226 L(matches):
    227 	test	%eax, %eax
    228 	jnz	L(match)
    229 L(return_value):
    230 	test	%edx, %edx
    231 	jz	L(return_null_1)
    232 	mov	%edx, %eax
    233 	mov	%esi, %edi
    234 
    235 	POP	(%esi)
    236 
    237 	test	%ah, %ah
    238 	jnz	L(match_third_or_fourth_wchar)
    239 	test	$15 << 4, %al
    240 	jnz	L(match_second_wchar)
    241 	lea	-16(%edi), %eax
    242 	RETURN
    243 
    244 	CFI_PUSH	(%esi)
    245 
    246 	.p2align 4
    247 L(return_null_1):
    248 	POP	(%esi)
    249 
    250 	xor	%eax, %eax
    251 	RETURN
    252 
    253 	CFI_PUSH	(%esi)
    254 
    255 	.p2align 4
    256 L(match):
    257 	pmovmskb %xmm2, %ecx
    258 	test	%ecx, %ecx
    259 	jnz	L(find_zero)
    260 /* save match info */
    261 	mov	%eax, %edx
    262 	mov	%edi, %esi
    263 	jmp	L(loop)
    264 
    265 	.p2align 4
    266 L(find_zero):
    267 	test	%cl, %cl
    268 	jz	L(find_zero_in_third_or_fourth_wchar)
    269 	test	$15, %cl
    270 	jz	L(find_zero_in_second_wchar)
    271 	and	$1, %eax
    272 	jz	L(return_value)
    273 
    274 	POP	(%esi)
    275 
    276 	lea	-16(%edi), %eax
    277 	RETURN
    278 
    279 	CFI_PUSH	(%esi)
    280 
    281 	.p2align 4
    282 L(find_zero_in_second_wchar):
    283 	and	$(1 << 5) - 1, %eax
    284 	jz	L(return_value)
    285 
    286 	POP	(%esi)
    287 
    288 	test	$15 << 4, %al
    289 	jnz	L(match_second_wchar)
    290 	lea	-16(%edi), %eax
    291 	RETURN
    292 
    293 	CFI_PUSH	(%esi)
    294 
    295 	.p2align 4
    296 L(find_zero_in_third_or_fourth_wchar):
    297 	test	$15, %ch
    298 	jz	L(find_zero_in_fourth_wchar)
    299 	and	$(1 << 9) - 1, %eax
    300 	jz	L(return_value)
    301 
    302 	POP	(%esi)
    303 
    304 	test	%ah, %ah
    305 	jnz	L(match_third_wchar)
    306 	test	$15 << 4, %al
    307 	jnz	L(match_second_wchar)
    308 	lea	-16(%edi), %eax
    309 	RETURN
    310 
    311 	CFI_PUSH	(%esi)
    312 
    313 	.p2align 4
    314 L(find_zero_in_fourth_wchar):
    315 
    316 	POP	(%esi)
    317 
    318 	test	%ah, %ah
    319 	jnz	L(match_third_or_fourth_wchar)
    320 	test	$15 << 4, %al
    321 	jnz	L(match_second_wchar)
    322 	lea	-16(%edi), %eax
    323 	RETURN
    324 
    325 	CFI_PUSH	(%esi)
    326 
    327 	.p2align 4
    328 L(match_second_wchar):
    329 	lea	-12(%edi), %eax
    330 	RETURN
    331 
    332 	.p2align 4
    333 L(match_third_or_fourth_wchar):
    334 	test	$15 << 4, %ah
    335 	jnz	L(match_fourth_wchar)
    336 	lea	-8(%edi), %eax
    337 	RETURN
    338 
    339 	.p2align 4
    340 L(match_third_wchar):
    341 	lea	-8(%edi), %eax
    342 	RETURN
    343 
    344 	.p2align 4
    345 L(match_fourth_wchar):
    346 	lea	-4(%edi), %eax
    347 	RETURN
    348 
    349 	.p2align 4
    350 L(return_null):
    351 	xor	%eax, %eax
    352 	RETURN
    353 
    354 	.p2align 4
    355 L(prolog_find_zero):
    356 	add	%ecx, %edi
    357 	mov     %edx, %ecx
    358 L(prolog_find_zero_1):
    359 	test	%cl, %cl
    360 	jz	L(prolog_find_zero_in_third_or_fourth_wchar)
    361 	test	$15, %cl
    362 	jz	L(prolog_find_zero_in_second_wchar)
    363 	and	$1, %eax
    364 	jz	L(return_null)
    365 
    366 	lea	-16(%edi), %eax
    367 	RETURN
    368 
    369 	.p2align 4
    370 L(prolog_find_zero_in_second_wchar):
    371 	and	$(1 << 5) - 1, %eax
    372 	jz	L(return_null)
    373 
    374 	test	$15 << 4, %al
    375 	jnz	L(match_second_wchar)
    376 	lea	-16(%edi), %eax
    377 	RETURN
    378 
    379 	.p2align 4
    380 L(prolog_find_zero_in_third_or_fourth_wchar):
    381 	test	$15, %ch
    382 	jz	L(prolog_find_zero_in_fourth_wchar)
    383 	and	$(1 << 9) - 1, %eax
    384 	jz	L(return_null)
    385 
    386 	test	%ah, %ah
    387 	jnz	L(match_third_wchar)
    388 	test	$15 << 4, %al
    389 	jnz	L(match_second_wchar)
    390 	lea	-16(%edi), %eax
    391 	RETURN
    392 
    393 	.p2align 4
    394 L(prolog_find_zero_in_fourth_wchar):
    395 	test	%ah, %ah
    396 	jnz	L(match_third_or_fourth_wchar)
    397 	test	$15 << 4, %al
    398 	jnz	L(match_second_wchar)
    399 	lea	-16(%edi), %eax
    400 	RETURN
    401 
    402 END (wcsrchr)
    403