Home | History | Annotate | Download | only in string
      1 /*
      2 Copyright (c) 2011 Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     31 #ifndef L
        /* L(label) pastes ".L" in front of the label name.  Every branch target
           inside this file is written through this macro.  */
     32 # define L(label)	.L##label
     33 #endif
     34 
        /* The cfi_* wrappers below each expand to the assembler's .cfi_*
           directive of the same name, forwarding their arguments unchanged.
           Each one is defined only when no definition is already in effect
           (#ifndef guard).  */
     35 #ifndef cfi_startproc
     36 # define cfi_startproc	.cfi_startproc
     37 #endif
     38 
     39 #ifndef cfi_endproc
     40 # define cfi_endproc	.cfi_endproc
     41 #endif
     42 
     43 #ifndef cfi_rel_offset
     44 # define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
     45 #endif
     46 
     47 #ifndef cfi_restore
     48 # define cfi_restore(reg)	.cfi_restore reg
     49 #endif
     50 
     51 #ifndef cfi_adjust_cfa_offset
     52 # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
     53 #endif
     54 
     55 #ifndef ENTRY
        /* ENTRY(name): declare `name' as a function-typed symbol, export it
           with .globl, align it with .p2align 4 (2^4 = 16 bytes), emit the
           label itself and open a CFI region with cfi_startproc.  Defined only
           if ENTRY is not already defined.  */
     56 # define ENTRY(name)	\
     57 	.type name,  @function;	\
     58 	.globl name;	\
     59 	.p2align 4;	\
     60 name:	\
     61 	cfi_startproc
     62 #endif
     63 
     64 #ifndef END
        /* END(name): close the CFI region opened by ENTRY and record the symbol
           size as the distance from `name' to the current location (.-name).
           Defined only if END is not already defined.  */
     65 # define END(name)	\
     66 	cfi_endproc;	\
     67 	.size name,	.-name
     68 #endif
     69 
        /* CFI_PUSH(REG): CFI bookkeeping for a 4-byte push of REG.  It emits
           directives only, no instruction: the CFA offset grows by 4 and REG is
           recorded as saved at offset 0 from the current stack pointer.  */
     70 #define CFI_PUSH(REG)	\
     71 	cfi_adjust_cfa_offset (4);	\
     72 	cfi_rel_offset (REG, 0)
     73 
        /* CFI_POP(REG): the inverse bookkeeping.  The CFA offset shrinks by 4
           and REG is marked as restored.  Directives only, no instruction.  */
     74 #define CFI_POP(REG)	\
     75 	cfi_adjust_cfa_offset (-4);	\
     76 	cfi_restore (REG)
     77 
        /* PUSH/POP: the real pushl/popl instruction followed by the matching
           CFI bookkeeping, so instruction and frame description stay paired.  */
     78 #define PUSH(REG)	pushl REG; CFI_PUSH (REG)
     79 #define POP(REG)	popl REG; CFI_POP (REG)
     80 
        /* Displacements from %esp, valid at function entry, of the stack
           arguments.  PARMS skips the first 4 bytes above %esp (presumably the
           return address pushed by the caller's call -- confirm against the
           target ABI).  */
     81 #define PARMS	4
     82 
     83 
        /* STR1 = 4: first argument, loaded as the string pointer.
           STR2 = 8: second argument, loaded as the 32-bit value to search for.
           STR2 expands to the unparenthesised text `PARMS+4'; it is only used
           as a displacement, STR2(%esp), where that form is harmless.  */
     84 #define STR1  PARMS
     85 #define STR2  STR1+4
     86 
     87 	.text
        /* wcschr -- find the first occurrence of a 32-bit value in a
           zero-terminated array of 32-bit elements, four elements (16 bytes)
           at a time with SSE2.  Presumably this implements the C library's
           wchar_t *wcschr (const wchar_t *s, wchar_t c); the C prototype is
           not visible in this file.

           In:   STR1(%esp) = 4(%esp)  pointer to the first element
                 STR2(%esp) = 8(%esp)  32-bit value to look for
           Out:  %eax = address of the first matching element, or 0 when a
                 zero element is reached before any match.  A match in the
                 same lane as the terminator is returned, because the match
                 bit of a lane is always tested before its zero bit.
           Uses: %eax, %ecx, %edx, %xmm0-%xmm2 and the flags.  %edi is used
                 only on the L(cross_cache) path and is pushed/popped there.

           Register roles in the scanning code:
                 %ecx   address of the 16-byte block being examined
                 %xmm1  search value replicated into all four lanes
                 %xmm2  zero reference; after each pcmpeqd it holds the
                        "lane == 0" result, which is itself all-zero as long
                        as no terminator was seen, so it can be reused
                 %eax   byte mask of lanes equal to the search value
                 %edx   byte mask of lanes equal to zero

           Mask layout: pmovmskb yields one bit per byte, so lane k owns bits
           4k..4k+3 (all four set or all four clear).  %al covers lanes 0-1,
           %ah covers lanes 2-3; likewise %dl/%dh for the zero mask.  */
     88 ENTRY (wcschr)
     89 
     90 	mov	STR1(%esp), %ecx	/* ecx = string pointer */
     91 	movd	STR2(%esp), %xmm1	/* xmm1 lane 0 = value to find */
     92 
     93 	mov	%ecx, %eax
     94 	punpckldq %xmm1, %xmm1	/* value now in lanes 0-1 ... */
     95 	pxor	%xmm2, %xmm2	/* xmm2 = 0: reference for the terminator test */
     96 	punpckldq %xmm1, %xmm1	/* ... and now in all four lanes */
     97 
        /* A 16-byte load starting more than 48 bytes into a 64-byte-aligned
           window would extend past that window; such pointers take the
           aligned-load path at L(cross_cache) instead.  */
     98 	and	$63, %eax	/* eax = pointer offset within its 64-byte window */
     99 	cmp	$48, %eax
    100 	ja	L(cross_cache)
    101 
    102 	movdqu	(%ecx), %xmm0	/* unaligned load of the first four elements */
    103 	pcmpeqd	%xmm0, %xmm2	/* xmm2 = lanes that are zero */
    104 	pcmpeqd	%xmm1, %xmm0	/* xmm0 = lanes equal to the search value */
    105 	pmovmskb %xmm2, %edx
    106 	pmovmskb %xmm0, %eax
    107 	or	%eax, %edx	/* anything of interest in this block? */
    108 	jnz	L(matches)
    109 	and	$-16, %ecx	/* round down to 16 so the loop's movdqa is aligned; L(loop) adds 16 first */
    110 	jmp	L(loop)
    111 
    112 	.p2align 4
    113 L(cross_cache):
    114 	PUSH	(%edi)	/* edi is scratch on this path; restored by the POPs below */
    115 	mov	%ecx, %edi
    116 	mov	%eax, %ecx
    117 	and	$-16, %edi	/* edi = pointer rounded down to a 16-byte boundary */
    118 	and	$15, %ecx	/* cl = byte offset of the pointer inside that block */
    119 	movdqa	(%edi), %xmm0	/* aligned load of the block containing the start */
    120 	pcmpeqd	%xmm0, %xmm2
    121 	pcmpeqd	%xmm1, %xmm0
    122 	pmovmskb %xmm2, %edx
    123 	pmovmskb %xmm0, %eax
    124 
        /* Shift out the mask bits that belong to bytes in front of the string
           start.  Bit 0 of each mask then corresponds to the original pointer.
           The masks are 16-bit values in 32-bit registers, so the sign bit is
           clear and the arithmetic shift fills with zeros.  */
    125 	sarl	%cl, %edx
    126 	sarl	%cl, %eax
    127 	test	%eax, %eax
    128 	jz	L(unaligned_no_match)
    129 
    130 	add	%edi, %ecx	/* ecx = aligned base + offset = original pointer */
    131 	POP	(%edi)
    132 
        /* Same decision tree as L(matches)/L(match_case2) below, repeated
           inline for this path.  */
    133 	test	%edx, %edx
    134 	jz	L(match_case1)	/* match and no terminator in the remaining lanes */
    135 	test	%al, %al
    136 	jz	L(match_higth_case2)	/* match is in lane 2 or 3 */
    137 	test	$15, %al
    138 	jnz	L(match_case2_4)	/* lane 0 matches */
    139 	test	$15, %dl
    140 	jnz	L(return_null)	/* lane 0 is the terminator, match comes later */
    141 	lea	4(%ecx), %eax	/* lane 1 matches */
    142 	ret
    143 
        /* Not reached as code (it follows a ret) and emits no instruction:
           CFI_PUSH expands to CFI directives only.  It re-records %edi as
           pushed for L(unaligned_no_match), which is entered by the jz above
           while %edi is still on the stack.  */
    144 	CFI_PUSH (%edi)
    145 
    146 	.p2align 4
    147 L(unaligned_no_match):
    148 	mov	%edi, %ecx	/* ecx = aligned base of the first block */
    149 	POP	(%edi)
    150 
    151 	test	%edx, %edx
    152 	jnz	L(return_null)	/* terminator in the first block and no match */
    153 
    154 	pxor	%xmm2, %xmm2	/* xmm2 may flag zero lanes located before the string start; clear it for the loop */
    155 
    156 /* Loop start on aligned string.  */
        /* Four 16-byte blocks are examined per iteration.  Each step relies on
           xmm2 being all-zero on entry: the previous pcmpeqd into xmm2 found
           no zero lane, otherwise control would have left for L(matches).  */
    157 	.p2align 4
    158 L(loop):
    159 	add	$16, %ecx
    160 	movdqa	(%ecx), %xmm0
    161 	pcmpeqd	%xmm0, %xmm2
    162 	pcmpeqd	%xmm1, %xmm0
    163 	pmovmskb %xmm2, %edx
    164 	pmovmskb %xmm0, %eax
    165 	or	%eax, %edx
    166 	jnz	L(matches)
    167 	add	$16, %ecx
    168 
    169 	movdqa	(%ecx), %xmm0
    170 	pcmpeqd	%xmm0, %xmm2
    171 	pcmpeqd	%xmm1, %xmm0
    172 	pmovmskb %xmm2, %edx
    173 	pmovmskb %xmm0, %eax
    174 	or	%eax, %edx
    175 	jnz	L(matches)
    176 	add	$16, %ecx
    177 
    178 	movdqa	(%ecx), %xmm0
    179 	pcmpeqd	%xmm0, %xmm2
    180 	pcmpeqd	%xmm1, %xmm0
    181 	pmovmskb %xmm2, %edx
    182 	pmovmskb %xmm0, %eax
    183 	or	%eax, %edx
    184 	jnz	L(matches)
    185 	add	$16, %ecx
    186 
    187 	movdqa	(%ecx), %xmm0
    188 	pcmpeqd	%xmm0, %xmm2
    189 	pcmpeqd	%xmm1, %xmm0
    190 	pmovmskb %xmm2, %edx
    191 	pmovmskb %xmm0, %eax
    192 	or	%eax, %edx
    193 	jz	L(loop)	/* nothing in the fourth block either: next iteration; otherwise fall into L(matches) */
    194 
        /* Here ecx addresses the block that produced a hit, eax is its match
           mask, and xmm2 still holds its zero-lane compare result.  */
    195 	.p2align 4
    196 L(matches):
    197 	pmovmskb %xmm2, %edx	/* edx was merged with eax by the `or'; re-extract the pure zero-lane mask */
    198 	test	%eax, %eax
    199 	jz	L(return_null)	/* only a terminator was found */
    200 	test	%edx, %edx
    201 	jz	L(match_case1)	/* match without a terminator in this block */
    202 
        /* Case 2: the block contains both a match and a terminator; the match
           counts only if no zero lane precedes it.  Lanes are tested in
           ascending order, match bit before zero bit.  */
    203 	.p2align 4
    204 L(match_case2):
    205 	test	%al, %al
    206 	jz	L(match_higth_case2)	/* no match in lanes 0-1 */
    207 	test	$15, %al
    208 	jnz	L(match_case2_4)	/* lane 0 matches */
    209 	test	$15, %dl
    210 	jnz	L(return_null)	/* lane 0 is zero, the match in lane 1 is past the end */
    211 	lea	4(%ecx), %eax	/* lane 1 matches */
    212 	ret
    213 
        /* Lane 0 match: return the block address itself (offset 0, despite
           the "_4" in the label name).  */
    214 	.p2align 4
    215 L(match_case2_4):
    216 	mov	%ecx, %eax
    217 	ret
    218 
        /* Case 2 with the match in lane 2 or 3.  */
    219 	.p2align 4
    220 L(match_higth_case2):
    221 	test	%dl, %dl
    222 	jnz	L(return_null)	/* a zero in lane 0 or 1 precedes the match */
    223 	test	$15, %ah
    224 	jnz	L(match_case2_12)	/* lane 2 matches */
    225 	test	$15, %dh
    226 	jnz	L(return_null)	/* lane 2 is zero, the match in lane 3 is past the end */
    227 	lea	12(%ecx), %eax	/* lane 3 matches */
    228 	ret
    229 
        /* Lane 2 match: return offset 8 (despite the "_12" in the label
           name).  */
    230 	.p2align 4
    231 L(match_case2_12):
    232 	lea	8(%ecx), %eax
    233 	ret
    234 
        /* Case 1: a match and no terminator in the block, so the lowest set
           lane of the match mask is the answer.  */
    235 	.p2align 4
    236 L(match_case1):
    237 	test	%al, %al
    238 	jz	L(match_higth_case1)	/* match is in lane 2 or 3 */
    239 
    240 	test	$0x01, %al	/* bit 0 set <=> lane 0 matches */
    241 	jnz	L(exit0)
    242 	lea	4(%ecx), %eax	/* al != 0 with lane 0 clear: lane 1 matches */
    243 	ret
    244 
    245 	.p2align 4
    246 L(match_higth_case1):
    247 	test	$0x01, %ah	/* bit 8 of the mask set <=> lane 2 matches */
    248 	jnz	L(exit3)
    249 	lea	12(%ecx), %eax	/* otherwise the match is in lane 3 */
    250 	ret
    251 
        /* Lane 0 match: return the block address.  */
    252 	.p2align 4
    253 L(exit0):
    254 	mov	%ecx, %eax
    255 	ret
    256 
        /* Lane 2 match: return offset 8.  */
    257 	.p2align 4
    258 L(exit3):
    259 	lea	8(%ecx), %eax
    260 	ret
    261 
        /* Terminator reached before any match: return 0.  */
    262 	.p2align 4
    263 L(return_null):
    264 	xor	%eax, %eax
    265 	ret
    266 
    267 END (wcschr)
    268