/*
Copyright (c) 2011, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

     31 #ifndef L
     32 # define L(label)	.L##label
     33 #endif
     34 
     35 #ifndef cfi_startproc
     36 # define cfi_startproc	.cfi_startproc
     37 #endif
     38 
     39 #ifndef cfi_endproc
     40 # define cfi_endproc	.cfi_endproc
     41 #endif
     42 
     43 #ifndef cfi_rel_offset
     44 # define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
     45 #endif
     46 
     47 #ifndef cfi_restore
     48 # define cfi_restore(reg)	.cfi_restore reg
     49 #endif
     50 
     51 #ifndef cfi_adjust_cfa_offset
     52 # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
     53 #endif
     54 
     55 #ifndef ENTRY
     56 # define ENTRY(name)	\
     57 	.type name,  @function;	\
     58 	.globl name;	\
     59 	.p2align 4;	\
     60 name:	\
     61 	cfi_startproc
     62 #endif
     63 
     64 #ifndef END
     65 # define END(name)	\
     66 	cfi_endproc;	\
     67 	.size name,	.-name
     68 #endif
     69 
     70 #define CFI_PUSH(REG)	\
     71 	cfi_adjust_cfa_offset (4);	\
     72 	cfi_rel_offset (REG, 0)
     73 
     74 #define CFI_POP(REG)	\
     75 	cfi_adjust_cfa_offset (-4);	\
     76 	cfi_restore (REG)
     77 
     78 #define PUSH(REG)	pushl REG;	CFI_PUSH (REG)
     79 #define POP(REG)	popl REG;	CFI_POP (REG)
     80 
     81 #define PARMS	8
     82 #define ENTRANCE	PUSH(%edi)
     83 #define RETURN	POP (%edi); ret; CFI_PUSH (%edi);
     84 
     85 
     86 #define STR1	PARMS
     87 #define STR2	STR1+4
     88 
     89 	.text
     90 ENTRY (strchr)
     91 
     92 	ENTRANCE
     93 	mov	STR1(%esp), %ecx
     94 	movd	STR2(%esp), %xmm1
     95 
     96 	pxor	%xmm2, %xmm2
     97 	mov	%ecx, %edi
     98 	punpcklbw %xmm1, %xmm1
     99 	punpcklbw %xmm1, %xmm1
    100 	/* ECX has OFFSET. */
    101 	and	$15, %ecx
    102 	pshufd	$0, %xmm1, %xmm1
    103 	je	L(loop)
    104 
    105 /* Handle unaligned string.  */
    106 	and	$-16, %edi
    107 	movdqa	(%edi), %xmm0
    108 	pcmpeqb	%xmm0, %xmm2
    109 	pcmpeqb	%xmm1, %xmm0
    110 	/* Find where NULL is.  */
    111 	pmovmskb %xmm2, %edx
    112 	/* Check if there is a match.  */
    113 	pmovmskb %xmm0, %eax
    114 	/* Remove the leading bytes.  */
    115 	sarl	%cl, %edx
    116 	sarl	%cl, %eax
    117 	test	%eax, %eax
    118 	jz	L(unaligned_no_match)
    119 	add	%ecx, %edi
    120 	test	%edx, %edx
    121 	jz	L(match_case1)
    122 	jmp	L(match_case2)
    123 
    124 	.p2align 4
    125 L(unaligned_no_match):
    126 	test	%edx, %edx
    127 	jne	L(return_null)
    128 
    129 	pxor	%xmm2, %xmm2
    130 	add	$16, %edi
    131 
    132 	.p2align 4
    133 /* Loop start on aligned string.  */
    134 L(loop):
    135 	movdqa	(%edi), %xmm0
    136 	pcmpeqb	%xmm0, %xmm2
    137 	pcmpeqb	%xmm1, %xmm0
    138 	pmovmskb %xmm2, %edx
    139 	pmovmskb %xmm0, %eax
    140 	test	%eax, %eax
    141 	jnz	L(matches)
    142 	test	%edx, %edx
    143 	jnz	L(return_null)
    144 	add	$16, %edi
    145 
    146 	movdqa	(%edi), %xmm0
    147 	pcmpeqb	%xmm0, %xmm2
    148 	pcmpeqb	%xmm1, %xmm0
    149 	pmovmskb %xmm2, %edx
    150 	pmovmskb %xmm0, %eax
    151 	test	%eax, %eax
    152 	jnz	L(matches)
    153 	test	%edx, %edx
    154 	jnz	L(return_null)
    155 	add	$16, %edi
    156 
    157 	movdqa	(%edi), %xmm0
    158 	pcmpeqb	%xmm0, %xmm2
    159 	pcmpeqb	%xmm1, %xmm0
    160 	pmovmskb %xmm2, %edx
    161 	pmovmskb %xmm0, %eax
    162 	test	%eax, %eax
    163 	jnz	L(matches)
    164 	test	%edx, %edx
    165 	jnz	L(return_null)
    166 	add	$16, %edi
    167 
    168 	movdqa	(%edi), %xmm0
    169 	pcmpeqb	%xmm0, %xmm2
    170 	pcmpeqb	%xmm1, %xmm0
    171 	pmovmskb %xmm2, %edx
    172 	pmovmskb %xmm0, %eax
    173 	test	%eax, %eax
    174 	jnz	L(matches)
    175 	test	%edx, %edx
    176 	jnz	L(return_null)
    177 	add	$16, %edi
    178 	jmp	L(loop)
    179 
    180 L(matches):
    181 	/* There is a match.  First find where NULL is.  */
    182 	test	%edx, %edx
    183 	jz	L(match_case1)
    184 
    185 	.p2align 4
    186 L(match_case2):
    187 	test	%al, %al
    188 	jz	L(match_higth_case2)
    189 
    190 	mov	%al, %cl
    191 	and	$15, %cl
    192 	jnz	L(match_case2_4)
    193 
    194 	mov	%dl, %ch
    195 	and	$15, %ch
    196 	jnz	L(return_null)
    197 
    198 	test	$0x10, %al
    199 	jnz	L(Exit5)
    200 	test	$0x10, %dl
    201 	jnz	L(return_null)
    202 	test	$0x20, %al
    203 	jnz	L(Exit6)
    204 	test	$0x20, %dl
    205 	jnz	L(return_null)
    206 	test	$0x40, %al
    207 	jnz	L(Exit7)
    208 	test	$0x40, %dl
    209 	jnz	L(return_null)
    210 	lea	7(%edi), %eax
    211 	RETURN
    212 
    213 	.p2align 4
    214 L(match_case2_4):
    215 	test	$0x01, %al
    216 	jnz	L(Exit1)
    217 	test	$0x01, %dl
    218 	jnz	L(return_null)
    219 	test	$0x02, %al
    220 	jnz	L(Exit2)
    221 	test	$0x02, %dl
    222 	jnz	L(return_null)
    223 	test	$0x04, %al
    224 	jnz	L(Exit3)
    225 	test	$0x04, %dl
    226 	jnz	L(return_null)
    227 	lea	3(%edi), %eax
    228 	RETURN
    229 
    230 	.p2align 4
    231 L(match_higth_case2):
    232 	test	%dl, %dl
    233 	jnz	L(return_null)
    234 
    235 	mov	%ah, %cl
    236 	and	$15, %cl
    237 	jnz	L(match_case2_12)
    238 
    239 	mov	%dh, %ch
    240 	and	$15, %ch
    241 	jnz	L(return_null)
    242 
    243 	test	$0x10, %ah
    244 	jnz	L(Exit13)
    245 	test	$0x10, %dh
    246 	jnz	L(return_null)
    247 	test	$0x20, %ah
    248 	jnz	L(Exit14)
    249 	test	$0x20, %dh
    250 	jnz	L(return_null)
    251 	test	$0x40, %ah
    252 	jnz	L(Exit15)
    253 	test	$0x40, %dh
    254 	jnz	L(return_null)
    255 	lea	15(%edi), %eax
    256 	RETURN
    257 
    258 	.p2align 4
    259 L(match_case2_12):
    260 	test	$0x01, %ah
    261 	jnz	L(Exit9)
    262 	test	$0x01, %dh
    263 	jnz	L(return_null)
    264 	test	$0x02, %ah
    265 	jnz	L(Exit10)
    266 	test	$0x02, %dh
    267 	jnz	L(return_null)
    268 	test	$0x04, %ah
    269 	jnz	L(Exit11)
    270 	test	$0x04, %dh
    271 	jnz	L(return_null)
    272 	lea	11(%edi), %eax
    273 	RETURN
    274 
    275 	.p2align 4
    276 L(match_case1):
    277 	test	%al, %al
    278 	jz	L(match_higth_case1)
    279 
    280 	test	$0x01, %al
    281 	jnz	L(Exit1)
    282 	test	$0x02, %al
    283 	jnz	L(Exit2)
    284 	test	$0x04, %al
    285 	jnz	L(Exit3)
    286 	test	$0x08, %al
    287 	jnz	L(Exit4)
    288 	test	$0x10, %al
    289 	jnz	L(Exit5)
    290 	test	$0x20, %al
    291 	jnz	L(Exit6)
    292 	test	$0x40, %al
    293 	jnz	L(Exit7)
    294 	lea	7(%edi), %eax
    295 	RETURN
    296 
    297 	.p2align 4
    298 L(match_higth_case1):
    299 	test	$0x01, %ah
    300 	jnz	L(Exit9)
    301 	test	$0x02, %ah
    302 	jnz	L(Exit10)
    303 	test	$0x04, %ah
    304 	jnz	L(Exit11)
    305 	test	$0x08, %ah
    306 	jnz	L(Exit12)
    307 	test	$0x10, %ah
    308 	jnz	L(Exit13)
    309 	test	$0x20, %ah
    310 	jnz	L(Exit14)
    311 	test	$0x40, %ah
    312 	jnz	L(Exit15)
    313 	lea	15(%edi), %eax
    314 	RETURN
    315 
    316 	.p2align 4
    317 L(Exit1):
    318 	lea	(%edi), %eax
    319 	RETURN
    320 
    321 	.p2align 4
    322 L(Exit2):
    323 	lea	1(%edi), %eax
    324 	RETURN
    325 
    326 	.p2align 4
    327 L(Exit3):
    328 	lea	2(%edi), %eax
    329 	RETURN
    330 
    331 	.p2align 4
    332 L(Exit4):
    333 	lea	3(%edi), %eax
    334 	RETURN
    335 
    336 	.p2align 4
    337 L(Exit5):
    338 	lea	4(%edi), %eax
    339 	RETURN
    340 
    341 	.p2align 4
    342 L(Exit6):
    343 	lea	5(%edi), %eax
    344 	RETURN
    345 
    346 	.p2align 4
    347 L(Exit7):
    348 	lea	6(%edi), %eax
    349 	RETURN
    350 
    351 	.p2align 4
    352 L(Exit9):
    353 	lea	8(%edi), %eax
    354 	RETURN
    355 
    356 	.p2align 4
    357 L(Exit10):
    358 	lea	9(%edi), %eax
    359 	RETURN
    360 
    361 	.p2align 4
    362 L(Exit11):
    363 	lea	10(%edi), %eax
    364 	RETURN
    365 
    366 	.p2align 4
    367 L(Exit12):
    368 	lea	11(%edi), %eax
    369 	RETURN
    370 
    371 	.p2align 4
    372 L(Exit13):
    373 	lea	12(%edi), %eax
    374 	RETURN
    375 
    376 	.p2align 4
    377 L(Exit14):
    378 	lea	13(%edi), %eax
    379 	RETURN
    380 
    381 	.p2align 4
    382 L(Exit15):
    383 	lea	14(%edi), %eax
    384 	RETURN
    385 
    386 	.p2align 4
    387 L(return_null):
    388 	xor	%eax, %eax
    389 	RETURN
    390 
    391 END (strchr)
    392