/*
 * memchr - find a character in a memory zone
 *
 * Copyright (c) 2014, ARM Limited
 * All rights reserved.
 * Copyright (c) 2014, Linaro Ltd.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the company nor the names of its contributors
 *       may be used to endorse or promote products derived from this
 *       software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Assumptions:
 *
 * ARMv8-A, AArch64
 * Neon available.
 */

#include <private/bionic_asm.h>

/* Arguments and results.  */
#define srcin		x0	/* in: buffer to search */
#define chrin		w1	/* in: character to find */
#define cntin		x2	/* in: number of bytes to search */

#define result		x0	/* out: address of match, or NULL */

/* Locals and temporaries.  */
#define src		x3	/* 32-byte-aligned read pointer */
#define tmp		x4	/* scratch */
#define wtmp2		w5	/* magic constant for the lane mask */
#define synd		x6	/* syndrome: two bits per input byte */
#define soff		x9	/* srcin's offset within its aligned block */
#define cntrem		x10	/* cntin % 32, for masking the last block */

#define vrepchr		v0	/* chrin replicated to all 16 lanes */
#define vdata1		v1	/* first 16 bytes of the current block */
#define vdata2		v2	/* second 16 bytes of the current block */
#define vhas_chr1	v3	/* compare result for vdata1 */
#define vhas_chr2	v4	/* compare result for vdata2 */
#define vrepmask	v5	/* lane mask, 0x40100401 per 32-bit word */
#define vend		v6	/* reduction accumulator */

/*
 * Core algorithm:
 *
 * For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
 * per byte. For each tuple, bit 0 is set if the relevant byte matched the
 * requested character and bit 1 is not used (this is faster than using a
 * 32-bit syndrome). Since the bits in the syndrome reflect exactly the order
 * in which things occur in the original string, counting trailing zeros
 * allows us to identify exactly which byte has matched.
 */

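/*
 * As an illustration only (not part of the build), here is the syndrome
 * idea in C, using a hypothetical helper ctz64() for "count trailing
 * zeros of a 64-bit value" (e.g. GCC/Clang's __builtin_ctzll):
 *
 *	uint64_t syndrome = 0;
 *	for (int i = 0; i < 32; i++)
 *		if (chunk[i] == c)
 *			syndrome |= 1ULL << (2 * i);
 *	if (syndrome != 0)
 *		return (void *)(chunk + ctz64(syndrome) / 2);
 *
 * The vectorised code below computes the same value with NEON compares,
 * a per-lane mask and pairwise additions.
 */
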
ENTRY(memchr)
	/*
	 * The magic constant 0x40100401 has one distinct bit per byte
	 * (0x01, 0x04, 0x10, 0x40); masking the compare results with it
	 * lets us identify which lane matched the requested byte.
	 */
	cbz	cntin, .Lzero_length
	mov	wtmp2, #0x0401
	movk	wtmp2, #0x4010, lsl #16
	dup	vrepchr.16b, chrin
	/* Work with aligned 32-byte chunks */
	bic	src, srcin, #31
	dup	vrepmask.4s, wtmp2
	ands	soff, srcin, #31
	and	cntrem, cntin, #31
	b.eq	.Lloop

	/*
	 * The input string is not 32-byte aligned. We calculate the
	 * syndrome value for the aligned 32-byte block containing the
	 * first bytes and mask off the irrelevant part.
	 */

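	/*
	 * How the reduction below works: cmeq sets each matching byte to
	 * 0xff; ANDing with vrepmask leaves each byte of a 4-byte group
	 * holding a single distinct bit (0x01, 0x04, 0x10 or 0x40). The
	 * two addp passes then sum adjacent bytes, folding 256 bits of
	 * compare results into a 64-bit syndrome with no carries between
	 * lanes.
	 */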
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	sub	tmp, soff, #32
	adds	cntin, cntin, tmp	/* cntin += soff - 32 */
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
	addp	vend.16b, vhas_chr1.16b, vhas_chr2.16b		/* 256->128 */
	addp	vend.16b, vend.16b, vend.16b			/* 128->64 */
	mov	synd, vend.d[0]
	/* Clear the soff*2 lower bits */
	lsl	tmp, soff, #1
	lsr	synd, synd, tmp
	lsl	synd, synd, tmp
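	/*
	 * The lsr/lsl pair above discards the bit pairs belonging to the
	 * bytes that precede srcin in this aligned block, which would
	 * otherwise yield false matches.
	 */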
	/* The first block can also be the last */
	b.ls	.Lmasklast
	/* Have we found something already? */
	cbnz	synd, .Ltail

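/*
 * Main loop: one aligned 32-byte block per iteration. The full
 * mask-and-reduce is deferred to .Lend; inside the loop, OR-ing the two
 * compare vectors and a single 64-bit pairwise add are enough to answer
 * "was there any match at all?".
 */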
.Lloop:
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	subs	cntin, cntin, #32
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* If we're out of data we finish regardless of the result */
	b.ls	.Lend
	/* Use a fast check for the termination condition */
	orr	vend.16b, vhas_chr1.16b, vhas_chr2.16b
	addp	vend.2d, vend.2d, vend.2d
	mov	synd, vend.d[0]
	/* We're not out of data, so loop if we haven't found the character */
	cbz	synd, .Lloop

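/*
 * Note that the NZCV flags set by the subs/adds that led here are still
 * live: none of the NEON instructions in between touch them. The b.hi at
 * the end of .Lend therefore distinguishes "match found with data still
 * remaining" (no masking needed) from "ran out of data in this block".
 */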
.Lend:
	/* Termination condition found, let's calculate the syndrome value */
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
	addp	vend.16b, vhas_chr1.16b, vhas_chr2.16b		/* 256->128 */
	addp	vend.16b, vend.16b, vend.16b			/* 128->64 */
	mov	synd, vend.d[0]
	/* Only do the clear for the last possible block */
	b.hi	.Ltail

.Lmasklast:
	/* Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits */
	add	tmp, cntrem, soff
	and	tmp, tmp, #31
	sub	tmp, tmp, #32
	neg	tmp, tmp, lsl #1
	lsl	synd, synd, tmp
	lsr	synd, synd, tmp

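/*
 * When (cntrem + soff) is a multiple of 32 the shift amount above is 64;
 * AArch64 variable shifts use the amount modulo 64, so the syndrome is
 * shifted by 0 bits, which is correct because the whole block is valid.
 *
 * The tail below counts trailing zeros with the usual rbit+clz idiom;
 * since the syndrome carries two bits per byte, halving the bit index
 * (lsr #1) yields the byte offset within the block.
 */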
.Ltail:
	/* Count the trailing zeros using bit reversal */
	rbit	synd, synd
	/* Compensate for the last post-increment */
	sub	src, src, #32
	/* Check that we have found a character */
	cmp	synd, #0
	/* And count the leading zeros */
	clz	synd, synd
	/* Compute the potential result: halve the bit index to get bytes */
	add	result, src, synd, lsr #1
	/* Select the result, or NULL if nothing was found */
	csel	result, xzr, result, eq
	ret

.Lzero_length:
	mov	result, xzr
	ret
END(memchr)