Home | History | Annotate | Download | only in bionic
      1 /* Copyright (c) 2014, Linaro Limited
      2    All rights reserved.
      3 
      4    Redistribution and use in source and binary forms, with or without
      5    modification, are permitted provided that the following conditions are met:
      6        * Redistributions of source code must retain the above copyright
      7          notice, this list of conditions and the following disclaimer.
      8        * Redistributions in binary form must reproduce the above copyright
      9          notice, this list of conditions and the following disclaimer in the
     10          documentation and/or other materials provided with the distribution.
     11        * Neither the name of the Linaro nor the
     12          names of its contributors may be used to endorse or promote products
     13          derived from this software without specific prior written permission.
     14 
     15    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     16    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     17    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     18    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     19    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     20    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     21    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 */
     27 
     28 /* Assumptions:
     29  *
     30  * ARMv8-a, AArch64
     31  */
     32 
     33 #include <private/bionic_asm.h>
     34 
     35 /* Arguments and results.  */
     36 #define srcin		x0	/* In: address of the first byte of the string.  */
     37 #define len		x0	/* Out: computed length; shares x0 with srcin.  */
     38 
     39 /* Locals and temporaries.  */
     40 #define src		x1	/* Read cursor: srcin rounded down to 16, post-incremented by each ldp.  */
     41 #define data1		x2	/* First 8 bytes of the current 16-byte chunk.  */
     42 #define data2		x3	/* Second 8 bytes of the current 16-byte chunk.  */
     43 #define data2a		x4	/* data2 with leading bytes forced to 0xff (.Lmisaligned only).  */
     44 #define has_nul1	x5	/* NUL syndrome for data1: non-zero iff data1 has a zero byte.  */
     45 #define has_nul2	x6	/* NUL syndrome for data2; also the syndrome fed to rev/clz.  */
     46 #define tmp1		x7	/* Scratch: start offset / shift count, then (data1 - REP8_01).  */
     47 #define tmp2		x8	/* Scratch: leading-byte mask, then (data1 | REP8_7f).  */
     48 #define tmp3		x9	/* Scratch: (data2 - REP8_01).  */
     49 #define tmp4		x10	/* Scratch: (data2 | REP8_7f).  */
     50 #define zeroones	x11	/* Holds REP8_01 for the whole function.  */
     51 #define pos		x12	/* clz result: bit position of the first NUL marker.  */
     52 
        /* Byte patterns replicated across all 8 byte lanes of a 64-bit
           register.  REP8_80 is not referenced by the code visible in this
           file.  */
     53 #define REP8_01 0x0101010101010101
     54 #define REP8_7f 0x7f7f7f7f7f7f7f7f
     55 #define REP8_80 0x8080808080808080
     56 
     57 	/* Start of critical section -- keep to one 64Byte cache line.  */
        /* strlen -- length of a NUL-terminated byte string (AArch64).
         *
         * In:      x0 (srcin) = address of the first byte.
         * Out:     x0 (len)   = number of bytes before the first NUL byte.
         * Writes:  x1-x12 and the NZCV flags; the visible body makes no stack
         *          accesses and no calls.
         *
         * Memory is read 16 bytes at a time from addresses rounded down to a
         * 16-byte boundary, so bytes before the string start and bytes after
         * the terminator inside the same aligned chunk are loaded too.  Bytes
         * in front of an unaligned start are forced to 0xff in .Lmisaligned so
         * they cannot be mistaken for the terminator.
         *
         * ENTRY/END come from <private/bionic_asm.h>, which is not shown here;
         * any alignment they apply to the entry point is not visible in this
         * file.  */
     58 ENTRY(strlen)
     59 	mov	zeroones, #REP8_01	/* 0x01 in every byte lane, kept for the whole scan.  */
     60 	bic	src, srcin, #15		/* src = srcin rounded down to a 16-byte boundary.  */
     61 	ands	tmp1, srcin, #15	/* tmp1 = start offset inside that chunk; Z set if aligned.  */
     62 	b.ne	.Lmisaligned		/* Unaligned start: mask the leading bytes first.  */
     63 	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
     64 	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
     65 	   can be done in parallel across the entire word.  */
     66 	/* The inner loop deals with two Dwords at a time.  This has a
     67 	   slightly higher start-up cost, but we should win quite quickly,
     68 	   especially on cores with a high number of issue slots per
     69 	   cycle, as we get much better parallelism out of the operations.  */
     70 .Lloop:
     71 	ldp	data1, data2, [src], #16	/* Load 16 bytes, then advance src past them.  */
        	/* .Lmisaligned joins here with data1/data2 already loaded and
        	   masked, and with src already advanced.  */
     72 .Lrealigned:
     73 	sub	tmp1, data1, zeroones
     74 	orr	tmp2, data1, #REP8_7f
     75 	sub	tmp3, data2, zeroones
     76 	orr	tmp4, data2, #REP8_7f
     77 	bic	has_nul1, tmp1, tmp2	/* (data1 - 0x01..) & ~(data1 | 0x7f..).  */
     78 	bics	has_nul2, tmp3, tmp4	/* Same for data2; Z set iff data2 has no NUL.  */
        	/* If data2 was clean (eq), compare has_nul1 with zero; otherwise
        	   force the flags to "ne".  The b.eq below therefore loops only
        	   when neither dword contains a NUL.  */
     79 	ccmp	has_nul1, #0, #0, eq	/* NZCV = 0000  */
     80 	b.eq	.Lloop
     81 	/* End of critical section -- keep to one 64Byte cache line.  */
     82 
        	/* src points just past the 16-byte chunk that holds the NUL.  len
        	   and srcin are both x0, so the input pointer is overwritten
        	   here.  */
     83 	sub	len, src, srcin
     84 	cbz	has_nul1, .Lnul_in_data2	/* First dword clean: the NUL is in data2.  */
     85 #ifdef __AARCH64EB__
        	/* The big-endian tail recomputes the syndrome from data2, so move
        	   the dword that holds the NUL there.  */
     86 	mov	data2, data1
     87 #endif
     88 	sub	len, len, #8		/* NUL is in data1: 8 bytes earlier than data2.  */
     89 	mov	has_nul2, has_nul1	/* Tail below works on has_nul2 only.  */
     90 .Lnul_in_data2:
     91 #ifdef __AARCH64EB__
     92 	/* For big-endian, carry propagation (if the final byte in the
     93 	   string is 0x01) means we cannot use has_nul directly.  The
     94 	   easiest way to get the correct byte is to byte-swap the data
     95 	   and calculate the syndrome a second time.  */
     96 	rev	data2, data2
     97 	sub	tmp1, data2, zeroones
     98 	orr	tmp2, data2, #REP8_7f
     99 	bic	has_nul2, tmp1, tmp2
    100 #endif
    101 	sub	len, len, #8		/* len = offset of the dword that holds the NUL.  */
    102 	rev	has_nul2, has_nul2	/* Earliest string byte's lane becomes the top byte.  */
    103 	clz	pos, has_nul2		/* Leading zero bits = 8 * index of the first NUL byte.  */
    104 	add	len, len, pos, lsr #3		/* Bits to bytes.  */
    105 	ret
    106 
        	/* Entered with tmp1 = srcin & 15 (1..15) and src = srcin rounded
        	   down to 16.  Loads the first aligned chunk and forces every byte
        	   that lies before the string start to 0xff, then joins the main
        	   loop body at .Lrealigned.  */
    107 .Lmisaligned:
        	/* Flags from this cmp are consumed by csinv/csel below; none of
        	   the instructions in between set flags.  "le" means the string
        	   starts in the first dword or exactly at the second.  */
    108 	cmp	tmp1, #8
    109 	neg	tmp1, tmp1
    110 	ldp	data1, data2, [src], #16
    111 	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
    112 	mov	tmp2, #~0
        	/* The register shift uses tmp1 modulo 64, which leaves
        	   8 * (offset mod 8) one-bits at the "early bytes" end of tmp2.
        	   For offset == 8 the shift count is 0 and tmp2 stays all ones.  */
    113 #ifdef __AARCH64EB__
    114 	/* Big-endian.  Early bytes are at MSB.  */
    115 	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
    116 #else
    117 	/* Little-endian.  Early bytes are at LSB.  */
    118 	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
    119 #endif
    120 	orr	data1, data1, tmp2	/* Candidate for offset <= 8: leading bytes of data1 -> 0xff.  */
    121 	orr	data2a, data2, tmp2	/* Candidate for offset > 8: leading bytes of data2 -> 0xff.  */
    122 	csinv	data1, data1, xzr, le	/* offset > 8: all of data1 precedes the string -> all ones.  */
    123 	csel	data2, data2, data2a, le	/* offset > 8: use the masked second dword.  */
    124 	b	.Lrealigned
    125 
    126 END(strlen)
    127