/*
 * Copyright (c) 2014
 *      Imagination Technologies Limited.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifdef __ANDROID__
# include <private/bionic_asm.h>
#elif _LIBC
# include <sysdep.h>
# include <regdef.h>
# include <sys/asm.h>
#elif _COMPILING_NEWLIB
# include "machine/asm.h"
# include "machine/regdef.h"
#else
# include <regdef.h>
# include <sys/asm.h>
#endif

/* Technically strcmp should not read past the end of the strings being
   compared.  We will read a full word that may contain excess bytes beyond
   the NUL terminator, but unless ENABLE_READAHEAD is set we will not read
   the next word after the end of the string.  Setting ENABLE_READAHEAD
   improves performance but is technically illegal by the definition of
   strcmp.  */
#ifdef ENABLE_READAHEAD
# define DELAY_READ
#else
# define DELAY_READ nop
#endif
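
/* Note: DELAY_READ lands in the delay slot of the zero-byte branch at the
   end of STRCMP32 below.  When ENABLE_READAHEAD is set it expands to
   nothing, so the first load of the next word moves into that delay slot
   and may be issued even for the word past the terminating NUL.  */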

/* Testing on a little endian machine showed using CLZ was a
   performance loss, so we are not turning it on by default.  */
#if defined(ENABLE_CLZ) && (__mips_isa_rev > 1)
# define USE_CLZ
#endif

/* Some asm.h files do not have the L macro definition.  */
#ifndef L
# if _MIPS_SIM == _ABIO32
#  define L(label) $L ## label
# else
#  define L(label) .L ## label
# endif
#endif

/* Some asm.h files do not have the PTR_ADDIU macro definition.  */
#ifndef PTR_ADDIU
# if _MIPS_SIM == _ABIO32
#  define PTR_ADDIU       addiu
# else
#  define PTR_ADDIU       daddiu
# endif
#endif

/* Allow the routine to be named something else if desired.  */
#ifndef STRCMP_NAME
# define STRCMP_NAME strcmp
#endif

#ifdef __ANDROID__
LEAF(STRCMP_NAME, 0)
#else
LEAF(STRCMP_NAME)
#endif
	.set	nomips16
	.set	noreorder

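/* If either source pointer has a nonzero low two bits, the strings are not
   both word aligned, so fall back to the byte-at-a-time loop.  */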
	or	t0, a0, a1
	andi	t0, 0x3
	bne	t0, zero, L(byteloop)

/* Both strings are 4 byte aligned at this point.  */

	lui	t8, 0x0101
	ori	t8, t8, 0x0101
	lui	t9, 0x7f7f
	ori	t9, 0x7f7f

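/* STRCMP32 compares one aligned word from each string.  t8 = 0x01010101 and
   t9 = 0x7f7f7f7f feed the usual zero-byte test: (v - 0x01010101) & ~v &
   0x80808080 is nonzero exactly when some byte of v is zero ("nor v, t9"
   yields ~v & 0x80808080).  A rough C sketch of the idea, for illustration
   only (hypothetical helper, not part of this file):

     #include <stdint.h>

     static uint32_t has_zero_byte(uint32_t v)
     {
       return (v - 0x01010101u) & ~v & 0x80808080u;
     }
  */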
#define STRCMP32(OFFSET) \
	lw	v0, OFFSET(a0); \
	lw	v1, OFFSET(a1); \
	subu	t0, v0, t8; \
	bne	v0, v1, L(worddiff); \
	nor	t1, v0, t9; \
	and	t0, t0, t1; \
	bne	t0, zero, L(returnzero)

L(wordloop):
	STRCMP32(0)
	DELAY_READ
	STRCMP32(4)
	DELAY_READ
	STRCMP32(8)
	DELAY_READ
	STRCMP32(12)
	DELAY_READ
	STRCMP32(16)
	DELAY_READ
	STRCMP32(20)
	DELAY_READ
	STRCMP32(24)
	DELAY_READ
	STRCMP32(28)
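/* Advance both pointers by 32 bytes; the a1 update executes in the branch
   delay slot.  */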
	PTR_ADDIU a0, a0, 32
	b	L(wordloop)
	PTR_ADDIU a1, a1, 32

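/* A word that compared equal contained the NUL terminator, so the strings
   are equal.  */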
L(returnzero):
	j	ra
	move	v0, zero

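/* The current words differ: locate the first byte that differs or is NUL
   and return the signed difference of the two bytes at that position.  */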
L(worddiff):
#ifdef USE_CLZ
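/* Build a mask with a bit set wherever v0 and v1 differ or v0 has a NUL
   byte, byte-swap it on little endian so memory order matches bit
   significance, then use CLZ to find the first such byte and rotate that
   byte of each word into bits 0-7 before subtracting.  */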
	subu	t0, v0, t8
	nor	t1, v0, t9
	and	t1, t0, t1
	xor	t0, v0, v1
	or	t0, t0, t1
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	wsbh	t0, t0
	rotr	t0, t0, 16
# endif
	clz	t1, t0
	and	t1, 0xf8
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	neg	t1
	addu	t1, 24
# endif
	rotrv	v0, v0, t1
	rotrv	v1, v1, t1
	and	v0, v0, 0xff
	and	v1, v1, 0xff
	j	ra
	subu	v0, v0, v1
#else /* USE_CLZ */
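/* Without CLZ, examine the bytes in memory order (least significant byte
   first on little endian, most significant first on big endian) until a
   NUL or a mismatch is found.  */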
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	andi	t0, v0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, v1, 0xff
	bne	t0, t1, L(wexit01)

	srl	t8, v0, 8
	srl	t9, v1, 8
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	andi	t9, t9, 0xff
	bne	t8, t9, L(wexit89)

	srl	t0, v0, 16
	srl	t1, v1, 16
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, t1, 0xff
	bne	t0, t1, L(wexit01)

	srl	t8, v0, 24
	srl	t9, v1, 24
# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
	srl	t0, v0, 24
	beq	t0, zero, L(wexit01)
	srl	t1, v1, 24
	bne	t0, t1, L(wexit01)

	srl	t8, v0, 16
	srl	t9, v1, 16
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	andi	t9, t9, 0xff
	bne	t8, t9, L(wexit89)

	srl	t0, v0, 8
	srl	t1, v1, 8
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, t1, 0xff
	bne	t0, t1, L(wexit01)

	andi	t8, v0, 0xff
	andi	t9, v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */

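/* Return the signed difference of the bytes found above; which label is
   used depends only on which register pair (t0/t1 or t8/t9) holds them.  */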
L(wexit89):
	j	ra
	subu	v0, t8, t9
L(wexit01):
	j	ra
	subu	v0, t0, t1
#endif /* USE_CLZ */

/* It might seem better to do the 'beq' instruction between the two 'lbu'
   instructions so that the nop is not needed, but testing showed that this
   code is actually faster (based on the glibc strcmp test).  */
#define BYTECMP01(OFFSET) \
	lbu	v0, OFFSET(a0); \
	lbu	v1, OFFSET(a1); \
	beq	v0, zero, L(bexit01); \
	nop; \
	bne	v0, v1, L(bexit01)

#define BYTECMP89(OFFSET) \
	lbu	t8, OFFSET(a0); \
	lbu	t9, OFFSET(a1); \
	beq	t8, zero, L(bexit89); \
	nop;	\
	bne	t8, t9, L(bexit89)

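/* Unaligned path: the strings are compared one byte at a time, eight bytes
   per loop iteration, alternating between the v0/v1 and t8/t9 register
   pairs.  */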
L(byteloop):
	BYTECMP01(0)
	BYTECMP89(1)
	BYTECMP01(2)
	BYTECMP89(3)
	BYTECMP01(4)
	BYTECMP89(5)
	BYTECMP01(6)
	BYTECMP89(7)
	PTR_ADDIU a0, a0, 8
	b	L(byteloop)
	PTR_ADDIU a1, a1, 8

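/* Return the signed difference of the first differing (or NUL) bytes seen
   in the byte loop.  */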
L(bexit01):
	j	ra
	subu	v0, v0, v1
L(bexit89):
	j	ra
	subu	v0, t8, t9

	.set	at
	.set	reorder

END(STRCMP_NAME)
#ifndef __ANDROID__
# ifdef _LIBC
libc_hidden_builtin_def (STRCMP_NAME)
# endif
#endif