/* MIPS assembly implementation of strncmp (string/). */
/*
 * Copyright (c) 2017 Imagination Technologies.
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      * Redistributions of source code must retain the above copyright
 *        notice, this list of conditions and the following disclaimer.
 *      * Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimer
 *        in the documentation and/or other materials provided with
 *        the distribution.
 *      * Neither the name of Imagination Technologies nor the names of its
 *        contributors may be used to endorse or promote products derived
 *        from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
     32 
/* Pick up the platform's asm helper headers (LEAF/END, register names,
   ABI macros) for whichever C library this file is being built into.  */
#ifdef __ANDROID__
# include <private/bionic_asm.h>
#elif _LIBC
# include <sysdep.h>
# include <regdef.h>
# include <sys/asm.h>
#elif _COMPILING_NEWLIB
# include "machine/asm.h"
# include "machine/regdef.h"
#else
# include <regdef.h>
# include <sys/asm.h>
#endif

/* Width-generic mnemonic aliases so the same body works for both ABIs:
   on MIPS64 operate on 8-byte doublewords (ld/ldl/ldr/dext/dsrl/dsubu),
   on MIPS32 on 4-byte words.  NSIZE is the compare-word size in bytes.  */
#if __mips64
# define NSIZE 8
# define LW ld
# define LWR ldr
# define LWL ldl
# define EXT dext
# define SRL dsrl
# define SUBU dsubu
#else
# define NSIZE 4
# define LW lw
# define LWR lwr
# define LWL lwl
# define EXT ext
# define SRL srl
# define SUBU subu
#endif

/* Technically strcmp should not read past the end of the strings being
   compared.  We will read a full word that may contain excess bits beyond
   the NULL string terminator but unless ENABLE_READAHEAD is set, we will not
   read the next word after the end of string.  Setting ENABLE_READAHEAD will
   improve performance but is technically illegal based on the definition of
   strcmp.  (When readahead is enabled, DELAY_READ expands to nothing so the
   next word load is hoisted into the preceding branch delay slot.)  */
#ifdef ENABLE_READAHEAD
# define DELAY_READ
#else
# define DELAY_READ nop
#endif

/* Testing on a little endian machine showed using CLZ was a
   performance loss, so we are not turning it on by default.
   (CLZ requires MIPS32r2+ and the code path is 32-bit only.)  */
#if defined(ENABLE_CLZ) && (__mips_isa_rev > 1) && (!__mips64)
# define USE_CLZ
#endif

/* Some asm.h files do not have the L macro definition (local-label
   prefix differs between the o32 ABI and the 64-bit ABIs).  */
#ifndef L
# if _MIPS_SIM == _ABIO32
#  define L(label) $L ## label
# else
#  define L(label) .L ## label
# endif
#endif

/* Some asm.h files do not have the PTR_ADDIU macro definition
   (pointer-sized add-immediate: 32-bit addiu vs. 64-bit daddiu).  */
#ifndef PTR_ADDIU
# if _MIPS_SIM == _ABIO32
#  define PTR_ADDIU       addiu
# else
#  define PTR_ADDIU       daddiu
# endif
#endif

/* Compare one byte from each string; branch to the shared exit label on a
   NUL in the first string or on a mismatch.  BYTECMP01 works in $v0/$v1
   and exits via L(bexit01); BYTECMP89 works in $t8/$t9 and exits via
   L(bexit89), so the two can be interleaved without register conflicts.
   The nop fills the beq delay slot (the file runs under .set noreorder).
   It might seem better to do the 'beq' instruction between the two 'lbu'
   instructions so that the nop is not needed but testing showed that this
   code is actually faster (based on glibc strcmp test).  */
#define BYTECMP01(OFFSET) \
    lbu $v0, OFFSET($a0); \
    lbu $v1, OFFSET($a1); \
    beq $v0, $zero, L(bexit01); \
    nop; \
    bne $v0, $v1, L(bexit01)

#define BYTECMP89(OFFSET) \
    lbu $t8, OFFSET($a0); \
    lbu $t9, OFFSET($a1); \
    beq $t8, $zero, L(bexit89); \
    nop;    \
    bne $t8, $t9, L(bexit89)

/* Allow the routine to be named something else if desired.  */
#ifndef STRNCMP_NAME
# define STRNCMP_NAME strncmp
#endif
    122 
/*
 * int STRNCMP_NAME(const char *s1, const char *s2, size_t n)
 *
 * In:      $a0 = s1, $a1 = s2, $a2 = n (bytes remaining to compare)
 * Out:     $v0 = byte difference at the first mismatch (s1[i] - s2[i]),
 *          or 0 if the strings are equal over the first n bytes or a
 *          NUL is reached with no prior difference.
 * Scratch: $v0, $v1, $t0, $t1, $t8, $t9 (all caller-saved).
 *
 * NOTE: .set noreorder is in effect for the whole routine, so the
 * instruction following every branch/jump executes in its delay slot.
 */
#ifdef __ANDROID__
LEAF(STRNCMP_NAME, 0)
#else
LEAF(STRNCMP_NAME)
#endif
    .set    nomips16
    .set    noreorder

    /* n / (2*NSIZE) == 0 means too few bytes for word compares.  */
    srl $t0, $a2, (2 + NSIZE / 4)
    beqz  $t0, L(byteloop) #process by bytes if less than (2 * NSIZE)
    andi $t1, $a1, (NSIZE - 1)     # (delay slot) t1 = a1 misalignment
    beqz  $t1, L(exitalign)
    or   $t0, $zero, NSIZE         # (delay slot) t0 = NSIZE
    SUBU $t1, $t0, $t1 #process (NSIZE - 1) bytes at max
    SUBU $a2, $a2, $t1 #dec count by t1

L(alignloop): #do by bytes until a1 aligned
    BYTECMP01(0)
    SUBU $t1, $t1, 0x1
    PTR_ADDIU $a0, $a0, 0x1
    bne  $t1, $zero, L(alignloop)
    PTR_ADDIU $a1, $a1, 0x1        # (delay slot) runs on both paths

L(exitalign):

/* string a1 is NSIZE byte aligned at this point. */
#ifndef __mips1
    /* Build the zero-byte-detection constants used by STRCMPW/USTRCMPW:
       t8 = 0x0101...01, t9 = 0x7f7f...7f, so that
       (word - t8) & ~(word | t9) is nonzero iff word contains a 0 byte.  */
    lui $t8, 0x0101
    ori $t8, 0x0101
    lui $t9, 0x7f7f
    ori $t9, 0x7f7f
#if __mips64
    dsll $t0, $t8, 32              # widen both constants to 64 bits
    or  $t8, $t0
    dsll $t1, $t9, 32
    or  $t9, $t1
#endif
#endif

/* Hardware or software unaligned access is not supported for mips1
   (fall back to bytes); rev6 archs have h/w unaligned support (aligned
   loads work on any address); the remaining archs need the lwl/lwr
   unaligned-load sequence in L(uwordloop).  */

#if __mips1
    andi $t0, $a0, (NSIZE - 1)
    bne  $t0, $zero, L(byteloop)
#elif __mips_isa_rev < 6
    andi $t0, $a0, (NSIZE - 1)
    bne  $t0, $zero, L(uwordloop)
#endif

/* Compare one aligned word from each string.  Branch to L(worddiff) on
   any byte difference, or to L(returnzero) when the a0 word contains a
   NUL byte and the words are equal (strings ended equal).  The final
   'bne' leaves its delay slot to the caller (DELAY_READ or the next
   macro's first load).  */
#define STRCMPW(OFFSET) \
    LW   $v0, (OFFSET)($a0); \
    LW   $v1, (OFFSET)($a1); \
    SUBU $t0, $v0, $t8; \
    bne  $v0, $v1, L(worddiff); \
    nor  $t1, $v0, $t9; \
    and  $t0, $t0, $t1; \
    bne  $t0, $zero, L(returnzero);\

/* Main loop: 8 word compares per iteration while n >= 8*NSIZE.
   The first LW of STRCMPW(0) sits in the bltz delay slot and executes
   on both paths (harmless when exiting to L(onewords)).  */
L(wordloop):
    SUBU $t1, $a2, (8 * NSIZE)
    bltz $t1, L(onewords)
    STRCMPW(0 * NSIZE)
    DELAY_READ
    STRCMPW(1 * NSIZE)
    DELAY_READ
    STRCMPW(2 * NSIZE)
    DELAY_READ
    STRCMPW(3 * NSIZE)
    DELAY_READ
    STRCMPW(4 * NSIZE)
    DELAY_READ
    STRCMPW(5 * NSIZE)
    DELAY_READ
    STRCMPW(6 * NSIZE)
    DELAY_READ
    STRCMPW(7 * NSIZE)
    SUBU $a2, $a2, (8 * NSIZE)
    PTR_ADDIU $a0, $a0, (8 * NSIZE)
    b   L(wordloop)
    PTR_ADDIU $a1, $a1, (8 * NSIZE)    # (delay slot)

/* Tail: one word compare per iteration while n >= NSIZE.  */
L(onewords):
    SUBU $t1, $a2, NSIZE
    bltz $t1, L(byteloop)
    STRCMPW(0)
    SUBU $a2, $a2, NSIZE
    PTR_ADDIU $a0, $a0, NSIZE
    b   L(onewords)
    PTR_ADDIU $a1, $a1, NSIZE          # (delay slot)

#if __mips_isa_rev < 6 && !__mips1
/* Same as STRCMPW but a0 may be unaligned: assemble the a0 word with the
   lwl/lwr (ldl/ldr) pair; a1 is word-aligned by this point (see
   L(alignloop)) so a plain LW suffices for it.  */
#define USTRCMPW(OFFSET) \
    LWR $v0, (OFFSET)($a0); \
    LWL $v0, (OFFSET + NSIZE - 1)($a0); \
    LW  $v1, (OFFSET)($a1); \
    SUBU    $t0, $v0, $t8; \
    bne $v0, $v1, L(worddiff); \
    nor $t1, $v0, $t9; \
    and $t0, $t0, $t1; \
    bne $t0, $zero, L(returnzero);\

/* Unaligned-a0 main loop; structure mirrors L(wordloop).  */
L(uwordloop):
    SUBU $t1, $a2, (8 * NSIZE)
    bltz $t1, L(uonewords)
    USTRCMPW(0 * NSIZE)
    DELAY_READ
    USTRCMPW(1 * NSIZE)
    DELAY_READ
    USTRCMPW(2 * NSIZE)
    DELAY_READ
    USTRCMPW(3 * NSIZE)
    DELAY_READ
    USTRCMPW(4 * NSIZE)
    DELAY_READ
    USTRCMPW(5 * NSIZE)
    DELAY_READ
    USTRCMPW(6 * NSIZE)
    DELAY_READ
    USTRCMPW(7 * NSIZE)
    SUBU $a2, $a2, (8 * NSIZE)
    PTR_ADDIU $a0, $a0, (8 * NSIZE)
    b   L(uwordloop)
    PTR_ADDIU $a1, $a1, (8 * NSIZE)    # (delay slot)

/* Unaligned-a0 tail; mirrors L(onewords).  */
L(uonewords):
    SUBU $t1, $a2, NSIZE
    bltz $t1, L(byteloop)
    USTRCMPW(0)
    SUBU $a2, $a2, NSIZE
    PTR_ADDIU $a0, $a0, NSIZE
    b   L(uonewords)
    PTR_ADDIU $a1, $a1, NSIZE          # (delay slot)

#endif

/* Strings compared equal (NUL reached, or n exhausted): return 0.  */
L(returnzero):
    j   $ra
    move    $v0, $zero                 # (delay slot) v0 = 0

/* Extract byte POS..POS+7 of each word into t0/t1 (or t8/t9) and exit
   via the matching wexit label on NUL or mismatch.  rev2+ has the
   single-instruction EXT; older ISAs use shift+mask.  */
#if __mips_isa_rev > 1
#define EXT_COMPARE01(POS) \
    EXT $t0, $v0, POS, 8; \
    beq $t0, $zero, L(wexit01); \
    EXT $t1, $v1, POS, 8; \
    bne $t0, $t1, L(wexit01)
#define EXT_COMPARE89(POS) \
    EXT $t8, $v0, POS, 8; \
    beq $t8, $zero, L(wexit89); \
    EXT $t9, $v1, POS, 8; \
    bne $t8, $t9, L(wexit89)
#else
#define EXT_COMPARE01(POS) \
    SRL  $t0, $v0, POS; \
    SRL  $t1, $v1, POS; \
    andi $t0, $t0, 0xff; \
    beq  $t0, $zero, L(wexit01); \
    andi $t1, $t1, 0xff; \
    bne  $t0, $t1, L(wexit01)
#define EXT_COMPARE89(POS) \
    SRL  $t8, $v0, POS; \
    SRL  $t9, $v1, POS; \
    andi $t8, $t8, 0xff; \
    beq  $t8, $zero, L(wexit89); \
    andi $t9, $t9, 0xff; \
    bne  $t8, $t9, L(wexit89)
#endif

/* The words in $v0/$v1 differ (or $v0 holds a NUL): locate the first
   differing or NUL byte in memory order and return its difference.  */
L(worddiff):
#ifdef USE_CLZ
    /* Recompute the zero-byte mask, OR in the difference bits, and use
       clz to find the first interesting byte; rotate both words so that
       byte lands in the low 8 bits.  */
    SUBU    $t0, $v0, $t8
    nor $t1, $v0, $t9
    and $t1, $t0, $t1
    xor $t0, $v0, $v1
    or  $t0, $t0, $t1
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    wsbh    $t0, $t0                   # byte-reverse so clz finds the
    rotr    $t0, $t0, 16               # lowest-addressed byte first
# endif
    clz $t1, $t0
    and $t1, 0xf8                      # round bit index down to a byte
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    neg $t1
    addu    $t1, 24
# endif
    rotrv   $v0, $v0, $t1
    rotrv   $v1, $v1, $t1
    and $v0, $v0, 0xff
    and $v1, $v1, 0xff
    j   $ra
    SUBU    $v0, $v0, $v1              # (delay slot) return difference
#else /* USE_CLZ */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    /* Little endian: byte 0 of the string is in bits 0..7.  Walk bytes
       from low to high, alternating the 01/89 register pairs.  */
    andi    $t0, $v0, 0xff
    beq $t0, $zero, L(wexit01)
    andi    $t1, $v1, 0xff
    bne $t0, $t1, L(wexit01)
    EXT_COMPARE89(8)
    EXT_COMPARE01(16)
#ifndef __mips64
    SRL $t8, $v0, 24                   # last byte: no mask needed
    SRL $t9, $v1, 24
#else
    EXT_COMPARE89(24)
    EXT_COMPARE01(32)
    EXT_COMPARE89(40)
    EXT_COMPARE01(48)
    SRL $t8, $v0, 56                   # last byte of the doubleword
    SRL $t9, $v1, 56
#endif

# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
    /* Big endian: byte 0 of the string is in the most-significant bits.
       Walk bytes from high to low.  */
#ifdef __mips64
    SRL $t0, $v0, 56
    beq $t0, $zero, L(wexit01)
    SRL $t1, $v1, 56
    bne $t0, $t1, L(wexit01)
    EXT_COMPARE89(48)
    EXT_COMPARE01(40)
    EXT_COMPARE89(32)
    EXT_COMPARE01(24)
#else
    SRL $t0, $v0, 24
    beq $t0, $zero, L(wexit01)
    SRL $t1, $v1, 24
    bne $t0, $t1, L(wexit01)
#endif
    EXT_COMPARE89(16)
    EXT_COMPARE01(8)

    andi    $t8, $v0, 0xff
    andi    $t9, $v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */

L(wexit89):
    j   $ra
    SUBU    $v0, $t8, $t9              # (delay slot) return t8 - t9
L(wexit01):
    j   $ra
    SUBU    $v0, $t0, $t1              # (delay slot) return t0 - t1
#endif /* USE_CLZ */

/* Byte-at-a-time loop for short counts and mips1 unaligned a0:
   4 bytes per iteration, checking n before each byte.  The SUBU after
   each beq is in its delay slot and runs on both paths (the extra
   decrement is harmless once we exit via L(returnzero)).  */
L(byteloop):
    beq $a2, $zero, L(returnzero)
    SUBU $a2, $a2, 1
    BYTECMP01(0)
    nop
    beq $a2, $zero, L(returnzero)
    SUBU $a2, $a2, 1
    BYTECMP89(1)
    nop
    beq $a2, $zero, L(returnzero)
    SUBU $a2, $a2, 1
    BYTECMP01(2)
    nop
    beq $a2, $zero, L(returnzero)
    SUBU $a2, $a2, 1
    BYTECMP89(3)
    PTR_ADDIU $a0, $a0, 4
    b   L(byteloop)
    PTR_ADDIU $a1, $a1, 4              # (delay slot)

/* Byte-compare exits: return the difference of the two bytes read.  */
L(bexit01):
    j   $ra
    SUBU    $v0, $v0, $v1              # (delay slot)
L(bexit89):
    j   $ra
    SUBU    $v0, $t8, $t9              # (delay slot)

    .set    at
    .set    reorder

END(STRNCMP_NAME)
#ifndef __ANDROID__
# ifdef _LIBC
libc_hidden_builtin_def (STRNCMP_NAME)
# endif
#endif
    402