/*
 * Copyright (c) 2017 Imagination Technologies.
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      * Redistributions of source code must retain the above copyright
 *        notice, this list of conditions and the following disclaimer.
 *      * Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimer
 *        in the documentation and/or other materials provided with
 *        the distribution.
 *      * Neither the name of Imagination Technologies nor the names of its
 *        contributors may be used to endorse or promote products derived
 *        from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifdef __ANDROID__
# include <private/bionic_asm.h>
#elif _LIBC
# include <sysdep.h>
# include <regdef.h>
# include <sys/asm.h>
#elif _COMPILING_NEWLIB
# include "machine/asm.h"
# include "machine/regdef.h"
#else
# include <regdef.h>
# include <sys/asm.h>
#endif

#if __mips64
# define NSIZE 8
# define LW ld
# define EXT dext
# define SRL dsrl
# define SLL dsll
# define SUBU dsubu
#else
# define NSIZE 4
# define LW lw
# define EXT ext
# define SRL srl
# define SLL sll
# define SUBU subu
#endif

/* Technically strcmp should not read past the end of the strings being
   compared.  We will read a full word that may contain bytes beyond the
   NUL terminator, but unless ENABLE_READAHEAD is set we will not read
   the next word after the end of the string.  Setting ENABLE_READAHEAD
   improves performance but is technically illegal given the definition
   of strcmp.  */
#ifdef ENABLE_READAHEAD
# define DELAY_READ
#else
# define DELAY_READ nop
#endif
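
/* Illustration (not part of the build): with aligned loads the
   over-read stays inside the word that holds the terminator, so it
   cannot cross a page boundary.  A minimal C sketch of the invariant,
   where word_t stands for the NSIZE-wide unsigned type (hypothetical
   name):

       word_t w = *(const word_t *) p;   // p % NSIZE == 0
       // Bytes of w past the NUL are read, but p .. p + NSIZE - 1 all
       // lie in the same page as the terminator, so no fault occurs.

   With ENABLE_READAHEAD the word after the terminator may also be
   loaded, which can touch the following page; that is the technically
   illegal case described above.  */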

/* Testing on a little-endian machine showed that using CLZ was a
   performance loss, so it is not turned on by default.  */
#if defined(ENABLE_CLZ) && (__mips_isa_rev > 1)
# define USE_CLZ
#endif

/* Some asm.h files do not have the L macro definition.  */
#ifndef L
# if _MIPS_SIM == _ABIO32
#  define L(label) $L ## label
# else
#  define L(label) .L ## label
# endif
#endif

/* Some asm.h files do not have the PTR_ADDIU macro definition.  */
#ifndef PTR_ADDIU
# if _MIPS_SIM == _ABIO32
#  define PTR_ADDIU       addiu
# else
#  define PTR_ADDIU       daddiu
# endif
#endif

/* It might seem better to place the 'beq' between the two 'lbu'
   instructions so that the nop is not needed, but testing showed that
   this code is actually faster (based on the glibc strcmp test).  */
#define BYTECMP01(OFFSET) \
    lbu $v0, OFFSET($a0); \
    lbu $v1, OFFSET($a1); \
    beq $v0, $zero, L(bexit01); \
    nop; \
    bne $v0, $v1, L(bexit01)

#define BYTECMP89(OFFSET) \
    lbu $t8, OFFSET($a0); \
    lbu $t9, OFFSET($a1); \
    beq $t8, $zero, L(bexit89); \
    nop; \
    bne $t8, $t9, L(bexit89)
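
/* For reference, each BYTECMP step is one iteration of the plain C
   byte loop; an illustrative sketch (not part of the build):

       unsigned char c1 = a0[OFFSET], c2 = a1[OFFSET];
       if (c1 == 0 || c1 != c2)
           return c1 - c2;      // the beq/bne pair exits here

   BYTECMP01 keeps its bytes in $v0/$v1 and BYTECMP89 in $t8/$t9, so
   consecutive steps can be interleaved without clobbering each other.  */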

/* Allow the routine to be named something else if desired.  */
#ifndef STRCMP_NAME
# define STRCMP_NAME strcmp
#endif

#ifdef __ANDROID__
LEAF(STRCMP_NAME, 0)
#else
LEAF(STRCMP_NAME)
#endif
    .set    nomips16
    .set    noreorder

    andi $t1, $a1, (NSIZE - 1)
    beqz $t1, L(exitalign)
    or   $t0, $zero, NSIZE
    SUBU $t1, $t0, $t1 # at most (NSIZE - 1) bytes to process

L(alignloop): # compare byte by byte until a1 is aligned
    BYTECMP01(0)
    SUBU $t1, $t1, 0x1
    PTR_ADDIU $a0, $a0, 0x1
    bnez  $t1, L(alignloop)
    PTR_ADDIU $a1, $a1, 0x1

L(exitalign):

/* String a1 is NSIZE-byte aligned at this point.  */

    lui $t8, 0x0101
    ori $t8, 0x0101
    lui $t9, 0x7f7f
    ori $t9, 0x7f7f
#if __mips64
    dsll $t1, $t8, 32
    or  $t8, $t1
    dsll $t1, $t9, 32
    or  $t9, $t1
#endif
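
/* $t8 = 0x0101...01 and $t9 = 0x7f7f...7f drive the classic
   word-at-a-time zero-byte test used by STRCMPW/USTRCMPW below.  A
   minimal C sketch of the test, 32-bit case shown (illustrative only):

       // Nonzero iff some byte of w is zero.
       uint32_t has_zero = (w - 0x01010101u) & ~(w | 0x7f7f7f7fu);

   In the asm, SUBU computes w - $t8 and "nor $t1, w, $t9" computes
   ~(w | $t9), i.e. ~w & 0x80808080; their AND sets bit 7 of exactly
   those bytes of w that are zero.  */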

    andi $t2, $a0, (NSIZE - 1) # check whether a0 is aligned
    SUBU $t3, $t0, $t2 # t3 will be used as the shift amount
    bnez $t2, L(uloopenter)
    SUBU $a2, $a0, $t2 # round a0 down to an aligned address

#define STRCMPW(OFFSET) \
    LW   $v0, OFFSET($a0); \
    LW   $v1, OFFSET($a1); \
    SUBU $t0, $v0, $t8; \
    bne  $v0, $v1, L(worddiff); \
    nor  $t1, $v0, $t9; \
    and  $t0, $t0, $t1; \
    bne  $t0, $zero, L(returnzero);\

L(wordloop):
    STRCMPW(0 * NSIZE)
    DELAY_READ
    STRCMPW(1 * NSIZE)
    DELAY_READ
    STRCMPW(2 * NSIZE)
    DELAY_READ
    STRCMPW(3 * NSIZE)
    DELAY_READ
    STRCMPW(4 * NSIZE)
    DELAY_READ
    STRCMPW(5 * NSIZE)
    DELAY_READ
    STRCMPW(6 * NSIZE)
    DELAY_READ
    STRCMPW(7 * NSIZE)
    PTR_ADDIU $a0, $a0, (8 * NSIZE)
    b   L(wordloop)
    PTR_ADDIU $a1, $a1, (8 * NSIZE)

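/* The unaligned path keeps a0 rounded down to an aligned address in
   $a2 and rebuilds each word of string a0 from two adjacent aligned
   loads.  A hedged C sketch of the recombination, assuming the 32-bit
   little-endian case (the shift amounts are set up at L(uloopenter)
   below):

       // lo is the aligned word below the unaligned address, hi the
       // next one; t2 = 8 * misalignment, t3 = 8 * NSIZE - t2 (bits).
       uint32_t w = (lo >> t2) | (hi << t3);

   The first lo word is padded with one bits over the bytes that sit
   before the start of the string (see L(uloopenter)) so a stray zero
   byte there is not mistaken for the terminator.  */
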
#define USTRCMPW(OFFSET) \
    LW  $v1, OFFSET($a1); \
    SUBU    $t0, $v0, $t8; \
    nor $t1, $v0, $t9; \
    and $t0, $t0, $t1; \
    bne $t0, $zero, L(worddiff); \
    SRL $v0, $t2; \
    LW  $a3, (OFFSET + NSIZE)($a2); \
    SUBU    $t0, $v1, $t8; \
    SLL $t1, $a3, $t3; \
    or  $v0, $v0, $t1; \
    bne $v0, $v1, L(worddiff); \
    nor $t1, $v1, $t9; \
    and $t0, $t0, $t1; \
    bne $t0, $zero, L(returnzero); \
    move $v0, $a3;\

L(uloopenter):
    LW  $v0, 0($a2)
    SLL $t2, 3  # multiply by 8: byte offset to bit shift
    SLL $t3, 3  # multiply by 8: byte offset to bit shift
    li  $a3, -1 # all ones
    SRL $a3, $t3
    or  $v0, $a3 # force ones into the bytes read before the string start

L(uwordloop):
    USTRCMPW(0 * NSIZE)
    USTRCMPW(1 * NSIZE)
    USTRCMPW(2 * NSIZE)
    USTRCMPW(3 * NSIZE)
    USTRCMPW(4 * NSIZE)
    USTRCMPW(5 * NSIZE)
    USTRCMPW(6 * NSIZE)
    USTRCMPW(7 * NSIZE)
    PTR_ADDIU $a2, $a2, (8 * NSIZE)
    b   L(uwordloop)
    PTR_ADDIU $a1, $a1, (8 * NSIZE)

L(returnzero):
    j   $ra
    move    $v0, $zero

#if __mips_isa_rev > 1
# define EXT_COMPARE01(POS) \
    EXT $t0, $v0, POS, 8; \
    beq $t0, $zero, L(wexit01); \
    EXT $t1, $v1, POS, 8; \
    bne $t0, $t1, L(wexit01)
# define EXT_COMPARE89(POS) \
    EXT $t8, $v0, POS, 8; \
    beq $t8, $zero, L(wexit89); \
    EXT $t9, $v1, POS, 8; \
    bne $t8, $t9, L(wexit89)
#else
# define EXT_COMPARE01(POS) \
    SRL  $t0, $v0, POS; \
    SRL  $t1, $v1, POS; \
    andi $t0, $t0, 0xff; \
    beq  $t0, $zero, L(wexit01); \
    andi $t1, $t1, 0xff; \
    bne  $t0, $t1, L(wexit01)
# define EXT_COMPARE89(POS) \
    SRL  $t8, $v0, POS; \
    SRL  $t9, $v1, POS; \
    andi $t8, $t8, 0xff; \
    beq  $t8, $zero, L(wexit89); \
    andi $t9, $t9, 0xff; \
    bne  $t8, $t9, L(wexit89)
#endif

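/* With USE_CLZ, L(worddiff) locates the first byte that is zero or
   differs without scanning byte by byte.  A minimal C sketch of the
   idea, assuming GCC builtins and the 32-bit little-endian case
   (illustrative only):

       uint32_t zero = (w0 - 0x01010101u) & ~(w0 | 0x7f7f7f7fu);
       uint32_t mask = (w0 ^ w1) | zero;   // nonzero when we get here
       // Byte-swap so clz counts from the lowest-addressed byte, then
       // round the bit index down to a byte boundary.
       int sh = __builtin_clz (__builtin_bswap32 (mask)) & ~7;
       return ((w0 >> sh) & 0xff) - ((w1 >> sh) & 0xff);

   Below, wsbh/rotr form the byte swap, "and $t1, 0xf8" does the
   rounding, and rotrv/and extract the differing byte from each word.  */
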
L(worddiff):
#ifdef USE_CLZ
    SUBU    $t0, $v0, $t8
    nor $t1, $v0, $t9
    and $t1, $t0, $t1
    xor $t0, $v0, $v1
    or  $t0, $t0, $t1
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    wsbh    $t0, $t0
    rotr    $t0, $t0, 16
# endif
    clz $t1, $t0
    and $t1, 0xf8
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    neg $t1
    addu    $t1, 24
# endif
    rotrv   $v0, $v0, $t1
    rotrv   $v1, $v1, $t1
    and $v0, $v0, 0xff
    and $v1, $v1, 0xff
    j   $ra
    SUBU    $v0, $v0, $v1
#else /* USE_CLZ */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    andi    $t0, $v0, 0xff
    beq $t0, $zero, L(wexit01)
    andi    $t1, $v1, 0xff
    bne $t0, $t1, L(wexit01)
    EXT_COMPARE89(8)
    EXT_COMPARE01(16)
#  ifndef __mips64
    SRL $t8, $v0, 24
    SRL $t9, $v1, 24
#  else
    EXT_COMPARE89(24)
    EXT_COMPARE01(32)
    EXT_COMPARE89(40)
    EXT_COMPARE01(48)
    SRL $t8, $v0, 56
    SRL $t9, $v1, 56
#  endif

# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
#  ifdef __mips64
    SRL $t0, $v0, 56
    beq $t0, $zero, L(wexit01)
    SRL $t1, $v1, 56
    bne $t0, $t1, L(wexit01)
    EXT_COMPARE89(48)
    EXT_COMPARE01(40)
    EXT_COMPARE89(32)
    EXT_COMPARE01(24)
#  else
    SRL $t0, $v0, 24
    beq $t0, $zero, L(wexit01)
    SRL $t1, $v1, 24
    bne $t0, $t1, L(wexit01)
#  endif
    EXT_COMPARE89(16)
    EXT_COMPARE01(8)

    andi    $t8, $v0, 0xff
    andi    $t9, $v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */

L(wexit89):
    j   $ra
    SUBU    $v0, $t8, $t9
L(wexit01):
    j   $ra
    SUBU    $v0, $t0, $t1
#endif /* USE_CLZ */

L(byteloop):
    BYTECMP01(0)
    BYTECMP89(1)
    BYTECMP01(2)
    BYTECMP89(3)
    BYTECMP01(4)
    BYTECMP89(5)
    BYTECMP01(6)
    BYTECMP89(7)
    PTR_ADDIU $a0, $a0, 8
    b   L(byteloop)
    PTR_ADDIU $a1, $a1, 8

L(bexit01):
    j   $ra
    SUBU    $v0, $v0, $v1
L(bexit89):
    j   $ra
    SUBU    $v0, $t8, $t9

    .set    at
    .set    reorder

END(STRCMP_NAME)
#ifndef __ANDROID__
# ifdef _LIBC
libc_hidden_builtin_def (STRCMP_NAME)
# endif
#endif
    363