/*
 * Copyright (c) 2013
 *      MIPS Technologies, Inc., California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifdef __ANDROID__
# include <private/bionic_asm.h>
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
#elif _LIBC
# include <sysdep.h>
# include <regdef.h>
# include <sys/asm.h>
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
#elif _COMPILING_NEWLIB
# include "machine/asm.h"
# include "machine/regdef.h"
# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
#else
# include <regdef.h>
# include <sys/asm.h>
#endif

/* Check to see if the MIPS architecture we are compiling for supports
   prefetching.  */

#if (__mips == 4) || (__mips == 5) || (__mips == 32) || (__mips == 64)
# ifndef DISABLE_PREFETCH
#  define USE_PREFETCH
# endif
#endif

#if defined(_MIPS_SIM) && ((_MIPS_SIM == _ABI64) || (_MIPS_SIM == _ABIN32))
# ifndef DISABLE_DOUBLE
#  define USE_DOUBLE
# endif
#endif

#ifndef USE_DOUBLE
# ifndef DISABLE_DOUBLE_ALIGN
#  define DOUBLE_ALIGN
# endif
#endif

/* Some asm.h files do not have the L macro definition.  */
#ifndef L
# if _MIPS_SIM == _ABIO32
#  define L(label) $L ## label
# else
#  define L(label) .L ## label
# endif
#endif

/* Some asm.h files do not have the PTR_ADDIU macro definition.  */
#ifndef PTR_ADDIU
# if _MIPS_SIM == _ABIO32
#  define PTR_ADDIU	addiu
# else
#  define PTR_ADDIU	daddiu
# endif
#endif

/* New R6 instructions that may not be in asm.h.  */
#ifndef PTR_LSA
# if _MIPS_SIM == _ABIO32
#  define PTR_LSA        lsa
# else
#  define PTR_LSA        dlsa
# endif
#endif

/* Using PREFETCH_HINT_PREPAREFORSTORE instead of PREFETCH_STORE
   or PREFETCH_STORE_STREAMED offers a large performance advantage,
   but PREPAREFORSTORE has some special restrictions to consider.

   A prefetch with the 'prepare for store' hint does not copy a memory
   location into the cache; it just allocates a cache line and zeros
   it out.  This means that if you do not write to the entire cache
   line before it is written back to memory, the bytes you did not
   write are replaced with zeros and that data is lost.

   There are ifdef'ed sections of this memset to make sure that it does not
   do prefetches on cache lines that are not going to be completely written.
   This code is only needed and only used when PREFETCH_STORE_HINT is set to
   PREFETCH_HINT_PREPAREFORSTORE.  This code assumes that cache lines are
   less than MAX_PREFETCH_SIZE bytes; if the cache line is larger, the code
   will not work correctly.  */

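/* For illustration (assuming, for the example only, a 32-byte cache line):
   if the line at 0x1000 is prefetched with PREPAREFORSTORE but only bytes
   0x1000-0x100f are stored before the line is evicted, bytes 0x1010-0x101f
   are written back to memory as zeros, clobbering the data that was there.
   The checks below therefore never issue this prefetch for a line that
   will not be completely written.  */
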
#ifdef USE_PREFETCH
# define PREFETCH_HINT_STORE		1
# define PREFETCH_HINT_STORE_STREAMED	5
# define PREFETCH_HINT_STORE_RETAINED	7
# define PREFETCH_HINT_PREPAREFORSTORE	30

/* If we have not picked out what hints to use at this point use the
   standard load and store prefetch hints.  */
# ifndef PREFETCH_STORE_HINT
#  define PREFETCH_STORE_HINT PREFETCH_HINT_STORE
# endif

/* We double everything when USE_DOUBLE is true so we do 2 prefetches to
   get 64 bytes in that case.  The assumption is that each individual
   prefetch brings in 32 bytes.  */
# ifdef USE_DOUBLE
#  define PREFETCH_CHUNK 64
#  define PREFETCH_FOR_STORE(chunk, reg) \
    pref PREFETCH_STORE_HINT, (chunk)*64(reg); \
    pref PREFETCH_STORE_HINT, ((chunk)*64)+32(reg)
# else
#  define PREFETCH_CHUNK 32
#  define PREFETCH_FOR_STORE(chunk, reg) \
    pref PREFETCH_STORE_HINT, (chunk)*32(reg)
# endif

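/* For example, with USE_DOUBLE defined, PREFETCH_FOR_STORE (2, $a0)
   expands to:
     pref PREFETCH_STORE_HINT, 128($a0); pref PREFETCH_STORE_HINT, 160($a0)
   covering the 64-byte chunk that starts two chunks ahead of $a0.  */
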
/* MAX_PREFETCH_SIZE is the maximum size of a prefetch; it must not be less
   than PREFETCH_CHUNK, the assumed size of each prefetch.  If the real size
   of a prefetch is greater than MAX_PREFETCH_SIZE and the PREPAREFORSTORE
   hint is used, the code will not work correctly.  If PREPAREFORSTORE is not
   used then MAX_PREFETCH_SIZE does not matter.  */
# define MAX_PREFETCH_SIZE 128
/* PREFETCH_LIMIT is set based on the fact that we never use an offset greater
   than 5 on a STORE prefetch and that a single prefetch can never be larger
   than MAX_PREFETCH_SIZE.  We add the extra 32 when USE_DOUBLE is set because
   we actually do two prefetches in that case, one 32 bytes after the other.  */
# ifdef USE_DOUBLE
#  define PREFETCH_LIMIT ((5 * PREFETCH_CHUNK) + 32 + MAX_PREFETCH_SIZE)
# else
#  define PREFETCH_LIMIT ((5 * PREFETCH_CHUNK) + MAX_PREFETCH_SIZE)
# endif

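/* With the values above, PREFETCH_LIMIT works out to
   (5 * 64) + 32 + 128 = 480 bytes when USE_DOUBLE is set, and
   (5 * 32) + 128 = 288 bytes otherwise; the "last safe prefetch" address
   computed below (t9 = t0 - PREFETCH_LIMIT) keeps every PREPAREFORSTORE
   prefetch inside the buffer.  */
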
# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) \
    && ((PREFETCH_CHUNK * 4) < MAX_PREFETCH_SIZE)
/* We cannot handle this because the initial prefetches may fetch bytes that
   are before the buffer being set.  We start stores with an offset
   of 4 to avoid this situation when using PREPAREFORSTORE.  */
#  error "PREFETCH_CHUNK is too large and/or MAX_PREFETCH_SIZE is too small."
# endif
#else /* USE_PREFETCH not defined */
# define PREFETCH_FOR_STORE(chunk, reg)
#endif

#if __mips_isa_rev > 5
# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
#  undef PREFETCH_STORE_HINT
#  define PREFETCH_STORE_HINT PREFETCH_HINT_STORE_STREAMED
# endif
# define R6_CODE
#endif

/* We load/store 64 bits at a time when USE_DOUBLE is true.
   The C_ prefix stands for CHUNK and is used to avoid macro name
   conflicts with system header files.  */

#ifdef USE_DOUBLE
# define C_ST	sd
# if __MIPSEB
#  define C_STHI	sdl	/* high part is left in big-endian	*/
# else
#  define C_STHI	sdr	/* high part is right in little-endian	*/
# endif
#else
# define C_ST	sw
# if __MIPSEB
#  define C_STHI	swl	/* high part is left in big-endian	*/
# else
#  define C_STHI	swr	/* high part is right in little-endian	*/
# endif
#endif

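/* A sketch of how C_STHI is used below (assuming a little-endian 32-bit
   build, where C_STHI is swr): "swr $a1,0($a0)" with an unaligned a0
   stores just the bytes from a0 up to the next word boundary.  Since
   every byte of $a1 holds the same fill value, this single partial
   store is all that is needed to align a0.  */
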
/* Bookkeeping values for 32 vs. 64 bit mode.  */
#ifdef USE_DOUBLE
# define NSIZE 8
# define NSIZEMASK 0x3f
# define NSIZEDMASK 0x7f
#else
# define NSIZE 4
# define NSIZEMASK 0x1f
# define NSIZEDMASK 0x3f
#endif
#define UNIT(unit) ((unit)*NSIZE)
#define UNITM1(unit) (((unit)*NSIZE)-1)

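/* For example, with USE_DOUBLE defined UNIT(16) is 128, so the main loop
   below advances a0 by 128 bytes (16 double words) per iteration; without
   USE_DOUBLE, UNIT(16) is 64.  */
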
#ifdef __ANDROID__
LEAF(__memset_chk,0)
#else
LEAF(__memset_chk)
#endif
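/* __memset_chk(dst, byte, n, dstlen) is the _FORTIFY_SOURCE entry point:
   $a2 holds the fill count n and $a3 the compiler-known destination size.
   If the buffer is large enough ($a3 >= $a2) we tail-call memset;
   otherwise we diverge to __memset_chk_fail.  */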
	.set	noreorder
	sltu	$t2, $a3, $a2
	beq	$t2, $zero, memset
	nop
	.cpsetup $t9, $t8, __memset_chk
	LA	$t9, __memset_chk_fail
	jr	$t9
	nop
	.set	reorder
END(__memset_chk)

#ifdef __ANDROID__
LEAF(memset,0)
#else
LEAF(memset)
#endif

	.set	nomips16
	.set	noreorder
/* If the size is less than 2*NSIZE (8 or 16), go to L(lastb).  Regardless of
   size, copy dst pointer to v0 for the return value.  */
	slti	$t2,$a2,(2 * NSIZE)
	bne	$t2,$zero,L(lastb)
	move	$v0,$a0

/* If memset value is not zero, we copy it to all the bytes in a 32 or 64
   bit word.  */
	beq	$a1,$zero,L(set0)	/* If memset value is zero no smear  */
	PTR_SUBU $a3,$zero,$a0
	nop

	/* smear byte into 32 or 64 bit word */
#if ((__mips == 64) || (__mips == 32)) && (__mips_isa_rev >= 2)
# ifdef USE_DOUBLE
	dins	$a1, $a1, 8, 8        /* Replicate fill byte into half-word.  */
	dins	$a1, $a1, 16, 16      /* Replicate fill byte into word.       */
	dins	$a1, $a1, 32, 32      /* Replicate fill byte into dbl word.   */
# else
	ins	$a1, $a1, 8, 8        /* Replicate fill byte into half-word.  */
	ins	$a1, $a1, 16, 16      /* Replicate fill byte into word.       */
# endif
#else
# ifdef USE_DOUBLE
	and	$a1,0xff
	dsll	$t2,$a1,8
	or	$a1,$t2
	dsll	$t2,$a1,16
	or	$a1,$t2
	dsll	$t2,$a1,32
	or	$a1,$t2
# else
	and	$a1,0xff
	sll	$t2,$a1,8
	or	$a1,$t2
	sll	$t2,$a1,16
	or	$a1,$t2
# endif
#endif

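/* For example, if the fill byte is 0xAB, $a1 becomes 0x0000ABAB after the
   first step, 0xABABABAB after the second, and (with USE_DOUBLE)
   0xABABABABABABABAB after the final doubling.  */
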
/* If the destination address is not aligned, do a partial store to get it
   aligned.  If it is already aligned, just jump to L(aligned).  */
L(set0):
#ifndef R6_CODE
	andi	$t2,$a3,(NSIZE-1)	/* word-unaligned address?          */
	beq	$t2,$zero,L(aligned)	/* t2 is the unalignment count      */
	PTR_SUBU $a2,$a2,$t2
	C_STHI	$a1,0($a0)
	PTR_ADDU $a0,$a0,$t2
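/* For illustration: with a0 = 0x1003 and NSIZE == 4, a3 = -a0 gives
   t2 = 1; C_STHI stores the fill byte at 0x1003 (every byte of $a1 is
   the fill byte, so the partial store is safe), a2 drops by 1, and a0
   advances to the aligned address 0x1004.  */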
#else /* R6_CODE */
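/* R6 has no swl/swr, so we branch into a table of byte stores instead:
   t2 = a0 & (NSIZE-1) is the misalignment, lapc loads the PC-relative
   address of L(atable), and PTR_LSA computes t9 += t2 << 2, one 4-byte
   "bc" entry per case.  Entry 0 branches straight to L(aligned); entry
   t2 > 0 jumps into a fall-through chain of sb instructions that stores
   exactly the NSIZE - t2 bytes needed to reach the next (d)word
   boundary.  */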
	andi	$t2,$a0,(NSIZE-1)
	lapc	$t9,L(atable)
	PTR_LSA	$t9,$t2,$t9,2
	jrc	$t9
L(atable):
	bc	L(aligned)
# ifdef USE_DOUBLE
	bc	L(lb7)
	bc	L(lb6)
	bc	L(lb5)
	bc	L(lb4)
# endif
	bc	L(lb3)
	bc	L(lb2)
	bc	L(lb1)
L(lb7):
	sb	$a1,6($a0)
L(lb6):
	sb	$a1,5($a0)
L(lb5):
	sb	$a1,4($a0)
L(lb4):
	sb	$a1,3($a0)
L(lb3):
	sb	$a1,2($a0)
L(lb2):
	sb	$a1,1($a0)
L(lb1):
	sb	$a1,0($a0)

	li	$t9,NSIZE
	subu	$t2,$t9,$t2
	PTR_SUBU $a2,$a2,$t2
	PTR_ADDU $a0,$a0,$t2
#endif /* R6_CODE */

L(aligned):
/* If USE_DOUBLE is not set we may still want to align the data on an 8
   byte boundary instead of a 4 byte boundary to maximize the opportunity
   of proAptiv chips to do memory bonding (combining two sequential 4
   byte stores into one 8 byte store).  We know there are at least 4 bytes
   left to store or we would have jumped to L(lastb) earlier in the code.  */
#ifdef DOUBLE_ALIGN
	andi	$t2,$a3,4
	beq	$t2,$zero,L(double_aligned)
	PTR_SUBU $a2,$a2,$t2
	sw	$a1,0($a0)
	PTR_ADDU $a0,$a0,$t2
L(double_aligned):
#endif

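/* For illustration: if the original dst was 0x1001, a0 is now 0x1004,
   and t2 = (-0x1001) & 4 = 4, so the single word store brings a0 to the
   8-byte boundary 0x1008 with a2 reduced by 4.  */
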
/* Now the destination is aligned to a (word or double word) boundary.
   t8 counts the bytes that remain after all the 64/128 byte chunks are
   stored (a2 is set from t8 once the chunk loop finishes), and a3 is the
   dest pointer after all the 64/128 byte chunks.  We will loop,
   incrementing a0 until it equals a3.  */
	andi	$t8,$a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
	beq	$a2,$t8,L(chkw)	 /* if a2==t8, no 64-byte/128-byte chunks */
	PTR_SUBU $a3,$a2,$t8	 /* subtract from a2 the remainder */
	PTR_ADDU $a3,$a0,$a3	 /* Now a3 is the final dst after loop */

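/* For example, with a2 = 200 and USE_DOUBLE (NSIZEDMASK == 0x7f):
   t8 = 200 & 0x7f = 72 and a3 = a0 + 128, so the loop below runs one
   128-byte iteration and 72 bytes are left for the smaller loops.  */
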
/* When in the loop we may prefetch with the 'prepare to store' hint;
   in this case a0+x must not be past the "t0-32" address.  This
   means: for x=128 the last "safe" a0 address is "t0-160", and for x=64
   the last "safe" a0 address is "t0-96".  In the current version we
   use "prefetch hint,128(a0)", so "t0-160" is the limit.  */
#if defined(USE_PREFETCH) \
    && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
	PTR_ADDU $t0,$a0,$a2		/* t0 is the "past the end" address */
	PTR_SUBU $t9,$t0,PREFETCH_LIMIT	/* t9 is the "last safe pref" address */
#endif
#if defined(USE_PREFETCH) \
    && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
	PREFETCH_FOR_STORE (1, $a0)
	PREFETCH_FOR_STORE (2, $a0)
	PREFETCH_FOR_STORE (3, $a0)
#endif

L(loop16w):
#if defined(USE_PREFETCH) \
    && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
	sltu	$v1,$t9,$a0		/* If a0 > t9 don't use next prefetch */
	bgtz	$v1,L(skip_pref)
	nop
#endif
#ifndef R6_CODE
	PREFETCH_FOR_STORE (4, $a0)
	PREFETCH_FOR_STORE (5, $a0)
#else
	PREFETCH_FOR_STORE (2, $a0)
#endif
L(skip_pref):
	C_ST	$a1,UNIT(0)($a0)
	C_ST	$a1,UNIT(1)($a0)
	C_ST	$a1,UNIT(2)($a0)
	C_ST	$a1,UNIT(3)($a0)
	C_ST	$a1,UNIT(4)($a0)
	C_ST	$a1,UNIT(5)($a0)
	C_ST	$a1,UNIT(6)($a0)
	C_ST	$a1,UNIT(7)($a0)
	C_ST	$a1,UNIT(8)($a0)
	C_ST	$a1,UNIT(9)($a0)
	C_ST	$a1,UNIT(10)($a0)
	C_ST	$a1,UNIT(11)($a0)
	C_ST	$a1,UNIT(12)($a0)
	C_ST	$a1,UNIT(13)($a0)
	C_ST	$a1,UNIT(14)($a0)
	C_ST	$a1,UNIT(15)($a0)
	PTR_ADDIU $a0,$a0,UNIT(16)	/* adding 64/128 to dest */
	bne	$a0,$a3,L(loop16w)
	nop
	move	$a2,$t8

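/* Each L(loop16w) iteration stores 16 (d)words, i.e. 64 or 128 bytes.
   When PREPAREFORSTORE is in use, the sltu/bgtz pair above skips the
   prefetch once a0 has passed t9, the last safe prefetch address.  */
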
/* Here we have the dest (d)word-aligned but with less than 64 (or 128)
   bytes to go.  Check for a 32 (or 64) byte chunk and store it if there is
   one.  Otherwise jump down to L(chk1w) to handle the tail end.  */
L(chkw):
	andi	$t8,$a2,NSIZEMASK	/* is there a 32-byte/64-byte chunk?  */
				/* t8 is the remainder count past 32-bytes */
	beq	$a2,$t8,L(chk1w)	/* when a2==t8, no 32-byte chunk */
	nop
	C_ST	$a1,UNIT(0)($a0)
	C_ST	$a1,UNIT(1)($a0)
	C_ST	$a1,UNIT(2)($a0)
	C_ST	$a1,UNIT(3)($a0)
	C_ST	$a1,UNIT(4)($a0)
	C_ST	$a1,UNIT(5)($a0)
	C_ST	$a1,UNIT(6)($a0)
	C_ST	$a1,UNIT(7)($a0)
	PTR_ADDIU $a0,$a0,UNIT(8)

/* Here we have less than 32 (or 64) bytes to set.  Set up for a loop to
   store one word (or double word) at a time.  Set a2 to count how many
   bytes we have to store after all the word (or double word) chunks are
   stored and a3 to the dest pointer after all the (d)word chunks have
   been stored.  We will loop, incrementing a0 until a0 equals a3.  */
L(chk1w):
	andi	$a2,$t8,(NSIZE-1)	/* a2 is the remainder past one (d)word chunks */
	beq	$a2,$t8,L(lastb)
	PTR_SUBU $a3,$t8,$a2	/* a3 is the count of bytes in whole (d)word chunks */
	PTR_ADDU $a3,$a0,$a3	/* a3 is the dst address after loop */

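/* For example, with t8 = 20 and NSIZE == 8: a2 = 20 & 7 = 4 and
   a3 = a0 + 16, so the loop below stores two double words and leaves
   4 bytes for L(lastb).  */
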
/* storing in words (4-byte or 8-byte chunks) */
L(wordCopy_loop):
	PTR_ADDIU $a0,$a0,UNIT(1)
	bne	$a0,$a3,L(wordCopy_loop)
	C_ST	$a1,UNIT(-1)($a0)

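/* Note the store sits in the branch delay slot: a0 has already been
   incremented, so UNIT(-1)($a0) targets the (d)word just stepped past.  */
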
/* Store the last few (fewer than 2*NSIZE) bytes one at a time.  */
L(lastb):
	blez	$a2,L(leave)
	PTR_ADDU $a3,$a0,$a2       /* a3 is the last dst address */
L(lastbloop):
	PTR_ADDIU $a0,$a0,1
	bne	$a0,$a3,L(lastbloop)
	sb	$a1,-1($a0)
L(leave):
	j	$ra
	nop

	.set	at
	.set	reorder
END(memset)
#ifndef __ANDROID__
# ifdef _LIBC
libc_hidden_builtin_def (memset)
libc_hidden_builtin_def (__memset_chk)
# endif
#endif
    449