Home | History | Annotate | Download | only in string
      1 /*
      2  * Copyright (c) 2009
      3  *      MIPS Technologies, Inc., California.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
     14  *    contributors may be used to endorse or promote products derived from
     15  *    this software without specific prior written permission.
     16  *
     17  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
     18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
     21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     27  * SUCH DAMAGE.
     28  */
     29 
     30 /************************************************************************
     31  *
     32  *  memset.S, version "64h" with 1 cache line horizon for "pref 30" and 14 nops
     33  *  Version: "043009"
     34  *
     35  ************************************************************************/
     36 
     37 
     38 /************************************************************************
     39  *  Include files
     40  ************************************************************************/
     41 
     42 #include <private/bionic_asm.h>
     43 
     44 /*
     45  * This routine could be optimized for MIPS64. The current code only
     46  * uses MIPS32 instructions.
     47  */
     48 
     49 #if defined(__MIPSEB__)
     50 #  define SWHI	swl		/* high part is left in big-endian	*/
     51 #  define SWLO	swr		/* low part is right in big-endian	*/
     52 #endif
     53 
     54 #if defined(__MIPSEL__)
     55 #  define SWHI	swr		/* high part is right in little-endian	*/
     56 #  define SWLO	swl		/* low part is left in little-endian	*/
     57 #endif
     58 
     59 #if !(defined(XGPROF) || defined(XPROF))
     60 #undef SETUP_GP
     61 #define SETUP_GP
     62 #endif
     63 
     64 #ifdef NDEBUG
     65 #define DBG #
     66 #else
     67 #define DBG
     68 #endif
     69 
     70 /*
     71  * void _memset16(uint16_t* dst, uint16_t value, size_t size);
     72  */
     73 
     74 LEAF(_memset16,0)
     75 	.set noreorder
     76 DBG	/* Check parameters */
     77 DBG	andi	t0,a0,1			# a0 must be halfword aligned
     78 DBG	tne	t0,zero
     79 DBG	andi	t2,a2,1			# a2 must be even
     80 DBG	tne	t2,zero
     81 
     82 #ifdef FIXARGS
     83 	# ensure count is even
     84 #if (__mips==32) && (__mips_isa_rev>=2)
     85 	ins	a2,zero,0,1
     86 #else
     87 	ori	a2,1
     88 	xori	a2,1
     89 #endif
     90 #endif
     91 
     92 #if (__mips==32) && (__mips_isa_rev>=2)
     93 	ins	a1,a1,16,16
     94 #else
     95 	andi	a1,0xffff
     96 	sll	t3,a1,16
     97 	or	a1,t3
     98 #endif
     99 
    100 	beqz	a2,.Ldone
    101 	 andi	t1,a0,2
    102 	beqz	t1,.Lalignok
    103 	 addu	t0,a0,a2		# t0 is the "past the end" address
    104 	sh	a1,0(a0)		# store one halfword to get aligned
    105 	addu	a0,2
    106 	subu	a2,2
    107 .Lalignok:
    108 	slti	t1,a2,4			# .Laligned for 4 or more bytes
    109 	beqz	t1,.Laligned
    110 	 sne	t1,a2,2			# one more halfword?
    111 	bnez	t1,.Ldone
    112 	 nop
    113 	sh	a1,0(a0)
    114 .Ldone:
    115 	j	ra
    116 	 nop
    117 	.set reorder
    118 END(_memset16)
    119 
    120 /*
    121  * void _memset32(uint32_t* dst, uint32_t value, size_t size);
    122  */
    123 
    124 LEAF(_memset32,0)
    125 	.set noreorder
    126 DBG	/* Check parameters */
    127 DBG	andi	t0,a0,3			# a0 must be word aligned
    128 DBG	tne	t0,zero
    129 DBG	andi	t2,a2,3			# a2 must be a multiple of 4 bytes
    130 DBG	tne	t2,zero
    131 
    132 #ifdef FIXARGS
    133 	# ensure count is a multiple of 4
    134 #if (__mips==32) && (__mips_isa_rev>=2)
    135 	ins	$a2,$0,0,2
    136 #else
    137 	ori	a2,3
    138 	xori	a2,3
    139 #endif
    140 #endif
    141 
    142 	bnez	a2,.Laligned		# any work to do?
    143 	 addu	t0,a0,a2		# t0 is the "past the end" address
    144 
    145 	j	ra
    146 	 nop
    147 	.set reorder
    148 END(_memset32)
    149 
    150 LEAF(memset,0)
    151 
    152 	.set	noreorder
    153 	.set	noat
    154 
    155 	addu	t0,a0,a2		# t0 is the "past the end" address
    156 	slti	AT,a2,4			# is a2 less than 4?
    157 	bne	AT,zero,.Llast4		# if yes, go to last4
    158 	 move	v0,a0			# memset returns the dst pointer
    159 
    160 	beq	a1,zero,.Lset0
    161 	 subu	v1,zero,a0
    162 
    163 	# smear byte into 32 bit word
    164 #if (__mips==32) && (__mips_isa_rev>=2)
    165 	ins     a1, a1, 8, 8        # Replicate fill byte into half-word.
    166 	ins     a1, a1, 16, 16      # Replicate fill byte into word.
    167 #else
    168 	and	a1,0xff
    169 	sll	AT,a1,8
    170 	or	a1,AT
    171 	sll	AT,a1,16
    172 	or	a1,AT
    173 #endif
    174 
    175 .Lset0:
    176 	andi	v1,v1,0x3		# word-unaligned address?
    177 	beq	v1,zero,.Laligned	# v1 is the unalignment count
    178 	 subu	a2,a2,v1
    179 	SWHI	a1,0(a0)
    180 	addu	a0,a0,v1
    181 
    182 # Here we have the "word-aligned" a0 (until the "last4")
    183 .Laligned:
    184 	andi	t8,a2,0x3f	# any 64-byte chunks?
    185 				# t8 is the byte count past 64-byte chunks
    186 	beq	a2,t8,.Lchk8w	# when a2==t8, no 64-byte chunks
    187 				# There will be at most 1 32-byte chunk then
    188 	 subu	a3,a2,t8	# subtract from a2 the reminder
    189 				# Here a3 counts bytes in 16w chunks
    190 	addu	a3,a0,a3	# Now a3 is the final dst after 64-byte chunks
    191 
    192 # Find out, if there are any 64-byte chunks after which will be still at least
    193 # 96 bytes left. The value "96" is calculated as needed buffer for
    194 # "pref 30,64(a0)" prefetch, which can be used as "pref 30,0(a0)" after
    195 # incrementing "a0" by 64.
    196 # For "a2" below 160 there will be no such "pref 30 safe" 64-byte chunk.
    197 #
    198 	sltiu	v1,a2,160
    199 	bgtz	v1,.Lloop16w_nopref30	# skip "pref 30,0(a0)"
    200 	 subu	t7,a2,96	# subtract "pref 30 unsafe" region
    201 		# below we have at least 1 64-byte chunk which is "pref 30 safe"
    202 	andi	t6,t7,0x3f	# t6 is past "64-byte safe chunks" reminder
    203 	subu	t5,t7,t6	# subtract from t7 the reminder
    204 				# Here t5 counts bytes in 16w "safe" chunks
    205 	addu	t4,a0,t5	# Now t4 is the dst after 64-byte "safe" chunks
    206 
    207 # Don't use "pref 30,0(a0)" for a0 in a "middle" of a cache line
    208 #	pref	30,0(a0)
    209 # Here we are in the region, where it is safe to use "pref 30,64(a0)"
    210 .Lloop16w:
    211 	addiu	a0,a0,64
    212 	pref	30,-32(a0)	# continue setting up the dest, addr 64-32
    213 	sw	a1,-64(a0)
    214 	sw	a1,-60(a0)
    215 	sw	a1,-56(a0)
    216 	sw	a1,-52(a0)
    217 	sw	a1,-48(a0)
    218 	sw	a1,-44(a0)
    219 	sw	a1,-40(a0)
    220 	sw	a1,-36(a0)
    221 	nop
    222 	nop			# the extra nop instructions help to balance
    223 	nop			# cycles needed for "store" + "fill" + "evict"
    224 	nop			# For 64byte store there are needed 8 fill
    225 	nop			# and 8 evict cycles, i.e. at least 32 instr.
    226 	nop
    227 	nop
    228 	pref	30,0(a0)	# continue setting up the dest, addr 64-0
    229 	sw	a1,-32(a0)
    230 	sw	a1,-28(a0)
    231 	sw	a1,-24(a0)
    232 	sw	a1,-20(a0)
    233 	sw	a1,-16(a0)
    234 	sw	a1,-12(a0)
    235 	sw	a1,-8(a0)
    236 	sw	a1,-4(a0)
    237 	nop
    238 	nop
    239 	nop
    240 	nop			# NOTE: adding 14 nop-s instead of 12 nop-s
    241 	nop			# gives better results for "fast" memory
    242 	nop
    243 	bne	a0,t4,.Lloop16w
    244 	 nop
    245 
    246 	beq	a0,a3,.Lchk8w	# maybe no more 64-byte chunks?
    247 	 nop			# this "delayed slot" is useless ...
    248 
    249 .Lloop16w_nopref30:	# there could be up to 3 "64-byte nopref30" chunks
    250 	addiu	a0,a0,64
    251 	sw	a1,-64(a0)
    252 	sw	a1,-60(a0)
    253 	sw	a1,-56(a0)
    254 	sw	a1,-52(a0)
    255 	sw	a1,-48(a0)
    256 	sw	a1,-44(a0)
    257 	sw	a1,-40(a0)
    258 	sw	a1,-36(a0)
    259 	sw	a1,-32(a0)
    260 	sw	a1,-28(a0)
    261 	sw	a1,-24(a0)
    262 	sw	a1,-20(a0)
    263 	sw	a1,-16(a0)
    264 	sw	a1,-12(a0)
    265 	sw	a1,-8(a0)
    266 	bne	a0,a3,.Lloop16w_nopref30
    267 	 sw	a1,-4(a0)
    268 
    269 .Lchk8w:		# t8 here is the byte count past 64-byte chunks
    270 
    271 	andi	t7,t8,0x1f	# is there a 32-byte chunk?
    272 				# the t7 is the reminder count past 32-bytes
    273 	beq	t8,t7,.Lchk1w	# when t8==t7, no 32-byte chunk
    274 	 move	a2,t7
    275 
    276 	sw	a1,0(a0)
    277 	sw	a1,4(a0)
    278 	sw	a1,8(a0)
    279 	sw	a1,12(a0)
    280 	sw	a1,16(a0)
    281 	sw	a1,20(a0)
    282 	sw	a1,24(a0)
    283 	sw	a1,28(a0)
    284 	addiu	a0,a0,32
    285 
    286 .Lchk1w:
    287 	andi	t8,a2,0x3	# now t8 is the reminder past 1w chunks
    288 	beq	a2,t8,.Llast4aligned
    289 	 subu	a3,a2,t8	# a3 is the count of bytes in 1w chunks
    290 	addu	a3,a0,a3	# now a3 is the dst address past the 1w chunks
    291 
    292 # copying in words (4-byte chunks)
    293 .LwordCopy_loop:
    294 	addiu	a0,a0,4
    295 	bne	a0,a3,.LwordCopy_loop
    296 	 sw	a1,-4(a0)
    297 
    298 # store last 0-3 bytes
    299 # this will repeat the last store if the memset finishes on a word boundary
    300 .Llast4aligned:
    301 	j	ra
    302 	 SWLO	a1,-1(t0)
    303 
    304 .Llast4:
    305 	beq	a0,t0,.Llast4e
    306 .Llast4l:
    307 	 addiu	a0,a0,1
    308 	bne	a0,t0,.Llast4l
    309 	 sb	a1,-1(a0)
    310 .Llast4e:
    311 	j	ra
    312 	 nop
    313 
    314 	.set	at
    315 	.set	reorder
    316 
    317 END(memset)
    318 
    319 
    320 /************************************************************************
    321  *  Implementation : Static functions
    322  ************************************************************************/
    323