Home | History | Annotate | Download | only in string
      1 /*
      2  * Copyright (c) 2009
      3  *      MIPS Technologies, Inc., California.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
     14  *    contributors may be used to endorse or promote products derived from
     15  *    this software without specific prior written permission.
     16  *
     17  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
     18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
     21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     27  * SUCH DAMAGE.
     28  */
     29 
     30 /************************************************************************
     31  *
     32  *  memcpy.S
     33  *  Version: "043009"
     34  *
     35  ************************************************************************/
     36 
     37 
     38 /************************************************************************
     39  *  Include files
     40  ************************************************************************/
     41 
     42 #include "machine/asm.h"
     43 
     44 
     45 /*
     46  * This routine could be optimized for MIPS64. The current code only
     47  * uses MIPS32 instructions.
     48  */
     49 #if defined(__MIPSEB__)
     50 #  define LWHI	lwl		/* high part is left in big-endian	*/
     51 #  define SWHI	swl		/* high part is left in big-endian	*/
     52 #  define LWLO	lwr		/* low part is right in big-endian	*/
     53 #  define SWLO	swr		/* low part is right in big-endian	*/
     54 #endif
     55 
        /*
         * The routine below pairs these as "LWHI r,0(p)" + "LWLO r,3(p)" to
         * load one possibly unaligned word, and uses LWHI/SWHI alone to move
         * the bytes in front of a word boundary.
         */
     56 #if defined(__MIPSEL__)
     57 #  define LWHI	lwr		/* high part is right in little-endian	*/
     58 #  define SWHI	swr		/* high part is right in little-endian	*/
     59 #  define LWLO	lwl		/* low part is left in little-endian	*/
     60 #  define SWLO	swl		/* low part is left in little-endian	*/
     61 #endif
     62 
     63 LEAF(memcpy,0)
        /*
         * memcpy: copy a2 bytes from a1 (src) to a0 (dst).
         *
         * Register roles visible in this routine:
         *   a0 = dst cursor, a1 = src cursor, a2 = bytes still to copy
         *   a3 = head byte count, later the dst address at which a loop stops
         *   t0-t7 = data being moved (t0, t1 and AT are also scratch in the
         *           overlap test)
         *   t8 = byte count left over after the current chunk size
         *   t9 = memmove address, later the last dst address at which
         *        "pref 30,128(a0)" is still issued
         *   v1 = "skip pref 30" flag inside the 64-byte loops, data temp elsewhere
         *   v0 = original dst, set in the delay slot right after .Lmemcpy
         *
         * Structure:
         *   1. |dst - src| < count      -> tail-jump to memmove
         *   2. count < 8                -> byte loop at .Llast8
         *   3. dst and src agree mod 4  -> aligned path (lw/sw)
         *   4. otherwise                -> .Lunaligned (LWHI/LWLO loads, sw stores)
         *   Both bulk paths copy 64-byte chunks, then at most one 32-byte chunk,
         *   then whole words, then the trailing bytes.
         *
         * Everything runs under ".set noreorder": the instruction following
         * each branch or jump is its delay slot and executes whether or not
         * the branch is taken.
         */
     64 
     65 	.set	noreorder
     66 	.set	noat
     67 /*
     68  * Below we handle the case where memcpy is called with overlapping src and dst.
     69  * Although memcpy is not required to handle this case, some parts of Android like Skia
     70  * rely on such usage. We call memmove to handle such cases.
     71  */
     72 	subu	t0,a0,a1		# t0 = dst - src
     73 	sra	AT,t0,31		# AT = 0 or all ones, the sign of t0
     74 	xor	t1,t0,AT
     75 	subu	t0,t1,AT		# t0 = |dst - src|
     76 	sltu	AT,t0,a2		# AT = 1 when the distance is below the count
     77 	beq	AT,zero,.Lmemcpy	# no overlap: do the copy here
        /*
         * NOTE(review): "la" is an assembler macro and sits in a delay slot
         * here; confirm it assembles to a single instruction in the build
         * mode used for this file.
         */
     78 	 la	t9,memmove		# delay slot, runs on both paths
     79 	jr	t9			# overlap: jump without linking, ra is untouched
     80 	 nop
     81 .Lmemcpy:
        /*
         * NOTE(review): slti here and blez at .Llast8 treat the count in a2 as
         * a signed value; a count with the sign bit set that reaches this
         * point would go to .Llast8 and leave without copying. Confirm that
         * callers never pass such counts.
         */
     82 	slti	AT,a2,8
     83 	bne	AT,zero,.Llast8
     84 	 move	v0,a0	# memcpy returns the dst pointer
     85 
     86 # Test if the src and dst are word-aligned, or can be made word-aligned
     87 	xor	t8,a1,a0
     88 	andi	t8,t8,0x3		# t8 is a0/a1 word-displacement
     89 
     90 	bne	t8,zero,.Lunaligned
     91 	 negu	a3,a0
     92 
     93 	andi	a3,a3,0x3	# we need to copy a3 bytes to make a0/a1 aligned
     94 	beq	a3,zero,.Lchk16w # when a3=0 then the dst (a0) is word-aligned
     95 	 subu	a2,a2,a3	# now a2 is the remaining bytes count
     96 
        /*
         * dst and src share the same offset inside a word here, so one
         * partial-word load and store moves exactly the a3 bytes that lie in
         * front of the next word boundary.
         */
     97 	LWHI	t8,0(a1)
     98 	addu	a1,a1,a3
     99 	SWHI	t8,0(a0)
    100 	addu	a0,a0,a3
    101 
    102 # Now the dst/src are mutually word-aligned with word-aligned addresses
    103 .Lchk16w:
    104 	andi	t8,a2,0x3f	# any whole 64-byte chunks?
    105 				# t8 is the byte count after 64-byte chunks
    106 
    107 	beq	a2,t8,.Lchk8w	# if a2==t8, no 64-byte chunks
    108 				# There will be at most 1 32-byte chunk after it
    109 	 subu	a3,a2,t8	# subtract from a2 the remainder
    110                                 # Here a3 counts bytes in 16w chunks
    111 	addu	a3,a0,a3	# Now a3 is the final dst after 64-byte chunks
    112 
    113 	addu	t0,a0,a2	# t0 is the "past the end" address
    114 
    115 # When in the loop we exercise "pref 30,x(a0)", the a0+x should not be past
    116 # the "t0-32" address
    117 # This means: for x=128 the last "safe" a0 address is "t0-160"
    118 # Alternatively, for x=64 the last "safe" a0 address is "t0-96"
    119 # In the current version we will use "pref 30,128(a0)", so "t0-160" is the limit
        /*
         * NOTE(review): hint 30 is PrepareForStore in the MIPS32 ISA; the
         * offsets and the 160-byte margin used here are written for 32-byte
         * cache lines. Confirm for cores with a different line size.
         */
    120 	subu	t9,t0,160	# t9 is the "last safe pref 30,128(a0)" address
    121 
    122 	pref    0,0(a1)		# bring the first line of src, addr 0
    123 	pref    0,32(a1)	# bring the second line of src, addr 32
    124 	pref    0,64(a1)	# bring the third line of src, addr 64
    125 	pref	30,32(a0)	# safe, as we have at least 64 bytes ahead
    126 # In case the a0 > t9 don't use "pref 30" at all
    127 	sgtu	v1,a0,t9
    128 	bgtz	v1,.Lloop16w	# skip "pref 30,64(a0)" for too short arrays
    129 	 nop
    130 # otherwise, start with using pref30
    131 	pref	30,64(a0)
        /*
         * Main aligned loop: 64 bytes per iteration as two groups of eight
         * lw followed by eight sw. v1 is recomputed at the bottom of every
         * iteration and gates the two "pref 30" hints of the next one.
         */
    132 .Lloop16w:
    133 	pref	0,96(a1)
    134 	lw	t0,0(a1)
    135 	bgtz	v1,.Lskip_pref30_96	# skip "pref 30,96(a0)"
    136 	 lw	t1,4(a1)
    137 	pref    30,96(a0)   # continue setting up the dest, addr 96
    138 .Lskip_pref30_96:
    139 	lw	t2,8(a1)
    140 	lw	t3,12(a1)
    141 	lw	t4,16(a1)
    142 	lw	t5,20(a1)
    143 	lw	t6,24(a1)
    144 	lw	t7,28(a1)
    145         pref    0,128(a1)    # bring the next lines of src, addr 128
    146 
    147 	sw	t0,0(a0)
    148 	sw	t1,4(a0)
    149 	sw	t2,8(a0)
    150 	sw	t3,12(a0)
    151 	sw	t4,16(a0)
    152 	sw	t5,20(a0)
    153 	sw	t6,24(a0)
    154 	sw	t7,28(a0)
    155 
    156 	lw	t0,32(a1)
    157 	bgtz	v1,.Lskip_pref30_128	# skip "pref 30,128(a0)"
    158 	 lw	t1,36(a1)
    159 	pref    30,128(a0)   # continue setting up the dest, addr 128
    160 .Lskip_pref30_128:
    161 	lw	t2,40(a1)
    162 	lw	t3,44(a1)
    163 	lw	t4,48(a1)
    164 	lw	t5,52(a1)
    165 	lw	t6,56(a1)
    166 	lw	t7,60(a1)
    167         pref    0, 160(a1)    # bring the next lines of src, addr 160
    168 
    169 	sw	t0,32(a0)
    170 	sw	t1,36(a0)
    171 	sw	t2,40(a0)
    172 	sw	t3,44(a0)
    173 	sw	t4,48(a0)
    174 	sw	t5,52(a0)
    175 	sw	t6,56(a0)
    176 	sw	t7,60(a0)
    177 
    178 	addiu	a0,a0,64	# adding 64 to dest
    179 	sgtu	v1,a0,t9	# refresh the "skip pref 30" flag for the next pass
    180 	bne	a0,a3,.Lloop16w
    181 	 addiu	a1,a1,64	# adding 64 to src
    182 	move	a2,t8		# a2 = bytes left after the 64-byte chunks
    183 
    184 # Here we have src and dest word-aligned but less than 64-bytes to go
    185 
    186 .Lchk8w:
    187 	pref 0, 0x0(a1)
    188 	andi	t8,a2,0x1f	# is there a 32-byte chunk?
    189 				# the t8 is the remainder count past 32-bytes
    190 	beq	a2,t8,.Lchk1w	# when a2=t8, no 32-byte chunk
    191 	 nop
    192 
    193 	lw	t0,0(a1)
    194 	lw	t1,4(a1)
    195 	lw	t2,8(a1)
    196 	lw	t3,12(a1)
    197 	lw	t4,16(a1)
    198 	lw	t5,20(a1)
    199 	lw	t6,24(a1)
    200 	lw	t7,28(a1)
    201 	addiu	a1,a1,32
    202 
    203 	sw	t0,0(a0)
    204 	sw	t1,4(a0)
    205 	sw	t2,8(a0)
    206 	sw	t3,12(a0)
    207 	sw	t4,16(a0)
    208 	sw	t5,20(a0)
    209 	sw	t6,24(a0)
    210 	sw	t7,28(a0)
    211 	addiu	a0,a0,32
    212 
    213 .Lchk1w:
    214 	andi	a2,t8,0x3	# now a2 is the remainder past 1w chunks
    215 	beq	a2,t8,.Llast8
    216 	 subu	a3,t8,a2	# a3 is count of bytes in 1w chunks
    217 	addu	a3,a0,a3	# now a3 is the dst address past the 1w chunks
    218 
    219 # copying in words (4-byte chunks)
    220 .LwordCopy_loop:
    221 	lw	t3,0(a1)	# the first t3 may be equal t0 ... optimize?
    222 	addiu	a1,a1,4
    223 	addiu	a0,a0,4
    224 	bne	a0,a3,.LwordCopy_loop
    225 	 sw	t3,-4(a0)	# delay slot: store at the pre-increment address
    226 
    227 # For the last (<8) bytes
    228 .Llast8:
    229 	blez	a2,.Lleave	# signed test: nothing to copy when a2 <= 0
    230 	 addu	a3,a0,a2	# a3 is the dst end address (one past the last byte)
    231 .Llast8loop:
    232 	lb	v1,0(a1)
    233 	addiu	a1,a1,1
    234 	addiu	a0,a0,1
    235 	bne	a0,a3,.Llast8loop
    236 	 sb	v1,-1(a0)	# delay slot: store at the pre-increment address
    237 
    238 .Lleave:
    239 	j	ra
    240 	 nop
    241 
    242 #
    243 # UNALIGNED case
    244 #
    245 
    246 .Lunaligned:
    247 	# got here with a3="negu a0"
    248 	andi	a3,a3,0x3	# test if the a0 is word aligned
    249 	beqz	a3,.Lua_chk16w
    250 	 subu	a2,a2,a3	# bytes left after initial a3 bytes
    251 
        /*
         * The LWHI/LWLO pair reads the four src bytes at a1..a1+3 into v1;
         * SWHI then stores only the part that lies in front of the next dst
         * word boundary, which is a3 bytes.
         */
    252 	LWHI	v1,0(a1)
    253 	LWLO	v1,3(a1)
    254 	addu	a1,a1,a3	# a3 may be here 1, 2 or 3
    255 	SWHI	v1,0(a0)
    256 	addu	a0,a0,a3	# below the dst will be word aligned (NOTE1)
    257 
    258 .Lua_chk16w:
    259 	andi	t8,a2,0x3f	# any whole 64-byte chunks?
    260 				# t8 is the byte count after 64-byte chunks
    261 	beq	a2,t8,.Lua_chk8w # if a2==t8, no 64-byte chunks
    262 				# There will be at most 1 32-byte chunk after it
    263 	 subu	a3,a2,t8	# subtract from a2 the remainder
    264                                 # Here a3 counts bytes in 16w chunks
    265 	addu	a3,a0,a3	# Now a3 is the final dst after 64-byte chunks
    266 
    267 	addu	t0,a0,a2	# t0 is the "past the end" address
    268 
    269 	subu	t9,t0,160	# t9 is the "last safe pref 30,128(a0)" address
    270 
    271 	pref    0,0(a1)		# bring the first line of src, addr 0
    272 	pref    0,32(a1)	# bring the second line of src, addr 32
    273 	pref    0,64(a1)	# bring the third line of src, addr 64
    274 	pref	30,32(a0)	# safe, as we have at least 64 bytes ahead
    275 # In case the a0 > t9 don't use "pref 30" at all
    276 	sgtu	v1,a0,t9
    277 	bgtz	v1,.Lua_loop16w	# skip "pref 30,64(a0)" for too short arrays
    278 	 nop
    279 # otherwise, start with using pref30
    280 	pref	30,64(a0)
        /*
         * Main unaligned loop: same shape as .Lloop16w, but every source word
         * is assembled with an LWHI/LWLO pair; the stores stay plain sw
         * because dst is word-aligned (NOTE1).
         */
    281 .Lua_loop16w:
    282 	pref	0,96(a1)
    283 	LWHI	t0,0(a1)
    284 	LWLO	t0,3(a1)
    285 	LWHI	t1,4(a1)
    286 	bgtz	v1,.Lua_skip_pref30_96
    287 	 LWLO	t1,7(a1)
    288 	pref    30,96(a0)   # continue setting up the dest, addr 96
    289 .Lua_skip_pref30_96:
    290 	LWHI	t2,8(a1)
    291 	LWLO	t2,11(a1)
    292 	LWHI	t3,12(a1)
    293 	LWLO	t3,15(a1)
    294 	LWHI	t4,16(a1)
    295 	LWLO	t4,19(a1)
    296 	LWHI	t5,20(a1)
    297 	LWLO	t5,23(a1)
    298 	LWHI	t6,24(a1)
    299 	LWLO	t6,27(a1)
    300 	LWHI	t7,28(a1)
    301 	LWLO	t7,31(a1)
    302         pref    0,128(a1)    # bring the next lines of src, addr 128
    303 
    304 	sw	t0,0(a0)
    305 	sw	t1,4(a0)
    306 	sw	t2,8(a0)
    307 	sw	t3,12(a0)
    308 	sw	t4,16(a0)
    309 	sw	t5,20(a0)
    310 	sw	t6,24(a0)
    311 	sw	t7,28(a0)
    312 
    313 	LWHI	t0,32(a1)
    314 	LWLO	t0,35(a1)
    315 	LWHI	t1,36(a1)
    316 	bgtz	v1,.Lua_skip_pref30_128
    317 	LWLO	t1,39(a1)	# delay slot: completes t1 on both paths
    318 	pref    30,128(a0)   # continue setting up the dest, addr 128
    319 .Lua_skip_pref30_128:
    320 	LWHI	t2,40(a1)
    321 	LWLO	t2,43(a1)
    322 	LWHI	t3,44(a1)
    323 	LWLO	t3,47(a1)
    324 	LWHI	t4,48(a1)
    325 	LWLO	t4,51(a1)
    326 	LWHI	t5,52(a1)
    327 	LWLO	t5,55(a1)
    328 	LWHI	t6,56(a1)
    329 	LWLO	t6,59(a1)
    330 	LWHI	t7,60(a1)
    331 	LWLO	t7,63(a1)
    332         pref    0, 160(a1)    # bring the next lines of src, addr 160
    333 
    334 	sw	t0,32(a0)
    335 	sw	t1,36(a0)
    336 	sw	t2,40(a0)
    337 	sw	t3,44(a0)
    338 	sw	t4,48(a0)
    339 	sw	t5,52(a0)
    340 	sw	t6,56(a0)
    341 	sw	t7,60(a0)
    342 
    343 	addiu	a0,a0,64	# adding 64 to dest
    344 	sgtu	v1,a0,t9	# refresh the "skip pref 30" flag for the next pass
    345 	bne	a0,a3,.Lua_loop16w
    346 	 addiu	a1,a1,64	# adding 64 to src
    347 	move	a2,t8		# a2 = bytes left after the 64-byte chunks
    348 
    349 # Here dst is word-aligned (src is not) and less than 64 bytes remain
    350 
    351 .Lua_chk8w:
    352 	pref 0, 0x0(a1)
    353 	andi	t8,a2,0x1f	# is there a 32-byte chunk?
    354 				# the t8 is the remainder count
    355 	beq	a2,t8,.Lua_chk1w # when a2=t8, no 32-byte chunk
    356 	 nop
    357 
    358 	LWHI	t0,0(a1)
    359 	LWLO	t0,3(a1)
    360 	LWHI	t1,4(a1)
    361 	LWLO	t1,7(a1)
    362 	LWHI	t2,8(a1)
    363 	LWLO	t2,11(a1)
    364 	LWHI	t3,12(a1)
    365 	LWLO	t3,15(a1)
    366 	LWHI	t4,16(a1)
    367 	LWLO	t4,19(a1)
    368 	LWHI	t5,20(a1)
    369 	LWLO	t5,23(a1)
    370 	LWHI	t6,24(a1)
    371 	LWLO	t6,27(a1)
    372 	LWHI	t7,28(a1)
    373 	LWLO	t7,31(a1)
    374 	addiu	a1,a1,32
    375 
    376 	sw	t0,0(a0)
    377 	sw	t1,4(a0)
    378 	sw	t2,8(a0)
    379 	sw	t3,12(a0)
    380 	sw	t4,16(a0)
    381 	sw	t5,20(a0)
    382 	sw	t6,24(a0)
    383 	sw	t7,28(a0)
    384 	addiu	a0,a0,32
    385 
    386 .Lua_chk1w:
    387 	andi	a2,t8,0x3	# now a2 is the remainder past 1w chunks
    388 	beq	a2,t8,.Lua_smallCopy
    389 	 subu	a3,t8,a2	# a3 is count of bytes in 1w chunks
    390 	addu	a3,a0,a3	# now a3 is the dst address past the 1w chunks
    391 
    392 # copying in words (4-byte chunks)
    393 .Lua_wordCopy_loop:
    394 	LWHI	v1,0(a1)
    395 	LWLO	v1,3(a1)
    396 	addiu	a1,a1,4
    397 	addiu	a0,a0,4		# note: dst=a0 is word aligned here, see NOTE1
    398 	bne	a0,a3,.Lua_wordCopy_loop
    399 	 sw	v1,-4(a0)	# delay slot: store at the pre-increment address
    400 
    401 # Now less than 4 bytes (value in a2) left to copy
    402 .Lua_smallCopy:
    403 	beqz	a2,.Lleave
    404 	addu	a3,a0,a2	# delay slot: a3 is the dst end address (one past the last byte)
    405 .Lua_smallCopy_loop:
    406 	lb	v1,0(a1)
    407 	addiu	a1,a1,1
    408 	addiu	a0,a0,1
    409 	bne	a0,a3,.Lua_smallCopy_loop
    410 	 sb	v1,-1(a0)	# delay slot: store at the pre-increment address
    411 
    412 	j	ra
    413 	 nop
    414 
    415 	.set	at
    416 	.set	reorder
    417 
    418 END(memcpy)
    419 
    420 
    421 /************************************************************************
    422  *  Implementation : Static functions
    423  ************************************************************************/
    424