Home | History | Annotate | Download | only in bionic
      1 /*	$OpenBSD: memcpy.S,v 1.1.1.1 2006/10/10 22:07:10 miod Exp $	*/
      2 /*	$NetBSD: memcpy.S,v 1.2 2006/04/22 23:53:47 uwe Exp $	*/
      3 
      4 /*
      5  * Copyright (c) 2000 SHIMIZU Ryo <ryo (at) misakimix.org>
      6  * All rights reserved.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. The name of the author may not be used to endorse or promote products
     17  *    derived from this software without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 
     31 #include <machine/asm.h>
     32 
     33 #if !defined(MEMCOPY) && !defined(MEMMOVE) && !defined(BCOPY)
        /* No variant was selected by the build: assemble this file as memcpy. */
     34 #define MEMCOPY
     35 #endif
     36 
        /*
         * Register roles used by the routine below.
         * NOTE(review): r4/r5/r6 are presumably the first three argument
         * registers of the SH calling convention -- confirm against the ABI.
         */
     37 #if defined(MEMCOPY) || defined(MEMMOVE)
     38 #define	REG_DST0	r3	/* copy of the caller's dst; moved to r0 on return */
     39 #define	REG_SRC		r5	/* source pointer                                   */
     40 #define	REG_DST		r4	/* destination pointer                              */
     41 #else	/* BCOPY: src/dst registers swapped, no REG_DST0 (no value returned) */
     42 #define	REG_SRC		r4	/* source pointer                                   */
     43 #define	REG_DST		r5	/* destination pointer                              */
     44 #endif
     45 
     46 #define	REG_LEN		r6	/* remaining byte count                             */
     47 
     48 #if defined(MEMCOPY)
     49 ENTRY(memcpy)
     50 #elif defined(MEMMOVE)
     51 ENTRY(memmove)
     52 #elif defined(BCOPY)
     53 ENTRY(bcopy)
     54 #endif
        /*
         * Shared body for memcpy / memmove / bcopy; the entry symbol is chosen
         * above by MEMCOPY / MEMMOVE / BCOPY.
         *
         * In:   REG_SRC = source, REG_DST = destination, REG_LEN = byte count.
         * Out:  r0 = original destination when REG_DST0 is defined; otherwise
         *       the rts delay slot is a nop and r0 is not set to a return value.
         * Uses: r0, r1, REG_SRC, REG_DST, REG_LEN, REG_DST0 (if defined), T bit.
         *
         * Direction: src == dst returns at once; src > dst (unsigned) copies
         * forward from the start; otherwise both pointers are advanced by len
         * and the copy runs backward from the end (bcopy_overlap).
         *
         * Width, chosen from (src ^ dst) & 3:
         *   0     -> byte / halfword lead-in until src is 4-byte aligned, then
         *            longwords, then trailing bytes
         *   2     -> one byte if src is odd, then halfwords, then a trailing byte
         *   1, 3  -> bytes only
         *
         * bt/s, bf/s, bra and rts have a delay slot: the instruction written
         * after them executes before the branch takes effect.  Several branches
         * below depend on this; those slots are marked "(delay slot)".
         */
     55 #ifdef REG_DST0
     56 	mov	REG_DST,REG_DST0	/* keep the caller's dst for the return value */
     57 #endif
     58 	cmp/eq	REG_DST,REG_SRC	/* if ( src == dst ) return; */
     59 	bt/s	bcopy_return
     60 	cmp/hi	REG_DST,REG_SRC	/* (delay slot) T = ( src > dst ), unsigned */
     61 	bf/s	bcopy_overlap	/* if ( src < dst ) copy backward from the end */
     62 
     63 	mov	REG_SRC,r0	/* (delay slot) r0 = src */
     64 	xor	REG_DST,r0
     65 	and	#3,r0
     66 	mov	r0,r1		/* r1 keeps (src ^ dst) & 3 for word_align */
     67 	tst	r0,r0		/* (src ^ dst) & 3         */
     68 	bf/s	word_align	/* low 2 bits differ: no longword copies */
     69 
     70 longword_align:
     71 	tst	REG_LEN,REG_LEN	/* if ( len==0 ) return; (also the bf/s delay slot) */
     72 	bt/s	bcopy_return
     73 
     74 
     75 	mov	REG_SRC,r0	/* (delay slot) */
     76 	tst	#1,r0		/* if ( src & 1 )          */
     77 	bt	1f
     78 	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
     79 	add	#-1,REG_LEN	/*    len--;               */
     80 	mov.b	r0,@REG_DST
     81 	add	#1,REG_DST
     82 1:
     83 
     84 
     85 	mov	#1,r0
     86 	cmp/hi	r0,REG_LEN	/* if ( (len > 1) &&       */
     87 	bf/s	1f
     88 	mov	REG_SRC,r0	/* (delay slot) */
     89 	tst	#2,r0		/*      (src & 2) {        */
     90 	bt	1f
     91 	mov.w	@REG_SRC+,r0	/*        *((unsigned short*)dst)++ = *((unsigned short*)src)++; */
     92 	add	#-2,REG_LEN	/*        len -= 2;                                              */
     93 	mov.w	r0,@REG_DST
     94 	add	#2,REG_DST	/* }                       */
     95 1:
     96 
     97 
     98 	mov	#3,r1
     99 	cmp/hi	r1,REG_LEN	/* while ( len > 3 ) {     */
    100 	bf/s	no_align_delay
    101 	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) */
    102 2:
    103 	mov.l	@REG_SRC+,r0	/*   *((unsigned long*)dst)++ = *((unsigned long*)src)++;        */
    104 	add	#-4,REG_LEN	/*   len -= 4;                                                   */
    105 	mov.l	r0,@REG_DST
    106 	cmp/hi	r1,REG_LEN
    107 	bt/s	2b
    108 	add	#4,REG_DST	/* }  (delay slot)         */
    109 
    110 	bra	no_align_delay	/* copy the remaining 0-3 bytes */
    111 	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) */
    112 
    113 
    114 word_align:
    115 	mov	r1,r0		/* r0 = (src ^ dst) & 3    */
    116 	tst	#1,r0
    117 	bf/s	no_align_delay	/* odd difference: bytes only */
    118 	tst	REG_LEN,REG_LEN	/* if ( len == 0 ) return; (delay slot) */
    119 	bt	bcopy_return
    120 
    121 
    122 	mov	REG_SRC,r0	/* if ( src & 1 )          */
    123 	tst	#1,r0
    124 	bt	1f
    125 	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
    126 	add	#-1,REG_LEN	/*    len--;               */
    127 	mov.b	r0,@REG_DST
    128 	add	#1,REG_DST
    129 1:
    130 
    131 
    132 	mov	#1,r1
    133 	cmp/hi	r1,REG_LEN	/* while ( len > 1 ) {     */
    134 	bf/s	no_align_delay
    135 	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) */
    136 2:
    137 	mov.w	@REG_SRC+,r0	/*   *((unsigned short*)dst)++ = *((unsigned short*)src)++;      */
    138 	add	#-2,REG_LEN	/*   len -= 2;                                                   */
    139 	mov.w	r0,@REG_DST
    140 	cmp/hi	r1,REG_LEN
    141 	bt/s	2b
    142 	add	#2,REG_DST	/* }  (delay slot)         */
    143 
    144 
    145 no_align:
    146 	tst	REG_LEN,REG_LEN	/* while ( len != 0 ) {    */
    147 no_align_delay:			/* entered with T = ( len == 0 ) already set */
    148 	bt	bcopy_return
    149 1:
    150 	mov.b	@REG_SRC+,r0	/*    *dst++ = *src++;     */
    151 	add	#-1,REG_LEN	/*    len--;               */
    152 	mov.b	r0,@REG_DST
    153 	tst	REG_LEN,REG_LEN
    154 	bf/s	1b
    155 	add	#1,REG_DST	/* }  (delay slot)         */
    156 bcopy_return:
    157 	rts
    158 #ifdef REG_DST0
    159 	mov	REG_DST0,r0	/* (delay slot) return the original dst */
    160 #else
    161 	nop			/* (delay slot) */
    162 #endif
    163 
    164 
    165 bcopy_overlap:
    166 	add	REG_LEN,REG_SRC	/* src += len;             */
    167 	add	REG_LEN,REG_DST	/* dst += len;  both now point one past the end */
    168 
    169 	mov	REG_SRC,r0
    170 	xor	REG_DST,r0
    171 	and	#3,r0
    172 	mov	r0,r1		/* r1 keeps (src ^ dst) & 3 for ov_word_align */
    173 	tst	r0,r0		/* (src ^ dst) & 3         */
    174 	bf/s	ov_word_align	/* low 2 bits differ: no longword copies */
    175 
    176 ov_longword_align:
    177 	tst	REG_LEN,REG_LEN	/* if ( len==0 ) return; (also the bf/s delay slot) */
    178 	bt/s	bcopy_return
    179 
    180 
    181 	mov	REG_SRC,r0	/* (delay slot) */
    182 	tst	#1,r0		/* if ( src & 1 )          */
    183 	bt	1f
    184 	add	#-1,REG_SRC	/*    *--dst = *--src;     */
    185 	mov.b	@REG_SRC,r0
    186 	mov.b	r0,@-REG_DST
    187 	add	#-1,REG_LEN	/*    len--;               */
    188 1:
    189 
    190 
    191 	mov	#1,r0
    192 	cmp/hi	r0,REG_LEN	/* if ( (len > 1) &&       */
    193 	bf/s	1f
    194 	mov	REG_SRC,r0	/* (delay slot) */
    195 	tst	#2,r0		/*      (src & 2) {        */
    196 	bt	1f
    197 	add	#-2,REG_SRC	/*        *--((unsigned short*)dst) = *--((unsigned short*)src); */
    198 	mov.w	@REG_SRC,r0
    199 	add	#-2,REG_LEN	/*        len -= 2;                                              */
    200 	mov.w	r0,@-REG_DST	/* }                       */
    201 1:
    202 
    203 
    204 	mov	#3,r1
    205 	cmp/hi	r1,REG_LEN	/* while ( len > 3 ) {     */
    206 	bf/s	ov_no_align_delay
    207 	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) */
    208 2:
    209 	add	#-4,REG_SRC
    210 	mov.l	@REG_SRC,r0	/*   *--((unsigned long*)dst) = *--((unsigned long*)src);        */
    211 	add	#-4,REG_LEN	/*   len -= 4;                                                   */
    212 	cmp/hi	r1,REG_LEN
    213 	bt/s	2b
    214 	mov.l	r0,@-REG_DST	/* }  (delay slot)         */
    215 
    216 	bra	ov_no_align_delay	/* copy the remaining 0-3 bytes */
    217 	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) */
    218 
    219 
    220 ov_word_align:
    221 	mov	r1,r0		/* r0 = (src ^ dst) & 3    */
    222 	tst	#1,r0
    223 	bf/s	ov_no_align_delay	/* odd difference: bytes only */
    224 	tst	REG_LEN,REG_LEN	/* if ( len == 0 ) return; (delay slot) */
    225 	bt	bcopy_return
    226 
    227 
    228 	mov	REG_SRC,r0	/* if ( src & 1 )          */
    229 	tst	#1,r0
    230 	bt	1f
    231 	add	#-1,REG_SRC
    232 	mov.b	@REG_SRC,r0	/*    *--dst = *--src;     */
    233 	add	#-1,REG_LEN	/*    len--;               */
    234 	mov.b	r0,@-REG_DST
    235 1:
    236 
    237 
    238 	mov	#1,r1
    239 	cmp/hi	r1,REG_LEN	/* while ( len > 1 ) {     */
    240 	bf/s	ov_no_align_delay
    241 	tst	REG_LEN,REG_LEN	/* (delay slot) T = ( len == 0 ) */
    242 2:
    243 	add	#-2,REG_SRC
    244 	mov.w	@REG_SRC,r0	/*   *--((unsigned short*)dst) = *--((unsigned short*)src);      */
    245 	add	#-2,REG_LEN	/*   len -= 2;                                                   */
    246 	cmp/hi	r1,REG_LEN
    247 	bt/s	2b
    248 	mov.w	r0,@-REG_DST	/* }  (delay slot)         */
    249 
    250 
    251 ov_no_align:
    252 	tst	REG_LEN,REG_LEN	/* while ( len != 0 ) {    */
    253 ov_no_align_delay:		/* entered with T = ( len == 0 ) already set */
    254 	bt	9f
    255 1:
    256 	add	#-1,REG_SRC
    257 	mov.b	@REG_SRC,r0	/*    *--dst = *--src;     */
    258 	add	#-1,REG_LEN	/*    len--;               */
    259 	tst	REG_LEN,REG_LEN
    260 	bf/s	1b
    261 	mov.b	r0,@-REG_DST	/* }  (delay slot)         */
    262 9:				/* same return sequence as bcopy_return */
    263 	rts
    264 #ifdef REG_DST0
    265 	mov	REG_DST0,r0	/* (delay slot) return the original dst */
    266 #else
    267 	nop			/* (delay slot) */
    268 #endif
    269