/*	$OpenBSD: _memcpy.S,v 1.6 2016/08/06 19:16:09 guenther Exp $	*/
/*	$NetBSD: _memcpy.S,v 1.4 2003/04/05 23:08:52 bjh21 Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <private/bionic_asm.h>

  .syntax unified

/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code, e.g. Iron Maiden.
 *
 * For anyone attempting to understand it:
 *
 * The core code is implemented here, with simple stubs for memcpy(),
 * memmove() and bcopy().
 *
 * All local labels are prefixed with Lmemcpy_.
 * After the prefix, labels starting with 'f' are used in the forward copy
 * code, while labels starting with 'b' are used in the backwards copy code.
 * The source and destination addresses determine whether a forward or
 * backward copy is performed.
 * Separate pieces of code handle each of the following situations, for
 * both the forward and the backwards copy:
 *   - unaligned source address
 *   - unaligned destination address
 * A separate copy routine is used to produce an optimised result for each
 * of these cases.
 * The copy code uses LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function, along with
 * r0-r3, although r0-r2 have defined uses (src, dest and len) throughout.
 * Additional registers (r4, r5 and lr) are preserved on the stack before use.
 *
 * Apologies for the state of the comments ;-)
 */

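/*
 * A rough C sketch of the overall shape follows.  This is an illustration
 * only (hypothetical helper name, byte-at-a-time), not the code this file
 * was generated from: the real routine below adds word-at-a-time copies
 * and the unaligned source/destination special cases.
 *
 *	#include <stddef.h>
 *
 *	static void *sketch_memmove(void *dst, const void *src, size_t len) {
 *		unsigned char *d = dst;
 *		const unsigned char *s = src;
 *		if (d <= s) {
 *			// Forward copy: dst never overtakes unread src bytes.
 *			while (len--) *d++ = *s++;
 *		} else {
 *			// Backward copy: start one past the end, walk down.
 *			d += len;
 *			s += len;
 *			while (len--) *--d = *--s;
 *		}
 *		return dst;
 *	}
 */
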
ENTRY_PRIVATE(bsd_safe_memcpy)
	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemcpy_backwards

	moveq	r0, #0			/* Quick abort for src == dst */
	moveq	pc, lr
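
	/*
	 * Forward copy: the destination starts at or below the source, so
	 * copying low-to-high can never overwrite source bytes that have
	 * not been read yet, even if the regions overlap.
	 */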

	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemcpy_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_fsrcul		/* oh unaligned source addr */

.Lmemcpy_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_floop32

	cmn	r2, #0x10
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemcpy_floop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemcpy_floop12

.Lmemcpy_fl12:
	adds	r2, r2, #8
	blt	.Lmemcpy_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	ldmiaeq	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemcpy_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
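	/*
	 * The source is offset from a word boundary by 1, 2 or 3 bytes, so
	 * each output word must be stitched together from two neighbouring
	 * aligned source words.  On little-endian ARM the idea, written as C
	 * for a 1-byte offset, is roughly:
	 *
	 *	out = (prev >> 8) | (next << 24);
	 *
	 * 'prev' is carried between iterations in lr; the fsrcul1, fsrcul2
	 * and fsrcul3 paths below use shift pairs of 8/24, 16/16 and 24/8
	 * respectively.
	 */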
.Lmemcpy_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_fsrcul3
	beq	.Lmemcpy_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul1loop16:
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul1l4

.Lmemcpy_fsrcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul1loop4

.Lmemcpy_fsrcul1l4:
	sub	r1, r1, #3
	b	.Lmemcpy_fl4

.Lmemcpy_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul2loop16:
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul2l4

.Lmemcpy_fsrcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul2loop4

.Lmemcpy_fsrcul2l4:
	sub	r1, r1, #2
	b	.Lmemcpy_fl4

.Lmemcpy_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul3loop16:
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul3l4

.Lmemcpy_fsrcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul3loop4

.Lmemcpy_fsrcul3l4:
	sub	r1, r1, #1
	b	.Lmemcpy_fl4

.Lmemcpy_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
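	/*
	 * src and dst now point one byte past the end of their buffers;
	 * the copy walks downwards, which is the safe direction when the
	 * destination overlaps the source from above.
	 */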
	subs	r2, r2, #4
	blt	.Lmemcpy_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_bsrcul		/* oh unaligned source addr */

.Lmemcpy_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemcpy_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_bloop32

.Lmemcpy_bl32:
	cmn	r2, #0x10
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemcpy_bl12:
	adds	r2, r2, #8
	blt	.Lmemcpy_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	moveq	pc, lr			/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	mov	pc, lr

	/* erg - unaligned destination */
.Lmemcpy_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemcpy_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemcpy_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
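	/*
	 * Mirror image of the forward unaligned-source code above: each
	 * output word is again stitched from two aligned source words, but
	 * with the shift directions swapped because the copy runs downwards.
	 */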
.Lmemcpy_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemcpy_bsrcul1
	beq	.Lmemcpy_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul3loop16:
	mov	lr, r3, lsl #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul3l4

.Lmemcpy_bsrcul3loop4:
	mov	r12, r3, lsl #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #24
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul3loop4

.Lmemcpy_bsrcul3l4:
	add	r1, r1, #3
	b	.Lmemcpy_bl4

.Lmemcpy_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul2loop16:
	mov	lr, r3, lsl #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul2l4

.Lmemcpy_bsrcul2loop4:
	mov	r12, r3, lsl #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #16
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul2loop4

.Lmemcpy_bsrcul2l4:
	add	r1, r1, #2
	b	.Lmemcpy_bl4

.Lmemcpy_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul1loop32:
	mov	lr, r3, lsl #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul1l4

.Lmemcpy_bsrcul1loop4:
	mov	r12, r3, lsl #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #8
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul1loop4

.Lmemcpy_bsrcul1l4:
	add	r1, r1, #1
	b	.Lmemcpy_bl4
END(bsd_safe_memcpy)

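/*
 * memmove_generic() is a thin wrapper: the routine above already handles
 * overlapping regions, so all that is left is to save and restore r0 so
 * the caller gets the destination pointer back.  Roughly, in C (sketch
 * only, assuming a matching C prototype for bsd_safe_memcpy):
 *
 *	void *memmove_generic(void *dst, const void *src, size_t n) {
 *		bsd_safe_memcpy(dst, src, n);
 *		return dst;
 *	}
 */
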
ENTRY(memmove_generic)
        stmfd   sp!, {r0, lr}
        bl      bsd_safe_memcpy
        ldmfd   sp!, {r0, pc}
END(memmove_generic)