1 /* $OpenBSD: _memcpy.S,v 1.6 2016/08/06 19:16:09 guenther Exp $ */ 2 /* $NetBSD: _memcpy.S,v 1.4 2003/04/05 23:08:52 bjh21 Exp $ */ 3 4 /*- 5 * Copyright (c) 1997 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Neil A. Carson and Mark Brinicombe 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 #include <private/bionic_asm.h> 34 35 .syntax unified 36 37 /* 38 * This is one fun bit of code ... 39 * Some easy listening music is suggested while trying to understand this 40 * code e.g. 
Iron Maiden
 *
 * For anyone attempting to understand it:
 *
 * The core code is implemented here with simple stubs for memcpy(),
 * memmove() and bcopy().
 *
 * All local labels are prefixed with Lmemcpy_
 * Following the prefix, a label starting with f is used in the forward copy
 * code, while a label starting with b is used in the backwards copy code.
 * The source and destination addresses determine whether a forward or
 * backward copy is performed.
 * Separate bits of code are used to deal with the following situations
 * for both the forward and backwards copy:
 *	unaligned source address
 *	unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function along with
 * r0-r3, although r0-r2 have defined uses, i.e. dest, src, len, throughout.
 * Additional registers are preserved prior to use, i.e.
r4, r5 & lr
 *
 * Apologies for the state of the comments ;-)
 */

/*
 * bsd_safe_memcpy - overlap-safe copy core shared by the stubs below.
 *
 * In:
 *	r0 = destination address
 *	r1 = source address
 *	r2 = number of bytes to copy
 * Out:
 *	r0 = destination address (the forward path reloads it from the
 *	     stack; the backward path starts at dest + len and walks r0
 *	     back down by exactly len bytes; the src == dst early-out
 *	     never touches it)
 * Clobbers:
 *	r1, r2, r3, r12 and the condition flags.  r4, r5 and lr are used
 *	as scratch but are saved on the stack and restored before return.
 *
 * Direction: if src < dst (unsigned compare) the copy runs from the top
 * of the buffers downwards, otherwise from the bottom upwards, so that
 * overlapping regions are copied correctly.
 *
 * Throughout the routine r2 is kept *biased* below the real remaining
 * byte count (by 4, 12 or 32 depending on the stage) so that a single
 * subs/adds both updates the count and sets the flags for the next
 * conditional step.  All tests on r2 use signed conditions (blt/bge).
 * NOTE(review): this means a length of 2^31 or more is not handled as
 * a large count - confirm callers never pass one.
 *
 * The unaligned-source loops assemble each output word from two
 * neighbouring aligned source words with lsr/lsl pairs; the shift
 * directions used correspond to little-endian byte order.
 */
ENTRY_PRIVATE(bsd_safe_memcpy)
	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemcpy_backwards	/* src < dst: copy from the top down */

	/*
	 * Flags still come from the pointer compare above, so "eq" means
	 * src == dst (not len == 0).  Nothing needs to move and r0 already
	 * holds the destination address, so just return.
	 */
	bxeq	lr

	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */
	subs	r2, r2, #4		/* r2 = remaining - 4 */
	blt	.Lmemcpy_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_fsrcul		/* oh unaligned source addr */

.Lmemcpy_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8		/* r2 = remaining - 12 */
	blt	.Lmemcpy_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14		/* r2 = remaining - 32 */
	blt	.Lmemcpy_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_floop32

	cmn	r2, #0x10		/* ge if at least 16 bytes remain */
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_fl32:
	adds	r2, r2, #0x14		/* r2 = remaining - 12 */

	/* blat 12 bytes at a time */
.Lmemcpy_floop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemcpy_floop12

.Lmemcpy_fl12:
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	.Lmemcpy_fl4

	subs	r2, r2, #4		/* lt: 4..7 bytes left, ge: 8..11 */
	ldrlt	r3, [r1], #4		/* copy one word ... */
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}		/* ... or two words */
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* undo the bias: r2 = remaining */
	ldmiaeq	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2			/* always 1 byte; ge: 2nd; gt: 3rd */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemcpy_fdestul:
	rsb	r12, r12, #4		/* r12 = bytes needed to align dest */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_fsrcul:
	/*
	 * r12 = source offset within its word (1, 2 or 3).  Round r1 down,
	 * preload the first aligned word into lr, then dispatch on r12.
	 */
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_fsrcul3
	beq	.Lmemcpy_fsrcul2

	/* source offset 1 */
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul1loop16:
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul1l4

.Lmemcpy_fsrcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul1loop4

.Lmemcpy_fsrcul1l4:
	sub	r1, r1, #3		/* back to the true (unaligned) src */
	b	.Lmemcpy_fl4

	/* source offset 2 */
.Lmemcpy_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul2loop16:
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul2l4

.Lmemcpy_fsrcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul2loop4

.Lmemcpy_fsrcul2l4:
	sub	r1, r1, #2		/* back to the true (unaligned) src */
	b	.Lmemcpy_fl4

	/* source offset 3 */
.Lmemcpy_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul3loop16:
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul3l4

.Lmemcpy_fsrcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul3loop4

.Lmemcpy_fsrcul3l4:
	sub	r1, r1, #1		/* back to the true (unaligned) src */
	b	.Lmemcpy_fl4

	/*
	 * Backward copy: point r0/r1 one past the end of each buffer and
	 * work downwards.  Nothing is pushed on entry to this path; lr is
	 * saved only around the loops that use it as scratch.
	 */
.Lmemcpy_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4		/* r2 = remaining - 4 */
	blt	.Lmemcpy_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_bsrcul		/* oh unaligned source addr */

.Lmemcpy_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8		/* r2 = remaining - 12 */
	blt	.Lmemcpy_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemcpy_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_bloop32

.Lmemcpy_bl32:
	cmn	r2, #0x10		/* ge if at least 16 bytes remain */
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14		/* r2 = remaining - 12 */
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemcpy_bl12:
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	.Lmemcpy_bl4
	subs	r2, r2, #4		/* lt: 4..7 bytes left, ge: 8..11 */
	ldrlt	r3, [r1, #-4]!		/* copy one word ... */
	strlt	r3, [r0, #-4]!
	ldmdbge	r1!, {r3, r12}		/* ... or two words */
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* undo the bias: r2 = remaining */
	bxeq	lr			/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2			/* always 1 byte; ge: 2nd; gt: 3rd */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	bx	lr

	/* erg - unaligned destination */
.Lmemcpy_bdestul:
	cmp	r12, #2			/* r12 = dest & 3 = bytes to align */

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemcpy_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemcpy_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_bsrcul:
	/*
	 * r12 = source offset within its word (1, 2 or 3).  Round r1 down,
	 * preload that aligned word into r3, then dispatch on r12.
	 */
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemcpy_bsrcul1
	beq	.Lmemcpy_bsrcul2

	/* source offset 3 */
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul3loop16:
	mov	lr, r3, lsl #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul3l4

.Lmemcpy_bsrcul3loop4:
	mov	r12, r3, lsl #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #24
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul3loop4

.Lmemcpy_bsrcul3l4:
	add	r1, r1, #3		/* back to the true (unaligned) src */
	b	.Lmemcpy_bl4

	/* source offset 2 */
.Lmemcpy_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul2loop16:
	mov	lr, r3, lsl #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul2l4

.Lmemcpy_bsrcul2loop4:
	mov	r12, r3, lsl #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #16
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul2loop4

.Lmemcpy_bsrcul2l4:
	add	r1, r1, #2		/* back to the true (unaligned) src */
	b	.Lmemcpy_bl4

	/* source offset 1 */
.Lmemcpy_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul1loop16:
	mov	lr, r3, lsl #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul1loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul1l4

.Lmemcpy_bsrcul1loop4:
	mov	r12, r3, lsl #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #8
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul1loop4

.Lmemcpy_bsrcul1l4:
	add	r1, r1, #1		/* back to the true (unaligned) src */
	b	.Lmemcpy_bl4
END(bsd_safe_memcpy)

/*
 * memmove_generic - stub around bsd_safe_memcpy.
 *
 * Takes r0 = dest, r1 = src, r2 = len and passes them through unchanged.
 * The incoming r0 is saved alongside lr and reloaded on the way out, so
 * the caller always gets the original destination address back in r0.
 */
ENTRY(memmove_generic)
	stmfd	sp!, {r0, lr}
	bl	bsd_safe_memcpy
	ldmfd	sp!, {r0, pc}
END(memmove_generic)