/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#if !defined(STPCPY) && !defined(STRCPY)
#error "Either STPCPY or STRCPY must be defined."
#endif

#include <private/bionic_asm.h>

    .syntax unified

    .thumb
    .thumb_func

/*
 * char* strcpy(char* dst, const char* src)    -- built when STRCPY is defined
 * char* stpcpy(char* dst, const char* src)    -- built when STPCPY is defined
 *
 * In:   r0 = dst, r1 = src.
 * Out:  strcpy: r0 = the original dst (saved by m_push, reloaded by m_ret).
 *       stpcpy: r0 = address of the NUL terminator written to dst.
 * Uses: r2, r3, ip as scratch; r4, r5, lr are saved by m_push and restored
 *       by m_ret, which returns by popping the saved lr into pc.
 *
 * Word-at-a-time paths detect a NUL byte in a 32-bit word x with
 *     (x - 0x01010101) & ~x & 0x80808080
 * which is non-zero iff x contains a zero byte. The lowest set marker bit
 * (bit 7, 15, 23 or 31) identifies the first zero byte; the
 * .Lstringcopy_zero_in_*_register handlers test the markers from the lowest
 * byte upwards.
 *
 * Src is never read with a multi-byte load unless every byte of that load
 * lies either in the same aligned double word as a byte already known to be
 * part of the string, or directly after bytes already checked to be non-NUL.
 */

// m_push: save the registers that m_ret restores. The strcpy variant also
// saves r0 so that the original dst can be returned.
#if defined(STPCPY)
    .macro m_push
    push    {r4, r5, lr}
    .cfi_def_cfa_offset 12
    .cfi_rel_offset r4, 0
    .cfi_rel_offset r5, 4
    .cfi_rel_offset lr, 8
    .endm // m_push
#else
    .macro m_push
    push    {r0, r4, r5, lr}
    .cfi_def_cfa_offset 16
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r4, 4
    .cfi_rel_offset r5, 8
    .cfi_rel_offset lr, 12
    .endm // m_push
#endif

// m_ret: restore what m_push saved and return. \inst is pop or a
// conditional form of it (popeq, popne, popcs) for use inside an IT block.
#if defined(STPCPY)
    .macro m_ret inst
    \inst   {r4, r5, pc}
    .endm // m_ret
#else
    .macro m_ret inst
    \inst   {r0, r4, r5, pc}
    .endm // m_ret
#endif

// m_copy_byte: copy one byte from [r1] to [r0], post-incrementing both
// pointers, then apply \cmd (cbz or cbnz) on the copied byte to \label.
// Condition flags are left untouched.
    .macro m_copy_byte reg, cmd, label
    ldrb    \reg, [r1], #1
    strb    \reg, [r0], #1
    \cmd    \reg, \label
    .endm // m_copy_byte

#if defined(STPCPY)
ENTRY(stpcpy)
#else
ENTRY(strcpy)
#endif
    // Unroll the first 8 bytes that will be copied.
    m_push
    m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r3, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r4, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r5, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r3, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r4, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r5, cmd=cbnz, label=.Lstringcopy_continue

.Lstringcopy_finish:
    // r0 was post-incremented past the NUL that was just stored.
#if defined(STPCPY)
    sub     r0, r0, #1
#endif
    m_ret   inst=pop

.Lstringcopy_continue:
    pld     [r1, #0]
    // r3 = dst & 7; non-zero means dst still has to be aligned.
    ands    r3, r0, #7
    bne     .Lstringcopy_align_dst

.Lstringcopy_check_src_align:
    // At this point dst is aligned to a double word, check if src
    // is also aligned to a double word.
    ands    r3, r1, #7
    bne     .Lstringcopy_unaligned_copy

    .p2align 2
.Lstringcopy_mainloop:
    // Both pointers are double word aligned: copy 8 bytes per iteration.
    ldmia   r1!, {r2, r3}

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_mainloop

.Lstringcopy_zero_in_first_register:
    // r2 holds a word containing a NUL, ip holds its marker bits and nothing
    // of that word has been stored yet.
    // After the shift: NE <=> marker bit 7 set (byte 0 is NUL),
    //                  CS <=> marker bit 15 set (byte 1 is NUL).
    lsls    lr, ip, #17
    itt     ne
    strbne  r2, [r0]
    m_ret   inst=popne
    itt     cs
#if defined(STPCPY)
    strhcs  r2, [r0], #1
#else
    strhcs  r2, [r0]
#endif
    m_ret   inst=popcs
    // Bits 7 and 15 are clear here, so EQ <=> marker bit 23 is clear, which
    // means the NUL is byte 3 and the whole word can be stored.
    lsls    ip, ip, #1
    itt     eq
#if defined(STPCPY)
    streq   r2, [r0], #3
#else
    streq   r2, [r0]
#endif
    m_ret   inst=popeq
    // Byte 2 is the NUL: store two bytes, then the NUL itself.
    strh    r2, [r0], #2
    lsr     r3, r2, #16
    strb    r3, [r0]
    m_ret   inst=pop

.Lstringcopy_zero_in_second_register:
    // r2 is a NUL-free word that has not been stored yet, r3 holds the word
    // containing the NUL and ip its marker bits. Same flag scheme as above.
    lsls    lr, ip, #17
    ittt    ne
    stmiane r0!, {r2}
    strbne  r3, [r0]
    m_ret   inst=popne
    ittt    cs
    strcs   r2, [r0], #4
#if defined(STPCPY)
    strhcs  r3, [r0], #1
#else
    strhcs  r3, [r0]
#endif
    m_ret   inst=popcs
    lsls    ip, ip, #1
#if defined(STPCPY)
    ittt    eq
#else
    itt     eq
#endif
    stmiaeq r0, {r2, r3}
#if defined(STPCPY)
    addeq   r0, r0, #7
#endif
    m_ret   inst=popeq
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r4, r3, #16
    strb    r4, [r0]
    m_ret   inst=pop

.Lstringcopy_align_dst:
    // Align to a double word (64 bits).
    // r3 = 8 - (dst & 7) = number of bytes to copy to align dst (1..7).
    rsb     r3, r3, #8
    // Z set <=> bit 0 of r3 is clear; C = bit 1 of r3. The byte copy below
    // does not touch the flags, so C is still valid at the bcc.
    lsls    ip, r3, #31
    beq     .Lstringcopy_align_to_32

    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete

.Lstringcopy_align_to_32:
    bcc     .Lstringcopy_align_to_64

    ldrb    r4, [r1], #1
    strb    r4, [r0], #1
    cmp     r4, #0
#if defined(STPCPY)
    itt     eq
    subeq   r0, r0, #1
#else
    it      eq
#endif
    m_ret   inst=popeq
    ldrb    r5, [r1], #1
    strb    r5, [r0], #1
    cmp     r5, #0
#if defined(STPCPY)
    itt     eq
    subeq   r0, r0, #1
#else
    it      eq
#endif
    m_ret   inst=popeq

.Lstringcopy_align_to_64:
    tst     r3, #4
    beq     .Lstringcopy_check_src_align
    // Read one byte at a time: the alignment of src is unknown here, so a
    // word load could extend past the terminating NUL into a different
    // (possibly unmapped) page.
    m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_complete
    m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_complete
    m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_complete
    m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_complete
    b       .Lstringcopy_check_src_align

.Lstringcopy_complete:
    // r0 was post-incremented past the NUL that was just stored.
#if defined(STPCPY)
    sub     r0, r0, #1
#endif
    m_ret   inst=pop

.Lstringcopy_unaligned_copy:
    // Dst is aligned to a double word, while src is at an unknown alignment.
    // There are 7 different versions of the unaligned copy code
    // to prevent overreading the src. The mainloop of every single version
    // will store 64 bits per loop. The difference is how much of src can
    // be read without potentially crossing a page boundary.
    // r3 = src & 7 (1..7) indexes the table; entry 0 is never used.
    tbb     [pc, r3]
.Lstringcopy_unaligned_branchtable:
    .byte 0
    .byte ((.Lstringcopy_unalign7 - .Lstringcopy_unaligned_branchtable)/2)
    .byte ((.Lstringcopy_unalign6 - .Lstringcopy_unaligned_branchtable)/2)
    .byte ((.Lstringcopy_unalign5 - .Lstringcopy_unaligned_branchtable)/2)
    .byte ((.Lstringcopy_unalign4 - .Lstringcopy_unaligned_branchtable)/2)
    .byte ((.Lstringcopy_unalign3 - .Lstringcopy_unaligned_branchtable)/2)
    .byte ((.Lstringcopy_unalign2 - .Lstringcopy_unaligned_branchtable)/2)
    .byte ((.Lstringcopy_unalign1 - .Lstringcopy_unaligned_branchtable)/2)

    .p2align 2
    // Can read 7 bytes before possibly crossing a page.
.Lstringcopy_unalign7:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    // Probe the remaining 3 bytes of this double word one at a time before
    // issuing a word load that reaches into the next double word.
    ldrb    r3, [r1]
    cbz     r3, .Lstringcopy_unalign7_copy5bytes
    ldrb    r4, [r1, #1]
    cbz     r4, .Lstringcopy_unalign7_copy6bytes
    ldrb    r5, [r1, #2]
    cbz     r5, .Lstringcopy_unalign7_copy7bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // The low 3 bytes of r3 are known non-NUL; Z is set iff the top byte is
    // NUL. stmia does not alter the flags.
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}
#if defined(STPCPY)
    beq     .Lstringcopy_finish
#else
    beq     .Lstringcopy_unalign_return
#endif
    b       .Lstringcopy_unalign7

.Lstringcopy_unalign7_copy5bytes:
    stmia   r0!, {r2}
    strb    r3, [r0]
.Lstringcopy_unalign_return:
    m_ret   inst=pop

.Lstringcopy_unalign7_copy6bytes:
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign7_copy7bytes:
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 6 bytes before possibly crossing a page.
.Lstringcopy_unalign6:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    // Probe the last 2 bytes of this double word before the next word load.
    ldrb    r4, [r1]
    cbz     r4, .Lstringcopy_unalign_copy5bytes
    ldrb    r5, [r1, #1]
    cbz     r5, .Lstringcopy_unalign_copy6bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 of r3 were probed above; check byte 2, then byte 3.
    tst     r3, #0xff0000
    beq     .Lstringcopy_unalign6_copy7bytes
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}
#if defined(STPCPY)
    beq     .Lstringcopy_finish
#else
    beq     .Lstringcopy_unalign_return
#endif
    b       .Lstringcopy_unalign6

.Lstringcopy_unalign6_copy7bytes:
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r3, #16
    strb    r3, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 5 bytes before possibly crossing a page.
.Lstringcopy_unalign5:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    // Probe the last byte of this double word before the next word load.
    ldrb    r4, [r1]
    cbz     r4, .Lstringcopy_unalign_copy5bytes

    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign5

.Lstringcopy_unalign_copy5bytes:
    stmia   r0!, {r2}
    strb    r4, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign_copy6bytes:
    stmia   r0!, {r2}
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 4 bytes before possibly crossing a page.
.Lstringcopy_unalign4:
    // Src is word aligned here, so each word load stays inside one aligned
    // word; the second word is only loaded once the first is NUL-free.
    ldmia   r1!, {r2}

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    ldmia   r1!, {r3}
    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign4

    .p2align 2
    // Can read 3 bytes before possibly crossing a page.
.Lstringcopy_unalign3:
    // Probe the 3 bytes left in this double word before any word load.
    ldrb    r2, [r1]
    cbz     r2, .Lstringcopy_unalign3_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .Lstringcopy_unalign3_copy2bytes
    ldrb    r4, [r1, #2]
    cbz     r4, .Lstringcopy_unalign3_copy3bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    // The low 3 bytes of r2 were probed above; only its top byte can be NUL.
    lsrs    lr, r2, #24
    beq     .Lstringcopy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign3

.Lstringcopy_unalign3_copy1byte:
    strb    r2, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign3_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign3_copy3bytes:
    strb    r2, [r0], #1
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 2 bytes before possibly crossing a page.
.Lstringcopy_unalign2:
    // Probe the 2 bytes left in this double word before any word load.
    ldrb    r2, [r1]
    cbz     r2, .Lstringcopy_unalign_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .Lstringcopy_unalign_copy2bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 of r2 were probed above; check byte 2, then byte 3.
    tst     r2, #0xff0000
    beq     .Lstringcopy_unalign_copy3bytes
    lsrs    ip, r2, #24
    beq     .Lstringcopy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign2

    .p2align 2
    // Can read 1 byte before possibly crossing a page.
.Lstringcopy_unalign1:
    // Probe the single byte left in this double word before any word load.
    ldrb    r2, [r1]
    cbz     r2, .Lstringcopy_unalign_copy1byte

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign1

.Lstringcopy_unalign_copy1byte:
    strb    r2, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign_copy3bytes:
    strh    r2, [r0], #2
    lsr     r2, #16
    strb    r2, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign_copy4bytes:
    // The top byte of r2 is the NUL: store the whole word without writeback.
    stmia   r0, {r2}
#if defined(STPCPY)
    add     r0, r0, #3
#endif
    m_ret   inst=pop
#if defined(STPCPY)
END(stpcpy)
#else
END(strcpy)
#endif