/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

// This file is a template: it assembles to stpcpy when STPCPY is defined
// and to strcpy otherwise. Exactly which one is chosen by the including
// build; at least one of the two macros must be defined.
#if !defined(STPCPY) && !defined(STRCPY)
#error "Either STPCPY or STRCPY must be defined."
#endif

#include <private/bionic_asm.h>

    .syntax unified

    .thumb
    .thumb_func

// m_push: save the registers the copy code uses as scratch and describe
// the resulting frame to the unwinder.
//   stpcpy: r4, r5, lr (12 bytes). r0 is not saved because the return
//           value is computed in r0 as the copy proceeds.
//   strcpy: r0, r4, r5, lr (16 bytes). r0 (the dst pointer on entry) is
//           saved so that m_pop hands it back unchanged as the result.
#if defined(STPCPY)
    .macro m_push
    push    {r4, r5, lr}
    .cfi_def_cfa_offset 12
    .cfi_rel_offset r4, 0
    .cfi_rel_offset r5, 4
    .cfi_rel_offset lr, 8
    .endm // m_push
#else
    .macro m_push
    push    {r0, r4, r5, lr}
    .cfi_def_cfa_offset 16
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r4, 4
    .cfi_rel_offset r5, 8
    .cfi_rel_offset lr, 12
    .endm // m_push
#endif

// m_pop: undo m_push and return in one instruction by popping the saved
// lr directly into pc. Every exit path of the function ends with m_pop,
// so nothing after an m_pop is reached by fall-through.
#if defined(STPCPY)
    .macro m_pop
    pop     {r4, r5, pc}
    .endm // m_pop
#else
    .macro m_pop
    pop     {r0, r4, r5, pc}
    .endm // m_pop
#endif

// m_copy_byte reg, cmd, label
//   Copy one byte from [r1] to [r0] through the given register,
//   post-incrementing both pointers, then apply cmd (cbz or cbnz in this
//   file) to the byte that was just copied to branch to label.
    .macro m_copy_byte reg, cmd, label
    ldrb    \reg, [r1], #1
    strb    \reg, [r0], #1
    \cmd    \reg, \label
    .endm // m_copy_byte

//-----------------------------------------------------------------------
// strcpy / stpcpy (Thumb-2)
//
// In:      r0 = dst, r1 = src
// Out:     strcpy: r0 = dst as passed in (restored from the stack by m_pop)
//          stpcpy: r0 = address of the NUL terminator written to dst
// Scratch: r2, r3, ip, flags; r4, r5 and lr are also used but are saved
//          by m_push and restored by m_pop.
//
// Outline:
//   1. Copy the first 8 bytes one at a time.
//   2. Copy 1/2/4 bytes as needed until dst is 8-byte aligned.
//   3. If src is then 8-byte aligned too, run the ldrd/strd main loop;
//      otherwise dispatch on (src & 7) to one of seven loops that still
//      store 8 aligned bytes per iteration but never read src bytes that
//      could lie on a page the string does not reach.
//   4. When a word containing the NUL is found, store only the bytes up
//      to and including the NUL.
//
// The "which byte is zero" decoding assumes the lowest-addressed byte of
// a loaded word is in bits 0-7 (little-endian data).
//-----------------------------------------------------------------------
#if defined(STPCPY)
ENTRY(stpcpy)
#else
ENTRY(strcpy)
#endif
    // For short copies, hard-code checking the first 8 bytes since this
    // new code doesn't win until after about 8 bytes.
    m_push
    m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r3, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r4, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r5, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r3, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r4, cmd=cbz, label=.Lstringcopy_finish
    // The eighth byte uses cbnz: a non-zero byte jumps over the return
    // path below, a NUL falls through into it.
    m_copy_byte reg=r5, cmd=cbnz, label=.Lstringcopy_continue

.Lstringcopy_finish:
    // Reached with r0 one past the NUL that was just stored (every store
    // leading here post-incremented r0).
#if defined(STPCPY)
    sub     r0, r0, #1                  // step back onto the NUL
#endif
    m_pop

.Lstringcopy_continue:
    pld     [r1, #0]
    ands    r3, r0, #7                  // r3 = dst & 7
    beq     .Lstringcopy_check_src_align

    // Align to a double word (64 bits).
    rsb     r3, r3, #8                  // r3 = bytes (1..7) still needed to align dst
    lsls    ip, r3, #31                 // Z = (bit 0 of r3 clear), C = bit 1 of r3
    beq     .Lstringcopy_align_to_32

    // Bit 0 set: copy a single byte.
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete

.Lstringcopy_align_to_32:
    // C still holds bit 1 of r3 from the lsls above; the ldrb/strb/cbz
    // in between do not modify the flags.
    bcc     .Lstringcopy_align_to_64

    // Bit 1 set: copy two bytes.
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete

.Lstringcopy_align_to_64:
    tst     r3, #4                      // bit 2 set: four more bytes needed
    beq     .Lstringcopy_check_src_align
    // Read one byte at a time since we don't have any idea about the alignment
    // of the source and we don't want to read into a different page.
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete

.Lstringcopy_check_src_align:
    // At this point dst is aligned to a double word, check if src
    // is also aligned to a double word.
    ands    r3, r1, #7                  // r3 = src & 7, used as the tbb index below
    bne     .Lstringcopy_unaligned_copy

    // Both pointers are 8-byte aligned: copy 8 bytes per iteration.
    .p2align 2
.Lstringcopy_mainloop:
    ldrd    r2, r3, [r1], #8

    pld     [r1, #64]

    // ip = (r2 - 0x01010101) & ~r2 & 0x80808080, which is non-zero iff
    // r2 contains a zero byte. The lowest set bit of ip (bit 7, 15, 23
    // or 31) identifies the first zero byte.
    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    // Same test on the second word.
    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    // No NUL in these 8 bytes: store them and continue.
    strd    r2, r3, [r0], #8
    b       .Lstringcopy_mainloop

.Lstringcopy_complete:
    // Exit for the byte-wise alignment copies above: r0 is one past the
    // NUL that was just stored.
#if defined(STPCPY)
    sub     r0, r0, #1
#endif
    m_pop

    // The NUL is somewhere in r2, nothing from r2 has been stored yet and
    // r0 points at where byte 0 of r2 belongs. ip is the zero-byte mask
    // computed from r2.
.Lstringcopy_zero_in_first_register:
    lsls    lr, ip, #17                 // result != 0 iff bit 7 set; C = bit 15
    bne     .Lstringcopy_copy1byte      // byte 0 is the NUL
    bcs     .Lstringcopy_copy2bytes     // byte 1 is the NUL
    lsls    ip, ip, #1                  // result != 0 iff bit 23 set
    bne     .Lstringcopy_copy3bytes     // byte 2 is the NUL
    // Otherwise byte 3 is the NUL: fall through.

.Lstringcopy_copy4bytes:
    // Copy 4 bytes to the destination.
#if defined(STPCPY)
    str     r2, [r0], #3                // post-index by 3 leaves r0 on the NUL
#else
    str     r2, [r0]
#endif
    m_pop

.Lstringcopy_copy1byte:
    // Byte 0 of r2 is the NUL; r0 already points at its destination.
    strb    r2, [r0]
    m_pop

.Lstringcopy_copy2bytes:
#if defined(STPCPY)
    strh    r2, [r0], #1                // post-index by 1 leaves r0 on the NUL
#else
    strh    r2, [r0]
#endif
    m_pop

.Lstringcopy_copy3bytes:
    // Two bytes via strh, then the third (the NUL) via strb. r0 ends on
    // the NUL; for strcpy that value is discarded when m_pop restores r0.
    strh    r2, [r0], #2
    lsr     r2, #16
    strb    r2, [r0]
    m_pop

    // r2 has no NUL, the NUL is somewhere in r3; neither word has been
    // stored yet. ip is the zero-byte mask computed from r3 and is decoded
    // exactly as in .Lstringcopy_zero_in_first_register.
.Lstringcopy_zero_in_second_register:
    lsls    lr, ip, #17
    bne     .Lstringcopy_copy5bytes     // byte 0 of r3 is the NUL
    bcs     .Lstringcopy_copy6bytes     // byte 1 of r3 is the NUL
    lsls    ip, ip, #1
    bne     .Lstringcopy_copy7bytes     // byte 2 of r3 is the NUL

    // Copy 8 bytes to the destination.
    strd    r2, r3, [r0]
#if defined(STPCPY)
    add     r0, r0, #7                  // NUL is the last of the 8 bytes
#endif
    m_pop

.Lstringcopy_copy5bytes:
    str     r2, [r0], #4
    strb    r3, [r0]                    // low byte of r3 is the NUL
    m_pop

.Lstringcopy_copy6bytes:
    str     r2, [r0], #4
#if defined(STPCPY)
    strh    r3, [r0], #1                // post-index by 1 leaves r0 on the NUL
#else
    strh    r3, [r0]
#endif
    m_pop

.Lstringcopy_copy7bytes:
    str     r2, [r0], #4
    strh    r3, [r0], #2
    lsr     r3, #16
    strb    r3, [r0]                    // byte 2 of r3 is the NUL
    m_pop

.Lstringcopy_unaligned_copy:
    // Dst is aligned to a double word, while src is at an unknown alignment.
    // There are 7 different versions of the unaligned copy code
    // to prevent overreading the src. The mainloop of every single version
    // will store 64 bits per loop. The difference is how much of src can
    // be read without potentially crossing a page boundary.
    //
    // r3 = src & 7 (1..7; the zero case went to the aligned main loop).
    // Entry n of the table selects .Lstringcopy_unalign<8 - n>, i.e. the
    // variant named after the number of src bytes left before the next
    // 8-byte boundary. Each entry is the halfword distance from the table
    // base, which must directly follow the tbb instruction. Entry 0 is a
    // placeholder that is never selected.
    tbb     [pc, r3]
.Lstringcopy_unaligned_branchtable:
    .byte 0
    .byte ((.Lstringcopy_unalign7 - .Lstringcopy_unaligned_branchtable)/2)
    .byte ((.Lstringcopy_unalign6 - .Lstringcopy_unaligned_branchtable)/2)
    .byte ((.Lstringcopy_unalign5 - .Lstringcopy_unaligned_branchtable)/2)
    .byte ((.Lstringcopy_unalign4 - .Lstringcopy_unaligned_branchtable)/2)
    .byte ((.Lstringcopy_unalign3 - .Lstringcopy_unaligned_branchtable)/2)
    .byte ((.Lstringcopy_unalign2 - .Lstringcopy_unaligned_branchtable)/2)
    .byte ((.Lstringcopy_unalign1 - .Lstringcopy_unaligned_branchtable)/2)

    .p2align 2
    // Can read 7 bytes before possibly crossing a page.
.Lstringcopy_unalign7:
    ldr     r2, [r1], #4                // first 4 of the 7 safe bytes

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    // Check the remaining 3 safe bytes one at a time before loading the
    // word that also covers the first byte past the 8-byte boundary.
    ldrb    r3, [r1]
    cbz     r3, .Lstringcopy_unalign7_copy5bytes
    ldrb    r4, [r1, #1]
    cbz     r4, .Lstringcopy_unalign7_copy6bytes
    ldrb    r5, [r1, #2]
    cbz     r5, .Lstringcopy_unalign7_copy7bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0-2 of r3 are known to be non-zero, so only the top byte
    // needs testing. strd does not change the flags set by lsrs.
    lsrs    ip, r3, #24
    strd    r2, r3, [r0], #8
#if defined(STPCPY)
    beq     .Lstringcopy_finish         // r0 is one past the NUL; finish fixes it up
#else
    beq     .Lstringcopy_unalign_return
#endif
    b       .Lstringcopy_unalign7

.Lstringcopy_unalign7_copy5bytes:
    str     r2, [r0], #4
    strb    r3, [r0]                    // r3 is the NUL byte
.Lstringcopy_unalign_return:
    m_pop

.Lstringcopy_unalign7_copy6bytes:
    str     r2, [r0], #4
    strb    r3, [r0], #1
    strb    r4, [r0]                    // r4 is the NUL byte
    m_pop

.Lstringcopy_unalign7_copy7bytes:
    str     r2, [r0], #4
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    strb    r5, [r0]                    // r5 is the NUL byte
    m_pop

    .p2align 2
    // Can read 6 bytes before possibly crossing a page.
.Lstringcopy_unalign6:
    ldr     r2, [r1], #4                // first 4 of the 6 safe bytes

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    // Check the remaining 2 safe bytes individually.
    ldrb    r4, [r1]
    cbz     r4, .Lstringcopy_unalign_copy5bytes
    ldrb    r5, [r1, #1]
    cbz     r5, .Lstringcopy_unalign_copy6bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0-1 of r3 are known to be non-zero; test byte 2, then byte 3.
    tst     r3, #0xff0000
    beq     .Lstringcopy_copy7bytes
    lsrs    ip, r3, #24
    strd    r2, r3, [r0], #8            // flags from lsrs survive the store
#if defined(STPCPY)
    beq     .Lstringcopy_finish
#else
    beq     .Lstringcopy_unalign_return
#endif
    b       .Lstringcopy_unalign6

    .p2align 2
    // Can read 5 bytes before possibly crossing a page.
.Lstringcopy_unalign5:
    ldr     r2, [r1], #4                // first 4 of the 5 safe bytes

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    // Check the last safe byte before loading the word that starts on it.
    ldrb    r4, [r1]
    cbz     r4, .Lstringcopy_unalign_copy5bytes

    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .Lstringcopy_unalign5

    // Shared by the unalign6 and unalign5 loops: r2 holds 4 good bytes,
    // r4 (and r5) hold the individually loaded bytes that follow.
.Lstringcopy_unalign_copy5bytes:
    str     r2, [r0], #4
    strb    r4, [r0]                    // r4 is the NUL byte
    m_pop

.Lstringcopy_unalign_copy6bytes:
    str     r2, [r0], #4
    strb    r4, [r0], #1
    strb    r5, [r0]                    // r5 is the NUL byte
    m_pop

    .p2align 2
    // Can read 4 bytes before possibly crossing a page.
.Lstringcopy_unalign4:
    // src is 4-byte aligned here, so each word load stays within one
    // aligned word; the second word is only loaded once the first is
    // known to contain no NUL.
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    ldr     r3, [r1], #4
    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .Lstringcopy_unalign4

    .p2align 2
    // Can read 3 bytes before possibly crossing a page.
.Lstringcopy_unalign3:
    // Check the 3 safe bytes individually; only when all are non-zero is
    // it known that the string continues past the 8-byte boundary.
    ldrb    r2, [r1]
    cbz     r2, .Lstringcopy_unalign3_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .Lstringcopy_unalign3_copy2bytes
    ldrb    r4, [r1, #2]
    cbz     r4, .Lstringcopy_unalign3_copy3bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    // Bytes 0-2 of r2 were checked above; only its top byte can be the NUL.
    lsrs    lr, r2, #24
    beq     .Lstringcopy_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .Lstringcopy_unalign3

.Lstringcopy_unalign3_copy1byte:
    strb    r2, [r0]                    // r2 is the NUL byte
    m_pop

.Lstringcopy_unalign3_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]                    // r3 is the NUL byte
    m_pop

.Lstringcopy_unalign3_copy3bytes:
    strb    r2, [r0], #1
    strb    r3, [r0], #1
    strb    r4, [r0]                    // r4 is the NUL byte
    m_pop

    .p2align 2
    // Can read 2 bytes before possibly crossing a page.
.Lstringcopy_unalign2:
    // Check the 2 safe bytes individually before the word loads.
    ldrb    r2, [r1]
    cbz     r2, .Lstringcopy_unalign_copy1byte
    ldrb    r4, [r1, #1]
    cbz     r4, .Lstringcopy_unalign_copy2bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0-1 of r2 were checked above; test byte 2, then byte 3.
    tst     r2, #0xff0000
    beq     .Lstringcopy_copy3bytes
    lsrs    ip, r2, #24
    beq     .Lstringcopy_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .Lstringcopy_unalign2

    .p2align 2
    // Can read 1 byte before possibly crossing a page.
.Lstringcopy_unalign1:
    // Check the single safe byte before the word loads.
    ldrb    r2, [r1]
    cbz     r2, .Lstringcopy_unalign_copy1byte

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .Lstringcopy_unalign1

    // Shared by the unalign2 and unalign1 loops.
.Lstringcopy_unalign_copy1byte:
    strb    r2, [r0]                    // r2 is the NUL byte
    m_pop

.Lstringcopy_unalign_copy2bytes:
    strb    r2, [r0], #1
    strb    r4, [r0]                    // r4 is the NUL byte
    m_pop
#if defined(STPCPY)
END(stpcpy)
#else
END(strcpy)
#endif