/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <private/bionic_asm.h>

    .syntax unified

    .thumb
    .thumb_func

    // Save r0, r4, r5 and lr. r0 is saved because the copy advances it and
    // the entry value has to be handed back by m_pop.
    .macro m_push
    push    {r0, r4, r5, lr}
    .endm // m_push

    // Restore r0, r4 and r5 and return by popping the saved lr into pc.
    .macro m_pop
    pop     {r0, r4, r5, pc}
    .endm // m_pop

    // Copy one byte from [r1] to [r0] through reg, post-incrementing both
    // pointers, then apply cmd (cbz or cbnz) to the copied byte with label
    // as the branch target.
    .macro m_copy_byte reg, cmd, label
    ldrb    \reg, [r1], #1
    strb    \reg, [r0], #1
    \cmd    \reg, \label
    .endm // m_copy_byte

    // Test the 32-bit value in reg for a zero byte.
    //   ip = (reg - 0x01010101) & ~reg & 0x80808080
    // The flags are set from ip: ne means at least one byte of reg is zero,
    // eq means none is. The lowest set 0x80 bit of ip marks the lowest-order
    // zero byte (higher 0x80 bits may be set spuriously by the borrow).
    .macro m_find_nul reg
    sub     ip, \reg, #0x01010101
    bic     ip, ip, \reg
    ands    ip, ip, #0x80808080
    .endm // m_find_nul

/*
 * strcpy: copy the NUL-terminated string at r1 to r0, terminator included.
 *
 *   In:     r0 = destination pointer
 *           r1 = source pointer
 *   Out:    r0 = destination pointer as passed in (pushed by m_push and
 *           restored by m_pop on every exit path)
 *   Saved:  r4, r5 and lr are pushed on entry and restored on exit; lr is
 *           used as a scratch register in between.
 *   Clobb:  r1, r2, r3, ip, flags
 *   Stack:  16 bytes (one push of four registers)
 *
 * Outline:
 *   1. Copy the first 8 bytes one at a time, returning as soon as the
 *      terminator has been stored.
 *   2. Copy 1, 2 and/or 4 further bytes, one at a time, until the
 *      destination is 8-byte aligned.
 *   3. If the source is now 8-byte aligned as well, run a ldrd/strd loop
 *      that moves 8 bytes per iteration.
 *   4. Otherwise dispatch on (src & 7) to one of seven loops. Each one
 *      still stores 8 aligned bytes per iteration but only issues source
 *      loads that stay inside an aligned 8-byte block already known to
 *      contain string bytes, so no load reaches beyond the terminator into
 *      another page.
 *
 * The tail code stores the low-order byte of a loaded word at the lowest
 * address, i.e. it is written for little-endian data accesses.
 */
ENTRY(strcpy)
    // For short copies, hard-code checking the first 8 bytes since this
    // new code does not win until after about 8 bytes.
    m_push
    m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r5, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r5, cmd=cbnz, label=.Lstrcpy_continue

.Lstrcpy_finish:
    m_pop

.Lstrcpy_continue:
    pld     [r1, #0]
    ands    r3, r0, #7
    beq     .Lstrcpy_check_src_align

    // Align to a double word (64 bits).
    // r3 = number of bytes (1..7) needed to reach 8-byte dst alignment.
    rsb     r3, r3, #8
    // Z = (bit 0 of r3 is clear), C = bit 1 of r3. None of the
    // instructions up to the bcc below modifies the flags.
    lsls    ip, r3, #31
    beq     .Lstrcpy_align_to_32

    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstrcpy_complete

.Lstrcpy_align_to_32:
    bcc     .Lstrcpy_align_to_64

    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstrcpy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstrcpy_complete

.Lstrcpy_align_to_64:
    tst     r3, #4
    beq     .Lstrcpy_check_src_align
    // Copy these four bytes one at a time. The src alignment is unknown
    // at this point, so a 4-byte load could extend past the terminator
    // and into the following page, which may not be readable.
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstrcpy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstrcpy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstrcpy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstrcpy_complete

.Lstrcpy_check_src_align:
    // At this point dst is aligned to a double word, check if src
    // is also aligned to a double word.
    ands    r3, r1, #7
    bne     .Lstrcpy_unaligned_copy

    .p2align 2
.Lstrcpy_mainloop:
    // Both pointers are 8-byte aligned: move 8 bytes per iteration.
    ldrd    r2, r3, [r1], #8

    pld     [r1, #64]

    m_find_nul r2
    bne     .Lstrcpy_zero_in_first_register

    m_find_nul r3
    bne     .Lstrcpy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .Lstrcpy_mainloop

.Lstrcpy_complete:
    m_pop

.Lstrcpy_zero_in_first_register:
    // r2 holds the final 1 to 4 bytes and ip the m_find_nul mask for r2.
    // lsls #17 leaves a non-zero result iff bit 7 is set (byte 0 is the
    // terminator) and moves bit 15 (byte 1 is the terminator) into C.
    lsls    lr, ip, #17
    bne     .Lstrcpy_copy1byte
    bcs     .Lstrcpy_copy2bytes
    // Bits 7 and 15 are clear here, so the result is non-zero iff
    // bit 23 is set (byte 2 is the terminator).
    lsls    ip, ip, #1
    bne     .Lstrcpy_copy3bytes

.Lstrcpy_copy4bytes:
    // Copy 4 bytes to the destination.
    str     r2, [r0]
    m_pop

.Lstrcpy_copy1byte:
    strb    r2, [r0]
    m_pop

.Lstrcpy_copy2bytes:
    strh    r2, [r0]
    m_pop

.Lstrcpy_copy3bytes:
    strh    r2, [r0], #2
    lsr     r2, #16
    strb    r2, [r0]
    m_pop

.Lstrcpy_zero_in_second_register:
    // r2 holds 4 string bytes, r3 the final 1 to 4 bytes and ip the
    // m_find_nul mask for r3. Same flag logic as for the first register.
    lsls    lr, ip, #17
    bne     .Lstrcpy_copy5bytes
    bcs     .Lstrcpy_copy6bytes
    lsls    ip, ip, #1
    bne     .Lstrcpy_copy7bytes

    // Copy 8 bytes to the destination.
    strd    r2, r3, [r0]
    m_pop

.Lstrcpy_copy5bytes:
    str     r2, [r0], #4
    strb    r3, [r0]
    m_pop

.Lstrcpy_copy6bytes:
    str     r2, [r0], #4
    strh    r3, [r0]
    m_pop

.Lstrcpy_copy7bytes:
    str     r2, [r0], #4
    strh    r3, [r0], #2
    lsr     r3, #16
    strb    r3, [r0]
    m_pop

.Lstrcpy_unaligned_copy:
    // Dst is aligned to a double word, while src is at an unknown alignment.
    // There are 7 different versions of the unaligned copy code
    // to prevent overreading the src. The mainloop of every single version
    // will store 64 bits per loop. The difference is how much of src can
    // be read without potentially crossing a page boundary.
    //
    // r3 = src & 7 (1..7). In Thumb state pc reads as the address of the
    // table below, and each entry is a halfword offset from the table.
    // Entry 0 is never selected because r3 is non-zero here.
    tbb     [pc, r3]
.Lstrcpy_unaligned_branchtable:
    .byte 0
    .byte ((.Lstrcpy_unalign7 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign6 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign5 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign4 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign3 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign2 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign1 - .Lstrcpy_unaligned_branchtable)/2)

    .p2align 2
    // Can read 7 bytes before possibly crossing a page.
.Lstrcpy_unalign7:
    ldr     r2, [r1], #4

    m_find_nul r2
    bne     .Lstrcpy_zero_in_first_register

    // Probe the remaining 3 bytes of this 8-byte block one at a time.
    ldrb    r3, [r1]
    cbz     r3, .Lstrcpy_unalign7_copy5bytes
    ldrb    r4, [r1, #1]
    cbz     r4, .Lstrcpy_unalign7_copy6bytes
    ldrb    r5, [r1, #2]
    cbz     r5, .Lstrcpy_unalign7_copy7bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0..2 of r3 are known to be non-zero, only byte 3 can be the
    // terminator. strd leaves the flags from lsrs untouched.
    lsrs    ip, r3, #24
    strd    r2, r3, [r0], #8
    beq     .Lstrcpy_unalign_return
    b       .Lstrcpy_unalign7

.Lstrcpy_unalign7_copy5bytes:
    str     r2, [r0], #4
    strb    r3, [r0]
.Lstrcpy_unalign_return:
    m_pop

.Lstrcpy_unalign7_copy6bytes:
    str     r2, [r0], #4
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    m_pop

.Lstrcpy_unalign7_copy7bytes:
    str     r2, [r0], #4
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    strb    r5, [r0], #1
    m_pop

    .p2align 2
    // Can read 6 bytes before possibly crossing a page.
.Lstrcpy_unalign6:
    ldr     r2, [r1], #4

    m_find_nul r2
    bne     .Lstrcpy_zero_in_first_register

    ldrb    r4, [r1]
    cbz     r4, .Lstrcpy_unalign_copy5bytes
    ldrb    r5, [r1, #1]
    cbz     r5, .Lstrcpy_unalign_copy6bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 of r3 are known to be non-zero: check byte 2, then
    // byte 3.
    tst     r3, #0xff0000
    beq     .Lstrcpy_copy7bytes
    lsrs    ip, r3, #24
    strd    r2, r3, [r0], #8
    beq     .Lstrcpy_unalign_return
    b       .Lstrcpy_unalign6

    .p2align 2
    // Can read 5 bytes before possibly crossing a page.
.Lstrcpy_unalign5:
    ldr     r2, [r1], #4

    m_find_nul r2
    bne     .Lstrcpy_zero_in_first_register

    ldrb    r4, [r1]
    cbz     r4, .Lstrcpy_unalign_copy5bytes

    ldr     r3, [r1], #4

    pld     [r1, #64]

    m_find_nul r3
    bne     .Lstrcpy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .Lstrcpy_unalign5

.Lstrcpy_unalign_copy5bytes:
    str     r2, [r0], #4
    strb    r4, [r0]
    m_pop

.Lstrcpy_unalign_copy6bytes:
    str     r2, [r0], #4
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_pop

    .p2align 2
    // Can read 4 bytes before possibly crossing a page.
.Lstrcpy_unalign4:
    ldr     r2, [r1], #4

    m_find_nul r2
    bne     .Lstrcpy_zero_in_first_register

    ldr     r3, [r1], #4
    pld     [r1, #64]

    m_find_nul r3
    bne     .Lstrcpy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .Lstrcpy_unalign4

    .p2align 2
    // Can read 3 bytes before possibly crossing a page.
.Lstrcpy_unalign3:
    ldrb    r2, [r1]
    cbz     r2, .Lstrcpy_unalign3_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .Lstrcpy_unalign3_copy2bytes
    ldrb    r4, [r1, #2]
    cbz     r4, .Lstrcpy_unalign3_copy3bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    // Bytes 0..2 of r2 were checked above, only byte 3 is left.
    lsrs    lr, r2, #24
    beq     .Lstrcpy_copy4bytes

    m_find_nul r3
    bne     .Lstrcpy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .Lstrcpy_unalign3

.Lstrcpy_unalign3_copy1byte:
    strb    r2, [r0]
    m_pop

.Lstrcpy_unalign3_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_pop

.Lstrcpy_unalign3_copy3bytes:
    strb    r2, [r0], #1
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_pop

    .p2align 2
    // Can read 2 bytes before possibly crossing a page.
.Lstrcpy_unalign2:
    ldrb    r2, [r1]
    cbz     r2, .Lstrcpy_unalign_copy1byte
    ldrb    r4, [r1, #1]
    cbz     r4, .Lstrcpy_unalign_copy2bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 of r2 were checked above: check byte 2, then byte 3.
    tst     r2, #0xff0000
    beq     .Lstrcpy_copy3bytes
    lsrs    ip, r2, #24
    beq     .Lstrcpy_copy4bytes

    m_find_nul r3
    bne     .Lstrcpy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .Lstrcpy_unalign2

    .p2align 2
    // Can read 1 byte before possibly crossing a page.
.Lstrcpy_unalign1:
    ldrb    r2, [r1]
    cbz     r2, .Lstrcpy_unalign_copy1byte

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    m_find_nul r2
    bne     .Lstrcpy_zero_in_first_register

    m_find_nul r3
    bne     .Lstrcpy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .Lstrcpy_unalign1

.Lstrcpy_unalign_copy1byte:
    strb    r2, [r0]
    m_pop

.Lstrcpy_unalign_copy2bytes:
    strb    r2, [r0], #1
    strb    r4, [r0]
    m_pop
END(strcpy)