Home | History | Annotate | Download | only in bionic
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  *  * Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  *  * Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in
     12  *    the documentation and/or other materials provided with the
     13  *    distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     16  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     17  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     18  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
     19  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
     22  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
     25  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 /*
     29  * Copyright (c) 2013 ARM Ltd
     30  * All rights reserved.
     31  *
     32  * Redistribution and use in source and binary forms, with or without
     33  * modification, are permitted provided that the following conditions
     34  * are met:
     35  * 1. Redistributions of source code must retain the above copyright
     36  *    notice, this list of conditions and the following disclaimer.
     37  * 2. Redistributions in binary form must reproduce the above copyright
     38  *    notice, this list of conditions and the following disclaimer in the
     39  *    documentation and/or other materials provided with the distribution.
     40  * 3. The name of the company may not be used to endorse or promote
     41  *    products derived from this software without specific prior written
     42  *    permission.
     43  *
     44  * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
     45  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
     46  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     47  * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     48  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     49  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     50  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     51  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     52  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     53  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     54  */
     55 
     56 #if !defined(STPCPY) && !defined(STRCPY)
     57 #error "Either STPCPY or STRCPY must be defined."
     58 #endif
     59 
     60 #include <private/bionic_asm.h>
     61 
     62     .syntax unified
     63 
     64     .thumb
     65     .thumb_func
     66 
     67 #if defined(STPCPY)
            // stpcpy build: save r4, r5 and lr (12 bytes) and record where each
            // one lives relative to the new stack pointer for the unwinder.
            // r0 is not saved; the stpcpy exit paths leave the result in it.
     68     .macro m_push
     69     push    {r4, r5, lr}
     70     .cfi_def_cfa_offset 12
     71     .cfi_rel_offset r4, 0
     72     .cfi_rel_offset r5, 4
     73     .cfi_rel_offset lr, 8
     74     .endm // m_push
     75 #else
            // strcpy build: also save r0 (16 bytes total) so that m_pop can
            // reload the original destination pointer as the return value.
     76     .macro m_push
     77     push    {r0, r4, r5, lr}
     78     .cfi_def_cfa_offset 16
     79     .cfi_rel_offset r0, 0
     80     .cfi_rel_offset r4, 4
     81     .cfi_rel_offset r5, 8
     82     .cfi_rel_offset lr, 12
     83     .endm // m_push
     84 #endif
     85 
     86 #if defined(STPCPY)
            // stpcpy build: restore r4 and r5 and return by popping the saved
            // lr straight into pc. r0 is left as set by the exit path.
     87     .macro m_pop
     88     pop     {r4, r5, pc}
     89     .endm // m_pop
     90 #else
            // strcpy build: same, but first reload r0 with the destination
            // pointer that m_push saved, discarding any advance made while
            // copying.
     91     .macro m_pop
     92     pop     {r0, r4, r5, pc}
     93     .endm // m_pop
     94 #endif
     95 
            // m_copy_byte: copy one byte from [r1] to [r0], post-incrementing
            // both pointers, then issue the given compare-and-branch on the
            // byte that was copied.
            //   reg   - scratch register that receives the byte
            //   cmd   - compare-and-branch mnemonic (cbz or cbnz in this file)
            //   label - branch target handed to cmd
     96     .macro m_copy_byte reg, cmd, label
     97     ldrb    \reg, [r1], #1
     98     strb    \reg, [r0], #1
     99     \cmd    \reg, \label
    100     .endm // m_copy_byte
    101 
    102 #if defined(STPCPY)
    103 ENTRY(stpcpy)
    104 #else
    105 ENTRY(strcpy)
    106 #endif
            // In:  r0 = destination, r1 = NUL-terminated source.
            // Out: strcpy - r0 = original destination (reloaded by m_pop).
            //      stpcpy - r0 = address of the NUL written to the destination.
            // r2-r5, ip and lr are used as scratch below; r4, r5 and lr are
            // saved by m_push and restored by m_pop.
    107     // For short copies, hard-code checking the first 8 bytes since this
    108     // new code doesn't win until after about 8 bytes.
    109     m_push
    110     m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_finish
    111     m_copy_byte reg=r3, cmd=cbz, label=.Lstringcopy_finish
    112     m_copy_byte reg=r4, cmd=cbz, label=.Lstringcopy_finish
    113     m_copy_byte reg=r5, cmd=cbz, label=.Lstringcopy_finish
    114     m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_finish
    115     m_copy_byte reg=r3, cmd=cbz, label=.Lstringcopy_finish
    116     m_copy_byte reg=r4, cmd=cbz, label=.Lstringcopy_finish
            // The eighth copy inverts the test: a non-zero byte branches to the
            // long-copy path, a NUL falls through into .Lstringcopy_finish.
    117     m_copy_byte reg=r5, cmd=cbnz, label=.Lstringcopy_continue
    118 
    119 .Lstringcopy_finish:
            // The NUL has been stored and r0 post-incremented one past it.
    120 #if defined(STPCPY)
    121     sub     r0, r0, #1              // step back so r0 points at the NUL
    122 #endif
    123     m_pop
    124 
    125 .Lstringcopy_continue:
            // Reached when the first 8 bytes held no NUL. Copy single bytes
            // until dst (r0) sits on an 8-byte boundary, leaving through
            // .Lstringcopy_complete if the NUL turns up on the way.
    126     pld     [r1, #0]
    127     ands    r3, r0, #7              // r3 = dst & 7
    128     beq     .Lstringcopy_check_src_align    // dst is already 8-byte aligned
    129 
    130     // Align to a double word (64 bits).
    131     rsb     r3, r3, #8              // r3 = 8 - (dst & 7): bytes still to copy, 1..7
    132     lsls    ip, r3, #31             // Z clear if bit 0 of r3 is set; C = bit 1 of r3
    133     beq     .Lstringcopy_align_to_32
    134 
            // Bit 0 of the count is set: copy one byte.
    135     ldrb    r2, [r1], #1
    136     strb    r2, [r0], #1
    137     cbz     r2, .Lstringcopy_complete
    138 
    139 .Lstringcopy_align_to_32:
            // ldrb, strb and cbz do not write the flags, so C from the lsls
            // above is still valid whichever way this label was reached.
    140     bcc     .Lstringcopy_align_to_64
    141 
            // Bit 1 of the count is set: copy two bytes.
    142     ldrb    r2, [r1], #1
    143     strb    r2, [r0], #1
    144     cbz     r2, .Lstringcopy_complete
    145     ldrb    r2, [r1], #1
    146     strb    r2, [r0], #1
    147     cbz     r2, .Lstringcopy_complete
    148 
    149 .Lstringcopy_align_to_64:
    150     tst     r3, #4                  // bit 2 of the count: four more bytes needed?
    151     beq     .Lstringcopy_check_src_align
    152     // Read one byte at a time since we don't have any idea about the alignment
    153     // of the source and we don't want to read into a different page.
    154     ldrb    r2, [r1], #1
    155     strb    r2, [r0], #1
    156     cbz     r2, .Lstringcopy_complete
    157     ldrb    r2, [r1], #1
    158     strb    r2, [r0], #1
    159     cbz     r2, .Lstringcopy_complete
    160     ldrb    r2, [r1], #1
    161     strb    r2, [r0], #1
    162     cbz     r2, .Lstringcopy_complete
    163     ldrb    r2, [r1], #1
    164     strb    r2, [r0], #1
    165     cbz     r2, .Lstringcopy_complete
    166 
    167 .Lstringcopy_check_src_align:
    168     // At this point dst is aligned to a double word, check if src
    169     // is also aligned to a double word.
    170     ands    r3, r1, #7              // r3 = src & 7; also the branch-table index if non-zero
    171     bne     .Lstringcopy_unaligned_copy
    172 
    173     .p2align 2
    174 .Lstringcopy_mainloop:
            // src and dst are both 8-byte aligned: move 8 bytes per iteration.
            // r2 holds the lower-addressed word, r3 the higher one.
    175     ldrd    r2, r3, [r1], #8
    176 
    177     pld     [r1, #64]
    178 
            // Zero-byte test: (w - 0x01010101) & ~w & 0x80808080 is non-zero
            // exactly when some byte of w is zero. ip keeps the per-byte flag
            // bits (7, 15, 23, 31) for the tail code to decode.
    179     sub     ip, r2, #0x01010101
    180     bic     ip, ip, r2
    181     ands    ip, ip, #0x80808080
    182     bne     .Lstringcopy_zero_in_first_register
    183 
            // Same test on the second word.
    184     sub     ip, r3, #0x01010101
    185     bic     ip, ip, r3
    186     ands    ip, ip, #0x80808080
    187     bne     .Lstringcopy_zero_in_second_register
    188 
            // No NUL in these 8 bytes: store them and go round again.
    189     strd    r2, r3, [r0], #8
    190     b       .Lstringcopy_mainloop
    191 
    192 .Lstringcopy_complete:
            // Exit for the byte-at-a-time alignment copies: the NUL has been
            // stored and r0 post-incremented one past it.
    193 #if defined(STPCPY)
    194     sub     r0, r0, #1              // step back so r0 points at the NUL
    195 #endif
    196     m_pop
    197 
    198 .Lstringcopy_zero_in_first_register:
            // In: r2 = word containing the NUL, ip = its zero-byte flags (only
            // bits 7, 15, 23 and 31 can be set; bit 7 belongs to the byte
            // stored at the lowest address). r0 = where the word goes.
            // The flags are tested lowest byte first. lr is free to clobber:
            // m_push saved it and m_pop returns through the saved copy.
    199     lsls    lr, ip, #17             // result != 0 iff bit 7 set; C = bit 15
    200     bne     .Lstringcopy_copy1byte  // NUL is byte 0
    201     bcs     .Lstringcopy_copy2bytes // NUL is byte 1
    202     lsls    ip, ip, #1              // bits 7 and 15 are clear here, so != 0 iff bit 23 set
    203     bne     .Lstringcopy_copy3bytes // NUL is byte 2
    204 
    205 .Lstringcopy_copy4bytes:
    206     // Copy 4 bytes to the destination.
            // The NUL is byte 3. In the stpcpy build the post-increment of 3
            // leaves r0 pointing at the NUL just stored.
    207 #if defined(STPCPY)
    208     str     r2, [r0], #3
    209 #else
    210     str     r2, [r0]
    211 #endif
    212     m_pop
    213 
    214 .Lstringcopy_copy1byte:
            // The low byte of r2 is the NUL; r0 already points at its slot.
    215     strb    r2, [r0]
    216     m_pop
    217 
    218 .Lstringcopy_copy2bytes:
            // Store one character plus the NUL; stpcpy advances r0 to the NUL.
    219 #if defined(STPCPY)
    220     strh    r2, [r0], #1
    221 #else
    222     strh    r2, [r0]
    223 #endif
    224     m_pop
    225 
    226 .Lstringcopy_copy3bytes:
            // Store two characters as a halfword, then the NUL from bits
            // 23:16. r0 ends up on the NUL, which is what stpcpy returns
            // (the strcpy build reloads r0 in m_pop anyway).
    227     strh    r2, [r0], #2
    228     lsr     r2, #16
    229     strb    r2, [r0]
    230     m_pop
    231 
    232 .Lstringcopy_zero_in_second_register:
            // In: r2 = four NUL-free bytes, r3 = word containing the NUL,
            // ip = zero-byte flags for r3 (bits 7, 15, 23, 31; bit 7 belongs
            // to the lowest-addressed byte). r0 = where r2 goes.
            // lr is scratch here; its saved copy is on the stack.
    233     lsls    lr, ip, #17             // result != 0 iff bit 7 set; C = bit 15
    234     bne     .Lstringcopy_copy5bytes // NUL is byte 0 of r3
    235     bcs     .Lstringcopy_copy6bytes // NUL is byte 1 of r3
    236     lsls    ip, ip, #1              // bits 7 and 15 are clear here, so != 0 iff bit 23 set
    237     bne     .Lstringcopy_copy7bytes // NUL is byte 2 of r3
    238 
    239     // Copy 8 bytes to the destination.
            // The NUL is the last byte of r3.
    240     strd    r2, r3, [r0]
    241 #if defined(STPCPY)
    242     add     r0, r0, #7              // point r0 at the NUL (8th byte stored)
    243 #endif
    244     m_pop
    245 
    246 .Lstringcopy_copy5bytes:
            // Four characters from r2, then the NUL from the low byte of r3.
            // r0 is left pointing at the NUL.
    247     str     r2, [r0], #4
    248     strb    r3, [r0]
    249     m_pop
    250 
    251 .Lstringcopy_copy6bytes:
            // Four characters from r2, then one character plus the NUL as a
            // halfword; stpcpy advances r0 by one more to reach the NUL.
    252     str     r2, [r0], #4
    253 #if defined(STPCPY)
    254     strh    r3, [r0], #1
    255 #else
    256     strh    r3, [r0]
    257 #endif
    258     m_pop
    259 
    260 .Lstringcopy_copy7bytes:
            // Four characters from r2, two from the low half of r3, then the
            // NUL from bits 23:16 of r3. r0 is left pointing at the NUL.
    261     str     r2, [r0], #4
    262     strh    r3, [r0], #2
    263     lsr     r3, #16
    264     strb    r3, [r0]
    265     m_pop
    266 
    267 .Lstringcopy_unaligned_copy:
    268     // Dst is aligned to a double word, while src is at an unknown alignment.
    269     // There are 7 different versions of the unaligned copy code
    270     // to prevent overreading the src. The mainloop of every single version
    271     // will store 64 bits per loop. The difference is how much of src can
    272     // be read without potentially crossing a page boundary.
            // r3 = src & 7 (1..7), set by the ands in
            // .Lstringcopy_check_src_align. tbb reads the byte at [pc + r3]
            // and branches forward by twice that value; pc here is the address
            // of the table that follows the instruction.
    273     tbb     [pc, r3]
    274 .Lstringcopy_unaligned_branchtable:
            // Entry 0 is never selected because r3 is non-zero on this path.
            // Entry n selects the variant that may read 8 - n bytes before the
            // next 8-byte boundary.
    275     .byte 0
    276     .byte ((.Lstringcopy_unalign7 - .Lstringcopy_unaligned_branchtable)/2)
    277     .byte ((.Lstringcopy_unalign6 - .Lstringcopy_unaligned_branchtable)/2)
    278     .byte ((.Lstringcopy_unalign5 - .Lstringcopy_unaligned_branchtable)/2)
    279     .byte ((.Lstringcopy_unalign4 - .Lstringcopy_unaligned_branchtable)/2)
    280     .byte ((.Lstringcopy_unalign3 - .Lstringcopy_unaligned_branchtable)/2)
    281     .byte ((.Lstringcopy_unalign2 - .Lstringcopy_unaligned_branchtable)/2)
    282     .byte ((.Lstringcopy_unalign1 - .Lstringcopy_unaligned_branchtable)/2)
    283 
    284     .p2align 2
    285     // Can read 7 bytes before possibly crossing a page.
    286 .Lstringcopy_unalign7:
            // Selected when src & 7 == 1. r1 advances by 8 per iteration, so
            // that offset holds every time round the loop.
    287     ldr     r2, [r1], #4            // first 4 of the 7 safe bytes
    288 
            // ip != 0 iff r2 contains a zero byte.
    289     sub     ip, r2, #0x01010101
    290     bic     ip, ip, r2
    291     ands    ip, ip, #0x80808080
    292     bne     .Lstringcopy_zero_in_first_register
    293 
            // Three safe bytes remain; probe them one at a time before
            // committing to a word load that reaches past the boundary.
    294     ldrb    r3, [r1]
    295     cbz     r3, .Lstringcopy_unalign7_copy5bytes
    296     ldrb    r4, [r1, #1]
    297     cbz     r4, .Lstringcopy_unalign7_copy6bytes
    298     ldrb    r5, [r1, #2]
    299     cbz     r5, .Lstringcopy_unalign7_copy7bytes
    300 
            // All three are non-zero, so the string continues into the fourth
            // byte and the whole word may be read.
    301     ldr     r3, [r1], #4
    302     pld     [r1, #64]
    303 
    304     lsrs    ip, r3, #24             // Z set iff the 4th byte of r3 is the NUL
    305     strd    r2, r3, [r0], #8        // does not write the flags
    306 #if defined(STPCPY)
            // .Lstringcopy_finish subtracts 1 from the post-incremented r0,
            // leaving it on the NUL that was the 8th byte stored.
    307     beq     .Lstringcopy_finish
    308 #else
    309     beq     .Lstringcopy_unalign_return
    310 #endif
    311     b       .Lstringcopy_unalign7
    312 
    313 .Lstringcopy_unalign7_copy5bytes:
            // r2 = four characters, r3 = the NUL.
    314     str     r2, [r0], #4
    315     strb    r3, [r0]
    316 .Lstringcopy_unalign_return:
    317     m_pop
    318 
    319 .Lstringcopy_unalign7_copy6bytes:
            // r2 = four characters, r3 = fifth character, r4 = the NUL.
    320     str     r2, [r0], #4
    321     strb    r3, [r0], #1
    322     strb    r4, [r0]
    323     m_pop
    324 
    325 .Lstringcopy_unalign7_copy7bytes:
            // r2 = four characters, r3 and r4 = next two, r5 = the NUL.
    326     str     r2, [r0], #4
    327     strb    r3, [r0], #1
    328     strb    r4, [r0], #1
    329     strb    r5, [r0]
    330     m_pop
    331 
    332     .p2align 2
    333     // Can read 6 bytes before possibly crossing a page.
    334 .Lstringcopy_unalign6:
            // Selected when src & 7 == 2.
    335     ldr     r2, [r1], #4            // first 4 of the 6 safe bytes
    336 
            // ip != 0 iff r2 contains a zero byte.
    337     sub     ip, r2, #0x01010101
    338     bic     ip, ip, r2
    339     ands    ip, ip, #0x80808080
    340     bne     .Lstringcopy_zero_in_first_register
    341 
            // Two safe bytes remain; probe them individually.
    342     ldrb    r4, [r1]
    343     cbz     r4, .Lstringcopy_unalign_copy5bytes
    344     ldrb    r5, [r1, #1]
    345     cbz     r5, .Lstringcopy_unalign_copy6bytes
    346 
            // Both are non-zero, so the string extends past the boundary and
            // the word load is safe. Its two low bytes are the ones just
            // probed, leaving only bytes 2 and 3 to test.
    347     ldr     r3, [r1], #4
    348     pld     [r1, #64]
    349 
    350     tst     r3, #0xff0000           // byte 2 of r3 is the NUL?
    351     beq     .Lstringcopy_copy7bytes
    352     lsrs    ip, r3, #24             // Z set iff byte 3 of r3 is the NUL
    353     strd    r2, r3, [r0], #8        // does not write the flags
    354 #if defined(STPCPY)
            // .Lstringcopy_finish subtracts 1 from the post-incremented r0,
            // leaving it on the NUL that was the 8th byte stored.
    355     beq     .Lstringcopy_finish
    356 #else
    357     beq     .Lstringcopy_unalign_return
    358 #endif
    359     b       .Lstringcopy_unalign6
    360 
    361     .p2align 2
    362     // Can read 5 bytes before possibly crossing a page.
    363 .Lstringcopy_unalign5:
            // Selected when src & 7 == 3.
    364     ldr     r2, [r1], #4            // first 4 of the 5 safe bytes
    365 
            // ip != 0 iff r2 contains a zero byte.
    366     sub     ip, r2, #0x01010101
    367     bic     ip, ip, r2
    368     ands    ip, ip, #0x80808080
    369     bne     .Lstringcopy_zero_in_first_register
    370 
            // One safe byte remains; probe it before the word load that
            // reaches past the boundary.
    371     ldrb    r4, [r1]
    372     cbz     r4, .Lstringcopy_unalign_copy5bytes
    373 
    374     ldr     r3, [r1], #4
    375 
    376     pld     [r1, #64]
    377 
            // ip != 0 iff r3 contains a zero byte.
    378     sub     ip, r3, #0x01010101
    379     bic     ip, ip, r3
    380     ands    ip, ip, #0x80808080
    381     bne     .Lstringcopy_zero_in_second_register
    382 
    383     strd    r2, r3, [r0], #8
    384     b       .Lstringcopy_unalign5
    385 
    386 .Lstringcopy_unalign_copy5bytes:
            // Shared by the unalign6 and unalign5 variants.
            // r2 = four characters, r4 = the NUL.
    387     str     r2, [r0], #4
    388     strb    r4, [r0]
    389     m_pop
    390 
    391 .Lstringcopy_unalign_copy6bytes:
            // Used by the unalign6 variant.
            // r2 = four characters, r4 = fifth character, r5 = the NUL.
    392     str     r2, [r0], #4
    393     strb    r4, [r0], #1
    394     strb    r5, [r0]
    395     m_pop
    396 
    397     .p2align 2
    398     // Can read 4 bytes before possibly crossing a page.
    399 .Lstringcopy_unalign4:
            // Selected when src & 7 == 4, so both loads below are word-aligned.
            // The second word is only read once the first is known NUL-free.
    400     ldr     r2, [r1], #4
    401 
            // ip != 0 iff r2 contains a zero byte.
    402     sub     ip, r2, #0x01010101
    403     bic     ip, ip, r2
    404     ands    ip, ip, #0x80808080
    405     bne     .Lstringcopy_zero_in_first_register
    406 
    407     ldr     r3, [r1], #4
    408     pld     [r1, #64]
    409 
            // ip != 0 iff r3 contains a zero byte.
    410     sub     ip, r3, #0x01010101
    411     bic     ip, ip, r3
    412     ands    ip, ip, #0x80808080
    413     bne     .Lstringcopy_zero_in_second_register
    414 
    415     strd    r2, r3, [r0], #8
    416     b       .Lstringcopy_unalign4
    417 
    418     .p2align 2
    419     // Can read 3 bytes before possibly crossing a page.
    420 .Lstringcopy_unalign3:
            // Selected when src & 7 == 5. Probe the three safe bytes one at a
            // time; r1 is not advanced, so the word loads below re-read them.
    421     ldrb    r2, [r1]
    422     cbz     r2, .Lstringcopy_unalign3_copy1byte
    423     ldrb    r3, [r1, #1]
    424     cbz     r3, .Lstringcopy_unalign3_copy2bytes
    425     ldrb    r4, [r1, #2]
    426     cbz     r4, .Lstringcopy_unalign3_copy3bytes
    427 
            // Three non-zero bytes mean the string reaches the byte just past
            // the boundary. r3 is loaded before that byte is tested; its four
            // bytes lie in the same aligned 8-byte block as that byte.
    428     ldr     r2, [r1], #4
    429     ldr     r3, [r1], #4
    430 
    431     pld     [r1, #64]
    432 
            // lr is only a discarded destination here (its saved copy is on
            // the stack); the test is whether byte 3 of r2 is the NUL.
    433     lsrs    lr, r2, #24
    434     beq     .Lstringcopy_copy4bytes
    435 
            // ip != 0 iff r3 contains a zero byte.
    436     sub     ip, r3, #0x01010101
    437     bic     ip, ip, r3
    438     ands    ip, ip, #0x80808080
    439     bne     .Lstringcopy_zero_in_second_register
    440 
    441     strd    r2, r3, [r0], #8
    442     b       .Lstringcopy_unalign3
    443 
    444 .Lstringcopy_unalign3_copy1byte:
            // r2 = the NUL.
    445     strb    r2, [r0]
    446     m_pop
    447 
    448 .Lstringcopy_unalign3_copy2bytes:
            // r2 = one character, r3 = the NUL.
    449     strb    r2, [r0], #1
    450     strb    r3, [r0]
    451     m_pop
    452 
    453 .Lstringcopy_unalign3_copy3bytes:
            // r2 and r3 = two characters, r4 = the NUL.
    454     strb    r2, [r0], #1
    455     strb    r3, [r0], #1
    456     strb    r4, [r0]
    457     m_pop
    458 
    459     .p2align 2
    460     // Can read 2 bytes before possibly crossing a page.
    461 .Lstringcopy_unalign2:
            // Selected when src & 7 == 6. Probe the two safe bytes one at a
            // time; r1 is not advanced, so the word loads below re-read them.
    462     ldrb    r2, [r1]
    463     cbz     r2, .Lstringcopy_unalign_copy1byte
    464     ldrb    r4, [r1, #1]
    465     cbz     r4, .Lstringcopy_unalign_copy2bytes
    466 
            // Both are non-zero, so the string reaches past the boundary.
            // r3 is loaded before r2's upper bytes are tested; its four bytes
            // lie in the same aligned 8-byte block as r2's upper two bytes.
    467     ldr     r2, [r1], #4
    468     ldr     r3, [r1], #4
    469     pld     [r1, #64]
    470 
            // Bytes 0 and 1 of r2 were probed above; only 2 and 3 remain.
    471     tst     r2, #0xff0000           // byte 2 of r2 is the NUL?
    472     beq     .Lstringcopy_copy3bytes
    473     lsrs    ip, r2, #24             // byte 3 of r2 is the NUL?
    474     beq     .Lstringcopy_copy4bytes
    475 
            // ip != 0 iff r3 contains a zero byte.
    476     sub     ip, r3, #0x01010101
    477     bic     ip, ip, r3
    478     ands    ip, ip, #0x80808080
    479     bne     .Lstringcopy_zero_in_second_register
    480 
    481     strd    r2, r3, [r0], #8
    482     b       .Lstringcopy_unalign2
    483 
    484     .p2align 2
    485     // Can read 1 byte before possibly crossing a page.
    486 .Lstringcopy_unalign1:
            // Selected when src & 7 == 7. Probe the single safe byte; r1 is
            // not advanced, so the word load below re-reads it.
    487     ldrb    r2, [r1]
    488     cbz     r2, .Lstringcopy_unalign_copy1byte
    489 
            // It is non-zero, so the string reaches past the boundary. Both
            // words are loaded before either is tested; every byte beyond the
            // probed one lies in the next aligned 8-byte block.
    490     ldr     r2, [r1], #4
    491     ldr     r3, [r1], #4
    492 
    493     pld     [r1, #64]
    494 
            // ip != 0 iff r2 contains a zero byte.
    495     sub     ip, r2, #0x01010101
    496     bic     ip, ip, r2
    497     ands    ip, ip, #0x80808080
    498     bne     .Lstringcopy_zero_in_first_register
    499 
            // ip != 0 iff r3 contains a zero byte.
    500     sub     ip, r3, #0x01010101
    501     bic     ip, ip, r3
    502     ands    ip, ip, #0x80808080
    503     bne     .Lstringcopy_zero_in_second_register
    504 
    505     strd    r2, r3, [r0], #8
    506     b       .Lstringcopy_unalign1
    507 
    508 .Lstringcopy_unalign_copy1byte:
            // Shared by the unalign2 and unalign1 variants. r2 = the NUL.
    509     strb    r2, [r0]
    510     m_pop
    511 
    512 .Lstringcopy_unalign_copy2bytes:
            // Used by the unalign2 variant. r2 = one character, r4 = the NUL.
    513     strb    r2, [r0], #1
    514     strb    r4, [r0]
    515     m_pop
    516 #if defined(STPCPY)
    517 END(stpcpy)
    518 #else
    519 END(strcpy)
    520 #endif
    521