Home | History | Annotate | Download | only in bionic
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  *  * Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  *  * Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in
     12  *    the documentation and/or other materials provided with the
     13  *    distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     16  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     17  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     18  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
     19  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
     22  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
     25  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 /*
     29  * Copyright (c) 2013 ARM Ltd
     30  * All rights reserved.
     31  *
     32  * Redistribution and use in source and binary forms, with or without
     33  * modification, are permitted provided that the following conditions
     34  * are met:
     35  * 1. Redistributions of source code must retain the above copyright
     36  *    notice, this list of conditions and the following disclaimer.
     37  * 2. Redistributions in binary form must reproduce the above copyright
     38  *    notice, this list of conditions and the following disclaimer in the
     39  *    documentation and/or other materials provided with the distribution.
     40  * 3. The name of the company may not be used to endorse or promote
     41  *    products derived from this software without specific prior written
     42  *    permission.
     43  *
     44  * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
     45  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
     46  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     47  * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     48  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     49  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     50  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     51  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     52  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     53  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     54  */
     55 
     56 #include <private/bionic_asm.h>
     57 
     58     .syntax unified
            // Unified ARM/Thumb assembler syntax is used for the whole file.
     59 
            // Everything below is assembled as Thumb code.
     60     .thumb
            // NOTE(review): .thumb_func tags the next symbol defined as a Thumb
            // function; presumably that is the strcpy symbol created by ENTRY
            // further down -- confirm against ENTRY in private/bionic_asm.h.
     61     .thumb_func
     62 
     63     .macro m_push
            // Prologue. Saves r0 (the dst pointer, which the copy code advances
            // and m_pop later restores so it can be returned), r4 and r5 (used
            // as extra scratch registers) and the return address in lr.
     64     push    {r0, r4, r5, lr}
     65     .endm // m_push
     66 
     67     .macro m_pop
            // Epilogue and return in one instruction: restores what m_push
            // saved, so r0 again holds the original dst pointer, and pops the
            // saved lr straight into pc.
     68     pop     {r0, r4, r5, pc}
     69     .endm // m_pop
     70 
     71     .macro m_copy_byte reg, cmd, label
            // Copies one byte from [r1] to [r0] through \reg, post-incrementing
            // both pointers, then executes "\cmd \reg, \label".
            //   reg   - scratch register that receives the byte
            //   cmd   - compare-and-branch mnemonic (used below with cbz/cbnz)
            //   label - branch target for \cmd
     72     ldrb    \reg, [r1], #1
     73     strb    \reg, [r0], #1
     74     \cmd    \reg, \label
     75     .endm // m_copy_byte
     76 
     77 ENTRY(strcpy)
            // Copies the NUL-terminated string at r1 (src) to r0 (dst),
            // including the terminator. Every exit goes through m_pop, which
            // reloads r0 with the dst value that was passed in.
            // r2-r5, ip and lr are used as scratch; r4, r5 and lr are saved by
            // m_push and restored by m_pop.
     78     // For short copies, hard-code checking the first 8 bytes since this
     79     // new code doesn't win until after about 8 bytes.
     80     m_push
            // Bytes 1-7: leave through strcpy_finish as soon as the byte just
            // stored is the terminator.
     81     m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
     82     m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
     83     m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
     84     m_copy_byte reg=r5, cmd=cbz, label=strcpy_finish
     85     m_copy_byte reg=r2, cmd=cbz, label=strcpy_finish
     86     m_copy_byte reg=r3, cmd=cbz, label=strcpy_finish
     87     m_copy_byte reg=r4, cmd=cbz, label=strcpy_finish
            // Byte 8 inverts the test: a non-zero byte continues with the bulk
            // copy, a zero byte falls through into strcpy_finish.
     88     m_copy_byte reg=r5, cmd=cbnz, label=strcpy_continue
     89 
     90 strcpy_finish:
     91     m_pop
     92 
     93 strcpy_continue:
            // The first 8 bytes held no terminator. Copy byte-by-byte until dst
            // (r0) is 8-byte aligned so that the main loops can store with strd.
            // Nothing is known about the alignment of src (r1) yet, so this
            // section must never read more than one byte past data that has
            // already been checked for the terminator.
     94     pld     [r1, #0]
     95     ands    r3, r0, #7
     96     beq     strcpy_check_src_align
     97 
     98     // Align to a double word (64 bits).
            // r3 = number of bytes (1..7) up to the next 8-byte boundary of dst.
     99     rsb     r3, r3, #8
            // One shift tests two bits of r3: Z is set when bit 0 is clear and
            // C receives bit 1. The ldrb/strb/cbz instructions below do not
            // touch the flags, so C is still valid at strcpy_align_to_32.
    100     lsls    ip, r3, #31
    101     beq     strcpy_align_to_32
    102 
            // Bit 0 set: copy 1 byte.
    103     ldrb    r2, [r1], #1
    104     strb    r2, [r0], #1
    105     cbz     r2, strcpy_complete
    106 
    107 strcpy_align_to_32:
    108     bcc     strcpy_align_to_64
    109 
            // Bit 1 set: copy 2 bytes.
    110     ldrb    r2, [r1], #1
    111     strb    r2, [r0], #1
    112     cbz     r2, strcpy_complete
    113     ldrb    r2, [r1], #1
    114     strb    r2, [r0], #1
    115     cbz     r2, strcpy_complete
    116 
    117 strcpy_align_to_64:
    118     tst     r3, #4
    119     beq     strcpy_check_src_align
            // Bit 2 set: copy 4 bytes, one at a time. A 4-byte ldr is not safe
            // here: src may have any alignment, so when the terminator is one
            // of the first bytes of the word the load could extend past the
            // end of the string into the next page, which may be unmapped.
            ldrb    r2, [r1], #1
            strb    r2, [r0], #1
            cbz     r2, strcpy_complete
            ldrb    r2, [r1], #1
            strb    r2, [r0], #1
            cbz     r2, strcpy_complete
            ldrb    r2, [r1], #1
            strb    r2, [r0], #1
            cbz     r2, strcpy_complete
            ldrb    r2, [r1], #1
            strb    r2, [r0], #1
            cbz     r2, strcpy_complete
            // dst is now 8-byte aligned; fall through to the src alignment check.
    127 
    128 strcpy_check_src_align:
    129     // At this point dst is aligned to a double word, check if src
    130     // is also aligned to a double word.
            // r3 = src & 7. A non-zero value is reused as the branch-table
            // index at strcpy_unaligned_copy.
    131     ands    r3, r1, #7
    132     bne     strcpy_unaligned_copy
    133 
    134     .p2align 2
    135 strcpy_mainloop:
            // Both pointers are 8-byte aligned: move 8 bytes per iteration
            // (r2 = first word, r3 = second word) until one of the two words
            // contains a zero byte.
    136     ldrd    r2, r3, [r1], #8
    137 
    138     pld     [r1, #64]
    139 
            // ip = (x - 0x01010101) & ~x & 0x80808080 is non-zero exactly when
            // x contains a zero byte, and its lowest set 0x80 marks the first
            // such byte. The strcpy_zero_in_*_register handlers decode ip, so
            // it must still hold this mask when they are entered.
    140     sub     ip, r2, #0x01010101
    141     bic     ip, ip, r2
    142     ands    ip, ip, #0x80808080
    143     bne     strcpy_zero_in_first_register
    144 
    145     sub     ip, r3, #0x01010101
    146     bic     ip, ip, r3
    147     ands    ip, ip, #0x80808080
    148     bne     strcpy_zero_in_second_register
    149 
            // No terminator in these 8 bytes: store them and keep going.
    150     strd    r2, r3, [r0], #8
    151     b       strcpy_mainloop
    152 
    153 strcpy_complete:
            // Exit used by the dst alignment code once it has stored the
            // terminator.
    154     m_pop
    155 
    156 strcpy_zero_in_first_register:
            // Entered with r2 = the word that contains the terminator and
            // ip = its zero-byte mask (only bits 7, 15, 23 and 31 can be set).
            // The low byte of r2 is the first character. Finds the first zero
            // byte and stores exactly the bytes up to and including it at [r0].
            //
            // After shifting left by 17, bit 7 is the only mask bit left in the
            // 32-bit result (Z clear => byte 0 is zero) and bit 15 is the last
            // bit shifted out (C set => byte 1 is zero). lr only receives the
            // discarded result.
    157     lsls    lr, ip, #17
    158     bne     strcpy_copy1byte
    159     bcs     strcpy_copy2bytes
            // Bits 7 and 15 are clear here, so a non-zero result can only come
            // from bit 23 (byte 2 is zero). Otherwise the terminator is byte 3.
    160     lsls    ip, ip, #1
    161     bne     strcpy_copy3bytes
    162 
    163 strcpy_copy4bytes:
    164     // Copy 4 bytes to the destination.
    165     str     r2, [r0]
    166     m_pop
    167 
    168 strcpy_copy1byte:
            // The low byte of r2 is the terminator.
    169     strb    r2, [r0]
    170     m_pop
    171 
    172 strcpy_copy2bytes:
    173     strh    r2, [r0]
    174     m_pop
    175 
    176 strcpy_copy3bytes:
            // Store the low halfword, then bring byte 2 down and store it.
    177     strh    r2, [r0], #2
    178     lsr     r2, #16
    179     strb    r2, [r0]
    180     m_pop
    181 
    182 strcpy_zero_in_second_register:
            // Entered with r2 = four terminator-free bytes, r3 = the following
            // word, which contains the terminator, and ip = the zero-byte mask
            // of r3. Same decoding as strcpy_zero_in_first_register, but each
            // tail first stores all of r2 and then 1-4 bytes of r3.
    183     lsls    lr, ip, #17
            // Z clear => byte 0 of r3 is zero; C set => byte 1 of r3 is zero.
    184     bne     strcpy_copy5bytes
    185     bcs     strcpy_copy6bytes
            // Non-zero after this shift => byte 2 of r3 is zero, else byte 3.
    186     lsls    ip, ip, #1
    187     bne     strcpy_copy7bytes
    188 
    189     // Copy 8 bytes to the destination.
    190     strd    r2, r3, [r0]
    191     m_pop
    192 
    193 strcpy_copy5bytes:
    194     str     r2, [r0], #4
    195     strb    r3, [r0]
    196     m_pop
    197 
    198 strcpy_copy6bytes:
    199     str     r2, [r0], #4
    200     strh    r3, [r0]
    201     m_pop
    202 
    203 strcpy_copy7bytes:
            // r2, then the low halfword of r3, then byte 2 of r3.
    204     str     r2, [r0], #4
    205     strh    r3, [r0], #2
    206     lsr     r3, #16
    207     strb    r3, [r0]
    208     m_pop
    209 
    210 strcpy_unaligned_copy:
    211     // Dst is aligned to a double word, while src is at an unknown alignment.
    212     // There are 7 different versions of the unaligned copy code
    213     // to prevent overreading the src. The mainloop of every single version
    214     // will store 64 bits per loop. The difference is how much of src can
    215     // be read without potentially crossing a page boundary.
            // r3 = src & 7 (1..7) from strcpy_check_src_align. tbb branches
            // forward by twice the byte found at table[r3]; pc reads as the
            // address of the table, which directly follows the instruction.
    216     tbb     [pc, r3]
    217 strcpy_unaligned_branchtable:
            // Entry 0 is a placeholder: r3 == 0 never reaches this code. Entry
            // k selects strcpy_unalign(8 - k), i.e. the variant named after the
            // number of bytes left before the next 8-byte boundary of src.
    218     .byte 0
    219     .byte ((strcpy_unalign7 - strcpy_unaligned_branchtable)/2)
    220     .byte ((strcpy_unalign6 - strcpy_unaligned_branchtable)/2)
    221     .byte ((strcpy_unalign5 - strcpy_unaligned_branchtable)/2)
    222     .byte ((strcpy_unalign4 - strcpy_unaligned_branchtable)/2)
    223     .byte ((strcpy_unalign3 - strcpy_unaligned_branchtable)/2)
    224     .byte ((strcpy_unalign2 - strcpy_unaligned_branchtable)/2)
    225     .byte ((strcpy_unalign1 - strcpy_unaligned_branchtable)/2)
    226 
    227     .p2align 2
    228     // Can read 7 bytes before possibly crossing a page.
    229 strcpy_unalign7:
            // Selected when src & 7 == 1. Every pass advances r1 by 8, so that
            // relationship holds again each time the loop restarts.
            // Bytes 1-4: one word load, checked with the zero-byte mask.
    230     ldr     r2, [r1], #4
    231 
    232     sub     ip, r2, #0x01010101
    233     bic     ip, ip, r2
    234     ands    ip, ip, #0x80808080
    235     bne     strcpy_zero_in_first_register
    236 
            // Bytes 5-7 are checked one at a time. Only when all three are
            // non-zero is the string known to reach byte 8, which lies past
            // the 8-byte boundary.
    237     ldrb    r3, [r1]
    238     cbz     r3, strcpy_unalign7_copy5bytes
    239     ldrb    r4, [r1, #1]
    240     cbz     r4, strcpy_unalign7_copy6bytes
    241     ldrb    r5, [r1, #2]
    242     cbz     r5, strcpy_unalign7_copy7bytes
    243 
            // Reload bytes 5-8 as one word.
    244     ldr     r3, [r1], #4
    245     pld     [r1, #64]
    246 
            // Z is set when byte 8 (the top byte of r3) is the terminator; strd
            // does not change the flags, so the test is still valid after it.
    247     lsrs    ip, r3, #24
    248     strd    r2, r3, [r0], #8
    249     beq     strcpy_unalign_return
    250     b       strcpy_unalign7
    251 
    252 strcpy_unalign7_copy5bytes:
            // r3 = byte 5 = terminator.
    253     str     r2, [r0], #4
    254     strb    r3, [r0]
    255 strcpy_unalign_return:
    256     m_pop
    257 
    258 strcpy_unalign7_copy6bytes:
            // r3 = byte 5, r4 = byte 6 = terminator.
    259     str     r2, [r0], #4
    260     strb    r3, [r0], #1
    261     strb    r4, [r0], #1
    262     m_pop
    263 
    264 strcpy_unalign7_copy7bytes:
            // r3 = byte 5, r4 = byte 6, r5 = byte 7 = terminator.
    265     str     r2, [r0], #4
    266     strb    r3, [r0], #1
    267     strb    r4, [r0], #1
    268     strb    r5, [r0], #1
    269     m_pop
    270 
    271     .p2align 2
    272     // Can read 6 bytes before possibly crossing a page.
    273 strcpy_unalign6:
            // Selected when src & 7 == 2; r1 advances by 8 per pass.
            // Bytes 1-4: one word load, checked with the zero-byte mask.
    274     ldr     r2, [r1], #4
    275 
    276     sub     ip, r2, #0x01010101
    277     bic     ip, ip, r2
    278     ands    ip, ip, #0x80808080
    279     bne     strcpy_zero_in_first_register
    280 
            // Bytes 5 and 6 are checked individually before anything past the
            // 8-byte boundary is read. The shared tails expect r4 = byte 5 and
            // r5 = byte 6.
    281     ldrb    r4, [r1]
    282     cbz     r4, strcpy_unalign_copy5bytes
    283     ldrb    r5, [r1, #1]
    284     cbz     r5, strcpy_unalign_copy6bytes
    285 
            // Bytes 5-8 as one word; bytes 5 and 6 are already known non-zero.
    286     ldr     r3, [r1], #4
    287     pld     [r1, #64]
    288 
            // Byte 7 is bits 16-23 of r3.
    289     tst     r3, #0xff0000
    290     beq     strcpy_copy7bytes
            // Z is set when byte 8 is the terminator; strd preserves the flags.
    291     lsrs    ip, r3, #24
    292     strd    r2, r3, [r0], #8
    293     beq     strcpy_unalign_return
    294     b       strcpy_unalign6
    295 
    296     .p2align 2
    297     // Can read 5 bytes before possibly crossing a page.
    298 strcpy_unalign5:
            // Selected when src & 7 == 3; r1 advances by 8 per pass.
            // Bytes 1-4: one word load, checked with the zero-byte mask.
    299     ldr     r2, [r1], #4
    300 
    301     sub     ip, r2, #0x01010101
    302     bic     ip, ip, r2
    303     ands    ip, ip, #0x80808080
    304     bne     strcpy_zero_in_first_register
    305 
            // Byte 5 is the last one before the 8-byte boundary; check it on
            // its own before loading the word that reaches past the boundary.
    306     ldrb    r4, [r1]
    307     cbz     r4, strcpy_unalign_copy5bytes
    308 
    309     ldr     r3, [r1], #4
    310 
    311     pld     [r1, #64]
    312 
    313     sub     ip, r3, #0x01010101
    314     bic     ip, ip, r3
    315     ands    ip, ip, #0x80808080
    316     bne     strcpy_zero_in_second_register
    317 
    318     strd    r2, r3, [r0], #8
    319     b       strcpy_unalign5
    320 
    321 strcpy_unalign_copy5bytes:
            // Shared with strcpy_unalign6. r2 = bytes 1-4, r4 = byte 5, which
            // is the terminator.
    322     str     r2, [r0], #4
    323     strb    r4, [r0]
    324     m_pop
    325 
    326 strcpy_unalign_copy6bytes:
            // Reached from strcpy_unalign6. r2 = bytes 1-4, r4 = byte 5,
            // r5 = byte 6, which is the terminator.
    327     str     r2, [r0], #4
    328     strb    r4, [r0], #1
    329     strb    r5, [r0]
    330     m_pop
    331 
    332     .p2align 2
    333     // Can read 4 bytes before possibly crossing a page.
    334 strcpy_unalign4:
            // Selected when src & 7 == 4, so src is word aligned and r1
            // advances by 8 per pass. The second word is only loaded after the
            // first one has been found free of zero bytes.
    335     ldr     r2, [r1], #4
    336 
    337     sub     ip, r2, #0x01010101
    338     bic     ip, ip, r2
    339     ands    ip, ip, #0x80808080
    340     bne     strcpy_zero_in_first_register
    341 
    342     ldr     r3, [r1], #4
    343     pld     [r1, #64]
    344 
    345     sub     ip, r3, #0x01010101
    346     bic     ip, ip, r3
    347     ands    ip, ip, #0x80808080
    348     bne     strcpy_zero_in_second_register
    349 
    350     strd    r2, r3, [r0], #8
    351     b       strcpy_unalign4
    352 
    353     .p2align 2
    354     // Can read 3 bytes before possibly crossing a page.
    355 strcpy_unalign3:
            // Selected when src & 7 == 5; r1 advances by 8 per pass.
            // Bytes 1-3 are checked individually (without advancing r1). Byte 4
            // is the first one past the 8-byte boundary.
    356     ldrb    r2, [r1]
    357     cbz     r2, strcpy_unalign3_copy1byte
    358     ldrb    r3, [r1, #1]
    359     cbz     r3, strcpy_unalign3_copy2bytes
    360     ldrb    r4, [r1, #2]
    361     cbz     r4, strcpy_unalign3_copy3bytes
    362 
            // Bytes 1-3 are non-zero, so byte 4 belongs to the string. Bytes
            // 5-8 sit in the same aligned 8-byte unit as byte 4, so both words
            // are loaded before byte 4 is examined.
    363     ldr     r2, [r1], #4
    364     ldr     r3, [r1], #4
    365 
    366     pld     [r1, #64]
    367 
            // Only byte 4 (the top byte of r2) can still be zero in r2. lr just
            // receives the discarded shift result.
    368     lsrs    lr, r2, #24
    369     beq     strcpy_copy4bytes
    370 
    371     sub     ip, r3, #0x01010101
    372     bic     ip, ip, r3
    373     ands    ip, ip, #0x80808080
    374     bne     strcpy_zero_in_second_register
    375 
    376     strd    r2, r3, [r0], #8
    377     b       strcpy_unalign3
    378 
    379 strcpy_unalign3_copy1byte:
            // r2 = byte 1 = terminator.
    380     strb    r2, [r0]
    381     m_pop
    382 
    383 strcpy_unalign3_copy2bytes:
            // r2 = byte 1, r3 = byte 2 = terminator.
    384     strb    r2, [r0], #1
    385     strb    r3, [r0]
    386     m_pop
    387 
    388 strcpy_unalign3_copy3bytes:
            // r2 = byte 1, r3 = byte 2, r4 = byte 3 = terminator.
    389     strb    r2, [r0], #1
    390     strb    r3, [r0], #1
    391     strb    r4, [r0]
    392     m_pop
    393 
    394     .p2align 2
    395     // Can read 2 bytes before possibly crossing a page.
    396 strcpy_unalign2:
            // Selected when src & 7 == 6; r1 advances by 8 per pass.
            // Bytes 1 and 2 are checked individually (without advancing r1).
            // The shared tails expect r2 = byte 1 and r4 = byte 2.
    397     ldrb    r2, [r1]
    398     cbz     r2, strcpy_unalign_copy1byte
    399     ldrb    r4, [r1, #1]
    400     cbz     r4, strcpy_unalign_copy2bytes
    401 
            // Bytes 1 and 2 are non-zero, so byte 3, the first byte past the
            // 8-byte boundary, belongs to the string. Bytes 4-8 sit in the same
            // aligned 8-byte unit as byte 3.
    402     ldr     r2, [r1], #4
    403     ldr     r3, [r1], #4
    404     pld     [r1, #64]
    405 
            // Byte 3 is bits 16-23 of r2, byte 4 is its top byte.
    406     tst     r2, #0xff0000
    407     beq     strcpy_copy3bytes
    408     lsrs    ip, r2, #24
    409     beq     strcpy_copy4bytes
    410 
    411     sub     ip, r3, #0x01010101
    412     bic     ip, ip, r3
    413     ands    ip, ip, #0x80808080
    414     bne     strcpy_zero_in_second_register
    415 
    416     strd    r2, r3, [r0], #8
    417     b       strcpy_unalign2
    418 
    419     .p2align 2
    420     // Can read 1 byte before possibly crossing a page.
    421 strcpy_unalign1:
            // Selected when src & 7 == 7; r1 advances by 8 per pass.
            // Byte 1 is the last byte before the 8-byte boundary; check it on
            // its own (without advancing r1).
    422     ldrb    r2, [r1]
    423     cbz     r2, strcpy_unalign_copy1byte
    424 
            // Byte 1 is non-zero, so byte 2 belongs to the string, and bytes
            // 2-8 all sit in one aligned 8-byte unit. Both words can therefore
            // be loaded and checked with the zero-byte mask.
    425     ldr     r2, [r1], #4
    426     ldr     r3, [r1], #4
    427 
    428     pld     [r1, #64]
    429 
    430     sub     ip, r2, #0x01010101
    431     bic     ip, ip, r2
    432     ands    ip, ip, #0x80808080
    433     bne     strcpy_zero_in_first_register
    434 
    435     sub     ip, r3, #0x01010101
    436     bic     ip, ip, r3
    437     ands    ip, ip, #0x80808080
    438     bne     strcpy_zero_in_second_register
    439 
    440     strd    r2, r3, [r0], #8
    441     b       strcpy_unalign1
    442 
    443 strcpy_unalign_copy1byte:
            // Shared with strcpy_unalign2. r2 = byte 1 = terminator.
    444     strb    r2, [r0]
    445     m_pop
    446 
    447 strcpy_unalign_copy2bytes:
            // Reached from strcpy_unalign2. r2 = byte 1, r4 = byte 2, which is
            // the terminator.
    448     strb    r2, [r0], #1
    449     strb    r4, [r0]
    450     m_pop
    451 END(strcpy)
    452