Home | History | Annotate | Download | only in bionic
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  *  * Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  *  * Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in
     12  *    the documentation and/or other materials provided with the
     13  *    distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     16  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     17  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     18  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
     19  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
     22  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
     25  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 /*
     29  * Copyright (c) 2013 ARM Ltd
     30  * All rights reserved.
     31  *
     32  * Redistribution and use in source and binary forms, with or without
     33  * modification, are permitted provided that the following conditions
     34  * are met:
     35  * 1. Redistributions of source code must retain the above copyright
     36  *    notice, this list of conditions and the following disclaimer.
     37  * 2. Redistributions in binary form must reproduce the above copyright
     38  *    notice, this list of conditions and the following disclaimer in the
     39  *    documentation and/or other materials provided with the distribution.
     40  * 3. The name of the company may not be used to endorse or promote
     41  *    products derived from this software without specific prior written
     42  *    permission.
     43  *
     44  * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
     45  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
     46  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     47  * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     48  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     49  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     50  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     51  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     52  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     53  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     54  */
     55 
     56 #if !defined(STPCPY) && !defined(STRCPY)
            // This file is a template shared by stpcpy and strcpy: whoever builds
            // it must define STPCPY or STRCPY to choose which function is emitted.
            // Every variant switch in this file tests STPCPY first, so STPCPY wins
            // if both macros are defined.
     57 #error "Either STPCPY or STRCPY must be defined."
     58 #endif
     59 
     60 #include <private/bionic_asm.h>
     61 
     61     .syntax unified
            // Unified ARM/Thumb assembler syntax: the conditional mnemonics used
            // in this file (strbne, popeq, stmiaeq, ...) are written in that form.
     62 
     63     .thumb
            // Assemble what follows as Thumb code; the file uses cbz/cbnz and tbb,
            // which only exist in the Thumb instruction set.
     64     .thumb_func
            // Marks the next symbol defined as a Thumb function - presumably the
            // one created by ENTRY(); ENTRY comes from bionic_asm.h and is not
            // visible here, so confirm against that header.
     66 
     67 #if defined(STPCPY)
            // m_push (stpcpy variant): prologue. Saves r4, r5 and lr so that r4/r5
            // can be used as scratch registers and lr can be reused as a temporary.
            // r0 is not saved because stpcpy returns the final value of r0.
            // The CFI directives record the frame: CFA = sp + 12, with r4, r5 and
            // lr stored at sp + 0, sp + 4 and sp + 8.
     68     .macro m_push
     69     push    {r4, r5, lr}
     70     .cfi_def_cfa_offset 12
     71     .cfi_rel_offset r4, 0
     72     .cfi_rel_offset r5, 4
     73     .cfi_rel_offset lr, 8
     74     .endm // m_push
     75 #else
            // m_push (strcpy variant): same as above but also saves r0, so the
            // matching m_ret can pop the original dst pointer back as the return
            // value. CFA = sp + 16, with r0, r4, r5 and lr at sp + 0, 4, 8 and 12.
     76     .macro m_push
     77     push    {r0, r4, r5, lr}
     78     .cfi_def_cfa_offset 16
     79     .cfi_rel_offset r0, 0
     80     .cfi_rel_offset r4, 4
     81     .cfi_rel_offset r5, 8
     82     .cfi_rel_offset lr, 12
     83     .endm // m_push
     84 #endif
     85 
     86 #if defined(STPCPY)
            // m_ret inst=<pop mnemonic>: epilogue matching m_push. The inst argument
            // is the pop mnemonic to emit, either plain pop or a conditional form
            // (popne, popeq, popcs) placed as the last instruction of an it block.
            // The slot that m_push filled with lr is popped straight into pc, so a
            // pop that executes also returns to the caller.
            // stpcpy variant: restores r4 and r5 and leaves r0 untouched, so r0
            // must already hold the return value.
     87     .macro m_ret inst
     88     \inst   {r4, r5, pc}
     89     .endm // m_ret
     90 #else
            // strcpy variant: additionally pops the saved r0, i.e. the dst pointer
            // that was passed in, which becomes the return value.
     91     .macro m_ret inst
     92     \inst   {r0, r4, r5, pc}
     93     .endm // m_ret
     94 #endif
     95 
     96     .macro m_copy_byte reg, cmd, label
            // Copies one byte from [r1] to [r0] through the register given as reg,
            // post-incrementing both pointers by 1, then emits "cmd reg, label".
            // The callers in this file pass cmd=cbz (branch to label when the byte
            // just copied is NUL) or cmd=cbnz (branch when it is not NUL).
            // The byte is stored before it is tested, so the terminator itself is
            // always written to dst.
     97     ldrb    \reg, [r1], #1
     98     strb    \reg, [r0], #1
     99     \cmd    \reg, \label
    100     .endm // m_copy_byte
    101 
    102 #if defined(STPCPY)
    103 ENTRY(stpcpy)
    104 #else
    105 ENTRY(strcpy)
    106 #endif
            // Copies the NUL-terminated string at src to dst, terminator included.
            //   In:   r0 = dst, r1 = src (bytes are loaded from [r1], stored to [r0]).
            //   Out:  strcpy build - r0 = the dst value passed in (saved by m_push
            //         and popped by m_ret).
            //         stpcpy build - r0 = address of the NUL written to dst.
            //   Uses: r2, r3, ip as scratch; r4, r5 and lr as scratch after m_push
            //         has saved them.
            // Invariant for the stpcpy build: every return path must leave r0
            // pointing at the terminator, which is why several tails below
            // post-increment or adjust r0 only under #if defined(STPCPY).
    107     // Unroll the first 8 bytes that will be copied.
    108     m_push
    109     m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_finish
    110     m_copy_byte reg=r3, cmd=cbz, label=.Lstringcopy_finish
    111     m_copy_byte reg=r4, cmd=cbz, label=.Lstringcopy_finish
    112     m_copy_byte reg=r5, cmd=cbz, label=.Lstringcopy_finish
    113     m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_finish
    114     m_copy_byte reg=r3, cmd=cbz, label=.Lstringcopy_finish
    115     m_copy_byte reg=r4, cmd=cbz, label=.Lstringcopy_finish
            // 8th byte uses cbnz: a non-NUL byte jumps to the bulk copy, a NUL
            // falls through into .Lstringcopy_finish.
    116     m_copy_byte reg=r5, cmd=cbnz, label=.Lstringcopy_continue
    117 
    118 .Lstringcopy_finish:
            // Every path that arrives here has just stored the NUL with a
            // post-incrementing store, so r0 is one past the terminator; the
            // stpcpy build steps back by one to return the terminator address.
    119 #if defined(STPCPY)
    120     sub     r0, r0, #1
    121 #endif
    122     m_ret   inst=pop
    123 
    124 .Lstringcopy_continue:
            // Reached after 8 bytes were copied without meeting a NUL.
            // r3 = dst & 7; a nonzero value means dst is not yet 8-byte aligned
            // and .Lstringcopy_align_dst copies the 1-7 bytes needed to align it.
    125     pld     [r1, #0]
    126     ands    r3, r0, #7
    127     bne     .Lstringcopy_align_dst
    128 
    129 .Lstringcopy_check_src_align:
    130     // At this point dst is aligned to a double word, check if src
    131     // is also aligned to a double word.
            // r3 = src & 7. When it is nonzero it is kept and used as the index
            // into the branch table at .Lstringcopy_unaligned_copy; when it is
            // zero execution falls through into the aligned main loop.
    132     ands    r3, r1, #7
    133     bne     .Lstringcopy_unaligned_copy
    134 
    135     .p2align 2
    136 .Lstringcopy_mainloop:
            // Aligned bulk loop: src and dst are both 8-byte aligned. Each pass
            // loads two words, checks each for a NUL byte and stores both words
            // only if neither contains one.
    137     ldmia   r1!, {r2, r3}
    138 
    139     pld     [r1, #64]
    140 
            // Zero-byte test on a word w: (w - 0x01010101) & ~w & 0x80808080 is
            // nonzero exactly when some byte of w is zero. Bits 7/15/23/31 of the
            // result flag bytes 0/1/2/3, and the lowest flag that is set always
            // identifies the first zero byte.
    141     sub     ip, r2, #0x01010101
    142     bic     ip, ip, r2
    143     ands    ip, ip, #0x80808080
    144     bne     .Lstringcopy_zero_in_first_register
    145 
    146     sub     ip, r3, #0x01010101
    147     bic     ip, ip, r3
    148     ands    ip, ip, #0x80808080
    149     bne     .Lstringcopy_zero_in_second_register
    150 
            // No NUL in these 8 bytes: store them and advance dst by 8.
    151     stmia   r0!, {r2, r3}
    152     b       .Lstringcopy_mainloop
    153 
    154 .Lstringcopy_zero_in_first_register:
            // In: r2 = 4 source bytes containing at least one NUL, none of them
            // stored yet; ip = flag word from the zero-byte test on r2 (bits
            // 7/15/23/31 flag bytes 0/1/2/3, byte 0 being the first string byte,
            // i.e. the least significant byte of r2).
            // Stores the bytes up to and including the first NUL and returns. In
            // the stpcpy build each path leaves r0 at the NUL.
            //
            // lsls #17 moves bit 15 into the carry flag and leaves a nonzero
            // result only if bit 7 was set:
            //   ne -> byte 0 is the NUL;  cs (and not ne) -> byte 1 is the NUL.
            // lr is used as a throwaway destination; the return address is
            // already on the stack.
    155     lsls    lr, ip, #17
    156     itt     ne
    157     strbne  r2, [r0]
    158     m_ret   inst=popne
            // Byte 1 is the NUL: store bytes 0-1 as a halfword. The stpcpy build
            // post-increments r0 by 1 so that it ends up at the NUL.
    159     itt     cs
    160 #if defined(STPCPY)
    161     strhcs  r2, [r0], #1
    162 #else
    163     strhcs  r2, [r0]
    164 #endif
    165     m_ret   inst=popcs
            // Bits 7 and 15 are clear here, so ip << 1 is zero only when bit 23
            // is clear too, which means the NUL is byte 3: store the whole word
            // (stpcpy: r0 += 3 to land on the NUL).
    166     lsls    ip, ip, #1
    167     itt     eq
    168 #if defined(STPCPY)
    169     streq   r2, [r0], #3
    170 #else
    171     streq   r2, [r0]
    172 #endif
    173     m_ret   inst=popeq
            // Remaining case, byte 2 is the NUL: store bytes 0-1 as a halfword,
            // then byte 2 (the NUL) on its own. r0 ends at the NUL.
    174     strh    r2, [r0], #2
    175     lsr     r3, r2, #16
    176     strb    r3, [r0]
    177     m_ret   inst=pop
    178 
    179 .Lstringcopy_zero_in_second_register:
            // In: r2 = 4 source bytes without a NUL, r3 = the next 4 source bytes
            // containing at least one NUL, neither word stored yet; ip = flag word
            // from the zero-byte test on r3 (bits 7/15/23/31 flag bytes 0/1/2/3 of
            // r3, byte 0 being its least significant byte).
            // Stores r2 plus the bytes of r3 up to and including the first NUL,
            // then returns. In the stpcpy build each path leaves r0 at the NUL.
            //
            // lsls #17: ne -> byte 0 of r3 is the NUL; carry set -> byte 1 is.
    180     lsls    lr, ip, #17
            // NUL is byte 0 of r3: store r2 (r0 += 4), then the single NUL byte.
    181     ittt    ne
    182     stmiane r0!, {r2}
    183     strbne  r3, [r0]
    184     m_ret   inst=popne
            // NUL is byte 1 of r3: store r2 (r0 += 4), then bytes 0-1 of r3 as a
            // halfword. The stpcpy build post-increments by 1 to land on the NUL.
    185     ittt    cs
    186     strcs   r2, [r0], #4
    187 #if defined(STPCPY)
    188     strhcs  r3, [r0], #1
    189 #else
    190     strhcs  r3, [r0]
    191 #endif
    192     m_ret   inst=popcs
            // Bits 7 and 15 are clear, so ip << 1 == 0 means bit 23 is clear as
            // well and the NUL is byte 3 of r3: store all 8 bytes. The it block is
            // one instruction longer in the stpcpy build because of the extra
            // addeq that moves r0 to the NUL at offset 7.
    193     lsls    ip, ip, #1
    194 #if defined(STPCPY)
    195     ittt    eq
    196 #else
    197     itt     eq
    198 #endif
    199     stmiaeq r0, {r2, r3}
    200 #if defined(STPCPY)
    201     addeq   r0, r0, #7
    202 #endif
    203     m_ret   inst=popeq
            // Remaining case, NUL is byte 2 of r3: store r2, bytes 0-1 of r3 as a
            // halfword, then byte 2 (the NUL). r4 is scratch (restored by m_ret).
    204     stmia   r0!, {r2}
    205     strh    r3, [r0], #2
    206     lsr     r4, r3, #16
    207     strb    r4, [r0]
    208     m_ret   inst=pop
    209 
    210 .Lstringcopy_align_dst:
    211     // Align to a double word (64 bits).
            // In: r3 = dst & 7 (nonzero). After the rsb, r3 = 8 - (dst & 7), the
            // number of bytes (1-7) still needed to reach 8-byte alignment. Its
            // bits 0, 1 and 2 select a 1-byte, 2-byte and 4-byte step.
    212     rsb     r3, r3, #8
            // lsls #31: Z is set when bit 0 of r3 is clear, and bit 1 of r3 lands
            // in the carry flag. Both flags are consumed below.
    213     lsls    ip, r3, #31
    214     beq     .Lstringcopy_align_to_32
    215 
            // Bit 0 set: copy one byte. ldrb/strb/cbz do not change the flags, so
            // the carry from the lsls is still valid at the bcc below.
    216     ldrb    r2, [r1], #1
    217     strb    r2, [r0], #1
    218     cbz     r2, .Lstringcopy_complete
    219 
    220 .Lstringcopy_align_to_32:
            // Carry clear means bit 1 of r3 is clear: no 2-byte step needed.
    221     bcc     .Lstringcopy_align_to_64
    222 
            // Bit 1 set: copy two bytes, returning as soon as a NUL has been
            // stored. r0 was post-incremented past the NUL, so the stpcpy build
            // steps it back by one inside the it block before the conditional pop.
    223     ldrb    r4, [r1], #1
    224     strb    r4, [r0], #1
    225     cmp     r4, #0
    226 #if defined(STPCPY)
    227     itt     eq
    228     subeq   r0, r0, #1
    229 #else
    230     it      eq
    231 #endif
    232     m_ret   inst=popeq
    233     ldrb    r5, [r1], #1
    234     strb    r5, [r0], #1
    235     cmp     r5, #0
    236 #if defined(STPCPY)
    237     itt     eq
    238     subeq   r0, r0, #1
    239 #else
    240     it      eq
    241 #endif
    242     m_ret   inst=popeq
    243 
    244 .Lstringcopy_align_to_64:
            // Last alignment step. r3 still holds the byte count computed at
            // .Lstringcopy_align_dst; bit 2 set means 4 more bytes are needed to
            // make dst 8-byte aligned. If it is clear, dst is already aligned.
    245     tst     r3, #4
    246     beq     .Lstringcopy_check_src_align
    247     // Read one byte at a time since we don't have any idea about the alignment
    248     // of the source and we don't want to read into a different page.
    249     ldrb    r2, [r1], #1
    250     strb    r2, [r0], #1
    251     cbz     r2, .Lstringcopy_complete
    252     ldrb    r2, [r1], #1
    253     strb    r2, [r0], #1
    254     cbz     r2, .Lstringcopy_complete
    255     ldrb    r2, [r1], #1
    256     strb    r2, [r0], #1
    257     cbz     r2, .Lstringcopy_complete
    258     ldrb    r2, [r1], #1
    259     strb    r2, [r0], #1
    260     cbz     r2, .Lstringcopy_complete
    261     b       .Lstringcopy_check_src_align
    262 
    263 .Lstringcopy_complete:
            // Reached from the byte-wise alignment copies right after a NUL was
            // stored with a post-incrementing strb, so r0 is one past the NUL;
            // the stpcpy build steps back by one to return the NUL address.
    264 #if defined(STPCPY)
    265     sub     r0, r0, #1
    266 #endif
    267     m_ret   inst=pop
    268 
    269 .Lstringcopy_unaligned_copy:
    270     // Dst is aligned to a double word, while src is at an unknown alignment.
    271     // There are 7 different versions of the unaligned copy code
    272     // to prevent overreading the src. The mainloop of every single version
    273     // will store 64 bits per loop. The difference is how much of src can
    274     // be read without potentially crossing a page boundary.
            // In: r3 = src & 7, in the range 1-7 (the only branch to this label is
            // taken when that value is nonzero).
            // tbb reads the byte at table[r3] and branches forward by twice that
            // value from the table base; pc reads as the address right after the
            // 4-byte tbb, which is the table label itself. Index k dispatches to
            // .Lstringcopy_unalign(8 - k), i.e. the variant named after how many
            // bytes remain before the next 8-byte boundary of src.
    275     tbb     [pc, r3]
    276 .Lstringcopy_unaligned_branchtable:
            // Entry 0 is a placeholder: r3 is never 0 here.
    277     .byte 0
    278     .byte ((.Lstringcopy_unalign7 - .Lstringcopy_unaligned_branchtable)/2)
    279     .byte ((.Lstringcopy_unalign6 - .Lstringcopy_unaligned_branchtable)/2)
    280     .byte ((.Lstringcopy_unalign5 - .Lstringcopy_unaligned_branchtable)/2)
    281     .byte ((.Lstringcopy_unalign4 - .Lstringcopy_unaligned_branchtable)/2)
    282     .byte ((.Lstringcopy_unalign3 - .Lstringcopy_unaligned_branchtable)/2)
    283     .byte ((.Lstringcopy_unalign2 - .Lstringcopy_unaligned_branchtable)/2)
    284     .byte ((.Lstringcopy_unalign1 - .Lstringcopy_unaligned_branchtable)/2)
    285 
    286     .p2align 2
    287     // Can read 7 bytes before possibly crossing a page.
    288 .Lstringcopy_unalign7:
            // Loop for src & 7 == 1 (r1 advances by 8 per pass, so that holds on
            // every iteration). Bytes 0-3 are read as one word; bytes 4-6 are then
            // probed one at a time because a word load at offset 4 would also
            // touch byte 7, which lies past the next 8-byte boundary.
    289     ldr     r2, [r1], #4
    290 
            // Zero-byte test on r2: ip is nonzero when r2 contains a NUL.
    291     sub     ip, r2, #0x01010101
    292     bic     ip, ip, r2
    293     ands    ip, ip, #0x80808080
    294     bne     .Lstringcopy_zero_in_first_register
    295 
    296     ldrb    r3, [r1]
    297     cbz     r3, .Lstringcopy_unalign7_copy5bytes
    298     ldrb    r4, [r1, #1]
    299     cbz     r4, .Lstringcopy_unalign7_copy6bytes
    300     ldrb    r5, [r1, #2]
    301     cbz     r5, .Lstringcopy_unalign7_copy7bytes
    302 
            // Bytes 4-6 are non-NUL, so the string continues into byte 7 and the
            // word load is safe. Only byte 7 (the top byte of r3) can still be the
            // NUL: lsrs sets Z when it is. stmia leaves the flags alone, so the
            // beq below still tests that result after the 8 bytes are stored.
    303     ldr     r3, [r1], #4
    304     pld     [r1, #64]
    305 
    306     lsrs    ip, r3, #24
    307     stmia   r0!, {r2, r3}
            // NUL was byte 7: r0 is now one past it. The stpcpy build returns
            // through .Lstringcopy_finish, which steps r0 back by one; the strcpy
            // build just pops.
    308 #if defined(STPCPY)
    309     beq     .Lstringcopy_finish
    310 #else
    311     beq     .Lstringcopy_unalign_return
    312 #endif
    313     b       .Lstringcopy_unalign7
    314 
    315 .Lstringcopy_unalign7_copy5bytes:
            // Byte 4 (r3) is the NUL: store the word, then the NUL. r0 ends at it.
    316     stmia   r0!, {r2}
    317     strb    r3, [r0]
    318 .Lstringcopy_unalign_return:
    319     m_ret   inst=pop
    320 
    321 .Lstringcopy_unalign7_copy6bytes:
            // Byte 5 (r4) is the NUL: store the word, byte 4, then the NUL.
    322     stmia   r0!, {r2}
    323     strb    r3, [r0], #1
    324     strb    r4, [r0]
    325     m_ret   inst=pop
    326 
    327 .Lstringcopy_unalign7_copy7bytes:
            // Byte 6 (r5) is the NUL: store the word, bytes 4-5, then the NUL.
    328     stmia   r0!, {r2}
    329     strb    r3, [r0], #1
    330     strb    r4, [r0], #1
    331     strb    r5, [r0]
    332     m_ret   inst=pop
    333 
    334     .p2align 2
    335     // Can read 6 bytes before possibly crossing a page.
    336 .Lstringcopy_unalign6:
            // Loop for src & 7 == 2. Bytes 0-3 are read as one word; bytes 4 and 5
            // are probed individually before the second word load, which also
            // touches bytes 6-7 on the far side of the next 8-byte boundary.
    337     ldr     r2, [r1], #4
    338 
            // Zero-byte test on r2: ip is nonzero when r2 contains a NUL.
    339     sub     ip, r2, #0x01010101
    340     bic     ip, ip, r2
    341     ands    ip, ip, #0x80808080
    342     bne     .Lstringcopy_zero_in_first_register
    343 
    344     ldrb    r4, [r1]
    345     cbz     r4, .Lstringcopy_unalign_copy5bytes
    346     ldrb    r5, [r1, #1]
    347     cbz     r5, .Lstringcopy_unalign_copy6bytes
    348 
            // Bytes 4-5 are non-NUL, so the word load is safe. Of r3 only byte 6
            // (bits 16-23) and byte 7 (bits 24-31) remain to be tested.
    349     ldr     r3, [r1], #4
    350     pld     [r1, #64]
    351 
    352     tst     r3, #0xff0000
    353     beq     .Lstringcopy_unalign6_copy7bytes
            // lsrs sets Z when byte 7 is the NUL; stmia does not change the flags,
            // so the beq after the store still sees that result.
    354     lsrs    ip, r3, #24
    355     stmia   r0!, {r2, r3}
    356 #if defined(STPCPY)
    357     beq     .Lstringcopy_finish
    358 #else
    359     beq     .Lstringcopy_unalign_return
    360 #endif
    361     b       .Lstringcopy_unalign6
    362 
    363 .Lstringcopy_unalign6_copy7bytes:
            // Byte 6 is the NUL: store the first word, bytes 4-5 as a halfword,
            // then byte 6 (the NUL). r0 ends at the NUL.
    364     stmia   r0!, {r2}
    365     strh    r3, [r0], #2
    366     lsr     r3, #16
    367     strb    r3, [r0]
    368     m_ret   inst=pop
    369 
    370     .p2align 2
    371     // Can read 5 bytes before possibly crossing a page.
    372 .Lstringcopy_unalign5:
            // Loop for src & 7 == 3. Bytes 0-3 are read as one word; byte 4 is
            // probed on its own because the second word load also touches bytes
            // 5-7, which lie past the next 8-byte boundary.
    373     ldr     r2, [r1], #4
    374 
            // Zero-byte test on r2: ip is nonzero when r2 contains a NUL.
    375     sub     ip, r2, #0x01010101
    376     bic     ip, ip, r2
    377     ands    ip, ip, #0x80808080
    378     bne     .Lstringcopy_zero_in_first_register
    379 
    380     ldrb    r4, [r1]
    381     cbz     r4, .Lstringcopy_unalign_copy5bytes
    382 
            // Byte 4 is non-NUL, so the string continues and the word load is
            // safe. The whole of r3 then gets the generic zero-byte test.
    383     ldr     r3, [r1], #4
    384 
    385     pld     [r1, #64]
    386 
    387     sub     ip, r3, #0x01010101
    388     bic     ip, ip, r3
    389     ands    ip, ip, #0x80808080
    390     bne     .Lstringcopy_zero_in_second_register
    391 
    392     stmia   r0!, {r2, r3}
    393     b       .Lstringcopy_unalign5
    394 
    395 .Lstringcopy_unalign_copy5bytes:
            // Shared tail (used by the unalign6 and unalign5 loops): r2 = bytes
            // 0-3, r4 = byte 4 = NUL. Store the word, then the NUL; r0 ends at it.
    396     stmia   r0!, {r2}
    397     strb    r4, [r0]
    398     m_ret   inst=pop
    399 
    400 .Lstringcopy_unalign_copy6bytes:
            // Shared tail (used by the unalign6 loop): r2 = bytes 0-3, r4 = byte 4,
            // r5 = byte 5 = NUL. r0 ends at the NUL.
    401     stmia   r0!, {r2}
    402     strb    r4, [r0], #1
    403     strb    r5, [r0]
    404     m_ret   inst=pop
    405 
    406     .p2align 2
    407     // Can read 4 bytes before possibly crossing a page.
    408 .Lstringcopy_unalign4:
            // Loop for src & 7 == 4: src is word aligned, so each word load stays
            // on one side of the 8-byte boundary. The second word is loaded only
            // after the first one has been shown to contain no NUL.
    409     ldmia   r1!, {r2}
    410 
            // Zero-byte test on r2: ip is nonzero when r2 contains a NUL.
    411     sub     ip, r2, #0x01010101
    412     bic     ip, ip, r2
    413     ands    ip, ip, #0x80808080
    414     bne     .Lstringcopy_zero_in_first_register
    415 
    416     ldmia   r1!, {r3}
    417     pld     [r1, #64]
    418 
            // Same test on r3.
    419     sub     ip, r3, #0x01010101
    420     bic     ip, ip, r3
    421     ands    ip, ip, #0x80808080
    422     bne     .Lstringcopy_zero_in_second_register
    423 
            // No NUL in these 8 bytes: store them and advance dst by 8.
    424     stmia   r0!, {r2, r3}
    425     b       .Lstringcopy_unalign4
    426 
    427     .p2align 2
    428     // Can read 3 bytes before possibly crossing a page.
    429 .Lstringcopy_unalign3:
            // Loop for src & 7 == 5. Only bytes 0-2 lie before the next 8-byte
            // boundary, so they are probed one at a time (without advancing r1)
            // before any word load is issued.
    430     ldrb    r2, [r1]
    431     cbz     r2, .Lstringcopy_unalign3_copy1byte
    432     ldrb    r3, [r1, #1]
    433     cbz     r3, .Lstringcopy_unalign3_copy2bytes
    434     ldrb    r4, [r1, #2]
    435     cbz     r4, .Lstringcopy_unalign3_copy3bytes
    436 
            // Bytes 0-2 are non-NUL, so the string continues past the boundary
            // and both word loads are safe. r2 and r3 are reloaded with full words.
    437     ldr     r2, [r1], #4
    438     ldr     r3, [r1], #4
    439 
    440     pld     [r1, #64]
    441 
            // Of r2 only byte 3 (the top byte) is still unknown; lsrs sets Z when
            // it is the NUL. lr is a throwaway destination.
    442     lsrs    lr, r2, #24
    443     beq     .Lstringcopy_unalign_copy4bytes
    444 
            // Zero-byte test on r3: ip is nonzero when r3 contains a NUL.
    445     sub     ip, r3, #0x01010101
    446     bic     ip, ip, r3
    447     ands    ip, ip, #0x80808080
    448     bne     .Lstringcopy_zero_in_second_register
    449 
    450     stmia   r0!, {r2, r3}
    451     b       .Lstringcopy_unalign3
    452 
    453 .Lstringcopy_unalign3_copy1byte:
            // r2 = byte 0 = NUL. r0 is not advanced, so it stays at the NUL.
    454     strb    r2, [r0]
    455     m_ret   inst=pop
    456 
    457 .Lstringcopy_unalign3_copy2bytes:
            // r2 = byte 0, r3 = byte 1 = NUL. r0 ends at the NUL.
    458     strb    r2, [r0], #1
    459     strb    r3, [r0]
    460     m_ret   inst=pop
    461 
    462 .Lstringcopy_unalign3_copy3bytes:
            // r2, r3 = bytes 0-1, r4 = byte 2 = NUL. r0 ends at the NUL.
    463     strb    r2, [r0], #1
    464     strb    r3, [r0], #1
    465     strb    r4, [r0]
    466     m_ret   inst=pop
    467 
    468     .p2align 2
    469     // Can read 2 bytes before possibly crossing a page.
    470 .Lstringcopy_unalign2:
            // Loop for src & 7 == 6. Only bytes 0-1 lie before the next 8-byte
            // boundary, so they are probed one at a time (without advancing r1)
            // before any word load is issued.
    471     ldrb    r2, [r1]
    472     cbz     r2, .Lstringcopy_unalign_copy1byte
    473     ldrb    r3, [r1, #1]
    474     cbz     r3, .Lstringcopy_unalign_copy2bytes
    475 
            // Bytes 0-1 are non-NUL, so both word loads are safe. r2 and r3 are
            // reloaded with full words.
    476     ldr     r2, [r1], #4
    477     ldr     r3, [r1], #4
    478     pld     [r1, #64]
    479 
            // Of r2 only byte 2 (bits 16-23) and byte 3 (bits 24-31) are still
            // unknown; test them in that order.
    480     tst     r2, #0xff0000
    481     beq     .Lstringcopy_unalign_copy3bytes
    482     lsrs    ip, r2, #24
    483     beq     .Lstringcopy_unalign_copy4bytes
    484 
            // Zero-byte test on r3: ip is nonzero when r3 contains a NUL.
    485     sub     ip, r3, #0x01010101
    486     bic     ip, ip, r3
    487     ands    ip, ip, #0x80808080
    488     bne     .Lstringcopy_zero_in_second_register
    489 
    490     stmia   r0!, {r2, r3}
    491     b       .Lstringcopy_unalign2
    492 
    493     .p2align 2
    494     // Can read 1 byte before possibly crossing a page.
    495 .Lstringcopy_unalign1:
            // Loop for src & 7 == 7. Only byte 0 lies before the next 8-byte
            // boundary, so it is probed on its own (without advancing r1) before
            // any word load is issued.
    496     ldrb    r2, [r1]
    497     cbz     r2, .Lstringcopy_unalign_copy1byte
    498 
            // Byte 0 is non-NUL, so the string continues past the boundary and
            // both word loads are safe. r2 is reloaded with the full first word.
    499     ldr     r2, [r1], #4
    500     ldr     r3, [r1], #4
    501 
    502     pld     [r1, #64]
    503 
            // Zero-byte test on r2, then on r3: ip is nonzero when the tested
            // word contains a NUL.
    504     sub     ip, r2, #0x01010101
    505     bic     ip, ip, r2
    506     ands    ip, ip, #0x80808080
    507     bne     .Lstringcopy_zero_in_first_register
    508 
    509     sub     ip, r3, #0x01010101
    510     bic     ip, ip, r3
    511     ands    ip, ip, #0x80808080
    512     bne     .Lstringcopy_zero_in_second_register
    513 
            // No NUL in these 8 bytes: store them and advance dst by 8.
    514     stmia   r0!, {r2, r3}
    515     b       .Lstringcopy_unalign1
    516 
    517 .Lstringcopy_unalign_copy1byte:
            // Shared tails for the unaligned loops. Each one stores the final
            // bytes of the string and returns; in the stpcpy build r0 must end up
            // at the NUL.
            // copy1byte: r2 = byte 0 = NUL. r0 is not advanced.
    518     strb    r2, [r0]
    519     m_ret   inst=pop
    520 
    521 .Lstringcopy_unalign_copy2bytes:
            // r2 = byte 0, r3 = byte 1 = NUL. r0 ends at the NUL.
    522     strb    r2, [r0], #1
    523     strb    r3, [r0]
    524     m_ret   inst=pop
    525 
    526 .Lstringcopy_unalign_copy3bytes:
            // r2 = a full word whose byte 2 is the NUL: store bytes 0-1 as a
            // halfword, then byte 2. r0 ends at the NUL.
    527     strh    r2, [r0], #2
    528     lsr     r2, #16
    529     strb    r2, [r0]
    530     m_ret   inst=pop
    531 
    532 .Lstringcopy_unalign_copy4bytes:
            // r2 = a full word whose byte 3 is the NUL: store the whole word
            // without writeback. The stpcpy build then adds 3 so that r0 points
            // at the NUL; the strcpy build pops its saved r0 instead.
    533     stmia   r0, {r2}
    534 #if defined(STPCPY)
    535     add     r0, r0, #3
    536 #endif
    537     m_ret   inst=pop
    538 #if defined(STPCPY)
    539 END(stpcpy)
    540 #else
    541 END(strcpy)
    542 #endif
    543