/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

     56 #if !defined(STPCPY) && !defined(STRCPY)
     57 #error "Either STPCPY or STRCPY must be defined."
     58 #endif
     59 
     60 #include <private/bionic_asm.h>
     61 
     62     .syntax unified
     63 
     64     .thumb
     65     .thumb_func
     66 
     67 #if defined(STPCPY)
     68     .macro m_push
     69     push    {r4, r5, lr}
     70     .cfi_def_cfa_offset 12
     71     .cfi_rel_offset r4, 0
     72     .cfi_rel_offset r5, 4
     73     .cfi_rel_offset lr, 8
     74     .endm // m_push
     75 #else
     76     .macro m_push
     77     push    {r0, r4, r5, lr}
     78     .cfi_def_cfa_offset 16
     79     .cfi_rel_offset r0, 0
     80     .cfi_rel_offset r4, 4
     81     .cfi_rel_offset r5, 8
     82     .cfi_rel_offset lr, 12
     83     .endm // m_push
     84 #endif
     85 
     86 #if defined(STPCPY)
     87     .macro m_ret inst
     88     \inst   {r4, r5, pc}
     89     .endm // m_ret
     90 #else
     91     .macro m_ret inst
     92     \inst   {r0, r4, r5, pc}
     93     .endm // m_ret
     94 #endif
     95 
     96     .macro m_copy_byte reg, cmd, label
     97     ldrb    \reg, [r1], #1
     98     strb    \reg, [r0], #1
     99     \cmd    \reg, \label
    100     .endm // m_copy_byte
    101 
    102 #if defined(STPCPY)
    103 ENTRY(stpcpy)
    104 #else
    105 ENTRY(strcpy)
    106 #endif
    107     // Unroll the first 8 bytes that will be copied.
    108     m_push
    109     m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_finish
    110     m_copy_byte reg=r3, cmd=cbz, label=.Lstringcopy_finish
    111     m_copy_byte reg=r4, cmd=cbz, label=.Lstringcopy_finish
    112     m_copy_byte reg=r5, cmd=cbz, label=.Lstringcopy_finish
    113     m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_finish
    114     m_copy_byte reg=r3, cmd=cbz, label=.Lstringcopy_finish
    115     m_copy_byte reg=r4, cmd=cbz, label=.Lstringcopy_finish
    116     m_copy_byte reg=r5, cmd=cbnz, label=.Lstringcopy_continue
    117 
    118 .Lstringcopy_finish:
    119 #if defined(STPCPY)
    120     sub     r0, r0, #1
    121 #endif
    122     m_ret   inst=pop
    123 
    124 .Lstringcopy_continue:
    125     pld     [r1, #0]
    126     ands    r3, r0, #7
    127     bne     .Lstringcopy_align_dst
    128 
    129 .Lstringcopy_check_src_align:
    130     // At this point dst is aligned to a double word, check if src
    131     // is also aligned to a double word.
    132     ands    r3, r1, #7
    133     bne     .Lstringcopy_unaligned_copy
    134 
    135     .p2align 2
    136 .Lstringcopy_mainloop:
    137     ldmia   r1!, {r2, r3}
    138 
    139     pld     [r1, #64]
    140 
    141     sub     ip, r2, #0x01010101
    142     bic     ip, ip, r2
    143     ands    ip, ip, #0x80808080
    144     bne     .Lstringcopy_zero_in_first_register
    145 
    146     sub     ip, r3, #0x01010101
    147     bic     ip, ip, r3
    148     ands    ip, ip, #0x80808080
    149     bne     .Lstringcopy_zero_in_second_register
    150 
    151     stmia   r0!, {r2, r3}
    152     b       .Lstringcopy_mainloop
    153 
    154 .Lstringcopy_zero_in_first_register:
    155     lsls    lr, ip, #17
    156     itt     ne
    157     strbne  r2, [r0]
    158     m_ret   inst=popne
    159     itt     cs
    160 #if defined(STPCPY)
    161     strhcs  r2, [r0], #1
    162 #else
    163     strhcs  r2, [r0]
    164 #endif
    165     m_ret   inst=popcs
    166     lsls    ip, ip, #1
    167     itt     eq
    168 #if defined(STPCPY)
    169     streq   r2, [r0], #3
    170 #else
    171     streq   r2, [r0]
    172 #endif
    173     m_ret   inst=popeq
    174     strh    r2, [r0], #2
    175     lsr     r3, r2, #16
    176     strb    r3, [r0]
    177     m_ret   inst=pop
    178 
    179 .Lstringcopy_zero_in_second_register:
    180     lsls    lr, ip, #17
    181     ittt    ne
    182     stmiane r0!, {r2}
    183     strbne  r3, [r0]
    184     m_ret   inst=popne
    185     ittt    cs
    186     strcs   r2, [r0], #4
    187 #if defined(STPCPY)
    188     strhcs  r3, [r0], #1
    189 #else
    190     strhcs  r3, [r0]
    191 #endif
    192     m_ret   inst=popcs
    193     lsls    ip, ip, #1
    194 #if defined(STPCPY)
    195     ittt    eq
    196 #else
    197     itt     eq
    198 #endif
    199     stmiaeq r0, {r2, r3}
    200 #if defined(STPCPY)
    201     addeq   r0, r0, #7
    202 #endif
    203     m_ret   inst=popeq
    204     stmia   r0!, {r2}
    205     strh    r3, [r0], #2
    206     lsr     r4, r3, #16
    207     strb    r4, [r0]
    208     m_ret   inst=pop
    209 
    210 .Lstringcopy_align_dst:
    211     // Align to a double word (64 bits).
    212     rsb     r3, r3, #8
    213     lsls    ip, r3, #31
    214     beq     .Lstringcopy_align_to_32
    215 
    216     ldrb    r2, [r1], #1
    217     strb    r2, [r0], #1
    218     cbz     r2, .Lstringcopy_complete
    219 
    220 .Lstringcopy_align_to_32:
    221     bcc     .Lstringcopy_align_to_64
    222 
    223     ldrb    r4, [r1], #1
    224     strb    r4, [r0], #1
    225     cmp     r4, #0
    226 #if defined(STPCPY)
    227     itt     eq
    228     subeq   r0, r0, #1
    229 #else
    230     it      eq
    231 #endif
    232     m_ret   inst=popeq
    233     ldrb    r5, [r1], #1
    234     strb    r5, [r0], #1
    235     cmp     r5, #0
    236 #if defined(STPCPY)
    237     itt     eq
    238     subeq   r0, r0, #1
    239 #else
    240     it      eq
    241 #endif
    242     m_ret   inst=popeq
    243 
    244 .Lstringcopy_align_to_64:
    245     tst     r3, #4
    246     beq     .Lstringcopy_check_src_align
    247     ldr     r2, [r1], #4
    248 
    249     sub     ip, r2, #0x01010101
    250     bic     ip, ip, r2
    251     ands    ip, ip, #0x80808080
    252     bne     .Lstringcopy_zero_in_first_register
    253     stmia   r0!, {r2}
    254     b       .Lstringcopy_check_src_align
    255 
    256 .Lstringcopy_complete:
    257 #if defined(STPCPY)
    258     sub     r0, r0, #1
    259 #endif
    260     m_ret   inst=pop
    261 
    262 .Lstringcopy_unaligned_copy:
    263     // Dst is aligned to a double word, while src is at an unknown alignment.
    264     // There are 7 different versions of the unaligned copy code
    265     // to prevent overreading the src. The mainloop of every single version
    266     // will store 64 bits per loop. The difference is how much of src can
    267     // be read without potentially crossing a page boundary.
    268     tbb     [pc, r3]
    269 .Lstringcopy_unaligned_branchtable:
    270     .byte 0
    271     .byte ((.Lstringcopy_unalign7 - .Lstringcopy_unaligned_branchtable)/2)
    272     .byte ((.Lstringcopy_unalign6 - .Lstringcopy_unaligned_branchtable)/2)
    273     .byte ((.Lstringcopy_unalign5 - .Lstringcopy_unaligned_branchtable)/2)
    274     .byte ((.Lstringcopy_unalign4 - .Lstringcopy_unaligned_branchtable)/2)
    275     .byte ((.Lstringcopy_unalign3 - .Lstringcopy_unaligned_branchtable)/2)
    276     .byte ((.Lstringcopy_unalign2 - .Lstringcopy_unaligned_branchtable)/2)
    277     .byte ((.Lstringcopy_unalign1 - .Lstringcopy_unaligned_branchtable)/2)
    278 
    .p2align 2
    // Can read 7 bytes before possibly crossing a page.
    // Dispatched for src & 7 == 1. Every pass advances src by 8 bytes, so
    // the same src alignment holds at the top of each iteration.
.Lstringcopy_unalign7:
    ldr     r2, [r1], #4                // bytes 0-3 of this group

    // Zero-byte test on r2 (ip != 0 when r2 contains a zero byte).
    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    // Bytes 4-6 are probed individually. Only when all three are nonzero is
    // the second word loaded, since its last byte lies beyond the 7 bytes
    // that are known to be readable.
    ldrb    r3, [r1]
    cbz     r3, .Lstringcopy_unalign7_copy5bytes
    ldrb    r4, [r1, #1]
    cbz     r4, .Lstringcopy_unalign7_copy6bytes
    ldrb    r5, [r1, #2]
    cbz     r5, .Lstringcopy_unalign7_copy7bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Only the top byte of r3 can still be the terminator. The stmia does
    // not modify the flags set by lsrs.
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}
#if defined(STPCPY)
    beq     .Lstringcopy_finish         // finish steps r0 back onto the NUL
#else
    beq     .Lstringcopy_unalign_return
#endif
    b       .Lstringcopy_unalign7

    // Tails: r2 holds four non-NUL bytes, r3/r4/r5 the bytes after them.
    // Each tail leaves r0 addressing the NUL it stored.
.Lstringcopy_unalign7_copy5bytes:
    stmia   r0!, {r2}
    strb    r3, [r0]
.Lstringcopy_unalign_return:
    m_ret   inst=pop

.Lstringcopy_unalign7_copy6bytes:
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign7_copy7bytes:
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 6 bytes before possibly crossing a page.
    // Dispatched for src & 7 == 2. Every pass advances src by 8 bytes.
.Lstringcopy_unalign6:
    ldr     r2, [r1], #4                // bytes 0-3 of this group

    // Zero-byte test on r2 (ip != 0 when r2 contains a zero byte).
    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    // Bytes 4 and 5 are probed individually before the second word, which
    // extends beyond the 6 readable bytes, is loaded.
    ldrb    r4, [r1]
    cbz     r4, .Lstringcopy_unalign_copy5bytes
    ldrb    r5, [r1, #1]
    cbz     r5, .Lstringcopy_unalign_copy6bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 4 and 5 are nonzero, so only byte 6 (bits 16-23 of r3) or
    // byte 7 (bits 24-31) can be the terminator.
    tst     r3, #0xff0000
    beq     .Lstringcopy_unalign6_copy7bytes
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}               // flags from lsrs are preserved
#if defined(STPCPY)
    beq     .Lstringcopy_finish         // finish steps r0 back onto the NUL
#else
    beq     .Lstringcopy_unalign_return
#endif
    b       .Lstringcopy_unalign6

.Lstringcopy_unalign6_copy7bytes:
    // Byte 6 is the NUL: store r2, the low halfword of r3, then the NUL.
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r3, #16
    strb    r3, [r0]                    // r0 addresses the NUL
    m_ret   inst=pop

    .p2align 2
    // Can read 5 bytes before possibly crossing a page.
    // Dispatched for src & 7 == 3. Every pass advances src by 8 bytes.
.Lstringcopy_unalign5:
    ldr     r2, [r1], #4                // bytes 0-3 of this group

    // Zero-byte test on r2 (ip != 0 when r2 contains a zero byte).
    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    // Byte 4 is the last one known to be readable; check it before loading
    // the second word, which extends past it.
    ldrb    r4, [r1]
    cbz     r4, .Lstringcopy_unalign_copy5bytes

    ldr     r3, [r1], #4

    pld     [r1, #64]

    // Zero-byte test on r3.
    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign5

    // Tails shared by the unalign6 and unalign5 paths: r2 holds four non-NUL
    // bytes, r4 and r5 the bytes after them. r0 ends up addressing the NUL.
.Lstringcopy_unalign_copy5bytes:
    stmia   r0!, {r2}
    strb    r4, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign_copy6bytes:
    stmia   r0!, {r2}
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 4 bytes before possibly crossing a page.
    // Dispatched for src & 7 == 4, i.e. src is word aligned. Each word is
    // tested for a NUL before the next one is loaded.
.Lstringcopy_unalign4:
    ldmia   r1!, {r2}

    // Zero-byte test on r2 (ip != 0 when r2 contains a zero byte).
    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    ldmia   r1!, {r3}
    pld     [r1, #64]

    // Zero-byte test on r3.
    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign4

    .p2align 2
    // Can read 3 bytes before possibly crossing a page.
    // Dispatched for src & 7 == 5. Every pass advances src by 8 bytes.
.Lstringcopy_unalign3:
    // Probe the three bytes known to be readable before any word load.
    ldrb    r2, [r1]
    cbz     r2, .Lstringcopy_unalign3_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .Lstringcopy_unalign3_copy2bytes
    ldrb    r4, [r1, #2]
    cbz     r4, .Lstringcopy_unalign3_copy3bytes

    // Bytes 0-2 are nonzero, so both words can be loaded: the second word
    // lies in the same aligned double word as byte 3.
    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    // Only the top byte (byte 3) of r2 can still be a NUL.
    lsrs    lr, r2, #24
    beq     .Lstringcopy_unalign_copy4bytes

    // Zero-byte test on r3.
    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign3

    // Tails: r2, r3, r4 hold the probed bytes; r0 ends up addressing the NUL.
.Lstringcopy_unalign3_copy1byte:
    strb    r2, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign3_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign3_copy3bytes:
    strb    r2, [r0], #1
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 2 bytes before possibly crossing a page.
    // Dispatched for src & 7 == 6. Every pass advances src by 8 bytes.
.Lstringcopy_unalign2:
    // Probe the two bytes known to be readable before any word load.
    ldrb    r2, [r1]
    cbz     r2, .Lstringcopy_unalign_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .Lstringcopy_unalign_copy2bytes

    // Bytes 0-1 are nonzero, so both words can be loaded: everything past
    // byte 1 lies in the next aligned double word.
    ldr     r2, [r1], #4
    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0-1 of r2 are nonzero; check byte 2 (bits 16-23), then byte 3.
    tst     r2, #0xff0000
    beq     .Lstringcopy_unalign_copy3bytes
    lsrs    ip, r2, #24
    beq     .Lstringcopy_unalign_copy4bytes

    // Zero-byte test on r3.
    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign2

    .p2align 2
    // Can read 1 byte before possibly crossing a page.
    // Dispatched for src & 7 == 7. Every pass advances src by 8 bytes.
.Lstringcopy_unalign1:
    // Probe the single byte known to be readable before any word load.
    ldrb    r2, [r1]
    cbz     r2, .Lstringcopy_unalign_copy1byte

    // Byte 0 is nonzero, so both words can be loaded: everything past
    // byte 0 lies in the next aligned double word.
    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    // Zero-byte test on r2 (ip != 0 when r2 contains a zero byte).
    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    // Zero-byte test on r3.
    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign1

    // Shared tails for the unalign3/2/1 paths. Each stores the bytes up to
    // and including the NUL and leaves r0 addressing that NUL.
.Lstringcopy_unalign_copy1byte:
    // r2 = the NUL byte itself.
    strb    r2, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign_copy2bytes:
    // r2 = first byte, r3 = NUL.
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign_copy3bytes:
    // r2 = word whose byte 2 is the NUL: store the low halfword, then byte 2.
    strh    r2, [r0], #2
    lsr     r2, #16
    strb    r2, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign_copy4bytes:
    // r2 = word whose top byte is the NUL: store it whole.
    stmia   r0, {r2}
#if defined(STPCPY)
    add     r0, r0, #3                  // NUL is the fourth byte stored
#endif
    m_ret   inst=pop
#if defined(STPCPY)
END(stpcpy)
#else
END(strcpy)
#endif
