Home | History | Annotate | Download | only in bionic
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  *  * Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  *  * Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in
     12  *    the documentation and/or other materials provided with the
     13  *    distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     16  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     17  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     18  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
     19  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
     22  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
     25  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 /*
     29  * Copyright (c) 2013 ARM Ltd
     30  * All rights reserved.
     31  *
     32  * Redistribution and use in source and binary forms, with or without
     33  * modification, are permitted provided that the following conditions
     34  * are met:
     35  * 1. Redistributions of source code must retain the above copyright
     36  *    notice, this list of conditions and the following disclaimer.
     37  * 2. Redistributions in binary form must reproduce the above copyright
     38  *    notice, this list of conditions and the following disclaimer in the
     39  *    documentation and/or other materials provided with the distribution.
     40  * 3. The name of the company may not be used to endorse or promote
     41  *    products derived from this software without specific prior written
     42  *    permission.
     43  *
     44  * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
     45  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
     46  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     47  * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     48  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     49  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     50  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     51  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     52  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     53  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     54  */
     55 
     56 #include <private/bionic_asm.h>
     57 
     58     .syntax unified                 // Use the unified ARM/Thumb assembler syntax for this file.
     59 
     60     .thumb                          // Assemble everything that follows as Thumb instructions.
     61     .thumb_func                     // Mark the next symbol defined as a Thumb function.
     62 
     63     .macro m_push
        // Save r0, r4, r5 and lr on the stack. m_ret pops the same four
        // slots, so every m_push must be matched by exactly one m_ret.
     64     push    {r0, r4, r5, lr}
     65     .endm // m_push
     66 
     67     .macro m_ret inst
        // Undo m_push and return: reload r0, r4 and r5 with the values saved by
        // m_push and load the saved lr value into pc.
        //   inst: the pop mnemonic to use, optionally with a condition suffix
        //         (this file uses pop, popne, popcs and popeq).
     68     \inst   {r0, r4, r5, pc}
     69     .endm // m_ret
     70 
     71     .macro m_scan_byte
        // Test one byte of the string at r0 for the terminating NUL.
        // If the byte is zero, branch to .Lstrcat_r0_scan_done with r0 still
        // pointing at that byte; otherwise advance r0 by one. Clobbers r3.
     72     ldrb    r3, [r0]
     73     cbz     r3, .Lstrcat_r0_scan_done
     74     add     r0, #1
     75     .endm // m_scan_byte
     76 
     77     .macro m_copy_byte reg, cmd, label
        // Copy one byte from [r1] to [r0], post-incrementing both pointers,
        // then compare-and-branch on the byte that was copied.
        //   reg:   scratch register that receives the byte
        //   cmd:   cbz or cbnz
        //   label: branch target for \cmd
     78     ldrb    \reg, [r1], #1
     79     strb    \reg, [r0], #1
     80     \cmd    \reg, \label
     81     .endm // m_copy_byte
     82 
     83 ENTRY(strcat)
        // strcat: append the NUL-terminated string at r1 (src) to the end of
        // the NUL-terminated string at r0 (dst).
        //   In:  r0 = dst, r1 = src
        //   Out: r0 = dst as passed in (reloaded by m_ret, or never modified on
        //        the empty-src early return)
        //   r4 and r5 are saved by m_push and reloaded by m_ret. lr is saved by
        //   m_push and its saved value is popped into pc to return. r2, r3, ip
        //   (and lr once it has been saved) are used as scratch.
        // Phase 1 (.Lstrcat_* labels): advance r0 to dst's terminating NUL.
        // Phase 2 (.Lstrcpy_* labels): copy src, including its NUL, to r0.
     84     // Quick check to see if src is empty.
     85     ldrb        r2, [r1]
     86     pld         [r1, #0]
     87     cbnz        r2, .Lstrcat_continue
     88     bx          lr                      // Empty src: nothing pushed yet, r0 is still dst.
     89 
     90 .Lstrcat_continue:
     91     // To speed up really small dst strings, unroll checking the first 4 bytes.
     92     m_push                              // Saves the original dst (r0) for the return value.
     93     m_scan_byte
     94     m_scan_byte
     95     m_scan_byte
     96     m_scan_byte
     97 
     98     ands    r3, r0, #7                  // r3 = r0 mod 8; zero means r0 is 8-byte aligned.
     99     bne     .Lstrcat_align_src
    100 
    101     .p2align 2
    102 .Lstrcat_mainloop:
        // Scan dst eight bytes per iteration; r0 is 8-byte aligned here.
    103     ldmia   r0!, {r2, r3}               // Load two words and advance r0 by 8.
    104 
    105     pld     [r0, #64]
    106 
        // Zero-byte test: (x - 0x01010101) & ~x & 0x80808080 is non-zero exactly
        // when some byte of x is zero. The lowest set flag bit (7, 15, 23 or 31)
        // marks the first zero byte (byte 0 is bits 0-7).
    107     sub     ip, r2, #0x01010101
    108     bic     ip, ip, r2
    109     ands    ip, ip, #0x80808080
    110     bne     .Lstrcat_zero_in_first_register
    111 
    112     sub     ip, r3, #0x01010101
    113     bic     ip, ip, r3
    114     ands    ip, ip, #0x80808080
    115     bne     .Lstrcat_zero_in_second_register
    116     b       .Lstrcat_mainloop
    117 
    118 .Lstrcat_zero_in_first_register:
    119     sub     r0, r0, #4                  // r0 was past both words; now it is just past the first.
    120 
    121 .Lstrcat_zero_in_second_register:
        // On entry: ip = zero-byte flags of the word holding the NUL and
        // r0 = address just past that word. Move r0 back onto the NUL.
    122     // Check for zero in byte 0.
    123     tst     ip, #0x80
    124     it      ne
    125     subne   r0, r0, #4
    126     bne     .Lstrcat_r0_scan_done
    127     // Check for zero in byte 1.
    128     tst     ip, #0x8000
    129     it      ne
    130     subne   r0, r0, #3
    131     bne     .Lstrcat_r0_scan_done
    132     // Check for zero in byte 2.
    133     tst     ip, #0x800000
    134     it      ne
    135     subne   r0, r0, #2
    136     it      eq
    137     // Zero is in byte 3.
    138     subeq   r0, r0, #1
    139 
    140 .Lstrcat_r0_scan_done:
        // r0 now points at dst's terminating NUL; r1 still points at the start
        // of src. Everything below is a string copy from r1 to r0.
    141     // Unroll the first 8 bytes that will be copied.
    142     m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish
    143     m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish
    144     m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish
    145     m_copy_byte reg=r5, cmd=cbz, label=.Lstrcpy_finish
    146     m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish
    147     m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish
    148     m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish
    149     m_copy_byte reg=r5, cmd=cbnz, label=.Lstrcpy_continue   // 8th byte: non-zero keeps copying, zero falls through to return.
    150 
    151 .Lstrcpy_finish:
    152     m_ret   inst=pop
    153 
    154 .Lstrcpy_continue:
    155     pld     [r1, #0]
    156     ands    r3, r0, #7                  // r3 = r0 mod 8; non-zero means dst needs aligning first.
    157     bne     .Lstrcpy_align_dst
    158 
    159 .Lstrcpy_check_src_align:
    160     // At this point dst is aligned to a double word, check if src
    161     // is also aligned to a double word.
    162     ands    r3, r1, #7                  // r3 = r1 mod 8; also the branch-table index below.
    163     bne     .Lstrcpy_unaligned_copy
    164 
    165     .p2align 2
    166 .Lstrcpy_mainloop:
        // Both pointers are 8-byte aligned: copy eight bytes per iteration,
        // testing each word for a zero byte before it is stored.
    167     ldmia   r1!, {r2, r3}
    168 
    169     pld     [r1, #64]
    170 
    171     sub     ip, r2, #0x01010101
    172     bic     ip, ip, r2
    173     ands    ip, ip, #0x80808080
    174     bne     .Lstrcpy_zero_in_first_register
    175 
    176     sub     ip, r3, #0x01010101
    177     bic     ip, ip, r3
    178     ands    ip, ip, #0x80808080
    179     bne     .Lstrcpy_zero_in_second_register
    180 
    181     stmia   r0!, {r2, r3}
    182     b       .Lstrcpy_mainloop
    183 
    184 .Lstrcpy_zero_in_first_register:
        // r2 holds the NUL, ip holds its zero-byte flags, nothing from r2 has
        // been stored yet. Store bytes of r2 up to and including the NUL.
    185     lsls    lr, ip, #17                 // Z clear if the byte 0 flag (bit 7) is set; C = byte 1 flag (bit 15).
    186     itt     ne
    187     strbne  r2, [r0]                    // NUL is byte 0: store one byte.
    188     m_ret   inst=popne
    189     itt     cs
    190     strhcs  r2, [r0]                    // NUL is byte 1: store two bytes.
    191     m_ret   inst=popcs
    192     lsls    ip, ip, #1                  // Bits 7 and 15 are clear here, so Z set means the byte 2 flag is clear.
    193     itt     eq
    194     streq   r2, [r0]                    // NUL is byte 3: store the whole word.
    195     m_ret   inst=popeq
    196     strh    r2, [r0], #2                // NUL is byte 2: store two bytes, then the NUL byte.
    197     lsr     r3, r2, #16
    198     strb    r3, [r0]
    199     m_ret   inst=pop
    200 
    201 .Lstrcpy_zero_in_second_register:
        // r2 has no zero byte, r3 holds the NUL, ip holds r3's zero-byte flags.
        // Store all of r2, then the bytes of r3 up to and including the NUL.
    202     lsls    lr, ip, #17                 // Same flag decoding as in .Lstrcpy_zero_in_first_register.
    203     ittt    ne
    204     stmiane r0!, {r2}
    205     strbne  r3, [r0]                    // NUL is byte 0 of r3.
    206     m_ret   inst=popne
    207     ittt    cs
    208     strcs   r2, [r0], #4
    209     strhcs  r3, [r0]                    // NUL is byte 1 of r3.
    210     m_ret   inst=popcs
    211     lsls    ip, ip, #1
    212     itt     eq
    213     stmiaeq r0, {r2, r3}                // NUL is byte 3 of r3: store both words.
    214     m_ret   inst=popeq
    215     stmia   r0!, {r2}                   // NUL is byte 2 of r3.
    216     strh    r3, [r0], #2
    217     lsr     r4, r3, #16                 // r4 is scratch here; m_ret reloads its saved value.
    218     strb    r4, [r0]
    219     m_ret   inst=pop
    220 
    221 .Lstrcpy_align_dst:
    222     // Align to a double word (64 bits).
        // r3 = 8 - (r0 mod 8) = number of bytes (1..7) to copy to align r0.
        // The lsls puts bit 0 of r3 into Z (Z set if bit 0 is clear) and bit 1
        // into C. The loads, stores and cbz below leave the flags alone, so
        // C is still valid at the bcc.
    223     rsb     r3, r3, #8
    224     lsls    ip, r3, #31
    225     beq     .Lstrcpy_align_to_32
    226 
    227     ldrb    r2, [r1], #1                // Bit 0 set: copy one byte.
    228     strb    r2, [r0], #1
    229     cbz     r2, .Lstrcpy_complete
    230 
    231 .Lstrcpy_align_to_32:
    232     bcc     .Lstrcpy_align_to_64        // Bit 1 clear: no two-byte step needed.
    233 
    234     ldrb    r4, [r1], #1                // Bit 1 set: copy two bytes.
    235     strb    r4, [r0], #1
    236     cmp     r4, #0
    237     it      eq
    238     m_ret   inst=popeq
    239     ldrb    r5, [r1], #1
    240     strb    r5, [r0], #1
    241     cmp     r5, #0
    242     it      eq
    243     m_ret   inst=popeq
    244 
    245 .Lstrcpy_align_to_64:
    246     tst     r3, #4                      // Bit 2 set: four more bytes are needed.
    247     beq     .Lstrcpy_check_src_align
    248     // Read one byte at a time since we don't know the src alignment
    249     // and we don't want to read into a different page.
    250     ldrb    r4, [r1], #1
    251     strb    r4, [r0], #1
    252     cbz     r4, .Lstrcpy_complete
    253     ldrb    r5, [r1], #1
    254     strb    r5, [r0], #1
    255     cbz     r5, .Lstrcpy_complete
    256     ldrb    r4, [r1], #1
    257     strb    r4, [r0], #1
    258     cbz     r4, .Lstrcpy_complete
    259     ldrb    r5, [r1], #1
    260     strb    r5, [r0], #1
    261     cbz     r5, .Lstrcpy_complete
    262     b       .Lstrcpy_check_src_align
    263 
    264 .Lstrcpy_complete:
    265     m_ret   inst=pop
    266 
    267 .Lstrcpy_unaligned_copy:
    268     // Dst is aligned to a double word, while src is at an unknown alignment.
    269     // There are 7 different versions of the unaligned copy code
    270     // to prevent overreading the src. The mainloop of every single version
    271     // will store 64 bits per loop. The difference is how much of src can
    272     // be read without potentially crossing a page boundary.
        // r3 = r1 mod 8 (1..7 here; 0 went to .Lstrcpy_mainloop). The variant
        // .Lstrcpy_unalignN handles N = 8 - r3, the byte count from r1 to the
        // next 8-byte boundary. Each loop iteration advances r1 by 8, so a
        // variant keeps looping on itself. Bytes up to that boundary are
        // checked for NUL before any word load that reaches past it.
    273     tbb     [pc, r3]                    // Table-branch: index r3 into the byte table that follows.
    274 .Lstrcpy_unaligned_branchtable:
    275     .byte 0                             // Index 0 placeholder; r3 is never 0 here.
    276     .byte ((.Lstrcpy_unalign7 - .Lstrcpy_unaligned_branchtable)/2)
    277     .byte ((.Lstrcpy_unalign6 - .Lstrcpy_unaligned_branchtable)/2)
    278     .byte ((.Lstrcpy_unalign5 - .Lstrcpy_unaligned_branchtable)/2)
    279     .byte ((.Lstrcpy_unalign4 - .Lstrcpy_unaligned_branchtable)/2)
    280     .byte ((.Lstrcpy_unalign3 - .Lstrcpy_unaligned_branchtable)/2)
    281     .byte ((.Lstrcpy_unalign2 - .Lstrcpy_unaligned_branchtable)/2)
    282     .byte ((.Lstrcpy_unalign1 - .Lstrcpy_unaligned_branchtable)/2)
    283 
    284     .p2align 2
    285     // Can read 7 bytes before possibly crossing a page.
    286 .Lstrcpy_unalign7:
    287     ldr     r2, [r1], #4                // First 4 of the 7 bytes.
    288 
    289     sub     ip, r2, #0x01010101
    290     bic     ip, ip, r2
    291     ands    ip, ip, #0x80808080
    292     bne     .Lstrcpy_zero_in_first_register
    293 
        // Check the remaining 3 bytes one at a time before the word load below.
    294     ldrb    r3, [r1]
    295     cbz     r3, .Lstrcpy_unalign7_copy5bytes
    296     ldrb    r4, [r1, #1]
    297     cbz     r4, .Lstrcpy_unalign7_copy6bytes
    298     ldrb    r5, [r1, #2]
    299     cbz     r5, .Lstrcpy_unalign7_copy7bytes
    300 
    301     ldr     r3, [r1], #4
    302     pld     [r1, #64]
    303 
    304     lsrs    ip, r3, #24                 // Only the top byte of r3 is unchecked; Z set if it is the NUL.
    305     stmia   r0!, {r2, r3}               // Store all 8 bytes either way (flags are unchanged).
    306     beq     .Lstrcpy_unalign_return
    307     b       .Lstrcpy_unalign7
    308 
    309 .Lstrcpy_unalign7_copy5bytes:
    310     stmia   r0!, {r2}
    311     strb    r3, [r0]                    // r3 is the NUL.
    312 .Lstrcpy_unalign_return:
    313     m_ret   inst=pop
    314 
    315 .Lstrcpy_unalign7_copy6bytes:
    316     stmia   r0!, {r2}
    317     strb    r3, [r0], #1
    318     strb    r4, [r0], #1                // r4 is the NUL.
    319     m_ret   inst=pop
    320 
    321 .Lstrcpy_unalign7_copy7bytes:
    322     stmia   r0!, {r2}
    323     strb    r3, [r0], #1
    324     strb    r4, [r0], #1
    325     strb    r5, [r0], #1                // r5 is the NUL.
    326     m_ret   inst=pop
    327 
    328     .p2align 2
    329     // Can read 6 bytes before possibly crossing a page.
    330 .Lstrcpy_unalign6:
    331     ldr     r2, [r1], #4
    332 
    333     sub     ip, r2, #0x01010101
    334     bic     ip, ip, r2
    335     ands    ip, ip, #0x80808080
    336     bne     .Lstrcpy_zero_in_first_register
    337 
        // Check the remaining 2 bytes one at a time before the word load below.
    338     ldrb    r4, [r1]
    339     cbz     r4, .Lstrcpy_unalign_copy5bytes
    340     ldrb    r5, [r1, #1]
    341     cbz     r5, .Lstrcpy_unalign_copy6bytes
    342 
    343     ldr     r3, [r1], #4
    344     pld     [r1, #64]
    345 
    346     tst     r3, #0xff0000               // Bytes 0 and 1 of r3 are known non-zero; test byte 2.
    347     beq     .Lstrcpy_unalign6_copy7bytes
    348     lsrs    ip, r3, #24                 // Z set if byte 3 of r3 is the NUL.
    349     stmia   r0!, {r2, r3}
    350     beq     .Lstrcpy_unalign_return
    351     b       .Lstrcpy_unalign6
    352 
    353 .Lstrcpy_unalign6_copy7bytes:
    354     stmia   r0!, {r2}
    355     strh    r3, [r0], #2
    356     lsr     r3, #16
    357     strb    r3, [r0]                    // Byte 2 of the original r3: the NUL.
    358     m_ret   inst=pop
    359 
    360     .p2align 2
    361     // Can read 5 bytes before possibly crossing a page.
    362 .Lstrcpy_unalign5:
    363     ldr     r2, [r1], #4
    364 
    365     sub     ip, r2, #0x01010101
    366     bic     ip, ip, r2
    367     ands    ip, ip, #0x80808080
    368     bne     .Lstrcpy_zero_in_first_register
    369 
    370     ldrb    r4, [r1]                    // Check the 5th byte before the word load below.
    371     cbz     r4, .Lstrcpy_unalign_copy5bytes
    372 
    373     ldr     r3, [r1], #4
    374 
    375     pld     [r1, #64]
    376 
    377     sub     ip, r3, #0x01010101
    378     bic     ip, ip, r3
    379     ands    ip, ip, #0x80808080
    380     bne     .Lstrcpy_zero_in_second_register
    381 
    382     stmia   r0!, {r2, r3}
    383     b       .Lstrcpy_unalign5
    384 
    385 .Lstrcpy_unalign_copy5bytes:
    386     stmia   r0!, {r2}
    387     strb    r4, [r0]                    // r4 is the NUL.
    388     m_ret   inst=pop
    389 
    390 .Lstrcpy_unalign_copy6bytes:
    391     stmia   r0!, {r2}
    392     strb    r4, [r0], #1
    393     strb    r5, [r0]                    // r5 is the NUL.
    394     m_ret   inst=pop
    395 
    396     .p2align 2
    397     // Can read 4 bytes before possibly crossing a page.
    398 .Lstrcpy_unalign4:
        // r1 mod 8 == 4: load one word at a time, testing each for a zero
        // byte before loading the next.
    399     ldmia   r1!, {r2}
    400 
    401     sub     ip, r2, #0x01010101
    402     bic     ip, ip, r2
    403     ands    ip, ip, #0x80808080
    404     bne     .Lstrcpy_zero_in_first_register
    405 
    406     ldmia   r1!, {r3}
    407     pld     [r1, #64]
    408 
    409     sub     ip, r3, #0x01010101
    410     bic     ip, ip, r3
    411     ands    ip, ip, #0x80808080
    412     bne     .Lstrcpy_zero_in_second_register
    413 
    414     stmia   r0!, {r2, r3}
    415     b       .Lstrcpy_unalign4
    416 
    417     .p2align 2
    418     // Can read 3 bytes before possibly crossing a page.
    419 .Lstrcpy_unalign3:
        // Check the 3 bytes one at a time before the word loads below.
    420     ldrb    r2, [r1]
    421     cbz     r2, .Lstrcpy_unalign3_copy1byte
    422     ldrb    r3, [r1, #1]
    423     cbz     r3, .Lstrcpy_unalign3_copy2bytes
    424     ldrb    r4, [r1, #2]
    425     cbz     r4, .Lstrcpy_unalign3_copy3bytes
    426 
    427     ldr     r2, [r1], #4
    428     ldr     r3, [r1], #4
    429 
    430     pld     [r1, #64]
    431 
    432     lsrs    lr, r2, #24                 // Only byte 3 of r2 is unchecked; Z set if it is the NUL.
    433     beq     .Lstrcpy_unalign_copy4bytes
    434 
    435     sub     ip, r3, #0x01010101
    436     bic     ip, ip, r3
    437     ands    ip, ip, #0x80808080
    438     bne     .Lstrcpy_zero_in_second_register
    439 
    440     stmia   r0!, {r2, r3}
    441     b       .Lstrcpy_unalign3
    442 
    443 .Lstrcpy_unalign3_copy1byte:
    444     strb    r2, [r0]                    // r2 is the NUL.
    445     m_ret   inst=pop
    446 
    447 .Lstrcpy_unalign3_copy2bytes:
    448     strb    r2, [r0], #1
    449     strb    r3, [r0]                    // r3 is the NUL.
    450     m_ret   inst=pop
    451 
    452 .Lstrcpy_unalign3_copy3bytes:
    453     strb    r2, [r0], #1
    454     strb    r3, [r0], #1
    455     strb    r4, [r0]                    // r4 is the NUL.
    456     m_ret   inst=pop
    457 
    458     .p2align 2
    459     // Can read 2 bytes before possibly crossing a page.
    460 .Lstrcpy_unalign2:
        // Check the 2 bytes one at a time before the word loads below.
    461     ldrb    r2, [r1]
    462     cbz     r2, .Lstrcpy_unalign_copy1byte
    463     ldrb    r3, [r1, #1]
    464     cbz     r3, .Lstrcpy_unalign_copy2bytes
    465 
    466     ldr     r2, [r1], #4
    467     ldr     r3, [r1], #4
    468     pld     [r1, #64]
    469 
    470     tst     r2, #0xff0000               // Bytes 0 and 1 of r2 are known non-zero; test byte 2.
    471     beq     .Lstrcpy_unalign_copy3bytes
    472     lsrs    ip, r2, #24                 // Z set if byte 3 of r2 is the NUL.
    473     beq     .Lstrcpy_unalign_copy4bytes
    474 
    475     sub     ip, r3, #0x01010101
    476     bic     ip, ip, r3
    477     ands    ip, ip, #0x80808080
    478     bne     .Lstrcpy_zero_in_second_register
    479 
    480     stmia   r0!, {r2, r3}
    481     b       .Lstrcpy_unalign2
    482 
    483     .p2align 2
    484     // Can read 1 byte before possibly crossing a page.
    485 .Lstrcpy_unalign1:
    486     ldrb    r2, [r1]                    // Check the single byte before the word loads below.
    487     cbz     r2, .Lstrcpy_unalign_copy1byte
    488 
    489     ldr     r2, [r1], #4
    490     ldr     r3, [r1], #4
    491 
    492     pld     [r1, #64]
    493 
    494     sub     ip, r2, #0x01010101
    495     bic     ip, ip, r2
    496     ands    ip, ip, #0x80808080
    497     bne     .Lstrcpy_zero_in_first_register
    498 
    499     sub     ip, r3, #0x01010101
    500     bic     ip, ip, r3
    501     ands    ip, ip, #0x80808080
    502     bne     .Lstrcpy_zero_in_second_register
    503 
    504     stmia   r0!, {r2, r3}
    505     b       .Lstrcpy_unalign1
    506 
    507 .Lstrcpy_unalign_copy1byte:
    508     strb    r2, [r0]                    // r2 is the NUL.
    509     m_ret   inst=pop
    510 
    511 .Lstrcpy_unalign_copy2bytes:
    512     strb    r2, [r0], #1
    513     strb    r3, [r0]                    // r3 is the NUL.
    514     m_ret   inst=pop
    515 
    516 .Lstrcpy_unalign_copy3bytes:
    517     strh    r2, [r0], #2                // r2 is a loaded word whose byte 2 is the NUL.
    518     lsr     r2, #16
    519     strb    r2, [r0]
    520     m_ret   inst=pop
    521 
    522 .Lstrcpy_unalign_copy4bytes:
    523     stmia   r0, {r2}                    // r2 is a loaded word whose byte 3 is the NUL.
    524     m_ret   inst=pop
    525 
    526 .Lstrcat_align_src:
        // Despite the label name, this aligns r0 (the dst string being
        // scanned for its NUL). r3 = r0 mod 8, computed before the branch here.
    527     // Align to a double word (64 bits).
        // r3 = 8 - (r0 mod 8) = number of bytes (1..7) to scan to align r0.
        // The lsls puts bit 0 of r3 into Z (Z set if bit 0 is clear) and bit 1
        // into C. ldrb and cbz leave the flags alone, so C is still valid
        // at the bcc.
    528     rsb     r3, r3, #8
    529     lsls    ip, r3, #31
    530     beq     .Lstrcat_align_to_32
    531     ldrb    r2, [r0], #1                // Bit 0 set: scan one byte.
    532     cbz     r2, .Lstrcat_r0_update
    533 
    534 .Lstrcat_align_to_32:
    535     bcc     .Lstrcat_align_to_64        // Bit 1 clear: no two-byte step needed.
    536     ldrb    r2, [r0], #1                // Bit 1 set: scan two bytes.
    537     cbz     r2, .Lstrcat_r0_update
    538     ldrb    r2, [r0], #1
    539     cbz     r2, .Lstrcat_r0_update
    540 
    541 .Lstrcat_align_to_64:
    542     tst     r3, #4                      // Bit 2 clear: r0 is already 8-byte aligned.
    543     beq     .Lstrcat_mainloop
    544     ldr     r3, [r0], #4                // Scan one word; r0 ends up just past it.
    545 
    546     sub     ip, r3, #0x01010101
    547     bic     ip, ip, r3
    548     ands    ip, ip, #0x80808080
    549     bne     .Lstrcat_zero_in_second_register
    550     b       .Lstrcat_mainloop
    551 
    552 .Lstrcat_r0_update:
    553     sub     r0, r0, #1                  // Undo the post-increment so r0 points at the NUL.
    554     b .Lstrcat_r0_scan_done
    555 END(strcat)
    556