Home | History | Annotate | Download | only in bionic
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  *  * Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  *  * Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in
     12  *    the documentation and/or other materials provided with the
     13  *    distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     16  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     17  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     18  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
     19  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
     22  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
     25  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 /*
     29  * Copyright (c) 2013 ARM Ltd
     30  * All rights reserved.
     31  *
     32  * Redistribution and use in source and binary forms, with or without
     33  * modification, are permitted provided that the following conditions
     34  * are met:
     35  * 1. Redistributions of source code must retain the above copyright
     36  *    notice, this list of conditions and the following disclaimer.
     37  * 2. Redistributions in binary form must reproduce the above copyright
     38  *    notice, this list of conditions and the following disclaimer in the
     39  *    documentation and/or other materials provided with the distribution.
     40  * 3. The name of the company may not be used to endorse or promote
     41  *    products derived from this software without specific prior written
     42  *    permission.
     43  *
     44  * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
     45  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
     46  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     47  * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     48  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     49  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     50  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     51  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     52  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     53  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     54  */
     55 
     56 #include <private/bionic_asm.h>
     57 
     58     .syntax unified
            // Everything below is written in the unified ARM/Thumb syntax.
     59 
            // Assemble as Thumb code. .thumb_func marks the next symbol defined as
            // a Thumb function entry (presumably strcat, via ENTRY -- confirm
            // against bionic_asm.h).
     60     .thumb
     61     .thumb_func
     62 
     63     .macro m_push
            // Save r0 (the dst pointer to be returned), r4 and r5 (used as scratch
            // by strcat) and lr. Every exit after this point goes through m_pop.
     64     push    {r0, r4, r5, lr}
     65     .endm // m_push
     66 
     67     .macro m_pop
            // Undo m_push and return: r0, r4 and r5 are restored and the saved lr
            // is popped straight into pc.
     68     pop     {r0, r4, r5, pc}
     69     .endm // m_pop
     70 
     71     .macro m_scan_byte
            // Test the byte at [r0]: leave for .L_strcat_r0_scan_done (with r0
            // still pointing at it) if it is zero, otherwise advance r0 by one.
            // Clobbers r3.
     72     ldrb    r3, [r0]
     73     cbz     r3, .L_strcat_r0_scan_done
     74     add     r0, #1
     75     .endm // m_scan_byte
     76 
     77     .macro m_copy_byte reg, cmd, label
            // Copy one byte from [r1] to [r0] through \reg, post-incrementing both
            // pointers, then apply \cmd (cbz or cbnz at the call sites) to the
            // copied byte to decide whether to branch to \label.
     78     ldrb    \reg, [r1], #1
     79     strb    \reg, [r0], #1
     80     \cmd    \reg, \label
     81     .endm // m_copy_byte
     82 
     83 ENTRY(strcat)
            // strcat(dst, src): append the string at src to the string at dst.
            //   In:      r0 = dst, r1 = src.
            //   Out:     r0 = the dst pointer passed in (restored by m_pop, or
            //            left untouched on the empty-src early return).
            //   Scratch: r2, r3, ip, lr and the flags; r4/r5 are saved by m_push.
            //
            // Phase 1 (.L_strcat_* labels) advances r0 to the terminating zero of
            // dst. Phase 2 (.L_strcpy_* labels) copies src there, terminator
            // included.
            //
            // Both phases detect a zero byte in a 32-bit word w with
            //     (w - 0x01010101) & ~w & 0x80808080
            // which is non-zero exactly when w contains a zero byte. The lowest
            // marker bit set (bit 7, 15, 23 or 31) identifies the first zero byte
            // in memory order; the byte/halfword stores below treat the low byte
            // of a register as the lowest address (little-endian).
            //
            // NOTE(review): cbz/cbnz only branch a short distance forward and the
            // tbb table stores byte-sized offsets, so inserting instructions
            // between a branch and its target may stop this file assembling.
     84     // Quick check to see if src is empty.
     85     ldrb    r2, [r1]
     86     pld     [r1, #0]
     87     cbnz    r2, .L_strcat_continue
            // Nothing to append: return with r0 (dst) unchanged.
     88     bx      lr
     89 
     90 .L_strcat_continue:
     91     // To speed up really small dst strings, unroll checking the first 4 bytes.
     92     m_push
     93     m_scan_byte
     94     m_scan_byte
     95     m_scan_byte
     96     m_scan_byte
     97 
            // r3 = offset of r0 within its double word; if already aligned the
            // word-at-a-time scan can start immediately.
     98     ands    r3, r0, #7
     99     beq     .L_strcat_mainloop
    100 
    101     // Align to a double word (64 bits).
            // r3 = 8 - (r0 & 7) = bytes to scan to reach alignment (1..7).
            // lsls #31 leaves Z clear if bit 0 of r3 is set and C = bit 1 of r3.
            // The ldrb/cbz/add instructions before the bcc below leave the flags
            // alone, so C is still valid there.
    102     rsb     r3, r3, #8
    103     lsls    ip, r3, #31
    104     beq     .L_strcat_align_to_32
    105 
            // Bit 0 of r3 set: scan a single byte.
    106     ldrb    r5, [r0]
    107     cbz     r5, .L_strcat_r0_scan_done
    108     add     r0, r0, #1
    109 
    110 .L_strcat_align_to_32:
            // Carry clear means bit 1 of r3 is clear: skip the two-byte step.
    111     bcc     .L_strcat_align_to_64
    112 
    113     ldrb    r2, [r0]
    114     cbz     r2, .L_strcat_r0_scan_done
    115     add     r0, r0, #1
    116     ldrb    r4, [r0]
    117     cbz     r4, .L_strcat_r0_scan_done
    118     add     r0, r0, #1
    119 
    120 .L_strcat_align_to_64:
            // Bit 2 of r3 set: r0 is word aligned but 4 bytes short of double-word
            // alignment, so test one whole word before entering the main loop.
    121     tst     r3, #4
    122     beq     .L_strcat_mainloop
    123     ldr     r3, [r0], #4
    124 
    125     sub     ip, r3, #0x01010101
    126     bic     ip, ip, r3
    127     ands    ip, ip, #0x80808080
            // r0 is now 4 bytes past the start of the word just tested, the same
            // position as after the second word of the main loop, so that handler
            // is reused.
    128     bne     .L_strcat_zero_in_second_register
    129     b       .L_strcat_mainloop
    130 
    131 .L_strcat_r0_scan_done:
            // r0 points at the terminating zero of dst; src is appended from here.
    132     // For short copies, hard-code checking the first 8 bytes since this
    133     // new code doesn't win until after about 8 bytes.
    134     m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish
    135     m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish
    136     m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish
    137     m_copy_byte reg=r5, cmd=cbz, label=.L_strcpy_finish
    138     m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish
    139     m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish
    140     m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish
            // Eighth byte: continue with the bulk copy if it is non-zero, else
            // fall through to the exit.
    141     m_copy_byte reg=r5, cmd=cbnz, label=.L_strcpy_continue
    142 
    143 .L_strcpy_finish:
            // The byte just copied was the src terminator: done.
    144     m_pop
    145 
    146 .L_strcpy_continue:
            // Bring r0 (dst) up to double-word alignment, copying byte by byte.
            // Uses the same bit tests on r3 = 8 - (r0 & 7) as the scan phase:
            // bit 0 -> 1 byte, bit 1 (carry) -> 2 bytes, bit 2 -> 4 bytes.
    147     ands    r3, r0, #7
    148     beq     .L_strcpy_check_src_align
    149 
    150     // Align to a double word (64 bits).
    151     rsb     r3, r3, #8
    152     lsls    ip, r3, #31
    153     beq     .L_strcpy_align_to_32
    154 
    155     ldrb    r2, [r1], #1
    156     strb    r2, [r0], #1
    157     cbz     r2, .L_strcpy_complete
    158 
    159 .L_strcpy_align_to_32:
            // The flags from the lsls above are still intact here.
    160     bcc     .L_strcpy_align_to_64
    161 
    162     ldrb    r2, [r1], #1
    163     strb    r2, [r0], #1
    164     cbz     r2, .L_strcpy_complete
    165     ldrb    r2, [r1], #1
    166     strb    r2, [r0], #1
    167     cbz     r2, .L_strcpy_complete
    168 
    169 .L_strcpy_align_to_64:
    170     tst     r3, #4
    171     beq     .L_strcpy_check_src_align
    172     // Read one byte at a time since we don't know the src alignment
    173     // and we don't want to read into a different page.
    174     ldrb    r2, [r1], #1
    175     strb    r2, [r0], #1
    176     cbz     r2, .L_strcpy_complete
    177     ldrb    r2, [r1], #1
    178     strb    r2, [r0], #1
    179     cbz     r2, .L_strcpy_complete
    180     ldrb    r2, [r1], #1
    181     strb    r2, [r0], #1
    182     cbz     r2, .L_strcpy_complete
    183     ldrb    r2, [r1], #1
    184     strb    r2, [r0], #1
    185     cbz     r2, .L_strcpy_complete
    186 
    187 .L_strcpy_check_src_align:
    188     // At this point dst is aligned to a double word, check if src
    189     // is also aligned to a double word.
            // r3 = r1 & 7 is kept: it indexes the branch table in
            // .L_strcpy_unaligned_copy.
    190     ands    r3, r1, #7
    191     bne     .L_strcpy_unaligned_copy
    192 
    193     .p2align 2
    194 .L_strcpy_mainloop:
            // Both pointers are double-word aligned: handle 8 bytes per iteration.
            // The strd only happens once both words are known to hold no zero
            // byte; otherwise the handlers below store just the bytes up to and
            // including the terminator.
    195     ldrd    r2, r3, [r1], #8
    196 
    197     pld     [r1, #64]
    198 
    199     sub     ip, r2, #0x01010101
    200     bic     ip, ip, r2
    201     ands    ip, ip, #0x80808080
    202     bne     .L_strcpy_zero_in_first_register
    203 
    204     sub     ip, r3, #0x01010101
    205     bic     ip, ip, r3
    206     ands    ip, ip, #0x80808080
    207     bne     .L_strcpy_zero_in_second_register
    208 
    209     strd    r2, r3, [r0], #8
    210     b       .L_strcpy_mainloop
    211 
    212 .L_strcpy_complete:
            // The terminator was already stored by one of the byte copies above.
    213     m_pop
    214 
    215 .L_strcpy_zero_in_first_register:
            // ip holds the zero-byte markers for r2; r0 has not been advanced for
            // this group. Dispatch on the first zero byte:
            //   lsls #17 -> Z clear if bit 7 is set (byte 0), C = bit 15 (byte 1);
            //   lsls #1  -> non-zero only if bit 23 is set (byte 2);
            //   otherwise the zero is byte 3.
            // lr is free to use as scratch because m_push saved it.
    216     lsls    lr, ip, #17
    217     bne     .L_strcpy_copy1byte
    218     bcs     .L_strcpy_copy2bytes
    219     lsls    ip, ip, #1
    220     bne     .L_strcpy_copy3bytes
    221 
    222 .L_strcpy_copy4bytes:
    223     // Copy 4 bytes to the destination.
    224     str     r2, [r0]
    225     m_pop
    226 
    227 .L_strcpy_copy1byte:
            // Byte 0 of r2 is the terminator: storing the low byte writes it.
    228     strb    r2, [r0]
    229     m_pop
    230 
    231 .L_strcpy_copy2bytes:
    232     strh    r2, [r0]
    233     m_pop
    234 
    235 .L_strcpy_copy3bytes:
            // Low halfword first, then bits 16-23 of r2 (the zero byte).
    236     strh    r2, [r0], #2
    237     lsr     r2, #16
    238     strb    r2, [r0]
    239     m_pop
    240 
    241 .L_strcpy_zero_in_second_register:
            // Same dispatch for r3. r2 holds four non-zero bytes on every path
            // that reaches here and is stored in front of the partial r3.
    242     lsls    lr, ip, #17
    243     bne     .L_strcpy_copy5bytes
    244     bcs     .L_strcpy_copy6bytes
    245     lsls    ip, ip, #1
    246     bne     .L_strcpy_copy7bytes
    247 
    248     // Copy 8 bytes to the destination.
    249     strd    r2, r3, [r0]
    250     m_pop
    251 
    252 .L_strcpy_copy5bytes:
    253     str     r2, [r0], #4
    254     strb    r3, [r0]
    255     m_pop
    256 
    257 .L_strcpy_copy6bytes:
    258     str     r2, [r0], #4
    259     strh    r3, [r0]
    260     m_pop
    261 
    262 .L_strcpy_copy7bytes:
    263     str     r2, [r0], #4
    264     strh    r3, [r0], #2
    265     lsr     r3, #16
    266     strb    r3, [r0]
    267     m_pop
    268 
    269 .L_strcpy_unaligned_copy:
    270     // Dst is aligned to a double word, while src is at an unknown alignment.
    271     // There are 7 different versions of the unaligned copy code
    272     // to prevent overreading the src. The mainloop of every single version
    273     // will store 64 bits per loop. The difference is how much of src can
    274     // be read without potentially crossing a page boundary.
            // r3 = r1 & 7, which is 1..7 here (the aligned case branched away
            // above). tbb jumps forward by twice the byte at table[r3], so entry 0
            // is an unused placeholder and offset k selects unalign(8 - k).
    275     tbb     [pc, r3]
    276 .L_strcpy_unaligned_branchtable:
    277     .byte 0
    278     .byte ((.L_strcpy_unalign7 - .L_strcpy_unaligned_branchtable)/2)
    279     .byte ((.L_strcpy_unalign6 - .L_strcpy_unaligned_branchtable)/2)
    280     .byte ((.L_strcpy_unalign5 - .L_strcpy_unaligned_branchtable)/2)
    281     .byte ((.L_strcpy_unalign4 - .L_strcpy_unaligned_branchtable)/2)
    282     .byte ((.L_strcpy_unalign3 - .L_strcpy_unaligned_branchtable)/2)
    283     .byte ((.L_strcpy_unalign2 - .L_strcpy_unaligned_branchtable)/2)
    284     .byte ((.L_strcpy_unalign1 - .L_strcpy_unaligned_branchtable)/2)
    285 
    286     .p2align 2
    287     // Can read 7 bytes before possibly crossing a page.
            // Selected when (r1 & 7) == 1.
    288 .L_strcpy_unalign7:
    289     ldr     r2, [r1], #4
    290 
    291     sub     ip, r2, #0x01010101
    292     bic     ip, ip, r2
    293     ands    ip, ip, #0x80808080
    294     bne     .L_strcpy_zero_in_first_register
    295 
            // Bytes 5, 6 and 7 of the group are probed one at a time; the second
            // word is only loaded once all three are known to be non-zero.
    296     ldrb    r3, [r1]
    297     cbz     r3, .L_strcpy_unalign7_copy5bytes
    298     ldrb    r4, [r1, #1]
    299     cbz     r4, .L_strcpy_unalign7_copy6bytes
    300     ldrb    r5, [r1, #2]
    301     cbz     r5, .L_strcpy_unalign7_copy7bytes
    302 
    303     ldr     r3, [r1], #4
    304     pld     [r1, #64]
    305 
            // Z is set when the top byte of r3 (the 8th byte) is zero. strd does
            // not change the flags, so all 8 bytes, terminator included, are
            // stored before the beq exits.
    306     lsrs    ip, r3, #24
    307     strd    r2, r3, [r0], #8
    308     beq     .L_strcpy_unalign_return
    309     b       .L_strcpy_unalign7
    310 
    311 .L_strcpy_unalign7_copy5bytes:
            // r3 is the zero byte that was probed.
    312     str     r2, [r0], #4
    313     strb    r3, [r0]
            // Shared exit for the unalign loops once the terminator is stored.
    314 .L_strcpy_unalign_return:
    315     m_pop
    316 
    317 .L_strcpy_unalign7_copy6bytes:
    318     str     r2, [r0], #4
    319     strb    r3, [r0], #1
    320     strb    r4, [r0], #1
    321     m_pop
    322 
    323 .L_strcpy_unalign7_copy7bytes:
    324     str     r2, [r0], #4
    325     strb    r3, [r0], #1
    326     strb    r4, [r0], #1
    327     strb    r5, [r0], #1
    328     m_pop
    329 
    330     .p2align 2
    331     // Can read 6 bytes before possibly crossing a page.
            // Selected when (r1 & 7) == 2.
    332 .L_strcpy_unalign6:
    333     ldr     r2, [r1], #4
    334 
    335     sub     ip, r2, #0x01010101
    336     bic     ip, ip, r2
    337     ands    ip, ip, #0x80808080
    338     bne     .L_strcpy_zero_in_first_register
    339 
            // Probe bytes 5 and 6 individually before loading the second word.
    340     ldrb    r4, [r1]
    341     cbz     r4, .L_strcpy_unalign_copy5bytes
    342     ldrb    r5, [r1, #1]
    343     cbz     r5, .L_strcpy_unalign_copy6bytes
    344 
    345     ldr     r3, [r1], #4
    346     pld     [r1, #64]
    347 
            // Bits 16-23 of r3 are the 7th byte, the top byte is the 8th.
    348     tst     r3, #0xff0000
    349     beq     .L_strcpy_copy7bytes
    350     lsrs    ip, r3, #24
    351     strd    r2, r3, [r0], #8
    352     beq     .L_strcpy_unalign_return
    353     b       .L_strcpy_unalign6
    354 
    355     .p2align 2
    356     // Can read 5 bytes before possibly crossing a page.
            // Selected when (r1 & 7) == 3.
    357 .L_strcpy_unalign5:
    358     ldr     r2, [r1], #4
    359 
    360     sub     ip, r2, #0x01010101
    361     bic     ip, ip, r2
    362     ands    ip, ip, #0x80808080
    363     bne     .L_strcpy_zero_in_first_register
    364 
            // Probe byte 5 before loading the second word.
    365     ldrb    r4, [r1]
    366     cbz     r4, .L_strcpy_unalign_copy5bytes
    367 
    368     ldr     r3, [r1], #4
    369 
    370     pld     [r1, #64]
    371 
    372     sub     ip, r3, #0x01010101
    373     bic     ip, ip, r3
    374     ands    ip, ip, #0x80808080
    375     bne     .L_strcpy_zero_in_second_register
    376 
    377     strd    r2, r3, [r0], #8
    378     b       .L_strcpy_unalign5
    379 
            // Exit stubs shared by unalign6 and unalign5: r2 is a full word and
            // r4 (then r5) are the individually probed bytes, the last being zero.
    380 .L_strcpy_unalign_copy5bytes:
    381     str     r2, [r0], #4
    382     strb    r4, [r0]
    383     m_pop
    384 
    385 .L_strcpy_unalign_copy6bytes:
    386     str     r2, [r0], #4
    387     strb    r4, [r0], #1
    388     strb    r5, [r0]
    389     m_pop
    390 
    391     .p2align 2
    392     // Can read 4 bytes before possibly crossing a page.
            // Selected when (r1 & 7) == 4: each word is loaded and tested in turn,
            // no single-byte probes are needed.
    393 .L_strcpy_unalign4:
    394     ldr     r2, [r1], #4
    395 
    396     sub     ip, r2, #0x01010101
    397     bic     ip, ip, r2
    398     ands    ip, ip, #0x80808080
    399     bne     .L_strcpy_zero_in_first_register
    400 
    401     ldr     r3, [r1], #4
    402     pld     [r1, #64]
    403 
    404     sub     ip, r3, #0x01010101
    405     bic     ip, ip, r3
    406     ands    ip, ip, #0x80808080
    407     bne     .L_strcpy_zero_in_second_register
    408 
    409     strd    r2, r3, [r0], #8
    410     b       .L_strcpy_unalign4
    411 
    412     .p2align 2
    413     // Can read 3 bytes before possibly crossing a page.
            // Selected when (r1 & 7) == 5.
    414 .L_strcpy_unalign3:
            // Probe the first three bytes individually. The probed values are only
            // used by the copyN exits; otherwise r2/r3 are reloaded as full words.
    415     ldrb    r2, [r1]
    416     cbz     r2, .L_strcpy_unalign3_copy1byte
    417     ldrb    r3, [r1, #1]
    418     cbz     r3, .L_strcpy_unalign3_copy2bytes
    419     ldrb    r4, [r1, #2]
    420     cbz     r4, .L_strcpy_unalign3_copy3bytes
    421 
    422     ldr     r2, [r1], #4
    423     ldr     r3, [r1], #4
    424 
    425     pld     [r1, #64]
    426 
            // Bytes 0-2 of r2 are known non-zero, so only its top byte is checked.
    427     lsrs    lr, r2, #24
    428     beq     .L_strcpy_copy4bytes
    429 
    430     sub     ip, r3, #0x01010101
    431     bic     ip, ip, r3
    432     ands    ip, ip, #0x80808080
    433     bne     .L_strcpy_zero_in_second_register
    434 
    435     strd    r2, r3, [r0], #8
    436     b       .L_strcpy_unalign3
    437 
    438 .L_strcpy_unalign3_copy1byte:
    439     strb    r2, [r0]
    440     m_pop
    441 
    442 .L_strcpy_unalign3_copy2bytes:
    443     strb    r2, [r0], #1
    444     strb    r3, [r0]
    445     m_pop
    446 
    447 .L_strcpy_unalign3_copy3bytes:
    448     strb    r2, [r0], #1
    449     strb    r3, [r0], #1
    450     strb    r4, [r0]
    451     m_pop
    452 
    453     .p2align 2
    454     // Can read 2 bytes before possibly crossing a page.
            // Selected when (r1 & 7) == 6.
    455 .L_strcpy_unalign2:
            // Probe the first two bytes individually (r2, r4), then reload both
            // words in full.
    456     ldrb    r2, [r1]
    457     cbz     r2, .L_strcpy_unalign_copy1byte
    458     ldrb    r4, [r1, #1]
    459     cbz     r4, .L_strcpy_unalign_copy2bytes
    460 
    461     ldr     r2, [r1], #4
    462     ldr     r3, [r1], #4
    463     pld     [r1, #64]
    464 
            // Bytes 0-1 of r2 are known non-zero; check byte 2 (bits 16-23) and
            // then the top byte.
    465     tst     r2, #0xff0000
    466     beq     .L_strcpy_copy3bytes
    467     lsrs    ip, r2, #24
    468     beq     .L_strcpy_copy4bytes
    469 
    470     sub     ip, r3, #0x01010101
    471     bic     ip, ip, r3
    472     ands    ip, ip, #0x80808080
    473     bne     .L_strcpy_zero_in_second_register
    474 
    475     strd    r2, r3, [r0], #8
    476     b       .L_strcpy_unalign2
    477 
    478     .p2align 2
    479     // Can read 1 byte before possibly crossing a page.
            // Selected when (r1 & 7) == 7.
    480 .L_strcpy_unalign1:
            // Probe the first byte, then reload and test both words in full.
    481     ldrb    r2, [r1]
    482     cbz     r2, .L_strcpy_unalign_copy1byte
    483 
    484     ldr     r2, [r1], #4
    485     ldr     r3, [r1], #4
    486 
    487     pld     [r1, #64]
    488 
    489     sub     ip, r2, #0x01010101
    490     bic     ip, ip, r2
    491     ands    ip, ip, #0x80808080
    492     bne     .L_strcpy_zero_in_first_register
    493 
    494     sub     ip, r3, #0x01010101
    495     bic     ip, ip, r3
    496     ands    ip, ip, #0x80808080
    497     bne     .L_strcpy_zero_in_second_register
    498 
    499     strd    r2, r3, [r0], #8
    500     b       .L_strcpy_unalign1
    501 
            // Exit stubs shared by unalign2 and unalign1: r2 (and r4) are the
            // individually probed bytes, the last one being zero.
    502 .L_strcpy_unalign_copy1byte:
    503     strb    r2, [r0]
    504     m_pop
    505 
    506 .L_strcpy_unalign_copy2bytes:
    507     strb    r2, [r0], #1
    508     strb    r4, [r0]
    509     m_pop
    510 
    511     .p2align 2
    512 .L_strcat_mainloop:
            // Scan phase main loop: r0 is double-word aligned. Test 8 bytes of dst
            // per iteration; r0 is post-incremented past the pair before testing.
    513     ldrd    r2, r3, [r0], #8
    514 
    515     pld     [r0, #64]
    516 
    517     sub     ip, r2, #0x01010101
    518     bic     ip, ip, r2
    519     ands    ip, ip, #0x80808080
    520     bne     .L_strcat_zero_in_first_register
    521 
    522     sub     ip, r3, #0x01010101
    523     bic     ip, ip, r3
    524     ands    ip, ip, #0x80808080
    525     bne     .L_strcat_zero_in_second_register
    526     b       .L_strcat_mainloop
    527 
    528 .L_strcat_zero_in_first_register:
    529     // Prefetch the src now, it's going to be used soon.
    530     pld     [r1, #0]
            // r0 is 8 bytes past the start of r2. Step back onto the first zero
            // byte: byte 0 -> r0 - 8, byte 1 -> r0 - 7, byte 2 -> r0 - 6,
            // byte 3 -> r0 - 5. Same marker-bit dispatch as the copy handlers.
    531     lsls    lr, ip, #17
    532     bne     .L_strcat_sub8
    533     bcs     .L_strcat_sub7
    534     lsls    ip, ip, #1
    535     bne     .L_strcat_sub6
    536 
    537     sub     r0, r0, #5
    538     b       .L_strcat_r0_scan_done
    539 
    540 .L_strcat_sub8:
    541     sub     r0, r0, #8
    542     b       .L_strcat_r0_scan_done
    543 
    544 .L_strcat_sub7:
    545     sub     r0, r0, #7
    546     b       .L_strcat_r0_scan_done
    547 
    548 .L_strcat_sub6:
    549     sub     r0, r0, #6
    550     b       .L_strcat_r0_scan_done
    551 
    552 .L_strcat_zero_in_second_register:
    553     // Prefetch the src now, it's going to be used soon.
    554     pld     [r1, #0]
            // r0 is 4 bytes past the start of r3: byte 0 -> r0 - 4, byte 1 ->
            // r0 - 3, byte 2 -> r0 - 2, byte 3 -> r0 - 1.
    555     lsls    lr, ip, #17
    556     bne     .L_strcat_sub4
    557     bcs     .L_strcat_sub3
    558     lsls    ip, ip, #1
    559     bne     .L_strcat_sub2
    560 
    561     sub     r0, r0, #1
    562     b       .L_strcat_r0_scan_done
    563 
    564 .L_strcat_sub4:
    565     sub     r0, r0, #4
    566     b       .L_strcat_r0_scan_done
    567 
    568 .L_strcat_sub3:
    569     sub     r0, r0, #3
    570     b       .L_strcat_r0_scan_done
    571 
    572 .L_strcat_sub2:
    573     sub     r0, r0, #2
    574     b       .L_strcat_r0_scan_done
    575 END(strcat)
    576