Home | History | Annotate | Download | only in bionic
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  *  * Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  *  * Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in
     12  *    the documentation and/or other materials provided with the
     13  *    distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     16  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     17  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     18  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
     19  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
     22  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
     25  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 /*
     29  * Copyright (c) 2013 ARM Ltd
     30  * All rights reserved.
     31  *
     32  * Redistribution and use in source and binary forms, with or without
     33  * modification, are permitted provided that the following conditions
     34  * are met:
     35  * 1. Redistributions of source code must retain the above copyright
     36  *    notice, this list of conditions and the following disclaimer.
     37  * 2. Redistributions in binary form must reproduce the above copyright
     38  *    notice, this list of conditions and the following disclaimer in the
     39  *    documentation and/or other materials provided with the distribution.
     40  * 3. The name of the company may not be used to endorse or promote
     41  *    products derived from this software without specific prior written
     42  *    permission.
     43  *
     44  * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
     45  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
     46  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     47  * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     48  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     49  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     50  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     51  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     52  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     53  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     54  */
     55 
     56 .L_memcpy_base:
                // Shared copy body. Register roles, as used by the visible code:
                //   r0 = destination pointer (advanced by the stores)
                //   r1 = source pointer (advanced by the loads)
                //   r2 = number of bytes still to copy
                //   r3, ip, lr and d0-d7 are used as scratch.
                // NOTE(review): the code returns with `pop {r0, pc}`, so the entry
                // code (not visible here) presumably pushed r0 and lr -- confirm.
     57         // Assumes that n >= 0, and dst, src are valid pointers.
     58         // For any sizes less than 832 use the neon code that doesn't
     59         // care about the src alignment. This avoids any checks
     60         // for src alignment, and offers the best improvement since
     61         // smaller sized copies are dominated by the overhead of
     62         // the pre and post main loop.
     63         // For larger copies, if src and dst cannot both be aligned to
     64         // word boundaries, use the neon code.
     65         // For all other copies, align dst to a double word boundary
     66         // and copy using LDRD/STRD instructions.
                // NOTE(review): the strategy described above is only partly present
                // in the visible code: there is no 832-byte threshold, no src
                // alignment check and no LDRD/STRD path below. Every copy of 16
                // bytes or more falls through to .L_copy_unknown_alignment.
     67 
                // Fewer than 16 bytes (unsigned compare): skip the alignment and
                // bulk copies and go straight to the tail copy.
     68         cmp     r2, #16
     69         blo     .L_copy_less_than_16_unknown_align
     70 
     71 .L_copy_unknown_alignment:
     72         // Unknown alignment of src and dst.
     73         // Assumes that the first few bytes have already been prefetched.
                // In:      r0 = dst, r1 = src, r2 = byte count.
                // Scratch: r3, ip, lr, d0-d7, flags.
                // NOTE(review): the `sub r2, r2, r3` below assumes r2 >= r3 (r3 <= 15).
                // The fall-through entry guarantees r2 >= 16; confirm for any branch
                // to this label from outside the visible code.
     74 
     75         // Align destination to 128 bits. The mainloop store instructions
     76         // require this alignment or they will throw an exception.
     77         rsb         r3, r0, #0              // r3 = -dst
     78         ands        r3, r3, #0xF            // r3 = bytes up to the next 16-byte boundary (0..15)
     79         beq         2f                      // dst is already 16-byte aligned
     80 
     81         // Copy up to 15 bytes (count in r3).
     82         sub         r2, r2, r3              // take the alignment bytes out of the count
                // Only the flags of this movs matter (ip is overwritten later):
                // N = bit 0 of r3, C = bit 1 of r3.
     83         movs        ip, r3, lsl #31
     84 
     85         itt         mi                      // bit 0 set: copy 1 byte
     86         ldrbmi      lr, [r1], #1
     87         strbmi      lr, [r0], #1
     88         itttt       cs                      // bit 1 set: copy 2 bytes
     89         ldrbcs      ip, [r1], #1
     90         ldrbcs      lr, [r1], #1
     91         strbcs      ip, [r0], #1
     92         strbcs      lr, [r0], #1
     93 
                // N = bit 2 of r3, C = bit 3 of r3.
                // NOTE(review): bge tests N == V and this movs leaves V unchanged,
                // so the branch below relies on V being clear at this point.
     94         movs        ip, r3, lsl #29
     95         bge         1f                      // bit 2 clear: no 4-byte copy
     96         // Copies 4 bytes, dst 32 bits aligned before, at least 64 bits after.
                // The single-lane vld4.8 reads 4 consecutive bytes into lane 0 of
                // d0..d3; the matching vst4.8 writes them back in the same order.
     97         vld4.8      {d0[0], d1[0], d2[0], d3[0]}, [r1]!
     98         vst4.8      {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
     99 1:      bcc         2f                      // bit 3 clear (C still from the movs above): no 8-byte copy
    100         // Copies 8 bytes, dst 64 bits aligned before, at least 128 bits after.
    101         vld1.8      {d0}, [r1]!
    102         vst1.8      {d0}, [r0, :64]!
    103 
    104 2:      // Make sure we have at least 64 bytes to copy.
                // From here until label 3, r2 holds (bytes remaining - 64).
    105         subs        r2, r2, #64
    106         blo         2f                      // fewer than 64 bytes: skip the main loop
    107 
    108 1:      // The main loop copies 64 bytes at a time.
    109         vld1.8      {d0  - d3},   [r1]!
    110         vld1.8      {d4  - d7},   [r1]!
    111         pld         [r1, #(64*4)]           // prefetch 256 bytes past the already advanced src
    112         subs        r2, r2, #64             // sets C for the bhs below
    113         vst1.8      {d0  - d3},   [r0, :128]!
    114         vst1.8      {d4  - d7},   [r0, :128]!
    115         bhs         1b                      // no borrow: at least 64 more bytes remain
    116 
    117 2:      // Fix-up the remaining count and make sure we have >= 32 bytes left.
                // r2 = remaining - 64 here (remaining is 0..63), so the adds
                // produces a carry exactly when remaining >= 32.
    118         adds        r2, r2, #32
    119         blo         3f
    120 
    121         // 32 bytes. These cache lines were already preloaded.
    122         vld1.8      {d0 - d3},  [r1]!
    123         sub         r2, r2, #32             // keep r2 biased by -32 for the add at label 3
    124         vst1.8      {d0 - d3},  [r0, :128]!
    125 3:      // Less than 32 left.
    126         add         r2, r2, #32             // r2 = true number of bytes remaining (0..31)
    127         tst         r2, #0x10
    128         beq         .L_copy_less_than_16_unknown_align
    129         // Copies 16 bytes, destination 128 bits aligned.
                // r2 is not reduced by 16 afterwards: the tail code that follows
                // only examines bits 0-3 of r2.
    130         vld1.8      {d0, d1}, [r1]!
    131         vst1.8      {d0, d1}, [r0, :128]!
    132 
    133 .L_copy_less_than_16_unknown_align:
    134         // Copy up to 15 bytes (count in r2).
                // Only bits 0-3 of r2 are examined, one bit per 8/4/1/2-byte chunk.
                // The stores here carry no alignment qualifier.
                // N = bit 2 of r2, C = bit 3 of r2; the value written to ip is unused.
    135         movs        ip, r2, lsl #29
    136         bcc         1f                      // bit 3 clear: no 8-byte copy
    137         vld1.8      {d0}, [r1]!
    138         vst1.8      {d0}, [r0]!
                // NOTE(review): bge tests N == V and the movs above leaves V
                // unchanged, so this relies on V being clear on entry.
    139 1:      bge         2f                      // bit 2 clear: no 4-byte copy
    140         vld4.8      {d0[0], d1[0], d2[0], d3[0]}, [r1]!
    141         vst4.8      {d0[0], d1[0], d2[0], d3[0]}, [r0]!
    142 
    143 2:      // Copy 0 to 3 bytes.
                // After the shift, Z is clear iff bit 0 of r2 was set and C = bit 1.
                // r2 itself is consumed by the shift.
    144         lsls        r2, r2, #31
    145         itt         ne                      // bit 0 set: copy 1 byte
    146         ldrbne      lr, [r1], #1
    147         strbne      lr, [r0], #1
    148         itttt       cs                      // bit 1 set: copy 2 bytes
    149         ldrbcs      ip, [r1], #1
    150         ldrbcs      lr, [r1]                // last byte: src is not advanced any further
    151         strbcs      ip, [r0], #1
    152         strbcs      lr, [r0]                // last byte: dst is not advanced any further
    153 
                // Reloads r0 from the stack and returns by popping into pc.
                // NOTE(review): presumably pairs with a `push {r0, lr}` in the entry
                // code outside this view, making r0 the original dst -- confirm.
    154         pop         {r0, pc}
    155