Home | History | Annotate | Download | only in bionic
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  *  * Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  *  * Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in
     12  *    the documentation and/or other materials provided with the
     13  *    distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     16  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     17  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     18  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
     19  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
     22  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
     25  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 #include <private/bionic_asm.h>
     30 
     31 /*
      32  * This code assumes it is running on a processor that supports all ARMv7
      33  * instructions as well as the NEON instructions.
     34  */
     35 
      36     .fpu    neon                // let the assembler accept the NEON instructions (vdup/vmov/vst1) used in memset
      37     .syntax unified             // unified ARM/Thumb syntax; the conditional forms below (strbmi, popls, ...) are written in it
     38 
      39 ENTRY(__memset_chk)
                 // Checked front end for memset: r2 is compared against r3 and the
                 // request is forwarded to memset only when r2 <= r3 (unsigned).
                 // NOTE(review): presumably r0/r1/r2 are memset's dst/value/count and
                 // r3 is the size of the destination buffer -- confirm against the
                 // C prototype of __memset_chk.
      40         cmp         r2, r3
      41         bls         memset              // r2 <= r3: plain branch, lr untouched, so memset returns straight to our caller
      42 
      43         // Preserve lr for backtrace.
      44         push        {lr}
      45         .cfi_def_cfa_offset 4           // CFA is now 4 bytes above sp
      46         .cfi_rel_offset lr, 0           // saved lr lives at [sp, #0]
      47 
                 // r2 > r3: report the failure. No instruction follows this call, so
                 // __memset_chk_fail is presumably noreturn -- TODO confirm.
      48         bl          __memset_chk_fail
      49 END(__memset_chk)
     50 
     51 /* memset() returns its first argument.  */
     52 ENTRY(memset)
     53         // The neon memset only wins for less than 132.
     54         cmp         r2, #132
     55         bhi         .L_memset_large_copy
     56 
     57         mov         r3, r0
     58         vdup.8      q0, r1
     59 
     60         /* make sure we have at least 32 bytes to write */
     61         subs        r2, r2, #32
     62         blo         2f
     63         vmov        q1, q0
     64 
     65 1:      /* The main loop writes 32 bytes at a time */
     66         subs        r2, r2, #32
     67         vst1.8      {d0 - d3}, [r3]!
     68         bhs         1b
     69 
     70 2:      /* less than 32 left */
     71         add         r2, r2, #32
     72         tst         r2, #0x10
     73         beq         3f
     74 
     75         // writes 16 bytes, 128-bits aligned
     76         vst1.8      {d0, d1}, [r3]!
     77 3:      /* write up to 15-bytes (count in r2) */
     78         movs        ip, r2, lsl #29
     79         bcc         1f
     80         vst1.8      {d0}, [r3]!
     81 1:      bge         2f
     82         vst1.32     {d0[0]}, [r3]!
     83 2:      movs        ip, r2, lsl #31
     84         strbmi      r1, [r3], #1
     85         strbcs      r1, [r3], #1
     86         strbcs      r1, [r3], #1
     87         bx          lr
     88 
     89 .L_memset_large_copy:
     90         /* compute the offset to align the destination
     91          * offset = (4-(src&3))&3 = -src & 3
     92          */
     93         stmfd       sp!, {r0, r4-r7, lr}
     94         .cfi_def_cfa_offset 24
     95         .cfi_rel_offset r0, 0
     96         .cfi_rel_offset r4, 4
     97         .cfi_rel_offset r5, 8
     98         .cfi_rel_offset r6, 12
     99         .cfi_rel_offset r7, 16
    100         .cfi_rel_offset lr, 20
    101 
    102         rsb         r3, r0, #0
    103         ands        r3, r3, #3
    104         cmp         r3, r2
    105         movhi       r3, r2
    106 
    107         /* splat r1 */
    108         mov         r1, r1, lsl #24
    109         orr         r1, r1, r1, lsr #8
    110         orr         r1, r1, r1, lsr #16
    111 
    112         movs        r12, r3, lsl #31
    113         strbcs      r1, [r0], #1    /* can't use strh (alignment unknown) */
    114         strbcs      r1, [r0], #1
    115         strbmi      r1, [r0], #1
    116         subs        r2, r2, r3
    117         popls       {r0, r4-r7, pc}   /* return */
    118 
    119         /* align the destination to a cache-line */
    120         mov         r12, r1
    121         mov         lr, r1
    122         mov         r4, r1
    123         mov         r5, r1
    124         mov         r6, r1
    125         mov         r7, r1
    126 
    127         rsb         r3, r0, #0
    128         ands        r3, r3, #0x1C
    129         beq         3f
    130         cmp         r3, r2
    131         andhi       r3, r2, #0x1C
    132         sub         r2, r2, r3
    133 
    134         /* conditionally writes 0 to 7 words (length in r3) */
    135         movs        r3, r3, lsl #28
    136         stmcs       r0!, {r1, lr}
    137         stmcs       r0!, {r1, lr}
    138         stmmi       r0!, {r1, lr}
    139         movs        r3, r3, lsl #2
    140         strcs       r1, [r0], #4
    141 
    142 3:
    143         subs        r2, r2, #32
    144         mov         r3, r1
    145         bmi         2f
    146 1:      subs        r2, r2, #32
    147         stmia       r0!, {r1,r3,r4,r5,r6,r7,r12,lr}
    148         bhs         1b
    149 2:      add         r2, r2, #32
    150 
    151         /* conditionally stores 0 to 31 bytes */
    152         movs        r2, r2, lsl #28
    153         stmcs       r0!, {r1,r3,r12,lr}
    154         stmmi       r0!, {r1, lr}
    155         movs        r2, r2, lsl #2
    156         strcs       r1, [r0], #4
    157         strhmi      r1, [r0], #2
    158         movs        r2, r2, lsl #2
    159         strbcs      r1, [r0]
    160         ldmfd       sp!, {r0, r4-r7, pc}
    161 END(memset)
    162