/*
 * Copyright 2010 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

/* Changes:
 * 2010-08-11 Steve McIntyre <steve.mcintyre (at) arm.com>
 *    Added small changes to the two functions to make them work on the
 *    specified number of 16- or 32-bit values rather than the original
 *    code which was specified as a count of bytes. More verbose comments
 *    to aid future maintenance.
 */

     16     .text
     17     .align 4
     18     .syntax unified
     19 
     20     .global arm_memset32
     21     .type   arm_memset32, %function
     22     .global arm_memset16
     23     .type   arm_memset16, %function
     24 
/*
 * Optimized memset functions for ARM.
 *
 * void arm_memset16(uint16_t* dst, uint16_t value, int count);
 * void arm_memset32(uint32_t* dst, uint32_t value, int count);
 *
 * In both functions `count` is the number of elements (16-bit or 32-bit
 * values respectively), not a number of bytes.
 */
     32 arm_memset16:
     33         .fnstart
     34         push        {lr}
     35 
     36         /* if count is equal to zero then abort */
     37         teq         r2, #0
     38         ble         .Lfinish
     39 
     40         /* Multiply count by 2 - go from the number of 16-bit shorts
     41          * to the number of bytes desired. */
     42         mov         r2, r2, lsl #1
     43 
     44         /* expand the data to 32 bits */
     45         orr         r1, r1, r1, lsl #16
     46 
     47         /* align to 32 bits */
     48         tst         r0, #2
     49         strhne      r1, [r0], #2
     50         subne       r2, r2, #2
     51 
     52         /* Now jump into the main loop below. */
     53         b           .Lwork_32
     54         .fnend
     55 
     56 arm_memset32:
     57         .fnstart
     58         push        {lr}
     59 
     60         /* if count is equal to zero then abort */
     61         teq         r2, #0
     62         ble         .Lfinish
     63 
     64         /* Multiply count by 4 - go from the number of 32-bit words to
     65          * the number of bytes desired. */
     66         mov         r2, r2, lsl #2
     67 
     68 .Lwork_32:
     69         /* Set up registers ready for writing them out. */
     70         mov         ip, r1
     71         mov         lr, r1
     72 
     73         /* Try to align the destination to a cache line. Assume 32
     74          * byte (8 word) cache lines, it's the common case. */
     75         rsb         r3, r0, #0
     76         ands        r3, r3, #0x1C
     77         beq         .Laligned32
     78         cmp         r3, r2
     79         andhi       r3, r2, #0x1C
     80         sub         r2, r2, r3
     81 
     82         /* (Optionally) write any unaligned leading bytes.
     83          * (0-28 bytes, length in r3) */
     84         movs        r3, r3, lsl #28
     85         stmiacs     r0!, {r1, lr}
     86         stmiacs     r0!, {r1, lr}
     87         stmiami     r0!, {r1, lr}
     88         movs        r3, r3, lsl #2
     89         strcs       r1, [r0], #4
     90 
     91         /* Now quickly loop through the cache-aligned data. */
     92 .Laligned32:
     93         mov         r3, r1
     94 1:      subs        r2, r2, #32
     95         stmiahs     r0!, {r1,r3,ip,lr}
     96         stmiahs     r0!, {r1,r3,ip,lr}
     97         bhs         1b
     98         add         r2, r2, #32
     99 
    100         /* (Optionally) store any remaining trailing bytes.
    101          * (0-30 bytes, length in r2) */
    102         movs        r2, r2, lsl #28
    103         stmiacs     r0!, {r1,r3,ip,lr}
    104         stmiami     r0!, {r1,lr}
    105         movs        r2, r2, lsl #2
    106         strcs       r1, [r0], #4
    107         strhmi      lr, [r0], #2
    108 
    109 .Lfinish:
    110         pop         {pc}
    111         .fnend
    112