/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Changes:
 * 2010-08-11 Steve McIntyre <steve.mcintyre@arm.com>
 *    Added small changes to the two functions to make them work on the
 *    specified number of 16- or 32-bit values rather than the original
 *    code, which was specified as a count of bytes. More verbose comments
 *    to aid future maintenance.
 */

        .text
        .align

        .global arm_memset32
        .type   arm_memset32, %function
        .global arm_memset16
        .type   arm_memset16, %function

/*
 * Optimized memset functions for ARM.
 *
 * void arm_memset16(uint16_t* dst, uint16_t value, int count);
 * void arm_memset32(uint32_t* dst, uint32_t value, int count);
 *
 */
arm_memset16:
        .fnstart
        push        {lr}

        /* duplicate the 16-bit value into both halves of r1 */
        orr         r1, r1, r1, lsl #16

        /* align the destination to 32 bits; if we store one leading
         * halfword, the count of halfwords drops by one */
        tst         r0, #2
        strneh      r1, [r0], #2
        subne       r2, r2, #1

        /* Multiply count by 2 - go from the number of 16-bit shorts
         * to the number of bytes desired. */
        mov         r2, r2, lsl #1

        /* Now jump into the main loop below. */
        b           .Lwork_32
        .fnend

arm_memset32:
        .fnstart
        push        {lr}

        /* Multiply count by 4 - go from the number of 32-bit words to
         * the number of bytes desired. */
        mov         r2, r2, lsl #2

.Lwork_32:
        /* Set up registers ready for writing them out. */
        mov         ip, r1
        mov         lr, r1

        /* Try to align the destination to a cache line. Assume 32
         * byte (8 word) cache lines, it's the common case. */
        rsb         r3, r0, #0
        ands        r3, r3, #0x1C
        beq         .Laligned32
        cmp         r3, r2
        andhi       r3, r2, #0x1C   /* buffer ends before the cache line:
                                     * just write its whole words here */
        sub         r2, r2, r3

        /* (Optionally) write any unaligned leading bytes.
         * (0-28 bytes, length in r3.) The shift moves bit 4 of the
         * length into C and bit 3 into N, so the conditional stores
         * below write 16, 8 and then 4 bytes as required. */
        movs        r3, r3, lsl #28
        stmcsia     r0!, {r1, lr}           /* C set: write 16 bytes */
        stmcsia     r0!, {r1, lr}
        stmmiia     r0!, {r1, lr}           /* N set: write 8 bytes  */
        movs        r3, r3, lsl #2          /* bit 2 -> C            */
        strcs       r1, [r0], #4            /* C set: write 4 bytes  */

        /* Now quickly loop through the cache-aligned data,
         * 32 bytes per iteration. */
.Laligned32:
        mov         r3, r1
1:      subs        r2, r2, #32
        stmhsia     r0!, {r1,r3,ip,lr}
        stmhsia     r0!, {r1,r3,ip,lr}
        bhs         1b
        add         r2, r2, #32

        /* (Optionally) store any remaining trailing bytes.
         * (0-30 bytes, length in r2; same flag trick as above.) */
        movs        r2, r2, lsl #28
        stmcsia     r0!, {r1,r3,ip,lr}      /* C set: write 16 bytes */
        stmmiia     r0!, {r1,lr}            /* N set: write 8 bytes  */
        movs        r2, r2, lsl #2
        strcs       r1, [r0], #4            /* C set: write 4 bytes  */
        strmih      lr, [r0], #2            /* N set: write 2 bytes  */

        pop         {pc}
        .fnend
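
/*
 * Usage sketch (illustrative, not part of the original file): a minimal
 * C caller matching the prototypes quoted above. The buffer sizes and
 * fill values here are assumptions chosen for the example; note that
 * count is a number of 16- or 32-bit elements, not bytes, per the
 * 2010-08-11 change note.
 *
 *     #include <stdint.h>
 *
 *     extern void arm_memset16(uint16_t* dst, uint16_t value, int count);
 *     extern void arm_memset32(uint32_t* dst, uint32_t value, int count);
 *
 *     int main(void) {
 *         uint16_t line[320];                  // e.g. one RGB565 scanline
 *         arm_memset16(line, 0xF800, 320);     // fill with red (R bits set)
 *
 *         uint32_t words[64];
 *         arm_memset32(words, 0xDEADBEEF, 64); // fill with a test pattern
 *         return 0;
 *     }
 */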