/*
 * Copyright 2010 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

/* Changes:
 * 2010-08-11 Steve McIntyre <steve.mcintyre (at) arm.com>
 *    Added small changes to the two functions to make them work on the
 *    specified number of 16- or 32-bit values rather than the original
 *    code which was specified as a count of bytes. More verbose comments
 *    to aid future maintenance.
 */

    .text
    .align

    .global arm_memset32
    .type   arm_memset32, %function
    .global arm_memset16
    .type   arm_memset16, %function

/*
 * Optimized memset functions for ARM.
 *
 * void arm_memset16(uint16_t* dst, uint16_t value, int count);
 * void arm_memset32(uint32_t* dst, uint32_t value, int count);
 *
 * Note that count is the number of 16- or 32-bit elements to write,
 * not a count of bytes.
 */
arm_memset16:
        .fnstart
        push        {lr}

        /* If count is zero there is nothing to do - return immediately. */
        teq         r2, #0
        ble         .Lfinish

        /* Multiply count by 2 - go from the number of 16-bit shorts
         * to the number of bytes desired. */
        mov         r2, r2, lsl #1

        /* Duplicate the 16-bit value into both halves of r1 so the
         * rest of the code can store it 32 bits at a time. */
        orr         r1, r1, lsl #16

        /* If the destination is only 16-bit aligned, store a single
         * halfword to bring it up to 32-bit alignment. */
        tst         r0, #2
        strneh      r1, [r0], #2
        subne       r2, r2, #2

        /* Now jump into the main loop below. */
        b           .Lwork_32
        .fnend

arm_memset32:
        .fnstart
        push        {lr}

        /* If count is zero there is nothing to do - return immediately. */
        teq         r2, #0
        ble         .Lfinish

        /* Multiply count by 4 - go from the number of 32-bit words to
         * the number of bytes desired. */
        mov         r2, r2, lsl #2

.Lwork_32:
        /* Set up registers ready for writing them out. */
        mov         ip, r1
        mov         lr, r1

        /* Try to align the destination to a cache line. Assume 32
         * byte (8 word) cache lines, it's the common case. */
        rsb         r3, r0, #0
        ands        r3, r3, #0x1C
        beq         .Laligned32
        cmp         r3, r2
        andhi       r3, r2, #0x1C
        sub         r2, r2, r3

        /* (Optionally) write any unaligned leading bytes.
         * (0-28 bytes, length in r3)
         * Shifting the length left by 28 moves bit 4 into C and bit 3
         * into N, selecting the 16- and 8-byte stores below; the second
         * shift moves bit 2 into C for the final 4-byte store. */
        movs        r3, r3, lsl #28
        stmcsia     r0!, {r1, lr}
        stmcsia     r0!, {r1, lr}
        stmmiia     r0!, {r1, lr}
        movs        r3, r3, lsl #2
        strcs       r1, [r0], #4

        /* Now quickly loop through the cache-aligned data, 32 bytes
         * (two 16-byte stores) per iteration. */
.Laligned32:
        mov         r3, r1
1:      subs        r2, r2, #32
        stmhsia     r0!, {r1, r3, ip, lr}
        stmhsia     r0!, {r1, r3, ip, lr}
        bhs         1b
        add         r2, r2, #32

        /* (Optionally) store any remaining trailing bytes.
         * (0-30 bytes, length in r2)
         * The same shift-and-test trick handles 16, 8, 4 and finally
         * 2 bytes; the trailing halfword only arises for arm_memset16. */
        movs        r2, r2, lsl #28
        stmcsia     r0!, {r1, r3, ip, lr}
        stmmiia     r0!, {r1, lr}
        movs        r2, r2, lsl #2
        strcs       r1, [r0], #4
        strmih      lr, [r0], #2

.Lfinish:
        pop         {pc}
        .fnend
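
/*
 * A minimal usage sketch from C, assuming these routines are linked into
 * the same image and declared with the prototypes given above. The buffer
 * names, sizes and fill values here are illustrative only, not part of
 * this file's interface.
 *
 *   #include <stdint.h>
 *
 *   extern void arm_memset16(uint16_t* dst, uint16_t value, int count);
 *   extern void arm_memset32(uint32_t* dst, uint32_t value, int count);
 *
 *   #define NUM_PIXELS (320 * 240)
 *   static uint16_t framebuffer[NUM_PIXELS];
 *   static uint32_t palette[256];
 *
 *   void fill_buffers(void)
 *   {
 *       // count is in elements (halfwords/words), not bytes.
 *       arm_memset16(framebuffer, 0xFFFF, NUM_PIXELS);
 *       arm_memset32(palette, 0u, 256);
 *   }
 */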