/* memset.S: optimised assembly memset
 *
 * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells (at) redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */


	.text
	.p2align	4

###############################################################################
#
# void *memset(void *p, char ch, size_t count)
#
# - NOTE: must not use any stack. exception detection performs function return
#         to caller's fixup routine, aborting the remainder of the set
#         GR4, GR7, GR8, and GR11 must be managed
#
# Register usage:
#	GR8	- p on entry; never written here, so it is still p on return
#	GR9	- ch on entry, masked down to its low 8 bits
#	GR10	- count on entry; never written here
#	GR4	- address tracking pointer
#	GR5	- number of bytes still to be written
#	GR6	- size of the chunk being accounted for in the current stage
#	GR7	- index register for the store-with-update insns; it is first
#		  written just before the 64-byte loop, so the alignment stores
#		  below (which index by GR0) leave it as the caller set it
#	GR12/13	- fill pattern replicated to 16, then 32, then 2x32 bits
#
# An insn with a ".p" suffix is packed into the same VLIW packet as the insn
# that follows it; the packing below is deliberate and must not be disturbed.
#
###############################################################################
	.globl		memset,__memset_end
	.type		memset,@function
memset:
	orcc.p		gr10,gr0,gr5,icc3		; GR5 = count
	andi		gr9,#0xff,gr9
	or.p		gr8,gr0,gr4			; GR4 = address
	beqlr		icc3,#0				; nothing to do if count == 0

	# conditionally write a byte to 2b-align the address
	setlos.p	#1,gr6
	andicc		gr4,#1,gr0,icc0
	ckne		icc0,cc7			; CC7 = address is odd
	cstb.p		gr9,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0				; done if that used up the count

	# conditionally write a word to 4b-align the address
	andicc.p	gr4,#2,gr0,icc0
	subicc		gr5,#2,gr0,icc1
	setlos.p	#2,gr6
	ckne		icc0,cc7			; CC7 = address bit 1 is set
	slli.p		gr9,#8,gr12			; need to double up the pattern
	cknc		icc1,cc5			; CC5 = at least 2 bytes remain
	or.p		gr9,gr12,gr12
	andcr		cc7,cc5,cc7			; store only if both hold

	csth.p		gr12,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	# conditionally write a dword to 8b-align the address
	andicc.p	gr4,#4,gr0,icc0
	subicc		gr5,#4,gr0,icc1
	setlos.p	#4,gr6
	ckne		icc0,cc7			; CC7 = address bit 2 is set
	slli.p		gr12,#16,gr13			; need to quadruple-up the pattern
	cknc		icc1,cc5			; CC5 = at least 4 bytes remain
	or.p		gr13,gr12,gr12
	andcr		cc7,cc5,cc7			; store only if both hold

	cst.p		gr12,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	or.p		gr12,gr12,gr13			; need to octuple-up the pattern

	# the address is now 8b-aligned - loop around writing 64b chunks
	# - from here on GR4 trails the next address to be written by GR7
	#   bytes, since each update store writes to GR4+GR7 and then
	#   advances GR4 by GR7
	setlos		#8,gr7
	subi.p		gr4,#8,gr4			; store with update index does weird stuff
	setlos		#64,gr6

	subicc		gr5,#64,gr0,icc0
0:	cknc		icc0,cc7			; CC7 = at least 64 bytes remain
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	subicc		gr5,#64,gr0,icc0		; test for another full chunk
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0
	bnc		icc0,#2,0b

	# now do 32-byte remnant
	subicc.p	gr5,#32,gr0,icc0
	setlos		#32,gr6
	cknc		icc0,cc7			; CC7 = at least 32 bytes remain
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	setlos		#16,gr6				; chunk size for the next stage
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	subicc		gr5,#16,gr0,icc0		; pre-test for the next stage
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0

	# now do 16-byte remnant
	cknc		icc0,cc7			; CC7 = at least 16 bytes remain
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0

	# now do 8-byte remnant
	subicc		gr5,#8,gr0,icc1
	cknc		icc1,cc7			; CC7 = at least 8 bytes remain
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	setlos.p	#4,gr7				; step for the 4-byte stage
	beqlr		icc3,#0

	# now do 4-byte remnant
	subicc		gr5,#4,gr0,icc0
	addi.p		gr4,#4,gr4			; GR4 now trails the next address by 4
	cknc		icc0,cc7			; CC7 = at least 4 bytes remain
	cstu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	subicc.p	gr5,#2,gr0,icc1			; pre-test for the 2-byte stage
	beqlr		icc3,#0

	# now do 2-byte remnant
	setlos		#2,gr7
	addi.p		gr4,#2,gr4			; GR4 now trails the next address by 2
	cknc		icc1,cc7			; CC7 = at least 2 bytes remain
	csthu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	subicc.p	gr5,#1,gr0,icc0			; pre-test for the 1-byte stage
	beqlr		icc3,#0

	# now do 1-byte remnant
	setlos		#0,gr7				; final store indexes by GR0, so step = 0
	addi.p		gr4,#2,gr4			; GR4 is now exactly the next address
	cknc		icc0,cc7			; CC7 = at least 1 byte remains
	cstb.p		gr12,@(gr4,gr0)		,cc7,#1
	bralr
__memset_end:

	.size		memset, __memset_end-memset

###############################################################################
#
# clear memory in userspace
# - return the number of bytes that could not be cleared (0 on complete success)
#
# long __memset_user(void *p, size_t count)
#
# - GR8 = p and GR9 = count on entry; the arguments are shuffled into the
#   (p, 0, count) layout that memset expects
# - the return address is parked in GR11, which memset does not touch
#
###############################################################################
	.globl		__memset_user, __memset_user_error_lr, __memset_user_error_handler
	.type		__memset_user,@function
__memset_user:
	movsg		lr,gr11

	# abuse memset to do the dirty work
	# - the two insns below share a packet: GR10 receives the incoming
	#   count from GR9 while GR9 is replaced by the zero fill byte
	or.p		gr9,gr9,gr10
	setlos		#0,gr9

	# memset does not write GR7 until it reaches its 8-byte-aligned store
	# loop, yet the error handler below adds GR7 to GR4.  Zero it here so
	# that a fault in one of memset's alignment stores, which address
	# memory as @(gr4,gr0), still yields the correct residue.
	setlos		#0,gr7
	call		memset
__memset_user_error_lr:
	jmpl.p		@(gr11,gr0)
	setlos		#0,gr8				; complete success: nothing left uncleared

	# deal with any exception generated by memset
	# GR4  - memset's address tracking pointer
	# GR7  - memset's step value (index register for store insns)
	# GR8  - memset's original start address
	# GR10 - memset's original count
__memset_user_error_handler:
	add.p		gr4,gr7,gr4			; GR4 = address of the store that faulted
	add		gr8,gr10,gr8			; GR8 = one past the end of the region
	jmpl.p		@(gr11,gr0)
	sub		gr8,gr4,gr8		; we return the amount left uncleared

	.size		__memset_user, .-__memset_user