#ifndef ETHERBOOT_BITS_STRING_H
#define ETHERBOOT_BITS_STRING_H
/*
 * Taken from Linux /usr/include/asm/string.h
 * All except memcpy, memmove, memset and memcmp removed.
 *
 * Non-standard memswap() function added because it saves quite a bit
 * of code (mbrown (at) fensystems.co.uk).
 */

/*
 * This string-include defines all string functions as inline
 * functions. Use gcc. It also assumes ds=es=data space, this should be
 * normal. Most of the string-functions are rather heavily hand-optimized,
 * see especially strtok,strstr,str[c]spn. They should work, but are not
 * very easy to understand. Everything is done entirely within the register
 * set, making the functions fast and clean. String instructions have been
 * used through-out, making for "slightly" unclear code :-)
 *
 * NO Copyright (C) 1991, 1992 Linus Torvalds,
 * consider these trivial functions to be PD.
 */

FILE_LICENCE ( PUBLIC_DOMAIN );

#define __HAVE_ARCH_MEMCPY

/** Out-of-line memcpy, used for copies too long to inline profitably */
extern void * __memcpy ( void *dest, const void *src, size_t len );

#if 0
static inline __attribute__ (( always_inline )) void *
__memcpy ( void *dest, const void *src, size_t len ) {
	int d0, d1, d2;
	__asm__ __volatile__ ( "rep ; movsb"
			       : "=&c" ( d0 ), "=&S" ( d1 ), "=&D" ( d2 )
			       : "0" ( len ), "1" ( src ), "2" ( dest )
			       : "memory" );
	return dest;
}
#endif

/**
 * Copy memory area (where length is a compile-time constant)
 *
 * @v dest		Destination address
 * @v src		Source address
 * @v len		Length of area (must be a compile-time constant)
 * @ret dest		Destination address
 *
 * Chooses, per constant length, the smallest code sequence: plain
 * register moves for the common short lengths, unrolled string
 * instructions for medium lengths, and a call to __memcpy() beyond
 * the cutoff.  The byte counts in the case comments below are the
 * sizes of the generated instruction sequences.
 */
static inline __attribute__ (( always_inline )) void *
__constant_memcpy ( void *dest, const void *src, size_t len ) {
	/* Unions with __may_alias__ allow the byte/word/dword member
	 * accesses below without violating strict-aliasing rules.
	 */
	union {
		uint32_t u32[2];
		uint16_t u16[4];
		uint8_t  u8[8];
	} __attribute__ (( __may_alias__ )) *dest_u = dest;
	const union {
		uint32_t u32[2];
		uint16_t u16[4];
		uint8_t  u8[8];
	} __attribute__ (( __may_alias__ )) *src_u = src;
	const void *esi;
	void *edi;

	switch ( len ) {
	case 0 : /* 0 bytes */
		return dest;
	/*
	 * Single-register moves; these are always better than a
	 * string operation.  We can clobber an arbitrary two
	 * registers (data, source, dest can re-use source register)
	 * instead of being restricted to esi and edi.  There's also a
	 * much greater potential for optimising with nearby code.
	 *
	 */
	case 1 : /* 4 bytes */
		dest_u->u8[0]  = src_u->u8[0];
		return dest;
	case 2 : /* 6 bytes */
		dest_u->u16[0] = src_u->u16[0];
		return dest;
	case 4 : /* 4 bytes */
		dest_u->u32[0] = src_u->u32[0];
		return dest;
	/*
	 * Double-register moves; these are probably still a win.
	 *
	 */
	case 3 : /* 12 bytes */
		dest_u->u16[0] = src_u->u16[0];
		dest_u->u8[2]  = src_u->u8[2];
		return dest;
	case 5 : /* 10 bytes */
		dest_u->u32[0] = src_u->u32[0];
		dest_u->u8[4]  = src_u->u8[4];
		return dest;
	case 6 : /* 12 bytes */
		dest_u->u32[0] = src_u->u32[0];
		dest_u->u16[2] = src_u->u16[2];
		return dest;
	case 8 : /* 10 bytes */
		dest_u->u32[0] = src_u->u32[0];
		dest_u->u32[1] = src_u->u32[1];
		return dest;
	}

	/* Even if we have to load up esi and edi ready for a string
	 * operation, we can sometimes save space by using multiple
	 * single-byte "movs" operations instead of loading up ecx and
	 * using "rep movsb".
	 *
	 * "load ecx, rep movsb" is 7 bytes, plus an average of 1 byte
	 * to allow for saving/restoring ecx 50% of the time.
	 *
	 * "movsl" and "movsb" are 1 byte each, "movsw" is two bytes.
	 * (In 16-bit mode, "movsl" is 2 bytes and "movsw" is 1 byte,
	 * but "movsl" moves twice as much data, so it balances out).
	 *
	 * The cutoff point therefore occurs around 26 bytes; the byte
	 * requirements for each method are:
	 *
	 * len		   16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
	 * #bytes (ecx)	    8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
	 * #bytes (no ecx)  4  5  6  7  5  6  7  8  6  7  8  9  7  8  9 10
	 */

	esi = src;
	edi = dest;

	if ( len >= 26 )
		return __memcpy ( dest, src, len );

	/* Each "if" below emits at most one string instruction; the
	 * chain as a whole copies (len / 4) dwords, then the odd word
	 * and/or byte.  esi/edi are threaded through as in/out
	 * operands so successive instructions continue where the
	 * previous one stopped.
	 */
	if ( len >= 6*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 5*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 4*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 3*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 2*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 1*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( ( len % 4 ) >= 2 )
		__asm__ __volatile__ ( "movsw" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( ( len % 2 ) >= 1 )
		__asm__ __volatile__ ( "movsb" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );

	return dest;
}

/* Dispatch on whether the length is known at compile time */
#define memcpy( dest, src, len )				\
	( __builtin_constant_p ( (len) ) ?			\
	  __constant_memcpy ( (dest), (src), (len) ) :		\
	  __memcpy ( (dest), (src), (len) ) )

#define __HAVE_ARCH_MEMMOVE
/**
 * Copy (possibly overlapping) memory area
 *
 * @v dest		Destination address
 * @v src		Source address
 * @v n		Length of area
 * @ret dest		Destination address
 */
static inline void * memmove(void * dest,const void * src, size_t n)
{
	int d0, d1, d2;
	if (dest<src)
		/* Destination precedes source: a forward copy cannot
		 * overwrite unread source bytes.
		 */
		__asm__ __volatile__(
			"cld\n\t"
			"rep\n\t"
			"movsb"
			: "=&c" (d0), "=&S" (d1), "=&D" (d2)
			:"0" (n),"1" (src),"2" (dest)
			: "memory");
	else
		/* Copy backwards from the last byte (std) so an
		 * overlapping source is read before being overwritten;
		 * restore the normally-clear direction flag afterwards.
		 */
		__asm__ __volatile__(
			"std\n\t"
			"rep\n\t"
			"movsb\n\t"
			"cld"
			: "=&c" (d0), "=&S" (d1), "=&D" (d2)
			:"0" (n),
			 "1" (n-1+(const char *)src),
			 "2" (n-1+(char *)dest)
			:"memory");
	return dest;
}

#define __HAVE_ARCH_MEMSET
/**
 * Fill memory area with a constant byte
 *
 * @v s		Memory area
 * @v c		Fill byte (only the low 8 bits are used by stosb)
 * @v count		Length of area
 * @ret s		Memory area
 */
static inline void * memset(void *s, int c,size_t count)
{
	int d0, d1;
	__asm__ __volatile__(
		"cld\n\t"
		"rep\n\t"
		"stosb"
		: "=&c" (d0), "=&D" (d1)
		:"a" (c),"1" (s),"0" (count)
		:"memory");
	return s;
}

#define __HAVE_ARCH_MEMSWAP
/**
 * Swap the contents of two (non-overlapping) memory areas
 *
 * @v dest		First memory area
 * @v src		Second memory area
 * @v n		Length of areas
 * @ret dest		First memory area
 */
static inline void * memswap(void *dest, void *src, size_t n)
{
	int d0, d1, d2, d3;
	/* "loop" decrements %ecx *before* testing it, so entering
	 * this asm with n==0 would iterate 2^32 times; skip it
	 * entirely for a zero length.
	 */
	if (n)
		__asm__ __volatile__(
			"\n1:\t"
			"movb (%%edi),%%al\n\t"
			"xchgb (%%esi),%%al\n\t"
			"incl %%esi\n\t"
			"stosb\n\t"
			"loop 1b"
			: "=&c" (d0), "=&S" (d1), "=&D" (d2), "=&a" (d3)
			: "0" (n), "1" (src), "2" (dest)
			: "memory" );
	return dest;
}

#define __HAVE_ARCH_STRNCMP
/**
 * Compare two strings, up to a maximum length
 *
 * @v cs		First string
 * @v ct		Second string
 * @v count		Maximum length to compare
 * @ret diff		0 if equal, otherwise +1/-1 per the first
 *			differing byte (magnitude is not meaningful)
 */
static inline int strncmp(const char * cs,const char * ct,size_t count)
{
	register int __res;
	int d0, d1, d2;
	/* NOTE(review): relies on the direction flag being clear, as
	 * the ABI mandates at function entry; no "cld" is emitted.
	 */
	__asm__ __volatile__(
		"1:\tdecl %3\n\t"
		"js 2f\n\t"
		"lodsb\n\t"
		"scasb\n\t"
		"jne 3f\n\t"
		"testb %%al,%%al\n\t"
		"jne 1b\n"
		"2:\txorl %%eax,%%eax\n\t"
		"jmp 4f\n"
		"3:\tsbbl %%eax,%%eax\n\t"
		"orb $1,%%al\n"
		"4:"
		:"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
		:"1" (cs),"2" (ct),"3" (count)
		/* "memory" clobber: the asm reads the strings, which
		 * are not named as operands; without it the compiler
		 * may legally delay pending stores past the asm.
		 */
		:"memory");
	return __res;
}

#define __HAVE_ARCH_STRLEN
/**
 * Get length of a NUL-terminated string
 *
 * @v s		String
 * @ret len		Length (excluding the terminating NUL)
 */
static inline size_t strlen(const char * s)
{
	int d0;
	register int __res;
	/* scasb with %ecx pre-loaded to -1 counts down once per byte
	 * scanned (including the NUL); not+dec converts the remaining
	 * count into the string length.  Assumes the direction flag
	 * is clear, per the ABI.
	 */
	__asm__ __volatile__(
		"repne\n\t"
		"scasb\n\t"
		"notl %0\n\t"
		"decl %0"
		:"=c" (__res), "=&D" (d0)
		:"1" (s),"a" (0), "0" (0xffffffff)
		/* "memory" clobber: forces pending stores to the
		 * string to be flushed before the scan (asm reads
		 * memory not named in any operand).
		 */
		:"memory");
	return __res;
}

#endif /* ETHERBOOT_BITS_STRING_H */