#ifndef ETHERBOOT_BITS_STRING_H
#define ETHERBOOT_BITS_STRING_H
/*
 * Taken from Linux /usr/include/asm/string.h
 * All except memcpy, memmove, memset and memcmp removed.
 *
 * Non-standard memswap() function added because it saves quite a bit
 * of code (mbrown@fensystems.co.uk).
 */

/*
 * This string-include defines all string functions as inline
 * functions. Use gcc. It also assumes ds=es=data space, this should be
 * normal. Most of the string-functions are rather heavily hand-optimized,
 * see especially strtok,strstr,str[c]spn. They should work, but are not
 * very easy to understand. Everything is done entirely within the register
 * set, making the functions fast and clean. String instructions have been
 * used through-out, making for "slightly" unclear code :-)
 *
 *		NO Copyright (C) 1991, 1992 Linus Torvalds,
 *		consider these trivial functions to be PD.
 */

FILE_LICENCE ( PUBLIC_DOMAIN );

#define __HAVE_ARCH_MEMCPY

extern void * __memcpy ( void *dest, const void *src, size_t len );

#if 0
static inline __attribute__ (( always_inline )) void *
__memcpy ( void *dest, const void *src, size_t len ) {
	int d0, d1, d2;
	__asm__ __volatile__ ( "rep ; movsb"
			       : "=&c" ( d0 ), "=&S" ( d1 ), "=&D" ( d2 )
			       : "0" ( len ), "1" ( src ), "2" ( dest )
			       : "memory" );
	return dest;
}
#endif

static inline __attribute__ (( always_inline )) void *
__constant_memcpy ( void *dest, const void *src, size_t len ) {
	union {
		uint32_t u32[2];
		uint16_t u16[4];
		uint8_t  u8[8];
	} __attribute__ (( __may_alias__ )) *dest_u = dest;
	const union {
		uint32_t u32[2];
		uint16_t u16[4];
		uint8_t  u8[8];
	} __attribute__ (( __may_alias__ )) *src_u = src;
	const void *esi;
	void *edi;

	switch ( len ) {
	case 0 : /* 0 bytes */
		return dest;
	/*
	 * Single-register moves; these are always better than a
	 * string operation.  We can clobber an arbitrary two
	 * registers (data, source, dest can re-use source register)
	 * instead of being restricted to esi and edi.  There's also a
	 * much greater potential for optimising with nearby code.
	 *
	 */
	case 1 : /* 4 bytes */
		dest_u->u8[0]  = src_u->u8[0];
		return dest;
	case 2 : /* 6 bytes */
		dest_u->u16[0] = src_u->u16[0];
		return dest;
	case 4 : /* 4 bytes */
		dest_u->u32[0] = src_u->u32[0];
		return dest;
	/*
	 * Double-register moves; these are probably still a win.
	 *
	 */
	case 3 : /* 12 bytes */
		dest_u->u16[0] = src_u->u16[0];
		dest_u->u8[2]  = src_u->u8[2];
		return dest;
	case 5 : /* 10 bytes */
		dest_u->u32[0] = src_u->u32[0];
		dest_u->u8[4]  = src_u->u8[4];
		return dest;
	case 6 : /* 12 bytes */
		dest_u->u32[0] = src_u->u32[0];
		dest_u->u16[2] = src_u->u16[2];
		return dest;
	case 8 : /* 10 bytes */
		dest_u->u32[0] = src_u->u32[0];
		dest_u->u32[1] = src_u->u32[1];
		return dest;
	}

	/* Even if we have to load up esi and edi ready for a string
	 * operation, we can sometimes save space by using multiple
	 * single-byte "movs" operations instead of loading up ecx and
	 * using "rep movsb".
	 *
	 * "load ecx, rep movsb" is 7 bytes, plus an average of 1 byte
	 * to allow for saving/restoring ecx 50% of the time.
	 *
	 * "movsl" and "movsb" are 1 byte each, "movsw" is two bytes.
	 * (In 16-bit mode, "movsl" is 2 bytes and "movsw" is 1 byte,
	 * but "movsl" moves twice as much data, so it balances out).
	 *
	 * The cutoff point therefore occurs around 26 bytes; the byte
	 * requirements for each method are:
	 *
	 * len		   16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
	 * #bytes (ecx)	    8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
	 * #bytes (no ecx)  4  5  6  7  5  6  7  8  6  7  8  9  7  8  9 10
	 */
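	/* For example, a 20-byte copy unrolls below into five "movsl"
	 * instructions (5 bytes of code) against the 8 bytes of the
	 * "load ecx, rep movsb" form, while a 26-byte copy costs 8
	 * bytes either way: hence the cutoff at 26.
	 */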

	esi = src;
	edi = dest;

	if ( len >= 26 )
		return __memcpy ( dest, src, len );

	if ( len >= 6*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 5*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 4*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 3*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 2*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( len >= 1*4 )
		__asm__ __volatile__ ( "movsl" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( ( len % 4 ) >= 2 )
		__asm__ __volatile__ ( "movsw" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );
	if ( ( len % 2 ) >= 1 )
		__asm__ __volatile__ ( "movsb" : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi ) : "memory" );

	return dest;
}

#define memcpy( dest, src, len )			\
	( __builtin_constant_p ( (len) ) ?		\
	  __constant_memcpy ( (dest), (src), (len) ) :	\
	  __memcpy ( (dest), (src), (len) ) )
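
#if 0
/* Illustration only (hypothetical caller, not part of this header):
 * because the memcpy() macro dispatches on __builtin_constant_p(), a
 * copy whose length is a compile-time constant is expanded inline via
 * __constant_memcpy(), while a runtime-sized copy falls through to
 * the out-of-line __memcpy().
 */
static inline void memcpy_example ( void *dest, const void *src,
				    size_t runtime_len ) {
	uint8_t fixed[8];

	memcpy ( fixed, src, sizeof ( fixed ) ); /* constant length: inlined */
	memcpy ( dest, src, runtime_len );	  /* variable length: __memcpy() */
}
#endif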

#define __HAVE_ARCH_MEMMOVE
static inline void * memmove ( void *dest, const void *src, size_t n )
{
	int d0, d1, d2;

	if ( dest < src ) {
		/* Forward copy: safe when dest precedes src */
		__asm__ __volatile__ (
			"cld\n\t"
			"rep\n\t"
			"movsb"
			: "=&c" ( d0 ), "=&S" ( d1 ), "=&D" ( d2 )
			: "0" ( n ), "1" ( src ), "2" ( dest )
			: "memory" );
	} else {
		/* Backward copy, starting from the last byte: safe
		 * when dest overlaps the tail of src
		 */
		__asm__ __volatile__ (
			"std\n\t"
			"rep\n\t"
			"movsb\n\t"
			"cld"
			: "=&c" ( d0 ), "=&S" ( d1 ), "=&D" ( d2 )
			: "0" ( n ),
			  "1" ( n - 1 + ( const char * ) src ),
			  "2" ( n - 1 + ( char * ) dest )
			: "memory" );
	}
	return dest;
}
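
/* Illustration only (hypothetical caller): memmove() is the safe
 * choice when the two regions may overlap.  For example, shifting the
 * contents of a buffer up by one byte
 *
 *	memmove ( ( buf + 1 ), buf, ( len - 1 ) );
 *
 * has dest > src, so the backward ("std") copy above is used and the
 * overlapping source bytes are read before they are overwritten.
 */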

#define __HAVE_ARCH_MEMSET
static inline void * memset ( void *s, int c, size_t count )
{
	int d0, d1;

	__asm__ __volatile__ (
		"cld\n\t"
		"rep\n\t"
		"stosb"
		: "=&c" ( d0 ), "=&D" ( d1 )
		: "a" ( c ), "1" ( s ), "0" ( count )
		: "memory" );
	return s;
}

#define __HAVE_ARCH_MEMSWAP
static inline void * memswap ( void *dest, void *src, size_t n )
{
	int d0, d1, d2, d3;

	__asm__ __volatile__ (
		"\n1:\t"
		"movb (%%edi),%%al\n\t"
		"xchgb (%%esi),%%al\n\t"
		"incl %%esi\n\t"
		"stosb\n\t"
		"loop 1b"
		: "=&c" ( d0 ), "=&S" ( d1 ), "=&D" ( d2 ), "=&a" ( d3 )
		: "0" ( n ), "1" ( src ), "2" ( dest )
		: "memory" );
	return dest;
}
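
/* Illustration only (hypothetical caller): memswap() exchanges the
 * contents of two equally-sized buffers in place, avoiding the
 * temporary copy that a memcpy()-based swap would need:
 *
 *	uint8_t a[6], b[6];
 *	memswap ( a, b, sizeof ( a ) );
 *
 * Note that the "loop" instruction above only tests ecx after the
 * first pass, so calling memswap() with a zero length is unsafe (the
 * count would wrap).
 */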

#define __HAVE_ARCH_STRNCMP
static inline int strncmp ( const char *cs, const char *ct, size_t count )
{
	register int __res;
	int d0, d1, d2;

	__asm__ __volatile__ (
		"1:\tdecl %3\n\t"
		"js 2f\n\t"
		"lodsb\n\t"
		"scasb\n\t"
		"jne 3f\n\t"
		"testb %%al,%%al\n\t"
		"jne 1b\n"
		"2:\txorl %%eax,%%eax\n\t"
		"jmp 4f\n"
		"3:\tsbbl %%eax,%%eax\n\t"
		"orb $1,%%al\n"
		"4:"
		: "=a" ( __res ), "=&S" ( d0 ), "=&D" ( d1 ), "=&c" ( d2 )
		: "1" ( cs ), "2" ( ct ), "3" ( count ) );
	return __res;
}

#define __HAVE_ARCH_STRLEN
static inline size_t strlen ( const char *s )
{
	int d0;
	register int __res;

	__asm__ __volatile__ (
		"repne\n\t"
		"scasb\n\t"
		"notl %0\n\t"
		"decl %0"
		: "=c" ( __res ), "=&D" ( d0 )
		: "1" ( s ), "a" ( 0 ), "0" ( 0xffffffff ) );
	return __res;
}

#endif /* ETHERBOOT_BITS_STRING_H */