1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1997-2011, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * 9 * File CSTRING.C 10 * 11 * @author Helena Shih 12 * 13 * Modification History: 14 * 15 * Date Name Description 16 * 6/18/98 hshih Created 17 * 09/08/98 stephen Added include for ctype, for Mac Port 18 * 11/15/99 helena Integrated S/390 IEEE changes. 19 ****************************************************************************** 20 */ 21 22 23 24 #include <stdlib.h> 25 #include <stdio.h> 26 #include "unicode/utypes.h" 27 #include "cmemory.h" 28 #include "cstring.h" 29 #include "uassert.h" 30 31 /* 32 * We hardcode case conversion for invariant characters to match our expectation 33 * and the compiler execution charset. 34 * This prevents problems on systems 35 * - with non-default casing behavior, like Turkish system locales where 36 * tolower('I') maps to dotless i and toupper('i') maps to dotted I 37 * - where there are no lowercase Latin characters at all, or using different 38 * codes (some old EBCDIC codepages) 39 * 40 * This works because the compiler usually runs on a platform where the execution 41 * charset includes all of the invariant characters at their expected 42 * code positions, so that the char * string literals in ICU code match 43 * the char literals here. 44 * 45 * Note that the set of lowercase Latin letters is discontiguous in EBCDIC 46 * and the set of uppercase Latin letters is discontiguous as well. 47 */ 48 49 U_CAPI UBool U_EXPORT2 50 uprv_isASCIILetter(char c) { 51 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY 52 return 53 ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') || 54 ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z'); 55 #else 56 return ('a'<=c && c<='z') || ('A'<=c && c<='Z'); 57 #endif 58 } 59 60 U_CAPI char U_EXPORT2 61 uprv_toupper(char c) { 62 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY 63 if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) { 64 c=(char)(c+('A'-'a')); 65 } 66 #else 67 if('a'<=c && c<='z') { 68 c=(char)(c+('A'-'a')); 69 } 70 #endif 71 return c; 72 } 73 74 75 #if 0 76 /* 77 * Commented out because cstring.h defines uprv_tolower() to be 78 * the same as either uprv_asciitolower() or uprv_ebcdictolower() 79 * to reduce the amount of code to cover with tests. 80 * 81 * Note that this uprv_tolower() definition is likely to work for most 82 * charset families, not just ASCII and EBCDIC, because its #else branch 83 * is written generically. 84 */ 85 U_CAPI char U_EXPORT2 86 uprv_tolower(char c) { 87 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY 88 if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) { 89 c=(char)(c+('a'-'A')); 90 } 91 #else 92 if('A'<=c && c<='Z') { 93 c=(char)(c+('a'-'A')); 94 } 95 #endif 96 return c; 97 } 98 #endif 99 100 U_CAPI char U_EXPORT2 101 uprv_asciitolower(char c) { 102 if(0x41<=c && c<=0x5a) { 103 c=(char)(c+0x20); 104 } 105 return c; 106 } 107 108 U_CAPI char U_EXPORT2 109 uprv_ebcdictolower(char c) { 110 if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) || 111 (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) || 112 (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9) 113 ) { 114 c=(char)(c-0x40); 115 } 116 return c; 117 } 118 119 120 U_CAPI char* U_EXPORT2 121 T_CString_toLowerCase(char* str) 122 { 123 char* origPtr = str; 124 125 if (str) { 126 do 127 *str = (char)uprv_tolower(*str); 128 while (*(str++)); 129 } 130 131 return origPtr; 132 } 133 134 U_CAPI char* U_EXPORT2 135 T_CString_toUpperCase(char* str) 136 { 137 char* origPtr = str; 138 139 if (str) { 140 do 141 *str = (char)uprv_toupper(*str); 142 while (*(str++)); 143 } 144 145 return origPtr; 146 } 147 148 /* 149 * Takes a int32_t and fills in a char* string with that number "radix"-based. 150 * Does not handle negative values (makes an empty string for them). 151 * Writes at most 12 chars ("-2147483647" plus NUL). 152 * Returns the length of the string (not including the NUL). 153 */ 154 U_CAPI int32_t U_EXPORT2 155 T_CString_integerToString(char* buffer, int32_t v, int32_t radix) 156 { 157 char tbuf[30]; 158 int32_t tbx = sizeof(tbuf); 159 uint8_t digit; 160 int32_t length = 0; 161 uint32_t uval; 162 163 U_ASSERT(radix>=2 && radix<=16); 164 uval = (uint32_t) v; 165 if(v<0 && radix == 10) { 166 /* Only in base 10 do we conside numbers to be signed. */ 167 uval = (uint32_t)(-v); 168 buffer[length++] = '-'; 169 } 170 171 tbx = sizeof(tbuf)-1; 172 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */ 173 do { 174 digit = (uint8_t)(uval % radix); 175 tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); 176 uval = uval / radix; 177 } while (uval != 0); 178 179 /* copy converted number into user buffer */ 180 uprv_strcpy(buffer+length, tbuf+tbx); 181 length += sizeof(tbuf) - tbx -1; 182 return length; 183 } 184 185 186 187 /* 188 * Takes a int64_t and fills in a char* string with that number "radix"-based. 189 * Writes at most 21: chars ("-9223372036854775807" plus NUL). 190 * Returns the length of the string, not including the terminating NULL. 191 */ 192 U_CAPI int32_t U_EXPORT2 193 T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix) 194 { 195 char tbuf[30]; 196 int32_t tbx = sizeof(tbuf); 197 uint8_t digit; 198 int32_t length = 0; 199 uint64_t uval; 200 201 U_ASSERT(radix>=2 && radix<=16); 202 uval = (uint64_t) v; 203 if(v<0 && radix == 10) { 204 /* Only in base 10 do we conside numbers to be signed. */ 205 uval = (uint64_t)(-v); 206 buffer[length++] = '-'; 207 } 208 209 tbx = sizeof(tbuf)-1; 210 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */ 211 do { 212 digit = (uint8_t)(uval % radix); 213 tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); 214 uval = uval / radix; 215 } while (uval != 0); 216 217 /* copy converted number into user buffer */ 218 uprv_strcpy(buffer+length, tbuf+tbx); 219 length += sizeof(tbuf) - tbx -1; 220 return length; 221 } 222 223 224 U_CAPI int32_t U_EXPORT2 225 T_CString_stringToInteger(const char *integerString, int32_t radix) 226 { 227 char *end; 228 return uprv_strtoul(integerString, &end, radix); 229 230 } 231 232 U_CAPI int U_EXPORT2 233 uprv_stricmp(const char *str1, const char *str2) { 234 if(str1==NULL) { 235 if(str2==NULL) { 236 return 0; 237 } else { 238 return -1; 239 } 240 } else if(str2==NULL) { 241 return 1; 242 } else { 243 /* compare non-NULL strings lexically with lowercase */ 244 int rc; 245 unsigned char c1, c2; 246 247 for(;;) { 248 c1=(unsigned char)*str1; 249 c2=(unsigned char)*str2; 250 if(c1==0) { 251 if(c2==0) { 252 return 0; 253 } else { 254 return -1; 255 } 256 } else if(c2==0) { 257 return 1; 258 } else { 259 /* compare non-zero characters with lowercase */ 260 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); 261 if(rc!=0) { 262 return rc; 263 } 264 } 265 ++str1; 266 ++str2; 267 } 268 } 269 } 270 271 U_CAPI int U_EXPORT2 272 uprv_strnicmp(const char *str1, const char *str2, uint32_t n) { 273 if(str1==NULL) { 274 if(str2==NULL) { 275 return 0; 276 } else { 277 return -1; 278 } 279 } else if(str2==NULL) { 280 return 1; 281 } else { 282 /* compare non-NULL strings lexically with lowercase */ 283 int rc; 284 unsigned char c1, c2; 285 286 for(; n--;) { 287 c1=(unsigned char)*str1; 288 c2=(unsigned char)*str2; 289 if(c1==0) { 290 if(c2==0) { 291 return 0; 292 } else { 293 return -1; 294 } 295 } else if(c2==0) { 296 return 1; 297 } else { 298 /* compare non-zero characters with lowercase */ 299 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); 300 if(rc!=0) { 301 return rc; 302 } 303 } 304 ++str1; 305 ++str2; 306 } 307 } 308 309 return 0; 310 } 311 312 U_CAPI char* U_EXPORT2 313 uprv_strdup(const char *src) { 314 size_t len = uprv_strlen(src) + 1; 315 char *dup = (char *) uprv_malloc(len); 316 317 if (dup) { 318 uprv_memcpy(dup, src, len); 319 } 320 321 return dup; 322 } 323 324 U_CAPI char* U_EXPORT2 325 uprv_strndup(const char *src, int32_t n) { 326 char *dup; 327 328 if(n < 0) { 329 dup = uprv_strdup(src); 330 } else { 331 dup = (char*)uprv_malloc(n+1); 332 if (dup) { 333 uprv_memcpy(dup, src, n); 334 dup[n] = 0; 335 } 336 } 337 338 return dup; 339 } 340