Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 *
      4 *   Copyright (C) 1997-2011, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 ******************************************************************************
      8 *
      9 * File CSTRING.C
     10 *
     11 * @author       Helena Shih
     12 *
     13 * Modification History:
     14 *
     15 *   Date        Name        Description
     16 *   6/18/98     hshih       Created
     17 *   09/08/98    stephen     Added include for ctype, for Mac Port
     18 *   11/15/99    helena      Integrated S/390 IEEE changes.
     19 ******************************************************************************
     20 */
     21 
     22 
     23 
     24 #include <stdlib.h>
     25 #include <stdio.h>
     26 #include "unicode/utypes.h"
     27 #include "cmemory.h"
     28 #include "cstring.h"
     29 #include "uassert.h"
     30 
     31 /*
     32  * We hardcode case conversion for invariant characters to match our expectation
     33  * and the compiler execution charset.
     34  * This prevents problems on systems
     35  * - with non-default casing behavior, like Turkish system locales where
     36  *   tolower('I') maps to dotless i and toupper('i') maps to dotted I
     37  * - where there are no lowercase Latin characters at all, or using different
     38  *   codes (some old EBCDIC codepages)
     39  *
     40  * This works because the compiler usually runs on a platform where the execution
     41  * charset includes all of the invariant characters at their expected
     42  * code positions, so that the char * string literals in ICU code match
     43  * the char literals here.
     44  *
     45  * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
     46  * and the set of uppercase Latin letters is discontiguous as well.
     47  */
     48 
     49 U_CAPI UBool U_EXPORT2
     50 uprv_isASCIILetter(char c) {
     51 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
     52     return
     53         ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
     54         ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
     55 #else
     56     return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
     57 #endif
     58 }
     59 
     60 U_CAPI char U_EXPORT2
     61 uprv_toupper(char c) {
     62 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
     63     if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
     64         c=(char)(c+('A'-'a'));
     65     }
     66 #else
     67     if('a'<=c && c<='z') {
     68         c=(char)(c+('A'-'a'));
     69     }
     70 #endif
     71     return c;
     72 }
     73 
     74 
     75 #if 0
     76 /*
     77  * Commented out because cstring.h defines uprv_tolower() to be
     78  * the same as either uprv_asciitolower() or uprv_ebcdictolower()
     79  * to reduce the amount of code to cover with tests.
     80  *
     81  * Note that this uprv_tolower() definition is likely to work for most
     82  * charset families, not just ASCII and EBCDIC, because its #else branch
     83  * is written generically.
     84  */
     85 U_CAPI char U_EXPORT2
     86 uprv_tolower(char c) {
     87 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
     88     if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
     89         c=(char)(c+('a'-'A'));
     90     }
     91 #else
     92     if('A'<=c && c<='Z') {
     93         c=(char)(c+('a'-'A'));
     94     }
     95 #endif
     96     return c;
     97 }
     98 #endif
     99 
    100 U_CAPI char U_EXPORT2
    101 uprv_asciitolower(char c) {
    102     if(0x41<=c && c<=0x5a) {
    103         c=(char)(c+0x20);
    104     }
    105     return c;
    106 }
    107 
    108 U_CAPI char U_EXPORT2
    109 uprv_ebcdictolower(char c) {
    110     if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
    111         (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
    112         (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
    113     ) {
    114         c=(char)(c-0x40);
    115     }
    116     return c;
    117 }
    118 
    119 
    120 U_CAPI char* U_EXPORT2
    121 T_CString_toLowerCase(char* str)
    122 {
    123     char* origPtr = str;
    124 
    125     if (str) {
    126         do
    127             *str = (char)uprv_tolower(*str);
    128         while (*(str++));
    129     }
    130 
    131     return origPtr;
    132 }
    133 
    134 U_CAPI char* U_EXPORT2
    135 T_CString_toUpperCase(char* str)
    136 {
    137     char* origPtr = str;
    138 
    139     if (str) {
    140         do
    141             *str = (char)uprv_toupper(*str);
    142         while (*(str++));
    143     }
    144 
    145     return origPtr;
    146 }
    147 
    148 /*
    149  * Takes a int32_t and fills in  a char* string with that number "radix"-based.
    150  * Does not handle negative values (makes an empty string for them).
    151  * Writes at most 12 chars ("-2147483647" plus NUL).
    152  * Returns the length of the string (not including the NUL).
    153  */
    154 U_CAPI int32_t U_EXPORT2
    155 T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
    156 {
    157     char      tbuf[30];
    158     int32_t   tbx    = sizeof(tbuf);
    159     uint8_t   digit;
    160     int32_t   length = 0;
    161     uint32_t  uval;
    162 
    163     U_ASSERT(radix>=2 && radix<=16);
    164     uval = (uint32_t) v;
    165     if(v<0 && radix == 10) {
    166         /* Only in base 10 do we conside numbers to be signed. */
    167         uval = (uint32_t)(-v);
    168         buffer[length++] = '-';
    169     }
    170 
    171     tbx = sizeof(tbuf)-1;
    172     tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
    173     do {
    174         digit = (uint8_t)(uval % radix);
    175         tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
    176         uval  = uval / radix;
    177     } while (uval != 0);
    178 
    179     /* copy converted number into user buffer  */
    180     uprv_strcpy(buffer+length, tbuf+tbx);
    181     length += sizeof(tbuf) - tbx -1;
    182     return length;
    183 }
    184 
    185 
    186 
    187 /*
    188  * Takes a int64_t and fills in  a char* string with that number "radix"-based.
    189  * Writes at most 21: chars ("-9223372036854775807" plus NUL).
    190  * Returns the length of the string, not including the terminating NULL.
    191  */
    192 U_CAPI int32_t U_EXPORT2
    193 T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
    194 {
    195     char      tbuf[30];
    196     int32_t   tbx    = sizeof(tbuf);
    197     uint8_t   digit;
    198     int32_t   length = 0;
    199     uint64_t  uval;
    200 
    201     U_ASSERT(radix>=2 && radix<=16);
    202     uval = (uint64_t) v;
    203     if(v<0 && radix == 10) {
    204         /* Only in base 10 do we conside numbers to be signed. */
    205         uval = (uint64_t)(-v);
    206         buffer[length++] = '-';
    207     }
    208 
    209     tbx = sizeof(tbuf)-1;
    210     tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
    211     do {
    212         digit = (uint8_t)(uval % radix);
    213         tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
    214         uval  = uval / radix;
    215     } while (uval != 0);
    216 
    217     /* copy converted number into user buffer  */
    218     uprv_strcpy(buffer+length, tbuf+tbx);
    219     length += sizeof(tbuf) - tbx -1;
    220     return length;
    221 }
    222 
    223 
    224 U_CAPI int32_t U_EXPORT2
    225 T_CString_stringToInteger(const char *integerString, int32_t radix)
    226 {
    227     char *end;
    228     return uprv_strtoul(integerString, &end, radix);
    229 
    230 }
    231 
    232 U_CAPI int U_EXPORT2
    233 uprv_stricmp(const char *str1, const char *str2) {
    234     if(str1==NULL) {
    235         if(str2==NULL) {
    236             return 0;
    237         } else {
    238             return -1;
    239         }
    240     } else if(str2==NULL) {
    241         return 1;
    242     } else {
    243         /* compare non-NULL strings lexically with lowercase */
    244         int rc;
    245         unsigned char c1, c2;
    246 
    247         for(;;) {
    248             c1=(unsigned char)*str1;
    249             c2=(unsigned char)*str2;
    250             if(c1==0) {
    251                 if(c2==0) {
    252                     return 0;
    253                 } else {
    254                     return -1;
    255                 }
    256             } else if(c2==0) {
    257                 return 1;
    258             } else {
    259                 /* compare non-zero characters with lowercase */
    260                 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
    261                 if(rc!=0) {
    262                     return rc;
    263                 }
    264             }
    265             ++str1;
    266             ++str2;
    267         }
    268     }
    269 }
    270 
    271 U_CAPI int U_EXPORT2
    272 uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
    273     if(str1==NULL) {
    274         if(str2==NULL) {
    275             return 0;
    276         } else {
    277             return -1;
    278         }
    279     } else if(str2==NULL) {
    280         return 1;
    281     } else {
    282         /* compare non-NULL strings lexically with lowercase */
    283         int rc;
    284         unsigned char c1, c2;
    285 
    286         for(; n--;) {
    287             c1=(unsigned char)*str1;
    288             c2=(unsigned char)*str2;
    289             if(c1==0) {
    290                 if(c2==0) {
    291                     return 0;
    292                 } else {
    293                     return -1;
    294                 }
    295             } else if(c2==0) {
    296                 return 1;
    297             } else {
    298                 /* compare non-zero characters with lowercase */
    299                 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
    300                 if(rc!=0) {
    301                     return rc;
    302                 }
    303             }
    304             ++str1;
    305             ++str2;
    306         }
    307     }
    308 
    309     return 0;
    310 }
    311 
    312 U_CAPI char* U_EXPORT2
    313 uprv_strdup(const char *src) {
    314     size_t len = uprv_strlen(src) + 1;
    315     char *dup = (char *) uprv_malloc(len);
    316 
    317     if (dup) {
    318         uprv_memcpy(dup, src, len);
    319     }
    320 
    321     return dup;
    322 }
    323 
    324 U_CAPI char* U_EXPORT2
    325 uprv_strndup(const char *src, int32_t n) {
    326     char *dup;
    327 
    328     if(n < 0) {
    329         dup = uprv_strdup(src);
    330     } else {
    331         dup = (char*)uprv_malloc(n+1);
    332         if (dup) {
    333             uprv_memcpy(dup, src, n);
    334             dup[n] = 0;
    335         }
    336     }
    337 
    338     return dup;
    339 }
    340