Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 1999-2007, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  unistr_case.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:2
     12 *
     13 *   created on: 2004aug19
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Case-mapping functions moved here from unistr.cpp
     17 */
     18 
     19 #include "unicode/utypes.h"
     20 #include "unicode/putil.h"
     21 #include "unicode/locid.h"
     22 #include "cstring.h"
     23 #include "cmemory.h"
     24 #include "unicode/ustring.h"
     25 #include "unicode/unistr.h"
     26 #include "unicode/uchar.h"
     27 #include "unicode/ubrk.h"
     28 #include "ustr_imp.h"
     29 #include "unormimp.h"
     30 #include "uhash.h"
     31 
     32 U_NAMESPACE_BEGIN
     33 
     34 //========================================
     35 // Read-only implementation
     36 //========================================
     37 
     38 int8_t
     39 UnicodeString::doCaseCompare(int32_t start,
     40                              int32_t length,
     41                              const UChar *srcChars,
     42                              int32_t srcStart,
     43                              int32_t srcLength,
     44                              uint32_t options) const
     45 {
     46   // compare illegal string values
     47   // treat const UChar *srcChars==NULL as an empty string
     48   if(isBogus()) {
     49     return -1;
     50   }
     51 
     52   // pin indices to legal values
     53   pinIndices(start, length);
     54 
     55   if(srcChars == NULL) {
     56     srcStart = srcLength = 0;
     57   }
     58 
     59   // get the correct pointer
     60   const UChar *chars = getArrayStart();
     61 
     62   chars += start;
     63   srcChars += srcStart;
     64 
     65   if(chars != srcChars) {
     66     UErrorCode errorCode=U_ZERO_ERROR;
     67     int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
     68                                 options|U_COMPARE_IGNORE_CASE, &errorCode);
     69     if(result!=0) {
     70       return (int8_t)(result >> 24 | 1);
     71     }
     72   } else {
     73     // get the srcLength if necessary
     74     if(srcLength < 0) {
     75       srcLength = u_strlen(srcChars + srcStart);
     76     }
     77     if(length != srcLength) {
     78       return (int8_t)((length - srcLength) >> 24 | 1);
     79     }
     80   }
     81   return 0;
     82 }
     83 
     84 //========================================
     85 // Write implementation
     86 //========================================
     87 
     88 /*
     89  * Implement argument checking and buffer handling
     90  * for string case mapping as a common function.
     91  */
     92 
     93 UnicodeString &
     94 UnicodeString::caseMap(BreakIterator *titleIter,
     95                        const char *locale,
     96                        uint32_t options,
     97                        int32_t toWhichCase) {
     98   if(isEmpty() || !isWritable()) {
     99     // nothing to do
    100     return *this;
    101   }
    102 
    103   UErrorCode errorCode;
    104 
    105   errorCode = U_ZERO_ERROR;
    106   const UCaseProps *csp=ucase_getSingleton(&errorCode);
    107   if(U_FAILURE(errorCode)) {
    108     setToBogus();
    109     return *this;
    110   }
    111 
    112   // We need to allocate a new buffer for the internal string case mapping function.
    113   // This is very similar to how doReplace() keeps the old array pointer
    114   // and deletes the old array itself after it is done.
    115   // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
    116   UChar oldStackBuffer[US_STACKBUF_SIZE];
    117   UChar *oldArray;
    118   int32_t oldLength;
    119 
    120   if(fFlags&kUsingStackBuffer) {
    121     // copy the stack buffer contents because it will be overwritten
    122     u_memcpy(oldStackBuffer, fUnion.fStackBuffer, fShortLength);
    123     oldArray = oldStackBuffer;
    124     oldLength = fShortLength;
    125   } else {
    126     oldArray = getArrayStart();
    127     oldLength = length();
    128   }
    129 
    130   int32_t capacity;
    131   if(oldLength <= US_STACKBUF_SIZE) {
    132     capacity = US_STACKBUF_SIZE;
    133   } else {
    134     capacity = oldLength + 20;
    135   }
    136   int32_t *bufferToDelete = 0;
    137   if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
    138     return *this;
    139   }
    140 
    141   // Case-map, and if the result is too long, then reallocate and repeat.
    142   int32_t newLength;
    143   do {
    144     errorCode = U_ZERO_ERROR;
    145     if(toWhichCase==TO_LOWER) {
    146       newLength = ustr_toLower(csp, getArrayStart(), getCapacity(),
    147                                oldArray, oldLength,
    148                                locale, &errorCode);
    149     } else if(toWhichCase==TO_UPPER) {
    150       newLength = ustr_toUpper(csp, getArrayStart(), getCapacity(),
    151                                oldArray, oldLength,
    152                                locale, &errorCode);
    153     } else if(toWhichCase==TO_TITLE) {
    154 #if UCONFIG_NO_BREAK_ITERATION
    155         errorCode=U_UNSUPPORTED_ERROR;
    156 #else
    157       newLength = ustr_toTitle(csp, getArrayStart(), getCapacity(),
    158                                oldArray, oldLength,
    159                                (UBreakIterator *)titleIter, locale, options, &errorCode);
    160 #endif
    161     } else {
    162       newLength = ustr_foldCase(csp, getArrayStart(), getCapacity(),
    163                                 oldArray, oldLength,
    164                                 options,
    165                                 &errorCode);
    166     }
    167     setLength(newLength);
    168   } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
    169 
    170   if (bufferToDelete) {
    171     uprv_free(bufferToDelete);
    172   }
    173   if(U_FAILURE(errorCode)) {
    174     setToBogus();
    175   }
    176   return *this;
    177 }
    178 
    179 UnicodeString &
    180 UnicodeString::toLower() {
    181   return caseMap(0, Locale::getDefault().getName(), 0, TO_LOWER);
    182 }
    183 
    184 UnicodeString &
    185 UnicodeString::toLower(const Locale &locale) {
    186   return caseMap(0, locale.getName(), 0, TO_LOWER);
    187 }
    188 
    189 UnicodeString &
    190 UnicodeString::toUpper() {
    191   return caseMap(0, Locale::getDefault().getName(), 0, TO_UPPER);
    192 }
    193 
    194 UnicodeString &
    195 UnicodeString::toUpper(const Locale &locale) {
    196   return caseMap(0, locale.getName(), 0, TO_UPPER);
    197 }
    198 
    199 #if !UCONFIG_NO_BREAK_ITERATION
    200 
    201 UnicodeString &
    202 UnicodeString::toTitle(BreakIterator *titleIter) {
    203   return caseMap(titleIter, Locale::getDefault().getName(), 0, TO_TITLE);
    204 }
    205 
    206 UnicodeString &
    207 UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
    208   return caseMap(titleIter, locale.getName(), 0, TO_TITLE);
    209 }
    210 
    211 UnicodeString &
    212 UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) {
    213   return caseMap(titleIter, locale.getName(), options, TO_TITLE);
    214 }
    215 
    216 #endif
    217 
    218 UnicodeString &
    219 UnicodeString::foldCase(uint32_t options) {
    220     /* The Locale parameter isn't used. Use "" instead. */
    221     return caseMap(0, "", options, FOLD_CASE);
    222 }
    223 
    224 U_NAMESPACE_END
    225 
    226 // Defined here to reduce dependencies on break iterator
    227 U_CAPI int32_t U_EXPORT2
    228 uhash_hashCaselessUnicodeString(const UHashTok key) {
    229     U_NAMESPACE_USE
    230     const UnicodeString *str = (const UnicodeString*) key.pointer;
    231     if (str == NULL) {
    232         return 0;
    233     }
    234     // Inefficient; a better way would be to have a hash function in
    235     // UnicodeString that does case folding on the fly.
    236     UnicodeString copy(*str);
    237     return copy.foldCase().hashCode();
    238 }
    239 
    240 // Defined here to reduce dependencies on break iterator
    241 U_CAPI UBool U_EXPORT2
    242 uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2) {
    243     U_NAMESPACE_USE
    244     const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
    245     const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
    246     if (str1 == str2) {
    247         return TRUE;
    248     }
    249     if (str1 == NULL || str2 == NULL) {
    250         return FALSE;
    251     }
    252     return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
    253 }
    254 
    255