Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 1999-2014, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  unistr_case.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:2
     12 *
     13 *   created on: 2004aug19
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Case-mapping functions moved here from unistr.cpp
     17 */
     18 
     19 #include "unicode/utypes.h"
     20 #include "unicode/putil.h"
     21 #include "cstring.h"
     22 #include "cmemory.h"
     23 #include "unicode/ustring.h"
     24 #include "unicode/unistr.h"
     25 #include "unicode/uchar.h"
     26 #include "uelement.h"
     27 #include "ustr_imp.h"
     28 
     29 U_NAMESPACE_BEGIN
     30 
     31 //========================================
     32 // Read-only implementation
     33 //========================================
     34 
     35 int8_t
     36 UnicodeString::doCaseCompare(int32_t start,
     37                              int32_t length,
     38                              const UChar *srcChars,
     39                              int32_t srcStart,
     40                              int32_t srcLength,
     41                              uint32_t options) const
     42 {
     43   // compare illegal string values
     44   // treat const UChar *srcChars==NULL as an empty string
     45   if(isBogus()) {
     46     return -1;
     47   }
     48 
     49   // pin indices to legal values
     50   pinIndices(start, length);
     51 
     52   if(srcChars == NULL) {
     53     srcStart = srcLength = 0;
     54   }
     55 
     56   // get the correct pointer
     57   const UChar *chars = getArrayStart();
     58 
     59   chars += start;
     60   if(srcStart!=0) {
     61     srcChars += srcStart;
     62   }
     63 
     64   if(chars != srcChars) {
     65     UErrorCode errorCode=U_ZERO_ERROR;
     66     int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
     67                                 options|U_COMPARE_IGNORE_CASE, &errorCode);
     68     if(result!=0) {
     69       return (int8_t)(result >> 24 | 1);
     70     }
     71   } else {
     72     // get the srcLength if necessary
     73     if(srcLength < 0) {
     74       srcLength = u_strlen(srcChars + srcStart);
     75     }
     76     if(length != srcLength) {
     77       return (int8_t)((length - srcLength) >> 24 | 1);
     78     }
     79   }
     80   return 0;
     81 }
     82 
     83 //========================================
     84 // Write implementation
     85 //========================================
     86 
     87 UnicodeString &
     88 UnicodeString::caseMap(const UCaseMap *csm,
     89                        UStringCaseMapper *stringCaseMapper) {
     90   if(isEmpty() || !isWritable()) {
     91     // nothing to do
     92     return *this;
     93   }
     94 
     95   // We need to allocate a new buffer for the internal string case mapping function.
     96   // This is very similar to how doReplace() keeps the old array pointer
     97   // and deletes the old array itself after it is done.
     98   // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
     99   UChar oldStackBuffer[US_STACKBUF_SIZE];
    100   UChar *oldArray;
    101   int32_t oldLength;
    102 
    103   if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
    104     // copy the stack buffer contents because it will be overwritten
    105     oldArray = oldStackBuffer;
    106     oldLength = getShortLength();
    107     u_memcpy(oldStackBuffer, fUnion.fStackFields.fBuffer, oldLength);
    108   } else {
    109     oldArray = getArrayStart();
    110     oldLength = length();
    111   }
    112 
    113   int32_t capacity;
    114   if(oldLength <= US_STACKBUF_SIZE) {
    115     capacity = US_STACKBUF_SIZE;
    116   } else {
    117     capacity = oldLength + 20;
    118   }
    119   int32_t *bufferToDelete = 0;
    120   if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
    121     return *this;
    122   }
    123 
    124   // Case-map, and if the result is too long, then reallocate and repeat.
    125   UErrorCode errorCode;
    126   int32_t newLength;
    127   do {
    128     errorCode = U_ZERO_ERROR;
    129     newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(),
    130                                  oldArray, oldLength, &errorCode);
    131     setLength(newLength);
    132   } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
    133 
    134   if (bufferToDelete) {
    135     uprv_free(bufferToDelete);
    136   }
    137   if(U_FAILURE(errorCode)) {
    138     setToBogus();
    139   }
    140   return *this;
    141 }
    142 
    143 UnicodeString &
    144 UnicodeString::foldCase(uint32_t options) {
    145   UCaseMap csm=UCASEMAP_INITIALIZER;
    146   csm.csp=ucase_getSingleton();
    147   csm.options=options;
    148   return caseMap(&csm, ustrcase_internalFold);
    149 }
    150 
    151 U_NAMESPACE_END
    152 
    153 // Defined here to reduce dependencies on break iterator
    154 U_CAPI int32_t U_EXPORT2
    155 uhash_hashCaselessUnicodeString(const UElement key) {
    156     U_NAMESPACE_USE
    157     const UnicodeString *str = (const UnicodeString*) key.pointer;
    158     if (str == NULL) {
    159         return 0;
    160     }
    161     // Inefficient; a better way would be to have a hash function in
    162     // UnicodeString that does case folding on the fly.
    163     UnicodeString copy(*str);
    164     return copy.foldCase().hashCode();
    165 }
    166 
    167 // Defined here to reduce dependencies on break iterator
    168 U_CAPI UBool U_EXPORT2
    169 uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) {
    170     U_NAMESPACE_USE
    171     const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
    172     const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
    173     if (str1 == str2) {
    174         return TRUE;
    175     }
    176     if (str1 == NULL || str2 == NULL) {
    177         return FALSE;
    178     }
    179     return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
    180 }
    181