Home | History | Annotate | Download | only in common
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 1999-2014, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  unistr_case.cpp
     11 *   encoding:   US-ASCII
     12 *   tab size:   8 (not used)
     13 *   indentation:2
     14 *
     15 *   created on: 2004aug19
     16 *   created by: Markus W. Scherer
     17 *
     18 *   Case-mapping functions moved here from unistr.cpp
     19 */
     20 
     21 #include "unicode/utypes.h"
     22 #include "unicode/putil.h"
     23 #include "cstring.h"
     24 #include "cmemory.h"
     25 #include "unicode/ustring.h"
     26 #include "unicode/unistr.h"
     27 #include "unicode/uchar.h"
     28 #include "uelement.h"
     29 #include "ustr_imp.h"
     30 
     31 U_NAMESPACE_BEGIN
     32 
     33 //========================================
     34 // Read-only implementation
     35 //========================================
     36 
     37 int8_t
     38 UnicodeString::doCaseCompare(int32_t start,
     39                              int32_t length,
     40                              const UChar *srcChars,
     41                              int32_t srcStart,
     42                              int32_t srcLength,
     43                              uint32_t options) const
     44 {
     45   // compare illegal string values
     46   // treat const UChar *srcChars==NULL as an empty string
     47   if(isBogus()) {
     48     return -1;
     49   }
     50 
     51   // pin indices to legal values
     52   pinIndices(start, length);
     53 
     54   if(srcChars == NULL) {
     55     srcStart = srcLength = 0;
     56   }
     57 
     58   // get the correct pointer
     59   const UChar *chars = getArrayStart();
     60 
     61   chars += start;
     62   if(srcStart!=0) {
     63     srcChars += srcStart;
     64   }
     65 
     66   if(chars != srcChars) {
     67     UErrorCode errorCode=U_ZERO_ERROR;
     68     int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
     69                                 options|U_COMPARE_IGNORE_CASE, &errorCode);
     70     if(result!=0) {
     71       return (int8_t)(result >> 24 | 1);
     72     }
     73   } else {
     74     // get the srcLength if necessary
     75     if(srcLength < 0) {
     76       srcLength = u_strlen(srcChars + srcStart);
     77     }
     78     if(length != srcLength) {
     79       return (int8_t)((length - srcLength) >> 24 | 1);
     80     }
     81   }
     82   return 0;
     83 }
     84 
     85 //========================================
     86 // Write implementation
     87 //========================================
     88 
     89 UnicodeString &
     90 UnicodeString::caseMap(const UCaseMap *csm,
     91                        UStringCaseMapper *stringCaseMapper) {
     92   if(isEmpty() || !isWritable()) {
     93     // nothing to do
     94     return *this;
     95   }
     96 
     97   // We need to allocate a new buffer for the internal string case mapping function.
     98   // This is very similar to how doReplace() keeps the old array pointer
     99   // and deletes the old array itself after it is done.
    100   // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
    101   UChar oldStackBuffer[US_STACKBUF_SIZE];
    102   UChar *oldArray;
    103   int32_t oldLength;
    104 
    105   if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
    106     // copy the stack buffer contents because it will be overwritten
    107     oldArray = oldStackBuffer;
    108     oldLength = getShortLength();
    109     u_memcpy(oldStackBuffer, fUnion.fStackFields.fBuffer, oldLength);
    110   } else {
    111     oldArray = getArrayStart();
    112     oldLength = length();
    113   }
    114 
    115   int32_t capacity;
    116   if(oldLength <= US_STACKBUF_SIZE) {
    117     capacity = US_STACKBUF_SIZE;
    118   } else {
    119     capacity = oldLength + 20;
    120   }
    121   int32_t *bufferToDelete = 0;
    122   if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
    123     return *this;
    124   }
    125 
    126   // Case-map, and if the result is too long, then reallocate and repeat.
    127   UErrorCode errorCode;
    128   int32_t newLength;
    129   do {
    130     errorCode = U_ZERO_ERROR;
    131     newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(),
    132                                  oldArray, oldLength, &errorCode);
    133     setLength(newLength);
    134   } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
    135 
    136   if (bufferToDelete) {
    137     uprv_free(bufferToDelete);
    138   }
    139   if(U_FAILURE(errorCode)) {
    140     setToBogus();
    141   }
    142   return *this;
    143 }
    144 
    145 UnicodeString &
    146 UnicodeString::foldCase(uint32_t options) {
    147   UCaseMap csm=UCASEMAP_INITIALIZER;
    148   csm.csp=ucase_getSingleton();
    149   csm.options=options;
    150   return caseMap(&csm, ustrcase_internalFold);
    151 }
    152 
    153 U_NAMESPACE_END
    154 
    155 // Defined here to reduce dependencies on break iterator
    156 U_CAPI int32_t U_EXPORT2
    157 uhash_hashCaselessUnicodeString(const UElement key) {
    158     U_NAMESPACE_USE
    159     const UnicodeString *str = (const UnicodeString*) key.pointer;
    160     if (str == NULL) {
    161         return 0;
    162     }
    163     // Inefficient; a better way would be to have a hash function in
    164     // UnicodeString that does case folding on the fly.
    165     UnicodeString copy(*str);
    166     return copy.foldCase().hashCode();
    167 }
    168 
    169 // Defined here to reduce dependencies on break iterator
    170 U_CAPI UBool U_EXPORT2
    171 uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) {
    172     U_NAMESPACE_USE
    173     const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
    174     const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
    175     if (str1 == str2) {
    176         return TRUE;
    177     }
    178     if (str1 == NULL || str2 == NULL) {
    179         return FALSE;
    180     }
    181     return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
    182 }
    183