1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1999-2014, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: unistr_case.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:2 12 * 13 * created on: 2004aug19 14 * created by: Markus W. Scherer 15 * 16 * Case-mapping functions moved here from unistr.cpp 17 */ 18 19 #include "unicode/utypes.h" 20 #include "unicode/putil.h" 21 #include "cstring.h" 22 #include "cmemory.h" 23 #include "unicode/ustring.h" 24 #include "unicode/unistr.h" 25 #include "unicode/uchar.h" 26 #include "uelement.h" 27 #include "ustr_imp.h" 28 29 U_NAMESPACE_BEGIN 30 31 //======================================== 32 // Read-only implementation 33 //======================================== 34 35 int8_t 36 UnicodeString::doCaseCompare(int32_t start, 37 int32_t length, 38 const UChar *srcChars, 39 int32_t srcStart, 40 int32_t srcLength, 41 uint32_t options) const 42 { 43 // compare illegal string values 44 // treat const UChar *srcChars==NULL as an empty string 45 if(isBogus()) { 46 return -1; 47 } 48 49 // pin indices to legal values 50 pinIndices(start, length); 51 52 if(srcChars == NULL) { 53 srcStart = srcLength = 0; 54 } 55 56 // get the correct pointer 57 const UChar *chars = getArrayStart(); 58 59 chars += start; 60 if(srcStart!=0) { 61 srcChars += srcStart; 62 } 63 64 if(chars != srcChars) { 65 UErrorCode errorCode=U_ZERO_ERROR; 66 int32_t result=u_strcmpFold(chars, length, srcChars, srcLength, 67 options|U_COMPARE_IGNORE_CASE, &errorCode); 68 if(result!=0) { 69 return (int8_t)(result >> 24 | 1); 70 } 71 } else { 72 // get the srcLength if necessary 73 if(srcLength < 0) { 74 srcLength = u_strlen(srcChars + srcStart); 75 } 76 if(length != srcLength) { 77 return (int8_t)((length - srcLength) >> 24 | 1); 78 } 79 } 80 return 0; 81 } 82 83 //======================================== 84 // Write implementation 85 //======================================== 86 87 UnicodeString & 88 UnicodeString::caseMap(const UCaseMap *csm, 89 UStringCaseMapper *stringCaseMapper) { 90 if(isEmpty() || !isWritable()) { 91 // nothing to do 92 return *this; 93 } 94 95 // We need to allocate a new buffer for the internal string case mapping function. 96 // This is very similar to how doReplace() keeps the old array pointer 97 // and deletes the old array itself after it is done. 98 // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array. 99 UChar oldStackBuffer[US_STACKBUF_SIZE]; 100 UChar *oldArray; 101 int32_t oldLength; 102 103 if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) { 104 // copy the stack buffer contents because it will be overwritten 105 oldArray = oldStackBuffer; 106 oldLength = getShortLength(); 107 u_memcpy(oldStackBuffer, fUnion.fStackFields.fBuffer, oldLength); 108 } else { 109 oldArray = getArrayStart(); 110 oldLength = length(); 111 } 112 113 int32_t capacity; 114 if(oldLength <= US_STACKBUF_SIZE) { 115 capacity = US_STACKBUF_SIZE; 116 } else { 117 capacity = oldLength + 20; 118 } 119 int32_t *bufferToDelete = 0; 120 if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) { 121 return *this; 122 } 123 124 // Case-map, and if the result is too long, then reallocate and repeat. 125 UErrorCode errorCode; 126 int32_t newLength; 127 do { 128 errorCode = U_ZERO_ERROR; 129 newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(), 130 oldArray, oldLength, &errorCode); 131 setLength(newLength); 132 } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE)); 133 134 if (bufferToDelete) { 135 uprv_free(bufferToDelete); 136 } 137 if(U_FAILURE(errorCode)) { 138 setToBogus(); 139 } 140 return *this; 141 } 142 143 UnicodeString & 144 UnicodeString::foldCase(uint32_t options) { 145 UCaseMap csm=UCASEMAP_INITIALIZER; 146 csm.csp=ucase_getSingleton(); 147 csm.options=options; 148 return caseMap(&csm, ustrcase_internalFold); 149 } 150 151 U_NAMESPACE_END 152 153 // Defined here to reduce dependencies on break iterator 154 U_CAPI int32_t U_EXPORT2 155 uhash_hashCaselessUnicodeString(const UElement key) { 156 U_NAMESPACE_USE 157 const UnicodeString *str = (const UnicodeString*) key.pointer; 158 if (str == NULL) { 159 return 0; 160 } 161 // Inefficient; a better way would be to have a hash function in 162 // UnicodeString that does case folding on the fly. 163 UnicodeString copy(*str); 164 return copy.foldCase().hashCode(); 165 } 166 167 // Defined here to reduce dependencies on break iterator 168 U_CAPI UBool U_EXPORT2 169 uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) { 170 U_NAMESPACE_USE 171 const UnicodeString *str1 = (const UnicodeString*) key1.pointer; 172 const UnicodeString *str2 = (const UnicodeString*) key2.pointer; 173 if (str1 == str2) { 174 return TRUE; 175 } 176 if (str1 == NULL || str2 == NULL) { 177 return FALSE; 178 } 179 return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0; 180 } 181