1 /* 2 ********************************************************************** 3 * Copyright (C) 2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * 7 * scriptset.cpp 8 * 9 * created on: 2013 Jan 7 10 * created by: Andy Heninger 11 */ 12 13 #include "unicode/utypes.h" 14 15 #include "unicode/uchar.h" 16 #include "unicode/unistr.h" 17 18 #include "scriptset.h" 19 #include "uassert.h" 20 #include "cmemory.h" 21 22 U_NAMESPACE_BEGIN 23 24 //---------------------------------------------------------------------------- 25 // 26 // ScriptSet implementation 27 // 28 //---------------------------------------------------------------------------- 29 ScriptSet::ScriptSet() { 30 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 31 bits[i] = 0; 32 } 33 } 34 35 ScriptSet::~ScriptSet() { 36 } 37 38 ScriptSet::ScriptSet(const ScriptSet &other) { 39 *this = other; 40 } 41 42 43 ScriptSet & ScriptSet::operator =(const ScriptSet &other) { 44 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 45 bits[i] = other.bits[i]; 46 } 47 return *this; 48 } 49 50 51 UBool ScriptSet::operator == (const ScriptSet &other) const { 52 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 53 if (bits[i] != other.bits[i]) { 54 return FALSE; 55 } 56 } 57 return TRUE; 58 } 59 60 UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const { 61 if (U_FAILURE(status)) { 62 return FALSE; 63 } 64 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { 65 status = U_ILLEGAL_ARGUMENT_ERROR; 66 return FALSE; 67 } 68 uint32_t index = script / 32; 69 uint32_t bit = 1 << (script & 31); 70 return ((bits[index] & bit) != 0); 71 } 72 73 74 ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) { 75 if (U_FAILURE(status)) { 76 return *this; 77 } 78 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { 79 status = U_ILLEGAL_ARGUMENT_ERROR; 80 return *this; 81 } 82 uint32_t index = script / 32; 83 uint32_t bit = 1 << (script & 31); 84 bits[index] |= bit; 85 return *this; 86 } 87 88 ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) { 89 if (U_FAILURE(status)) { 90 return *this; 91 } 92 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { 93 status = U_ILLEGAL_ARGUMENT_ERROR; 94 return *this; 95 } 96 uint32_t index = script / 32; 97 uint32_t bit = 1 << (script & 31); 98 bits[index] &= ~bit; 99 return *this; 100 } 101 102 103 104 ScriptSet &ScriptSet::Union(const ScriptSet &other) { 105 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 106 bits[i] |= other.bits[i]; 107 } 108 return *this; 109 } 110 111 ScriptSet &ScriptSet::intersect(const ScriptSet &other) { 112 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 113 bits[i] &= other.bits[i]; 114 } 115 return *this; 116 } 117 118 ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) { 119 ScriptSet t; 120 t.set(script, status); 121 if (U_SUCCESS(status)) { 122 this->intersect(t); 123 } 124 return *this; 125 } 126 127 UBool ScriptSet::intersects(const ScriptSet &other) const { 128 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 129 if ((bits[i] & other.bits[i]) != 0) { 130 return true; 131 } 132 } 133 return false; 134 } 135 136 UBool ScriptSet::contains(const ScriptSet &other) const { 137 ScriptSet t(*this); 138 t.intersect(other); 139 return (t == other); 140 } 141 142 143 ScriptSet &ScriptSet::setAll() { 144 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 145 bits[i] = 0xffffffffu; 146 } 147 return *this; 148 } 149 150 151 ScriptSet &ScriptSet::resetAll() { 152 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 153 bits[i] = 0; 154 } 155 return *this; 156 } 157 158 int32_t ScriptSet::countMembers() const { 159 // This bit counter is good for sparse numbers of '1's, which is 160 // very much the case that we will usually have. 161 int32_t count = 0; 162 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 163 uint32_t x = bits[i]; 164 while (x > 0) { 165 count++; 166 x &= (x - 1); // and off the least significant one bit. 167 } 168 } 169 return count; 170 } 171 172 int32_t ScriptSet::hashCode() const { 173 int32_t hash = 0; 174 for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 175 hash ^= bits[i]; 176 } 177 return hash; 178 } 179 180 int32_t ScriptSet::nextSetBit(int32_t fromIndex) const { 181 // TODO: Wants a better implementation. 182 if (fromIndex < 0) { 183 return -1; 184 } 185 UErrorCode status = U_ZERO_ERROR; 186 for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) { 187 if (test((UScriptCode)scriptIndex, status)) { 188 return scriptIndex; 189 } 190 } 191 return -1; 192 } 193 194 UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const { 195 UBool firstTime = TRUE; 196 for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) { 197 if (!firstTime) { 198 dest.append((UChar)0x20); 199 } 200 firstTime = FALSE; 201 const char *scriptName = uscript_getShortName((UScriptCode(i))); 202 dest.append(UnicodeString(scriptName, -1, US_INV)); 203 } 204 return dest; 205 } 206 207 ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) { 208 resetAll(); 209 if (U_FAILURE(status)) { 210 return *this; 211 } 212 UnicodeString oneScriptName; 213 for (int32_t i=0; i<scriptString.length();) { 214 UChar32 c = scriptString.char32At(i); 215 i = scriptString.moveIndex32(i, 1); 216 if (!u_isUWhiteSpace(c)) { 217 oneScriptName.append(c); 218 if (i < scriptString.length()) { 219 continue; 220 } 221 } 222 if (oneScriptName.length() > 0) { 223 char buf[40]; 224 oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV); 225 buf[sizeof(buf)-1] = 0; 226 int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf); 227 if (sc == UCHAR_INVALID_CODE) { 228 status = U_ILLEGAL_ARGUMENT_ERROR; 229 } else { 230 this->set((UScriptCode)sc, status); 231 } 232 if (U_FAILURE(status)) { 233 return *this; 234 } 235 oneScriptName.remove(); 236 } 237 } 238 return *this; 239 } 240 241 U_NAMESPACE_END 242 243 U_CAPI UBool U_EXPORT2 244 uhash_equalsScriptSet(const UElement key1, const UElement key2) { 245 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); 246 icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer); 247 return (*s1 == *s2); 248 } 249 250 U_CAPI int8_t U_EXPORT2 251 uhash_compareScriptSet(UElement key0, UElement key1) { 252 icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer); 253 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); 254 int32_t diff = s0->countMembers() - s1->countMembers(); 255 if (diff != 0) return diff; 256 int32_t i0 = s0->nextSetBit(0); 257 int32_t i1 = s1->nextSetBit(0); 258 while ((diff = i0-i1) == 0 && i0 > 0) { 259 i0 = s0->nextSetBit(i0+1); 260 i1 = s1->nextSetBit(i1+1); 261 } 262 return (int8_t)diff; 263 } 264 265 U_CAPI int32_t U_EXPORT2 266 uhash_hashScriptSet(const UElement key) { 267 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer); 268 return s->hashCode(); 269 } 270 271 U_CAPI void U_EXPORT2 272 uhash_deleteScriptSet(void *obj) { 273 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj); 274 delete s; 275 } 276