1 /* 2 ********************************************************************** 3 * Copyright (C) 2013, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * 7 * scriptset.cpp 8 * 9 * created on: 2013 Jan 7 10 * created by: Andy Heninger 11 */ 12 13 #include "unicode/utypes.h" 14 15 #include "unicode/uchar.h" 16 #include "unicode/unistr.h" 17 18 #include "scriptset.h" 19 #include "uassert.h" 20 21 U_NAMESPACE_BEGIN 22 23 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 24 25 //---------------------------------------------------------------------------- 26 // 27 // ScriptSet implementation 28 // 29 //---------------------------------------------------------------------------- 30 ScriptSet::ScriptSet() { 31 for (uint32_t i=0; i<LENGTHOF(bits); i++) { 32 bits[i] = 0; 33 } 34 } 35 36 ScriptSet::~ScriptSet() { 37 } 38 39 ScriptSet::ScriptSet(const ScriptSet &other) { 40 *this = other; 41 } 42 43 44 ScriptSet & ScriptSet::operator =(const ScriptSet &other) { 45 for (uint32_t i=0; i<LENGTHOF(bits); i++) { 46 bits[i] = other.bits[i]; 47 } 48 return *this; 49 } 50 51 52 UBool ScriptSet::operator == (const ScriptSet &other) const { 53 for (uint32_t i=0; i<LENGTHOF(bits); i++) { 54 if (bits[i] != other.bits[i]) { 55 return FALSE; 56 } 57 } 58 return TRUE; 59 } 60 61 UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const { 62 if (U_FAILURE(status)) { 63 return FALSE; 64 } 65 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { 66 status = U_ILLEGAL_ARGUMENT_ERROR; 67 return FALSE; 68 } 69 uint32_t index = script / 32; 70 uint32_t bit = 1 << (script & 31); 71 return ((bits[index] & bit) != 0); 72 } 73 74 75 ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) { 76 if (U_FAILURE(status)) { 77 return *this; 78 } 79 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { 80 status = U_ILLEGAL_ARGUMENT_ERROR; 81 return *this; 82 } 83 uint32_t index = script / 32; 84 uint32_t bit = 1 << (script & 31); 85 bits[index] |= bit; 86 return *this; 87 } 88 89 ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) { 90 if (U_FAILURE(status)) { 91 return *this; 92 } 93 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { 94 status = U_ILLEGAL_ARGUMENT_ERROR; 95 return *this; 96 } 97 uint32_t index = script / 32; 98 uint32_t bit = 1 << (script & 31); 99 bits[index] &= ~bit; 100 return *this; 101 } 102 103 104 105 ScriptSet &ScriptSet::Union(const ScriptSet &other) { 106 for (uint32_t i=0; i<LENGTHOF(bits); i++) { 107 bits[i] |= other.bits[i]; 108 } 109 return *this; 110 } 111 112 ScriptSet &ScriptSet::intersect(const ScriptSet &other) { 113 for (uint32_t i=0; i<LENGTHOF(bits); i++) { 114 bits[i] &= other.bits[i]; 115 } 116 return *this; 117 } 118 119 ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) { 120 ScriptSet t; 121 t.set(script, status); 122 if (U_SUCCESS(status)) { 123 this->intersect(t); 124 } 125 return *this; 126 } 127 128 UBool ScriptSet::intersects(const ScriptSet &other) const { 129 for (uint32_t i=0; i<LENGTHOF(bits); i++) { 130 if ((bits[i] & other.bits[i]) != 0) { 131 return true; 132 } 133 } 134 return false; 135 } 136 137 UBool ScriptSet::contains(const ScriptSet &other) const { 138 ScriptSet t(*this); 139 t.intersect(other); 140 return (t == other); 141 } 142 143 144 ScriptSet &ScriptSet::setAll() { 145 for (uint32_t i=0; i<LENGTHOF(bits); i++) { 146 bits[i] = 0xffffffffu; 147 } 148 return *this; 149 } 150 151 152 ScriptSet &ScriptSet::resetAll() { 153 for (uint32_t i=0; i<LENGTHOF(bits); i++) { 154 bits[i] = 0; 155 } 156 return *this; 157 } 158 159 int32_t ScriptSet::countMembers() const { 160 // This bit counter is good for sparse numbers of '1's, which is 161 // very much the case that we will usually have. 162 int32_t count = 0; 163 for (uint32_t i=0; i<LENGTHOF(bits); i++) { 164 uint32_t x = bits[i]; 165 while (x > 0) { 166 count++; 167 x &= (x - 1); // and off the least significant one bit. 168 } 169 } 170 return count; 171 } 172 173 int32_t ScriptSet::hashCode() const { 174 int32_t hash = 0; 175 for (int32_t i=0; i<LENGTHOF(bits); i++) { 176 hash ^= bits[i]; 177 } 178 return hash; 179 } 180 181 int32_t ScriptSet::nextSetBit(int32_t fromIndex) const { 182 // TODO: Wants a better implementation. 183 if (fromIndex < 0) { 184 return -1; 185 } 186 UErrorCode status = U_ZERO_ERROR; 187 for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) { 188 if (test((UScriptCode)scriptIndex, status)) { 189 return scriptIndex; 190 } 191 } 192 return -1; 193 } 194 195 UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const { 196 UBool firstTime = TRUE; 197 for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) { 198 if (!firstTime) { 199 dest.append(0x20); 200 } 201 firstTime = FALSE; 202 const char *scriptName = uscript_getShortName((UScriptCode(i))); 203 dest.append(UnicodeString(scriptName, -1, US_INV)); 204 } 205 return dest; 206 } 207 208 ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) { 209 resetAll(); 210 if (U_FAILURE(status)) { 211 return *this; 212 } 213 UnicodeString oneScriptName; 214 for (int32_t i=0; i<scriptString.length();) { 215 UChar32 c = scriptString.char32At(i); 216 i = scriptString.moveIndex32(i, 1); 217 if (!u_isUWhiteSpace(c)) { 218 oneScriptName.append(c); 219 if (i < scriptString.length()) { 220 continue; 221 } 222 } 223 if (oneScriptName.length() > 0) { 224 char buf[40]; 225 oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV); 226 buf[sizeof(buf)-1] = 0; 227 int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf); 228 if (sc == UCHAR_INVALID_CODE) { 229 status = U_ILLEGAL_ARGUMENT_ERROR; 230 } else { 231 this->set((UScriptCode)sc, status); 232 } 233 if (U_FAILURE(status)) { 234 return *this; 235 } 236 oneScriptName.remove(); 237 } 238 } 239 return *this; 240 } 241 242 U_NAMESPACE_END 243 244 U_CAPI UBool U_EXPORT2 245 uhash_equalsScriptSet(const UElement key1, const UElement key2) { 246 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); 247 icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer); 248 return (*s1 == *s2); 249 } 250 251 U_CAPI int8_t U_EXPORT2 252 uhash_compareScriptSet(UElement key0, UElement key1) { 253 icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer); 254 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); 255 int32_t diff = s0->countMembers() - s1->countMembers(); 256 if (diff != 0) return diff; 257 int32_t i0 = s0->nextSetBit(0); 258 int32_t i1 = s1->nextSetBit(0); 259 while ((diff = i0-i1) == 0 && i0 > 0) { 260 i0 = s0->nextSetBit(i0+1); 261 i1 = s1->nextSetBit(i1+1); 262 } 263 return (int8_t)diff; 264 } 265 266 U_CAPI int32_t U_EXPORT2 267 uhash_hashScriptSet(const UElement key) { 268 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer); 269 return s->hashCode(); 270 } 271 272 U_CAPI void U_EXPORT2 273 uhash_deleteScriptSet(void *obj) { 274 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj); 275 delete s; 276 } 277