Home | History | Annotate | Download | only in i18n
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 2014, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 *
      9 * scriptset.cpp
     10 *
     11 * created on: 2013 Jan 7
     12 * created by: Andy Heninger
     13 */
     14 
     15 #include "unicode/utypes.h"
     16 
     17 #include "unicode/uchar.h"
     18 #include "unicode/unistr.h"
     19 
     20 #include "scriptset.h"
     21 #include "uassert.h"
     22 #include "cmemory.h"
     23 
     24 U_NAMESPACE_BEGIN
     25 
     26 //----------------------------------------------------------------------------
     27 //
     28 //  ScriptSet implementation
     29 //
     30 //----------------------------------------------------------------------------
     31 ScriptSet::ScriptSet() {
     32     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
     33         bits[i] = 0;
     34     }
     35 }
     36 
     37 ScriptSet::~ScriptSet() {
     38 }
     39 
     40 ScriptSet::ScriptSet(const ScriptSet &other) {
     41     *this = other;
     42 }
     43 
     44 
     45 ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
     46     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
     47         bits[i] = other.bits[i];
     48     }
     49     return *this;
     50 }
     51 
     52 
     53 UBool ScriptSet::operator == (const ScriptSet &other) const {
     54     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
     55         if (bits[i] != other.bits[i]) {
     56             return FALSE;
     57         }
     58     }
     59     return TRUE;
     60 }
     61 
     62 UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
     63     if (U_FAILURE(status)) {
     64         return FALSE;
     65     }
     66     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
     67         status = U_ILLEGAL_ARGUMENT_ERROR;
     68         return FALSE;
     69     }
     70     uint32_t index = script / 32;
     71     uint32_t bit   = 1 << (script & 31);
     72     return ((bits[index] & bit) != 0);
     73 }
     74 
     75 
     76 ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
     77     if (U_FAILURE(status)) {
     78         return *this;
     79     }
     80     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
     81         status = U_ILLEGAL_ARGUMENT_ERROR;
     82         return *this;
     83     }
     84     uint32_t index = script / 32;
     85     uint32_t bit   = 1 << (script & 31);
     86     bits[index] |= bit;
     87     return *this;
     88 }
     89 
     90 ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
     91     if (U_FAILURE(status)) {
     92         return *this;
     93     }
     94     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
     95         status = U_ILLEGAL_ARGUMENT_ERROR;
     96         return *this;
     97     }
     98     uint32_t index = script / 32;
     99     uint32_t bit   = 1 << (script & 31);
    100     bits[index] &= ~bit;
    101     return *this;
    102 }
    103 
    104 
    105 
    106 ScriptSet &ScriptSet::Union(const ScriptSet &other) {
    107     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    108         bits[i] |= other.bits[i];
    109     }
    110     return *this;
    111 }
    112 
    113 ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
    114     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    115         bits[i] &= other.bits[i];
    116     }
    117     return *this;
    118 }
    119 
    120 ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
    121     ScriptSet t;
    122     t.set(script, status);
    123     if (U_SUCCESS(status)) {
    124         this->intersect(t);
    125     }
    126     return *this;
    127 }
    128 
    129 UBool ScriptSet::intersects(const ScriptSet &other) const {
    130     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    131         if ((bits[i] & other.bits[i]) != 0) {
    132             return true;
    133         }
    134     }
    135     return false;
    136 }
    137 
    138 UBool ScriptSet::contains(const ScriptSet &other) const {
    139     ScriptSet t(*this);
    140     t.intersect(other);
    141     return (t == other);
    142 }
    143 
    144 
    145 ScriptSet &ScriptSet::setAll() {
    146     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    147         bits[i] = 0xffffffffu;
    148     }
    149     return *this;
    150 }
    151 
    152 
    153 ScriptSet &ScriptSet::resetAll() {
    154     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    155         bits[i] = 0;
    156     }
    157     return *this;
    158 }
    159 
    160 int32_t ScriptSet::countMembers() const {
    161     // This bit counter is good for sparse numbers of '1's, which is
    162     //  very much the case that we will usually have.
    163     int32_t count = 0;
    164     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    165         uint32_t x = bits[i];
    166         while (x > 0) {
    167             count++;
    168             x &= (x - 1);    // and off the least significant one bit.
    169         }
    170     }
    171     return count;
    172 }
    173 
    174 int32_t ScriptSet::hashCode() const {
    175     int32_t hash = 0;
    176     for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    177         hash ^= bits[i];
    178     }
    179     return hash;
    180 }
    181 
    182 int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
    183     // TODO: Wants a better implementation.
    184     if (fromIndex < 0) {
    185         return -1;
    186     }
    187     UErrorCode status = U_ZERO_ERROR;
    188     for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) {
    189         if (test((UScriptCode)scriptIndex, status)) {
    190             return scriptIndex;
    191         }
    192     }
    193     return -1;
    194 }
    195 
    196 UBool ScriptSet::isEmpty() const {
    197     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    198         if (bits[i] != 0) {
    199             return FALSE;
    200         }
    201     }
    202     return TRUE;
    203 }
    204 
    205 UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
    206     UBool firstTime = TRUE;
    207     for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
    208         if (!firstTime) {
    209             dest.append((UChar)0x20);
    210         }
    211         firstTime = FALSE;
    212         const char *scriptName = uscript_getShortName((UScriptCode(i)));
    213         dest.append(UnicodeString(scriptName, -1, US_INV));
    214     }
    215     return dest;
    216 }
    217 
    218 ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
    219     resetAll();
    220     if (U_FAILURE(status)) {
    221         return *this;
    222     }
    223     UnicodeString oneScriptName;
    224     for (int32_t i=0; i<scriptString.length();) {
    225         UChar32 c = scriptString.char32At(i);
    226         i = scriptString.moveIndex32(i, 1);
    227         if (!u_isUWhiteSpace(c)) {
    228             oneScriptName.append(c);
    229             if (i < scriptString.length()) {
    230                 continue;
    231             }
    232         }
    233         if (oneScriptName.length() > 0) {
    234             char buf[40];
    235             oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
    236             buf[sizeof(buf)-1] = 0;
    237             int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
    238             if (sc == UCHAR_INVALID_CODE) {
    239                 status = U_ILLEGAL_ARGUMENT_ERROR;
    240             } else {
    241                 this->set((UScriptCode)sc, status);
    242             }
    243             if (U_FAILURE(status)) {
    244                 return *this;
    245             }
    246             oneScriptName.remove();
    247         }
    248     }
    249     return *this;
    250 }
    251 
    252 void ScriptSet::setScriptExtensions(UChar32 codePoint, UErrorCode& status) {
    253     if (U_FAILURE(status)) { return; }
    254     static const int32_t FIRST_GUESS_SCRIPT_CAPACITY = 5;
    255     MaybeStackArray<UScriptCode,FIRST_GUESS_SCRIPT_CAPACITY> scripts;
    256     UErrorCode internalStatus = U_ZERO_ERROR;
    257     int32_t script_count = -1;
    258 
    259     while (TRUE) {
    260         script_count = uscript_getScriptExtensions(
    261             codePoint, scripts.getAlias(), scripts.getCapacity(), &internalStatus);
    262         if (internalStatus == U_BUFFER_OVERFLOW_ERROR) {
    263             // Need to allocate more space
    264             if (scripts.resize(script_count) == NULL) {
    265                 status = U_MEMORY_ALLOCATION_ERROR;
    266                 return;
    267             }
    268             internalStatus = U_ZERO_ERROR;
    269         } else {
    270             break;
    271         }
    272     }
    273 
    274     // Check if we failed for some reason other than buffer overflow
    275     if (U_FAILURE(internalStatus)) {
    276         status = internalStatus;
    277         return;
    278     }
    279 
    280     // Load the scripts into the ScriptSet and return
    281     for (int32_t i = 0; i < script_count; i++) {
    282         this->set(scripts[i], status);
    283         if (U_FAILURE(status)) { return; }
    284     }
    285 }
    286 
    287 U_NAMESPACE_END
    288 
    289 U_CAPI UBool U_EXPORT2
    290 uhash_equalsScriptSet(const UElement key1, const UElement key2) {
    291     icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
    292     icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
    293     return (*s1 == *s2);
    294 }
    295 
    296 U_CAPI int8_t U_EXPORT2
    297 uhash_compareScriptSet(UElement key0, UElement key1) {
    298     icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
    299     icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
    300     int32_t diff = s0->countMembers() - s1->countMembers();
    301     if (diff != 0) return diff;
    302     int32_t i0 = s0->nextSetBit(0);
    303     int32_t i1 = s1->nextSetBit(0);
    304     while ((diff = i0-i1) == 0 && i0 > 0) {
    305         i0 = s0->nextSetBit(i0+1);
    306         i1 = s1->nextSetBit(i1+1);
    307     }
    308     return (int8_t)diff;
    309 }
    310 
    311 U_CAPI int32_t U_EXPORT2
    312 uhash_hashScriptSet(const UElement key) {
    313     icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
    314     return s->hashCode();
    315 }
    316 
    317 U_CAPI void U_EXPORT2
    318 uhash_deleteScriptSet(void *obj) {
    319     icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
    320     delete s;
    321 }
    322