Home | History | Annotate | Download | only in i18n
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2014, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *
      7 * scriptset.cpp
      8 *
      9 * created on: 2013 Jan 7
     10 * created by: Andy Heninger
     11 */
     12 
     13 #include "unicode/utypes.h"
     14 
     15 #include "unicode/uchar.h"
     16 #include "unicode/unistr.h"
     17 
     18 #include "scriptset.h"
     19 #include "uassert.h"
     20 #include "cmemory.h"
     21 
     22 U_NAMESPACE_BEGIN
     23 
     24 //----------------------------------------------------------------------------
     25 //
     26 //  ScriptSet implementation
     27 //
     28 //----------------------------------------------------------------------------
     29 ScriptSet::ScriptSet() {
     30     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
     31         bits[i] = 0;
     32     }
     33 }
     34 
     35 ScriptSet::~ScriptSet() {
     36 }
     37 
     38 ScriptSet::ScriptSet(const ScriptSet &other) {
     39     *this = other;
     40 }
     41 
     42 
     43 ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
     44     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
     45         bits[i] = other.bits[i];
     46     }
     47     return *this;
     48 }
     49 
     50 
     51 UBool ScriptSet::operator == (const ScriptSet &other) const {
     52     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
     53         if (bits[i] != other.bits[i]) {
     54             return FALSE;
     55         }
     56     }
     57     return TRUE;
     58 }
     59 
     60 UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
     61     if (U_FAILURE(status)) {
     62         return FALSE;
     63     }
     64     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
     65         status = U_ILLEGAL_ARGUMENT_ERROR;
     66         return FALSE;
     67     }
     68     uint32_t index = script / 32;
     69     uint32_t bit   = 1 << (script & 31);
     70     return ((bits[index] & bit) != 0);
     71 }
     72 
     73 
     74 ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
     75     if (U_FAILURE(status)) {
     76         return *this;
     77     }
     78     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
     79         status = U_ILLEGAL_ARGUMENT_ERROR;
     80         return *this;
     81     }
     82     uint32_t index = script / 32;
     83     uint32_t bit   = 1 << (script & 31);
     84     bits[index] |= bit;
     85     return *this;
     86 }
     87 
     88 ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
     89     if (U_FAILURE(status)) {
     90         return *this;
     91     }
     92     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
     93         status = U_ILLEGAL_ARGUMENT_ERROR;
     94         return *this;
     95     }
     96     uint32_t index = script / 32;
     97     uint32_t bit   = 1 << (script & 31);
     98     bits[index] &= ~bit;
     99     return *this;
    100 }
    101 
    102 
    103 
    104 ScriptSet &ScriptSet::Union(const ScriptSet &other) {
    105     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    106         bits[i] |= other.bits[i];
    107     }
    108     return *this;
    109 }
    110 
    111 ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
    112     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    113         bits[i] &= other.bits[i];
    114     }
    115     return *this;
    116 }
    117 
    118 ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
    119     ScriptSet t;
    120     t.set(script, status);
    121     if (U_SUCCESS(status)) {
    122         this->intersect(t);
    123     }
    124     return *this;
    125 }
    126 
    127 UBool ScriptSet::intersects(const ScriptSet &other) const {
    128     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    129         if ((bits[i] & other.bits[i]) != 0) {
    130             return true;
    131         }
    132     }
    133     return false;
    134 }
    135 
    136 UBool ScriptSet::contains(const ScriptSet &other) const {
    137     ScriptSet t(*this);
    138     t.intersect(other);
    139     return (t == other);
    140 }
    141 
    142 
    143 ScriptSet &ScriptSet::setAll() {
    144     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    145         bits[i] = 0xffffffffu;
    146     }
    147     return *this;
    148 }
    149 
    150 
    151 ScriptSet &ScriptSet::resetAll() {
    152     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    153         bits[i] = 0;
    154     }
    155     return *this;
    156 }
    157 
    158 int32_t ScriptSet::countMembers() const {
    159     // This bit counter is good for sparse numbers of '1's, which is
    160     //  very much the case that we will usually have.
    161     int32_t count = 0;
    162     for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    163         uint32_t x = bits[i];
    164         while (x > 0) {
    165             count++;
    166             x &= (x - 1);    // and off the least significant one bit.
    167         }
    168     }
    169     return count;
    170 }
    171 
    172 int32_t ScriptSet::hashCode() const {
    173     int32_t hash = 0;
    174     for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    175         hash ^= bits[i];
    176     }
    177     return hash;
    178 }
    179 
    180 int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
    181     // TODO: Wants a better implementation.
    182     if (fromIndex < 0) {
    183         return -1;
    184     }
    185     UErrorCode status = U_ZERO_ERROR;
    186     for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) {
    187         if (test((UScriptCode)scriptIndex, status)) {
    188             return scriptIndex;
    189         }
    190     }
    191     return -1;
    192 }
    193 
    194 UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
    195     UBool firstTime = TRUE;
    196     for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
    197         if (!firstTime) {
    198             dest.append((UChar)0x20);
    199         }
    200         firstTime = FALSE;
    201         const char *scriptName = uscript_getShortName((UScriptCode(i)));
    202         dest.append(UnicodeString(scriptName, -1, US_INV));
    203     }
    204     return dest;
    205 }
    206 
    207 ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
    208     resetAll();
    209     if (U_FAILURE(status)) {
    210         return *this;
    211     }
    212     UnicodeString oneScriptName;
    213     for (int32_t i=0; i<scriptString.length();) {
    214         UChar32 c = scriptString.char32At(i);
    215         i = scriptString.moveIndex32(i, 1);
    216         if (!u_isUWhiteSpace(c)) {
    217             oneScriptName.append(c);
    218             if (i < scriptString.length()) {
    219                 continue;
    220             }
    221         }
    222         if (oneScriptName.length() > 0) {
    223             char buf[40];
    224             oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
    225             buf[sizeof(buf)-1] = 0;
    226             int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
    227             if (sc == UCHAR_INVALID_CODE) {
    228                 status = U_ILLEGAL_ARGUMENT_ERROR;
    229             } else {
    230                 this->set((UScriptCode)sc, status);
    231             }
    232             if (U_FAILURE(status)) {
    233                 return *this;
    234             }
    235             oneScriptName.remove();
    236         }
    237     }
    238     return *this;
    239 }
    240 
    241 U_NAMESPACE_END
    242 
    243 U_CAPI UBool U_EXPORT2
    244 uhash_equalsScriptSet(const UElement key1, const UElement key2) {
    245     icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
    246     icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
    247     return (*s1 == *s2);
    248 }
    249 
    250 U_CAPI int8_t U_EXPORT2
    251 uhash_compareScriptSet(UElement key0, UElement key1) {
    252     icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
    253     icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
    254     int32_t diff = s0->countMembers() - s1->countMembers();
    255     if (diff != 0) return diff;
    256     int32_t i0 = s0->nextSetBit(0);
    257     int32_t i1 = s1->nextSetBit(0);
    258     while ((diff = i0-i1) == 0 && i0 > 0) {
    259         i0 = s0->nextSetBit(i0+1);
    260         i1 = s1->nextSetBit(i1+1);
    261     }
    262     return (int8_t)diff;
    263 }
    264 
    265 U_CAPI int32_t U_EXPORT2
    266 uhash_hashScriptSet(const UElement key) {
    267     icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
    268     return s->hashCode();
    269 }
    270 
    271 U_CAPI void U_EXPORT2
    272 uhash_deleteScriptSet(void *obj) {
    273     icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
    274     delete s;
    275 }
    276