Home | History | Annotate | Download | only in i18n
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 2013, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *
      7 * scriptset.cpp
      8 *
      9 * created on: 2013 Jan 7
     10 * created by: Andy Heninger
     11 */
     12 
     13 #include "unicode/utypes.h"
     14 
     15 #include "unicode/uchar.h"
     16 #include "unicode/unistr.h"
     17 
     18 #include "scriptset.h"
     19 #include "uassert.h"
     20 
     21 U_NAMESPACE_BEGIN
     22 
     23 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     24 
     25 //----------------------------------------------------------------------------
     26 //
     27 //  ScriptSet implementation
     28 //
     29 //----------------------------------------------------------------------------
     30 ScriptSet::ScriptSet() {
     31     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
     32         bits[i] = 0;
     33     }
     34 }
     35 
     36 ScriptSet::~ScriptSet() {
     37 }
     38 
     39 ScriptSet::ScriptSet(const ScriptSet &other) {
     40     *this = other;
     41 }
     42 
     43 
     44 ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
     45     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
     46         bits[i] = other.bits[i];
     47     }
     48     return *this;
     49 }
     50 
     51 
     52 UBool ScriptSet::operator == (const ScriptSet &other) const {
     53     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
     54         if (bits[i] != other.bits[i]) {
     55             return FALSE;
     56         }
     57     }
     58     return TRUE;
     59 }
     60 
     61 UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
     62     if (U_FAILURE(status)) {
     63         return FALSE;
     64     }
     65     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
     66         status = U_ILLEGAL_ARGUMENT_ERROR;
     67         return FALSE;
     68     }
     69     uint32_t index = script / 32;
     70     uint32_t bit   = 1 << (script & 31);
     71     return ((bits[index] & bit) != 0);
     72 }
     73 
     74 
     75 ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
     76     if (U_FAILURE(status)) {
     77         return *this;
     78     }
     79     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
     80         status = U_ILLEGAL_ARGUMENT_ERROR;
     81         return *this;
     82     }
     83     uint32_t index = script / 32;
     84     uint32_t bit   = 1 << (script & 31);
     85     bits[index] |= bit;
     86     return *this;
     87 }
     88 
     89 ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
     90     if (U_FAILURE(status)) {
     91         return *this;
     92     }
     93     if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
     94         status = U_ILLEGAL_ARGUMENT_ERROR;
     95         return *this;
     96     }
     97     uint32_t index = script / 32;
     98     uint32_t bit   = 1 << (script & 31);
     99     bits[index] &= ~bit;
    100     return *this;
    101 }
    102 
    103 
    104 
    105 ScriptSet &ScriptSet::Union(const ScriptSet &other) {
    106     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
    107         bits[i] |= other.bits[i];
    108     }
    109     return *this;
    110 }
    111 
    112 ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
    113     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
    114         bits[i] &= other.bits[i];
    115     }
    116     return *this;
    117 }
    118 
    119 ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
    120     ScriptSet t;
    121     t.set(script, status);
    122     if (U_SUCCESS(status)) {
    123         this->intersect(t);
    124     }
    125     return *this;
    126 }
    127 
    128 UBool ScriptSet::intersects(const ScriptSet &other) const {
    129     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
    130         if ((bits[i] & other.bits[i]) != 0) {
    131             return true;
    132         }
    133     }
    134     return false;
    135 }
    136 
    137 UBool ScriptSet::contains(const ScriptSet &other) const {
    138     ScriptSet t(*this);
    139     t.intersect(other);
    140     return (t == other);
    141 }
    142 
    143 
    144 ScriptSet &ScriptSet::setAll() {
    145     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
    146         bits[i] = 0xffffffffu;
    147     }
    148     return *this;
    149 }
    150 
    151 
    152 ScriptSet &ScriptSet::resetAll() {
    153     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
    154         bits[i] = 0;
    155     }
    156     return *this;
    157 }
    158 
    159 int32_t ScriptSet::countMembers() const {
    160     // This bit counter is good for sparse numbers of '1's, which is
    161     //  very much the case that we will usually have.
    162     int32_t count = 0;
    163     for (uint32_t i=0; i<LENGTHOF(bits); i++) {
    164         uint32_t x = bits[i];
    165         while (x > 0) {
    166             count++;
    167             x &= (x - 1);    // and off the least significant one bit.
    168         }
    169     }
    170     return count;
    171 }
    172 
    173 int32_t ScriptSet::hashCode() const {
    174     int32_t hash = 0;
    175     for (int32_t i=0; i<LENGTHOF(bits); i++) {
    176         hash ^= bits[i];
    177     }
    178     return hash;
    179 }
    180 
    181 int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
    182     // TODO: Wants a better implementation.
    183     if (fromIndex < 0) {
    184         return -1;
    185     }
    186     UErrorCode status = U_ZERO_ERROR;
    187     for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) {
    188         if (test((UScriptCode)scriptIndex, status)) {
    189             return scriptIndex;
    190         }
    191     }
    192     return -1;
    193 }
    194 
    195 UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
    196     UBool firstTime = TRUE;
    197     for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
    198         if (!firstTime) {
    199             dest.append((UChar)0x20);
    200         }
    201         firstTime = FALSE;
    202         const char *scriptName = uscript_getShortName((UScriptCode(i)));
    203         dest.append(UnicodeString(scriptName, -1, US_INV));
    204     }
    205     return dest;
    206 }
    207 
    208 ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
    209     resetAll();
    210     if (U_FAILURE(status)) {
    211         return *this;
    212     }
    213     UnicodeString oneScriptName;
    214     for (int32_t i=0; i<scriptString.length();) {
    215         UChar32 c = scriptString.char32At(i);
    216         i = scriptString.moveIndex32(i, 1);
    217         if (!u_isUWhiteSpace(c)) {
    218             oneScriptName.append(c);
    219             if (i < scriptString.length()) {
    220                 continue;
    221             }
    222         }
    223         if (oneScriptName.length() > 0) {
    224             char buf[40];
    225             oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
    226             buf[sizeof(buf)-1] = 0;
    227             int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
    228             if (sc == UCHAR_INVALID_CODE) {
    229                 status = U_ILLEGAL_ARGUMENT_ERROR;
    230             } else {
    231                 this->set((UScriptCode)sc, status);
    232             }
    233             if (U_FAILURE(status)) {
    234                 return *this;
    235             }
    236             oneScriptName.remove();
    237         }
    238     }
    239     return *this;
    240 }
    241 
    242 U_NAMESPACE_END
    243 
    244 U_CAPI UBool U_EXPORT2
    245 uhash_equalsScriptSet(const UElement key1, const UElement key2) {
    246     icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
    247     icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
    248     return (*s1 == *s2);
    249 }
    250 
    251 U_CAPI int8_t U_EXPORT2
    252 uhash_compareScriptSet(UElement key0, UElement key1) {
    253     icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
    254     icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
    255     int32_t diff = s0->countMembers() - s1->countMembers();
    256     if (diff != 0) return diff;
    257     int32_t i0 = s0->nextSetBit(0);
    258     int32_t i1 = s1->nextSetBit(0);
    259     while ((diff = i0-i1) == 0 && i0 > 0) {
    260         i0 = s0->nextSetBit(i0+1);
    261         i1 = s1->nextSetBit(i1+1);
    262     }
    263     return (int8_t)diff;
    264 }
    265 
    266 U_CAPI int32_t U_EXPORT2
    267 uhash_hashScriptSet(const UElement key) {
    268     icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
    269     return s->hashCode();
    270 }
    271 
    272 U_CAPI void U_EXPORT2
    273 uhash_deleteScriptSet(void *obj) {
    274     icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
    275     delete s;
    276 }
    277