Home | History | Annotate | Download | only in common
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 //
      4 //  rbbisetb.cpp
      5 //
      6 /*
      7 ***************************************************************************
      8 *   Copyright (C) 2002-2008 International Business Machines Corporation   *
      9 *   and others. All rights reserved.                                      *
     10 ***************************************************************************
     11 */
     12 //
     13 //  RBBISetBuilder   Handles processing of Unicode Sets from RBBI rules
     14 //                   (part of the rule building process.)
     15 //
     16 //      Starting with the rules parse tree from the scanner,
     17 //
     18 //                   -  Enumerate the set of UnicodeSets that are referenced
     19 //                      by the RBBI rules.
     20 //                   -  compute a set of non-overlapping character ranges
     21 //                      with all characters within a range belonging to the same
     22 //                      set of input unicode sets.
     23 //                   -  Derive a set of non-overlapping UnicodeSet (like things)
     24 //                      that will correspond to columns in the state table for
     25 //                      the RBBI execution engine.  All characters within one
     26 //                      of these sets belong to the same set of the original
     27 //                      UnicodeSets from the user's rules.
     28 //                   -  construct the trie table that maps input characters
     29 //                      to the index of the matching non-overlapping set of sets from
     30 //                      the previous step.
     31 //
     32 
     33 #include "unicode/utypes.h"
     34 
     35 #if !UCONFIG_NO_BREAK_ITERATION
     36 
     37 #include "unicode/uniset.h"
     38 #include "utrie2.h"
     39 #include "uvector.h"
     40 #include "uassert.h"
     41 #include "cmemory.h"
     42 #include "cstring.h"
     43 
     44 #include "rbbisetb.h"
     45 #include "rbbinode.h"
     46 
     47 U_NAMESPACE_BEGIN
     48 
     49 //------------------------------------------------------------------------
     50 //
     51 //   Constructor
     52 //
     53 //------------------------------------------------------------------------
     54 RBBISetBuilder::RBBISetBuilder(RBBIRuleBuilder *rb)
     55 {
     56     fRB             = rb;
     57     fStatus         = rb->fStatus;
     58     fRangeList      = 0;
     59     fTrie           = 0;
     60     fTrieSize       = 0;
     61     fGroupCount     = 0;
     62     fSawBOF         = FALSE;
     63 }
     64 
     65 
     66 //------------------------------------------------------------------------
     67 //
     68 //   Destructor
     69 //
     70 //------------------------------------------------------------------------
     71 RBBISetBuilder::~RBBISetBuilder()
     72 {
     73     RangeDescriptor   *nextRangeDesc;
     74 
     75     // Walk through & delete the linked list of RangeDescriptors
     76     for (nextRangeDesc = fRangeList; nextRangeDesc!=NULL;) {
     77         RangeDescriptor *r = nextRangeDesc;
     78         nextRangeDesc      = r->fNext;
     79         delete r;
     80     }
     81 
     82     utrie2_close(fTrie);
     83 }
     84 
     85 
     86 
     87 
     88 //------------------------------------------------------------------------
     89 //
     90 //   buildRanges    Build the list of non-overlapping character ranges
     91 //                  from the Unicode Sets.
     92 //
     93 //------------------------------------------------------------------------
     94 void RBBISetBuilder::buildRanges() {
     95     RBBINode        *usetNode;
     96     RangeDescriptor *rlRange;
     97 
     98     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "usets")) {printSets();}
     99 
    100     //
    101     //  Initialize the process by creating a single range encompassing all characters
    102     //  that is in no sets.
    103     //
    104     fRangeList                = new RangeDescriptor(*fStatus); // will check for status here
    105     if (fRangeList == NULL) {
    106         *fStatus = U_MEMORY_ALLOCATION_ERROR;
    107         return;
    108     }
    109     fRangeList->fStartChar    = 0;
    110     fRangeList->fEndChar      = 0x10ffff;
    111 
    112     if (U_FAILURE(*fStatus)) {
    113         return;
    114     }
    115 
    116     //
    117     //  Find the set of non-overlapping ranges of characters
    118     //
    119     int  ni;
    120     for (ni=0; ; ni++) {        // Loop over each of the UnicodeSets encountered in the input rules
    121         usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
    122         if (usetNode==NULL) {
    123             break;
    124         }
    125 
    126         UnicodeSet      *inputSet             = usetNode->fInputSet;
    127         int32_t          inputSetRangeCount   = inputSet->getRangeCount();
    128         int              inputSetRangeIndex   = 0;
    129                          rlRange              = fRangeList;
    130 
    131         for (;;) {
    132             if (inputSetRangeIndex >= inputSetRangeCount) {
    133                 break;
    134             }
    135             UChar32      inputSetRangeBegin  = inputSet->getRangeStart(inputSetRangeIndex);
    136             UChar32      inputSetRangeEnd    = inputSet->getRangeEnd(inputSetRangeIndex);
    137 
    138             // skip over ranges from the range list that are completely
    139             //   below the current range from the input unicode set.
    140             while (rlRange->fEndChar < inputSetRangeBegin) {
    141                 rlRange = rlRange->fNext;
    142             }
    143 
    144             // If the start of the range from the range list is before with
    145             //   the start of the range from the unicode set, split the range list range
    146             //   in two, with one part being before (wholly outside of) the unicode set
    147             //   and the other containing the rest.
    148             //   Then continue the loop; the post-split current range will then be skipped
    149             //     over
    150             if (rlRange->fStartChar < inputSetRangeBegin) {
    151                 rlRange->split(inputSetRangeBegin, *fStatus);
    152                 if (U_FAILURE(*fStatus)) {
    153                     return;
    154                 }
    155                 continue;
    156             }
    157 
    158             // Same thing at the end of the ranges...
    159             // If the end of the range from the range list doesn't coincide with
    160             //   the end of the range from the unicode set, split the range list
    161             //   range in two.  The first part of the split range will be
    162             //   wholly inside the Unicode set.
    163             if (rlRange->fEndChar > inputSetRangeEnd) {
    164                 rlRange->split(inputSetRangeEnd+1, *fStatus);
    165                 if (U_FAILURE(*fStatus)) {
    166                     return;
    167                 }
    168             }
    169 
    170             // The current rlRange is now entirely within the UnicodeSet range.
    171             // Add this unicode set to the list of sets for this rlRange
    172             if (rlRange->fIncludesSets->indexOf(usetNode) == -1) {
    173                 rlRange->fIncludesSets->addElement(usetNode, *fStatus);
    174                 if (U_FAILURE(*fStatus)) {
    175                     return;
    176                 }
    177             }
    178 
    179             // Advance over ranges that we are finished with.
    180             if (inputSetRangeEnd == rlRange->fEndChar) {
    181                 inputSetRangeIndex++;
    182             }
    183             rlRange = rlRange->fNext;
    184         }
    185     }
    186 
    187     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "range")) { printRanges();}
    188 
    189     //
    190     //  Group the above ranges, with each group consisting of one or more
    191     //    ranges that are in exactly the same set of original UnicodeSets.
    192     //    The groups are numbered, and these group numbers are the set of
    193     //    input symbols recognized by the run-time state machine.
    194     //
    195     //    Numbering: # 0  (state table column 0) is unused.
    196     //               # 1  is reserved - table column 1 is for end-of-input
    197     //               # 2  is reserved - table column 2 is for beginning-in-input
    198     //               # 3  is the first range list.
    199     //
    200     RangeDescriptor *rlSearchRange;
    201     for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
    202         for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange->fNext) {
    203             if (rlRange->fIncludesSets->equals(*rlSearchRange->fIncludesSets)) {
    204                 rlRange->fNum = rlSearchRange->fNum;
    205                 break;
    206             }
    207         }
    208         if (rlRange->fNum == 0) {
    209             fGroupCount ++;
    210             rlRange->fNum = fGroupCount+2;
    211             rlRange->setDictionaryFlag();
    212             addValToSets(rlRange->fIncludesSets, fGroupCount+2);
    213         }
    214     }
    215 
    216     // Handle input sets that contain the special string {eof}.
    217     //   Column 1 of the state table is reserved for EOF on input.
    218     //   Column 2 is reserved for before-the-start-input.
    219     //            (This column can be optimized away later if there are no rule
    220     //             references to {bof}.)
    221     //   Add this column value (1 or 2) to the equivalent expression
    222     //     subtree for each UnicodeSet that contains the string {eof}
    223     //   Because {bof} and {eof} are not a characters in the normal sense,
    224     //   they doesn't affect the computation of ranges or TRIE.
    225     static const UChar eofUString[] = {0x65, 0x6f, 0x66, 0};
    226     static const UChar bofUString[] = {0x62, 0x6f, 0x66, 0};
    227 
    228     UnicodeString eofString(eofUString);
    229     UnicodeString bofString(bofUString);
    230     for (ni=0; ; ni++) {        // Loop over each of the UnicodeSets encountered in the input rules
    231         usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
    232         if (usetNode==NULL) {
    233             break;
    234         }
    235         UnicodeSet      *inputSet = usetNode->fInputSet;
    236         if (inputSet->contains(eofString)) {
    237             addValToSet(usetNode, 1);
    238         }
    239         if (inputSet->contains(bofString)) {
    240             addValToSet(usetNode, 2);
    241             fSawBOF = TRUE;
    242         }
    243     }
    244 
    245 
    246     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();}
    247     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();}
    248 }
    249 
    250 
    251 //
    252 // Build the Trie table for mapping UChar32 values to the corresponding
    253 // range group number.
    254 //
    255 void RBBISetBuilder::buildTrie() {
    256     RangeDescriptor *rlRange;
    257 
    258     fTrie = utrie2_open(0,       //  Initial value for all code points.
    259                         0,       //  Error value for out-of-range input.
    260                         fStatus);
    261 
    262     for (rlRange = fRangeList; rlRange!=0 && U_SUCCESS(*fStatus); rlRange=rlRange->fNext) {
    263         utrie2_setRange32(fTrie,
    264                           rlRange->fStartChar,     // Range start
    265                           rlRange->fEndChar,       // Range end (inclusive)
    266                           rlRange->fNum,           // value for range
    267                           TRUE,                    // Overwrite previously written values
    268                           fStatus);
    269     }
    270 }
    271 
    272 
    273 void RBBISetBuilder::mergeCategories(IntPair categories) {
    274     U_ASSERT(categories.first >= 1);
    275     U_ASSERT(categories.second > categories.first);
    276     for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) {
    277         int32_t rangeNum = rd->fNum & ~DICT_BIT;
    278         int32_t rangeDict = rd->fNum & DICT_BIT;
    279         if (rangeNum == categories.second) {
    280             rd->fNum = categories.first | rangeDict;
    281         } else if (rangeNum > categories.second) {
    282             rd->fNum--;
    283         }
    284     }
    285     --fGroupCount;
    286 }
    287 
    288 
    289 //-----------------------------------------------------------------------------------
    290 //
    291 //  getTrieSize()    Return the size that will be required to serialize the Trie.
    292 //
    293 //-----------------------------------------------------------------------------------
    294 int32_t RBBISetBuilder::getTrieSize()  {
    295     if (U_FAILURE(*fStatus)) {
    296         return 0;
    297     }
    298     utrie2_freeze(fTrie, UTRIE2_16_VALUE_BITS, fStatus);
    299     fTrieSize  = utrie2_serialize(fTrie,
    300                                   NULL,                // Buffer
    301                                   0,                   // Capacity
    302                                   fStatus);
    303     if (*fStatus == U_BUFFER_OVERFLOW_ERROR) {
    304         *fStatus = U_ZERO_ERROR;
    305     }
    306     // RBBIDebugPrintf("Trie table size is %d\n", trieSize);
    307     return fTrieSize;
    308 }
    309 
    310 
    311 //-----------------------------------------------------------------------------------
    312 //
    313 //  serializeTrie()   Put the serialized trie at the specified address.
    314 //                    Trust the caller to have given us enough memory.
    315 //                    getTrieSize() MUST be called first.
    316 //
    317 //-----------------------------------------------------------------------------------
    318 void RBBISetBuilder::serializeTrie(uint8_t *where) {
    319     utrie2_serialize(fTrie,
    320                      where,                   // Buffer
    321                      fTrieSize,               // Capacity
    322                      fStatus);
    323 }
    324 
    325 //------------------------------------------------------------------------
    326 //
    327 //  addValToSets     Add a runtime-mapped input value to each uset from a
    328 //                   list of uset nodes. (val corresponds to a state table column.)
    329 //                   For each of the original Unicode sets - which correspond
    330 //                   directly to uset nodes - a logically equivalent expression
    331 //                   is constructed in terms of the remapped runtime input
    332 //                   symbol set.  This function adds one runtime input symbol to
    333 //                   a list of sets.
    334 //
    335 //                   The "logically equivalent expression" is the tree for an
    336 //                   or-ing together of all of the symbols that go into the set.
    337 //
    338 //------------------------------------------------------------------------
    339 void  RBBISetBuilder::addValToSets(UVector *sets, uint32_t val) {
    340     int32_t       ix;
    341 
    342     for (ix=0; ix<sets->size(); ix++) {
    343         RBBINode *usetNode = (RBBINode *)sets->elementAt(ix);
    344         addValToSet(usetNode, val);
    345     }
    346 }
    347 
    348 void  RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) {
    349     RBBINode *leafNode = new RBBINode(RBBINode::leafChar);
    350     if (leafNode == NULL) {
    351         *fStatus = U_MEMORY_ALLOCATION_ERROR;
    352         return;
    353     }
    354     leafNode->fVal = (unsigned short)val;
    355     if (usetNode->fLeftChild == NULL) {
    356         usetNode->fLeftChild = leafNode;
    357         leafNode->fParent    = usetNode;
    358     } else {
    359         // There are already input symbols present for this set.
    360         // Set up an OR node, with the previous stuff as the left child
    361         //   and the new value as the right child.
    362         RBBINode *orNode = new RBBINode(RBBINode::opOr);
    363         if (orNode == NULL) {
    364             *fStatus = U_MEMORY_ALLOCATION_ERROR;
    365             return;
    366         }
    367         orNode->fLeftChild  = usetNode->fLeftChild;
    368         orNode->fRightChild = leafNode;
    369         orNode->fLeftChild->fParent  = orNode;
    370         orNode->fRightChild->fParent = orNode;
    371         usetNode->fLeftChild = orNode;
    372         orNode->fParent = usetNode;
    373     }
    374 }
    375 
    376 
    377 //------------------------------------------------------------------------
    378 //
    379 //   getNumCharCategories
    380 //
    381 //------------------------------------------------------------------------
    382 int32_t  RBBISetBuilder::getNumCharCategories() const {
    383     return fGroupCount + 3;
    384 }
    385 
    386 
    387 //------------------------------------------------------------------------
    388 //
    389 //   sawBOF
    390 //
    391 //------------------------------------------------------------------------
    392 UBool  RBBISetBuilder::sawBOF() const {
    393     return fSawBOF;
    394 }
    395 
    396 
    397 //------------------------------------------------------------------------
    398 //
    399 //   getFirstChar      Given a runtime RBBI character category, find
    400 //                     the first UChar32 that is in the set of chars
    401 //                     in the category.
    402 //------------------------------------------------------------------------
    403 UChar32  RBBISetBuilder::getFirstChar(int32_t category) const {
    404     RangeDescriptor   *rlRange;
    405     UChar32            retVal = (UChar32)-1;
    406     for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
    407         if (rlRange->fNum == category) {
    408             retVal = rlRange->fStartChar;
    409             break;
    410         }
    411     }
    412     return retVal;
    413 }
    414 
    415 
    416 
    417 //------------------------------------------------------------------------
    418 //
    419 //   printRanges        A debugging function.
    420 //                      dump out all of the range definitions.
    421 //
    422 //------------------------------------------------------------------------
    423 #ifdef RBBI_DEBUG
    424 void RBBISetBuilder::printRanges() {
    425     RangeDescriptor       *rlRange;
    426     int                    i;
    427 
    428     RBBIDebugPrintf("\n\n Nonoverlapping Ranges ...\n");
    429     for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
    430         RBBIDebugPrintf("%2i  %4x-%4x  ", rlRange->fNum, rlRange->fStartChar, rlRange->fEndChar);
    431 
    432         for (i=0; i<rlRange->fIncludesSets->size(); i++) {
    433             RBBINode       *usetNode    = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
    434             UnicodeString   setName = UNICODE_STRING("anon", 4);
    435             RBBINode       *setRef = usetNode->fParent;
    436             if (setRef != NULL) {
    437                 RBBINode *varRef = setRef->fParent;
    438                 if (varRef != NULL  &&  varRef->fType == RBBINode::varRef) {
    439                     setName = varRef->fText;
    440                 }
    441             }
    442             RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf("  ");
    443         }
    444         RBBIDebugPrintf("\n");
    445     }
    446 }
    447 #endif
    448 
    449 
    450 //------------------------------------------------------------------------
    451 //
    452 //   printRangeGroups     A debugging function.
    453 //                        dump out all of the range groups.
    454 //
    455 //------------------------------------------------------------------------
    456 #ifdef RBBI_DEBUG
    457 void RBBISetBuilder::printRangeGroups() {
    458     RangeDescriptor       *rlRange;
    459     RangeDescriptor       *tRange;
    460     int                    i;
    461     int                    lastPrintedGroupNum = 0;
    462 
    463     RBBIDebugPrintf("\nRanges grouped by Unicode Set Membership...\n");
    464     for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
    465         int groupNum = rlRange->fNum & 0xbfff;
    466         if (groupNum > lastPrintedGroupNum) {
    467             lastPrintedGroupNum = groupNum;
    468             RBBIDebugPrintf("%2i  ", groupNum);
    469 
    470             if (rlRange->fNum & DICT_BIT) { RBBIDebugPrintf(" <DICT> ");}
    471 
    472             for (i=0; i<rlRange->fIncludesSets->size(); i++) {
    473                 RBBINode       *usetNode    = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
    474                 UnicodeString   setName = UNICODE_STRING("anon", 4);
    475                 RBBINode       *setRef = usetNode->fParent;
    476                 if (setRef != NULL) {
    477                     RBBINode *varRef = setRef->fParent;
    478                     if (varRef != NULL  &&  varRef->fType == RBBINode::varRef) {
    479                         setName = varRef->fText;
    480                     }
    481                 }
    482                 RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf(" ");
    483             }
    484 
    485             i = 0;
    486             for (tRange = rlRange; tRange != 0; tRange = tRange->fNext) {
    487                 if (tRange->fNum == rlRange->fNum) {
    488                     if (i++ % 5 == 0) {
    489                         RBBIDebugPrintf("\n    ");
    490                     }
    491                     RBBIDebugPrintf("  %05x-%05x", tRange->fStartChar, tRange->fEndChar);
    492                 }
    493             }
    494             RBBIDebugPrintf("\n");
    495         }
    496     }
    497     RBBIDebugPrintf("\n");
    498 }
    499 #endif
    500 
    501 
    502 //------------------------------------------------------------------------
    503 //
    504 //   printSets          A debugging function.
    505 //                      dump out all of the set definitions.
    506 //
    507 //------------------------------------------------------------------------
    508 #ifdef RBBI_DEBUG
    509 void RBBISetBuilder::printSets() {
    510     int                   i;
    511 
    512     RBBIDebugPrintf("\n\nUnicode Sets List\n------------------\n");
    513     for (i=0; ; i++) {
    514         RBBINode        *usetNode;
    515         RBBINode        *setRef;
    516         RBBINode        *varRef;
    517         UnicodeString    setName;
    518 
    519         usetNode = (RBBINode *)fRB->fUSetNodes->elementAt(i);
    520         if (usetNode == NULL) {
    521             break;
    522         }
    523 
    524         RBBIDebugPrintf("%3d    ", i);
    525         setName = UNICODE_STRING("anonymous", 9);
    526         setRef = usetNode->fParent;
    527         if (setRef != NULL) {
    528             varRef = setRef->fParent;
    529             if (varRef != NULL  &&  varRef->fType == RBBINode::varRef) {
    530                 setName = varRef->fText;
    531             }
    532         }
    533         RBBI_DEBUG_printUnicodeString(setName);
    534         RBBIDebugPrintf("   ");
    535         RBBI_DEBUG_printUnicodeString(usetNode->fText);
    536         RBBIDebugPrintf("\n");
    537         if (usetNode->fLeftChild != NULL) {
    538             RBBINode::printTree(usetNode->fLeftChild, TRUE);
    539         }
    540     }
    541     RBBIDebugPrintf("\n");
    542 }
    543 #endif
    544 
    545 
    546 
    547 //-------------------------------------------------------------------------------------
    548 //
    549 //  RangeDescriptor copy constructor
    550 //
    551 //-------------------------------------------------------------------------------------
    552 
    553 RangeDescriptor::RangeDescriptor(const RangeDescriptor &other, UErrorCode &status) {
    554     int  i;
    555 
    556     this->fStartChar    = other.fStartChar;
    557     this->fEndChar      = other.fEndChar;
    558     this->fNum          = other.fNum;
    559     this->fNext         = NULL;
    560     UErrorCode oldstatus = status;
    561     this->fIncludesSets = new UVector(status);
    562     if (U_FAILURE(oldstatus)) {
    563         status = oldstatus;
    564     }
    565     if (U_FAILURE(status)) {
    566         return;
    567     }
    568     /* test for NULL */
    569     if (this->fIncludesSets == 0) {
    570         status = U_MEMORY_ALLOCATION_ERROR;
    571         return;
    572     }
    573 
    574     for (i=0; i<other.fIncludesSets->size(); i++) {
    575         this->fIncludesSets->addElement(other.fIncludesSets->elementAt(i), status);
    576     }
    577 }
    578 
    579 
    580 //-------------------------------------------------------------------------------------
    581 //
    582 //  RangeDescriptor default constructor
    583 //
    584 //-------------------------------------------------------------------------------------
    585 RangeDescriptor::RangeDescriptor(UErrorCode &status) {
    586     this->fStartChar    = 0;
    587     this->fEndChar      = 0;
    588     this->fNum          = 0;
    589     this->fNext         = NULL;
    590     UErrorCode oldstatus = status;
    591     this->fIncludesSets = new UVector(status);
    592     if (U_FAILURE(oldstatus)) {
    593         status = oldstatus;
    594     }
    595     if (U_FAILURE(status)) {
    596         return;
    597     }
    598     /* test for NULL */
    599     if(this->fIncludesSets == 0) {
    600         status = U_MEMORY_ALLOCATION_ERROR;
    601         return;
    602     }
    603 
    604 }
    605 
    606 
    607 //-------------------------------------------------------------------------------------
    608 //
    609 //  RangeDescriptor Destructor
    610 //
    611 //-------------------------------------------------------------------------------------
    612 RangeDescriptor::~RangeDescriptor() {
    613     delete  fIncludesSets;
    614     fIncludesSets = NULL;
    615 }
    616 
    617 //-------------------------------------------------------------------------------------
    618 //
    619 //  RangeDescriptor::split()
    620 //
    621 //-------------------------------------------------------------------------------------
    622 void RangeDescriptor::split(UChar32 where, UErrorCode &status) {
    623     U_ASSERT(where>fStartChar && where<=fEndChar);
    624     RangeDescriptor *nr = new RangeDescriptor(*this, status);
    625     if(nr == 0) {
    626         status = U_MEMORY_ALLOCATION_ERROR;
    627         return;
    628     }
    629     if (U_FAILURE(status)) {
    630         delete nr;
    631         return;
    632     }
    633     //  RangeDescriptor copy constructor copies all fields.
    634     //  Only need to update those that are different after the split.
    635     nr->fStartChar = where;
    636     this->fEndChar = where-1;
    637     nr->fNext      = this->fNext;
    638     this->fNext    = nr;
    639 }
    640 
    641 
    642 //-------------------------------------------------------------------------------------
    643 //
    644 //   RangeDescriptor::setDictionaryFlag
    645 //
    646 //            Character Category Numbers that include characters from
    647 //            the original Unicode Set named "dictionary" have bit 14
    648 //            set to 1.  The RBBI runtime engine uses this to trigger
    649 //            use of the word dictionary.
    650 //
    651 //            This function looks through the Unicode Sets that it
    652 //            (the range) includes, and sets the bit in fNum when
    653 //            "dictionary" is among them.
    654 //
    655 //            TODO:  a faster way would be to find the set node for
    656 //                   "dictionary" just once, rather than looking it
    657 //                   up by name every time.
    658 //
    659 //-------------------------------------------------------------------------------------
    660 void RangeDescriptor::setDictionaryFlag() {
    661     int i;
    662 
    663     static const char16_t *dictionary = u"dictionary";
    664     for (i=0; i<fIncludesSets->size(); i++) {
    665         RBBINode *usetNode  = (RBBINode *)fIncludesSets->elementAt(i);
    666         RBBINode *setRef = usetNode->fParent;
    667         if (setRef != nullptr) {
    668             RBBINode *varRef = setRef->fParent;
    669             if (varRef && varRef->fType == RBBINode::varRef) {
    670                 const UnicodeString *setName = &varRef->fText;
    671                 if (setName->compare(dictionary, -1) == 0) {
    672                     fNum |= RBBISetBuilder::DICT_BIT;
    673                     break;
    674                 }
    675             }
    676         }
    677     }
    678 }
    679 
    680 
    681 
    682 U_NAMESPACE_END
    683 
    684 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
    685