Home | History | Annotate | Download | only in common
      1 //
      2 //  rbbisetb.cpp
      3 //
      4 /*
      5 ***************************************************************************
      6 *   Copyright (C) 2002-2008 International Business Machines Corporation   *
      7 *   and others. All rights reserved.                                      *
      8 ***************************************************************************
      9 */
     10 //
     11 //  RBBISetBuilder   Handles processing of Unicode Sets from RBBI rules
     12 //                   (part of the rule building process.)
     13 //
     14 //      Starting with the rules parse tree from the scanner,
     15 //
     16 //                   -  Enumerate the set of UnicodeSets that are referenced
     17 //                      by the RBBI rules.
     18 //                   -  compute a set of non-overlapping character ranges
     19 //                      with all characters within a range belonging to the same
     20 //                      set of input uniocde sets.
     21 //                   -  Derive a set of non-overlapping UnicodeSet (like things)
     22 //                      that will correspond to columns in the state table for
     23 //                      the RBBI execution engine.  All characters within one
     24 //                      of these sets belong to the same set of the original
     25 //                      UnicodeSets from the user's rules.
     26 //                   -  construct the trie table that maps input characters
     27 //                      to the index of the matching non-overlapping set of set from
     28 //                      the previous step.
     29 //
     30 
     31 #include "unicode/utypes.h"
     32 
     33 #if !UCONFIG_NO_BREAK_ITERATION
     34 
     35 #include "unicode/uniset.h"
     36 #include "utrie.h"
     37 #include "uvector.h"
     38 #include "uassert.h"
     39 #include "cmemory.h"
     40 #include "cstring.h"
     41 
     42 #include "rbbisetb.h"
     43 #include "rbbinode.h"
     44 
     45 
     46 //------------------------------------------------------------------------
     47 //
     48 //   getFoldedRBBIValue        Call-back function used during building of Trie table.
     49 //                             Folding value: just store the offset (16 bits)
     50 //                             if there is any non-0 entry.
     51 //                             (It'd really be nice if the Trie builder would provide a
     52 //                             simple default, so this function could go away from here.)
     53 //
     54 //------------------------------------------------------------------------
     55 /* folding value: just store the offset (16 bits) if there is any non-0 entry */
     56 U_CDECL_BEGIN
     57 static uint32_t U_CALLCONV
     58 getFoldedRBBIValue(UNewTrie *trie, UChar32 start, int32_t offset) {
     59     uint32_t value;
     60     UChar32 limit;
     61     UBool inBlockZero;
     62 
     63     limit=start+0x400;
     64     while(start<limit) {
     65         value=utrie_get32(trie, start, &inBlockZero);
     66         if(inBlockZero) {
     67             start+=UTRIE_DATA_BLOCK_LENGTH;
     68         } else if(value!=0) {
     69             return (uint32_t)(offset|0x8000);
     70         } else {
     71             ++start;
     72         }
     73     }
     74     return 0;
     75 }
     76 
     77 
     78 U_CDECL_END
     79 
     80 
     81 
     82 U_NAMESPACE_BEGIN
     83 
     84 //------------------------------------------------------------------------
     85 //
     86 //   Constructor
     87 //
     88 //------------------------------------------------------------------------
     89 RBBISetBuilder::RBBISetBuilder(RBBIRuleBuilder *rb)
     90 {
     91     fRB             = rb;
     92     fStatus         = rb->fStatus;
     93     fRangeList      = 0;
     94     fTrie           = 0;
     95     fTrieSize       = 0;
     96     fGroupCount     = 0;
     97     fSawBOF         = FALSE;
     98 }
     99 
    100 
    101 //------------------------------------------------------------------------
    102 //
    103 //   Destructor
    104 //
    105 //------------------------------------------------------------------------
    106 RBBISetBuilder::~RBBISetBuilder()
    107 {
    108     RangeDescriptor   *nextRangeDesc;
    109 
    110     // Walk through & delete the linked list of RangeDescriptors
    111     for (nextRangeDesc = fRangeList; nextRangeDesc!=NULL;) {
    112         RangeDescriptor *r = nextRangeDesc;
    113         nextRangeDesc      = r->fNext;
    114         delete r;
    115     }
    116 
    117     utrie_close(fTrie);
    118 }
    119 
    120 
    121 
    122 
    123 //------------------------------------------------------------------------
    124 //
    125 //   build          Build the list of non-overlapping character ranges
    126 //                  from the Unicode Sets.
    127 //
    128 //------------------------------------------------------------------------
    129 void RBBISetBuilder::build() {
    130     RBBINode        *usetNode;
    131     RangeDescriptor *rlRange;
    132 
    133     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "usets")) {printSets();}
    134 
    135     //
    136     //  Initialize the process by creating a single range encompassing all characters
    137     //  that is in no sets.
    138     //
    139     fRangeList                = new RangeDescriptor(*fStatus); // will check for status here
    140     if (fRangeList == NULL) {
    141         *fStatus = U_MEMORY_ALLOCATION_ERROR;
    142         return;
    143     }
    144     fRangeList->fStartChar    = 0;
    145     fRangeList->fEndChar      = 0x10ffff;
    146 
    147     if (U_FAILURE(*fStatus)) {
    148         return;
    149     }
    150 
    151     //
    152     //  Find the set of non-overlapping ranges of characters
    153     //
    154     int  ni;
    155     for (ni=0; ; ni++) {        // Loop over each of the UnicodeSets encountered in the input rules
    156         usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
    157         if (usetNode==NULL) {
    158             break;
    159         }
    160 
    161         UnicodeSet      *inputSet             = usetNode->fInputSet;
    162         int32_t          inputSetRangeCount   = inputSet->getRangeCount();
    163         int              inputSetRangeIndex   = 0;
    164                          rlRange              = fRangeList;
    165 
    166         for (;;) {
    167             if (inputSetRangeIndex >= inputSetRangeCount) {
    168                 break;
    169             }
    170             UChar32      inputSetRangeBegin  = inputSet->getRangeStart(inputSetRangeIndex);
    171             UChar32      inputSetRangeEnd    = inputSet->getRangeEnd(inputSetRangeIndex);
    172 
    173             // skip over ranges from the range list that are completely
    174             //   below the current range from the input unicode set.
    175             while (rlRange->fEndChar < inputSetRangeBegin) {
    176                 rlRange = rlRange->fNext;
    177             }
    178 
    179             // If the start of the range from the range list is before with
    180             //   the start of the range from the unicode set, split the range list range
    181             //   in two, with one part being before (wholly outside of) the unicode set
    182             //   and the other containing the rest.
    183             //   Then continue the loop; the post-split current range will then be skipped
    184             //     over
    185             if (rlRange->fStartChar < inputSetRangeBegin) {
    186                 rlRange->split(inputSetRangeBegin, *fStatus);
    187                 if (U_FAILURE(*fStatus)) {
    188                     return;
    189                 }
    190                 continue;
    191             }
    192 
    193             // Same thing at the end of the ranges...
    194             // If the end of the range from the range list doesn't coincide with
    195             //   the end of the range from the unicode set, split the range list
    196             //   range in two.  The first part of the split range will be
    197             //   wholly inside the Unicode set.
    198             if (rlRange->fEndChar > inputSetRangeEnd) {
    199                 rlRange->split(inputSetRangeEnd+1, *fStatus);
    200                 if (U_FAILURE(*fStatus)) {
    201                     return;
    202                 }
    203             }
    204 
    205             // The current rlRange is now entirely within the UnicodeSet range.
    206             // Add this unicode set to the list of sets for this rlRange
    207             if (rlRange->fIncludesSets->indexOf(usetNode) == -1) {
    208                 rlRange->fIncludesSets->addElement(usetNode, *fStatus);
    209                 if (U_FAILURE(*fStatus)) {
    210                     return;
    211                 }
    212             }
    213 
    214             // Advance over ranges that we are finished with.
    215             if (inputSetRangeEnd == rlRange->fEndChar) {
    216                 inputSetRangeIndex++;
    217             }
    218             rlRange = rlRange->fNext;
    219         }
    220     }
    221 
    222     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "range")) { printRanges();}
    223 
    224     //
    225     //  Group the above ranges, with each group consisting of one or more
    226     //    ranges that are in exactly the same set of original UnicodeSets.
    227     //    The groups are numbered, and these group numbers are the set of
    228     //    input symbols recognized by the run-time state machine.
    229     //
    230     //    Numbering: # 0  (state table column 0) is unused.
    231     //               # 1  is reserved - table column 1 is for end-of-input
    232     //               # 2  is reserved - table column 2 is for beginning-in-input
    233     //               # 3  is the first range list.
    234     //
    235     RangeDescriptor *rlSearchRange;
    236     for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
    237         for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange->fNext) {
    238             if (rlRange->fIncludesSets->equals(*rlSearchRange->fIncludesSets)) {
    239                 rlRange->fNum = rlSearchRange->fNum;
    240                 break;
    241             }
    242         }
    243         if (rlRange->fNum == 0) {
    244             fGroupCount ++;
    245             rlRange->fNum = fGroupCount+2;
    246             rlRange->setDictionaryFlag();
    247             addValToSets(rlRange->fIncludesSets, fGroupCount+2);
    248         }
    249     }
    250 
    251     // Handle input sets that contain the special string {eof}.
    252     //   Column 1 of the state table is reserved for EOF on input.
    253     //   Column 2 is reserved for before-the-start-input.
    254     //            (This column can be optimized away later if there are no rule
    255     //             references to {bof}.)
    256     //   Add this column value (1 or 2) to the equivalent expression
    257     //     subtree for each UnicodeSet that contains the string {eof}
    258     //   Because {bof} and {eof} are not a characters in the normal sense,
    259     //   they doesn't affect the computation of ranges or TRIE.
    260     static const UChar eofUString[] = {0x65, 0x6f, 0x66, 0};
    261     static const UChar bofUString[] = {0x62, 0x6f, 0x66, 0};
    262 
    263     UnicodeString eofString(eofUString);
    264     UnicodeString bofString(bofUString);
    265     for (ni=0; ; ni++) {        // Loop over each of the UnicodeSets encountered in the input rules
    266         usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
    267         if (usetNode==NULL) {
    268             break;
    269         }
    270         UnicodeSet      *inputSet = usetNode->fInputSet;
    271         if (inputSet->contains(eofString)) {
    272             addValToSet(usetNode, 1);
    273         }
    274         if (inputSet->contains(bofString)) {
    275             addValToSet(usetNode, 2);
    276             fSawBOF = TRUE;
    277         }
    278     }
    279 
    280 
    281     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();}
    282     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();}
    283 
    284     //
    285     // Build the Trie table for mapping UChar32 values to the corresponding
    286     //   range group number
    287     //
    288     fTrie = utrie_open(NULL,    //  Pre-existing trie to be filled in
    289                       NULL,    //  Data array  (utrie will allocate one)
    290                       100000,  //  Max Data Length
    291                       0,       //  Initial value for all code points
    292                       0,       //  Lead surrogate unit value
    293                       TRUE);   //  Keep Latin 1 in separately
    294 
    295 
    296     for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
    297         utrie_setRange32(fTrie, rlRange->fStartChar, rlRange->fEndChar+1, rlRange->fNum, TRUE);
    298     }
    299 }
    300 
    301 
    302 
    303 //-----------------------------------------------------------------------------------
    304 //
    305 //  getTrieSize()    Return the size that will be required to serialize the Trie.
    306 //
    307 //-----------------------------------------------------------------------------------
    308 int32_t RBBISetBuilder::getTrieSize() /*const*/ {
    309     fTrieSize  = utrie_serialize(fTrie,
    310                                     NULL,                // Buffer
    311                                     0,                   // Capacity
    312                                     getFoldedRBBIValue,
    313                                     TRUE,                // Reduce to 16 bits
    314                                     fStatus);
    315     // RBBIDebugPrintf("Trie table size is %d\n", trieSize);
    316     return fTrieSize;
    317 }
    318 
    319 
    320 //-----------------------------------------------------------------------------------
    321 //
    322 //  serializeTrie()   Put the serialized trie at the specified address.
    323 //                    Trust the caller to have given us enough memory.
    324 //                    getTrieSize() MUST be called first.
    325 //
    326 //-----------------------------------------------------------------------------------
    327 void RBBISetBuilder::serializeTrie(uint8_t *where) {
    328     utrie_serialize(fTrie,
    329                     where,                   // Buffer
    330                     fTrieSize,               // Capacity
    331                     getFoldedRBBIValue,
    332                     TRUE,                    // Reduce to 16 bits
    333                     fStatus);
    334 }
    335 
    336 //------------------------------------------------------------------------
    337 //
    338 //  addValToSets     Add a runtime-mapped input value to each uset from a
    339 //                   list of uset nodes. (val corresponds to a state table column.)
    340 //                   For each of the original Unicode sets - which correspond
    341 //                   directly to uset nodes - a logically equivalent expression
    342 //                   is constructed in terms of the remapped runtime input
    343 //                   symbol set.  This function adds one runtime input symbol to
    344 //                   a list of sets.
    345 //
    346 //                   The "logically equivalent expression" is the tree for an
    347 //                   or-ing together of all of the symbols that go into the set.
    348 //
    349 //------------------------------------------------------------------------
    350 void  RBBISetBuilder::addValToSets(UVector *sets, uint32_t val) {
    351     int32_t       ix;
    352 
    353     for (ix=0; ix<sets->size(); ix++) {
    354         RBBINode *usetNode = (RBBINode *)sets->elementAt(ix);
    355         addValToSet(usetNode, val);
    356     }
    357 }
    358 
    359 void  RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) {
    360     RBBINode *leafNode = new RBBINode(RBBINode::leafChar);
    361     if (leafNode == NULL) {
    362         *fStatus = U_MEMORY_ALLOCATION_ERROR;
    363         return;
    364     }
    365     leafNode->fVal = (unsigned short)val;
    366     if (usetNode->fLeftChild == NULL) {
    367         usetNode->fLeftChild = leafNode;
    368         leafNode->fParent    = usetNode;
    369     } else {
    370         // There are already input symbols present for this set.
    371         // Set up an OR node, with the previous stuff as the left child
    372         //   and the new value as the right child.
    373         RBBINode *orNode = new RBBINode(RBBINode::opOr);
    374         if (orNode == NULL) {
    375             *fStatus = U_MEMORY_ALLOCATION_ERROR;
    376             return;
    377         }
    378         orNode->fLeftChild  = usetNode->fLeftChild;
    379         orNode->fRightChild = leafNode;
    380         orNode->fLeftChild->fParent  = orNode;
    381         orNode->fRightChild->fParent = orNode;
    382         usetNode->fLeftChild = orNode;
    383         orNode->fParent = usetNode;
    384     }
    385 }
    386 
    387 
    388 //------------------------------------------------------------------------
    389 //
    390 //   getNumCharCategories
    391 //
    392 //------------------------------------------------------------------------
    393 int32_t  RBBISetBuilder::getNumCharCategories() const {
    394     return fGroupCount + 3;
    395 }
    396 
    397 
    398 //------------------------------------------------------------------------
    399 //
    400 //   sawBOF
    401 //
    402 //------------------------------------------------------------------------
    403 UBool  RBBISetBuilder::sawBOF() const {
    404     return fSawBOF;
    405 }
    406 
    407 
    408 //------------------------------------------------------------------------
    409 //
    410 //   getFirstChar      Given a runtime RBBI character category, find
    411 //                     the first UChar32 that is in the set of chars
    412 //                     in the category.
    413 //------------------------------------------------------------------------
    414 UChar32  RBBISetBuilder::getFirstChar(int32_t category) const {
    415     RangeDescriptor   *rlRange;
    416     UChar32            retVal = (UChar32)-1;
    417     for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
    418         if (rlRange->fNum == category) {
    419             retVal = rlRange->fStartChar;
    420             break;
    421         }
    422     }
    423     return retVal;
    424 }
    425 
    426 
    427 
    428 //------------------------------------------------------------------------
    429 //
    430 //   printRanges        A debugging function.
    431 //                      dump out all of the range definitions.
    432 //
    433 //------------------------------------------------------------------------
    434 #ifdef RBBI_DEBUG
    435 void RBBISetBuilder::printRanges() {
    436     RangeDescriptor       *rlRange;
    437     int                    i;
    438 
    439     RBBIDebugPrintf("\n\n Nonoverlapping Ranges ...\n");
    440     for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
    441         RBBIDebugPrintf("%2i  %4x-%4x  ", rlRange->fNum, rlRange->fStartChar, rlRange->fEndChar);
    442 
    443         for (i=0; i<rlRange->fIncludesSets->size(); i++) {
    444             RBBINode       *usetNode    = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
    445             UnicodeString   setName = UNICODE_STRING("anon", 4);
    446             RBBINode       *setRef = usetNode->fParent;
    447             if (setRef != NULL) {
    448                 RBBINode *varRef = setRef->fParent;
    449                 if (varRef != NULL  &&  varRef->fType == RBBINode::varRef) {
    450                     setName = varRef->fText;
    451                 }
    452             }
    453             RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf("  ");
    454         }
    455         RBBIDebugPrintf("\n");
    456     }
    457 }
    458 #endif
    459 
    460 
    461 //------------------------------------------------------------------------
    462 //
    463 //   printRangeGroups     A debugging function.
    464 //                        dump out all of the range groups.
    465 //
    466 //------------------------------------------------------------------------
    467 #ifdef RBBI_DEBUG
    468 void RBBISetBuilder::printRangeGroups() {
    469     RangeDescriptor       *rlRange;
    470     RangeDescriptor       *tRange;
    471     int                    i;
    472     int                    lastPrintedGroupNum = 0;
    473 
    474     RBBIDebugPrintf("\nRanges grouped by Unicode Set Membership...\n");
    475     for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
    476         int groupNum = rlRange->fNum & 0xbfff;
    477         if (groupNum > lastPrintedGroupNum) {
    478             lastPrintedGroupNum = groupNum;
    479             RBBIDebugPrintf("%2i  ", groupNum);
    480 
    481             if (rlRange->fNum & 0x4000) { RBBIDebugPrintf(" <DICT> ");}
    482 
    483             for (i=0; i<rlRange->fIncludesSets->size(); i++) {
    484                 RBBINode       *usetNode    = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
    485                 UnicodeString   setName = UNICODE_STRING("anon", 4);
    486                 RBBINode       *setRef = usetNode->fParent;
    487                 if (setRef != NULL) {
    488                     RBBINode *varRef = setRef->fParent;
    489                     if (varRef != NULL  &&  varRef->fType == RBBINode::varRef) {
    490                         setName = varRef->fText;
    491                     }
    492                 }
    493                 RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf(" ");
    494             }
    495 
    496             i = 0;
    497             for (tRange = rlRange; tRange != 0; tRange = tRange->fNext) {
    498                 if (tRange->fNum == rlRange->fNum) {
    499                     if (i++ % 5 == 0) {
    500                         RBBIDebugPrintf("\n    ");
    501                     }
    502                     RBBIDebugPrintf("  %05x-%05x", tRange->fStartChar, tRange->fEndChar);
    503                 }
    504             }
    505             RBBIDebugPrintf("\n");
    506         }
    507     }
    508     RBBIDebugPrintf("\n");
    509 }
    510 #endif
    511 
    512 
    513 //------------------------------------------------------------------------
    514 //
    515 //   printSets          A debugging function.
    516 //                      dump out all of the set definitions.
    517 //
    518 //------------------------------------------------------------------------
    519 #ifdef RBBI_DEBUG
    520 void RBBISetBuilder::printSets() {
    521     int                   i;
    522 
    523     RBBIDebugPrintf("\n\nUnicode Sets List\n------------------\n");
    524     for (i=0; ; i++) {
    525         RBBINode        *usetNode;
    526         RBBINode        *setRef;
    527         RBBINode        *varRef;
    528         UnicodeString    setName;
    529 
    530         usetNode = (RBBINode *)fRB->fUSetNodes->elementAt(i);
    531         if (usetNode == NULL) {
    532             break;
    533         }
    534 
    535         RBBIDebugPrintf("%3d    ", i);
    536         setName = UNICODE_STRING("anonymous", 9);
    537         setRef = usetNode->fParent;
    538         if (setRef != NULL) {
    539             varRef = setRef->fParent;
    540             if (varRef != NULL  &&  varRef->fType == RBBINode::varRef) {
    541                 setName = varRef->fText;
    542             }
    543         }
    544         RBBI_DEBUG_printUnicodeString(setName);
    545         RBBIDebugPrintf("   ");
    546         RBBI_DEBUG_printUnicodeString(usetNode->fText);
    547         RBBIDebugPrintf("\n");
    548         if (usetNode->fLeftChild != NULL) {
    549             usetNode->fLeftChild->printTree(TRUE);
    550         }
    551     }
    552     RBBIDebugPrintf("\n");
    553 }
    554 #endif
    555 
    556 
    557 
    558 //-------------------------------------------------------------------------------------
    559 //
    560 //  RangeDescriptor copy constructor
    561 //
    562 //-------------------------------------------------------------------------------------
    563 
    564 RangeDescriptor::RangeDescriptor(const RangeDescriptor &other, UErrorCode &status) {
    565     int  i;
    566 
    567     this->fStartChar    = other.fStartChar;
    568     this->fEndChar      = other.fEndChar;
    569     this->fNum          = other.fNum;
    570     this->fNext         = NULL;
    571     UErrorCode oldstatus = status;
    572     this->fIncludesSets = new UVector(status);
    573     if (U_FAILURE(oldstatus)) {
    574         status = oldstatus;
    575     }
    576     if (U_FAILURE(status)) {
    577         return;
    578     }
    579     /* test for NULL */
    580     if (this->fIncludesSets == 0) {
    581         status = U_MEMORY_ALLOCATION_ERROR;
    582         return;
    583     }
    584 
    585     for (i=0; i<other.fIncludesSets->size(); i++) {
    586         this->fIncludesSets->addElement(other.fIncludesSets->elementAt(i), status);
    587     }
    588 }
    589 
    590 
    591 //-------------------------------------------------------------------------------------
    592 //
    593 //  RangeDesriptor default constructor
    594 //
    595 //-------------------------------------------------------------------------------------
    596 RangeDescriptor::RangeDescriptor(UErrorCode &status) {
    597     this->fStartChar    = 0;
    598     this->fEndChar      = 0;
    599     this->fNum          = 0;
    600     this->fNext         = NULL;
    601     UErrorCode oldstatus = status;
    602     this->fIncludesSets = new UVector(status);
    603     if (U_FAILURE(oldstatus)) {
    604         status = oldstatus;
    605     }
    606     if (U_FAILURE(status)) {
    607         return;
    608     }
    609     /* test for NULL */
    610     if(this->fIncludesSets == 0) {
    611         status = U_MEMORY_ALLOCATION_ERROR;
    612         return;
    613     }
    614 
    615 }
    616 
    617 
    618 //-------------------------------------------------------------------------------------
    619 //
    620 //  RangeDesriptor Destructor
    621 //
    622 //-------------------------------------------------------------------------------------
    623 RangeDescriptor::~RangeDescriptor() {
    624     delete  fIncludesSets;
    625     fIncludesSets = NULL;
    626 }
    627 
    628 //-------------------------------------------------------------------------------------
    629 //
    630 //  RangeDesriptor::split()
    631 //
    632 //-------------------------------------------------------------------------------------
    633 void RangeDescriptor::split(UChar32 where, UErrorCode &status) {
    634     U_ASSERT(where>fStartChar && where<=fEndChar);
    635     RangeDescriptor *nr = new RangeDescriptor(*this, status);
    636     if(nr == 0) {
    637         status = U_MEMORY_ALLOCATION_ERROR;
    638         return;
    639     }
    640     if (U_FAILURE(status)) {
    641         delete nr;
    642         return;
    643     }
    644     //  RangeDescriptor copy constructor copies all fields.
    645     //  Only need to update those that are different after the split.
    646     nr->fStartChar = where;
    647     this->fEndChar = where-1;
    648     nr->fNext      = this->fNext;
    649     this->fNext    = nr;
    650 }
    651 
    652 
    653 //-------------------------------------------------------------------------------------
    654 //
    655 //   RangeDescriptor::setDictionaryFlag
    656 //
    657 //            Character Category Numbers that include characters from
    658 //            the original Unicode Set named "dictionary" have bit 14
    659 //            set to 1.  The RBBI runtime engine uses this to trigger
    660 //            use of the word dictionary.
    661 //
    662 //            This function looks through the Unicode Sets that it
    663 //            (the range) includes, and sets the bit in fNum when
    664 //            "dictionary" is among them.
    665 //
    666 //            TODO:  a faster way would be to find the set node for
    667 //                   "dictionary" just once, rather than looking it
    668 //                   up by name every time.
    669 //
    670 //-------------------------------------------------------------------------------------
    671 void RangeDescriptor::setDictionaryFlag() {
    672     int i;
    673 
    674     for (i=0; i<this->fIncludesSets->size(); i++) {
    675         RBBINode       *usetNode    = (RBBINode *)fIncludesSets->elementAt(i);
    676         UnicodeString   setName;
    677         RBBINode       *setRef = usetNode->fParent;
    678         if (setRef != NULL) {
    679             RBBINode *varRef = setRef->fParent;
    680             if (varRef != NULL  &&  varRef->fType == RBBINode::varRef) {
    681                 setName = varRef->fText;
    682             }
    683         }
    684         if (setName.compare(UNICODE_STRING("dictionary", 10)) == 0) {   // TODO:  no string literals.
    685             this->fNum |= 0x4000;
    686             break;
    687         }
    688     }
    689 }
    690 
    691 
    692 
    693 U_NAMESPACE_END
    694 
    695 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
    696