Home | History | Annotate | Download | only in common
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *   Copyright (C) 2010-2011, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 *   file name:  ucharstrieiterator.h
      9 *   encoding:   UTF-8
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2010nov15
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #include "unicode/utypes.h"
     18 #include "unicode/ucharstrie.h"
     19 #include "unicode/unistr.h"
     20 #include "uvectr32.h"
     21 
     22 U_NAMESPACE_BEGIN
     23 
     24 UCharsTrie::Iterator::Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength,
     25                                UErrorCode &errorCode)
     26         : uchars_(trieUChars),
     27           pos_(uchars_), initialPos_(uchars_),
     28           remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
     29           skipValue_(FALSE),
     30           maxLength_(maxStringLength), value_(0), stack_(NULL) {
     31     if(U_FAILURE(errorCode)) {
     32         return;
     33     }
     34     // stack_ is a pointer so that it's easy to turn ucharstrie.h into
     35     // a public API header for which we would want it to depend only on
     36     // other public headers.
     37     // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway
     38     // via the UnicodeString and UVector32 implementations, so this additional
     39     // cost is minimal.
     40     stack_=new UVector32(errorCode);
     41     if(stack_==NULL) {
     42         errorCode=U_MEMORY_ALLOCATION_ERROR;
     43     }
     44 }
     45 
     46 UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength,
     47                                UErrorCode &errorCode)
     48         : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_),
     49           remainingMatchLength_(trie.remainingMatchLength_),
     50           initialRemainingMatchLength_(trie.remainingMatchLength_),
     51           skipValue_(FALSE),
     52           maxLength_(maxStringLength), value_(0), stack_(NULL) {
     53     if(U_FAILURE(errorCode)) {
     54         return;
     55     }
     56     stack_=new UVector32(errorCode);
     57     if(U_FAILURE(errorCode)) {
     58         return;
     59     }
     60     if(stack_==NULL) {
     61         errorCode=U_MEMORY_ALLOCATION_ERROR;
     62         return;
     63     }
     64     int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
     65     if(length>=0) {
     66         // Pending linear-match node, append remaining UChars to str_.
     67         ++length;
     68         if(maxLength_>0 && length>maxLength_) {
     69             length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
     70         }
     71         str_.append(pos_, length);
     72         pos_+=length;
     73         remainingMatchLength_-=length;
     74     }
     75 }
     76 
     77 UCharsTrie::Iterator::~Iterator() {
     78     delete stack_;
     79 }
     80 
     81 UCharsTrie::Iterator &
     82 UCharsTrie::Iterator::reset() {
     83     pos_=initialPos_;
     84     remainingMatchLength_=initialRemainingMatchLength_;
     85     skipValue_=FALSE;
     86     int32_t length=remainingMatchLength_+1;  // Remaining match length.
     87     if(maxLength_>0 && length>maxLength_) {
     88         length=maxLength_;
     89     }
     90     str_.truncate(length);
     91     pos_+=length;
     92     remainingMatchLength_-=length;
     93     stack_->setSize(0);
     94     return *this;
     95 }
     96 
     97 UBool
     98 UCharsTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
     99 
    100 UBool
    101 UCharsTrie::Iterator::next(UErrorCode &errorCode) {
    102     if(U_FAILURE(errorCode)) {
    103         return FALSE;
    104     }
    105     const UChar *pos=pos_;
    106     if(pos==NULL) {
    107         if(stack_->isEmpty()) {
    108             return FALSE;
    109         }
    110         // Pop the state off the stack and continue with the next outbound edge of
    111         // the branch node.
    112         int32_t stackSize=stack_->size();
    113         int32_t length=stack_->elementAti(stackSize-1);
    114         pos=uchars_+stack_->elementAti(stackSize-2);
    115         stack_->setSize(stackSize-2);
    116         str_.truncate(length&0xffff);
    117         length=(int32_t)((uint32_t)length>>16);
    118         if(length>1) {
    119             pos=branchNext(pos, length, errorCode);
    120             if(pos==NULL) {
    121                 return TRUE;  // Reached a final value.
    122             }
    123         } else {
    124             str_.append(*pos++);
    125         }
    126     }
    127     if(remainingMatchLength_>=0) {
    128         // We only get here if we started in a pending linear-match node
    129         // with more than maxLength remaining units.
    130         return truncateAndStop();
    131     }
    132     for(;;) {
    133         int32_t node=*pos++;
    134         if(node>=kMinValueLead) {
    135             if(skipValue_) {
    136                 pos=skipNodeValue(pos, node);
    137                 node&=kNodeTypeMask;
    138                 skipValue_=FALSE;
    139             } else {
    140                 // Deliver value for the string so far.
    141                 UBool isFinal=(UBool)(node>>15);
    142                 if(isFinal) {
    143                     value_=readValue(pos, node&0x7fff);
    144                 } else {
    145                     value_=readNodeValue(pos, node);
    146                 }
    147                 if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
    148                     pos_=NULL;
    149                 } else {
    150                     // We cannot skip the value right here because it shares its
    151                     // lead unit with a match node which we have to evaluate
    152                     // next time.
    153                     // Instead, keep pos_ on the node lead unit itself.
    154                     pos_=pos-1;
    155                     skipValue_=TRUE;
    156                 }
    157                 return TRUE;
    158             }
    159         }
    160         if(maxLength_>0 && str_.length()==maxLength_) {
    161             return truncateAndStop();
    162         }
    163         if(node<kMinLinearMatch) {
    164             if(node==0) {
    165                 node=*pos++;
    166             }
    167             pos=branchNext(pos, node+1, errorCode);
    168             if(pos==NULL) {
    169                 return TRUE;  // Reached a final value.
    170             }
    171         } else {
    172             // Linear-match node, append length units to str_.
    173             int32_t length=node-kMinLinearMatch+1;
    174             if(maxLength_>0 && str_.length()+length>maxLength_) {
    175                 str_.append(pos, maxLength_-str_.length());
    176                 return truncateAndStop();
    177             }
    178             str_.append(pos, length);
    179             pos+=length;
    180         }
    181     }
    182 }
    183 
    184 // Branch node, needs to take the first outbound edge and push state for the rest.
    185 const UChar *
    186 UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) {
    187     while(length>kMaxBranchLinearSubNodeLength) {
    188         ++pos;  // ignore the comparison unit
    189         // Push state for the greater-or-equal edge.
    190         stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode);
    191         stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
    192         // Follow the less-than edge.
    193         length>>=1;
    194         pos=jumpByDelta(pos);
    195     }
    196     // List of key-value pairs where values are either final values or jump deltas.
    197     // Read the first (key, value) pair.
    198     UChar trieUnit=*pos++;
    199     int32_t node=*pos++;
    200     UBool isFinal=(UBool)(node>>15);
    201     int32_t value=readValue(pos, node&=0x7fff);
    202     pos=skipValue(pos, node);
    203     stack_->addElement((int32_t)(pos-uchars_), errorCode);
    204     stack_->addElement(((length-1)<<16)|str_.length(), errorCode);
    205     str_.append(trieUnit);
    206     if(isFinal) {
    207         pos_=NULL;
    208         value_=value;
    209         return NULL;
    210     } else {
    211         return pos+value;
    212     }
    213 }
    214 
    215 U_NAMESPACE_END
    216