Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *   Copyright (C) 2010-2011, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 *   file name:  ucharstrieiterator.h
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2010nov15
     12 *   created by: Markus W. Scherer
     13 */
     14 
     15 #include "unicode/utypes.h"
     16 #include "unicode/ucharstrie.h"
     17 #include "unicode/unistr.h"
     18 #include "uvectr32.h"
     19 
     20 U_NAMESPACE_BEGIN
     21 
     22 UCharsTrie::Iterator::Iterator(const UChar *trieUChars, int32_t maxStringLength,
     23                                UErrorCode &errorCode)
     24         : uchars_(trieUChars),
     25           pos_(uchars_), initialPos_(uchars_),
     26           remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
     27           skipValue_(FALSE),
     28           maxLength_(maxStringLength), value_(0), stack_(NULL) {
     29     if(U_FAILURE(errorCode)) {
     30         return;
     31     }
     32     // stack_ is a pointer so that it's easy to turn ucharstrie.h into
     33     // a public API header for which we would want it to depend only on
     34     // other public headers.
     35     // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway
     36     // via the UnicodeString and UVector32 implementations, so this additional
     37     // cost is minimal.
     38     stack_=new UVector32(errorCode);
     39     if(stack_==NULL) {
     40         errorCode=U_MEMORY_ALLOCATION_ERROR;
     41     }
     42 }
     43 
     44 UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength,
     45                                UErrorCode &errorCode)
     46         : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_),
     47           remainingMatchLength_(trie.remainingMatchLength_),
     48           initialRemainingMatchLength_(trie.remainingMatchLength_),
     49           skipValue_(FALSE),
     50           maxLength_(maxStringLength), value_(0), stack_(NULL) {
     51     if(U_FAILURE(errorCode)) {
     52         return;
     53     }
     54     stack_=new UVector32(errorCode);
     55     if(U_FAILURE(errorCode)) {
     56         return;
     57     }
     58     if(stack_==NULL) {
     59         errorCode=U_MEMORY_ALLOCATION_ERROR;
     60         return;
     61     }
     62     int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
     63     if(length>=0) {
     64         // Pending linear-match node, append remaining UChars to str_.
     65         ++length;
     66         if(maxLength_>0 && length>maxLength_) {
     67             length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
     68         }
     69         str_.append(pos_, length);
     70         pos_+=length;
     71         remainingMatchLength_-=length;
     72     }
     73 }
     74 
     75 UCharsTrie::Iterator::~Iterator() {
     76     delete stack_;
     77 }
     78 
     79 UCharsTrie::Iterator &
     80 UCharsTrie::Iterator::reset() {
     81     pos_=initialPos_;
     82     remainingMatchLength_=initialRemainingMatchLength_;
     83     skipValue_=FALSE;
     84     int32_t length=remainingMatchLength_+1;  // Remaining match length.
     85     if(maxLength_>0 && length>maxLength_) {
     86         length=maxLength_;
     87     }
     88     str_.truncate(length);
     89     pos_+=length;
     90     remainingMatchLength_-=length;
     91     stack_->setSize(0);
     92     return *this;
     93 }
     94 
     95 UBool
     96 UCharsTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
     97 
     98 UBool
     99 UCharsTrie::Iterator::next(UErrorCode &errorCode) {
    100     if(U_FAILURE(errorCode)) {
    101         return FALSE;
    102     }
    103     const UChar *pos=pos_;
    104     if(pos==NULL) {
    105         if(stack_->isEmpty()) {
    106             return FALSE;
    107         }
    108         // Pop the state off the stack and continue with the next outbound edge of
    109         // the branch node.
    110         int32_t stackSize=stack_->size();
    111         int32_t length=stack_->elementAti(stackSize-1);
    112         pos=uchars_+stack_->elementAti(stackSize-2);
    113         stack_->setSize(stackSize-2);
    114         str_.truncate(length&0xffff);
    115         length=(int32_t)((uint32_t)length>>16);
    116         if(length>1) {
    117             pos=branchNext(pos, length, errorCode);
    118             if(pos==NULL) {
    119                 return TRUE;  // Reached a final value.
    120             }
    121         } else {
    122             str_.append(*pos++);
    123         }
    124     }
    125     if(remainingMatchLength_>=0) {
    126         // We only get here if we started in a pending linear-match node
    127         // with more than maxLength remaining units.
    128         return truncateAndStop();
    129     }
    130     for(;;) {
    131         int32_t node=*pos++;
    132         if(node>=kMinValueLead) {
    133             if(skipValue_) {
    134                 pos=skipNodeValue(pos, node);
    135                 node&=kNodeTypeMask;
    136                 skipValue_=FALSE;
    137             } else {
    138                 // Deliver value for the string so far.
    139                 UBool isFinal=(UBool)(node>>15);
    140                 if(isFinal) {
    141                     value_=readValue(pos, node&0x7fff);
    142                 } else {
    143                     value_=readNodeValue(pos, node);
    144                 }
    145                 if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
    146                     pos_=NULL;
    147                 } else {
    148                     // We cannot skip the value right here because it shares its
    149                     // lead unit with a match node which we have to evaluate
    150                     // next time.
    151                     // Instead, keep pos_ on the node lead unit itself.
    152                     pos_=pos-1;
    153                     skipValue_=TRUE;
    154                 }
    155                 return TRUE;
    156             }
    157         }
    158         if(maxLength_>0 && str_.length()==maxLength_) {
    159             return truncateAndStop();
    160         }
    161         if(node<kMinLinearMatch) {
    162             if(node==0) {
    163                 node=*pos++;
    164             }
    165             pos=branchNext(pos, node+1, errorCode);
    166             if(pos==NULL) {
    167                 return TRUE;  // Reached a final value.
    168             }
    169         } else {
    170             // Linear-match node, append length units to str_.
    171             int32_t length=node-kMinLinearMatch+1;
    172             if(maxLength_>0 && str_.length()+length>maxLength_) {
    173                 str_.append(pos, maxLength_-str_.length());
    174                 return truncateAndStop();
    175             }
    176             str_.append(pos, length);
    177             pos+=length;
    178         }
    179     }
    180 }
    181 
    182 // Branch node, needs to take the first outbound edge and push state for the rest.
    183 const UChar *
    184 UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) {
    185     while(length>kMaxBranchLinearSubNodeLength) {
    186         ++pos;  // ignore the comparison unit
    187         // Push state for the greater-or-equal edge.
    188         stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode);
    189         stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
    190         // Follow the less-than edge.
    191         length>>=1;
    192         pos=jumpByDelta(pos);
    193     }
    194     // List of key-value pairs where values are either final values or jump deltas.
    195     // Read the first (key, value) pair.
    196     UChar trieUnit=*pos++;
    197     int32_t node=*pos++;
    198     UBool isFinal=(UBool)(node>>15);
    199     int32_t value=readValue(pos, node&=0x7fff);
    200     pos=skipValue(pos, node);
    201     stack_->addElement((int32_t)(pos-uchars_), errorCode);
    202     stack_->addElement(((length-1)<<16)|str_.length(), errorCode);
    203     str_.append(trieUnit);
    204     if(isFinal) {
    205         pos_=NULL;
    206         value_=value;
    207         return NULL;
    208     } else {
    209         return pos+value;
    210     }
    211 }
    212 
    213 U_NAMESPACE_END
    214