Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *   Copyright (C) 2010-2012, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 *   file name:  bytestrieiterator.cpp
      7 *   encoding:   US-ASCII
      8 *   tab size:   8 (not used)
      9 *   indentation:4
     10 *
     11 *   created on: 2010nov03
     12 *   created by: Markus W. Scherer
     13 */
     14 
     15 #include "unicode/utypes.h"
     16 #include "unicode/bytestrie.h"
     17 #include "unicode/stringpiece.h"
     18 #include "charstr.h"
     19 #include "uvectr32.h"
     20 
     21 U_NAMESPACE_BEGIN
     22 
     23 BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
     24                               UErrorCode &errorCode)
     25         : bytes_(static_cast<const uint8_t *>(trieBytes)),
     26           pos_(bytes_), initialPos_(bytes_),
     27           remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
     28           str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
     29     if(U_FAILURE(errorCode)) {
     30         return;
     31     }
     32     // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
     33     // a public API header for which we would want it to depend only on
     34     // other public headers.
     35     // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
     36     // via the CharString and UVector32 implementations, so this additional
     37     // cost is minimal.
     38     str_=new CharString();
     39     stack_=new UVector32(errorCode);
     40     if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) {
     41         errorCode=U_MEMORY_ALLOCATION_ERROR;
     42     }
     43 }
     44 
     45 BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
     46                               UErrorCode &errorCode)
     47         : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
     48           remainingMatchLength_(trie.remainingMatchLength_),
     49           initialRemainingMatchLength_(trie.remainingMatchLength_),
     50           str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
     51     if(U_FAILURE(errorCode)) {
     52         return;
     53     }
     54     str_=new CharString();
     55     stack_=new UVector32(errorCode);
     56     if(U_FAILURE(errorCode)) {
     57         return;
     58     }
     59     if(str_==NULL || stack_==NULL) {
     60         errorCode=U_MEMORY_ALLOCATION_ERROR;
     61         return;
     62     }
     63     int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
     64     if(length>=0) {
     65         // Pending linear-match node, append remaining bytes to str_.
     66         ++length;
     67         if(maxLength_>0 && length>maxLength_) {
     68             length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
     69         }
     70         str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);
     71         pos_+=length;
     72         remainingMatchLength_-=length;
     73     }
     74 }
     75 
     76 BytesTrie::Iterator::~Iterator() {
     77     delete str_;
     78     delete stack_;
     79 }
     80 
     81 BytesTrie::Iterator &
     82 BytesTrie::Iterator::reset() {
     83     pos_=initialPos_;
     84     remainingMatchLength_=initialRemainingMatchLength_;
     85     int32_t length=remainingMatchLength_+1;  // Remaining match length.
     86     if(maxLength_>0 && length>maxLength_) {
     87         length=maxLength_;
     88     }
     89     str_->truncate(length);
     90     pos_+=length;
     91     remainingMatchLength_-=length;
     92     stack_->setSize(0);
     93     return *this;
     94 }
     95 
     96 UBool
     97 BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
     98 
     99 UBool
    100 BytesTrie::Iterator::next(UErrorCode &errorCode) {
    101     if(U_FAILURE(errorCode)) {
    102         return FALSE;
    103     }
    104     const uint8_t *pos=pos_;
    105     if(pos==NULL) {
    106         if(stack_->isEmpty()) {
    107             return FALSE;
    108         }
    109         // Pop the state off the stack and continue with the next outbound edge of
    110         // the branch node.
    111         int32_t stackSize=stack_->size();
    112         int32_t length=stack_->elementAti(stackSize-1);
    113         pos=bytes_+stack_->elementAti(stackSize-2);
    114         stack_->setSize(stackSize-2);
    115         str_->truncate(length&0xffff);
    116         length=(int32_t)((uint32_t)length>>16);
    117         if(length>1) {
    118             pos=branchNext(pos, length, errorCode);
    119             if(pos==NULL) {
    120                 return TRUE;  // Reached a final value.
    121             }
    122         } else {
    123             str_->append((char)*pos++, errorCode);
    124         }
    125     }
    126     if(remainingMatchLength_>=0) {
    127         // We only get here if we started in a pending linear-match node
    128         // with more than maxLength remaining bytes.
    129         return truncateAndStop();
    130     }
    131     for(;;) {
    132         int32_t node=*pos++;
    133         if(node>=kMinValueLead) {
    134             // Deliver value for the byte sequence so far.
    135             UBool isFinal=(UBool)(node&kValueIsFinal);
    136             value_=readValue(pos, node>>1);
    137             if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
    138                 pos_=NULL;
    139             } else {
    140                 pos_=skipValue(pos, node);
    141             }
    142             sp_.set(str_->data(), str_->length());
    143             return TRUE;
    144         }
    145         if(maxLength_>0 && str_->length()==maxLength_) {
    146             return truncateAndStop();
    147         }
    148         if(node<kMinLinearMatch) {
    149             if(node==0) {
    150                 node=*pos++;
    151             }
    152             pos=branchNext(pos, node+1, errorCode);
    153             if(pos==NULL) {
    154                 return TRUE;  // Reached a final value.
    155             }
    156         } else {
    157             // Linear-match node, append length bytes to str_.
    158             int32_t length=node-kMinLinearMatch+1;
    159             if(maxLength_>0 && str_->length()+length>maxLength_) {
    160                 str_->append(reinterpret_cast<const char *>(pos),
    161                             maxLength_-str_->length(), errorCode);
    162                 return truncateAndStop();
    163             }
    164             str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
    165             pos+=length;
    166         }
    167     }
    168 }
    169 
    170 UBool
    171 BytesTrie::Iterator::truncateAndStop() {
    172     pos_=NULL;
    173     sp_.set(str_->data(), str_->length());
    174     value_=-1;  // no real value for str
    175     return TRUE;
    176 }
    177 
    178 // Branch node, needs to take the first outbound edge and push state for the rest.
    179 const uint8_t *
    180 BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
    181     while(length>kMaxBranchLinearSubNodeLength) {
    182         ++pos;  // ignore the comparison byte
    183         // Push state for the greater-or-equal edge.
    184         stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
    185         stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
    186         // Follow the less-than edge.
    187         length>>=1;
    188         pos=jumpByDelta(pos);
    189     }
    190     // List of key-value pairs where values are either final values or jump deltas.
    191     // Read the first (key, value) pair.
    192     uint8_t trieByte=*pos++;
    193     int32_t node=*pos++;
    194     UBool isFinal=(UBool)(node&kValueIsFinal);
    195     int32_t value=readValue(pos, node>>1);
    196     pos=skipValue(pos, node);
    197     stack_->addElement((int32_t)(pos-bytes_), errorCode);
    198     stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
    199     str_->append((char)trieByte, errorCode);
    200     if(isFinal) {
    201         pos_=NULL;
    202         sp_.set(str_->data(), str_->length());
    203         value_=value;
    204         return NULL;
    205     } else {
    206         return pos+value;
    207     }
    208 }
    209 
    210 U_NAMESPACE_END
    211