Home | History | Annotate | Download | only in common
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *   Copyright (C) 2010-2012, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 *   file name:  bytestrieiterator.cpp
      9 *   encoding:   UTF-8
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2010nov03
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #include "unicode/utypes.h"
     18 #include "unicode/bytestrie.h"
     19 #include "unicode/stringpiece.h"
     20 #include "charstr.h"
     21 #include "uvectr32.h"
     22 
     23 U_NAMESPACE_BEGIN
     24 
     25 BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
     26                               UErrorCode &errorCode)
     27         : bytes_(static_cast<const uint8_t *>(trieBytes)),
     28           pos_(bytes_), initialPos_(bytes_),
     29           remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
     30           str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
     31     if(U_FAILURE(errorCode)) {
     32         return;
     33     }
     34     // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
     35     // a public API header for which we would want it to depend only on
     36     // other public headers.
     37     // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
     38     // via the CharString and UVector32 implementations, so this additional
     39     // cost is minimal.
     40     str_=new CharString();
     41     stack_=new UVector32(errorCode);
     42     if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) {
     43         errorCode=U_MEMORY_ALLOCATION_ERROR;
     44     }
     45 }
     46 
     47 BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
     48                               UErrorCode &errorCode)
     49         : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
     50           remainingMatchLength_(trie.remainingMatchLength_),
     51           initialRemainingMatchLength_(trie.remainingMatchLength_),
     52           str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
     53     if(U_FAILURE(errorCode)) {
     54         return;
     55     }
     56     str_=new CharString();
     57     stack_=new UVector32(errorCode);
     58     if(U_FAILURE(errorCode)) {
     59         return;
     60     }
     61     if(str_==NULL || stack_==NULL) {
     62         errorCode=U_MEMORY_ALLOCATION_ERROR;
     63         return;
     64     }
     65     int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
     66     if(length>=0) {
     67         // Pending linear-match node, append remaining bytes to str_.
     68         ++length;
     69         if(maxLength_>0 && length>maxLength_) {
     70             length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
     71         }
     72         str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);
     73         pos_+=length;
     74         remainingMatchLength_-=length;
     75     }
     76 }
     77 
     78 BytesTrie::Iterator::~Iterator() {
     79     delete str_;
     80     delete stack_;
     81 }
     82 
     83 BytesTrie::Iterator &
     84 BytesTrie::Iterator::reset() {
     85     pos_=initialPos_;
     86     remainingMatchLength_=initialRemainingMatchLength_;
     87     int32_t length=remainingMatchLength_+1;  // Remaining match length.
     88     if(maxLength_>0 && length>maxLength_) {
     89         length=maxLength_;
     90     }
     91     str_->truncate(length);
     92     pos_+=length;
     93     remainingMatchLength_-=length;
     94     stack_->setSize(0);
     95     return *this;
     96 }
     97 
     98 UBool
     99 BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
    100 
    101 UBool
    102 BytesTrie::Iterator::next(UErrorCode &errorCode) {
    103     if(U_FAILURE(errorCode)) {
    104         return FALSE;
    105     }
    106     const uint8_t *pos=pos_;
    107     if(pos==NULL) {
    108         if(stack_->isEmpty()) {
    109             return FALSE;
    110         }
    111         // Pop the state off the stack and continue with the next outbound edge of
    112         // the branch node.
    113         int32_t stackSize=stack_->size();
    114         int32_t length=stack_->elementAti(stackSize-1);
    115         pos=bytes_+stack_->elementAti(stackSize-2);
    116         stack_->setSize(stackSize-2);
    117         str_->truncate(length&0xffff);
    118         length=(int32_t)((uint32_t)length>>16);
    119         if(length>1) {
    120             pos=branchNext(pos, length, errorCode);
    121             if(pos==NULL) {
    122                 return TRUE;  // Reached a final value.
    123             }
    124         } else {
    125             str_->append((char)*pos++, errorCode);
    126         }
    127     }
    128     if(remainingMatchLength_>=0) {
    129         // We only get here if we started in a pending linear-match node
    130         // with more than maxLength remaining bytes.
    131         return truncateAndStop();
    132     }
    133     for(;;) {
    134         int32_t node=*pos++;
    135         if(node>=kMinValueLead) {
    136             // Deliver value for the byte sequence so far.
    137             UBool isFinal=(UBool)(node&kValueIsFinal);
    138             value_=readValue(pos, node>>1);
    139             if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
    140                 pos_=NULL;
    141             } else {
    142                 pos_=skipValue(pos, node);
    143             }
    144             return TRUE;
    145         }
    146         if(maxLength_>0 && str_->length()==maxLength_) {
    147             return truncateAndStop();
    148         }
    149         if(node<kMinLinearMatch) {
    150             if(node==0) {
    151                 node=*pos++;
    152             }
    153             pos=branchNext(pos, node+1, errorCode);
    154             if(pos==NULL) {
    155                 return TRUE;  // Reached a final value.
    156             }
    157         } else {
    158             // Linear-match node, append length bytes to str_.
    159             int32_t length=node-kMinLinearMatch+1;
    160             if(maxLength_>0 && str_->length()+length>maxLength_) {
    161                 str_->append(reinterpret_cast<const char *>(pos),
    162                             maxLength_-str_->length(), errorCode);
    163                 return truncateAndStop();
    164             }
    165             str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
    166             pos+=length;
    167         }
    168     }
    169 }
    170 
    171 StringPiece
    172 BytesTrie::Iterator::getString() const {
    173     return str_ == NULL ? StringPiece() : str_->toStringPiece();
    174 }
    175 
    176 UBool
    177 BytesTrie::Iterator::truncateAndStop() {
    178     pos_=NULL;
    179     value_=-1;  // no real value for str
    180     return TRUE;
    181 }
    182 
    183 // Branch node, needs to take the first outbound edge and push state for the rest.
    184 const uint8_t *
    185 BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
    186     while(length>kMaxBranchLinearSubNodeLength) {
    187         ++pos;  // ignore the comparison byte
    188         // Push state for the greater-or-equal edge.
    189         stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
    190         stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
    191         // Follow the less-than edge.
    192         length>>=1;
    193         pos=jumpByDelta(pos);
    194     }
    195     // List of key-value pairs where values are either final values or jump deltas.
    196     // Read the first (key, value) pair.
    197     uint8_t trieByte=*pos++;
    198     int32_t node=*pos++;
    199     UBool isFinal=(UBool)(node&kValueIsFinal);
    200     int32_t value=readValue(pos, node>>1);
    201     pos=skipValue(pos, node);
    202     stack_->addElement((int32_t)(pos-bytes_), errorCode);
    203     stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
    204     str_->append((char)trieByte, errorCode);
    205     if(isFinal) {
    206         pos_=NULL;
    207         value_=value;
    208         return NULL;
    209     } else {
    210         return pos+value;
    211     }
    212 }
    213 
    214 U_NAMESPACE_END
    215