1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2010-2012, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * file name: bytestrieiterator.cpp 9 * encoding: UTF-8 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2010nov03 14 * created by: Markus W. Scherer 15 */ 16 17 #include "unicode/utypes.h" 18 #include "unicode/bytestrie.h" 19 #include "unicode/stringpiece.h" 20 #include "charstr.h" 21 #include "uvectr32.h" 22 23 U_NAMESPACE_BEGIN 24 25 BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength, 26 UErrorCode &errorCode) 27 : bytes_(static_cast<const uint8_t *>(trieBytes)), 28 pos_(bytes_), initialPos_(bytes_), 29 remainingMatchLength_(-1), initialRemainingMatchLength_(-1), 30 str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { 31 if(U_FAILURE(errorCode)) { 32 return; 33 } 34 // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into 35 // a public API header for which we would want it to depend only on 36 // other public headers. 37 // Unlike BytesTrie itself, its Iterator performs memory allocations anyway 38 // via the CharString and UVector32 implementations, so this additional 39 // cost is minimal. 40 str_=new CharString(); 41 stack_=new UVector32(errorCode); 42 if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) { 43 errorCode=U_MEMORY_ALLOCATION_ERROR; 44 } 45 } 46 47 BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength, 48 UErrorCode &errorCode) 49 : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_), 50 remainingMatchLength_(trie.remainingMatchLength_), 51 initialRemainingMatchLength_(trie.remainingMatchLength_), 52 str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { 53 if(U_FAILURE(errorCode)) { 54 return; 55 } 56 str_=new CharString(); 57 stack_=new UVector32(errorCode); 58 if(U_FAILURE(errorCode)) { 59 return; 60 } 61 if(str_==NULL || stack_==NULL) { 62 errorCode=U_MEMORY_ALLOCATION_ERROR; 63 return; 64 } 65 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. 66 if(length>=0) { 67 // Pending linear-match node, append remaining bytes to str_. 68 ++length; 69 if(maxLength_>0 && length>maxLength_) { 70 length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. 71 } 72 str_->append(reinterpret_cast<const char *>(pos_), length, errorCode); 73 pos_+=length; 74 remainingMatchLength_-=length; 75 } 76 } 77 78 BytesTrie::Iterator::~Iterator() { 79 delete str_; 80 delete stack_; 81 } 82 83 BytesTrie::Iterator & 84 BytesTrie::Iterator::reset() { 85 pos_=initialPos_; 86 remainingMatchLength_=initialRemainingMatchLength_; 87 int32_t length=remainingMatchLength_+1; // Remaining match length. 88 if(maxLength_>0 && length>maxLength_) { 89 length=maxLength_; 90 } 91 str_->truncate(length); 92 pos_+=length; 93 remainingMatchLength_-=length; 94 stack_->setSize(0); 95 return *this; 96 } 97 98 UBool 99 BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } 100 101 UBool 102 BytesTrie::Iterator::next(UErrorCode &errorCode) { 103 if(U_FAILURE(errorCode)) { 104 return FALSE; 105 } 106 const uint8_t *pos=pos_; 107 if(pos==NULL) { 108 if(stack_->isEmpty()) { 109 return FALSE; 110 } 111 // Pop the state off the stack and continue with the next outbound edge of 112 // the branch node. 113 int32_t stackSize=stack_->size(); 114 int32_t length=stack_->elementAti(stackSize-1); 115 pos=bytes_+stack_->elementAti(stackSize-2); 116 stack_->setSize(stackSize-2); 117 str_->truncate(length&0xffff); 118 length=(int32_t)((uint32_t)length>>16); 119 if(length>1) { 120 pos=branchNext(pos, length, errorCode); 121 if(pos==NULL) { 122 return TRUE; // Reached a final value. 123 } 124 } else { 125 str_->append((char)*pos++, errorCode); 126 } 127 } 128 if(remainingMatchLength_>=0) { 129 // We only get here if we started in a pending linear-match node 130 // with more than maxLength remaining bytes. 131 return truncateAndStop(); 132 } 133 for(;;) { 134 int32_t node=*pos++; 135 if(node>=kMinValueLead) { 136 // Deliver value for the byte sequence so far. 137 UBool isFinal=(UBool)(node&kValueIsFinal); 138 value_=readValue(pos, node>>1); 139 if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) { 140 pos_=NULL; 141 } else { 142 pos_=skipValue(pos, node); 143 } 144 return TRUE; 145 } 146 if(maxLength_>0 && str_->length()==maxLength_) { 147 return truncateAndStop(); 148 } 149 if(node<kMinLinearMatch) { 150 if(node==0) { 151 node=*pos++; 152 } 153 pos=branchNext(pos, node+1, errorCode); 154 if(pos==NULL) { 155 return TRUE; // Reached a final value. 156 } 157 } else { 158 // Linear-match node, append length bytes to str_. 159 int32_t length=node-kMinLinearMatch+1; 160 if(maxLength_>0 && str_->length()+length>maxLength_) { 161 str_->append(reinterpret_cast<const char *>(pos), 162 maxLength_-str_->length(), errorCode); 163 return truncateAndStop(); 164 } 165 str_->append(reinterpret_cast<const char *>(pos), length, errorCode); 166 pos+=length; 167 } 168 } 169 } 170 171 StringPiece 172 BytesTrie::Iterator::getString() const { 173 return str_ == NULL ? StringPiece() : str_->toStringPiece(); 174 } 175 176 UBool 177 BytesTrie::Iterator::truncateAndStop() { 178 pos_=NULL; 179 value_=-1; // no real value for str 180 return TRUE; 181 } 182 183 // Branch node, needs to take the first outbound edge and push state for the rest. 184 const uint8_t * 185 BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) { 186 while(length>kMaxBranchLinearSubNodeLength) { 187 ++pos; // ignore the comparison byte 188 // Push state for the greater-or-equal edge. 189 stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode); 190 stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode); 191 // Follow the less-than edge. 192 length>>=1; 193 pos=jumpByDelta(pos); 194 } 195 // List of key-value pairs where values are either final values or jump deltas. 196 // Read the first (key, value) pair. 197 uint8_t trieByte=*pos++; 198 int32_t node=*pos++; 199 UBool isFinal=(UBool)(node&kValueIsFinal); 200 int32_t value=readValue(pos, node>>1); 201 pos=skipValue(pos, node); 202 stack_->addElement((int32_t)(pos-bytes_), errorCode); 203 stack_->addElement(((length-1)<<16)|str_->length(), errorCode); 204 str_->append((char)trieByte, errorCode); 205 if(isFinal) { 206 pos_=NULL; 207 value_=value; 208 return NULL; 209 } else { 210 return pos+value; 211 } 212 } 213 214 U_NAMESPACE_END 215