1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2010-2011, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * file name: ucharstrieiterator.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2010nov15 14 * created by: Markus W. Scherer 15 */ 16 17 #include "unicode/utypes.h" 18 #include "unicode/ucharstrie.h" 19 #include "unicode/unistr.h" 20 #include "uvectr32.h" 21 22 U_NAMESPACE_BEGIN 23 24 UCharsTrie::Iterator::Iterator(const UChar *trieUChars, int32_t maxStringLength, 25 UErrorCode &errorCode) 26 : uchars_(trieUChars), 27 pos_(uchars_), initialPos_(uchars_), 28 remainingMatchLength_(-1), initialRemainingMatchLength_(-1), 29 skipValue_(FALSE), 30 maxLength_(maxStringLength), value_(0), stack_(NULL) { 31 if(U_FAILURE(errorCode)) { 32 return; 33 } 34 // stack_ is a pointer so that it's easy to turn ucharstrie.h into 35 // a public API header for which we would want it to depend only on 36 // other public headers. 37 // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway 38 // via the UnicodeString and UVector32 implementations, so this additional 39 // cost is minimal. 40 stack_=new UVector32(errorCode); 41 if(stack_==NULL) { 42 errorCode=U_MEMORY_ALLOCATION_ERROR; 43 } 44 } 45 46 UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength, 47 UErrorCode &errorCode) 48 : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_), 49 remainingMatchLength_(trie.remainingMatchLength_), 50 initialRemainingMatchLength_(trie.remainingMatchLength_), 51 skipValue_(FALSE), 52 maxLength_(maxStringLength), value_(0), stack_(NULL) { 53 if(U_FAILURE(errorCode)) { 54 return; 55 } 56 stack_=new UVector32(errorCode); 57 if(U_FAILURE(errorCode)) { 58 return; 59 } 60 if(stack_==NULL) { 61 errorCode=U_MEMORY_ALLOCATION_ERROR; 62 return; 63 } 64 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. 65 if(length>=0) { 66 // Pending linear-match node, append remaining UChars to str_. 67 ++length; 68 if(maxLength_>0 && length>maxLength_) { 69 length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. 70 } 71 str_.append(pos_, length); 72 pos_+=length; 73 remainingMatchLength_-=length; 74 } 75 } 76 77 UCharsTrie::Iterator::~Iterator() { 78 delete stack_; 79 } 80 81 UCharsTrie::Iterator & 82 UCharsTrie::Iterator::reset() { 83 pos_=initialPos_; 84 remainingMatchLength_=initialRemainingMatchLength_; 85 skipValue_=FALSE; 86 int32_t length=remainingMatchLength_+1; // Remaining match length. 87 if(maxLength_>0 && length>maxLength_) { 88 length=maxLength_; 89 } 90 str_.truncate(length); 91 pos_+=length; 92 remainingMatchLength_-=length; 93 stack_->setSize(0); 94 return *this; 95 } 96 97 UBool 98 UCharsTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } 99 100 UBool 101 UCharsTrie::Iterator::next(UErrorCode &errorCode) { 102 if(U_FAILURE(errorCode)) { 103 return FALSE; 104 } 105 const UChar *pos=pos_; 106 if(pos==NULL) { 107 if(stack_->isEmpty()) { 108 return FALSE; 109 } 110 // Pop the state off the stack and continue with the next outbound edge of 111 // the branch node. 112 int32_t stackSize=stack_->size(); 113 int32_t length=stack_->elementAti(stackSize-1); 114 pos=uchars_+stack_->elementAti(stackSize-2); 115 stack_->setSize(stackSize-2); 116 str_.truncate(length&0xffff); 117 length=(int32_t)((uint32_t)length>>16); 118 if(length>1) { 119 pos=branchNext(pos, length, errorCode); 120 if(pos==NULL) { 121 return TRUE; // Reached a final value. 122 } 123 } else { 124 str_.append(*pos++); 125 } 126 } 127 if(remainingMatchLength_>=0) { 128 // We only get here if we started in a pending linear-match node 129 // with more than maxLength remaining units. 130 return truncateAndStop(); 131 } 132 for(;;) { 133 int32_t node=*pos++; 134 if(node>=kMinValueLead) { 135 if(skipValue_) { 136 pos=skipNodeValue(pos, node); 137 node&=kNodeTypeMask; 138 skipValue_=FALSE; 139 } else { 140 // Deliver value for the string so far. 141 UBool isFinal=(UBool)(node>>15); 142 if(isFinal) { 143 value_=readValue(pos, node&0x7fff); 144 } else { 145 value_=readNodeValue(pos, node); 146 } 147 if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) { 148 pos_=NULL; 149 } else { 150 // We cannot skip the value right here because it shares its 151 // lead unit with a match node which we have to evaluate 152 // next time. 153 // Instead, keep pos_ on the node lead unit itself. 154 pos_=pos-1; 155 skipValue_=TRUE; 156 } 157 return TRUE; 158 } 159 } 160 if(maxLength_>0 && str_.length()==maxLength_) { 161 return truncateAndStop(); 162 } 163 if(node<kMinLinearMatch) { 164 if(node==0) { 165 node=*pos++; 166 } 167 pos=branchNext(pos, node+1, errorCode); 168 if(pos==NULL) { 169 return TRUE; // Reached a final value. 170 } 171 } else { 172 // Linear-match node, append length units to str_. 173 int32_t length=node-kMinLinearMatch+1; 174 if(maxLength_>0 && str_.length()+length>maxLength_) { 175 str_.append(pos, maxLength_-str_.length()); 176 return truncateAndStop(); 177 } 178 str_.append(pos, length); 179 pos+=length; 180 } 181 } 182 } 183 184 // Branch node, needs to take the first outbound edge and push state for the rest. 185 const UChar * 186 UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) { 187 while(length>kMaxBranchLinearSubNodeLength) { 188 ++pos; // ignore the comparison unit 189 // Push state for the greater-or-equal edge. 190 stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode); 191 stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode); 192 // Follow the less-than edge. 193 length>>=1; 194 pos=jumpByDelta(pos); 195 } 196 // List of key-value pairs where values are either final values or jump deltas. 197 // Read the first (key, value) pair. 198 UChar trieUnit=*pos++; 199 int32_t node=*pos++; 200 UBool isFinal=(UBool)(node>>15); 201 int32_t value=readValue(pos, node&=0x7fff); 202 pos=skipValue(pos, node); 203 stack_->addElement((int32_t)(pos-uchars_), errorCode); 204 stack_->addElement(((length-1)<<16)|str_.length(), errorCode); 205 str_.append(trieUnit); 206 if(isFinal) { 207 pos_=NULL; 208 value_=value; 209 return NULL; 210 } else { 211 return pos+value; 212 } 213 } 214 215 U_NAMESPACE_END 216