1 /* 2 ******************************************************************************* 3 * Copyright (C) 2010-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * file name: ucharstrieiterator.h 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2010nov15 12 * created by: Markus W. Scherer 13 */ 14 15 #include "unicode/utypes.h" 16 #include "unicode/ucharstrie.h" 17 #include "unicode/unistr.h" 18 #include "uvectr32.h" 19 20 U_NAMESPACE_BEGIN 21 22 UCharsTrie::Iterator::Iterator(const UChar *trieUChars, int32_t maxStringLength, 23 UErrorCode &errorCode) 24 : uchars_(trieUChars), 25 pos_(uchars_), initialPos_(uchars_), 26 remainingMatchLength_(-1), initialRemainingMatchLength_(-1), 27 skipValue_(FALSE), 28 maxLength_(maxStringLength), value_(0), stack_(NULL) { 29 if(U_FAILURE(errorCode)) { 30 return; 31 } 32 // stack_ is a pointer so that it's easy to turn ucharstrie.h into 33 // a public API header for which we would want it to depend only on 34 // other public headers. 35 // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway 36 // via the UnicodeString and UVector32 implementations, so this additional 37 // cost is minimal. 38 stack_=new UVector32(errorCode); 39 if(stack_==NULL) { 40 errorCode=U_MEMORY_ALLOCATION_ERROR; 41 } 42 } 43 44 UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength, 45 UErrorCode &errorCode) 46 : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_), 47 remainingMatchLength_(trie.remainingMatchLength_), 48 initialRemainingMatchLength_(trie.remainingMatchLength_), 49 skipValue_(FALSE), 50 maxLength_(maxStringLength), value_(0), stack_(NULL) { 51 if(U_FAILURE(errorCode)) { 52 return; 53 } 54 stack_=new UVector32(errorCode); 55 if(U_FAILURE(errorCode)) { 56 return; 57 } 58 if(stack_==NULL) { 59 errorCode=U_MEMORY_ALLOCATION_ERROR; 60 return; 61 } 62 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. 63 if(length>=0) { 64 // Pending linear-match node, append remaining UChars to str_. 65 ++length; 66 if(maxLength_>0 && length>maxLength_) { 67 length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. 68 } 69 str_.append(pos_, length); 70 pos_+=length; 71 remainingMatchLength_-=length; 72 } 73 } 74 75 UCharsTrie::Iterator::~Iterator() { 76 delete stack_; 77 } 78 79 UCharsTrie::Iterator & 80 UCharsTrie::Iterator::reset() { 81 pos_=initialPos_; 82 remainingMatchLength_=initialRemainingMatchLength_; 83 skipValue_=FALSE; 84 int32_t length=remainingMatchLength_+1; // Remaining match length. 85 if(maxLength_>0 && length>maxLength_) { 86 length=maxLength_; 87 } 88 str_.truncate(length); 89 pos_+=length; 90 remainingMatchLength_-=length; 91 stack_->setSize(0); 92 return *this; 93 } 94 95 UBool 96 UCharsTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } 97 98 UBool 99 UCharsTrie::Iterator::next(UErrorCode &errorCode) { 100 if(U_FAILURE(errorCode)) { 101 return FALSE; 102 } 103 const UChar *pos=pos_; 104 if(pos==NULL) { 105 if(stack_->isEmpty()) { 106 return FALSE; 107 } 108 // Pop the state off the stack and continue with the next outbound edge of 109 // the branch node. 110 int32_t stackSize=stack_->size(); 111 int32_t length=stack_->elementAti(stackSize-1); 112 pos=uchars_+stack_->elementAti(stackSize-2); 113 stack_->setSize(stackSize-2); 114 str_.truncate(length&0xffff); 115 length=(int32_t)((uint32_t)length>>16); 116 if(length>1) { 117 pos=branchNext(pos, length, errorCode); 118 if(pos==NULL) { 119 return TRUE; // Reached a final value. 120 } 121 } else { 122 str_.append(*pos++); 123 } 124 } 125 if(remainingMatchLength_>=0) { 126 // We only get here if we started in a pending linear-match node 127 // with more than maxLength remaining units. 128 return truncateAndStop(); 129 } 130 for(;;) { 131 int32_t node=*pos++; 132 if(node>=kMinValueLead) { 133 if(skipValue_) { 134 pos=skipNodeValue(pos, node); 135 node&=kNodeTypeMask; 136 skipValue_=FALSE; 137 } else { 138 // Deliver value for the string so far. 139 UBool isFinal=(UBool)(node>>15); 140 if(isFinal) { 141 value_=readValue(pos, node&0x7fff); 142 } else { 143 value_=readNodeValue(pos, node); 144 } 145 if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) { 146 pos_=NULL; 147 } else { 148 // We cannot skip the value right here because it shares its 149 // lead unit with a match node which we have to evaluate 150 // next time. 151 // Instead, keep pos_ on the node lead unit itself. 152 pos_=pos-1; 153 skipValue_=TRUE; 154 } 155 return TRUE; 156 } 157 } 158 if(maxLength_>0 && str_.length()==maxLength_) { 159 return truncateAndStop(); 160 } 161 if(node<kMinLinearMatch) { 162 if(node==0) { 163 node=*pos++; 164 } 165 pos=branchNext(pos, node+1, errorCode); 166 if(pos==NULL) { 167 return TRUE; // Reached a final value. 168 } 169 } else { 170 // Linear-match node, append length units to str_. 171 int32_t length=node-kMinLinearMatch+1; 172 if(maxLength_>0 && str_.length()+length>maxLength_) { 173 str_.append(pos, maxLength_-str_.length()); 174 return truncateAndStop(); 175 } 176 str_.append(pos, length); 177 pos+=length; 178 } 179 } 180 } 181 182 // Branch node, needs to take the first outbound edge and push state for the rest. 183 const UChar * 184 UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) { 185 while(length>kMaxBranchLinearSubNodeLength) { 186 ++pos; // ignore the comparison unit 187 // Push state for the greater-or-equal edge. 188 stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode); 189 stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode); 190 // Follow the less-than edge. 191 length>>=1; 192 pos=jumpByDelta(pos); 193 } 194 // List of key-value pairs where values are either final values or jump deltas. 195 // Read the first (key, value) pair. 196 UChar trieUnit=*pos++; 197 int32_t node=*pos++; 198 UBool isFinal=(UBool)(node>>15); 199 int32_t value=readValue(pos, node&=0x7fff); 200 pos=skipValue(pos, node); 201 stack_->addElement((int32_t)(pos-uchars_), errorCode); 202 stack_->addElement(((length-1)<<16)|str_.length(), errorCode); 203 str_.append(trieUnit); 204 if(isFinal) { 205 pos_=NULL; 206 value_=value; 207 return NULL; 208 } else { 209 return pos+value; 210 } 211 } 212 213 U_NAMESPACE_END 214