1 /* 2 ******************************************************************************* 3 * Copyright (C) 2010-2012, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * file name: bytestrieiterator.cpp 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2010nov03 12 * created by: Markus W. Scherer 13 */ 14 15 #include "unicode/utypes.h" 16 #include "unicode/bytestrie.h" 17 #include "unicode/stringpiece.h" 18 #include "charstr.h" 19 #include "uvectr32.h" 20 21 U_NAMESPACE_BEGIN 22 23 BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength, 24 UErrorCode &errorCode) 25 : bytes_(static_cast<const uint8_t *>(trieBytes)), 26 pos_(bytes_), initialPos_(bytes_), 27 remainingMatchLength_(-1), initialRemainingMatchLength_(-1), 28 str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { 29 if(U_FAILURE(errorCode)) { 30 return; 31 } 32 // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into 33 // a public API header for which we would want it to depend only on 34 // other public headers. 35 // Unlike BytesTrie itself, its Iterator performs memory allocations anyway 36 // via the CharString and UVector32 implementations, so this additional 37 // cost is minimal. 38 str_=new CharString(); 39 stack_=new UVector32(errorCode); 40 if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) { 41 errorCode=U_MEMORY_ALLOCATION_ERROR; 42 } 43 } 44 45 BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength, 46 UErrorCode &errorCode) 47 : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_), 48 remainingMatchLength_(trie.remainingMatchLength_), 49 initialRemainingMatchLength_(trie.remainingMatchLength_), 50 str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { 51 if(U_FAILURE(errorCode)) { 52 return; 53 } 54 str_=new CharString(); 55 stack_=new UVector32(errorCode); 56 if(U_FAILURE(errorCode)) { 57 return; 58 } 59 if(str_==NULL || stack_==NULL) { 60 errorCode=U_MEMORY_ALLOCATION_ERROR; 61 return; 62 } 63 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. 64 if(length>=0) { 65 // Pending linear-match node, append remaining bytes to str_. 66 ++length; 67 if(maxLength_>0 && length>maxLength_) { 68 length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. 69 } 70 str_->append(reinterpret_cast<const char *>(pos_), length, errorCode); 71 pos_+=length; 72 remainingMatchLength_-=length; 73 } 74 } 75 76 BytesTrie::Iterator::~Iterator() { 77 delete str_; 78 delete stack_; 79 } 80 81 BytesTrie::Iterator & 82 BytesTrie::Iterator::reset() { 83 pos_=initialPos_; 84 remainingMatchLength_=initialRemainingMatchLength_; 85 int32_t length=remainingMatchLength_+1; // Remaining match length. 86 if(maxLength_>0 && length>maxLength_) { 87 length=maxLength_; 88 } 89 str_->truncate(length); 90 pos_+=length; 91 remainingMatchLength_-=length; 92 stack_->setSize(0); 93 return *this; 94 } 95 96 UBool 97 BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } 98 99 UBool 100 BytesTrie::Iterator::next(UErrorCode &errorCode) { 101 if(U_FAILURE(errorCode)) { 102 return FALSE; 103 } 104 const uint8_t *pos=pos_; 105 if(pos==NULL) { 106 if(stack_->isEmpty()) { 107 return FALSE; 108 } 109 // Pop the state off the stack and continue with the next outbound edge of 110 // the branch node. 111 int32_t stackSize=stack_->size(); 112 int32_t length=stack_->elementAti(stackSize-1); 113 pos=bytes_+stack_->elementAti(stackSize-2); 114 stack_->setSize(stackSize-2); 115 str_->truncate(length&0xffff); 116 length=(int32_t)((uint32_t)length>>16); 117 if(length>1) { 118 pos=branchNext(pos, length, errorCode); 119 if(pos==NULL) { 120 return TRUE; // Reached a final value. 121 } 122 } else { 123 str_->append((char)*pos++, errorCode); 124 } 125 } 126 if(remainingMatchLength_>=0) { 127 // We only get here if we started in a pending linear-match node 128 // with more than maxLength remaining bytes. 129 return truncateAndStop(); 130 } 131 for(;;) { 132 int32_t node=*pos++; 133 if(node>=kMinValueLead) { 134 // Deliver value for the byte sequence so far. 135 UBool isFinal=(UBool)(node&kValueIsFinal); 136 value_=readValue(pos, node>>1); 137 if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) { 138 pos_=NULL; 139 } else { 140 pos_=skipValue(pos, node); 141 } 142 sp_.set(str_->data(), str_->length()); 143 return TRUE; 144 } 145 if(maxLength_>0 && str_->length()==maxLength_) { 146 return truncateAndStop(); 147 } 148 if(node<kMinLinearMatch) { 149 if(node==0) { 150 node=*pos++; 151 } 152 pos=branchNext(pos, node+1, errorCode); 153 if(pos==NULL) { 154 return TRUE; // Reached a final value. 155 } 156 } else { 157 // Linear-match node, append length bytes to str_. 158 int32_t length=node-kMinLinearMatch+1; 159 if(maxLength_>0 && str_->length()+length>maxLength_) { 160 str_->append(reinterpret_cast<const char *>(pos), 161 maxLength_-str_->length(), errorCode); 162 return truncateAndStop(); 163 } 164 str_->append(reinterpret_cast<const char *>(pos), length, errorCode); 165 pos+=length; 166 } 167 } 168 } 169 170 UBool 171 BytesTrie::Iterator::truncateAndStop() { 172 pos_=NULL; 173 sp_.set(str_->data(), str_->length()); 174 value_=-1; // no real value for str 175 return TRUE; 176 } 177 178 // Branch node, needs to take the first outbound edge and push state for the rest. 179 const uint8_t * 180 BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) { 181 while(length>kMaxBranchLinearSubNodeLength) { 182 ++pos; // ignore the comparison byte 183 // Push state for the greater-or-equal edge. 184 stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode); 185 stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode); 186 // Follow the less-than edge. 187 length>>=1; 188 pos=jumpByDelta(pos); 189 } 190 // List of key-value pairs where values are either final values or jump deltas. 191 // Read the first (key, value) pair. 192 uint8_t trieByte=*pos++; 193 int32_t node=*pos++; 194 UBool isFinal=(UBool)(node&kValueIsFinal); 195 int32_t value=readValue(pos, node>>1); 196 pos=skipValue(pos, node); 197 stack_->addElement((int32_t)(pos-bytes_), errorCode); 198 stack_->addElement(((length-1)<<16)|str_->length(), errorCode); 199 str_->append((char)trieByte, errorCode); 200 if(isFinal) { 201 pos_=NULL; 202 sp_.set(str_->data(), str_->length()); 203 value_=value; 204 return NULL; 205 } else { 206 return pos+value; 207 } 208 } 209 210 U_NAMESPACE_END 211