1 /* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18 #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h" 19 20 #include "defines.h" 21 #include "suggest/core/dicnode/dic_node.h" 22 #include "suggest/core/dicnode/dic_node_vector.h" 23 #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" 24 #include "suggest/core/dictionary/ngram_listener.h" 25 #include "suggest/core/session/prev_words_info.h" 26 #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" 27 #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" 28 #include "suggest/policyimpl/dictionary/utils/probability_utils.h" 29 #include "utils/char_utils.h" 30 31 namespace latinime { 32 33 void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode, 34 DicNodeVector *const childDicNodes) const { 35 if (!dicNode->hasChildren()) { 36 return; 37 } 38 int nextPos = dicNode->getChildrenPtNodeArrayPos(); 39 if (nextPos < 0 || nextPos >= mDictBufferSize) { 40 AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d", 41 nextPos, mDictBufferSize); 42 mIsCorrupted = true; 43 ASSERT(false); 44 return; 45 } 46 const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( 47 mDictRoot, &nextPos); 48 for (int i = 0; i < childCount; i++) { 49 if (nextPos < 0 || nextPos >= mDictBufferSize) { 50 AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %d, childCount: %d / %d", 51 nextPos, mDictBufferSize, i, childCount); 52 mIsCorrupted = true; 53 ASSERT(false); 54 return; 55 } 56 nextPos = createAndGetLeavingChildNode(dicNode, nextPos, childDicNodes); 57 } 58 } 59 60 // This retrieves code points and the probability of the word by its terminal position. 61 // Due to the fact that words are ordered in the dictionary in a strict breadth-first order, 62 // it is possible to check for this with advantageous complexity. For each PtNode array, we search 63 // for PtNodes with children and compare the children position with the position we look for. 64 // When we shoot the position we look for, it means the word we look for is in the children 65 // of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a 66 // PtNode array with the last PtNode's children position still less than what we are searching for, 67 // we must descend the last PtNode's children (for example, if the word we are searching for starts 68 // with a z, it's the last PtNode of the root array, so all children addresses will be smaller 69 // than the position we look for, and we have to descend the z PtNode). 70 /* Parameters : 71 * ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is 72 * what is stored as the "bigram position" in each bigram) 73 * outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size. 74 * outUnigramProbability: a pointer to an int to write the probability into. 75 * Return value : the code point count, of 0 if the word was not found. 76 */ 77 // TODO: Split this function to be more readable 78 int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount( 79 const int ptNodePos, const int maxCodePointCount, int *const outCodePoints, 80 int *const outUnigramProbability) const { 81 int pos = getRootPosition(); 82 int wordPos = 0; 83 // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will 84 // only traverse PtNodes that are actually a part of the terminal we are searching, so each 85 // time we enter this loop we are one depth level further than last time. 86 // The only reason we count PtNodes is because we want to reduce the probability of infinite 87 // looping in case there is a bug. Since we know there is an upper bound to the depth we are 88 // supposed to traverse, it does not hurt to count iterations. 89 for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) { 90 int lastCandidatePtNodePos = 0; 91 // Let's loop through PtNodes in this PtNode array searching for either the terminal 92 // or one of its ascendants. 93 if (pos < 0 || pos >= mDictBufferSize) { 94 AKLOGE("PtNode array position is invalid. pos: %d, dict size: %d", 95 pos, mDictBufferSize); 96 mIsCorrupted = true; 97 ASSERT(false); 98 *outUnigramProbability = NOT_A_PROBABILITY; 99 return 0; 100 } 101 for (int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition( 102 mDictRoot, &pos); ptNodeCount > 0; --ptNodeCount) { 103 const int startPos = pos; 104 if (pos < 0 || pos >= mDictBufferSize) { 105 AKLOGE("PtNode position is invalid. pos: %d, dict size: %d", pos, mDictBufferSize); 106 mIsCorrupted = true; 107 ASSERT(false); 108 *outUnigramProbability = NOT_A_PROBABILITY; 109 return 0; 110 } 111 const PatriciaTrieReadingUtils::NodeFlags flags = 112 PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos); 113 const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( 114 mDictRoot, &pos); 115 if (ptNodePos == startPos) { 116 // We found the position. Copy the rest of the code points in the buffer and return 117 // the length. 118 outCodePoints[wordPos] = character; 119 if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { 120 int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( 121 mDictRoot, &pos); 122 // We count code points in order to avoid infinite loops if the file is broken 123 // or if there is some other bug 124 int charCount = maxCodePointCount; 125 while (NOT_A_CODE_POINT != nextChar && --charCount > 0) { 126 outCodePoints[++wordPos] = nextChar; 127 nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( 128 mDictRoot, &pos); 129 } 130 } 131 *outUnigramProbability = 132 PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, 133 &pos); 134 return ++wordPos; 135 } 136 // We need to skip past this PtNode, so skip any remaining code points after the 137 // first and possibly the probability. 138 if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) { 139 PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos); 140 } 141 if (PatriciaTrieReadingUtils::isTerminal(flags)) { 142 PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos); 143 } 144 // The fact that this PtNode has children is very important. Since we already know 145 // that this PtNode does not match, if it has no children we know it is irrelevant 146 // to what we are searching for. 147 const bool hasChildren = PatriciaTrieReadingUtils::hasChildrenInFlags(flags); 148 // We will write in `found' whether we have passed the children position we are 149 // searching for. For example if we search for "beer", the children of b are less 150 // than the address we are searching for and the children of c are greater. When we 151 // come here for c, we realize this is too big, and that we should descend b. 152 bool found; 153 if (hasChildren) { 154 int currentPos = pos; 155 // Here comes the tricky part. First, read the children position. 156 const int childrenPos = PatriciaTrieReadingUtils 157 ::readChildrenPositionAndAdvancePosition(mDictRoot, flags, ¤tPos); 158 if (childrenPos > ptNodePos) { 159 // If the children pos is greater than the position, it means the previous 160 // PtNode, which position is stored in lastCandidatePtNodePos, was the right 161 // one. 162 found = true; 163 } else if (1 >= ptNodeCount) { 164 // However if we are on the LAST PtNode of this array, and we have NOT shot the 165 // position we should descend THIS PtNode. So we trick the 166 // lastCandidatePtNodePos so that we will descend this PtNode, not the previous 167 // one. 168 lastCandidatePtNodePos = startPos; 169 found = true; 170 } else { 171 // Else, we should continue looking. 172 found = false; 173 } 174 } else { 175 // Even if we don't have children here, we could still be on the last PtNode of 176 // this array. If this is the case, we should descend the last PtNode that had 177 // children, and their position is already in lastCandidatePtNodePos. 178 found = (1 >= ptNodeCount); 179 } 180 181 if (found) { 182 // Okay, we found the PtNode we should descend. Its position is in 183 // the lastCandidatePtNodePos variable, so we just re-read it. 184 if (0 != lastCandidatePtNodePos) { 185 const PatriciaTrieReadingUtils::NodeFlags lastFlags = 186 PatriciaTrieReadingUtils::getFlagsAndAdvancePosition( 187 mDictRoot, &lastCandidatePtNodePos); 188 const int lastChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( 189 mDictRoot, &lastCandidatePtNodePos); 190 // We copy all the characters in this PtNode to the buffer 191 outCodePoints[wordPos] = lastChar; 192 if (PatriciaTrieReadingUtils::hasMultipleChars(lastFlags)) { 193 int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( 194 mDictRoot, &lastCandidatePtNodePos); 195 int charCount = maxCodePointCount; 196 while (-1 != nextChar && --charCount > 0) { 197 outCodePoints[++wordPos] = nextChar; 198 nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition( 199 mDictRoot, &lastCandidatePtNodePos); 200 } 201 } 202 ++wordPos; 203 // Now we only need to branch to the children address. Skip the probability if 204 // it's there, read pos, and break to resume the search at pos. 205 if (PatriciaTrieReadingUtils::isTerminal(lastFlags)) { 206 PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, 207 &lastCandidatePtNodePos); 208 } 209 pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( 210 mDictRoot, lastFlags, &lastCandidatePtNodePos); 211 break; 212 } else { 213 // Here is a little tricky part: we come here if we found out that all children 214 // addresses in this PtNode are bigger than the address we are searching for. 215 // Should we conclude the word is not in the dictionary? No! It could still be 216 // one of the remaining PtNodes in this array, so we have to keep looking in 217 // this array until we find it (or we realize it's not there either, in which 218 // case it's actually not in the dictionary). Pass the end of this PtNode, 219 // ready to start the next one. 220 if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { 221 PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( 222 mDictRoot, flags, &pos); 223 } 224 if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { 225 mShortcutListPolicy.skipAllShortcuts(&pos); 226 } 227 if (PatriciaTrieReadingUtils::hasBigrams(flags)) { 228 if (!mBigramListPolicy.skipAllBigrams(&pos)) { 229 AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize, 230 pos); 231 mIsCorrupted = true; 232 ASSERT(false); 233 *outUnigramProbability = NOT_A_PROBABILITY; 234 return 0; 235 } 236 } 237 } 238 } else { 239 // If we did not find it, we should record the last children address for the next 240 // iteration. 241 if (hasChildren) lastCandidatePtNodePos = startPos; 242 // Now skip the end of this PtNode (children pos and the attributes if any) so that 243 // our pos is after the end of this PtNode, at the start of the next one. 244 if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) { 245 PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition( 246 mDictRoot, flags, &pos); 247 } 248 if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) { 249 mShortcutListPolicy.skipAllShortcuts(&pos); 250 } 251 if (PatriciaTrieReadingUtils::hasBigrams(flags)) { 252 if (!mBigramListPolicy.skipAllBigrams(&pos)) { 253 AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize, pos); 254 mIsCorrupted = true; 255 ASSERT(false); 256 *outUnigramProbability = NOT_A_PROBABILITY; 257 return 0; 258 } 259 } 260 } 261 262 } 263 } 264 // If we have looked through all the PtNodes and found no match, the ptNodePos is 265 // not the position of a terminal in this dictionary. 266 return 0; 267 } 268 269 // This function gets the position of the terminal PtNode of the exact matching word in the 270 // dictionary. If no match is found, it returns NOT_A_DICT_POS. 271 int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord, 272 const int length, const bool forceLowerCaseSearch) const { 273 DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader); 274 readingHelper.initWithPtNodeArrayPos(getRootPosition()); 275 const int ptNodePos = 276 readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch); 277 if (readingHelper.isError()) { 278 mIsCorrupted = true; 279 AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes()."); 280 } 281 return ptNodePos; 282 } 283 284 int PatriciaTriePolicy::getProbability(const int unigramProbability, 285 const int bigramProbability) const { 286 // Due to space constraints, the probability for bigrams is approximate - the lower the unigram 287 // probability, the worse the precision. The theoritical maximum error in resulting probability 288 // is 8 - although in the practice it's never bigger than 3 or 4 in very bad cases. This means 289 // that sometimes, we'll see some bigrams interverted here, but it can't get too bad. 290 if (unigramProbability == NOT_A_PROBABILITY) { 291 return NOT_A_PROBABILITY; 292 } else if (bigramProbability == NOT_A_PROBABILITY) { 293 return ProbabilityUtils::backoff(unigramProbability); 294 } else { 295 return ProbabilityUtils::computeProbabilityForBigram(unigramProbability, 296 bigramProbability); 297 } 298 } 299 300 int PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos, 301 const int ptNodePos) const { 302 if (ptNodePos == NOT_A_DICT_POS) { 303 return NOT_A_PROBABILITY; 304 } 305 const PtNodeParams ptNodeParams = 306 mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); 307 if (ptNodeParams.isNotAWord() || ptNodeParams.isBlacklisted()) { 308 // If this is not a word, or if it's a blacklisted entry, it should behave as 309 // having no probability outside of the suggestion process (where it should be used 310 // for shortcuts). 311 return NOT_A_PROBABILITY; 312 } 313 if (prevWordsPtNodePos) { 314 const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]); 315 BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramsPosition); 316 while (bigramsIt.hasNext()) { 317 bigramsIt.next(); 318 if (bigramsIt.getBigramPos() == ptNodePos 319 && bigramsIt.getProbability() != NOT_A_PROBABILITY) { 320 return getProbability(ptNodeParams.getProbability(), bigramsIt.getProbability()); 321 } 322 } 323 return NOT_A_PROBABILITY; 324 } 325 return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); 326 } 327 328 void PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos, 329 NgramListener *const listener) const { 330 if (!prevWordsPtNodePos) { 331 return; 332 } 333 const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]); 334 BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramsPosition); 335 while (bigramsIt.hasNext()) { 336 bigramsIt.next(); 337 listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos()); 338 } 339 } 340 341 int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const { 342 if (ptNodePos == NOT_A_DICT_POS) { 343 return NOT_A_DICT_POS; 344 } 345 return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getShortcutPos(); 346 } 347 348 int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { 349 if (ptNodePos == NOT_A_DICT_POS) { 350 return NOT_A_DICT_POS; 351 } 352 return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getBigramsPos(); 353 } 354 355 int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode, 356 const int ptNodePos, DicNodeVector *childDicNodes) const { 357 PatriciaTrieReadingUtils::NodeFlags flags; 358 int mergedNodeCodePointCount = 0; 359 int mergedNodeCodePoints[MAX_WORD_LENGTH]; 360 int probability = NOT_A_PROBABILITY; 361 int childrenPos = NOT_A_DICT_POS; 362 int shortcutPos = NOT_A_DICT_POS; 363 int bigramPos = NOT_A_DICT_POS; 364 int siblingPos = NOT_A_DICT_POS; 365 PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(), 366 &mBigramListPolicy, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints, 367 &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos); 368 // Skip PtNodes don't start with Unicode code point because they represent non-word information. 369 if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) { 370 childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability, 371 PatriciaTrieReadingUtils::isTerminal(flags), 372 PatriciaTrieReadingUtils::hasChildrenInFlags(flags), 373 PatriciaTrieReadingUtils::isBlacklisted(flags) 374 || PatriciaTrieReadingUtils::isNotAWord(flags), 375 mergedNodeCodePointCount, mergedNodeCodePoints); 376 } 377 return siblingPos; 378 } 379 380 const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoints, 381 const int codePointCount) const { 382 const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount, 383 false /* forceLowerCaseSearch */); 384 if (ptNodePos == NOT_A_DICT_POS) { 385 AKLOGE("getWordProperty was called for invalid word."); 386 return WordProperty(); 387 } 388 const PtNodeParams ptNodeParams = 389 mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); 390 std::vector<int> codePointVector(ptNodeParams.getCodePoints(), 391 ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount()); 392 // Fetch bigram information. 393 std::vector<BigramProperty> bigrams; 394 const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); 395 int bigramWord1CodePoints[MAX_WORD_LENGTH]; 396 BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramListPos); 397 while (bigramsIt.hasNext()) { 398 // Fetch the next bigram information and forward the iterator. 399 bigramsIt.next(); 400 // Skip the entry if the entry has been deleted. This never happens for ver2 dicts. 401 if (bigramsIt.getBigramPos() != NOT_A_DICT_POS) { 402 int word1Probability = NOT_A_PROBABILITY; 403 const int word1CodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount( 404 bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramWord1CodePoints, 405 &word1Probability); 406 const std::vector<int> word1(bigramWord1CodePoints, 407 bigramWord1CodePoints + word1CodePointCount); 408 const int probability = getProbability(word1Probability, bigramsIt.getProbability()); 409 bigrams.emplace_back(&word1, probability, 410 NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */); 411 } 412 } 413 // Fetch shortcut information. 414 std::vector<UnigramProperty::ShortcutProperty> shortcuts; 415 int shortcutPos = getShortcutPositionOfPtNode(ptNodePos); 416 if (shortcutPos != NOT_A_DICT_POS) { 417 int shortcutTargetCodePoints[MAX_WORD_LENGTH]; 418 ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mDictRoot, &shortcutPos); 419 bool hasNext = true; 420 while (hasNext) { 421 const ShortcutListReadingUtils::ShortcutFlags shortcutFlags = 422 ShortcutListReadingUtils::getFlagsAndForwardPointer(mDictRoot, &shortcutPos); 423 hasNext = ShortcutListReadingUtils::hasNext(shortcutFlags); 424 const int shortcutTargetLength = ShortcutListReadingUtils::readShortcutTarget( 425 mDictRoot, MAX_WORD_LENGTH, shortcutTargetCodePoints, &shortcutPos); 426 const std::vector<int> shortcutTarget(shortcutTargetCodePoints, 427 shortcutTargetCodePoints + shortcutTargetLength); 428 const int shortcutProbability = 429 ShortcutListReadingUtils::getProbabilityFromFlags(shortcutFlags); 430 shortcuts.emplace_back(&shortcutTarget, shortcutProbability); 431 } 432 } 433 const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(), 434 ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(), 435 NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts); 436 return WordProperty(&codePointVector, &unigramProperty, &bigrams); 437 } 438 439 int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints, 440 int *const outCodePointCount) { 441 *outCodePointCount = 0; 442 if (token == 0) { 443 // Start iterating the dictionary. 444 mTerminalPtNodePositionsForIteratingWords.clear(); 445 DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy( 446 &mTerminalPtNodePositionsForIteratingWords); 447 DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader); 448 readingHelper.initWithPtNodeArrayPos(getRootPosition()); 449 readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy); 450 } 451 const int terminalPtNodePositionsVectorSize = 452 static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size()); 453 if (token < 0 || token >= terminalPtNodePositionsVectorSize) { 454 AKLOGE("Given token %d is invalid.", token); 455 return 0; 456 } 457 const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token]; 458 int unigramProbability = NOT_A_PROBABILITY; 459 *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos, 460 MAX_WORD_LENGTH, outCodePoints, &unigramProbability); 461 const int nextToken = token + 1; 462 if (nextToken >= terminalPtNodePositionsVectorSize) { 463 // All words have been iterated. 464 mTerminalPtNodePositionsForIteratingWords.clear(); 465 return 0; 466 } 467 return nextToken; 468 } 469 470 } // namespace latinime 471