Home | History | Annotate | Download | only in v2
      1 /*
      2  * Copyright (C) 2013, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 
     18 #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
     19 
     20 #include "defines.h"
     21 #include "suggest/core/dicnode/dic_node.h"
     22 #include "suggest/core/dicnode/dic_node_vector.h"
     23 #include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
     24 #include "suggest/core/dictionary/ngram_listener.h"
     25 #include "suggest/core/session/prev_words_info.h"
     26 #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
     27 #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
     28 #include "suggest/policyimpl/dictionary/utils/probability_utils.h"
     29 #include "utils/char_utils.h"
     30 
     31 namespace latinime {
     32 
     33 void PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
     34         DicNodeVector *const childDicNodes) const {
     35     if (!dicNode->hasChildren()) {
     36         return;
     37     }
     38     int nextPos = dicNode->getChildrenPtNodeArrayPos();
     39     if (nextPos < 0 || nextPos >= mDictBufferSize) {
     40         AKLOGE("Children PtNode array position is invalid. pos: %d, dict size: %d",
     41                 nextPos, mDictBufferSize);
     42         mIsCorrupted = true;
     43         ASSERT(false);
     44         return;
     45     }
     46     const int childCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
     47             mDictRoot, &nextPos);
     48     for (int i = 0; i < childCount; i++) {
     49         if (nextPos < 0 || nextPos >= mDictBufferSize) {
     50             AKLOGE("Child PtNode position is invalid. pos: %d, dict size: %d, childCount: %d / %d",
     51                     nextPos, mDictBufferSize, i, childCount);
     52             mIsCorrupted = true;
     53             ASSERT(false);
     54             return;
     55         }
     56         nextPos = createAndGetLeavingChildNode(dicNode, nextPos, childDicNodes);
     57     }
     58 }
     59 
     60 // This retrieves code points and the probability of the word by its terminal position.
     61 // Due to the fact that words are ordered in the dictionary in a strict breadth-first order,
     62 // it is possible to check for this with advantageous complexity. For each PtNode array, we search
     63 // for PtNodes with children and compare the children position with the position we look for.
     64 // When we shoot the position we look for, it means the word we look for is in the children
     65 // of the previous PtNode. The only tricky part is the fact that if we arrive at the end of a
     66 // PtNode array with the last PtNode's children position still less than what we are searching for,
     67 // we must descend the last PtNode's children (for example, if the word we are searching for starts
     68 // with a z, it's the last PtNode of the root array, so all children addresses will be smaller
     69 // than the position we look for, and we have to descend the z PtNode).
     70 /* Parameters :
     71  * ptNodePos: the byte position of the terminal PtNode of the word we are searching for (this is
     72  *   what is stored as the "bigram position" in each bigram)
     73  * outCodePoints: an array to write the found word, with MAX_WORD_LENGTH size.
     74  * outUnigramProbability: a pointer to an int to write the probability into.
     75  * Return value : the code point count, of 0 if the word was not found.
     76  */
     77 // TODO: Split this function to be more readable
     78 int PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
     79         const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
     80         int *const outUnigramProbability) const {
     81     int pos = getRootPosition();
     82     int wordPos = 0;
     83     // One iteration of the outer loop iterates through PtNode arrays. As stated above, we will
     84     // only traverse PtNodes that are actually a part of the terminal we are searching, so each
     85     // time we enter this loop we are one depth level further than last time.
     86     // The only reason we count PtNodes is because we want to reduce the probability of infinite
     87     // looping in case there is a bug. Since we know there is an upper bound to the depth we are
     88     // supposed to traverse, it does not hurt to count iterations.
     89     for (int loopCount = maxCodePointCount; loopCount > 0; --loopCount) {
     90         int lastCandidatePtNodePos = 0;
     91         // Let's loop through PtNodes in this PtNode array searching for either the terminal
     92         // or one of its ascendants.
     93         if (pos < 0 || pos >= mDictBufferSize) {
     94             AKLOGE("PtNode array position is invalid. pos: %d, dict size: %d",
     95                     pos, mDictBufferSize);
     96             mIsCorrupted = true;
     97             ASSERT(false);
     98             *outUnigramProbability = NOT_A_PROBABILITY;
     99             return 0;
    100         }
    101         for (int ptNodeCount = PatriciaTrieReadingUtils::getPtNodeArraySizeAndAdvancePosition(
    102                 mDictRoot, &pos); ptNodeCount > 0; --ptNodeCount) {
    103             const int startPos = pos;
    104             if (pos < 0 || pos >= mDictBufferSize) {
    105                 AKLOGE("PtNode position is invalid. pos: %d, dict size: %d", pos, mDictBufferSize);
    106                 mIsCorrupted = true;
    107                 ASSERT(false);
    108                 *outUnigramProbability = NOT_A_PROBABILITY;
    109                 return 0;
    110             }
    111             const PatriciaTrieReadingUtils::NodeFlags flags =
    112                     PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(mDictRoot, &pos);
    113             const int character = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
    114                     mDictRoot, &pos);
    115             if (ptNodePos == startPos) {
    116                 // We found the position. Copy the rest of the code points in the buffer and return
    117                 // the length.
    118                 outCodePoints[wordPos] = character;
    119                 if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
    120                     int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
    121                             mDictRoot, &pos);
    122                     // We count code points in order to avoid infinite loops if the file is broken
    123                     // or if there is some other bug
    124                     int charCount = maxCodePointCount;
    125                     while (NOT_A_CODE_POINT != nextChar && --charCount > 0) {
    126                         outCodePoints[++wordPos] = nextChar;
    127                         nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
    128                                 mDictRoot, &pos);
    129                     }
    130                 }
    131                 *outUnigramProbability =
    132                         PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot,
    133                                 &pos);
    134                 return ++wordPos;
    135             }
    136             // We need to skip past this PtNode, so skip any remaining code points after the
    137             // first and possibly the probability.
    138             if (PatriciaTrieReadingUtils::hasMultipleChars(flags)) {
    139                 PatriciaTrieReadingUtils::skipCharacters(mDictRoot, flags, MAX_WORD_LENGTH, &pos);
    140             }
    141             if (PatriciaTrieReadingUtils::isTerminal(flags)) {
    142                 PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot, &pos);
    143             }
    144             // The fact that this PtNode has children is very important. Since we already know
    145             // that this PtNode does not match, if it has no children we know it is irrelevant
    146             // to what we are searching for.
    147             const bool hasChildren = PatriciaTrieReadingUtils::hasChildrenInFlags(flags);
    148             // We will write in `found' whether we have passed the children position we are
    149             // searching for. For example if we search for "beer", the children of b are less
    150             // than the address we are searching for and the children of c are greater. When we
    151             // come here for c, we realize this is too big, and that we should descend b.
    152             bool found;
    153             if (hasChildren) {
    154                 int currentPos = pos;
    155                 // Here comes the tricky part. First, read the children position.
    156                 const int childrenPos = PatriciaTrieReadingUtils
    157                         ::readChildrenPositionAndAdvancePosition(mDictRoot, flags, &currentPos);
    158                 if (childrenPos > ptNodePos) {
    159                     // If the children pos is greater than the position, it means the previous
    160                     // PtNode, which position is stored in lastCandidatePtNodePos, was the right
    161                     // one.
    162                     found = true;
    163                 } else if (1 >= ptNodeCount) {
    164                     // However if we are on the LAST PtNode of this array, and we have NOT shot the
    165                     // position we should descend THIS PtNode. So we trick the
    166                     // lastCandidatePtNodePos so that we will descend this PtNode, not the previous
    167                     // one.
    168                     lastCandidatePtNodePos = startPos;
    169                     found = true;
    170                 } else {
    171                     // Else, we should continue looking.
    172                     found = false;
    173                 }
    174             } else {
    175                 // Even if we don't have children here, we could still be on the last PtNode of
    176                 // this array. If this is the case, we should descend the last PtNode that had
    177                 // children, and their position is already in lastCandidatePtNodePos.
    178                 found = (1 >= ptNodeCount);
    179             }
    180 
    181             if (found) {
    182                 // Okay, we found the PtNode we should descend. Its position is in
    183                 // the lastCandidatePtNodePos variable, so we just re-read it.
    184                 if (0 != lastCandidatePtNodePos) {
    185                     const PatriciaTrieReadingUtils::NodeFlags lastFlags =
    186                             PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(
    187                                     mDictRoot, &lastCandidatePtNodePos);
    188                     const int lastChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
    189                             mDictRoot, &lastCandidatePtNodePos);
    190                     // We copy all the characters in this PtNode to the buffer
    191                     outCodePoints[wordPos] = lastChar;
    192                     if (PatriciaTrieReadingUtils::hasMultipleChars(lastFlags)) {
    193                         int nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
    194                                 mDictRoot, &lastCandidatePtNodePos);
    195                         int charCount = maxCodePointCount;
    196                         while (-1 != nextChar && --charCount > 0) {
    197                             outCodePoints[++wordPos] = nextChar;
    198                             nextChar = PatriciaTrieReadingUtils::getCodePointAndAdvancePosition(
    199                                     mDictRoot, &lastCandidatePtNodePos);
    200                         }
    201                     }
    202                     ++wordPos;
    203                     // Now we only need to branch to the children address. Skip the probability if
    204                     // it's there, read pos, and break to resume the search at pos.
    205                     if (PatriciaTrieReadingUtils::isTerminal(lastFlags)) {
    206                         PatriciaTrieReadingUtils::readProbabilityAndAdvancePosition(mDictRoot,
    207                                 &lastCandidatePtNodePos);
    208                     }
    209                     pos = PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
    210                             mDictRoot, lastFlags, &lastCandidatePtNodePos);
    211                     break;
    212                 } else {
    213                     // Here is a little tricky part: we come here if we found out that all children
    214                     // addresses in this PtNode are bigger than the address we are searching for.
    215                     // Should we conclude the word is not in the dictionary? No! It could still be
    216                     // one of the remaining PtNodes in this array, so we have to keep looking in
    217                     // this array until we find it (or we realize it's not there either, in which
    218                     // case it's actually not in the dictionary). Pass the end of this PtNode,
    219                     // ready to start the next one.
    220                     if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
    221                         PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
    222                                 mDictRoot, flags, &pos);
    223                     }
    224                     if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
    225                         mShortcutListPolicy.skipAllShortcuts(&pos);
    226                     }
    227                     if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
    228                         if (!mBigramListPolicy.skipAllBigrams(&pos)) {
    229                             AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize,
    230                                     pos);
    231                             mIsCorrupted = true;
    232                             ASSERT(false);
    233                             *outUnigramProbability = NOT_A_PROBABILITY;
    234                             return 0;
    235                         }
    236                     }
    237                 }
    238             } else {
    239                 // If we did not find it, we should record the last children address for the next
    240                 // iteration.
    241                 if (hasChildren) lastCandidatePtNodePos = startPos;
    242                 // Now skip the end of this PtNode (children pos and the attributes if any) so that
    243                 // our pos is after the end of this PtNode, at the start of the next one.
    244                 if (PatriciaTrieReadingUtils::hasChildrenInFlags(flags)) {
    245                     PatriciaTrieReadingUtils::readChildrenPositionAndAdvancePosition(
    246                             mDictRoot, flags, &pos);
    247                 }
    248                 if (PatriciaTrieReadingUtils::hasShortcutTargets(flags)) {
    249                     mShortcutListPolicy.skipAllShortcuts(&pos);
    250                 }
    251                 if (PatriciaTrieReadingUtils::hasBigrams(flags)) {
    252                     if (!mBigramListPolicy.skipAllBigrams(&pos)) {
    253                         AKLOGE("Cannot skip bigrams. BufSize: %d, pos: %d.", mDictBufferSize, pos);
    254                         mIsCorrupted = true;
    255                         ASSERT(false);
    256                         *outUnigramProbability = NOT_A_PROBABILITY;
    257                         return 0;
    258                     }
    259                 }
    260             }
    261 
    262         }
    263     }
    264     // If we have looked through all the PtNodes and found no match, the ptNodePos is
    265     // not the position of a terminal in this dictionary.
    266     return 0;
    267 }
    268 
    269 // This function gets the position of the terminal PtNode of the exact matching word in the
    270 // dictionary. If no match is found, it returns NOT_A_DICT_POS.
    271 int PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
    272         const int length, const bool forceLowerCaseSearch) const {
    273     DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
    274     readingHelper.initWithPtNodeArrayPos(getRootPosition());
    275     const int ptNodePos =
    276             readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
    277     if (readingHelper.isError()) {
    278         mIsCorrupted = true;
    279         AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
    280     }
    281     return ptNodePos;
    282 }
    283 
    284 int PatriciaTriePolicy::getProbability(const int unigramProbability,
    285         const int bigramProbability) const {
    286     // Due to space constraints, the probability for bigrams is approximate - the lower the unigram
    287     // probability, the worse the precision. The theoritical maximum error in resulting probability
    288     // is 8 - although in the practice it's never bigger than 3 or 4 in very bad cases. This means
    289     // that sometimes, we'll see some bigrams interverted here, but it can't get too bad.
    290     if (unigramProbability == NOT_A_PROBABILITY) {
    291         return NOT_A_PROBABILITY;
    292     } else if (bigramProbability == NOT_A_PROBABILITY) {
    293         return ProbabilityUtils::backoff(unigramProbability);
    294     } else {
    295         return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
    296                 bigramProbability);
    297     }
    298 }
    299 
    300 int PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
    301         const int ptNodePos) const {
    302     if (ptNodePos == NOT_A_DICT_POS) {
    303         return NOT_A_PROBABILITY;
    304     }
    305     const PtNodeParams ptNodeParams =
    306             mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
    307     if (ptNodeParams.isNotAWord() || ptNodeParams.isBlacklisted()) {
    308         // If this is not a word, or if it's a blacklisted entry, it should behave as
    309         // having no probability outside of the suggestion process (where it should be used
    310         // for shortcuts).
    311         return NOT_A_PROBABILITY;
    312     }
    313     if (prevWordsPtNodePos) {
    314         const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
    315         BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramsPosition);
    316         while (bigramsIt.hasNext()) {
    317             bigramsIt.next();
    318             if (bigramsIt.getBigramPos() == ptNodePos
    319                     && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
    320                 return getProbability(ptNodeParams.getProbability(), bigramsIt.getProbability());
    321             }
    322         }
    323         return NOT_A_PROBABILITY;
    324     }
    325     return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
    326 }
    327 
    328 void PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos,
    329         NgramListener *const listener) const {
    330     if (!prevWordsPtNodePos) {
    331         return;
    332     }
    333     const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
    334     BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramsPosition);
    335     while (bigramsIt.hasNext()) {
    336         bigramsIt.next();
    337         listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
    338     }
    339 }
    340 
    341 int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
    342     if (ptNodePos == NOT_A_DICT_POS) {
    343         return NOT_A_DICT_POS;
    344     }
    345     return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getShortcutPos();
    346 }
    347 
    348 int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
    349     if (ptNodePos == NOT_A_DICT_POS) {
    350         return NOT_A_DICT_POS;
    351     }
    352     return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getBigramsPos();
    353 }
    354 
    355 int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNode,
    356         const int ptNodePos, DicNodeVector *childDicNodes) const {
    357     PatriciaTrieReadingUtils::NodeFlags flags;
    358     int mergedNodeCodePointCount = 0;
    359     int mergedNodeCodePoints[MAX_WORD_LENGTH];
    360     int probability = NOT_A_PROBABILITY;
    361     int childrenPos = NOT_A_DICT_POS;
    362     int shortcutPos = NOT_A_DICT_POS;
    363     int bigramPos = NOT_A_DICT_POS;
    364     int siblingPos = NOT_A_DICT_POS;
    365     PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(),
    366             &mBigramListPolicy, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
    367             &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
    368     // Skip PtNodes don't start with Unicode code point because they represent non-word information.
    369     if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) {
    370         childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
    371                 PatriciaTrieReadingUtils::isTerminal(flags),
    372                 PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
    373                 PatriciaTrieReadingUtils::isBlacklisted(flags)
    374                         || PatriciaTrieReadingUtils::isNotAWord(flags),
    375                 mergedNodeCodePointCount, mergedNodeCodePoints);
    376     }
    377     return siblingPos;
    378 }
    379 
    380 const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoints,
    381         const int codePointCount) const {
    382     const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
    383             false /* forceLowerCaseSearch */);
    384     if (ptNodePos == NOT_A_DICT_POS) {
    385         AKLOGE("getWordProperty was called for invalid word.");
    386         return WordProperty();
    387     }
    388     const PtNodeParams ptNodeParams =
    389             mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
    390     std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
    391             ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
    392     // Fetch bigram information.
    393     std::vector<BigramProperty> bigrams;
    394     const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
    395     int bigramWord1CodePoints[MAX_WORD_LENGTH];
    396     BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramListPos);
    397     while (bigramsIt.hasNext()) {
    398         // Fetch the next bigram information and forward the iterator.
    399         bigramsIt.next();
    400         // Skip the entry if the entry has been deleted. This never happens for ver2 dicts.
    401         if (bigramsIt.getBigramPos() != NOT_A_DICT_POS) {
    402             int word1Probability = NOT_A_PROBABILITY;
    403             const int word1CodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
    404                     bigramsIt.getBigramPos(), MAX_WORD_LENGTH, bigramWord1CodePoints,
    405                     &word1Probability);
    406             const std::vector<int> word1(bigramWord1CodePoints,
    407                     bigramWord1CodePoints + word1CodePointCount);
    408             const int probability = getProbability(word1Probability, bigramsIt.getProbability());
    409             bigrams.emplace_back(&word1, probability,
    410                     NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */);
    411         }
    412     }
    413     // Fetch shortcut information.
    414     std::vector<UnigramProperty::ShortcutProperty> shortcuts;
    415     int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
    416     if (shortcutPos != NOT_A_DICT_POS) {
    417         int shortcutTargetCodePoints[MAX_WORD_LENGTH];
    418         ShortcutListReadingUtils::getShortcutListSizeAndForwardPointer(mDictRoot, &shortcutPos);
    419         bool hasNext = true;
    420         while (hasNext) {
    421             const ShortcutListReadingUtils::ShortcutFlags shortcutFlags =
    422                     ShortcutListReadingUtils::getFlagsAndForwardPointer(mDictRoot, &shortcutPos);
    423             hasNext = ShortcutListReadingUtils::hasNext(shortcutFlags);
    424             const int shortcutTargetLength = ShortcutListReadingUtils::readShortcutTarget(
    425                     mDictRoot, MAX_WORD_LENGTH, shortcutTargetCodePoints, &shortcutPos);
    426             const std::vector<int> shortcutTarget(shortcutTargetCodePoints,
    427                     shortcutTargetCodePoints + shortcutTargetLength);
    428             const int shortcutProbability =
    429                     ShortcutListReadingUtils::getProbabilityFromFlags(shortcutFlags);
    430             shortcuts.emplace_back(&shortcutTarget, shortcutProbability);
    431         }
    432     }
    433     const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
    434             ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
    435             NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
    436     return WordProperty(&codePointVector, &unigramProperty, &bigrams);
    437 }
    438 
    439 int PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
    440         int *const outCodePointCount) {
    441     *outCodePointCount = 0;
    442     if (token == 0) {
    443         // Start iterating the dictionary.
    444         mTerminalPtNodePositionsForIteratingWords.clear();
    445         DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
    446                 &mTerminalPtNodePositionsForIteratingWords);
    447         DynamicPtReadingHelper readingHelper(&mPtNodeReader, &mPtNodeArrayReader);
    448         readingHelper.initWithPtNodeArrayPos(getRootPosition());
    449         readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
    450     }
    451     const int terminalPtNodePositionsVectorSize =
    452             static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
    453     if (token < 0 || token >= terminalPtNodePositionsVectorSize) {
    454         AKLOGE("Given token %d is invalid.", token);
    455         return 0;
    456     }
    457     const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
    458     int unigramProbability = NOT_A_PROBABILITY;
    459     *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(terminalPtNodePos,
    460             MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
    461     const int nextToken = token + 1;
    462     if (nextToken >= terminalPtNodePositionsVectorSize) {
    463         // All words have been iterated.
    464         mTerminalPtNodePositionsForIteratingWords.clear();
    465         return 0;
    466     }
    467     return nextToken;
    468 }
    469 
    470 } // namespace latinime
    471