Home | History | Annotate | Download | only in v402
      1 /*
      2  * Copyright (C) 2013, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
     19  * Do not edit this file other than updating policy's interface.
     20  *
     21  * This file was generated from
     22  *   suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp
     23  */
     24 
     25 #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h"
     26 
     27 #include <vector>
     28 
     29 #include "suggest/core/dicnode/dic_node.h"
     30 #include "suggest/core/dicnode/dic_node_vector.h"
     31 #include "suggest/core/dictionary/ngram_listener.h"
     32 #include "suggest/core/dictionary/property/bigram_property.h"
     33 #include "suggest/core/dictionary/property/unigram_property.h"
     34 #include "suggest/core/dictionary/property/word_property.h"
     35 #include "suggest/core/session/prev_words_info.h"
     36 #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
     37 #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
     38 #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
     39 #include "suggest/policyimpl/dictionary/utils/probability_utils.h"
     40 
     41 namespace latinime {
     42 namespace backward {
     43 namespace v402 {
     44 
     45 // Note that there are corresponding definitions in Java side in BinaryDictionaryTests and
     46 // BinaryDictionaryDecayingTests.
     47 const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
     48 const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
     49 const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
     50 const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
     51 const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
     52 const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
     53         Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
     54 
     55 void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
     56         DicNodeVector *const childDicNodes) const {
     57     if (!dicNode->hasChildren()) {
     58         return;
     59     }
     60     DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
     61     readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
     62     while (!readingHelper.isEnd()) {
     63         const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams();
     64         if (!ptNodeParams.isValid()) {
     65             break;
     66         }
     67         bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
     68         if (isTerminal && mHeaderPolicy->isDecayingDict()) {
     69             // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
     70             // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
     71             // valid terminal DicNode.
     72             isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
     73         }
     74         readingHelper.readNextSiblingNode(ptNodeParams);
     75         if (ptNodeParams.representsNonWordInfo()) {
     76             // Skip PtNodes that represent non-word information.
     77             continue;
     78         }
     79         childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
     80                 ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
     81                 ptNodeParams.hasChildren(),
     82                 ptNodeParams.isBlacklisted()
     83                         || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
     84                 ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
     85     }
     86     if (readingHelper.isError()) {
     87         mIsCorrupted = true;
     88         AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
     89     }
     90 }
     91 
     92 int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
     93         const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
     94         int *const outUnigramProbability) const {
     95     DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
     96     readingHelper.initWithPtNodePos(ptNodePos);
     97     const int codePointCount =  readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
     98             maxCodePointCount, outCodePoints, outUnigramProbability);
     99     if (readingHelper.isError()) {
    100         mIsCorrupted = true;
    101         AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount().");
    102     }
    103     return codePointCount;
    104 }
    105 
    106 int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
    107         const int length, const bool forceLowerCaseSearch) const {
    108     DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
    109     readingHelper.initWithPtNodeArrayPos(getRootPosition());
    110     const int ptNodePos =
    111             readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
    112     if (readingHelper.isError()) {
    113         mIsCorrupted = true;
    114         AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
    115     }
    116     return ptNodePos;
    117 }
    118 
    119 int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
    120         const int bigramProbability) const {
    121     if (mHeaderPolicy->isDecayingDict()) {
    122         // Both probabilities are encoded. Decode them and get probability.
    123         return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
    124     } else {
    125         if (unigramProbability == NOT_A_PROBABILITY) {
    126             return NOT_A_PROBABILITY;
    127         } else if (bigramProbability == NOT_A_PROBABILITY) {
    128             return ProbabilityUtils::backoff(unigramProbability);
    129         } else {
    130             return bigramProbability;
    131         }
    132     }
    133 }
    134 
    135 int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
    136         const int ptNodePos) const {
    137     if (ptNodePos == NOT_A_DICT_POS) {
    138         return NOT_A_PROBABILITY;
    139     }
    140     const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
    141     if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
    142         return NOT_A_PROBABILITY;
    143     }
    144     if (prevWordsPtNodePos) {
    145         const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
    146         BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
    147         while (bigramsIt.hasNext()) {
    148             bigramsIt.next();
    149             if (bigramsIt.getBigramPos() == ptNodePos
    150                     && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
    151                 return getProbability(ptNodeParams.getProbability(), bigramsIt.getProbability());
    152             }
    153         }
    154         return NOT_A_PROBABILITY;
    155     }
    156     return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
    157 }
    158 
    159 void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos,
    160         NgramListener *const listener) const {
    161     if (!prevWordsPtNodePos) {
    162         return;
    163     }
    164     const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
    165     BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
    166     while (bigramsIt.hasNext()) {
    167         bigramsIt.next();
    168         listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
    169     }
    170 }
    171 
    172 int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
    173     if (ptNodePos == NOT_A_DICT_POS) {
    174         return NOT_A_DICT_POS;
    175     }
    176     const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
    177     if (ptNodeParams.isDeleted()) {
    178         return NOT_A_DICT_POS;
    179     }
    180     return mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
    181             ptNodeParams.getTerminalId());
    182 }
    183 
    184 int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
    185     if (ptNodePos == NOT_A_DICT_POS) {
    186         return NOT_A_DICT_POS;
    187     }
    188     const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
    189     if (ptNodeParams.isDeleted()) {
    190         return NOT_A_DICT_POS;
    191     }
    192     return mBuffers->getBigramDictContent()->getBigramListHeadPos(
    193             ptNodeParams.getTerminalId());
    194 }
    195 
    196 bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length,
    197         const UnigramProperty *const unigramProperty) {
    198     if (!mBuffers->isUpdatable()) {
    199         AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
    200         return false;
    201     }
    202     if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
    203         AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
    204                 mDictBuffer->getTailPosition());
    205         return false;
    206     }
    207     if (length > MAX_WORD_LENGTH) {
    208         AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
    209         return false;
    210     }
    211     for (const auto &shortcut : unigramProperty->getShortcuts()) {
    212         if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
    213             AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d",
    214                     shortcut.getTargetCodePoints()->size());
    215             return false;
    216         }
    217     }
    218     DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
    219     readingHelper.initWithPtNodeArrayPos(getRootPosition());
    220     bool addedNewUnigram = false;
    221     int codePointsToAdd[MAX_WORD_LENGTH];
    222     int codePointCountToAdd = length;
    223     memmove(codePointsToAdd, word, sizeof(int) * length);
    224     if (unigramProperty->representsBeginningOfSentence()) {
    225         codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
    226                 codePointCountToAdd, MAX_WORD_LENGTH);
    227     }
    228     if (codePointCountToAdd <= 0) {
    229         return false;
    230     }
    231     if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd,
    232             unigramProperty, &addedNewUnigram)) {
    233         if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
    234             mUnigramCount++;
    235         }
    236         if (unigramProperty->getShortcuts().size() > 0) {
    237             // Add shortcut target.
    238             const int wordPos = getTerminalPtNodePositionOfWord(word, length,
    239                     false /* forceLowerCaseSearch */);
    240             if (wordPos == NOT_A_DICT_POS) {
    241                 AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
    242                 return false;
    243             }
    244             for (const auto &shortcut : unigramProperty->getShortcuts()) {
    245                 if (!mUpdatingHelper.addShortcutTarget(wordPos,
    246                         shortcut.getTargetCodePoints()->data(),
    247                         shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
    248                     AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, "
    249                             "probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
    250                             shortcut.getProbability());
    251                     return false;
    252                 }
    253             }
    254         }
    255         return true;
    256     } else {
    257         return false;
    258     }
    259 }
    260 
    261 bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
    262         const BigramProperty *const bigramProperty) {
    263     if (!mBuffers->isUpdatable()) {
    264         AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
    265         return false;
    266     }
    267     if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
    268         AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
    269                 mDictBuffer->getTailPosition());
    270         return false;
    271     }
    272     if (!prevWordsInfo->isValid()) {
    273         AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
    274         return false;
    275     }
    276     if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
    277         AKLOGE("The word is too long to insert the ngram to the dictionary. "
    278                 "length: %d", bigramProperty->getTargetCodePoints()->size());
    279         return false;
    280     }
    281     int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    282     prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
    283             false /* tryLowerCaseSearch */);
    284     // TODO: Support N-gram.
    285     if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
    286         if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
    287             const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
    288             const UnigramProperty beginningOfSentenceUnigramProperty(
    289                     true /* representsBeginningOfSentence */, true /* isNotAWord */,
    290                     false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
    291                     NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
    292             if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
    293                     prevWordsInfo->getNthPrevWordCodePointCount(1 /* n */),
    294                     &beginningOfSentenceUnigramProperty)) {
    295                 AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
    296                 return false;
    297             }
    298             // Refresh Terminal PtNode positions.
    299             prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
    300                     false /* tryLowerCaseSearch */);
    301         } else {
    302             return false;
    303         }
    304     }
    305     const int word1Pos = getTerminalPtNodePositionOfWord(
    306             bigramProperty->getTargetCodePoints()->data(),
    307             bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */);
    308     if (word1Pos == NOT_A_DICT_POS) {
    309         return false;
    310     }
    311     bool addedNewBigram = false;
    312     if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::fromObject(prevWordsPtNodePos),
    313             word1Pos, bigramProperty, &addedNewBigram)) {
    314         if (addedNewBigram) {
    315             mBigramCount++;
    316         }
    317         return true;
    318     } else {
    319         return false;
    320     }
    321 }
    322 
    323 bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
    324         const int *const word, const int length) {
    325     if (!mBuffers->isUpdatable()) {
    326         AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
    327         return false;
    328     }
    329     if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
    330         AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
    331                 mDictBuffer->getTailPosition());
    332         return false;
    333     }
    334     if (!prevWordsInfo->isValid()) {
    335         AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
    336         return false;
    337     }
    338     if (length > MAX_WORD_LENGTH) {
    339         AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %d", length);
    340     }
    341     int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    342     prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
    343             false /* tryLowerCaseSerch */);
    344     // TODO: Support N-gram.
    345     if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
    346         return false;
    347     }
    348     const int wordPos = getTerminalPtNodePositionOfWord(word, length,
    349             false /* forceLowerCaseSearch */);
    350     if (wordPos == NOT_A_DICT_POS) {
    351         return false;
    352     }
    353     if (mUpdatingHelper.removeNgramEntry(
    354             PtNodePosArrayView::fromObject(prevWordsPtNodePos), wordPos)) {
    355         mBigramCount--;
    356         return true;
    357     } else {
    358         return false;
    359     }
    360 }
    361 
    362 bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
    363     if (!mBuffers->isUpdatable()) {
    364         AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
    365         return false;
    366     }
    367     if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) {
    368         AKLOGE("Cannot flush the dictionary to file.");
    369         mIsCorrupted = true;
    370         return false;
    371     }
    372     return true;
    373 }
    374 
    375 bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
    376     if (!mBuffers->isUpdatable()) {
    377         AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
    378         return false;
    379     }
    380     if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) {
    381         AKLOGE("Cannot flush the dictionary to file with GC.");
    382         mIsCorrupted = true;
    383         return false;
    384     }
    385     return true;
    386 }
    387 
    388 bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
    389     if (!mBuffers->isUpdatable()) {
    390         AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
    391         return false;
    392     }
    393     if (mBuffers->isNearSizeLimit()) {
    394         // Additional buffer size is near the limit.
    395         return true;
    396     } else if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize()
    397             > Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) {
    398         // Total extended region size of the trie exceeds the limit.
    399         return true;
    400     } else if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
    401             && mDictBuffer->getUsedAdditionalBufferSize() > 0) {
    402         // Needs to reduce dictionary size.
    403         return true;
    404     } else if (mHeaderPolicy->isDecayingDict()) {
    405         return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
    406                 mHeaderPolicy);
    407     }
    408     return false;
    409 }
    410 
    411 void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength,
    412         char *const outResult, const int maxResultLength) {
    413     const int compareLength = queryLength + 1 /* terminator */;
    414     if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) {
    415         snprintf(outResult, maxResultLength, "%d", mUnigramCount);
    416     } else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) {
    417         snprintf(outResult, maxResultLength, "%d", mBigramCount);
    418     } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
    419         snprintf(outResult, maxResultLength, "%d",
    420                 mHeaderPolicy->isDecayingDict() ?
    421                         ForgettingCurveUtils::getUnigramCountHardLimit(
    422                                 mHeaderPolicy->getMaxUnigramCount()) :
    423                         static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
    424     } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
    425         snprintf(outResult, maxResultLength, "%d",
    426                 mHeaderPolicy->isDecayingDict() ?
    427                         ForgettingCurveUtils::getBigramCountHardLimit(
    428                                 mHeaderPolicy->getMaxBigramCount()) :
    429                         static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
    430     }
    431 }
    432 
    433 const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints,
    434         const int codePointCount) const {
    435     const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
    436             false /* forceLowerCaseSearch */);
    437     if (ptNodePos == NOT_A_DICT_POS) {
    438         AKLOGE("getWordProperty is called for invalid word.");
    439         return WordProperty();
    440     }
    441     const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
    442     std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
    443             ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
    444     const ProbabilityEntry probabilityEntry =
    445             mBuffers->getProbabilityDictContent()->getProbabilityEntry(
    446                     ptNodeParams.getTerminalId());
    447     const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
    448     // Fetch bigram information.
    449     std::vector<BigramProperty> bigrams;
    450     const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
    451     if (bigramListPos != NOT_A_DICT_POS) {
    452         int bigramWord1CodePoints[MAX_WORD_LENGTH];
    453         const BigramDictContent *const bigramDictContent = mBuffers->getBigramDictContent();
    454         const TerminalPositionLookupTable *const terminalPositionLookupTable =
    455                 mBuffers->getTerminalPositionLookupTable();
    456         bool hasNext = true;
    457         int readingPos = bigramListPos;
    458         while (hasNext) {
    459             const BigramEntry bigramEntry =
    460                     bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
    461             hasNext = bigramEntry.hasNext();
    462             const int word1TerminalId = bigramEntry.getTargetTerminalId();
    463             const int word1TerminalPtNodePos =
    464                     terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId);
    465             if (word1TerminalPtNodePos == NOT_A_DICT_POS) {
    466                 continue;
    467             }
    468             // Word (unigram) probability
    469             int word1Probability = NOT_A_PROBABILITY;
    470             const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
    471                     word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints,
    472                     &word1Probability);
    473             const std::vector<int> word1(bigramWord1CodePoints,
    474                     bigramWord1CodePoints + codePointCount);
    475             const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo();
    476             const int probability = bigramEntry.hasHistoricalInfo() ?
    477                     ForgettingCurveUtils::decodeProbability(
    478                             bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
    479                     bigramEntry.getProbability();
    480             bigrams.emplace_back(&word1, probability,
    481                     historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
    482                     historicalInfo->getCount());
    483         }
    484     }
    485     // Fetch shortcut information.
    486     std::vector<UnigramProperty::ShortcutProperty> shortcuts;
    487     int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
    488     if (shortcutPos != NOT_A_DICT_POS) {
    489         int shortcutTarget[MAX_WORD_LENGTH];
    490         const ShortcutDictContent *const shortcutDictContent =
    491                 mBuffers->getShortcutDictContent();
    492         bool hasNext = true;
    493         while (hasNext) {
    494             int shortcutTargetLength = 0;
    495             int shortcutProbability = NOT_A_PROBABILITY;
    496             shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget,
    497                     &shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos);
    498             const std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength);
    499             shortcuts.emplace_back(&target, shortcutProbability);
    500         }
    501     }
    502     const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
    503             ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
    504             historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
    505             historicalInfo->getCount(), &shortcuts);
    506     return WordProperty(&codePointVector, &unigramProperty, &bigrams);
    507 }
    508 
    509 int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
    510         int *const outCodePointCount) {
    511     *outCodePointCount = 0;
    512     if (token == 0) {
    513         mTerminalPtNodePositionsForIteratingWords.clear();
    514         DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
    515                 &mTerminalPtNodePositionsForIteratingWords);
    516         DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
    517         readingHelper.initWithPtNodeArrayPos(getRootPosition());
    518         readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
    519     }
    520     const int terminalPtNodePositionsVectorSize =
    521             static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
    522     if (token < 0 || token >= terminalPtNodePositionsVectorSize) {
    523         AKLOGE("Given token %d is invalid.", token);
    524         return 0;
    525     }
    526     const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
    527     int unigramProbability = NOT_A_PROBABILITY;
    528     *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
    529             terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
    530     const int nextToken = token + 1;
    531     if (nextToken >= terminalPtNodePositionsVectorSize) {
    532         // All words have been iterated.
    533         mTerminalPtNodePositionsForIteratingWords.clear();
    534         return 0;
    535     }
    536     return nextToken;
    537 }
    538 
    539 } // namespace v402
    540 } // namespace backward
    541 } // namespace latinime
    542