Home | History | Annotate | Download | only in v4
      1 /*
      2  * Copyright (C) 2013, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
     18 
     19 #include <vector>
     20 
     21 #include "suggest/core/dicnode/dic_node.h"
     22 #include "suggest/core/dicnode/dic_node_vector.h"
     23 #include "suggest/core/dictionary/ngram_listener.h"
     24 #include "suggest/core/dictionary/property/bigram_property.h"
     25 #include "suggest/core/dictionary/property/unigram_property.h"
     26 #include "suggest/core/dictionary/property/word_property.h"
     27 #include "suggest/core/session/prev_words_info.h"
     28 #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
     29 #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
     30 #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
     31 #include "suggest/policyimpl/dictionary/utils/probability_utils.h"
     32 
     33 namespace latinime {
     34 
     35 // Note that there are corresponding definitions in Java side in BinaryDictionaryTests and
     36 // BinaryDictionaryDecayingTests.
     37 const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
     38 const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
     39 const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
     40 const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
     41 const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
     42 const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
     43         Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
     44 
     45 void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
     46         DicNodeVector *const childDicNodes) const {
     47     if (!dicNode->hasChildren()) {
     48         return;
     49     }
     50     DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
     51     readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
     52     while (!readingHelper.isEnd()) {
     53         const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams();
     54         if (!ptNodeParams.isValid()) {
     55             break;
     56         }
     57         bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
     58         if (isTerminal && mHeaderPolicy->isDecayingDict()) {
     59             // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
     60             // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
     61             // valid terminal DicNode.
     62             isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
     63         }
     64         readingHelper.readNextSiblingNode(ptNodeParams);
     65         if (ptNodeParams.representsNonWordInfo()) {
     66             // Skip PtNodes that represent non-word information.
     67             continue;
     68         }
     69         childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
     70                 ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
     71                 ptNodeParams.hasChildren(),
     72                 ptNodeParams.isBlacklisted()
     73                         || ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,
     74                 ptNodeParams.getCodePointCount(), ptNodeParams.getCodePoints());
     75     }
     76     if (readingHelper.isError()) {
     77         mIsCorrupted = true;
     78         AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
     79     }
     80 }
     81 
     82 int Ver4PatriciaTriePolicy::getCodePointsAndProbabilityAndReturnCodePointCount(
     83         const int ptNodePos, const int maxCodePointCount, int *const outCodePoints,
     84         int *const outUnigramProbability) const {
     85     DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
     86     readingHelper.initWithPtNodePos(ptNodePos);
     87     const int codePointCount =  readingHelper.getCodePointsAndProbabilityAndReturnCodePointCount(
     88             maxCodePointCount, outCodePoints, outUnigramProbability);
     89     if (readingHelper.isError()) {
     90         mIsCorrupted = true;
     91         AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount().");
     92     }
     93     return codePointCount;
     94 }
     95 
     96 int Ver4PatriciaTriePolicy::getTerminalPtNodePositionOfWord(const int *const inWord,
     97         const int length, const bool forceLowerCaseSearch) const {
     98     DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
     99     readingHelper.initWithPtNodeArrayPos(getRootPosition());
    100     const int ptNodePos =
    101             readingHelper.getTerminalPtNodePositionOfWord(inWord, length, forceLowerCaseSearch);
    102     if (readingHelper.isError()) {
    103         mIsCorrupted = true;
    104         AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
    105     }
    106     return ptNodePos;
    107 }
    108 
    109 int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
    110         const int bigramProbability) const {
    111     if (mHeaderPolicy->isDecayingDict()) {
    112         // Both probabilities are encoded. Decode them and get probability.
    113         return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability);
    114     } else {
    115         if (unigramProbability == NOT_A_PROBABILITY) {
    116             return NOT_A_PROBABILITY;
    117         } else if (bigramProbability == NOT_A_PROBABILITY) {
    118             return ProbabilityUtils::backoff(unigramProbability);
    119         } else {
    120             return bigramProbability;
    121         }
    122     }
    123 }
    124 
    125 int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
    126         const int ptNodePos) const {
    127     if (ptNodePos == NOT_A_DICT_POS) {
    128         return NOT_A_PROBABILITY;
    129     }
    130     const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
    131     if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
    132         return NOT_A_PROBABILITY;
    133     }
    134     if (prevWordsPtNodePos) {
    135         const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
    136         BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
    137         while (bigramsIt.hasNext()) {
    138             bigramsIt.next();
    139             if (bigramsIt.getBigramPos() == ptNodePos
    140                     && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
    141                 return getProbability(ptNodeParams.getProbability(), bigramsIt.getProbability());
    142             }
    143         }
    144         return NOT_A_PROBABILITY;
    145     }
    146     return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
    147 }
    148 
    149 void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos,
    150         NgramListener *const listener) const {
    151     if (!prevWordsPtNodePos) {
    152         return;
    153     }
    154     const int bigramsPosition = getBigramsPositionOfPtNode(prevWordsPtNodePos[0]);
    155     BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
    156     while (bigramsIt.hasNext()) {
    157         bigramsIt.next();
    158         listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
    159     }
    160 }
    161 
    162 int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
    163     if (ptNodePos == NOT_A_DICT_POS) {
    164         return NOT_A_DICT_POS;
    165     }
    166     const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
    167     if (ptNodeParams.isDeleted()) {
    168         return NOT_A_DICT_POS;
    169     }
    170     return mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
    171             ptNodeParams.getTerminalId());
    172 }
    173 
    174 int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
    175     if (ptNodePos == NOT_A_DICT_POS) {
    176         return NOT_A_DICT_POS;
    177     }
    178     const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
    179     if (ptNodeParams.isDeleted()) {
    180         return NOT_A_DICT_POS;
    181     }
    182     return mBuffers->getBigramDictContent()->getBigramListHeadPos(
    183             ptNodeParams.getTerminalId());
    184 }
    185 
    186 bool Ver4PatriciaTriePolicy::addUnigramEntry(const int *const word, const int length,
    187         const UnigramProperty *const unigramProperty) {
    188     if (!mBuffers->isUpdatable()) {
    189         AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
    190         return false;
    191     }
    192     if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
    193         AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
    194                 mDictBuffer->getTailPosition());
    195         return false;
    196     }
    197     if (length > MAX_WORD_LENGTH) {
    198         AKLOGE("The word is too long to insert to the dictionary, length: %d", length);
    199         return false;
    200     }
    201     for (const auto &shortcut : unigramProperty->getShortcuts()) {
    202         if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
    203             AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %d",
    204                     shortcut.getTargetCodePoints()->size());
    205             return false;
    206         }
    207     }
    208     DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
    209     readingHelper.initWithPtNodeArrayPos(getRootPosition());
    210     bool addedNewUnigram = false;
    211     int codePointsToAdd[MAX_WORD_LENGTH];
    212     int codePointCountToAdd = length;
    213     memmove(codePointsToAdd, word, sizeof(int) * length);
    214     if (unigramProperty->representsBeginningOfSentence()) {
    215         codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
    216                 codePointCountToAdd, MAX_WORD_LENGTH);
    217     }
    218     if (codePointCountToAdd <= 0) {
    219         return false;
    220     }
    221     if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointsToAdd, codePointCountToAdd,
    222             unigramProperty, &addedNewUnigram)) {
    223         if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
    224             mUnigramCount++;
    225         }
    226         if (unigramProperty->getShortcuts().size() > 0) {
    227             // Add shortcut target.
    228             const int wordPos = getTerminalPtNodePositionOfWord(word, length,
    229                     false /* forceLowerCaseSearch */);
    230             if (wordPos == NOT_A_DICT_POS) {
    231                 AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
    232                 return false;
    233             }
    234             for (const auto &shortcut : unigramProperty->getShortcuts()) {
    235                 if (!mUpdatingHelper.addShortcutTarget(wordPos,
    236                         shortcut.getTargetCodePoints()->data(),
    237                         shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
    238                     AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %d, "
    239                             "probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
    240                             shortcut.getProbability());
    241                     return false;
    242                 }
    243             }
    244         }
    245         return true;
    246     } else {
    247         return false;
    248     }
    249 }
    250 
    251 bool Ver4PatriciaTriePolicy::removeUnigramEntry(const int *const word, const int length) {
    252     if (!mBuffers->isUpdatable()) {
    253         AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
    254         return false;
    255     }
    256     const int ptNodePos = getTerminalPtNodePositionOfWord(word, length,
    257             false /* forceLowerCaseSearch */);
    258     if (ptNodePos == NOT_A_DICT_POS) {
    259         return false;
    260     }
    261     const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
    262     if (!mNodeWriter.markPtNodeAsDeleted(&ptNodeParams)) {
    263         AKLOGE("Cannot remove unigram. ptNodePos: %d", ptNodePos);
    264         return false;
    265     }
    266     if (!ptNodeParams.representsNonWordInfo()) {
    267         mUnigramCount--;
    268     }
    269     return true;
    270 }
    271 
    272 bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
    273         const BigramProperty *const bigramProperty) {
    274     if (!mBuffers->isUpdatable()) {
    275         AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
    276         return false;
    277     }
    278     if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
    279         AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
    280                 mDictBuffer->getTailPosition());
    281         return false;
    282     }
    283     if (!prevWordsInfo->isValid()) {
    284         AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
    285         return false;
    286     }
    287     if (bigramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
    288         AKLOGE("The word is too long to insert the ngram to the dictionary. "
    289                 "length: %d", bigramProperty->getTargetCodePoints()->size());
    290         return false;
    291     }
    292     int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    293     prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
    294             false /* tryLowerCaseSearch */);
    295     const auto prevWordsPtNodePosView = PtNodePosArrayView::fromFixedSizeArray(prevWordsPtNodePos);
    296     // TODO: Support N-gram.
    297     if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
    298         if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
    299             const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
    300             const UnigramProperty beginningOfSentenceUnigramProperty(
    301                     true /* representsBeginningOfSentence */, true /* isNotAWord */,
    302                     false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
    303                     NOT_A_TIMESTAMP /* timestamp */, 0 /* level */, 0 /* count */, &shortcuts);
    304             if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
    305                     prevWordsInfo->getNthPrevWordCodePointCount(1 /* n */),
    306                     &beginningOfSentenceUnigramProperty)) {
    307                 AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
    308                 return false;
    309             }
    310             // Refresh Terminal PtNode positions.
    311             prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
    312                     false /* tryLowerCaseSearch */);
    313         } else {
    314             return false;
    315         }
    316     }
    317     const int word1Pos = getTerminalPtNodePositionOfWord(
    318             bigramProperty->getTargetCodePoints()->data(),
    319             bigramProperty->getTargetCodePoints()->size(), false /* forceLowerCaseSearch */);
    320     if (word1Pos == NOT_A_DICT_POS) {
    321         return false;
    322     }
    323     bool addedNewEntry = false;
    324     if (mUpdatingHelper.addNgramEntry(prevWordsPtNodePosView, word1Pos, bigramProperty,
    325             &addedNewEntry)) {
    326         if (addedNewEntry) {
    327             mBigramCount++;
    328         }
    329         return true;
    330     } else {
    331         return false;
    332     }
    333 }
    334 
    335 bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
    336         const int *const word, const int length) {
    337     if (!mBuffers->isUpdatable()) {
    338         AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
    339         return false;
    340     }
    341     if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
    342         AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
    343                 mDictBuffer->getTailPosition());
    344         return false;
    345     }
    346     if (!prevWordsInfo->isValid()) {
    347         AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
    348         return false;
    349     }
    350     if (length > MAX_WORD_LENGTH) {
    351         AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %d", length);
    352     }
    353     int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    354     prevWordsInfo->getPrevWordsTerminalPtNodePos(this, prevWordsPtNodePos,
    355             false /* tryLowerCaseSerch */);
    356     const auto prevWordsPtNodePosView = PtNodePosArrayView::fromFixedSizeArray(prevWordsPtNodePos);
    357     // TODO: Support N-gram.
    358     if (prevWordsPtNodePos[0] == NOT_A_DICT_POS) {
    359         return false;
    360     }
    361     const int wordPos = getTerminalPtNodePositionOfWord(word, length,
    362             false /* forceLowerCaseSearch */);
    363     if (wordPos == NOT_A_DICT_POS) {
    364         return false;
    365     }
    366     if (mUpdatingHelper.removeNgramEntry(prevWordsPtNodePosView, wordPos)) {
    367         mBigramCount--;
    368         return true;
    369     } else {
    370         return false;
    371     }
    372 }
    373 
    374 bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
    375     if (!mBuffers->isUpdatable()) {
    376         AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
    377         return false;
    378     }
    379     if (!mWritingHelper.writeToDictFile(filePath, mUnigramCount, mBigramCount)) {
    380         AKLOGE("Cannot flush the dictionary to file.");
    381         mIsCorrupted = true;
    382         return false;
    383     }
    384     return true;
    385 }
    386 
    387 bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
    388     if (!mBuffers->isUpdatable()) {
    389         AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
    390         return false;
    391     }
    392     if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) {
    393         AKLOGE("Cannot flush the dictionary to file with GC.");
    394         mIsCorrupted = true;
    395         return false;
    396     }
    397     return true;
    398 }
    399 
    400 bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
    401     if (!mBuffers->isUpdatable()) {
    402         AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
    403         return false;
    404     }
    405     if (mBuffers->isNearSizeLimit()) {
    406         // Additional buffer size is near the limit.
    407         return true;
    408     } else if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize()
    409             > Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) {
    410         // Total extended region size of the trie exceeds the limit.
    411         return true;
    412     } else if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
    413             && mDictBuffer->getUsedAdditionalBufferSize() > 0) {
    414         // Needs to reduce dictionary size.
    415         return true;
    416     } else if (mHeaderPolicy->isDecayingDict()) {
    417         return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mUnigramCount, mBigramCount,
    418                 mHeaderPolicy);
    419     }
    420     return false;
    421 }
    422 
    423 void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength,
    424         char *const outResult, const int maxResultLength) {
    425     const int compareLength = queryLength + 1 /* terminator */;
    426     if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) {
    427         snprintf(outResult, maxResultLength, "%d", mUnigramCount);
    428     } else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) {
    429         snprintf(outResult, maxResultLength, "%d", mBigramCount);
    430     } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
    431         snprintf(outResult, maxResultLength, "%d",
    432                 mHeaderPolicy->isDecayingDict() ?
    433                         ForgettingCurveUtils::getUnigramCountHardLimit(
    434                                 mHeaderPolicy->getMaxUnigramCount()) :
    435                         static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
    436     } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
    437         snprintf(outResult, maxResultLength, "%d",
    438                 mHeaderPolicy->isDecayingDict() ?
    439                         ForgettingCurveUtils::getBigramCountHardLimit(
    440                                 mHeaderPolicy->getMaxBigramCount()) :
    441                         static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
    442     }
    443 }
    444 
    445 const WordProperty Ver4PatriciaTriePolicy::getWordProperty(const int *const codePoints,
    446         const int codePointCount) const {
    447     const int ptNodePos = getTerminalPtNodePositionOfWord(codePoints, codePointCount,
    448             false /* forceLowerCaseSearch */);
    449     if (ptNodePos == NOT_A_DICT_POS) {
    450         AKLOGE("getWordProperty is called for invalid word.");
    451         return WordProperty();
    452     }
    453     const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
    454     std::vector<int> codePointVector(ptNodeParams.getCodePoints(),
    455             ptNodeParams.getCodePoints() + ptNodeParams.getCodePointCount());
    456     const ProbabilityEntry probabilityEntry =
    457             mBuffers->getLanguageModelDictContent()->getProbabilityEntry(
    458                     ptNodeParams.getTerminalId());
    459     const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
    460     // Fetch bigram information.
    461     std::vector<BigramProperty> bigrams;
    462     const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
    463     if (bigramListPos != NOT_A_DICT_POS) {
    464         int bigramWord1CodePoints[MAX_WORD_LENGTH];
    465         const BigramDictContent *const bigramDictContent = mBuffers->getBigramDictContent();
    466         const TerminalPositionLookupTable *const terminalPositionLookupTable =
    467                 mBuffers->getTerminalPositionLookupTable();
    468         bool hasNext = true;
    469         int readingPos = bigramListPos;
    470         while (hasNext) {
    471             const BigramEntry bigramEntry =
    472                     bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
    473             hasNext = bigramEntry.hasNext();
    474             const int word1TerminalId = bigramEntry.getTargetTerminalId();
    475             const int word1TerminalPtNodePos =
    476                     terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId);
    477             if (word1TerminalPtNodePos == NOT_A_DICT_POS) {
    478                 continue;
    479             }
    480             // Word (unigram) probability
    481             int word1Probability = NOT_A_PROBABILITY;
    482             const int codePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
    483                     word1TerminalPtNodePos, MAX_WORD_LENGTH, bigramWord1CodePoints,
    484                     &word1Probability);
    485             const std::vector<int> word1(bigramWord1CodePoints,
    486                     bigramWord1CodePoints + codePointCount);
    487             const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo();
    488             const int probability = bigramEntry.hasHistoricalInfo() ?
    489                     ForgettingCurveUtils::decodeProbability(
    490                             bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
    491                     bigramEntry.getProbability();
    492             bigrams.emplace_back(&word1, probability,
    493                     historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
    494                     historicalInfo->getCount());
    495         }
    496     }
    497     // Fetch shortcut information.
    498     std::vector<UnigramProperty::ShortcutProperty> shortcuts;
    499     int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
    500     if (shortcutPos != NOT_A_DICT_POS) {
    501         int shortcutTarget[MAX_WORD_LENGTH];
    502         const ShortcutDictContent *const shortcutDictContent =
    503                 mBuffers->getShortcutDictContent();
    504         bool hasNext = true;
    505         while (hasNext) {
    506             int shortcutTargetLength = 0;
    507             int shortcutProbability = NOT_A_PROBABILITY;
    508             shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget,
    509                     &shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos);
    510             const std::vector<int> target(shortcutTarget, shortcutTarget + shortcutTargetLength);
    511             shortcuts.emplace_back(&target, shortcutProbability);
    512         }
    513     }
    514     const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
    515             ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
    516             historicalInfo->getTimeStamp(), historicalInfo->getLevel(),
    517             historicalInfo->getCount(), &shortcuts);
    518     return WordProperty(&codePointVector, &unigramProperty, &bigrams);
    519 }
    520 
    521 int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
    522         int *const outCodePointCount) {
    523     *outCodePointCount = 0;
    524     if (token == 0) {
    525         mTerminalPtNodePositionsForIteratingWords.clear();
    526         DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
    527                 &mTerminalPtNodePositionsForIteratingWords);
    528         DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
    529         readingHelper.initWithPtNodeArrayPos(getRootPosition());
    530         readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
    531     }
    532     const int terminalPtNodePositionsVectorSize =
    533             static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
    534     if (token < 0 || token >= terminalPtNodePositionsVectorSize) {
    535         AKLOGE("Given token %d is invalid.", token);
    536         return 0;
    537     }
    538     const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
    539     int unigramProbability = NOT_A_PROBABILITY;
    540     *outCodePointCount = getCodePointsAndProbabilityAndReturnCodePointCount(
    541             terminalPtNodePos, MAX_WORD_LENGTH, outCodePoints, &unigramProbability);
    542     const int nextToken = token + 1;
    543     if (nextToken >= terminalPtNodePositionsVectorSize) {
    544         // All words have been iterated.
    545         mTerminalPtNodePositionsForIteratingWords.clear();
    546         return 0;
    547     }
    548     return nextToken;
    549 }
    550 
    551 } // namespace latinime
    552