Home | History | Annotate | Download | only in v4
      1 /*
      2  * Copyright (C) 2013, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "dictionary/structure/v4/ver4_patricia_trie_policy.h"
     18 
     19 #include <array>
     20 #include <vector>
     21 
     22 #include "suggest/core/dicnode/dic_node.h"
     23 #include "suggest/core/dicnode/dic_node_vector.h"
     24 #include "dictionary/interface/ngram_listener.h"
     25 #include "dictionary/property/ngram_context.h"
     26 #include "dictionary/property/ngram_property.h"
     27 #include "dictionary/property/unigram_property.h"
     28 #include "dictionary/property/word_property.h"
     29 #include "dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
     30 #include "dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
     31 #include "dictionary/utils/forgetting_curve_utils.h"
     32 #include "dictionary/utils/multi_bigram_map.h"
     33 #include "dictionary/utils/probability_utils.h"
     34 #include "utils/ngram_utils.h"
     35 
     36 namespace latinime {
     37 
     38 // Note that there are corresponding definitions in Java side in BinaryDictionaryTests and
     39 // BinaryDictionaryDecayingTests.
     40 const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
     41 const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
     42 const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
     43 const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
     44 const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
     45 const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
     46         Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
     47 
     48 void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
     49         DicNodeVector *const childDicNodes) const {
     50     if (!dicNode->hasChildren()) {
     51         return;
     52     }
     53     DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
     54     readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
     55     while (!readingHelper.isEnd()) {
     56         const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams();
     57         if (!ptNodeParams.isValid()) {
     58             break;
     59         }
     60         const bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
     61         const int wordId = isTerminal ? ptNodeParams.getTerminalId() : NOT_A_WORD_ID;
     62         childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(),
     63                 wordId, ptNodeParams.getCodePointArrayView());
     64         readingHelper.readNextSiblingNode(ptNodeParams);
     65     }
     66     if (readingHelper.isError()) {
     67         mIsCorrupted = true;
     68         AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
     69     }
     70 }
     71 
     72 int Ver4PatriciaTriePolicy::getCodePointsAndReturnCodePointCount(const int wordId,
     73         const int maxCodePointCount, int *const outCodePoints) const {
     74     DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
     75     const int ptNodePos =
     76             mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId);
     77     readingHelper.initWithPtNodePos(ptNodePos);
     78     const int codePointCount =  readingHelper.getCodePointsAndReturnCodePointCount(
     79             maxCodePointCount, outCodePoints);
     80     if (readingHelper.isError()) {
     81         mIsCorrupted = true;
     82         AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount().");
     83     }
     84     return codePointCount;
     85 }
     86 
     87 int Ver4PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints,
     88         const bool forceLowerCaseSearch) const {
     89     DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
     90     readingHelper.initWithPtNodeArrayPos(getRootPosition());
     91     const int ptNodePos = readingHelper.getTerminalPtNodePositionOfWord(wordCodePoints.data(),
     92             wordCodePoints.size(), forceLowerCaseSearch);
     93     if (readingHelper.isError()) {
     94         mIsCorrupted = true;
     95         AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
     96     }
     97     if (ptNodePos == NOT_A_DICT_POS) {
     98         return NOT_A_WORD_ID;
     99     }
    100     const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
    101     if (ptNodeParams.isDeleted()) {
    102         return NOT_A_WORD_ID;
    103     }
    104     return ptNodeParams.getTerminalId();
    105 }
    106 
    107 const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext(
    108         const WordIdArrayView prevWordIds, const int wordId,
    109         MultiBigramMap *const multiBigramMap) const {
    110     if (wordId == NOT_A_WORD_ID) {
    111         return WordAttributes();
    112     }
    113     return mBuffers->getLanguageModelDictContent()->getWordAttributes(prevWordIds, wordId,
    114             false /* mustMatchAllPrevWords */, mHeaderPolicy);
    115 }
    116 
    117 int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordIds,
    118         const int wordId) const {
    119     if (wordId == NOT_A_WORD_ID || prevWordIds.contains(NOT_A_WORD_ID)) {
    120         return NOT_A_PROBABILITY;
    121     }
    122     const WordAttributes wordAttributes =
    123             mBuffers->getLanguageModelDictContent()->getWordAttributes(prevWordIds, wordId,
    124                     true /* mustMatchAllPrevWords */, mHeaderPolicy);
    125     if (wordAttributes.isBlacklisted() || wordAttributes.isNotAWord()) {
    126         return NOT_A_PROBABILITY;
    127     }
    128     return wordAttributes.getProbability();
    129 }
    130 
    131 BinaryDictionaryShortcutIterator Ver4PatriciaTriePolicy::getShortcutIterator(
    132         const int wordId) const {
    133     const int shortcutPos = getShortcutPositionOfWord(wordId);
    134     return BinaryDictionaryShortcutIterator(&mShortcutPolicy, shortcutPos);
    135 }
    136 
    137 void Ver4PatriciaTriePolicy::iterateNgramEntries(const WordIdArrayView prevWordIds,
    138         NgramListener *const listener) const {
    139     if (prevWordIds.empty()) {
    140         return;
    141     }
    142     const auto languageModelDictContent = mBuffers->getLanguageModelDictContent();
    143     for (size_t i = 1; i <= prevWordIds.size(); ++i) {
    144         for (const auto entry : languageModelDictContent->getProbabilityEntries(
    145                 prevWordIds.limit(i))) {
    146             const ProbabilityEntry &probabilityEntry = entry.getProbabilityEntry();
    147             if (!probabilityEntry.isValid()) {
    148                 continue;
    149             }
    150             int probability = NOT_A_PROBABILITY;
    151             if (probabilityEntry.hasHistoricalInfo()) {
    152                 // TODO: Quit checking count here.
    153                 // If count <= 1, the word can be an invaild word. The actual probability should
    154                 // be checked using getWordAttributesInContext() in onVisitEntry().
    155                 probability = probabilityEntry.getHistoricalInfo()->getCount() <= 1 ?
    156                         NOT_A_PROBABILITY : 0;
    157             } else {
    158                 probability = probabilityEntry.getProbability();
    159             }
    160             listener->onVisitEntry(probability, entry.getWordId());
    161         }
    162     }
    163 }
    164 
    165 int Ver4PatriciaTriePolicy::getShortcutPositionOfWord(const int wordId) const {
    166     if (wordId == NOT_A_WORD_ID) {
    167         return NOT_A_DICT_POS;
    168     }
    169     const int ptNodePos =
    170             mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId);
    171     const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
    172     if (ptNodeParams.isDeleted()) {
    173         return NOT_A_DICT_POS;
    174     }
    175     return mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
    176             ptNodeParams.getTerminalId());
    177 }
    178 
    179 bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePoints,
    180         const UnigramProperty *const unigramProperty) {
    181     if (!mBuffers->isUpdatable()) {
    182         AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
    183         return false;
    184     }
    185     if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
    186         AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
    187                 mDictBuffer->getTailPosition());
    188         return false;
    189     }
    190     if (wordCodePoints.size() > MAX_WORD_LENGTH) {
    191         AKLOGE("The word is too long to insert to the dictionary, length: %zd",
    192                 wordCodePoints.size());
    193         return false;
    194     }
    195     for (const auto &shortcut : unigramProperty->getShortcuts()) {
    196         if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
    197             AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %zd",
    198                     shortcut.getTargetCodePoints()->size());
    199             return false;
    200         }
    201     }
    202     DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
    203     readingHelper.initWithPtNodeArrayPos(getRootPosition());
    204     bool addedNewUnigram = false;
    205     int codePointsToAdd[MAX_WORD_LENGTH];
    206     int codePointCountToAdd = wordCodePoints.size();
    207     memmove(codePointsToAdd, wordCodePoints.data(), sizeof(int) * codePointCountToAdd);
    208     if (unigramProperty->representsBeginningOfSentence()) {
    209         codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
    210                 codePointCountToAdd, MAX_WORD_LENGTH);
    211     }
    212     if (codePointCountToAdd <= 0) {
    213         return false;
    214     }
    215     const CodePointArrayView codePointArrayView(codePointsToAdd, codePointCountToAdd);
    216     if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView, unigramProperty,
    217             &addedNewUnigram)) {
    218         if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
    219             mEntryCounters.incrementNgramCount(NgramType::Unigram);
    220         }
    221         if (unigramProperty->getShortcuts().size() > 0) {
    222             // Add shortcut target.
    223             const int wordId = getWordId(codePointArrayView, false /* forceLowerCaseSearch */);
    224             if (wordId == NOT_A_WORD_ID) {
    225                 AKLOGE("Cannot find word id to add shortcut target.");
    226                 return false;
    227             }
    228             const int wordPos =
    229                     mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId);
    230             for (const auto &shortcut : unigramProperty->getShortcuts()) {
    231                 if (!mUpdatingHelper.addShortcutTarget(wordPos,
    232                         CodePointArrayView(*shortcut.getTargetCodePoints()),
    233                         shortcut.getProbability())) {
    234                     AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %zd, "
    235                             "probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
    236                             shortcut.getProbability());
    237                     return false;
    238                 }
    239             }
    240         }
    241         return true;
    242     } else {
    243         return false;
    244     }
    245 }
    246 
    247 bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCodePoints) {
    248     if (!mBuffers->isUpdatable()) {
    249         AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
    250         return false;
    251     }
    252     const int wordId = getWordId(wordCodePoints, false /* forceLowerCaseSearch */);
    253     if (wordId == NOT_A_WORD_ID) {
    254         return false;
    255     }
    256     const int ptNodePos =
    257             mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId);
    258     const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
    259     if (!mNodeWriter.markPtNodeAsDeleted(&ptNodeParams)) {
    260         AKLOGE("Cannot remove unigram. ptNodePos: %d", ptNodePos);
    261         return false;
    262     }
    263     if (!mBuffers->getMutableLanguageModelDictContent()->removeProbabilityEntry(wordId)) {
    264         return false;
    265     }
    266     if (!ptNodeParams.representsNonWordInfo()) {
    267         mEntryCounters.decrementNgramCount(NgramType::Unigram);
    268     }
    269     return true;
    270 }
    271 
    272 bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramProperty *const ngramProperty) {
    273     if (!mBuffers->isUpdatable()) {
    274         AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
    275         return false;
    276     }
    277     if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
    278         AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
    279                 mDictBuffer->getTailPosition());
    280         return false;
    281     }
    282     const NgramContext *const ngramContext = ngramProperty->getNgramContext();
    283     if (!ngramContext->isValid()) {
    284         AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary.");
    285         return false;
    286     }
    287     if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
    288         AKLOGE("The word is too long to insert the ngram to the dictionary. "
    289                 "length: %zd", ngramProperty->getTargetCodePoints()->size());
    290         return false;
    291     }
    292     WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
    293     const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray,
    294             false /* tryLowerCaseSearch */);
    295     if (prevWordIds.empty()) {
    296         return false;
    297     }
    298     for (size_t i = 0; i < prevWordIds.size(); ++i) {
    299         if (prevWordIds[i] != NOT_A_WORD_ID) {
    300             continue;
    301         }
    302         if (!ngramContext->isNthPrevWordBeginningOfSentence(i + 1 /* n */)) {
    303             return false;
    304         }
    305         const UnigramProperty beginningOfSentenceUnigramProperty(
    306                 true /* representsBeginningOfSentence */, true /* isNotAWord */,
    307                 false /* isBlacklisted */, false /* isPossiblyOffensive */,
    308                 MAX_PROBABILITY /* probability */, HistoricalInfo());
    309         if (!addUnigramEntry(ngramContext->getNthPrevWordCodePoints(1 /* n */),
    310                 &beginningOfSentenceUnigramProperty)) {
    311             AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
    312             return false;
    313         }
    314         // Refresh word ids.
    315         ngramContext->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
    316     }
    317     const int wordId = getWordId(CodePointArrayView(*ngramProperty->getTargetCodePoints()),
    318             false /* forceLowerCaseSearch */);
    319     if (wordId == NOT_A_WORD_ID) {
    320         return false;
    321     }
    322     bool addedNewEntry = false;
    323     if (mNodeWriter.addNgramEntry(prevWordIds, wordId, ngramProperty, &addedNewEntry)) {
    324         if (addedNewEntry) {
    325             mEntryCounters.incrementNgramCount(
    326                     NgramUtils::getNgramTypeFromWordCount(prevWordIds.size() + 1));
    327         }
    328         return true;
    329     } else {
    330         return false;
    331     }
    332 }
    333 
    334 bool Ver4PatriciaTriePolicy::removeNgramEntry(const NgramContext *const ngramContext,
    335         const CodePointArrayView wordCodePoints) {
    336     if (!mBuffers->isUpdatable()) {
    337         AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
    338         return false;
    339     }
    340     if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
    341         AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
    342                 mDictBuffer->getTailPosition());
    343         return false;
    344     }
    345     if (!ngramContext->isValid()) {
    346         AKLOGE("Ngram context is not valid for removing n-gram entry form the dictionary.");
    347         return false;
    348     }
    349     if (wordCodePoints.size() > MAX_WORD_LENGTH) {
    350         AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %zd",
    351                 wordCodePoints.size());
    352     }
    353     WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
    354     const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray,
    355             false /* tryLowerCaseSerch */);
    356     if (prevWordIds.empty() || prevWordIds.contains(NOT_A_WORD_ID)) {
    357         return false;
    358     }
    359     const int wordId = getWordId(wordCodePoints, false /* forceLowerCaseSearch */);
    360     if (wordId == NOT_A_WORD_ID) {
    361         return false;
    362     }
    363     if (mNodeWriter.removeNgramEntry(prevWordIds, wordId)) {
    364         mEntryCounters.decrementNgramCount(
    365                 NgramUtils::getNgramTypeFromWordCount(prevWordIds.size() + 1));
    366         return true;
    367     } else {
    368         return false;
    369     }
    370 }
    371 
    372 bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
    373         const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints,
    374         const bool isValidWord, const HistoricalInfo historicalInfo) {
    375     if (!mBuffers->isUpdatable()) {
    376         AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable "
    377                 "dictionary.");
    378         return false;
    379     }
    380     const bool updateAsAValidWord = ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */) ?
    381             false : isValidWord;
    382     int wordId = getWordId(wordCodePoints, false /* tryLowerCaseSearch */);
    383     if (wordId == NOT_A_WORD_ID) {
    384         // The word is not in the dictionary.
    385         const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
    386                 false /* isNotAWord */, false /* isBlacklisted */, false /* isPossiblyOffensive */,
    387                 NOT_A_PROBABILITY, HistoricalInfo(historicalInfo.getTimestamp(), 0 /* level */,
    388                 0 /* count */));
    389         if (!addUnigramEntry(wordCodePoints, &unigramProperty)) {
    390             AKLOGE("Cannot add unigarm entry in updateEntriesForWordWithNgramContext().");
    391             return false;
    392         }
    393         if (!isValidWord) {
    394             return true;
    395         }
    396         wordId = getWordId(wordCodePoints, false /* tryLowerCaseSearch */);
    397     }
    398 
    399     WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
    400     const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray,
    401             false /* tryLowerCaseSearch */);
    402     if (ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)) {
    403         if (prevWordIds.firstOrDefault(NOT_A_WORD_ID) == NOT_A_WORD_ID) {
    404             const UnigramProperty beginningOfSentenceUnigramProperty(
    405                     true /* representsBeginningOfSentence */,
    406                     true /* isNotAWord */, false /* isPossiblyOffensive */, NOT_A_PROBABILITY,
    407                     HistoricalInfo(historicalInfo.getTimestamp(), 0 /* level */, 0 /* count */));
    408             if (!addUnigramEntry(ngramContext->getNthPrevWordCodePoints(1 /* n */),
    409                     &beginningOfSentenceUnigramProperty)) {
    410                 AKLOGE("Cannot add BoS entry in updateEntriesForWordWithNgramContext().");
    411                 return false;
    412             }
    413             // Refresh word ids.
    414             ngramContext->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
    415         }
    416         // Update entries for beginning of sentence.
    417         if (!mBuffers->getMutableLanguageModelDictContent()->updateAllEntriesOnInputWord(
    418                 prevWordIds.skip(1 /* n */), prevWordIds[0], true /* isVaild */, historicalInfo,
    419                 mHeaderPolicy, &mEntryCounters)) {
    420             return false;
    421         }
    422     }
    423     if (!mBuffers->getMutableLanguageModelDictContent()->updateAllEntriesOnInputWord(prevWordIds,
    424             wordId, updateAsAValidWord, historicalInfo, mHeaderPolicy, &mEntryCounters)) {
    425         return false;
    426     }
    427     return true;
    428 }
    429 
    430 bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
    431     if (!mBuffers->isUpdatable()) {
    432         AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
    433         return false;
    434     }
    435     if (!mWritingHelper.writeToDictFile(filePath, mEntryCounters.getEntryCounts())) {
    436         AKLOGE("Cannot flush the dictionary to file.");
    437         mIsCorrupted = true;
    438         return false;
    439     }
    440     return true;
    441 }
    442 
    443 bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
    444     if (!mBuffers->isUpdatable()) {
    445         AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
    446         return false;
    447     }
    448     if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) {
    449         AKLOGE("Cannot flush the dictionary to file with GC.");
    450         mIsCorrupted = true;
    451         return false;
    452     }
    453     return true;
    454 }
    455 
    456 bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
    457     if (!mBuffers->isUpdatable()) {
    458         AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
    459         return false;
    460     }
    461     if (mBuffers->isNearSizeLimit()) {
    462         // Additional buffer size is near the limit.
    463         return true;
    464     } else if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize()
    465             > Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) {
    466         // Total extended region size of the trie exceeds the limit.
    467         return true;
    468     } else if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
    469             && mDictBuffer->getUsedAdditionalBufferSize() > 0) {
    470         // Needs to reduce dictionary size.
    471         return true;
    472     } else if (mHeaderPolicy->isDecayingDict()) {
    473         return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mEntryCounters.getEntryCounts(),
    474                 mHeaderPolicy);
    475     }
    476     return false;
    477 }
    478 
    479 void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength,
    480         char *const outResult, const int maxResultLength) {
    481     const int compareLength = queryLength + 1 /* terminator */;
    482     if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) {
    483         snprintf(outResult, maxResultLength, "%d",
    484                 mEntryCounters.getNgramCount(NgramType::Unigram));
    485     } else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) {
    486         snprintf(outResult, maxResultLength, "%d", mEntryCounters.getNgramCount(NgramType::Bigram));
    487     } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
    488         snprintf(outResult, maxResultLength, "%d",
    489                 mHeaderPolicy->isDecayingDict() ?
    490                         ForgettingCurveUtils::getEntryCountHardLimit(
    491                                 mHeaderPolicy->getMaxNgramCounts().getNgramCount(
    492                                         NgramType::Unigram)) :
    493                         static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
    494     } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
    495         snprintf(outResult, maxResultLength, "%d",
    496                 mHeaderPolicy->isDecayingDict() ?
    497                         ForgettingCurveUtils::getEntryCountHardLimit(
    498                                 mHeaderPolicy->getMaxNgramCounts().getNgramCount(
    499                                         NgramType::Bigram)) :
    500                         static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
    501     }
    502 }
    503 
    504 const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
    505         const CodePointArrayView wordCodePoints) const {
    506     const int wordId = getWordId(wordCodePoints, false /* forceLowerCaseSearch */);
    507     if (wordId == NOT_A_WORD_ID) {
    508         AKLOGE("getWordProperty is called for invalid word.");
    509         return WordProperty();
    510     }
    511     const LanguageModelDictContent *const languageModelDictContent =
    512             mBuffers->getLanguageModelDictContent();
    513     // Fetch ngram information.
    514     std::vector<NgramProperty> ngrams;
    515     int ngramTargetCodePoints[MAX_WORD_LENGTH];
    516     int ngramPrevWordsCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
    517     int ngramPrevWordsCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    518     bool ngramPrevWordIsBeginningOfSentense[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    519     for (const auto entry : languageModelDictContent->exportAllNgramEntriesRelatedToWord(
    520             mHeaderPolicy, wordId)) {
    521         const int codePointCount = getCodePointsAndReturnCodePointCount(entry.getTargetWordId(),
    522                 MAX_WORD_LENGTH, ngramTargetCodePoints);
    523         const WordIdArrayView prevWordIds = entry.getPrevWordIds();
    524         for (size_t i = 0; i < prevWordIds.size(); ++i) {
    525             ngramPrevWordsCodePointCount[i] = getCodePointsAndReturnCodePointCount(prevWordIds[i],
    526                        MAX_WORD_LENGTH, ngramPrevWordsCodePoints[i]);
    527             ngramPrevWordIsBeginningOfSentense[i] = languageModelDictContent->getProbabilityEntry(
    528                     prevWordIds[i]).representsBeginningOfSentence();
    529             if (ngramPrevWordIsBeginningOfSentense[i]) {
    530                 ngramPrevWordsCodePointCount[i] = CharUtils::removeBeginningOfSentenceMarker(
    531                         ngramPrevWordsCodePoints[i], ngramPrevWordsCodePointCount[i]);
    532             }
    533         }
    534         const NgramContext ngramContext(ngramPrevWordsCodePoints, ngramPrevWordsCodePointCount,
    535                 ngramPrevWordIsBeginningOfSentense, prevWordIds.size());
    536         const ProbabilityEntry ngramProbabilityEntry = entry.getProbabilityEntry();
    537         const HistoricalInfo *const historicalInfo = ngramProbabilityEntry.getHistoricalInfo();
    538         // TODO: Output flags in WordAttributes.
    539         ngrams.emplace_back(ngramContext,
    540                 CodePointArrayView(ngramTargetCodePoints, codePointCount).toVector(),
    541                 entry.getWordAttributes().getProbability(), *historicalInfo);
    542     }
    543     // Fetch shortcut information.
    544     std::vector<UnigramProperty::ShortcutProperty> shortcuts;
    545     int shortcutPos = getShortcutPositionOfWord(wordId);
    546     if (shortcutPos != NOT_A_DICT_POS) {
    547         int shortcutTarget[MAX_WORD_LENGTH];
    548         const ShortcutDictContent *const shortcutDictContent =
    549                 mBuffers->getShortcutDictContent();
    550         bool hasNext = true;
    551         while (hasNext) {
    552             int shortcutTargetLength = 0;
    553             int shortcutProbability = NOT_A_PROBABILITY;
    554             shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget,
    555                     &shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos);
    556             shortcuts.emplace_back(
    557                     CodePointArrayView(shortcutTarget, shortcutTargetLength).toVector(),
    558                     shortcutProbability);
    559         }
    560     }
    561     const WordAttributes wordAttributes = languageModelDictContent->getWordAttributes(
    562             WordIdArrayView(), wordId, true /* mustMatchAllPrevWords */, mHeaderPolicy);
    563     const ProbabilityEntry probabilityEntry = languageModelDictContent->getProbabilityEntry(wordId);
    564     const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
    565     const UnigramProperty unigramProperty(probabilityEntry.representsBeginningOfSentence(),
    566             wordAttributes.isNotAWord(), wordAttributes.isBlacklisted(),
    567             wordAttributes.isPossiblyOffensive(), wordAttributes.getProbability(),
    568             *historicalInfo, std::move(shortcuts));
    569     return WordProperty(wordCodePoints.toVector(), unigramProperty, ngrams);
    570 }
    571 
    572 int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
    573         int *const outCodePointCount) {
    574     *outCodePointCount = 0;
    575     if (token == 0) {
    576         mTerminalPtNodePositionsForIteratingWords.clear();
    577         DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
    578                 &mTerminalPtNodePositionsForIteratingWords);
    579         DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
    580         readingHelper.initWithPtNodeArrayPos(getRootPosition());
    581         readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
    582     }
    583     const int terminalPtNodePositionsVectorSize =
    584             static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
    585     if (token < 0 || token >= terminalPtNodePositionsVectorSize) {
    586         AKLOGE("Given token %d is invalid.", token);
    587         return 0;
    588     }
    589     const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
    590     const PtNodeParams ptNodeParams =
    591             mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(terminalPtNodePos);
    592     *outCodePointCount = getCodePointsAndReturnCodePointCount(ptNodeParams.getTerminalId(),
    593             MAX_WORD_LENGTH, outCodePoints);
    594     const int nextToken = token + 1;
    595     if (nextToken >= terminalPtNodePositionsVectorSize) {
    596         // All words have been iterated.
    597         mTerminalPtNodePositionsForIteratingWords.clear();
    598         return 0;
    599     }
    600     return nextToken;
    601 }
    602 
    603 } // namespace latinime
    604