Home | History | Annotate | Download | only in v402
      1 /*
      2  * Copyright (C) 2013, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
     19  * Do not edit this file other than updating policy's interface.
     20  *
     21  * This file was generated from
     22  *   dictionary/structure/v4/ver4_patricia_trie_policy.cpp
     23  */
     24 
     25 #include "dictionary/structure/backward/v402/ver4_patricia_trie_policy.h"
     26 
     27 #include <vector>
     28 
     29 #include "suggest/core/dicnode/dic_node.h"
     30 #include "suggest/core/dicnode/dic_node_vector.h"
     31 #include "dictionary/interface/ngram_listener.h"
     32 #include "dictionary/property/ngram_context.h"
     33 #include "dictionary/property/ngram_property.h"
     34 #include "dictionary/property/unigram_property.h"
     35 #include "dictionary/property/word_property.h"
     36 #include "dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
     37 #include "dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
     38 #include "dictionary/utils/forgetting_curve_utils.h"
     39 #include "dictionary/utils/multi_bigram_map.h"
     40 #include "dictionary/utils/probability_utils.h"
     41 
     42 namespace latinime {
     43 namespace backward {
     44 namespace v402 {
     45 
     46 // Note that there are corresponding definitions in Java side in BinaryDictionaryTests and
     47 // BinaryDictionaryDecayingTests.
     48 const char *const Ver4PatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
     49 const char *const Ver4PatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
     50 const char *const Ver4PatriciaTriePolicy::MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
     51 const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
     52 const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
     53 const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
     54         Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
     55 const int Ver4PatriciaTriePolicy::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1;
     56 
     57 void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
     58         DicNodeVector *const childDicNodes) const {
     59     if (!dicNode->hasChildren()) {
     60         return;
     61     }
     62     DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
     63     readingHelper.initWithPtNodeArrayPos(dicNode->getChildrenPtNodeArrayPos());
     64     while (!readingHelper.isEnd()) {
     65         const PtNodeParams ptNodeParams = readingHelper.getPtNodeParams();
     66         if (!ptNodeParams.isValid()) {
     67             break;
     68         }
     69         bool isTerminal = ptNodeParams.isTerminal() && !ptNodeParams.isDeleted();
     70         if (isTerminal && mHeaderPolicy->isDecayingDict()) {
     71             // A DecayingDict may have a terminal PtNode that has a terminal DicNode whose
     72             // probability is NOT_A_PROBABILITY. In such case, we don't want to treat it as a
     73             // valid terminal DicNode.
     74             isTerminal = ptNodeParams.getProbability() != NOT_A_PROBABILITY;
     75         }
     76         readingHelper.readNextSiblingNode(ptNodeParams);
     77         if (ptNodeParams.representsNonWordInfo()) {
     78             // Skip PtNodes that represent non-word information.
     79             continue;
     80         }
     81         const int wordId = isTerminal ? ptNodeParams.getHeadPos() : NOT_A_WORD_ID;
     82         childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(),
     83                 wordId, ptNodeParams.getCodePointArrayView());
     84     }
     85     if (readingHelper.isError()) {
     86         mIsCorrupted = true;
     87         AKLOGE("Dictionary reading error in createAndGetAllChildDicNodes().");
     88     }
     89 }
     90 
     91 int Ver4PatriciaTriePolicy::getCodePointsAndReturnCodePointCount(const int wordId,
     92         const int maxCodePointCount, int *const outCodePoints) const {
     93     DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
     94     const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
     95     readingHelper.initWithPtNodePos(ptNodePos);
     96     const int codePointCount =  readingHelper.getCodePointsAndReturnCodePointCount(
     97             maxCodePointCount, outCodePoints);
     98     if (readingHelper.isError()) {
     99         mIsCorrupted = true;
    100         AKLOGE("Dictionary reading error in getCodePointsAndProbabilityAndReturnCodePointCount().");
    101     }
    102     return codePointCount;
    103 }
    104 
    105 int Ver4PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints,
    106         const bool forceLowerCaseSearch) const {
    107     DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
    108     readingHelper.initWithPtNodeArrayPos(getRootPosition());
    109     const int ptNodePos = readingHelper.getTerminalPtNodePositionOfWord(wordCodePoints.data(),
    110             wordCodePoints.size(), forceLowerCaseSearch);
    111     if (readingHelper.isError()) {
    112         mIsCorrupted = true;
    113         AKLOGE("Dictionary reading error in getWordId().");
    114     }
    115     return getWordIdFromTerminalPtNodePos(ptNodePos);
    116 }
    117 
    118 const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext(
    119         const WordIdArrayView prevWordIds, const int wordId,
    120         MultiBigramMap *const multiBigramMap) const {
    121     if (wordId == NOT_A_WORD_ID) {
    122         return WordAttributes();
    123     }
    124     const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
    125     const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
    126     if (multiBigramMap) {
    127         const int probability = multiBigramMap->getBigramProbability(this /* structurePolicy */,
    128                 prevWordIds, wordId, ptNodeParams.getProbability());
    129         return getWordAttributes(probability, ptNodeParams);
    130     }
    131     if (!prevWordIds.empty()) {
    132         const int probability = getProbabilityOfWord(prevWordIds, wordId);
    133         if (probability != NOT_A_PROBABILITY) {
    134             return getWordAttributes(probability, ptNodeParams);
    135         }
    136     }
    137     return getWordAttributes(getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY),
    138             ptNodeParams);
    139 }
    140 
    141 const WordAttributes Ver4PatriciaTriePolicy::getWordAttributes(const int probability,
    142         const PtNodeParams &ptNodeParams) const {
    143     return WordAttributes(probability, false /* isBlacklisted */, ptNodeParams.isNotAWord(),
    144             ptNodeParams.getProbability() == 0);
    145 }
    146 
    147 int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
    148         const int bigramProbability) const {
    149     // In the v4 format, bigramProbability is a conditional probability.
    150     const int bigramConditionalProbability = bigramProbability;
    151     if (unigramProbability == NOT_A_PROBABILITY) {
    152         return NOT_A_PROBABILITY;
    153     }
    154     if (bigramConditionalProbability == NOT_A_PROBABILITY) {
    155         return ProbabilityUtils::backoff(unigramProbability);
    156     }
    157     return bigramConditionalProbability;
    158 }
    159 
    160 int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordIds,
    161         const int wordId) const {
    162     if (wordId == NOT_A_WORD_ID) {
    163         return NOT_A_PROBABILITY;
    164     }
    165     const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
    166     const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
    167     if (ptNodeParams.isDeleted() || ptNodeParams.isNotAWord()) {
    168         return NOT_A_PROBABILITY;
    169     }
    170     if (prevWordIds.empty()) {
    171         return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
    172     }
    173     if (prevWordIds[0] == NOT_A_WORD_ID) {
    174         return NOT_A_PROBABILITY;
    175     }
    176     const PtNodeParams prevWordPtNodeParams =
    177             mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(prevWordIds[0]);
    178     if (prevWordPtNodeParams.isDeleted()) {
    179         return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
    180     }
    181     const int bigramsPosition = mBuffers->getBigramDictContent()->getBigramListHeadPos(
    182             prevWordPtNodeParams.getTerminalId());
    183     BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
    184     while (bigramsIt.hasNext()) {
    185         bigramsIt.next();
    186         if (bigramsIt.getBigramPos() == ptNodePos
    187                 && bigramsIt.getProbability() != NOT_A_PROBABILITY) {
    188             const int bigramConditionalProbability = getBigramConditionalProbability(
    189                     prevWordPtNodeParams.getProbability(),
    190                     prevWordPtNodeParams.representsBeginningOfSentence(),
    191                     bigramsIt.getProbability());
    192             return getProbability(ptNodeParams.getProbability(), bigramConditionalProbability);
    193         }
    194     }
    195     return NOT_A_PROBABILITY;
    196 }
    197 
    198 void Ver4PatriciaTriePolicy::iterateNgramEntries(const WordIdArrayView prevWordIds,
    199         NgramListener *const listener) const {
    200     if (prevWordIds.firstOrDefault(NOT_A_DICT_POS) == NOT_A_DICT_POS) {
    201         return;
    202     }
    203     const PtNodeParams prevWordPtNodeParams =
    204             mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(prevWordIds[0]);
    205     if (prevWordPtNodeParams.isDeleted()) {
    206         return;
    207     }
    208     const int bigramsPosition = mBuffers->getBigramDictContent()->getBigramListHeadPos(
    209             prevWordPtNodeParams.getTerminalId());
    210     BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition);
    211     while (bigramsIt.hasNext()) {
    212         bigramsIt.next();
    213         const int bigramConditionalProbability = getBigramConditionalProbability(
    214                 prevWordPtNodeParams.getProbability(),
    215                 prevWordPtNodeParams.representsBeginningOfSentence(), bigramsIt.getProbability());
    216         listener->onVisitEntry(bigramConditionalProbability,
    217                 getWordIdFromTerminalPtNodePos(bigramsIt.getBigramPos()));
    218     }
    219 }
    220 
    221 int Ver4PatriciaTriePolicy::getBigramConditionalProbability(const int prevWordUnigramProbability,
    222         const bool isInBeginningOfSentenceContext, const int bigramProbability) const {
    223     if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
    224         if (isInBeginningOfSentenceContext) {
    225             return bigramProbability;
    226         }
    227         // Calculate conditional probability.
    228         return std::min(MAX_PROBABILITY - prevWordUnigramProbability + bigramProbability,
    229                 MAX_PROBABILITY);
    230     } else {
    231         // bigramProbability is a conditional probability.
    232         return bigramProbability;
    233     }
    234 }
    235 
    236 BinaryDictionaryShortcutIterator Ver4PatriciaTriePolicy::getShortcutIterator(
    237         const int wordId) const {
    238     const int shortcutPos = getShortcutPositionOfPtNode(getTerminalPtNodePosFromWordId(wordId));
    239     return BinaryDictionaryShortcutIterator(&mShortcutPolicy, shortcutPos);
    240 }
    241 
    242 int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
    243     if (ptNodePos == NOT_A_DICT_POS) {
    244         return NOT_A_DICT_POS;
    245     }
    246     const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
    247     if (ptNodeParams.isDeleted()) {
    248         return NOT_A_DICT_POS;
    249     }
    250     return mBuffers->getShortcutDictContent()->getShortcutListHeadPos(
    251             ptNodeParams.getTerminalId());
    252 }
    253 
    254 int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
    255     if (ptNodePos == NOT_A_DICT_POS) {
    256         return NOT_A_DICT_POS;
    257     }
    258     const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
    259     if (ptNodeParams.isDeleted()) {
    260         return NOT_A_DICT_POS;
    261     }
    262     return mBuffers->getBigramDictContent()->getBigramListHeadPos(
    263             ptNodeParams.getTerminalId());
    264 }
    265 
    266 bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePoints,
    267         const UnigramProperty *const unigramProperty) {
    268     if (!mBuffers->isUpdatable()) {
    269         AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
    270         return false;
    271     }
    272     if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
    273         AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
    274                 mDictBuffer->getTailPosition());
    275         return false;
    276     }
    277     if (wordCodePoints.size() > MAX_WORD_LENGTH) {
    278         AKLOGE("The word is too long to insert to the dictionary, length: %zd",
    279                 wordCodePoints.size());
    280         return false;
    281     }
    282     for (const auto &shortcut : unigramProperty->getShortcuts()) {
    283         if (shortcut.getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
    284             AKLOGE("One of shortcut targets is too long to insert to the dictionary, length: %zd",
    285                     shortcut.getTargetCodePoints()->size());
    286             return false;
    287         }
    288     }
    289     DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
    290     readingHelper.initWithPtNodeArrayPos(getRootPosition());
    291     bool addedNewUnigram = false;
    292     int codePointsToAdd[MAX_WORD_LENGTH];
    293     int codePointCountToAdd = wordCodePoints.size();
    294     memmove(codePointsToAdd, wordCodePoints.data(), sizeof(int) * codePointCountToAdd);
    295     if (unigramProperty->representsBeginningOfSentence()) {
    296         codePointCountToAdd = CharUtils::attachBeginningOfSentenceMarker(codePointsToAdd,
    297                 codePointCountToAdd, MAX_WORD_LENGTH);
    298     }
    299     if (codePointCountToAdd <= 0) {
    300         return false;
    301     }
    302     const CodePointArrayView codePointArrayView(codePointsToAdd, codePointCountToAdd);
    303     if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView, unigramProperty,
    304             &addedNewUnigram)) {
    305         if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
    306             mEntryCounters.incrementNgramCount(NgramType::Unigram);
    307         }
    308         if (unigramProperty->getShortcuts().size() > 0) {
    309             // Add shortcut target.
    310             const int wordPos = getTerminalPtNodePosFromWordId(
    311                     getWordId(codePointArrayView, false /* forceLowerCaseSearch */));
    312             if (wordPos == NOT_A_DICT_POS) {
    313                 AKLOGE("Cannot find terminal PtNode position to add shortcut target.");
    314                 return false;
    315             }
    316             for (const auto &shortcut : unigramProperty->getShortcuts()) {
    317                 if (!mUpdatingHelper.addShortcutTarget(wordPos,
    318                         CodePointArrayView(*shortcut.getTargetCodePoints()),
    319                         shortcut.getProbability())) {
    320                     AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %zd, "
    321                             "probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
    322                             shortcut.getProbability());
    323                     return false;
    324                 }
    325             }
    326         }
    327         return true;
    328     } else {
    329         return false;
    330     }
    331 }
    332 
    333 bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCodePoints) {
    334     if (!mBuffers->isUpdatable()) {
    335         AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
    336         return false;
    337     }
    338     const int ptNodePos = getTerminalPtNodePosFromWordId(
    339             getWordId(wordCodePoints, false /* forceLowerCaseSearch */));
    340     if (ptNodePos == NOT_A_DICT_POS) {
    341         return false;
    342     }
    343     const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
    344     return mNodeWriter.suppressUnigramEntry(&ptNodeParams);
    345 }
    346 
    347 bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramProperty *const ngramProperty) {
    348     if (!mBuffers->isUpdatable()) {
    349         AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
    350         return false;
    351     }
    352     if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
    353         AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
    354                 mDictBuffer->getTailPosition());
    355         return false;
    356     }
    357     const NgramContext *const ngramContext = ngramProperty->getNgramContext();
    358     if (!ngramContext->isValid()) {
    359         AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary.");
    360         return false;
    361     }
    362     if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
    363         AKLOGE("The word is too long to insert the ngram to the dictionary. "
    364                 "length: %zd", ngramProperty->getTargetCodePoints()->size());
    365         return false;
    366     }
    367     WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
    368     const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray,
    369             false /* tryLowerCaseSearch */);
    370     if (prevWordIds.empty()) {
    371         return false;
    372     }
    373     if (prevWordIds[0] == NOT_A_WORD_ID) {
    374         if (ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)) {
    375             const UnigramProperty beginningOfSentenceUnigramProperty(
    376                     true /* representsBeginningOfSentence */, true /* isNotAWord */,
    377                     false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo());
    378             if (!addUnigramEntry(ngramContext->getNthPrevWordCodePoints(1 /* n */),
    379                     &beginningOfSentenceUnigramProperty)) {
    380                 AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
    381                 return false;
    382             }
    383             // Refresh word ids.
    384             ngramContext->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
    385         } else {
    386             return false;
    387         }
    388     }
    389     const int wordPos = getTerminalPtNodePosFromWordId(getWordId(
    390             CodePointArrayView(*ngramProperty->getTargetCodePoints()),
    391                     false /* forceLowerCaseSearch */));
    392     if (wordPos == NOT_A_DICT_POS) {
    393         return false;
    394     }
    395     bool addedNewBigram = false;
    396     const int prevWordPtNodePos = getTerminalPtNodePosFromWordId(prevWordIds[0]);
    397     if (mUpdatingHelper.addNgramEntry(PtNodePosArrayView::singleElementView(&prevWordPtNodePos),
    398             wordPos, ngramProperty, &addedNewBigram)) {
    399         if (addedNewBigram) {
    400             mEntryCounters.incrementNgramCount(NgramType::Bigram);
    401         }
    402         return true;
    403     } else {
    404         return false;
    405     }
    406 }
    407 
    408 bool Ver4PatriciaTriePolicy::removeNgramEntry(const NgramContext *const ngramContext,
    409         const CodePointArrayView wordCodePoints) {
    410     if (!mBuffers->isUpdatable()) {
    411         AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
    412         return false;
    413     }
    414     if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
    415         AKLOGE("The dictionary is too large to dynamically update. Dictionary size: %d",
    416                 mDictBuffer->getTailPosition());
    417         return false;
    418     }
    419     if (!ngramContext->isValid()) {
    420         AKLOGE("Ngram context is not valid for removing n-gram entry form the dictionary.");
    421         return false;
    422     }
    423     if (wordCodePoints.size() > MAX_WORD_LENGTH) {
    424         AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %zd",
    425                 wordCodePoints.size());
    426     }
    427     WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
    428     const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray,
    429             false /* tryLowerCaseSerch */);
    430     if (prevWordIds.firstOrDefault(NOT_A_WORD_ID) == NOT_A_WORD_ID) {
    431         return false;
    432     }
    433     const int wordPos = getTerminalPtNodePosFromWordId(getWordId(wordCodePoints,
    434             false /* forceLowerCaseSearch */));
    435     if (wordPos == NOT_A_DICT_POS) {
    436         return false;
    437     }
    438     const int prevWordPtNodePos = getTerminalPtNodePosFromWordId(prevWordIds[0]);
    439     if (mUpdatingHelper.removeNgramEntry(
    440             PtNodePosArrayView::singleElementView(&prevWordPtNodePos), wordPos)) {
    441         mEntryCounters.decrementNgramCount(NgramType::Bigram);
    442         return true;
    443     } else {
    444         return false;
    445     }
    446 }
    447 
    448 
    449 bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
    450         const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints,
    451         const bool isValidWord, const HistoricalInfo historicalInfo) {
    452     if (!mBuffers->isUpdatable()) {
    453         AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable "
    454                 "dictionary.");
    455         return false;
    456     }
    457     const int probability = isValidWord ? DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY;
    458     const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
    459             false /* isNotAWord */, false /*isBlacklisted*/, probability, historicalInfo);
    460     if (!addUnigramEntry(wordCodePoints, &unigramProperty)) {
    461         AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext().");
    462         return false;
    463     }
    464     const int probabilityForNgram = ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)
    465             ? NOT_A_PROBABILITY : probability;
    466     const NgramProperty ngramProperty(*ngramContext, wordCodePoints.toVector(), probabilityForNgram,
    467             historicalInfo);
    468     if (!addNgramEntry(&ngramProperty)) {
    469         AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext().");
    470         return false;
    471     }
    472     return true;
    473 }
    474 
    475 bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
    476     if (!mBuffers->isUpdatable()) {
    477         AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
    478         return false;
    479     }
    480     if (!mWritingHelper.writeToDictFile(filePath, mEntryCounters.getEntryCounts())) {
    481         AKLOGE("Cannot flush the dictionary to file.");
    482         mIsCorrupted = true;
    483         return false;
    484     }
    485     return true;
    486 }
    487 
    488 bool Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
    489     if (!mBuffers->isUpdatable()) {
    490         AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
    491         return false;
    492     }
    493     if (!mWritingHelper.writeToDictFileWithGC(getRootPosition(), filePath)) {
    494         AKLOGE("Cannot flush the dictionary to file with GC.");
    495         mIsCorrupted = true;
    496         return false;
    497     }
    498     return true;
    499 }
    500 
    501 bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
    502     if (!mBuffers->isUpdatable()) {
    503         AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
    504         return false;
    505     }
    506     if (mBuffers->isNearSizeLimit()) {
    507         // Additional buffer size is near the limit.
    508         return true;
    509     } else if (mHeaderPolicy->getExtendedRegionSize() + mDictBuffer->getUsedAdditionalBufferSize()
    510             > Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE) {
    511         // Total extended region size of the trie exceeds the limit.
    512         return true;
    513     } else if (mDictBuffer->getTailPosition() >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
    514             && mDictBuffer->getUsedAdditionalBufferSize() > 0) {
    515         // Needs to reduce dictionary size.
    516         return true;
    517     } else if (mHeaderPolicy->isDecayingDict()) {
    518         return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mEntryCounters.getEntryCounts(),
    519                 mHeaderPolicy);
    520     }
    521     return false;
    522 }
    523 
    524 void Ver4PatriciaTriePolicy::getProperty(const char *const query, const int queryLength,
    525         char *const outResult, const int maxResultLength) {
    526     const int compareLength = queryLength + 1 /* terminator */;
    527     if (strncmp(query, UNIGRAM_COUNT_QUERY, compareLength) == 0) {
    528         snprintf(outResult, maxResultLength, "%d",
    529                 mEntryCounters.getNgramCount(NgramType::Unigram));
    530     } else if (strncmp(query, BIGRAM_COUNT_QUERY, compareLength) == 0) {
    531         snprintf(outResult, maxResultLength, "%d", mEntryCounters.getNgramCount(NgramType::Bigram));
    532     } else if (strncmp(query, MAX_UNIGRAM_COUNT_QUERY, compareLength) == 0) {
    533         snprintf(outResult, maxResultLength, "%d",
    534                 mHeaderPolicy->isDecayingDict() ?
    535                         ForgettingCurveUtils::getEntryCountHardLimit(
    536                                 mHeaderPolicy->getMaxNgramCounts().getNgramCount(
    537                                         NgramType::Unigram)) :
    538                         static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
    539     } else if (strncmp(query, MAX_BIGRAM_COUNT_QUERY, compareLength) == 0) {
    540         snprintf(outResult, maxResultLength, "%d",
    541                 mHeaderPolicy->isDecayingDict() ?
    542                         ForgettingCurveUtils::getEntryCountHardLimit(
    543                                 mHeaderPolicy->getMaxNgramCounts().getNgramCount(
    544                                         NgramType::Bigram)) :
    545                         static_cast<int>(Ver4DictConstants::MAX_DICTIONARY_SIZE));
    546     }
    547 }
    548 
    549 const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
    550         const CodePointArrayView wordCodePoints) const {
    551     const int ptNodePos = getTerminalPtNodePosFromWordId(
    552             getWordId(wordCodePoints, false /* forceLowerCaseSearch */));
    553     if (ptNodePos == NOT_A_DICT_POS) {
    554         AKLOGE("getWordProperty is called for invalid word.");
    555         return WordProperty();
    556     }
    557     const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
    558     const ProbabilityEntry probabilityEntry =
    559             mBuffers->getProbabilityDictContent()->getProbabilityEntry(
    560                     ptNodeParams.getTerminalId());
    561     const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
    562     // Fetch bigram information.
    563     std::vector<NgramProperty> ngrams;
    564     const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
    565     if (bigramListPos != NOT_A_DICT_POS) {
    566         int bigramWord1CodePoints[MAX_WORD_LENGTH];
    567         const BigramDictContent *const bigramDictContent = mBuffers->getBigramDictContent();
    568         const TerminalPositionLookupTable *const terminalPositionLookupTable =
    569                 mBuffers->getTerminalPositionLookupTable();
    570         bool hasNext = true;
    571         int readingPos = bigramListPos;
    572         while (hasNext) {
    573             const BigramEntry bigramEntry =
    574                     bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
    575             hasNext = bigramEntry.hasNext();
    576             const int word1TerminalId = bigramEntry.getTargetTerminalId();
    577             const int word1TerminalPtNodePos =
    578                     terminalPositionLookupTable->getTerminalPtNodePosition(word1TerminalId);
    579             if (word1TerminalPtNodePos == NOT_A_DICT_POS) {
    580                 continue;
    581             }
    582             const int codePointCount = getCodePointsAndReturnCodePointCount(
    583                     getWordIdFromTerminalPtNodePos(word1TerminalPtNodePos), MAX_WORD_LENGTH,
    584                     bigramWord1CodePoints);
    585             const HistoricalInfo *const historicalInfo = bigramEntry.getHistoricalInfo();
    586             const int rawBigramProbability = bigramEntry.hasHistoricalInfo()
    587                     ? ForgettingCurveUtils::decodeProbability(
    588                             bigramEntry.getHistoricalInfo(), mHeaderPolicy)
    589                     : bigramEntry.getProbability();
    590             const int probability = getBigramConditionalProbability(ptNodeParams.getProbability(),
    591                     ptNodeParams.representsBeginningOfSentence(), rawBigramProbability);
    592             ngrams.emplace_back(
    593                     NgramContext(wordCodePoints.data(), wordCodePoints.size(),
    594                             ptNodeParams.representsBeginningOfSentence()),
    595                     CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
    596                     probability, *historicalInfo);
    597         }
    598     }
    599     // Fetch shortcut information.
    600     std::vector<UnigramProperty::ShortcutProperty> shortcuts;
    601     int shortcutPos = getShortcutPositionOfPtNode(ptNodePos);
    602     if (shortcutPos != NOT_A_DICT_POS) {
    603         int shortcutTarget[MAX_WORD_LENGTH];
    604         const ShortcutDictContent *const shortcutDictContent =
    605                 mBuffers->getShortcutDictContent();
    606         bool hasNext = true;
    607         while (hasNext) {
    608             int shortcutTargetLength = 0;
    609             int shortcutProbability = NOT_A_PROBABILITY;
    610             shortcutDictContent->getShortcutEntryAndAdvancePosition(MAX_WORD_LENGTH, shortcutTarget,
    611                     &shortcutTargetLength, &shortcutProbability, &hasNext, &shortcutPos);
    612             shortcuts.emplace_back(
    613                     CodePointArrayView(shortcutTarget, shortcutTargetLength).toVector(),
    614                     shortcutProbability);
    615         }
    616     }
    617     const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
    618             ptNodeParams.isNotAWord(), ptNodeParams.isPossiblyOffensive(),
    619             ptNodeParams.getProbability(), *historicalInfo, std::move(shortcuts));
    620     return WordProperty(wordCodePoints.toVector(), unigramProperty, ngrams);
    621 }
    622 
    623 int Ver4PatriciaTriePolicy::getNextWordAndNextToken(const int token, int *const outCodePoints,
    624         int *const outCodePointCount) {
    625     *outCodePointCount = 0;
    626     if (token == 0) {
    627         mTerminalPtNodePositionsForIteratingWords.clear();
    628         DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions traversePolicy(
    629                 &mTerminalPtNodePositionsForIteratingWords);
    630         DynamicPtReadingHelper readingHelper(&mNodeReader, &mPtNodeArrayReader);
    631         readingHelper.initWithPtNodeArrayPos(getRootPosition());
    632         readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(&traversePolicy);
    633     }
    634     const int terminalPtNodePositionsVectorSize =
    635             static_cast<int>(mTerminalPtNodePositionsForIteratingWords.size());
    636     if (token < 0 || token >= terminalPtNodePositionsVectorSize) {
    637         AKLOGE("Given token %d is invalid.", token);
    638         return 0;
    639     }
    640     const int terminalPtNodePos = mTerminalPtNodePositionsForIteratingWords[token];
    641     *outCodePointCount = getCodePointsAndReturnCodePointCount(
    642             getWordIdFromTerminalPtNodePos(terminalPtNodePos), MAX_WORD_LENGTH, outCodePoints);
    643     const int nextToken = token + 1;
    644     if (nextToken >= terminalPtNodePositionsVectorSize) {
    645         // All words have been iterated.
    646         mTerminalPtNodePositionsForIteratingWords.clear();
    647         return 0;
    648     }
    649     return nextToken;
    650 }
    651 
    652 int Ver4PatriciaTriePolicy::getWordIdFromTerminalPtNodePos(const int ptNodePos) const {
    653     return ptNodePos == NOT_A_DICT_POS ? NOT_A_WORD_ID : ptNodePos;
    654 }
    655 
    656 int Ver4PatriciaTriePolicy::getTerminalPtNodePosFromWordId(const int wordId) const {
    657     return wordId == NOT_A_WORD_ID ? NOT_A_DICT_POS : wordId;
    658 }
    659 
    660 } // namespace v402
    661 } // namespace backward
    662 } // namespace latinime
    663