Home | History | Annotate | Download | only in dictionary
      1 /*
      2  * Copyright (C) 2013, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
     18 
     19 #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
     20 #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
     21 #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
     22 #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
     23 #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
     24 #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h"
     25 #include "suggest/policyimpl/dictionary/header/header_policy.h"
     26 #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
     27 #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
     28 #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
     29 #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
     30 #include "utils/hash_map_compat.h"
     31 
     32 namespace latinime {
     33 
     34 const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
     35 // TODO: Make MAX_DICTIONARY_SIZE 8MB.
     36 const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024;
     37 
     38 bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
     39         DynamicPatriciaTrieReadingHelper *const readingHelper,
     40         const int *const wordCodePoints, const int codePointCount, const int probability,
     41         bool *const outAddedNewUnigram) {
     42     int parentPos = NOT_A_DICT_POS;
     43     while (!readingHelper->isEnd()) {
     44         const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
     45         if (!readingHelper->isMatchedCodePoint(0 /* index */,
     46                 wordCodePoints[matchedCodePointCount])) {
     47             // The first code point is different from target code point. Skip this node and read
     48             // the next sibling node.
     49             readingHelper->readNextSiblingNode();
     50             continue;
     51         }
     52         // Check following merged node code points.
     53         const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper->getNodeReader();
     54         const int nodeCodePointCount = nodeReader->getCodePointCount();
     55         for (int j = 1; j < nodeCodePointCount; ++j) {
     56             const int nextIndex = matchedCodePointCount + j;
     57             if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j,
     58                     wordCodePoints[matchedCodePointCount + j])) {
     59                 *outAddedNewUnigram = true;
     60                 return reallocatePtNodeAndAddNewPtNodes(nodeReader,
     61                         readingHelper->getMergedNodeCodePoints(), j,
     62                         getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */,
     63                                 probability),
     64                         wordCodePoints + matchedCodePointCount,
     65                         codePointCount - matchedCodePointCount);
     66             }
     67         }
     68         // All characters are matched.
     69         if (codePointCount == readingHelper->getTotalCodePointCount()) {
     70             return setPtNodeProbability(nodeReader, probability,
     71                     readingHelper->getMergedNodeCodePoints(), outAddedNewUnigram);
     72         }
     73         if (!nodeReader->hasChildren()) {
     74             *outAddedNewUnigram = true;
     75             return createChildrenPtNodeArrayAndAChildPtNode(nodeReader,
     76                     getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
     77                     wordCodePoints + readingHelper->getTotalCodePointCount(),
     78                     codePointCount - readingHelper->getTotalCodePointCount());
     79         }
     80         // Advance to the children nodes.
     81         parentPos = nodeReader->getHeadPos();
     82         readingHelper->readChildNode();
     83     }
     84     if (readingHelper->isError()) {
     85         // The dictionary is invalid.
     86         return false;
     87     }
     88     int pos = readingHelper->getPosOfLastForwardLinkField();
     89     *outAddedNewUnigram = true;
     90     return createAndInsertNodeIntoPtNodeArray(parentPos,
     91             wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
     92             codePointCount - readingHelper->getPrevTotalCodePointCount(),
     93             getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos);
     94 }
     95 
     96 bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
     97         const int probability, bool *const outAddedNewBigram) {
     98     int mMergedNodeCodePoints[MAX_WORD_LENGTH];
     99     DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
    100     nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
    101             mMergedNodeCodePoints);
    102     // Move node to add bigram entry.
    103     const int newNodePos = mBuffer->getTailPosition();
    104     if (!markNodeAsMovedAndSetPosition(&nodeReader, newNodePos, newNodePos)) {
    105         return false;
    106     }
    107     int writingPos = newNodePos;
    108     // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer.
    109     if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &nodeReader, nodeReader.getParentPos(),
    110             mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(),
    111             &writingPos)) {
    112         return false;
    113     }
    114     nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos);
    115     if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) {
    116         // Insert a new bigram entry into the existing bigram list.
    117         int bigramListPos = nodeReader.getBigramsPos();
    118         return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos,
    119                 outAddedNewBigram);
    120     } else {
    121         // The PtNode doesn't have a bigram list.
    122         *outAddedNewBigram = true;
    123         // First, Write a bigram entry at the tail position of the PtNode.
    124         if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) {
    125             return false;
    126         }
    127         // Then, Mark as the PtNode having bigram list in the flags.
    128         const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
    129                 PatriciaTrieReadingUtils::createAndGetFlags(nodeReader.isBlacklisted(),
    130                         nodeReader.isNotAWord(), nodeReader.getProbability() != NOT_A_PROBABILITY,
    131                         nodeReader.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */,
    132                         nodeReader.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE);
    133         writingPos = newNodePos;
    134         // Write updated flags into the moved PtNode's flags field.
    135         return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
    136                 &writingPos);
    137     }
    138 }
    139 
    140 // Remove a bigram relation from word0Pos to word1Pos.
    141 bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) {
    142     DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
    143     nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos);
    144     if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) {
    145         return false;
    146     }
    147     return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos);
    148 }
    149 
    150 void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
    151         const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
    152     BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
    153     const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
    154             mBuffer->getUsedAdditionalBufferSize();
    155     if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
    156             false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) {
    157         return;
    158     }
    159     DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer);
    160 }
    161 
    162 void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
    163         const char *const fileName, const HeaderPolicy *const headerPolicy) {
    164     BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */,
    165             MAX_DICTIONARY_SIZE);
    166     int unigramCount = 0;
    167     int bigramCount = 0;
    168     if (mNeedsToDecay) {
    169         ForgettingCurveUtils::sTimeKeeper.setCurrentTime();
    170     }
    171     if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) {
    172         return;
    173     }
    174     BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
    175     if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
    176             mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
    177         return;
    178     }
    179     DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer);
    180 }
    181 
    182 bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted(
    183         const DynamicPatriciaTrieNodeReader *const nodeToUpdate) {
    184     int pos = nodeToUpdate->getHeadPos();
    185     const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
    186     const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
    187     if (usesAdditionalBuffer) {
    188         pos -= mBuffer->getOriginalBufferSize();
    189     }
    190     // Read original flags
    191     const PatriciaTrieReadingUtils::NodeFlags originalFlags =
    192             PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
    193     const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
    194             DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */,
    195                     true /* isDeleted */);
    196     int writingPos = nodeToUpdate->getHeadPos();
    197     // Update flags.
    198     return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
    199             &writingPos);
    200 }
    201 
    202 bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
    203         const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos,
    204         const int bigramLinkedNodePos) {
    205     int pos = originalNode->getHeadPos();
    206     const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos);
    207     const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
    208     if (usesAdditionalBuffer) {
    209         pos -= mBuffer->getOriginalBufferSize();
    210     }
    211     // Read original flags
    212     const PatriciaTrieReadingUtils::NodeFlags originalFlags =
    213             PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos);
    214     const PatriciaTrieReadingUtils::NodeFlags updatedFlags =
    215             DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */,
    216                     false /* isDeleted */);
    217     int writingPos = originalNode->getHeadPos();
    218     // Update flags.
    219     if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags,
    220             &writingPos)) {
    221         return false;
    222     }
    223     // Update moved position, which is stored in the parent offset field.
    224     if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
    225             mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) {
    226         return false;
    227     }
    228     // Update bigram linked node position, which is stored in the children position field.
    229     int childrenPosFieldPos = originalNode->getChildrenPosFieldPos();
    230     if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
    231             mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) {
    232         return false;
    233     }
    234     if (originalNode->hasChildren()) {
    235         // Update children's parent position.
    236         DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
    237         const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader();
    238         readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos());
    239         while (!readingHelper.isEnd()) {
    240             int parentOffsetFieldPos = nodeReader->getHeadPos()
    241                     + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
    242             if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(
    243                     mBuffer, bigramLinkedNodePos, nodeReader->getHeadPos(),
    244                     &parentOffsetFieldPos)) {
    245                 // Parent offset cannot be written because of a bug or a broken dictionary; thus,
    246                 // we give up to update dictionary.
    247                 return false;
    248             }
    249             readingHelper.readNextSiblingNode();
    250         }
    251     }
    252     return true;
    253 }
    254 
    255 // Write new PtNode at writingPos.
    256 bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(
    257         BufferWithExtendableBuffer *const bufferToWrite, const bool isBlacklisted,
    258         const bool isNotAWord, const int parentPos, const int *const codePoints,
    259         const int codePointCount, const int probability, const int childrenPos,
    260         const int originalBigramListPos, const int originalShortcutListPos,
    261         int *const writingPos) {
    262     const int nodePos = *writingPos;
    263     // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the
    264     // PtNode writing.
    265     if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite,
    266             0 /* nodeFlags */, writingPos)) {
    267         return false;
    268     }
    269     // Calculate a parent offset and write the offset.
    270     if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(bufferToWrite,
    271             parentPos, nodePos, writingPos)) {
    272         return false;
    273     }
    274     // Write code points
    275     if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(bufferToWrite,
    276             codePoints, codePointCount, writingPos)) {
    277         return false;
    278     }
    279     // Write probability when the probability is a valid probability, which means this node is
    280     // terminal.
    281     if (probability != NOT_A_PROBABILITY) {
    282         if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(bufferToWrite,
    283                 probability, writingPos)) {
    284             return false;
    285         }
    286     }
    287     // Write children position
    288     if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(bufferToWrite,
    289             childrenPos, writingPos)) {
    290         return false;
    291     }
    292     // Copy shortcut list when the originalShortcutListPos is valid dictionary position.
    293     if (originalShortcutListPos != NOT_A_DICT_POS) {
    294         int fromPos = originalShortcutListPos;
    295         if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(bufferToWrite, &fromPos,
    296                 writingPos)) {
    297             return false;
    298         }
    299     }
    300     // Copy bigram list when the originalBigramListPos is valid dictionary position.
    301     int bigramCount = 0;
    302     if (originalBigramListPos != NOT_A_DICT_POS) {
    303         int fromPos = originalBigramListPos;
    304         if (!mBigramPolicy->copyAllBigrams(bufferToWrite, &fromPos, writingPos, &bigramCount)) {
    305             return false;
    306         }
    307     }
    308     // Create node flags and write them.
    309     PatriciaTrieReadingUtils::NodeFlags nodeFlags =
    310             PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord,
    311                     probability != NOT_A_PROBABILITY /* isTerminal */,
    312                     originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */,
    313                     bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
    314                     CHILDREN_POSITION_FIELD_SIZE);
    315     int flagsFieldPos = nodePos;
    316     if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, nodeFlags,
    317             &flagsFieldPos)) {
    318         return false;
    319     }
    320     return true;
    321 }
    322 
    323 bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(
    324         BufferWithExtendableBuffer *const bufferToWrite, const int parentPos,
    325         const int *const codePoints, const int codePointCount, const int probability,
    326         int *const writingPos) {
    327     return writePtNodeWithFullInfoToBuffer(bufferToWrite, false /* isBlacklisted */,
    328             false /* isNotAWord */, parentPos, codePoints, codePointCount, probability,
    329             NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */,
    330             NOT_A_DICT_POS /* originalShortcutPos */, writingPos);
    331 }
    332 
    333 bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo(
    334         BufferWithExtendableBuffer *const bufferToWrite,
    335         const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
    336         const int *const codePoints, const int codePointCount, const int probability,
    337         int *const writingPos) {
    338     return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalNode->isBlacklisted(),
    339             originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability,
    340             originalNode->getChildrenPos(), originalNode->getBigramsPos(),
    341             originalNode->getShortcutPos(), writingPos);
    342 }
    343 
    344 bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
    345         const int *const nodeCodePoints, const int nodeCodePointCount, const int probability,
    346         int *const forwardLinkFieldPos) {
    347     const int newPtNodeArrayPos = mBuffer->getTailPosition();
    348     if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
    349             newPtNodeArrayPos, forwardLinkFieldPos)) {
    350         return false;
    351     }
    352     return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
    353             probability);
    354 }
    355 
    356 bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
    357         const DynamicPatriciaTrieNodeReader *const originalPtNode, const int probability,
    358         const int *const codePoints, bool *const outAddedNewUnigram) {
    359     if (originalPtNode->isTerminal()) {
    360         // Overwrites the probability.
    361         *outAddedNewUnigram = false;
    362         const int probabilityToWrite = getUpdatedProbability(originalPtNode->getProbability(),
    363                 probability);
    364         int probabilityFieldPos = originalPtNode->getProbabilityFieldPos();
    365         if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
    366                 probabilityToWrite, &probabilityFieldPos)) {
    367             return false;
    368         }
    369     } else {
    370         // Make the node terminal and write the probability.
    371         *outAddedNewUnigram = true;
    372         int movedPos = mBuffer->getTailPosition();
    373         if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) {
    374             return false;
    375         }
    376         if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode,
    377                 originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(),
    378                 getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
    379                 &movedPos)) {
    380             return false;
    381         }
    382     }
    383     return true;
    384 }
    385 
    386 bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode(
    387         const DynamicPatriciaTrieNodeReader *const parentNode, const int probability,
    388         const int *const codePoints, const int codePointCount) {
    389     const int newPtNodeArrayPos = mBuffer->getTailPosition();
    390     int childrenPosFieldPos = parentNode->getChildrenPosFieldPos();
    391     if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
    392             newPtNodeArrayPos, &childrenPosFieldPos)) {
    393         return false;
    394     }
    395     return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints,
    396             codePointCount, probability);
    397 }
    398 
    399 bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode(
    400         const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
    401         const int probability) {
    402     int writingPos = mBuffer->getTailPosition();
    403     if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
    404             1 /* arraySize */, &writingPos)) {
    405         return false;
    406     }
    407     if (!writePtNodeToBuffer(mBuffer, parentPtNodePos, nodeCodePoints, nodeCodePointCount,
    408             probability, &writingPos)) {
    409         return false;
    410     }
    411     if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
    412             NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
    413         return false;
    414     }
    415     return true;
    416 }
    417 
    418 // Returns whether the dictionary updating was succeeded or not.
    419 bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
    420         const DynamicPatriciaTrieNodeReader *const reallocatingPtNode,
    421         const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
    422         const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
    423         const int newNodeCodePointCount) {
    424     // When addsExtraChild is true, split the reallocating PtNode and add new child.
    425     // Reallocating PtNode: abcde, newNode: abcxy.
    426     // abc (1st, not terminal) __ de (2nd)
    427     //                         \_ xy (extra child, terminal)
    428     // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode.
    429     // Reallocating PtNode: abcde, newNode: abc.
    430     // abc (1st, terminal) __ de (2nd)
    431     const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
    432     const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
    433     int writingPos = firstPartOfReallocatedPtNodePos;
    434     // Write the 1st part of the reallocating node. The children position will be updated later
    435     // with actual children position.
    436     const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
    437     if (!writePtNodeToBuffer(mBuffer, reallocatingPtNode->getParentPos(),
    438             reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability,
    439             &writingPos)) {
    440         return false;
    441     }
    442     const int actualChildrenPos = writingPos;
    443     // Create new children PtNode array.
    444     const size_t newPtNodeCount = addsExtraChild ? 2 : 1;
    445     if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
    446             newPtNodeCount, &writingPos)) {
    447         return false;
    448     }
    449     // Write the 2nd part of the reallocating node.
    450     const int secondPartOfReallocatedPtNodePos = writingPos;
    451     if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNode,
    452             firstPartOfReallocatedPtNodePos,
    453             reallocatingPtNodeCodePoints + overlappingCodePointCount,
    454             reallocatingPtNode->getCodePointCount() - overlappingCodePointCount,
    455             reallocatingPtNode->getProbability(), &writingPos)) {
    456         return false;
    457     }
    458     if (addsExtraChild) {
    459         if (!writePtNodeToBuffer(mBuffer, firstPartOfReallocatedPtNodePos,
    460                 newNodeCodePoints + overlappingCodePointCount,
    461                 newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode,
    462                 &writingPos)) {
    463             return false;
    464         }
    465     }
    466     if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
    467             NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
    468         return false;
    469     }
    470     // Update original reallocatingPtNode as moved.
    471     if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos,
    472             secondPartOfReallocatedPtNodePos)) {
    473         return false;
    474     }
    475     // Load node info. Information of the 1st part will be fetched.
    476     DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
    477     nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos);
    478     // Update children position.
    479     int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
    480     if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
    481             actualChildrenPos, &childrenPosFieldPos)) {
    482         return false;
    483     }
    484     return true;
    485 }
    486 
    487 bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
    488         const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite,
    489         int *const outUnigramCount, int *const outBigramCount) {
    490     DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
    491     readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
    492     DynamicPatriciaTrieGcEventListeners
    493             ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
    494                     traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
    495                             headerPolicy, this, mBuffer, mNeedsToDecay);
    496     if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
    497             &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
    498         return false;
    499     }
    500     if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
    501             .getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
    502         // TODO: Remove more unigrams.
    503     }
    504 
    505     readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
    506     DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
    507             traversePolicyToUpdateBigramProbability(mBigramPolicy);
    508     if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
    509             &traversePolicyToUpdateBigramProbability)) {
    510         return false;
    511     }
    512     if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
    513             > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
    514         // TODO: Remove more bigrams.
    515     }
    516 
    517     // Mapping from positions in mBuffer to positions in bufferToWrite.
    518     DictPositionRelocationMap dictPositionRelocationMap;
    519     readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
    520     DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
    521             traversePolicyToPlaceAndWriteValidPtNodesToBuffer(this, bufferToWrite,
    522                     &dictPositionRelocationMap);
    523     if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
    524             &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
    525         return false;
    526     }
    527 
    528     // Create policy instance for the GCed dictionary.
    529     DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
    530     DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy,
    531             mNeedsToDecay);
    532     // Create reading helper for the GCed dictionary.
    533     DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
    534             &newDictShortcutPolicy);
    535     newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
    536     DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
    537             traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite,
    538                     &dictPositionRelocationMap);
    539     if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
    540             &traversePolicyToUpdateAllPositionFields)) {
    541         return false;
    542     }
    543     *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
    544     *outBigramCount = traversePolicyToUpdateAllPositionFields.getBigramCount();
    545     return true;
    546 }
    547 
    548 int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
    549         const int newProbability) {
    550     if (mNeedsToDecay) {
    551         return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability,
    552                 newProbability);
    553     } else {
    554         return newProbability;
    555     }
    556 }
    557 
    558 } // namespace latinime
    559