Home | History | Annotate | Download | only in dictionary
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
     18 
     19 #include "suggest/core/policy/dictionary_header_structure_policy.h"
     20 #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
     21 
     22 namespace latinime {
     23 
     24 bool DynamicPatriciaTrieGcEventListeners
     25         ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
     26                 ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
     27                         const int *const nodeCodePoints) {
     28     // PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
     29     // children.
     30     bool isUselessPtNode = !node->isTerminal();
     31     if (node->isTerminal() && mIsDecayingDict) {
     32         const int newProbability =
     33                 ForgettingCurveUtils::getEncodedProbabilityToSave(node->getProbability(),
     34                         mHeaderPolicy);
     35         int writingPos = node->getProbabilityFieldPos();
     36         // Update probability.
     37         if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
     38                 mBuffer, newProbability, &writingPos)) {
     39             return false;
     40         }
     41         if (!ForgettingCurveUtils::isValidEncodedProbability(newProbability)) {
     42             isUselessPtNode = true;
     43         }
     44     }
     45     if (mChildrenValue > 0) {
     46         isUselessPtNode = false;
     47     } else if (node->isTerminal()) {
     48         // Remove children as all children are useless.
     49         int writingPos = node->getChildrenPosFieldPos();
     50         if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(
     51                 mBuffer, NOT_A_DICT_POS /* childrenPosition */, &writingPos)) {
     52             return false;
     53         }
     54     }
     55     if (isUselessPtNode) {
     56         // Current PtNode is no longer needed. Mark it as deleted.
     57         if (!mWritingHelper->markNodeAsDeleted(node)) {
     58             return false;
     59         }
     60     } else {
     61         mValueStack.back() += 1;
     62         if (node->isTerminal()) {
     63             mValidUnigramCount += 1;
     64         }
     65     }
     66     return true;
     67 }
     68 
     69 bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
     70         ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
     71                 const int *const nodeCodePoints) {
     72     if (!node->isDeleted()) {
     73         int pos = node->getBigramsPos();
     74         if (pos != NOT_A_DICT_POS) {
     75             int bigramEntryCount = 0;
     76             if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos,
     77                     &bigramEntryCount)) {
     78                 return false;
     79             }
     80             mValidBigramEntryCount += bigramEntryCount;
     81         }
     82     }
     83     return true;
     84 }
     85 
     86 // Writes dummy PtNode array size when the head of PtNode array is read.
     87 bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
     88         ::onDescend(const int ptNodeArrayPos) {
     89     mValidPtNodeCount = 0;
     90     int writingPos = mBufferToWrite->getTailPosition();
     91     mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.insert(
     92             DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::value_type(
     93                     ptNodeArrayPos, writingPos));
     94     // Writes dummy PtNode array size because arrays can have a forward link or needles PtNodes.
     95     // This field will be updated later in onReadingPtNodeArrayTail() with actual PtNode count.
     96     mPtNodeArraySizeFieldPos = writingPos;
     97     return DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
     98             mBufferToWrite, 0 /* arraySize */, &writingPos);
     99 }
    100 
    101 // Write PtNode array terminal and actual PtNode array size.
    102 bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
    103         ::onReadingPtNodeArrayTail() {
    104     int writingPos = mBufferToWrite->getTailPosition();
    105     // Write PtNode array terminal.
    106     if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(
    107             mBufferToWrite, NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
    108         return false;
    109     }
    110     // Write actual PtNode array size.
    111     if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(
    112             mBufferToWrite, mValidPtNodeCount, &mPtNodeArraySizeFieldPos)) {
    113         return false;
    114     }
    115     return true;
    116 }
    117 
    118 // Write valid PtNode to buffer and memorize mapping from the old position to the new position.
    119 bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
    120         ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
    121                 const int *const nodeCodePoints) {
    122     if (node->isDeleted()) {
    123         // Current PtNode is not written in new buffer because it has been deleted.
    124         mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
    125                 DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
    126                         node->getHeadPos(), NOT_A_DICT_POS));
    127         return true;
    128     }
    129     int writingPos = mBufferToWrite->getTailPosition();
    130     mDictPositionRelocationMap->mPtNodePositionRelocationMap.insert(
    131             DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::value_type(
    132                     node->getHeadPos(), writingPos));
    133     mValidPtNodeCount++;
    134     // Writes current PtNode.
    135     return mWritingHelper->writePtNodeToBufferByCopyingPtNodeInfo(mBufferToWrite, node,
    136             node->getParentPos(), nodeCodePoints, node->getCodePointCount(),
    137             node->getProbability(), &writingPos);
    138 }
    139 
    140 bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields
    141         ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
    142                 const int *const nodeCodePoints) {
    143     // Updates parent position.
    144     int parentPos = node->getParentPos();
    145     if (parentPos != NOT_A_DICT_POS) {
    146         DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it =
    147                 mDictPositionRelocationMap->mPtNodePositionRelocationMap.find(parentPos);
    148         if (it != mDictPositionRelocationMap->mPtNodePositionRelocationMap.end()) {
    149             parentPos = it->second;
    150         }
    151     }
    152     int writingPos = node->getHeadPos() + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE;
    153     // Write updated parent offset.
    154     if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(mBufferToWrite,
    155             parentPos, node->getHeadPos(), &writingPos)) {
    156         return false;
    157     }
    158 
    159     // Updates children position.
    160     int childrenPos = node->getChildrenPos();
    161     if (childrenPos != NOT_A_DICT_POS) {
    162         DynamicPatriciaTrieWritingHelper::PtNodeArrayPositionRelocationMap::const_iterator it =
    163                 mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.find(childrenPos);
    164         if (it != mDictPositionRelocationMap->mPtNodeArrayPositionRelocationMap.end()) {
    165             childrenPos = it->second;
    166         }
    167     }
    168     writingPos = node->getChildrenPosFieldPos();
    169     if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBufferToWrite,
    170             childrenPos, &writingPos)) {
    171         return false;
    172     }
    173 
    174     // Updates bigram target PtNode positions in the bigram list.
    175     int bigramsPos = node->getBigramsPos();
    176     if (bigramsPos != NOT_A_DICT_POS) {
    177         int bigramEntryCount;
    178         if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos,
    179                 &mDictPositionRelocationMap->mPtNodePositionRelocationMap, &bigramEntryCount)) {
    180             return false;
    181         }
    182         mBigramCount += bigramEntryCount;
    183     }
    184     if (node->isTerminal()) {
    185         mUnigramCount++;
    186     }
    187 
    188     return true;
    189 }
    190 
    191 } // namespace latinime
    192