Home | History | Annotate | Download | only in dictionary
      1 /*
      2  * Copyright (C) 2013, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
     18 #define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
     19 
     20 #include <stdint.h>
     21 
     22 #include "defines.h"
     23 #include "utils/hash_map_compat.h"
     24 
     25 namespace latinime {
     26 
     27 class BufferWithExtendableBuffer;
     28 class DynamicBigramListPolicy;
     29 class DynamicPatriciaTrieNodeReader;
     30 class DynamicPatriciaTrieReadingHelper;
     31 class DynamicShortcutListPolicy;
     32 class HeaderPolicy;
     33 
     34 class DynamicPatriciaTrieWritingHelper {
         // Declares the write-side operations used on a dynamic patricia trie dictionary: adding
         // unigram words, adding/removing bigram relations, and writing the dictionary to a file
         // (with or without GC). Only the constructor and the destructor are defined in this
         // header; every other member is declared here and defined elsewhere.
         // The object keeps the raw pointers handed to its constructor, and its destructor is
         // empty, so nothing it points to is released by this class.
     35  public:
            // Both map types are int -> int.
            // NOTE(review): presumably "original position -> relocated position", as the names
            // suggest; confirm against the GC implementation.
     36     typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap;
     37     typedef hash_map_compat<int, int> PtNodePositionRelocationMap;
            // Groups one map of each relocation-map type in a single object. Both maps are
            // default-constructed (empty) and publicly accessible.
     38     struct DictPositionRelocationMap {
     39      public:
     40         DictPositionRelocationMap()
     41                 : mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {}
     42 
     43         PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap;
     44         PtNodePositionRelocationMap mPtNodePositionRelocationMap;
     45 
     46      private:
                // Macro is not defined in this file (NOTE(review): presumably from "defines.h");
                // by its name it disables copy construction and assignment.
     47         DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap);
     48     };
     49 
            // Declared only; the value is defined outside this header.
            // NOTE(review): presumably an upper bound on the dictionary size -- confirm the unit.
     50     static const size_t MAX_DICTIONARY_SIZE;
     51 
            // Stores the given buffer, policies and decay flag in the corresponding members and
            // does nothing else. No null check is performed on the pointers here.
     52     DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
     53             DynamicBigramListPolicy *const bigramPolicy,
     54             DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay)
     55             : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
     56               mNeedsToDecay(needsToDecay) {}
     57 
            // Empty: no member is deleted or released here.
     58     ~DynamicPatriciaTrieWritingHelper() {}
     59 
     60     // Add a word to the dictionary. If the word already exists, update the probability.
            // wordCodePoints/codePointCount describe the word as an array of code points.
            // NOTE(review): presumably returns false on failure and sets *outAddedNewUnigram
            // when a new entry (rather than an update) was made -- confirm in the implementation.
     61     bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper,
     62             const int *const wordCodePoints, const int codePointCount, const int probability,
     63             bool *const outAddedNewUnigram);
     64 
     65     // Add a bigram relation from word0Pos to word1Pos.
            // NOTE(review): presumably returns false on failure and sets *outAddedNewBigram when
            // the relation did not exist before -- confirm in the implementation.
     66     bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
     67             bool *const outAddedNewBigram);
     68 
     69     // Remove a bigram relation from word0Pos to word1Pos.
     70     bool removeBigramWords(const int word0Pos, const int word1Pos);
     71 
            // Returns void, so failures are not reported to the caller through the return value.
            // NOTE(review): presumably writes a header built from headerPolicy and the given
            // counts followed by the dictionary body to fileName -- confirm.
     72     void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy,
     73             const int unigramCount, const int bigramCount);
     74 
            // Variant that takes the root PtNode array position instead of explicit counts.
            // NOTE(review): presumably runs GC (see runGC below) before writing -- confirm.
     75     void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName,
     76             const HeaderPolicy *const headerPolicy);
     77 
     78     // CAVEAT: This method must be called only from inner classes of
     79     // DynamicPatriciaTrieGcEventListeners.
            // It is public, so this restriction is by convention only; the compiler does not
            // enforce it.
     80     bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate);
     81 
     82     // CAVEAT: This method must be called only from this class or inner classes of
     83     // DynamicPatriciaTrieGcEventListeners.
            // writingPos is an in/out pointer.
            // NOTE(review): presumably advanced past the written PtNode on success -- confirm.
     84     bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite,
     85             const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
     86             const int *const codePoints, const int codePointCount, const int probability,
     87             int *const writingPos);
     88 
     89  private:
            // Macro is not defined in this file (NOTE(review): presumably from "defines.h");
            // by its name it disables the implicitly generated constructors.
     90     DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
     91 
            // Declared only; the value is defined outside this header.
     92     static const int CHILDREN_POSITION_FIELD_SIZE;
     93 
            // Set once by the constructor. The pointers themselves and the flag are const, so
            // they cannot be changed after construction; the pointed-to objects are not const.
     94     BufferWithExtendableBuffer *const mBuffer;
     95     DynamicBigramListPolicy *const mBigramPolicy;
     96     DynamicShortcutListPolicy *const mShortcutPolicy;
     97     const bool mNeedsToDecay;
     98 
            // Internal helpers. All are declared here and defined elsewhere.
            // NOTE(review): the bool results presumably mean "true on success" -- confirm.
     99     bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
    100             const int movedPos, const int bigramLinkedNodePos);
    101 
            // Takes every PtNode field explicitly (flags, parent, code points, probability,
            // children position, bigram/shortcut list positions) rather than copying from a node.
    102     bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
    103             const bool isBlacklisted, const bool isNotAWord,
    104             const int parentPos,  const int *const codePoints, const int codePointCount,
    105             const int probability, const int childrenPos, const int originalBigramListPos,
    106             const int originalShortcutListPos, int *const writingPos);
    107 
    108     bool writePtNodeToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
    109             const int parentPos, const int *const codePoints, const int codePointCount,
    110             const int probability, int *const writingPos);
    111 
    112     bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
    113             const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
    114 
    115     bool setPtNodeProbability(const DynamicPatriciaTrieNodeReader *const originalNode,
    116             const int probability, const int *const codePoints, bool *const outAddedNewUnigram);
    117 
    118     bool createChildrenPtNodeArrayAndAChildPtNode(
    119             const DynamicPatriciaTrieNodeReader *const parentNode, const int probability,
    120             const int *const codePoints, const int codePointCount);
    121 
    122     bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
    123             const int nodeCodePointCount, const int probability);
    124 
    125     bool reallocatePtNodeAndAddNewPtNodes(
    126             const DynamicPatriciaTrieNodeReader *const reallocatingPtNode,
    127             const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
    128             const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
    129             const int newNodeCodePointCount);
    130 
            // Writes into bufferToWrite and reports counts through the two out-pointers.
            // NOTE(review): presumably the counts are those remaining after GC -- confirm.
    131     bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy,
    132             BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
    133             int *const outBigramCount);
    134 
            // NOTE(review): how the two probabilities are combined (and whether mNeedsToDecay
            // affects it) is not visible from this header -- see the implementation.
    135     int getUpdatedProbability(const int originalProbability, const int newProbability);
    136 };
    137 } // namespace latinime
    138 #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
    139