Home | History | Annotate | Download | only in v402
      1 /*
      2  * Copyright (C) 2013, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
     19  * Do not edit this file other than updating policy's interface.
     20  *
     21  * This file was generated from
     22  *   dictionary/structure/v4/ver4_patricia_trie_policy.h
     23  */
     24 
     25 #ifndef LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H
     26 #define LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H
     27 
     28 #include <vector>
     29 
     30 #include "defines.h"
     31 #include "dictionary/header/header_policy.h"
     32 #include "dictionary/interface/dictionary_structure_with_buffer_policy.h"
     33 #include "dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
     34 #include "dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
     35 #include "dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h"
     36 #include "dictionary/structure/backward/v402/ver4_dict_buffers.h"
     37 #include "dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
     38 #include "dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h"
     39 #include "dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h"
     40 #include "dictionary/structure/backward/v402/ver4_pt_node_array_reader.h"
     41 #include "dictionary/utils/binary_dictionary_bigrams_iterator.h"
     42 #include "dictionary/utils/binary_dictionary_shortcut_iterator.h"
     43 #include "dictionary/utils/buffer_with_extendable_buffer.h"
     44 #include "dictionary/utils/entry_counters.h"
     45 #include "utils/int_array_view.h"
     46 
     47 namespace latinime {
     48 namespace backward {
     49 namespace v402 {
     50 
     51 } // namespace v402
     52 } // namespace backward
     53 class DicNode;
     54 namespace backward {
     55 namespace v402 {
     56 } // namespace v402
     57 } // namespace backward
     58 class DicNodeVector;
     59 namespace backward {
     60 namespace v402 {
     61 
     62 // Word id = Position of a PtNode that represents the word.
     63 // Max supported n-gram is bigram.
     64 class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
     65  public:
     66     Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)
     67             : mBuffers(std::move(buffers)), mHeaderPolicy(mBuffers->getHeaderPolicy()),
     68               mDictBuffer(mBuffers->getWritableTrieBuffer()),
     69               mBigramPolicy(mBuffers->getMutableBigramDictContent(),
     70                       mBuffers->getTerminalPositionLookupTable(), mHeaderPolicy),
     71               mShortcutPolicy(mBuffers->getMutableShortcutDictContent(),
     72                       mBuffers->getTerminalPositionLookupTable()),
     73               mNodeReader(mDictBuffer, mBuffers->getProbabilityDictContent(), mHeaderPolicy),
     74               mPtNodeArrayReader(mDictBuffer),
     75               mNodeWriter(mDictBuffer, mBuffers.get(), mHeaderPolicy, &mNodeReader,
     76                       &mPtNodeArrayReader, &mBigramPolicy, &mShortcutPolicy),
     77               mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter),
     78               mWritingHelper(mBuffers.get()),
     79               mEntryCounters(mHeaderPolicy->getNgramCounts().getCountArray()),
     80               mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {};
     81 
     82     virtual int getRootPosition() const {
     83         return 0;
     84     }
     85 
     86     void createAndGetAllChildDicNodes(const DicNode *const dicNode,
     87             DicNodeVector *const childDicNodes) const;
     88 
     89     int getCodePointsAndReturnCodePointCount(const int wordId, const int maxCodePointCount,
     90             int *const outCodePoints) const;
     91 
     92     int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const;
     93 
     94     const WordAttributes getWordAttributesInContext(const WordIdArrayView prevWordIds,
     95             const int wordId, MultiBigramMap *const multiBigramMap) const;
     96 
     97     int getProbability(const int unigramProbability, const int bigramProbability) const;
     98 
     99     int getProbabilityOfWord(const WordIdArrayView prevWordIds, const int wordId) const;
    100 
    101     void iterateNgramEntries(const WordIdArrayView prevWordIds,
    102             NgramListener *const listener) const;
    103 
    104     BinaryDictionaryShortcutIterator getShortcutIterator(const int wordId) const;
    105 
    106     const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
    107         return mHeaderPolicy;
    108     }
    109 
    110     bool addUnigramEntry(const CodePointArrayView wordCodePoints,
    111             const UnigramProperty *const unigramProperty);
    112 
    113     bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
    114 
    115     bool addNgramEntry(const NgramProperty *const ngramProperty);
    116 
    117     bool removeNgramEntry(const NgramContext *const ngramContext,
    118             const CodePointArrayView wordCodePoints);
    119 
    120     bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
    121             const CodePointArrayView wordCodePoints, const bool isValidWord,
    122             const HistoricalInfo historicalInfo);
    123 
    124     bool flush(const char *const filePath);
    125 
    126     bool flushWithGC(const char *const filePath);
    127 
    128     bool needsToRunGC(const bool mindsBlockByGC) const;
    129 
    130     void getProperty(const char *const query, const int queryLength, char *const outResult,
    131             const int maxResultLength);
    132 
    133     const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const;
    134 
    135     int getNextWordAndNextToken(const int token, int *const outCodePoints,
    136             int *const outCodePointCount);
    137 
    138     bool isCorrupted() const {
    139         return mIsCorrupted;
    140     }
    141 
    142  private:
    143     DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTriePolicy);
    144 
    145     static const char *const UNIGRAM_COUNT_QUERY;
    146     static const char *const BIGRAM_COUNT_QUERY;
    147     static const char *const MAX_UNIGRAM_COUNT_QUERY;
    148     static const char *const MAX_BIGRAM_COUNT_QUERY;
    149     // When the dictionary size is near the maximum size, we have to refuse dynamic operations to
    150     // prevent the dictionary from overflowing.
    151     static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
    152     static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
    153     static const int DUMMY_PROBABILITY_FOR_VALID_WORDS;
    154 
    155     const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
    156     const HeaderPolicy *const mHeaderPolicy;
    157     BufferWithExtendableBuffer *const mDictBuffer;
    158     Ver4BigramListPolicy mBigramPolicy;
    159     Ver4ShortcutListPolicy mShortcutPolicy;
    160     Ver4PatriciaTrieNodeReader mNodeReader;
    161     Ver4PtNodeArrayReader mPtNodeArrayReader;
    162     Ver4PatriciaTrieNodeWriter mNodeWriter;
    163     DynamicPtUpdatingHelper mUpdatingHelper;
    164     Ver4PatriciaTrieWritingHelper mWritingHelper;
    165     MutableEntryCounters mEntryCounters;
    166     std::vector<int> mTerminalPtNodePositionsForIteratingWords;
    167     mutable bool mIsCorrupted;
    168 
    169     int getBigramsPositionOfPtNode(const int ptNodePos) const;
    170     int getShortcutPositionOfPtNode(const int ptNodePos) const;
    171     int getWordIdFromTerminalPtNodePos(const int ptNodePos) const;
    172     int getTerminalPtNodePosFromWordId(const int wordId) const;
    173     const WordAttributes getWordAttributes(const int probability,
    174             const PtNodeParams &ptNodeParams) const;
    175     int getBigramConditionalProbability(const int prevWordUnigramProbability,
    176             const bool isInBeginningOfSentenceContext, const int bigramProbability) const;
    177 };
    178 } // namespace v402
    179 } // namespace backward
    180 } // namespace latinime
    181 #endif // LATINIME_BACKWARD_V402_VER4_PATRICIA_TRIE_POLICY_H
    182