Home | History | Annotate | Download | only in v2
      1 /*
      2  * Copyright (C) 2013, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LATINIME_PATRICIA_TRIE_POLICY_H
     18 #define LATINIME_PATRICIA_TRIE_POLICY_H
     19 
#include <cstdint>
#include <utility>
#include <vector>

#include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/bigram/bigram_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/shortcut/shortcut_list_policy.h"
#include "suggest/policyimpl/dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/v2/ver2_pt_node_array_reader.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
#include "utils/byte_array_view.h"
     33 
     34 namespace latinime {
     35 
     36 class DicNode;
     37 class DicNodeVector;
     38 
     39 class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
     40  public:
     41     PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)
     42             : mMmappedBuffer(std::move(mmappedBuffer)),
     43               mHeaderPolicy(mMmappedBuffer->getReadOnlyByteArrayView().data(),
     44                       FormatUtils::VERSION_2),
     45               mDictRoot(mMmappedBuffer->getReadOnlyByteArrayView().data()
     46                       + mHeaderPolicy.getSize()),
     47               mDictBufferSize(mMmappedBuffer->getReadOnlyByteArrayView().size()
     48                       - mHeaderPolicy.getSize()),
     49               mBigramListPolicy(mDictRoot, mDictBufferSize), mShortcutListPolicy(mDictRoot),
     50               mPtNodeReader(mDictRoot, mDictBufferSize, &mBigramListPolicy, &mShortcutListPolicy),
     51               mPtNodeArrayReader(mDictRoot, mDictBufferSize),
     52               mTerminalPtNodePositionsForIteratingWords(), mIsCorrupted(false) {}
     53 
     54     AK_FORCE_INLINE int getRootPosition() const {
     55         return 0;
     56     }
     57 
     58     void createAndGetAllChildDicNodes(const DicNode *const dicNode,
     59             DicNodeVector *const childDicNodes) const;
     60 
     61     int getCodePointsAndProbabilityAndReturnCodePointCount(
     62             const int terminalNodePos, const int maxCodePointCount, int *const outCodePoints,
     63             int *const outUnigramProbability) const;
     64 
     65     int getTerminalPtNodePositionOfWord(const int *const inWord,
     66             const int length, const bool forceLowerCaseSearch) const;
     67 
     68     int getProbability(const int unigramProbability, const int bigramProbability) const;
     69 
     70     int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const;
     71 
     72     void iterateNgramEntries(const int *const prevWordsPtNodePos,
     73             NgramListener *const listener) const;
     74 
     75     int getShortcutPositionOfPtNode(const int ptNodePos) const;
     76 
     77     const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
     78         return &mHeaderPolicy;
     79     }
     80 
     81     const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
     82         return &mShortcutListPolicy;
     83     }
     84 
     85     bool addUnigramEntry(const int *const word, const int length,
     86             const UnigramProperty *const unigramProperty) {
     87         // This method should not be called for non-updatable dictionary.
     88         AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
     89         return false;
     90     }
     91 
     92     bool removeUnigramEntry(const int *const word, const int length) {
     93         // This method should not be called for non-updatable dictionary.
     94         AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
     95         return false;
     96     }
     97 
     98     bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
     99             const BigramProperty *const bigramProperty) {
    100         // This method should not be called for non-updatable dictionary.
    101         AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
    102         return false;
    103     }
    104 
    105     bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo, const int *const word,
    106             const int length) {
    107         // This method should not be called for non-updatable dictionary.
    108         AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
    109         return false;
    110     }
    111 
    112     bool flush(const char *const filePath) {
    113         // This method should not be called for non-updatable dictionary.
    114         AKLOGI("Warning: flush() is called for non-updatable dictionary.");
    115         return false;
    116     }
    117 
    118     bool flushWithGC(const char *const filePath) {
    119         // This method should not be called for non-updatable dictionary.
    120         AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
    121         return false;
    122     }
    123 
    124     bool needsToRunGC(const bool mindsBlockByGC) const {
    125         // This method should not be called for non-updatable dictionary.
    126         AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
    127         return false;
    128     }
    129 
    130     void getProperty(const char *const query, const int queryLength, char *const outResult,
    131             const int maxResultLength) {
    132         // getProperty is not supported for this class.
    133         if (maxResultLength > 0) {
    134             outResult[0] = '\0';
    135         }
    136     }
    137 
    138     const WordProperty getWordProperty(const int *const codePoints,
    139             const int codePointCount) const;
    140 
    141     int getNextWordAndNextToken(const int token, int *const outCodePoints,
    142             int *const outCodePointCount);
    143 
    144     bool isCorrupted() const {
    145         return mIsCorrupted;
    146     }
    147 
    148  private:
    149     DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
    150 
    151     const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
    152     const HeaderPolicy mHeaderPolicy;
    153     const uint8_t *const mDictRoot;
    154     const int mDictBufferSize;
    155     const BigramListPolicy mBigramListPolicy;
    156     const ShortcutListPolicy mShortcutListPolicy;
    157     const Ver2ParticiaTrieNodeReader mPtNodeReader;
    158     const Ver2PtNodeArrayReader mPtNodeArrayReader;
    159     std::vector<int> mTerminalPtNodePositionsForIteratingWords;
    160     mutable bool mIsCorrupted;
    161 
    162     int getBigramsPositionOfPtNode(const int ptNodePos) const;
    163     int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
    164             DicNodeVector *const childDicNodes) const;
    165 };
    166 } // namespace latinime
    167 #endif // LATINIME_PATRICIA_TRIE_POLICY_H
    168