Home | History | Annotate | Download | only in v2
      1 /*
      2  * Copyright (C) 2013, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LATINIME_PATRICIA_TRIE_POLICY_H
     18 #define LATINIME_PATRICIA_TRIE_POLICY_H
     19 
#include <cstdint>
#include <utility>
#include <vector>

#include "defines.h"
#include "dictionary/header/header_policy.h"
#include "dictionary/interface/dictionary_structure_with_buffer_policy.h"
#include "dictionary/structure/v2/bigram/bigram_list_policy.h"
#include "dictionary/structure/v2/shortcut/shortcut_list_policy.h"
#include "dictionary/structure/v2/ver2_patricia_trie_node_reader.h"
#include "dictionary/structure/v2/ver2_pt_node_array_reader.h"
#include "dictionary/utils/format_utils.h"
#include "dictionary/utils/mmapped_buffer.h"
#include "utils/byte_array_view.h"
#include "utils/int_array_view.h"
     34 
     35 namespace latinime {
     36 
     37 class DicNode;
     38 class DicNodeVector;
     39 
     40 // Word id = Position of a PtNode that represents the word.
     41 // Max supported n-gram is bigram.
     42 class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
     43  public:
     44     PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)
     45             : mMmappedBuffer(std::move(mmappedBuffer)),
     46               mHeaderPolicy(mMmappedBuffer->getReadOnlyByteArrayView().data(),
     47                       FormatUtils::detectFormatVersion(mMmappedBuffer->getReadOnlyByteArrayView())),
     48               mBuffer(mMmappedBuffer->getReadOnlyByteArrayView().skip(mHeaderPolicy.getSize())),
     49               mBigramListPolicy(mBuffer), mShortcutListPolicy(mBuffer),
     50               mPtNodeReader(mBuffer, &mBigramListPolicy, &mShortcutListPolicy,
     51                       mHeaderPolicy.getCodePointTable()),
     52               mPtNodeArrayReader(mBuffer), mTerminalPtNodePositionsForIteratingWords(),
     53               mIsCorrupted(false) {}
     54 
     55     AK_FORCE_INLINE int getRootPosition() const {
     56         return 0;
     57     }
     58 
     59     void createAndGetAllChildDicNodes(const DicNode *const dicNode,
     60             DicNodeVector *const childDicNodes) const;
     61 
     62     int getCodePointsAndReturnCodePointCount(const int wordId, const int maxCodePointCount,
     63             int *const outCodePoints) const;
     64 
     65     int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const;
     66 
     67     const WordAttributes getWordAttributesInContext(const WordIdArrayView prevWordIds,
     68             const int wordId, MultiBigramMap *const multiBigramMap) const;
     69 
     70     int getProbability(const int unigramProbability, const int bigramProbability) const;
     71 
     72     int getProbabilityOfWord(const WordIdArrayView prevWordIds, const int wordId) const;
     73 
     74     void iterateNgramEntries(const WordIdArrayView prevWordIds,
     75             NgramListener *const listener) const;
     76 
     77     BinaryDictionaryShortcutIterator getShortcutIterator(const int wordId) const;
     78 
     79     const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
     80         return &mHeaderPolicy;
     81     }
     82 
     83     bool addUnigramEntry(const CodePointArrayView wordCodePoints,
     84             const UnigramProperty *const unigramProperty) {
     85         // This method should not be called for non-updatable dictionary.
     86         AKLOGI("Warning: addUnigramEntry() is called for non-updatable dictionary.");
     87         return false;
     88     }
     89 
     90     bool removeUnigramEntry(const CodePointArrayView wordCodePoints) {
     91         // This method should not be called for non-updatable dictionary.
     92         AKLOGI("Warning: removeUnigramEntry() is called for non-updatable dictionary.");
     93         return false;
     94     }
     95 
     96     bool addNgramEntry(const NgramProperty *const ngramProperty) {
     97         // This method should not be called for non-updatable dictionary.
     98         AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
     99         return false;
    100     }
    101 
    102     bool removeNgramEntry(const NgramContext *const ngramContext,
    103             const CodePointArrayView wordCodePoints) {
    104         // This method should not be called for non-updatable dictionary.
    105         AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
    106         return false;
    107     }
    108 
    109     bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
    110             const CodePointArrayView wordCodePoints, const bool isValidWord,
    111             const HistoricalInfo historicalInfo) {
    112         // This method should not be called for non-updatable dictionary.
    113         AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable "
    114                 "dictionary.");
    115         return false;
    116     }
    117 
    118     bool flush(const char *const filePath) {
    119         // This method should not be called for non-updatable dictionary.
    120         AKLOGI("Warning: flush() is called for non-updatable dictionary.");
    121         return false;
    122     }
    123 
    124     bool flushWithGC(const char *const filePath) {
    125         // This method should not be called for non-updatable dictionary.
    126         AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
    127         return false;
    128     }
    129 
    130     bool needsToRunGC(const bool mindsBlockByGC) const {
    131         // This method should not be called for non-updatable dictionary.
    132         AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
    133         return false;
    134     }
    135 
    136     void getProperty(const char *const query, const int queryLength, char *const outResult,
    137             const int maxResultLength) {
    138         // getProperty is not supported for this class.
    139         if (maxResultLength > 0) {
    140             outResult[0] = '\0';
    141         }
    142     }
    143 
    144     const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const;
    145 
    146     int getNextWordAndNextToken(const int token, int *const outCodePoints,
    147             int *const outCodePointCount);
    148 
    149     bool isCorrupted() const {
    150         return mIsCorrupted;
    151     }
    152 
    153  private:
    154     DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
    155 
    156     const MmappedBuffer::MmappedBufferPtr mMmappedBuffer;
    157     const HeaderPolicy mHeaderPolicy;
    158     const ReadOnlyByteArrayView mBuffer;
    159     const BigramListPolicy mBigramListPolicy;
    160     const ShortcutListPolicy mShortcutListPolicy;
    161     const Ver2ParticiaTrieNodeReader mPtNodeReader;
    162     const Ver2PtNodeArrayReader mPtNodeArrayReader;
    163     std::vector<int> mTerminalPtNodePositionsForIteratingWords;
    164     mutable bool mIsCorrupted;
    165 
    166     int getCodePointsAndProbabilityAndReturnCodePointCount(const int wordId,
    167             const int maxCodePointCount, int *const outCodePoints,
    168             int *const outUnigramProbability) const;
    169     int getShortcutPositionOfPtNode(const int ptNodePos) const;
    170     int getBigramsPositionOfPtNode(const int ptNodePos) const;
    171     int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
    172             DicNodeVector *const childDicNodes) const;
    173     int getWordIdFromTerminalPtNodePos(const int ptNodePos) const;
    174     int getTerminalPtNodePosFromWordId(const int wordId) const;
    175     const WordAttributes getWordAttributes(const int probability,
    176             const PtNodeParams &ptNodeParams) const;
    177     bool isValidPos(const int pos) const;
    178 };
    179 } // namespace latinime
    180 #endif // LATINIME_PATRICIA_TRIE_POLICY_H
    181