Home | History | Annotate | Download | only in pt_common
      1 /*
      2  * Copyright (C) 2013, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LATINIME_PT_NODE_PARAMS_H
     18 #define LATINIME_PT_NODE_PARAMS_H
     19 
     20 #include <cstring>
     21 
     22 #include "defines.h"
     23 #include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
     24 #include "suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
     25 #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
     26 #include "utils/char_utils.h"
     27 
     28 namespace latinime {
     29 
     30 // This class has information of a PtNode. This class is immutable.
     31 class PtNodeParams {
     32  public:
     33     // Invalid PtNode.
     34     PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mHasMovedFlag(false),
     35             mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mCodePoints(),
     36             mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
     37             mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
     38             mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
     39             mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
     40             mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {}
     41 
     42     PtNodeParams(const PtNodeParams& ptNodeParams)
     43             : mHeadPos(ptNodeParams.mHeadPos), mFlags(ptNodeParams.mFlags),
     44               mHasMovedFlag(ptNodeParams.mHasMovedFlag), mParentPos(ptNodeParams.mParentPos),
     45               mCodePointCount(ptNodeParams.mCodePointCount), mCodePoints(),
     46               mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos),
     47               mTerminalId(ptNodeParams.mTerminalId),
     48               mProbabilityFieldPos(ptNodeParams.mProbabilityFieldPos),
     49               mProbability(ptNodeParams.mProbability),
     50               mChildrenPosFieldPos(ptNodeParams.mChildrenPosFieldPos),
     51               mChildrenPos(ptNodeParams.mChildrenPos),
     52               mBigramLinkedNodePos(ptNodeParams.mBigramLinkedNodePos),
     53               mShortcutPos(ptNodeParams.mShortcutPos), mBigramPos(ptNodeParams.mBigramPos),
     54               mSiblingPos(ptNodeParams.mSiblingPos) {
     55         memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
     56     }
     57 
     58     // PtNode read from version 2 dictionary.
     59     PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
     60             const int codePointCount, const int *const codePoints, const int probability,
     61             const int childrenPos, const int shortcutPos, const int bigramPos,
     62             const int siblingPos)
     63             : mHeadPos(headPos), mFlags(flags), mHasMovedFlag(false), mParentPos(NOT_A_DICT_POS),
     64               mCodePointCount(codePointCount), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
     65               mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
     66               mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
     67               mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(childrenPos),
     68               mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(shortcutPos),
     69               mBigramPos(bigramPos), mSiblingPos(siblingPos) {
     70         memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
     71     }
     72 
     73     // PtNode with a terminal id.
     74     PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
     75             const int parentPos, const int codePointCount, const int *const codePoints,
     76             const int terminalIdFieldPos, const int terminalId, const int probability,
     77             const int childrenPosFieldPos, const int childrenPos, const int siblingPos)
     78             : mHeadPos(headPos), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos),
     79               mCodePointCount(codePointCount), mCodePoints(),
     80               mTerminalIdFieldPos(terminalIdFieldPos), mTerminalId(terminalId),
     81               mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
     82               mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos),
     83               mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(terminalId),
     84               mBigramPos(terminalId), mSiblingPos(siblingPos) {
     85         memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
     86     }
     87 
     88     // Construct new params by updating existing PtNode params.
     89     PtNodeParams(const PtNodeParams *const ptNodeParams,
     90             const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
     91             const int codePointCount, const int *const codePoints, const int probability)
     92             : mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mHasMovedFlag(true),
     93               mParentPos(parentPos), mCodePointCount(codePointCount), mCodePoints(),
     94               mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()),
     95               mTerminalId(ptNodeParams->getTerminalId()),
     96               mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()),
     97               mProbability(probability),
     98               mChildrenPosFieldPos(ptNodeParams->getChildrenPosFieldPos()),
     99               mChildrenPos(ptNodeParams->getChildrenPos()),
    100               mBigramLinkedNodePos(ptNodeParams->getBigramLinkedNodePos()),
    101               mShortcutPos(ptNodeParams->getShortcutPos()),
    102               mBigramPos(ptNodeParams->getBigramsPos()),
    103               mSiblingPos(ptNodeParams->getSiblingNodePos()) {
    104         memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
    105     }
    106 
    107     PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
    108             const int codePointCount, const int *const codePoints, const int probability)
    109             : mHeadPos(NOT_A_DICT_POS), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos),
    110               mCodePointCount(codePointCount), mCodePoints(),
    111               mTerminalIdFieldPos(NOT_A_DICT_POS),
    112               mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
    113               mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
    114               mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
    115               mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
    116               mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {
    117         memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
    118     }
    119 
    120     AK_FORCE_INLINE bool isValid() const {
    121         return mCodePointCount > 0;
    122     }
    123 
    124     // Head position of the PtNode
    125     AK_FORCE_INLINE int getHeadPos() const {
    126         return mHeadPos;
    127     }
    128 
    129     // Flags
    130     AK_FORCE_INLINE bool isDeleted() const {
    131         return mHasMovedFlag && DynamicPtReadingUtils::isDeleted(mFlags);
    132     }
    133 
    134     AK_FORCE_INLINE bool willBecomeNonTerminal() const {
    135         return mHasMovedFlag && DynamicPtReadingUtils::willBecomeNonTerminal(mFlags);
    136     }
    137 
    138     AK_FORCE_INLINE bool hasChildren() const {
    139         return mChildrenPos != NOT_A_DICT_POS;
    140     }
    141 
    142     AK_FORCE_INLINE bool isTerminal() const {
    143         return PatriciaTrieReadingUtils::isTerminal(mFlags);
    144     }
    145 
    146     AK_FORCE_INLINE bool isBlacklisted() const {
    147         return PatriciaTrieReadingUtils::isBlacklisted(mFlags);
    148     }
    149 
    150     AK_FORCE_INLINE bool isNotAWord() const {
    151         return PatriciaTrieReadingUtils::isNotAWord(mFlags);
    152     }
    153 
    154     AK_FORCE_INLINE bool hasBigrams() const {
    155         return PatriciaTrieReadingUtils::hasBigrams(mFlags);
    156     }
    157 
    158     AK_FORCE_INLINE bool hasShortcutTargets() const {
    159         return PatriciaTrieReadingUtils::hasShortcutTargets(mFlags);
    160     }
    161 
    162     AK_FORCE_INLINE bool representsNonWordInfo() const {
    163         return getCodePointCount() > 0 && !CharUtils::isInUnicodeSpace(getCodePoints()[0])
    164                 && isNotAWord();
    165     }
    166 
    167     AK_FORCE_INLINE int representsBeginningOfSentence() const {
    168         return getCodePointCount() > 0 && getCodePoints()[0] == CODE_POINT_BEGINNING_OF_SENTENCE
    169                 && isNotAWord();
    170     }
    171 
    172     // Parent node position
    173     AK_FORCE_INLINE int getParentPos() const {
    174         return mParentPos;
    175     }
    176 
    177     // Number of code points
    178     AK_FORCE_INLINE uint8_t getCodePointCount() const {
    179         return mCodePointCount;
    180     }
    181 
    182     AK_FORCE_INLINE const int *getCodePoints() const {
    183         return mCodePoints;
    184     }
    185 
    186     // Probability
    187     AK_FORCE_INLINE int getTerminalIdFieldPos() const {
    188         return mTerminalIdFieldPos;
    189     }
    190 
    191     AK_FORCE_INLINE int getTerminalId() const {
    192         return mTerminalId;
    193     }
    194 
    195     // Probability
    196     AK_FORCE_INLINE int getProbabilityFieldPos() const {
    197         return mProbabilityFieldPos;
    198     }
    199 
    200     AK_FORCE_INLINE int getProbability() const {
    201         return mProbability;
    202     }
    203 
    204     // Children PtNode array position
    205     AK_FORCE_INLINE int getChildrenPosFieldPos() const {
    206         return mChildrenPosFieldPos;
    207     }
    208 
    209     AK_FORCE_INLINE int getChildrenPos() const {
    210         return mChildrenPos;
    211     }
    212 
    213     // Bigram linked node position.
    214     AK_FORCE_INLINE int getBigramLinkedNodePos() const {
    215         return mBigramLinkedNodePos;
    216     }
    217 
    218     // Shortcutlist position
    219     AK_FORCE_INLINE int getShortcutPos() const {
    220         return mShortcutPos;
    221     }
    222 
    223     // Bigrams position
    224     AK_FORCE_INLINE int getBigramsPos() const {
    225         return mBigramPos;
    226     }
    227 
    228     // Sibling node position
    229     AK_FORCE_INLINE int getSiblingNodePos() const {
    230         return mSiblingPos;
    231     }
    232 
    233  private:
    234     // This class have a public copy constructor to be used as a return value.
    235     DISALLOW_ASSIGNMENT_OPERATOR(PtNodeParams);
    236 
    237     const int mHeadPos;
    238     const PatriciaTrieReadingUtils::NodeFlags mFlags;
    239     const bool mHasMovedFlag;
    240     const int mParentPos;
    241     const uint8_t mCodePointCount;
    242     int mCodePoints[MAX_WORD_LENGTH];
    243     const int mTerminalIdFieldPos;
    244     const int mTerminalId;
    245     const int mProbabilityFieldPos;
    246     const int mProbability;
    247     const int mChildrenPosFieldPos;
    248     const int mChildrenPos;
    249     const int mBigramLinkedNodePos;
    250     const int mShortcutPos;
    251     const int mBigramPos;
    252     const int mSiblingPos;
    253 };
    254 } // namespace latinime
    255 #endif /* LATINIME_PT_NODE_PARAMS_H */
    256