Home | History | Annotate | Download | only in pt_common
      1 /*
      2  * Copyright (C) 2013, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LATINIME_PT_NODE_PARAMS_H
     18 #define LATINIME_PT_NODE_PARAMS_H
     19 
     20 #include <cstring>
     21 
     22 #include "defines.h"
     23 #include "dictionary/structure/pt_common/dynamic_pt_reading_utils.h"
     24 #include "dictionary/structure/pt_common/patricia_trie_reading_utils.h"
     25 #include "dictionary/structure/v4/ver4_dict_constants.h"
     26 #include "utils/char_utils.h"
     27 #include "utils/int_array_view.h"
     28 
     29 namespace latinime {
     30 
     31 // This class has information of a PtNode. This class is immutable.
     32 class PtNodeParams {
     33  public:
     34     // Invalid PtNode.
     35     PtNodeParams() : mHeadPos(NOT_A_DICT_POS), mFlags(0), mHasMovedFlag(false),
     36             mParentPos(NOT_A_DICT_POS), mCodePointCount(0), mCodePoints(),
     37             mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
     38             mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
     39             mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
     40             mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
     41             mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {}
     42 
     43     PtNodeParams(const PtNodeParams& ptNodeParams)
     44             : mHeadPos(ptNodeParams.mHeadPos), mFlags(ptNodeParams.mFlags),
     45               mHasMovedFlag(ptNodeParams.mHasMovedFlag), mParentPos(ptNodeParams.mParentPos),
     46               mCodePointCount(ptNodeParams.mCodePointCount), mCodePoints(),
     47               mTerminalIdFieldPos(ptNodeParams.mTerminalIdFieldPos),
     48               mTerminalId(ptNodeParams.mTerminalId),
     49               mProbabilityFieldPos(ptNodeParams.mProbabilityFieldPos),
     50               mProbability(ptNodeParams.mProbability),
     51               mChildrenPosFieldPos(ptNodeParams.mChildrenPosFieldPos),
     52               mChildrenPos(ptNodeParams.mChildrenPos),
     53               mBigramLinkedNodePos(ptNodeParams.mBigramLinkedNodePos),
     54               mShortcutPos(ptNodeParams.mShortcutPos), mBigramPos(ptNodeParams.mBigramPos),
     55               mSiblingPos(ptNodeParams.mSiblingPos) {
     56         memcpy(mCodePoints, ptNodeParams.getCodePoints(), sizeof(int) * mCodePointCount);
     57     }
     58 
     59     // PtNode read from version 2 dictionary.
     60     PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
     61             const int codePointCount, const int *const codePoints, const int probability,
     62             const int childrenPos, const int shortcutPos, const int bigramPos,
     63             const int siblingPos)
     64             : mHeadPos(headPos), mFlags(flags), mHasMovedFlag(false), mParentPos(NOT_A_DICT_POS),
     65               mCodePointCount(codePointCount), mCodePoints(), mTerminalIdFieldPos(NOT_A_DICT_POS),
     66               mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
     67               mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
     68               mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(childrenPos),
     69               mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(shortcutPos),
     70               mBigramPos(bigramPos), mSiblingPos(siblingPos) {
     71         memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
     72     }
     73 
     74     // PtNode with a terminal id.
     75     PtNodeParams(const int headPos, const PatriciaTrieReadingUtils::NodeFlags flags,
     76             const int parentPos, const int codePointCount, const int *const codePoints,
     77             const int terminalIdFieldPos, const int terminalId, const int probability,
     78             const int childrenPosFieldPos, const int childrenPos, const int siblingPos)
     79             : mHeadPos(headPos), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos),
     80               mCodePointCount(codePointCount), mCodePoints(),
     81               mTerminalIdFieldPos(terminalIdFieldPos), mTerminalId(terminalId),
     82               mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
     83               mChildrenPosFieldPos(childrenPosFieldPos), mChildrenPos(childrenPos),
     84               mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(terminalId),
     85               mBigramPos(terminalId), mSiblingPos(siblingPos) {
     86         memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
     87     }
     88 
     89     // Construct new params by updating existing PtNode params.
     90     PtNodeParams(const PtNodeParams *const ptNodeParams,
     91             const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
     92             const CodePointArrayView codePoints, const int probability)
     93             : mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mHasMovedFlag(true),
     94               mParentPos(parentPos), mCodePointCount(codePoints.size()), mCodePoints(),
     95               mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()),
     96               mTerminalId(ptNodeParams->getTerminalId()),
     97               mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()),
     98               mProbability(probability),
     99               mChildrenPosFieldPos(ptNodeParams->getChildrenPosFieldPos()),
    100               mChildrenPos(ptNodeParams->getChildrenPos()),
    101               mBigramLinkedNodePos(ptNodeParams->getBigramLinkedNodePos()),
    102               mShortcutPos(ptNodeParams->getShortcutPos()),
    103               mBigramPos(ptNodeParams->getBigramsPos()),
    104               mSiblingPos(ptNodeParams->getSiblingNodePos()) {
    105         memcpy(mCodePoints, codePoints.data(), sizeof(int) * mCodePointCount);
    106     }
    107 
    108     PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
    109             const CodePointArrayView codePoints, const int probability)
    110             : mHeadPos(NOT_A_DICT_POS), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos),
    111               mCodePointCount(codePoints.size()), mCodePoints(),
    112               mTerminalIdFieldPos(NOT_A_DICT_POS),
    113               mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
    114               mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
    115               mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
    116               mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
    117               mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {
    118         memcpy(mCodePoints, codePoints.data(), sizeof(int) * mCodePointCount);
    119     }
    120 
    121     AK_FORCE_INLINE bool isValid() const {
    122         return mCodePointCount > 0;
    123     }
    124 
    125     // Head position of the PtNode
    126     AK_FORCE_INLINE int getHeadPos() const {
    127         return mHeadPos;
    128     }
    129 
    130     // Flags
    131     AK_FORCE_INLINE bool isDeleted() const {
    132         return mHasMovedFlag && DynamicPtReadingUtils::isDeleted(mFlags);
    133     }
    134 
    135     AK_FORCE_INLINE bool willBecomeNonTerminal() const {
    136         return mHasMovedFlag && DynamicPtReadingUtils::willBecomeNonTerminal(mFlags);
    137     }
    138 
    139     AK_FORCE_INLINE bool hasChildren() const {
    140         return mChildrenPos != NOT_A_DICT_POS;
    141     }
    142 
    143     AK_FORCE_INLINE bool isTerminal() const {
    144         return PatriciaTrieReadingUtils::isTerminal(mFlags);
    145     }
    146 
    147     AK_FORCE_INLINE bool isPossiblyOffensive() const {
    148         return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags);
    149     }
    150 
    151     AK_FORCE_INLINE bool isNotAWord() const {
    152         return PatriciaTrieReadingUtils::isNotAWord(mFlags);
    153     }
    154 
    155     AK_FORCE_INLINE bool hasBigrams() const {
    156         return PatriciaTrieReadingUtils::hasBigrams(mFlags);
    157     }
    158 
    159     AK_FORCE_INLINE bool hasShortcutTargets() const {
    160         return PatriciaTrieReadingUtils::hasShortcutTargets(mFlags);
    161     }
    162 
    163     AK_FORCE_INLINE bool representsNonWordInfo() const {
    164         return getCodePointCount() > 0 && !CharUtils::isInUnicodeSpace(getCodePoints()[0])
    165                 && isNotAWord();
    166     }
    167 
    168     AK_FORCE_INLINE int representsBeginningOfSentence() const {
    169         return getCodePointCount() > 0 && getCodePoints()[0] == CODE_POINT_BEGINNING_OF_SENTENCE
    170                 && isNotAWord();
    171     }
    172 
    173     // Parent node position
    174     AK_FORCE_INLINE int getParentPos() const {
    175         return mParentPos;
    176     }
    177 
    178     AK_FORCE_INLINE const CodePointArrayView getCodePointArrayView() const {
    179         return CodePointArrayView(mCodePoints, mCodePointCount);
    180     }
    181 
    182     // TODO: Remove
    183     // Number of code points
    184     AK_FORCE_INLINE uint8_t getCodePointCount() const {
    185         return mCodePointCount;
    186     }
    187 
    188     // TODO: Remove
    189     AK_FORCE_INLINE const int *getCodePoints() const {
    190         return mCodePoints;
    191     }
    192 
    193     // Probability
    194     AK_FORCE_INLINE int getTerminalIdFieldPos() const {
    195         return mTerminalIdFieldPos;
    196     }
    197 
    198     AK_FORCE_INLINE int getTerminalId() const {
    199         return mTerminalId;
    200     }
    201 
    202     // Probability
    203     AK_FORCE_INLINE int getProbabilityFieldPos() const {
    204         return mProbabilityFieldPos;
    205     }
    206 
    207     AK_FORCE_INLINE int getProbability() const {
    208         return mProbability;
    209     }
    210 
    211     // Children PtNode array position
    212     AK_FORCE_INLINE int getChildrenPosFieldPos() const {
    213         return mChildrenPosFieldPos;
    214     }
    215 
    216     AK_FORCE_INLINE int getChildrenPos() const {
    217         return mChildrenPos;
    218     }
    219 
    220     // Bigram linked node position.
    221     AK_FORCE_INLINE int getBigramLinkedNodePos() const {
    222         return mBigramLinkedNodePos;
    223     }
    224 
    225     // Shortcutlist position
    226     AK_FORCE_INLINE int getShortcutPos() const {
    227         return mShortcutPos;
    228     }
    229 
    230     // Bigrams position
    231     AK_FORCE_INLINE int getBigramsPos() const {
    232         return mBigramPos;
    233     }
    234 
    235     // Sibling node position
    236     AK_FORCE_INLINE int getSiblingNodePos() const {
    237         return mSiblingPos;
    238     }
    239 
    240  private:
    241     // This class have a public copy constructor to be used as a return value.
    242     DISALLOW_ASSIGNMENT_OPERATOR(PtNodeParams);
    243 
    244     const int mHeadPos;
    245     const PatriciaTrieReadingUtils::NodeFlags mFlags;
    246     const bool mHasMovedFlag;
    247     const int mParentPos;
    248     const uint8_t mCodePointCount;
    249     int mCodePoints[MAX_WORD_LENGTH];
    250     const int mTerminalIdFieldPos;
    251     const int mTerminalId;
    252     const int mProbabilityFieldPos;
    253     const int mProbability;
    254     const int mChildrenPosFieldPos;
    255     const int mChildrenPos;
    256     const int mBigramLinkedNodePos;
    257     const int mShortcutPos;
    258     const int mBigramPos;
    259     const int mSiblingPos;
    260 };
    261 } // namespace latinime
    262 #endif /* LATINIME_PT_NODE_PARAMS_H */
    263