Home | History | Annotate | Download | only in pt_common
      1 /*
      2  * Copyright (C) 2013, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LATINIME_DYNAMIC_PT_READING_HELPER_H
     18 #define LATINIME_DYNAMIC_PT_READING_HELPER_H
     19 
     20 #include <cstddef>
     21 #include <vector>
     22 
     23 #include "defines.h"
     24 #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h"
     25 #include "suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
     26 
     27 namespace latinime {
     28 
     29 class DictionaryShortcutsStructurePolicy;
     30 class PtNodeArrayReader;
     31 
     32 /*
     33  * This class is used for traversing dynamic patricia trie. This class supports iterating nodes and
     34  * dealing with additional buffer. This class counts nodes and node arrays to avoid infinite loop.
     35  */
     36 class DynamicPtReadingHelper {
     37  public:
     38     class TraversingEventListener {
     39      public:
     40         virtual ~TraversingEventListener() {};
     41 
     42         // Returns whether the event handling was succeeded or not.
     43         virtual bool onAscend() = 0;
     44 
     45         // Returns whether the event handling was succeeded or not.
     46         virtual bool onDescend(const int ptNodeArrayPos) = 0;
     47 
     48         // Returns whether the event handling was succeeded or not.
     49         virtual bool onReadingPtNodeArrayTail() = 0;
     50 
     51         // Returns whether the event handling was succeeded or not.
     52         virtual bool onVisitingPtNode(const PtNodeParams *const node) = 0;
     53 
     54      protected:
     55         TraversingEventListener() {};
     56 
     57      private:
     58         DISALLOW_COPY_AND_ASSIGN(TraversingEventListener);
     59     };
     60 
     61     class TraversePolicyToGetAllTerminalPtNodePositions : public TraversingEventListener {
     62      public:
     63         TraversePolicyToGetAllTerminalPtNodePositions(std::vector<int> *const terminalPositions)
     64                 : mTerminalPositions(terminalPositions) {}
     65         bool onAscend() { return true; }
     66         bool onDescend(const int ptNodeArrayPos) { return true; }
     67         bool onReadingPtNodeArrayTail() { return true; }
     68         bool onVisitingPtNode(const PtNodeParams *const ptNodeParams);
     69 
     70      private:
     71         DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToGetAllTerminalPtNodePositions);
     72 
     73         std::vector<int> *const mTerminalPositions;
     74     };
     75 
     76     DynamicPtReadingHelper(const PtNodeReader *const ptNodeReader,
     77             const PtNodeArrayReader *const ptNodeArrayReader)
     78             : mIsError(false), mReadingState(), mPtNodeReader(ptNodeReader),
     79               mPtNodeArrayReader(ptNodeArrayReader), mReadingStateStack() {}
     80 
     81     ~DynamicPtReadingHelper() {}
     82 
     83     AK_FORCE_INLINE bool isError() const {
     84         return mIsError;
     85     }
     86 
     87     AK_FORCE_INLINE bool isEnd() const {
     88         return mReadingState.mPos == NOT_A_DICT_POS;
     89     }
     90 
     91     // Initialize reading state with the head position of a PtNode array.
     92     AK_FORCE_INLINE void initWithPtNodeArrayPos(const int ptNodeArrayPos) {
     93         if (ptNodeArrayPos == NOT_A_DICT_POS) {
     94             mReadingState.mPos = NOT_A_DICT_POS;
     95         } else {
     96             mIsError = false;
     97             mReadingState.mPos = ptNodeArrayPos;
     98             mReadingState.mTotalCodePointCountSinceInitialization = 0;
     99             mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
    100             mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
    101             mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
    102             mReadingStateStack.clear();
    103             nextPtNodeArray();
    104         }
    105     }
    106 
    107     // Initialize reading state with the head position of a node.
    108     AK_FORCE_INLINE void initWithPtNodePos(const int ptNodePos) {
    109         if (ptNodePos == NOT_A_DICT_POS) {
    110             mReadingState.mPos = NOT_A_DICT_POS;
    111         } else {
    112             mIsError = false;
    113             mReadingState.mPos = ptNodePos;
    114             mReadingState.mRemainingPtNodeCountInThisArray = 1;
    115             mReadingState.mTotalCodePointCountSinceInitialization = 0;
    116             mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
    117             mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
    118             mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
    119             mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
    120             mReadingStateStack.clear();
    121         }
    122     }
    123 
    124     AK_FORCE_INLINE const PtNodeParams getPtNodeParams() const {
    125         if (isEnd()) {
    126             return PtNodeParams();
    127         }
    128         return mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(mReadingState.mPos);
    129     }
    130 
    131     AK_FORCE_INLINE bool isValidTerminalNode(const PtNodeParams &ptNodeParams) const {
    132         return !isEnd() && !ptNodeParams.isDeleted() && ptNodeParams.isTerminal();
    133     }
    134 
    135     AK_FORCE_INLINE bool isMatchedCodePoint(const PtNodeParams &ptNodeParams, const int index,
    136             const int codePoint) const {
    137         return ptNodeParams.getCodePoints()[index] == codePoint;
    138     }
    139 
    140     // Return code point count exclude the last read node's code points.
    141     AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
    142         return mReadingState.mTotalCodePointCountSinceInitialization;
    143     }
    144 
    145     // Return code point count include the last read node's code points.
    146     AK_FORCE_INLINE int getTotalCodePointCount(const PtNodeParams &ptNodeParams) const {
    147         return mReadingState.mTotalCodePointCountSinceInitialization
    148                 + ptNodeParams.getCodePointCount();
    149     }
    150 
    151     AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(const PtNodeParams &ptNodeParams,
    152             const int index, int *const outCodePoints) const {
    153         const int nodeCodePointCount = ptNodeParams.getCodePointCount();
    154         const int *const nodeCodePoints = ptNodeParams.getCodePoints();
    155         for (int i =  0; i < nodeCodePointCount; ++i) {
    156             outCodePoints[index + i] = nodeCodePoints[nodeCodePointCount - 1 - i];
    157         }
    158     }
    159 
    160     AK_FORCE_INLINE void readNextSiblingNode(const PtNodeParams &ptNodeParams) {
    161         mReadingState.mRemainingPtNodeCountInThisArray -= 1;
    162         mReadingState.mPos = ptNodeParams.getSiblingNodePos();
    163         if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) {
    164             // All nodes in the current node array have been read.
    165             followForwardLink();
    166         }
    167     }
    168 
    169     // Read the first child node of the current node.
    170     AK_FORCE_INLINE void readChildNode(const PtNodeParams &ptNodeParams) {
    171         if (ptNodeParams.hasChildren()) {
    172             mReadingState.mTotalCodePointCountSinceInitialization +=
    173                     ptNodeParams.getCodePointCount();
    174             mReadingState.mTotalPtNodeIndexInThisArrayChain = 0;
    175             mReadingState.mPtNodeArrayIndexInThisArrayChain = 0;
    176             mReadingState.mPos = ptNodeParams.getChildrenPos();
    177             mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
    178             // Read children node array.
    179             nextPtNodeArray();
    180         } else {
    181             mReadingState.mPos = NOT_A_DICT_POS;
    182         }
    183     }
    184 
    185     // Read the parent node of the current node.
    186     AK_FORCE_INLINE void readParentNode(const PtNodeParams &ptNodeParams) {
    187         if (ptNodeParams.getParentPos() != NOT_A_DICT_POS) {
    188             mReadingState.mTotalCodePointCountSinceInitialization +=
    189                     ptNodeParams.getCodePointCount();
    190             mReadingState.mTotalPtNodeIndexInThisArrayChain = 1;
    191             mReadingState.mPtNodeArrayIndexInThisArrayChain = 1;
    192             mReadingState.mRemainingPtNodeCountInThisArray = 1;
    193             mReadingState.mPos = ptNodeParams.getParentPos();
    194             mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS;
    195             mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS;
    196         } else {
    197             mReadingState.mPos = NOT_A_DICT_POS;
    198         }
    199     }
    200 
    201     AK_FORCE_INLINE int getPosOfLastForwardLinkField() const {
    202         return mReadingState.mPosOfLastForwardLinkField;
    203     }
    204 
    205     AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const {
    206         return mReadingState.mPosOfThisPtNodeArrayHead;
    207     }
    208 
    209     bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener);
    210 
    211     bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
    212             TraversingEventListener *const listener);
    213 
    214     int getCodePointsAndProbabilityAndReturnCodePointCount(const int maxCodePointCount,
    215             int *const outCodePoints, int *const outUnigramProbability);
    216 
    217     int getTerminalPtNodePositionOfWord(const int *const inWord, const int length,
    218             const bool forceLowerCaseSearch);
    219 
    220  private:
    221     DISALLOW_COPY_AND_ASSIGN(DynamicPtReadingHelper);
    222 
    223     // This class encapsulates the reading state of a position in the dictionary. It points at a
    224     // specific PtNode in the dictionary.
    225     class PtNodeReadingState {
    226      public:
    227         // Note that copy constructor and assignment operator are used for this class to use
    228         // std::vector.
    229         PtNodeReadingState() : mPos(NOT_A_DICT_POS), mRemainingPtNodeCountInThisArray(0),
    230                 mTotalCodePointCountSinceInitialization(0), mTotalPtNodeIndexInThisArrayChain(0),
    231                 mPtNodeArrayIndexInThisArrayChain(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS),
    232                 mPosOfThisPtNodeArrayHead(NOT_A_DICT_POS) {}
    233 
    234         int mPos;
    235         // Remaining node count in the current array.
    236         int mRemainingPtNodeCountInThisArray;
    237         int mTotalCodePointCountSinceInitialization;
    238         // Counter of PtNodes used to avoid infinite loops caused by broken or malicious links.
    239         int mTotalPtNodeIndexInThisArrayChain;
    240         // Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty
    241         // PtNode arrays.
    242         int mPtNodeArrayIndexInThisArrayChain;
    243         int mPosOfLastForwardLinkField;
    244         int mPosOfThisPtNodeArrayHead;
    245     };
    246 
    247     static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP;
    248     static const int MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
    249     static const size_t MAX_READING_STATE_STACK_SIZE;
    250 
    251     // TODO: Introduce error code to track what caused the error.
    252     bool mIsError;
    253     PtNodeReadingState mReadingState;
    254     const PtNodeReader *const mPtNodeReader;
    255     const PtNodeArrayReader *const mPtNodeArrayReader;
    256     std::vector<PtNodeReadingState> mReadingStateStack;
    257 
    258     void nextPtNodeArray();
    259 
    260     void followForwardLink();
    261 
    262     AK_FORCE_INLINE void pushReadingStateToStack() {
    263         if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) {
    264             AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE);
    265             ASSERT(false);
    266             mIsError = true;
    267             mReadingState.mPos = NOT_A_DICT_POS;
    268         } else {
    269             mReadingStateStack.push_back(mReadingState);
    270         }
    271     }
    272 
    273     AK_FORCE_INLINE void popReadingStateFromStack() {
    274         if (mReadingStateStack.empty()) {
    275             mReadingState.mPos = NOT_A_DICT_POS;
    276         } else {
    277             mReadingState = mReadingStateStack.back();
    278             mReadingStateStack.pop_back();
    279         }
    280     }
    281 };
    282 } // namespace latinime
    283 #endif /* LATINIME_DYNAMIC_PT_READING_HELPER_H */
    284