Home | History | Annotate | Download | only in pt_common
      1 /*
      2  * Copyright (C) 2013, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LATINIME_PATRICIA_TRIE_READING_UTILS_H
     18 #define LATINIME_PATRICIA_TRIE_READING_UTILS_H
     19 
     20 #include <cstdint>
     21 
     22 #include "defines.h"
     23 
     24 namespace latinime {
     25 
     26 class DictionaryShortcutsStructurePolicy;
     27 class DictionaryBigramsStructurePolicy;
     28 
     29 class PatriciaTrieReadingUtils {
     30  public:
     31     typedef uint8_t NodeFlags;
     32 
     33     static int getPtNodeArraySizeAndAdvancePosition(const uint8_t *const buffer, int *const pos);
     34 
     35     static NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer, int *const pos);
     36 
     37     static int getCodePointAndAdvancePosition(const uint8_t *const buffer,
     38             const int *const codePointTable, int *const pos);
     39 
     40     // Returns the number of read characters.
     41     static int getCharsAndAdvancePosition(const uint8_t *const buffer, const NodeFlags flags,
     42             const int maxLength, const int *const codePointTable, int *const outBuffer,
     43             int *const pos);
     44 
     45     // Returns the number of skipped characters.
     46     static int skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
     47             const int maxLength, const int *const codePointTable, int *const pos);
     48 
     49     static int readProbabilityAndAdvancePosition(const uint8_t *const buffer, int *const pos);
     50 
     51     static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer,
     52             const NodeFlags flags, int *const pos);
     53 
     54     /**
     55      * Node Flags
     56      */
     57     static AK_FORCE_INLINE bool isPossiblyOffensive(const NodeFlags flags) {
     58         return (flags & FLAG_IS_POSSIBLY_OFFENSIVE) != 0;
     59     }
     60 
     61     static AK_FORCE_INLINE bool isNotAWord(const NodeFlags flags) {
     62         return (flags & FLAG_IS_NOT_A_WORD) != 0;
     63     }
     64 
     65     static AK_FORCE_INLINE bool isTerminal(const NodeFlags flags) {
     66         return (flags & FLAG_IS_TERMINAL) != 0;
     67     }
     68 
     69     static AK_FORCE_INLINE bool hasShortcutTargets(const NodeFlags flags) {
     70         return (flags & FLAG_HAS_SHORTCUT_TARGETS) != 0;
     71     }
     72 
     73     static AK_FORCE_INLINE bool hasBigrams(const NodeFlags flags) {
     74         return (flags & FLAG_HAS_BIGRAMS) != 0;
     75     }
     76 
     77     static AK_FORCE_INLINE bool hasMultipleChars(const NodeFlags flags) {
     78         return (flags & FLAG_HAS_MULTIPLE_CHARS) != 0;
     79     }
     80 
     81     static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) {
     82         return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags);
     83     }
     84 
     85     static AK_FORCE_INLINE NodeFlags createAndGetFlags(const bool isPossiblyOffensive,
     86             const bool isNotAWord, const bool isTerminal, const bool hasShortcutTargets,
     87             const bool hasBigrams, const bool hasMultipleChars,
     88             const int childrenPositionFieldSize) {
     89         NodeFlags nodeFlags = 0;
     90         nodeFlags = isPossiblyOffensive ? (nodeFlags | FLAG_IS_POSSIBLY_OFFENSIVE) : nodeFlags;
     91         nodeFlags = isNotAWord ? (nodeFlags | FLAG_IS_NOT_A_WORD) : nodeFlags;
     92         nodeFlags = isTerminal ? (nodeFlags | FLAG_IS_TERMINAL) : nodeFlags;
     93         nodeFlags = hasShortcutTargets ? (nodeFlags | FLAG_HAS_SHORTCUT_TARGETS) : nodeFlags;
     94         nodeFlags = hasBigrams ? (nodeFlags | FLAG_HAS_BIGRAMS) : nodeFlags;
     95         nodeFlags = hasMultipleChars ? (nodeFlags | FLAG_HAS_MULTIPLE_CHARS) : nodeFlags;
     96         if (childrenPositionFieldSize == 1) {
     97             nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_ONEBYTE;
     98         } else if (childrenPositionFieldSize == 2) {
     99             nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_TWOBYTES;
    100         } else if (childrenPositionFieldSize == 3) {
    101             nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_THREEBYTES;
    102         } else {
    103             nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_NOPOSITION;
    104         }
    105         return nodeFlags;
    106     }
    107 
    108     static void readPtNodeInfo(const uint8_t *const dictBuf, const int ptNodePos,
    109             const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
    110             const DictionaryBigramsStructurePolicy *const bigramPolicy,
    111             const int *const codePointTable, NodeFlags *const outFlags,
    112             int *const outCodePointCount, int *const outCodePoint, int *const outProbability,
    113             int *const outChildrenPos, int *const outShortcutPos, int *const outBigramPos,
    114             int *const outSiblingPos);
    115 
    116  private:
    117     DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);
    118 
    119     static const NodeFlags MASK_CHILDREN_POSITION_TYPE;
    120     static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_NOPOSITION;
    121     static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_ONEBYTE;
    122     static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_TWOBYTES;
    123     static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_THREEBYTES;
    124 
    125     static const NodeFlags FLAG_HAS_MULTIPLE_CHARS;
    126     static const NodeFlags FLAG_IS_TERMINAL;
    127     static const NodeFlags FLAG_HAS_SHORTCUT_TARGETS;
    128     static const NodeFlags FLAG_HAS_BIGRAMS;
    129     static const NodeFlags FLAG_IS_NOT_A_WORD;
    130     static const NodeFlags FLAG_IS_POSSIBLY_OFFENSIVE;
    131 };
    132 } // namespace latinime
    133 #endif /* LATINIME_PATRICIA_TRIE_READING_UTILS_H */
    134