Home | History | Annotate | Download | only in dictionary
      1 /*
      2  * Copyright (C) 2013, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LATINIME_PATRICIA_TRIE_READING_UTILS_H
     18 #define LATINIME_PATRICIA_TRIE_READING_UTILS_H
     19 
     20 #include <stdint.h>
     21 
     22 #include "defines.h"
     23 
     24 namespace latinime {
     25 
     26 class PatriciaTrieReadingUtils {
     27  public:
     28     typedef uint8_t NodeFlags;
     29 
     30     static int getPtNodeArraySizeAndAdvancePosition(const uint8_t *const buffer, int *const pos);
     31 
     32     static NodeFlags getFlagsAndAdvancePosition(const uint8_t *const buffer, int *const pos);
     33 
     34     static int getCodePointAndAdvancePosition(const uint8_t *const buffer, int *const pos);
     35 
     36     // Returns the number of read characters.
     37     static int getCharsAndAdvancePosition(const uint8_t *const buffer, const NodeFlags flags,
     38             const int maxLength, int *const outBuffer, int *const pos);
     39 
     40     // Returns the number of skipped characters.
     41     static int skipCharacters(const uint8_t *const buffer, const NodeFlags flags,
     42             const int maxLength, int *const pos);
     43 
     44     static int readProbabilityAndAdvancePosition(const uint8_t *const buffer, int *const pos);
     45 
     46     static int readChildrenPositionAndAdvancePosition(const uint8_t *const buffer,
     47             const NodeFlags flags, int *const pos);
     48 
     49     /**
     50      * Node Flags
     51      */
     52     static AK_FORCE_INLINE bool isBlacklisted(const NodeFlags flags) {
     53         return (flags & FLAG_IS_BLACKLISTED) != 0;
     54     }
     55 
     56     static AK_FORCE_INLINE bool isNotAWord(const NodeFlags flags) {
     57         return (flags & FLAG_IS_NOT_A_WORD) != 0;
     58     }
     59 
     60     static AK_FORCE_INLINE bool isTerminal(const NodeFlags flags) {
     61         return (flags & FLAG_IS_TERMINAL) != 0;
     62     }
     63 
     64     static AK_FORCE_INLINE bool hasShortcutTargets(const NodeFlags flags) {
     65         return (flags & FLAG_HAS_SHORTCUT_TARGETS) != 0;
     66     }
     67 
     68     static AK_FORCE_INLINE bool hasBigrams(const NodeFlags flags) {
     69         return (flags & FLAG_HAS_BIGRAMS) != 0;
     70     }
     71 
     72     static AK_FORCE_INLINE bool hasMultipleChars(const NodeFlags flags) {
     73         return (flags & FLAG_HAS_MULTIPLE_CHARS) != 0;
     74     }
     75 
     76     static AK_FORCE_INLINE bool hasChildrenInFlags(const NodeFlags flags) {
     77         return FLAG_CHILDREN_POSITION_TYPE_NOPOSITION != (MASK_CHILDREN_POSITION_TYPE & flags);
     78     }
     79 
     80     static AK_FORCE_INLINE NodeFlags createAndGetFlags(const bool isBlacklisted,
     81             const bool isNotAWord, const bool isTerminal, const bool hasShortcutTargets,
     82             const bool hasBigrams, const bool hasMultipleChars,
     83             const int childrenPositionFieldSize) {
     84         NodeFlags nodeFlags = 0;
     85         nodeFlags = isBlacklisted ? (nodeFlags | FLAG_IS_BLACKLISTED) : nodeFlags;
     86         nodeFlags = isNotAWord ? (nodeFlags | FLAG_IS_NOT_A_WORD) : nodeFlags;
     87         nodeFlags = isTerminal ? (nodeFlags | FLAG_IS_TERMINAL) : nodeFlags;
     88         nodeFlags = hasShortcutTargets ? (nodeFlags | FLAG_HAS_SHORTCUT_TARGETS) : nodeFlags;
     89         nodeFlags = hasBigrams ? (nodeFlags | FLAG_HAS_BIGRAMS) : nodeFlags;
     90         nodeFlags = hasMultipleChars ? (nodeFlags | FLAG_HAS_MULTIPLE_CHARS) : nodeFlags;
     91         if (childrenPositionFieldSize == 1) {
     92             nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_ONEBYTE;
     93         } else if (childrenPositionFieldSize == 2) {
     94             nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_TWOBYTES;
     95         } else if (childrenPositionFieldSize == 3) {
     96             nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_THREEBYTES;
     97         } else {
     98             nodeFlags |= FLAG_CHILDREN_POSITION_TYPE_NOPOSITION;
     99         }
    100         return nodeFlags;
    101     }
    102 
    103  private:
    104     DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTrieReadingUtils);
    105 
    106     static const NodeFlags MASK_CHILDREN_POSITION_TYPE;
    107     static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_NOPOSITION;
    108     static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_ONEBYTE;
    109     static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_TWOBYTES;
    110     static const NodeFlags FLAG_CHILDREN_POSITION_TYPE_THREEBYTES;
    111 
    112     static const NodeFlags FLAG_HAS_MULTIPLE_CHARS;
    113     static const NodeFlags FLAG_IS_TERMINAL;
    114     static const NodeFlags FLAG_HAS_SHORTCUT_TARGETS;
    115     static const NodeFlags FLAG_HAS_BIGRAMS;
    116     static const NodeFlags FLAG_IS_NOT_A_WORD;
    117     static const NodeFlags FLAG_IS_BLACKLISTED;
    118 };
    119 } // namespace latinime
    120 #endif /* LATINIME_PATRICIA_TRIE_READING_UTILS_H */
    121