1 /* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" 18 19 #include "defines.h" 20 #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" 21 22 namespace latinime { 23 24 typedef PatriciaTrieReadingUtils PtReadingUtils; 25 26 const PtReadingUtils::NodeFlags PtReadingUtils::MASK_CHILDREN_POSITION_TYPE = 0xC0; 27 const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_NOPOSITION = 0x00; 28 const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_ONEBYTE = 0x40; 29 const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_TWOBYTES = 0x80; 30 const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_THREEBYTES = 0xC0; 31 32 // Flag for single/multiple char group 33 const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_MULTIPLE_CHARS = 0x20; 34 // Flag for terminal PtNodes 35 const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_TERMINAL = 0x10; 36 // Flag for shortcut targets presence 37 const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_SHORTCUT_TARGETS = 0x08; 38 // Flag for bigram presence 39 const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_BIGRAMS = 0x04; 40 // Flag for non-words (typically, shortcut only entries) 41 const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_NOT_A_WORD = 0x02; 42 // Flag for blacklist 43 const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01; 44 45 /* static */ int PtReadingUtils::getPtNodeArraySizeAndAdvancePosition( 46 const uint8_t *const buffer, int *const pos) { 47 const uint8_t firstByte = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); 48 if (firstByte < 0x80) { 49 return firstByte; 50 } else { 51 return ((firstByte & 0x7F) << 8) ^ ByteArrayUtils::readUint8AndAdvancePosition( 52 buffer, pos); 53 } 54 } 55 56 /* static */ PtReadingUtils::NodeFlags PtReadingUtils::getFlagsAndAdvancePosition( 57 const uint8_t *const buffer, int *const pos) { 58 return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); 59 } 60 61 /* static */ int PtReadingUtils::getCodePointAndAdvancePosition(const uint8_t *const buffer, 62 int *const pos) { 63 return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, pos); 64 } 65 66 // Returns the number of read characters. 67 /* static */ int PtReadingUtils::getCharsAndAdvancePosition(const uint8_t *const buffer, 68 const NodeFlags flags, const int maxLength, int *const outBuffer, int *const pos) { 69 int length = 0; 70 if (hasMultipleChars(flags)) { 71 length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength, outBuffer, 72 pos); 73 } else { 74 const int codePoint = getCodePointAndAdvancePosition(buffer, pos); 75 if (codePoint == NOT_A_CODE_POINT) { 76 // CAVEAT: codePoint == NOT_A_CODE_POINT means the code point is 77 // CHARACTER_ARRAY_TERMINATOR. The code point must not be CHARACTER_ARRAY_TERMINATOR 78 // when the PtNode has a single code point. 79 length = 0; 80 AKLOGE("codePoint is NOT_A_CODE_POINT. pos: %d, codePoint: 0x%x, buffer[pos - 1]: 0x%x", 81 *pos - 1, codePoint, buffer[*pos - 1]); 82 ASSERT(false); 83 } else if (maxLength > 0) { 84 outBuffer[0] = codePoint; 85 length = 1; 86 } 87 } 88 return length; 89 } 90 91 // Returns the number of skipped characters. 92 /* static */ int PtReadingUtils::skipCharacters(const uint8_t *const buffer, const NodeFlags flags, 93 const int maxLength, int *const pos) { 94 if (hasMultipleChars(flags)) { 95 return ByteArrayUtils::advancePositionToBehindString(buffer, maxLength, pos); 96 } else { 97 if (maxLength > 0) { 98 getCodePointAndAdvancePosition(buffer, pos); 99 return 1; 100 } else { 101 return 0; 102 } 103 } 104 } 105 106 /* static */ int PtReadingUtils::readProbabilityAndAdvancePosition(const uint8_t *const buffer, 107 int *const pos) { 108 return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); 109 } 110 111 /* static */ int PtReadingUtils::readChildrenPositionAndAdvancePosition( 112 const uint8_t *const buffer, const NodeFlags flags, int *const pos) { 113 const int base = *pos; 114 int offset = 0; 115 switch (MASK_CHILDREN_POSITION_TYPE & flags) { 116 case FLAG_CHILDREN_POSITION_TYPE_ONEBYTE: 117 offset = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos); 118 break; 119 case FLAG_CHILDREN_POSITION_TYPE_TWOBYTES: 120 offset = ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos); 121 break; 122 case FLAG_CHILDREN_POSITION_TYPE_THREEBYTES: 123 offset = ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos); 124 break; 125 default: 126 // If we come here, it means we asked for the children of a word with 127 // no children. 128 return NOT_A_DICT_POS; 129 } 130 return base + offset; 131 } 132 133 } // namespace latinime 134