1 /* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_PROBABILITY_ENTRY_H 18 #define LATINIME_PROBABILITY_ENTRY_H 19 20 #include <climits> 21 #include <cstdint> 22 23 #include "defines.h" 24 #include "dictionary/property/historical_info.h" 25 #include "dictionary/property/ngram_property.h" 26 #include "dictionary/property/unigram_property.h" 27 #include "dictionary/structure/v4/ver4_dict_constants.h" 28 29 namespace latinime { 30 31 class ProbabilityEntry { 32 public: 33 ProbabilityEntry(const ProbabilityEntry &probabilityEntry) 34 : mFlags(probabilityEntry.mFlags), mProbability(probabilityEntry.mProbability), 35 mHistoricalInfo(probabilityEntry.mHistoricalInfo) {} 36 37 // Dummy entry 38 ProbabilityEntry() 39 : mFlags(Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY), mProbability(NOT_A_PROBABILITY), 40 mHistoricalInfo() {} 41 42 // Entry without historical information 43 ProbabilityEntry(const int flags, const int probability) 44 : mFlags(flags), mProbability(probability), mHistoricalInfo() {} 45 46 // Entry with historical information. 47 ProbabilityEntry(const int flags, const HistoricalInfo *const historicalInfo) 48 : mFlags(flags), mProbability(NOT_A_PROBABILITY), mHistoricalInfo(*historicalInfo) {} 49 50 // Create from unigram property. 51 ProbabilityEntry(const UnigramProperty *const unigramProperty) 52 : mFlags(createFlags(unigramProperty->representsBeginningOfSentence(), 53 unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), 54 unigramProperty->isPossiblyOffensive())), 55 mProbability(unigramProperty->getProbability()), 56 mHistoricalInfo(unigramProperty->getHistoricalInfo()) {} 57 58 // Create from ngram property. 59 // TODO: Set flags. 60 ProbabilityEntry(const NgramProperty *const ngramProperty) 61 : mFlags(0), mProbability(ngramProperty->getProbability()), 62 mHistoricalInfo(ngramProperty->getHistoricalInfo()) {} 63 64 bool isValid() const { 65 return (mFlags & Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY) == 0; 66 } 67 68 bool hasHistoricalInfo() const { 69 return mHistoricalInfo.isValid(); 70 } 71 72 uint8_t getFlags() const { 73 return mFlags; 74 } 75 76 int getProbability() const { 77 return mProbability; 78 } 79 80 const HistoricalInfo *getHistoricalInfo() const { 81 return &mHistoricalInfo; 82 } 83 84 bool representsBeginningOfSentence() const { 85 return (mFlags & Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE) != 0; 86 } 87 88 bool isNotAWord() const { 89 return (mFlags & Ver4DictConstants::FLAG_NOT_A_WORD) != 0; 90 } 91 92 bool isBlacklisted() const { 93 return (mFlags & Ver4DictConstants::FLAG_BLACKLISTED) != 0; 94 } 95 96 bool isPossiblyOffensive() const { 97 return (mFlags & Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE) != 0; 98 } 99 100 uint64_t encode(const bool hasHistoricalInfo) const { 101 uint64_t encodedEntry = static_cast<uint8_t>(mFlags); 102 if (hasHistoricalInfo) { 103 encodedEntry = (encodedEntry << (Ver4DictConstants::TIME_STAMP_FIELD_SIZE * CHAR_BIT)) 104 | static_cast<uint32_t>(mHistoricalInfo.getTimestamp()); 105 encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_LEVEL_FIELD_SIZE * CHAR_BIT)) 106 | static_cast<uint8_t>(mHistoricalInfo.getLevel()); 107 encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT)) 108 | static_cast<uint16_t>(mHistoricalInfo.getCount()); 109 } else { 110 encodedEntry = (encodedEntry << (Ver4DictConstants::PROBABILITY_SIZE * CHAR_BIT)) 111 | static_cast<uint8_t>(mProbability); 112 } 113 return encodedEntry; 114 } 115 116 static ProbabilityEntry decode(const uint64_t encodedEntry, const bool hasHistoricalInfo) { 117 if (hasHistoricalInfo) { 118 const int flags = readFromEncodedEntry(encodedEntry, 119 Ver4DictConstants::FLAGS_IN_LANGUAGE_MODEL_SIZE, 120 Ver4DictConstants::TIME_STAMP_FIELD_SIZE 121 + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE 122 + Ver4DictConstants::WORD_COUNT_FIELD_SIZE); 123 const int timestamp = readFromEncodedEntry(encodedEntry, 124 Ver4DictConstants::TIME_STAMP_FIELD_SIZE, 125 Ver4DictConstants::WORD_LEVEL_FIELD_SIZE 126 + Ver4DictConstants::WORD_COUNT_FIELD_SIZE); 127 const int level = readFromEncodedEntry(encodedEntry, 128 Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, 129 Ver4DictConstants::WORD_COUNT_FIELD_SIZE); 130 const int count = readFromEncodedEntry(encodedEntry, 131 Ver4DictConstants::WORD_COUNT_FIELD_SIZE, 0 /* pos */); 132 const HistoricalInfo historicalInfo(timestamp, level, count); 133 return ProbabilityEntry(flags, &historicalInfo); 134 } else { 135 const int flags = readFromEncodedEntry(encodedEntry, 136 Ver4DictConstants::FLAGS_IN_LANGUAGE_MODEL_SIZE, 137 Ver4DictConstants::PROBABILITY_SIZE); 138 const int probability = readFromEncodedEntry(encodedEntry, 139 Ver4DictConstants::PROBABILITY_SIZE, 0 /* pos */); 140 return ProbabilityEntry(flags, probability); 141 } 142 } 143 144 private: 145 // Copy constructor is public to use this class as a type of return value. 146 DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry); 147 148 const uint8_t mFlags; 149 const int mProbability; 150 const HistoricalInfo mHistoricalInfo; 151 152 static int readFromEncodedEntry(const uint64_t encodedEntry, const int size, const int pos) { 153 return static_cast<int>( 154 (encodedEntry >> (pos * CHAR_BIT)) & ((1ull << (size * CHAR_BIT)) - 1)); 155 } 156 157 static uint8_t createFlags(const bool representsBeginningOfSentence, 158 const bool isNotAWord, const bool isBlacklisted, const bool isPossiblyOffensive) { 159 uint8_t flags = 0; 160 if (representsBeginningOfSentence) { 161 flags |= Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE; 162 } 163 if (isNotAWord) { 164 flags |= Ver4DictConstants::FLAG_NOT_A_WORD; 165 } 166 if (isBlacklisted) { 167 flags |= Ver4DictConstants::FLAG_BLACKLISTED; 168 } 169 if (isPossiblyOffensive) { 170 flags |= Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE; 171 } 172 return flags; 173 } 174 }; 175 } // namespace latinime 176 #endif /* LATINIME_PROBABILITY_ENTRY_H */ 177