Home | History | Annotate | Download | only in content
      1 /*
      2  * Copyright (C) 2013, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LATINIME_PROBABILITY_ENTRY_H
     18 #define LATINIME_PROBABILITY_ENTRY_H
     19 
     20 #include <climits>
     21 #include <cstdint>
     22 
     23 #include "defines.h"
     24 #include "dictionary/property/historical_info.h"
     25 #include "dictionary/property/ngram_property.h"
     26 #include "dictionary/property/unigram_property.h"
     27 #include "dictionary/structure/v4/ver4_dict_constants.h"
     28 
     29 namespace latinime {
     30 
     31 class ProbabilityEntry {
     32  public:
     33     ProbabilityEntry(const ProbabilityEntry &probabilityEntry)
     34             : mFlags(probabilityEntry.mFlags), mProbability(probabilityEntry.mProbability),
     35               mHistoricalInfo(probabilityEntry.mHistoricalInfo) {}
     36 
     37     // Dummy entry
     38     ProbabilityEntry()
     39             : mFlags(Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY), mProbability(NOT_A_PROBABILITY),
     40               mHistoricalInfo() {}
     41 
     42     // Entry without historical information
     43     ProbabilityEntry(const int flags, const int probability)
     44             : mFlags(flags), mProbability(probability), mHistoricalInfo() {}
     45 
     46     // Entry with historical information.
     47     ProbabilityEntry(const int flags, const HistoricalInfo *const historicalInfo)
     48             : mFlags(flags), mProbability(NOT_A_PROBABILITY), mHistoricalInfo(*historicalInfo) {}
     49 
     50     // Create from unigram property.
     51     ProbabilityEntry(const UnigramProperty *const unigramProperty)
     52             : mFlags(createFlags(unigramProperty->representsBeginningOfSentence(),
     53                     unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
     54                     unigramProperty->isPossiblyOffensive())),
     55               mProbability(unigramProperty->getProbability()),
     56               mHistoricalInfo(unigramProperty->getHistoricalInfo()) {}
     57 
     58     // Create from ngram property.
     59     // TODO: Set flags.
     60     ProbabilityEntry(const NgramProperty *const ngramProperty)
     61             : mFlags(0), mProbability(ngramProperty->getProbability()),
     62               mHistoricalInfo(ngramProperty->getHistoricalInfo()) {}
     63 
     64     bool isValid() const {
     65         return (mFlags & Ver4DictConstants::FLAG_NOT_A_VALID_ENTRY) == 0;
     66     }
     67 
     68     bool hasHistoricalInfo() const {
     69         return mHistoricalInfo.isValid();
     70     }
     71 
     72     uint8_t getFlags() const {
     73         return mFlags;
     74     }
     75 
     76     int getProbability() const {
     77         return mProbability;
     78     }
     79 
     80     const HistoricalInfo *getHistoricalInfo() const {
     81         return &mHistoricalInfo;
     82     }
     83 
     84     bool representsBeginningOfSentence() const {
     85         return (mFlags & Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE) != 0;
     86     }
     87 
     88     bool isNotAWord() const {
     89         return (mFlags & Ver4DictConstants::FLAG_NOT_A_WORD) != 0;
     90     }
     91 
     92     bool isBlacklisted() const {
     93         return (mFlags & Ver4DictConstants::FLAG_BLACKLISTED) != 0;
     94     }
     95 
     96     bool isPossiblyOffensive() const {
     97         return (mFlags & Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE) != 0;
     98     }
     99 
    100     uint64_t encode(const bool hasHistoricalInfo) const {
    101         uint64_t encodedEntry = static_cast<uint8_t>(mFlags);
    102         if (hasHistoricalInfo) {
    103             encodedEntry = (encodedEntry << (Ver4DictConstants::TIME_STAMP_FIELD_SIZE * CHAR_BIT))
    104                     | static_cast<uint32_t>(mHistoricalInfo.getTimestamp());
    105             encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_LEVEL_FIELD_SIZE * CHAR_BIT))
    106                     | static_cast<uint8_t>(mHistoricalInfo.getLevel());
    107             encodedEntry = (encodedEntry << (Ver4DictConstants::WORD_COUNT_FIELD_SIZE * CHAR_BIT))
    108                     | static_cast<uint16_t>(mHistoricalInfo.getCount());
    109         } else {
    110             encodedEntry = (encodedEntry << (Ver4DictConstants::PROBABILITY_SIZE * CHAR_BIT))
    111                     | static_cast<uint8_t>(mProbability);
    112         }
    113         return encodedEntry;
    114     }
    115 
    116     static ProbabilityEntry decode(const uint64_t encodedEntry, const bool hasHistoricalInfo) {
    117         if (hasHistoricalInfo) {
    118             const int flags = readFromEncodedEntry(encodedEntry,
    119                     Ver4DictConstants::FLAGS_IN_LANGUAGE_MODEL_SIZE,
    120                     Ver4DictConstants::TIME_STAMP_FIELD_SIZE
    121                             + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
    122                             + Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
    123             const int timestamp = readFromEncodedEntry(encodedEntry,
    124                     Ver4DictConstants::TIME_STAMP_FIELD_SIZE,
    125                     Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
    126                             + Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
    127             const int level = readFromEncodedEntry(encodedEntry,
    128                     Ver4DictConstants::WORD_LEVEL_FIELD_SIZE,
    129                     Ver4DictConstants::WORD_COUNT_FIELD_SIZE);
    130             const int count = readFromEncodedEntry(encodedEntry,
    131                     Ver4DictConstants::WORD_COUNT_FIELD_SIZE, 0 /* pos */);
    132             const HistoricalInfo historicalInfo(timestamp, level, count);
    133             return ProbabilityEntry(flags, &historicalInfo);
    134         } else {
    135             const int flags = readFromEncodedEntry(encodedEntry,
    136                     Ver4DictConstants::FLAGS_IN_LANGUAGE_MODEL_SIZE,
    137                     Ver4DictConstants::PROBABILITY_SIZE);
    138             const int probability = readFromEncodedEntry(encodedEntry,
    139                     Ver4DictConstants::PROBABILITY_SIZE, 0 /* pos */);
    140             return ProbabilityEntry(flags, probability);
    141         }
    142     }
    143 
    144  private:
    145     // Copy constructor is public to use this class as a type of return value.
    146     DISALLOW_ASSIGNMENT_OPERATOR(ProbabilityEntry);
    147 
    148     const uint8_t mFlags;
    149     const int mProbability;
    150     const HistoricalInfo mHistoricalInfo;
    151 
    152     static int readFromEncodedEntry(const uint64_t encodedEntry, const int size, const int pos) {
    153         return static_cast<int>(
    154                 (encodedEntry >> (pos * CHAR_BIT)) & ((1ull << (size * CHAR_BIT)) - 1));
    155     }
    156 
    157     static uint8_t createFlags(const bool representsBeginningOfSentence,
    158             const bool isNotAWord, const bool isBlacklisted, const bool isPossiblyOffensive) {
    159         uint8_t flags = 0;
    160         if (representsBeginningOfSentence) {
    161             flags |= Ver4DictConstants::FLAG_REPRESENTS_BEGINNING_OF_SENTENCE;
    162         }
    163         if (isNotAWord) {
    164             flags |= Ver4DictConstants::FLAG_NOT_A_WORD;
    165         }
    166         if (isBlacklisted) {
    167             flags |= Ver4DictConstants::FLAG_BLACKLISTED;
    168         }
    169         if (isPossiblyOffensive) {
    170             flags |= Ver4DictConstants::FLAG_POSSIBLY_OFFENSIVE;
    171         }
    172         return flags;
    173     }
    174 };
    175 } // namespace latinime
    176 #endif /* LATINIME_PROBABILITY_ENTRY_H */
    177