Home | History | Annotate | Download | only in dictionary
      1 /*
      2  * Copyright (C) 2009, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #define LOG_TAG "LatinIME: dictionary.cpp"
     18 
     19 #include "suggest/core/dictionary/dictionary.h"
     20 
     21 #include "defines.h"
     22 #include "dictionary/interface/dictionary_header_structure_policy.h"
     23 #include "dictionary/property/ngram_context.h"
     24 #include "suggest/core/dictionary/dictionary_utils.h"
     25 #include "suggest/core/result/suggestion_results.h"
     26 #include "suggest/core/session/dic_traverse_session.h"
     27 #include "suggest/core/suggest.h"
     28 #include "suggest/core/suggest_options.h"
     29 #include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
     30 #include "suggest/policyimpl/typing/typing_suggest_policy_factory.h"
     31 #include "utils/int_array_view.h"
     32 #include "utils/log_utils.h"
     33 #include "utils/time_keeper.h"
     34 
     35 namespace latinime {
     36 
// Element count of each fixed-size buffer (code point and char) that logDictionaryInfo() uses
// to hold a single header attribute value.
const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32;
     38 
     39 Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
     40         dictionaryStructureWithBufferPolicy)
     41         : mDictionaryStructureWithBufferPolicy(std::move(dictionaryStructureWithBufferPolicy)),
     42           mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
     43           mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
     44     logDictionaryInfo(env);
     45 }
     46 
     47 void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
     48         int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
     49         int inputSize, const NgramContext *const ngramContext,
     50         const SuggestOptions *const suggestOptions, const float weightOfLangModelVsSpatialModel,
     51         SuggestionResults *const outSuggestionResults) const {
     52     TimeKeeper::setCurrentTime();
     53     traverseSession->init(this, ngramContext, suggestOptions);
     54     const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
     55     suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
     56             ycoordinates, times, pointerIds, inputCodePoints, inputSize,
     57             weightOfLangModelVsSpatialModel, outSuggestionResults);
     58 }
     59 
     60 Dictionary::NgramListenerForPrediction::NgramListenerForPrediction(
     61         const NgramContext *const ngramContext, const WordIdArrayView prevWordIds,
     62         SuggestionResults *const suggestionResults,
     63         const DictionaryStructureWithBufferPolicy *const dictStructurePolicy)
     64     : mNgramContext(ngramContext), mPrevWordIds(prevWordIds),
     65       mSuggestionResults(suggestionResults), mDictStructurePolicy(dictStructurePolicy) {}
     66 
     67 void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability,
     68         const int targetWordId) {
     69     if (targetWordId == NOT_A_WORD_ID) {
     70         return;
     71     }
     72     if (mNgramContext->isNthPrevWordBeginningOfSentence(1 /* n */)
     73             && ngramProbability == NOT_A_PROBABILITY) {
     74         return;
     75     }
     76     int targetWordCodePoints[MAX_WORD_LENGTH];
     77     const int codePointCount = mDictStructurePolicy->getCodePointsAndReturnCodePointCount(
     78             targetWordId, MAX_WORD_LENGTH, targetWordCodePoints);
     79     if (codePointCount <= 0) {
     80         return;
     81     }
     82     const WordAttributes wordAttributes = mDictStructurePolicy->getWordAttributesInContext(
     83             mPrevWordIds, targetWordId, nullptr /* multiBigramMap */);
     84     if (wordAttributes.getProbability() == NOT_A_PROBABILITY) {
     85         return;
     86     }
     87     mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount,
     88             wordAttributes.getProbability());
     89 }
     90 
     91 void Dictionary::getPredictions(const NgramContext *const ngramContext,
     92         SuggestionResults *const outSuggestionResults) const {
     93     TimeKeeper::setCurrentTime();
     94     WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
     95     const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(
     96             mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray,
     97             true /* tryLowerCaseSearch */);
     98     NgramListenerForPrediction listener(ngramContext, prevWordIds, outSuggestionResults,
     99             mDictionaryStructureWithBufferPolicy.get());
    100     mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIds, &listener);
    101 }
    102 
    103 int Dictionary::getProbability(const CodePointArrayView codePoints) const {
    104     return getNgramProbability(nullptr /* ngramContext */, codePoints);
    105 }
    106 
    107 int Dictionary::getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const {
    108     TimeKeeper::setCurrentTime();
    109     return DictionaryUtils::getMaxProbabilityOfExactMatches(
    110             mDictionaryStructureWithBufferPolicy.get(), codePoints);
    111 }
    112 
    113 int Dictionary::getNgramProbability(const NgramContext *const ngramContext,
    114         const CodePointArrayView codePoints) const {
    115     TimeKeeper::setCurrentTime();
    116     const int wordId = mDictionaryStructureWithBufferPolicy->getWordId(codePoints,
    117             false /* forceLowerCaseSearch */);
    118     if (wordId == NOT_A_WORD_ID) return NOT_A_PROBABILITY;
    119     if (!ngramContext) {
    120         return getDictionaryStructurePolicy()->getProbabilityOfWord(WordIdArrayView(), wordId);
    121     }
    122     WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
    123     const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(
    124             mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray,
    125             true /* tryLowerCaseSearch */);
    126     return getDictionaryStructurePolicy()->getProbabilityOfWord(prevWordIds, wordId);
    127 }
    128 
    129 bool Dictionary::addUnigramEntry(const CodePointArrayView codePoints,
    130         const UnigramProperty *const unigramProperty) {
    131     if (unigramProperty->representsBeginningOfSentence()
    132             && !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
    133                     ->supportsBeginningOfSentence()) {
    134         AKLOGE("The dictionary doesn't support Beginning-of-Sentence.");
    135         return false;
    136     }
    137     TimeKeeper::setCurrentTime();
    138     return mDictionaryStructureWithBufferPolicy->addUnigramEntry(codePoints, unigramProperty);
    139 }
    140 
    141 bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) {
    142     TimeKeeper::setCurrentTime();
    143     return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints);
    144 }
    145 
    146 bool Dictionary::addNgramEntry(const NgramProperty *const ngramProperty) {
    147     TimeKeeper::setCurrentTime();
    148     return mDictionaryStructureWithBufferPolicy->addNgramEntry(ngramProperty);
    149 }
    150 
    151 bool Dictionary::removeNgramEntry(const NgramContext *const ngramContext,
    152         const CodePointArrayView codePoints) {
    153     TimeKeeper::setCurrentTime();
    154     return mDictionaryStructureWithBufferPolicy->removeNgramEntry(ngramContext, codePoints);
    155 }
    156 
    157 bool Dictionary::updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
    158         const CodePointArrayView codePoints, const bool isValidWord,
    159         const HistoricalInfo historicalInfo) {
    160     TimeKeeper::setCurrentTime();
    161     return mDictionaryStructureWithBufferPolicy->updateEntriesForWordWithNgramContext(ngramContext,
    162             codePoints, isValidWord, historicalInfo);
    163 }
    164 
    165 bool Dictionary::flush(const char *const filePath) {
    166     TimeKeeper::setCurrentTime();
    167     return mDictionaryStructureWithBufferPolicy->flush(filePath);
    168 }
    169 
    170 bool Dictionary::flushWithGC(const char *const filePath) {
    171     TimeKeeper::setCurrentTime();
    172     return mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
    173 }
    174 
    175 bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
    176     TimeKeeper::setCurrentTime();
    177     return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC);
    178 }
    179 
    180 void Dictionary::getProperty(const char *const query, const int queryLength, char *const outResult,
    181         const int maxResultLength) {
    182     TimeKeeper::setCurrentTime();
    183     return mDictionaryStructureWithBufferPolicy->getProperty(query, queryLength, outResult,
    184             maxResultLength);
    185 }
    186 
    187 const WordProperty Dictionary::getWordProperty(const CodePointArrayView codePoints) {
    188     TimeKeeper::setCurrentTime();
    189     return mDictionaryStructureWithBufferPolicy->getWordProperty(codePoints);
    190 }
    191 
    192 int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints,
    193         int *const outCodePointCount) {
    194     TimeKeeper::setCurrentTime();
    195     return mDictionaryStructureWithBufferPolicy->getNextWordAndNextToken(
    196             token, outCodePoints, outCodePointCount);
    197 }
    198 
    199 void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
    200     int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
    201     int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
    202     int dateStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
    203     const DictionaryHeaderStructurePolicy *const headerPolicy =
    204             getDictionaryStructurePolicy()->getHeaderStructurePolicy();
    205     headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer,
    206             HEADER_ATTRIBUTE_BUFFER_SIZE);
    207     headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer,
    208             HEADER_ATTRIBUTE_BUFFER_SIZE);
    209     headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer,
    210             HEADER_ATTRIBUTE_BUFFER_SIZE);
    211 
    212     char dictionaryIdCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
    213     char versionStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
    214     char dateStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
    215     intArrayToCharArray(dictionaryIdCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
    216             dictionaryIdCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
    217     intArrayToCharArray(versionStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
    218             versionStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
    219     intArrayToCharArray(dateStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
    220             dateStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
    221 
    222     LogUtils::logToJava(env,
    223             "Dictionary info: dictionary = %s ; version = %s ; date = %s",
    224             dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer);
    225 }
    226 
    227 } // namespace latinime
    228