Home | History | Annotate | Download | only in dictionary
      1 /*
      2  * Copyright (C) 2009, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #define LOG_TAG "LatinIME: dictionary.cpp"
     18 
     19 #include "suggest/core/dictionary/dictionary.h"
     20 
     21 #include "defines.h"
     22 #include "suggest/core/dictionary/dictionary_utils.h"
     23 #include "suggest/core/policy/dictionary_header_structure_policy.h"
     24 #include "suggest/core/result/suggestion_results.h"
     25 #include "suggest/core/session/dic_traverse_session.h"
     26 #include "suggest/core/session/prev_words_info.h"
     27 #include "suggest/core/suggest.h"
     28 #include "suggest/core/suggest_options.h"
     29 #include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h"
     30 #include "suggest/policyimpl/typing/typing_suggest_policy_factory.h"
     31 #include "utils/log_utils.h"
     32 #include "utils/time_keeper.h"
     33 
     34 namespace latinime {
     35 
// Element count of the local buffers that logDictionaryInfo() uses to hold each header
// attribute, both as code points (int) and as chars.
const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32;
     37 
     38 Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr
     39         dictionaryStructureWithBufferPolicy)
     40         : mDictionaryStructureWithBufferPolicy(std::move(dictionaryStructureWithBufferPolicy)),
     41           mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())),
     42           mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) {
     43     logDictionaryInfo(env);
     44 }
     45 
     46 void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
     47         int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
     48         int inputSize, const PrevWordsInfo *const prevWordsInfo,
     49         const SuggestOptions *const suggestOptions, const float languageWeight,
     50         SuggestionResults *const outSuggestionResults) const {
     51     TimeKeeper::setCurrentTime();
     52     traverseSession->init(this, prevWordsInfo, suggestOptions);
     53     const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
     54     suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
     55             ycoordinates, times, pointerIds, inputCodePoints, inputSize,
     56             languageWeight, outSuggestionResults);
     57     if (DEBUG_DICT) {
     58         outSuggestionResults->dumpSuggestions();
     59     }
     60 }
     61 
     62 Dictionary::NgramListenerForPrediction::NgramListenerForPrediction(
     63         const PrevWordsInfo *const prevWordsInfo, SuggestionResults *const suggestionResults,
     64         const DictionaryStructureWithBufferPolicy *const dictStructurePolicy)
     65     : mPrevWordsInfo(prevWordsInfo), mSuggestionResults(suggestionResults),
     66       mDictStructurePolicy(dictStructurePolicy) {}
     67 
     68 void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability,
     69         const int targetPtNodePos) {
     70     if (targetPtNodePos == NOT_A_DICT_POS) {
     71         return;
     72     }
     73     if (mPrevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
     74             && ngramProbability == NOT_A_PROBABILITY) {
     75         return;
     76     }
     77     int targetWordCodePoints[MAX_WORD_LENGTH];
     78     int unigramProbability = 0;
     79     const int codePointCount = mDictStructurePolicy->
     80             getCodePointsAndProbabilityAndReturnCodePointCount(targetPtNodePos,
     81                     MAX_WORD_LENGTH, targetWordCodePoints, &unigramProbability);
     82     if (codePointCount <= 0) {
     83         return;
     84     }
     85     const int probability = mDictStructurePolicy->getProbability(
     86             unigramProbability, ngramProbability);
     87     mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount, probability);
     88 }
     89 
     90 void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
     91         SuggestionResults *const outSuggestionResults) const {
     92     TimeKeeper::setCurrentTime();
     93     NgramListenerForPrediction listener(prevWordsInfo, outSuggestionResults,
     94             mDictionaryStructureWithBufferPolicy.get());
     95     int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
     96     prevWordsInfo->getPrevWordsTerminalPtNodePos(
     97             mDictionaryStructureWithBufferPolicy.get(), prevWordsPtNodePos,
     98             true /* tryLowerCaseSearch */);
     99     mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordsPtNodePos, &listener);
    100 }
    101 
    102 int Dictionary::getProbability(const int *word, int length) const {
    103     return getNgramProbability(nullptr /* prevWordsInfo */, word, length);
    104 }
    105 
    106 int Dictionary::getMaxProbabilityOfExactMatches(const int *word, int length) const {
    107     TimeKeeper::setCurrentTime();
    108     return DictionaryUtils::getMaxProbabilityOfExactMatches(
    109             mDictionaryStructureWithBufferPolicy.get(), word, length);
    110 }
    111 
    112 int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word,
    113         int length) const {
    114     TimeKeeper::setCurrentTime();
    115     int nextWordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(word,
    116             length, false /* forceLowerCaseSearch */);
    117     if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
    118     if (!prevWordsInfo) {
    119         return getDictionaryStructurePolicy()->getProbabilityOfPtNode(
    120                 nullptr /* prevWordsPtNodePos */, nextWordPos);
    121     }
    122     int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
    123     prevWordsInfo->getPrevWordsTerminalPtNodePos(
    124             mDictionaryStructureWithBufferPolicy.get(), prevWordsPtNodePos,
    125             true /* tryLowerCaseSearch */);
    126     return getDictionaryStructurePolicy()->getProbabilityOfPtNode(prevWordsPtNodePos, nextWordPos);
    127 }
    128 
    129 bool Dictionary::addUnigramEntry(const int *const word, const int length,
    130         const UnigramProperty *const unigramProperty) {
    131     if (unigramProperty->representsBeginningOfSentence()
    132             && !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy()
    133                     ->supportsBeginningOfSentence()) {
    134         AKLOGE("The dictionary doesn't support Beginning-of-Sentence.");
    135         return false;
    136     }
    137     TimeKeeper::setCurrentTime();
    138     return mDictionaryStructureWithBufferPolicy->addUnigramEntry(word, length, unigramProperty);
    139 }
    140 
    141 bool Dictionary::removeUnigramEntry(const int *const codePoints, const int codePointCount) {
    142     TimeKeeper::setCurrentTime();
    143     return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints, codePointCount);
    144 }
    145 
    146 bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
    147         const BigramProperty *const bigramProperty) {
    148     TimeKeeper::setCurrentTime();
    149     return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, bigramProperty);
    150 }
    151 
    152 bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
    153         const int *const word, const int length) {
    154     TimeKeeper::setCurrentTime();
    155     return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, word, length);
    156 }
    157 
    158 bool Dictionary::flush(const char *const filePath) {
    159     TimeKeeper::setCurrentTime();
    160     return mDictionaryStructureWithBufferPolicy->flush(filePath);
    161 }
    162 
    163 bool Dictionary::flushWithGC(const char *const filePath) {
    164     TimeKeeper::setCurrentTime();
    165     return mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
    166 }
    167 
    168 bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
    169     TimeKeeper::setCurrentTime();
    170     return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC);
    171 }
    172 
    173 void Dictionary::getProperty(const char *const query, const int queryLength, char *const outResult,
    174         const int maxResultLength) {
    175     TimeKeeper::setCurrentTime();
    176     return mDictionaryStructureWithBufferPolicy->getProperty(query, queryLength, outResult,
    177             maxResultLength);
    178 }
    179 
    180 const WordProperty Dictionary::getWordProperty(const int *const codePoints,
    181         const int codePointCount) {
    182     TimeKeeper::setCurrentTime();
    183     return mDictionaryStructureWithBufferPolicy->getWordProperty(
    184             codePoints, codePointCount);
    185 }
    186 
    187 int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints,
    188         int *const outCodePointCount) {
    189     TimeKeeper::setCurrentTime();
    190     return mDictionaryStructureWithBufferPolicy->getNextWordAndNextToken(
    191             token, outCodePoints, outCodePointCount);
    192 }
    193 
    194 void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
    195     int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
    196     int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
    197     int dateStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
    198     const DictionaryHeaderStructurePolicy *const headerPolicy =
    199             getDictionaryStructurePolicy()->getHeaderStructurePolicy();
    200     headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer,
    201             HEADER_ATTRIBUTE_BUFFER_SIZE);
    202     headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer,
    203             HEADER_ATTRIBUTE_BUFFER_SIZE);
    204     headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer,
    205             HEADER_ATTRIBUTE_BUFFER_SIZE);
    206 
    207     char dictionaryIdCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
    208     char versionStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
    209     char dateStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
    210     intArrayToCharArray(dictionaryIdCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
    211             dictionaryIdCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
    212     intArrayToCharArray(versionStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
    213             versionStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
    214     intArrayToCharArray(dateStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
    215             dateStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
    216 
    217     LogUtils::logToJava(env,
    218             "Dictionary info: dictionary = %s ; version = %s ; date = %s",
    219             dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer);
    220 }
    221 
    222 } // namespace latinime
    223