1 /* 2 * Copyright (C) 2009, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #define LOG_TAG "LatinIME: dictionary.cpp" 18 19 #include "suggest/core/dictionary/dictionary.h" 20 21 #include "defines.h" 22 #include "dictionary/interface/dictionary_header_structure_policy.h" 23 #include "dictionary/property/ngram_context.h" 24 #include "suggest/core/dictionary/dictionary_utils.h" 25 #include "suggest/core/result/suggestion_results.h" 26 #include "suggest/core/session/dic_traverse_session.h" 27 #include "suggest/core/suggest.h" 28 #include "suggest/core/suggest_options.h" 29 #include "suggest/policyimpl/gesture/gesture_suggest_policy_factory.h" 30 #include "suggest/policyimpl/typing/typing_suggest_policy_factory.h" 31 #include "utils/int_array_view.h" 32 #include "utils/log_utils.h" 33 #include "utils/time_keeper.h" 34 35 namespace latinime { 36 37 const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32; 38 39 Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::StructurePolicyPtr 40 dictionaryStructureWithBufferPolicy) 41 : mDictionaryStructureWithBufferPolicy(std::move(dictionaryStructureWithBufferPolicy)), 42 mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())), 43 mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) { 44 logDictionaryInfo(env); 45 } 46 47 void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession, 48 int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints, 49 int inputSize, const NgramContext *const ngramContext, 50 const SuggestOptions *const suggestOptions, const float weightOfLangModelVsSpatialModel, 51 SuggestionResults *const outSuggestionResults) const { 52 TimeKeeper::setCurrentTime(); 53 traverseSession->init(this, ngramContext, suggestOptions); 54 const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest; 55 suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates, 56 ycoordinates, times, pointerIds, inputCodePoints, inputSize, 57 weightOfLangModelVsSpatialModel, outSuggestionResults); 58 } 59 60 Dictionary::NgramListenerForPrediction::NgramListenerForPrediction( 61 const NgramContext *const ngramContext, const WordIdArrayView prevWordIds, 62 SuggestionResults *const suggestionResults, 63 const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) 64 : mNgramContext(ngramContext), mPrevWordIds(prevWordIds), 65 mSuggestionResults(suggestionResults), mDictStructurePolicy(dictStructurePolicy) {} 66 67 void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability, 68 const int targetWordId) { 69 if (targetWordId == NOT_A_WORD_ID) { 70 return; 71 } 72 if (mNgramContext->isNthPrevWordBeginningOfSentence(1 /* n */) 73 && ngramProbability == NOT_A_PROBABILITY) { 74 return; 75 } 76 int targetWordCodePoints[MAX_WORD_LENGTH]; 77 const int codePointCount = mDictStructurePolicy->getCodePointsAndReturnCodePointCount( 78 targetWordId, MAX_WORD_LENGTH, targetWordCodePoints); 79 if (codePointCount <= 0) { 80 return; 81 } 82 const WordAttributes wordAttributes = mDictStructurePolicy->getWordAttributesInContext( 83 mPrevWordIds, targetWordId, nullptr /* multiBigramMap */); 84 if (wordAttributes.getProbability() == NOT_A_PROBABILITY) { 85 return; 86 } 87 mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount, 88 wordAttributes.getProbability()); 89 } 90 91 void Dictionary::getPredictions(const NgramContext *const ngramContext, 92 SuggestionResults *const outSuggestionResults) const { 93 TimeKeeper::setCurrentTime(); 94 WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray; 95 const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds( 96 mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray, 97 true /* tryLowerCaseSearch */); 98 NgramListenerForPrediction listener(ngramContext, prevWordIds, outSuggestionResults, 99 mDictionaryStructureWithBufferPolicy.get()); 100 mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIds, &listener); 101 } 102 103 int Dictionary::getProbability(const CodePointArrayView codePoints) const { 104 return getNgramProbability(nullptr /* ngramContext */, codePoints); 105 } 106 107 int Dictionary::getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const { 108 TimeKeeper::setCurrentTime(); 109 return DictionaryUtils::getMaxProbabilityOfExactMatches( 110 mDictionaryStructureWithBufferPolicy.get(), codePoints); 111 } 112 113 int Dictionary::getNgramProbability(const NgramContext *const ngramContext, 114 const CodePointArrayView codePoints) const { 115 TimeKeeper::setCurrentTime(); 116 const int wordId = mDictionaryStructureWithBufferPolicy->getWordId(codePoints, 117 false /* forceLowerCaseSearch */); 118 if (wordId == NOT_A_WORD_ID) return NOT_A_PROBABILITY; 119 if (!ngramContext) { 120 return getDictionaryStructurePolicy()->getProbabilityOfWord(WordIdArrayView(), wordId); 121 } 122 WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray; 123 const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds( 124 mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray, 125 true /* tryLowerCaseSearch */); 126 return getDictionaryStructurePolicy()->getProbabilityOfWord(prevWordIds, wordId); 127 } 128 129 bool Dictionary::addUnigramEntry(const CodePointArrayView codePoints, 130 const UnigramProperty *const unigramProperty) { 131 if (unigramProperty->representsBeginningOfSentence() 132 && !mDictionaryStructureWithBufferPolicy->getHeaderStructurePolicy() 133 ->supportsBeginningOfSentence()) { 134 AKLOGE("The dictionary doesn't support Beginning-of-Sentence."); 135 return false; 136 } 137 TimeKeeper::setCurrentTime(); 138 return mDictionaryStructureWithBufferPolicy->addUnigramEntry(codePoints, unigramProperty); 139 } 140 141 bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) { 142 TimeKeeper::setCurrentTime(); 143 return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints); 144 } 145 146 bool Dictionary::addNgramEntry(const NgramProperty *const ngramProperty) { 147 TimeKeeper::setCurrentTime(); 148 return mDictionaryStructureWithBufferPolicy->addNgramEntry(ngramProperty); 149 } 150 151 bool Dictionary::removeNgramEntry(const NgramContext *const ngramContext, 152 const CodePointArrayView codePoints) { 153 TimeKeeper::setCurrentTime(); 154 return mDictionaryStructureWithBufferPolicy->removeNgramEntry(ngramContext, codePoints); 155 } 156 157 bool Dictionary::updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext, 158 const CodePointArrayView codePoints, const bool isValidWord, 159 const HistoricalInfo historicalInfo) { 160 TimeKeeper::setCurrentTime(); 161 return mDictionaryStructureWithBufferPolicy->updateEntriesForWordWithNgramContext(ngramContext, 162 codePoints, isValidWord, historicalInfo); 163 } 164 165 bool Dictionary::flush(const char *const filePath) { 166 TimeKeeper::setCurrentTime(); 167 return mDictionaryStructureWithBufferPolicy->flush(filePath); 168 } 169 170 bool Dictionary::flushWithGC(const char *const filePath) { 171 TimeKeeper::setCurrentTime(); 172 return mDictionaryStructureWithBufferPolicy->flushWithGC(filePath); 173 } 174 175 bool Dictionary::needsToRunGC(const bool mindsBlockByGC) { 176 TimeKeeper::setCurrentTime(); 177 return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC); 178 } 179 180 void Dictionary::getProperty(const char *const query, const int queryLength, char *const outResult, 181 const int maxResultLength) { 182 TimeKeeper::setCurrentTime(); 183 return mDictionaryStructureWithBufferPolicy->getProperty(query, queryLength, outResult, 184 maxResultLength); 185 } 186 187 const WordProperty Dictionary::getWordProperty(const CodePointArrayView codePoints) { 188 TimeKeeper::setCurrentTime(); 189 return mDictionaryStructureWithBufferPolicy->getWordProperty(codePoints); 190 } 191 192 int Dictionary::getNextWordAndNextToken(const int token, int *const outCodePoints, 193 int *const outCodePointCount) { 194 TimeKeeper::setCurrentTime(); 195 return mDictionaryStructureWithBufferPolicy->getNextWordAndNextToken( 196 token, outCodePoints, outCodePointCount); 197 } 198 199 void Dictionary::logDictionaryInfo(JNIEnv *const env) const { 200 int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; 201 int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; 202 int dateStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; 203 const DictionaryHeaderStructurePolicy *const headerPolicy = 204 getDictionaryStructurePolicy()->getHeaderStructurePolicy(); 205 headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer, 206 HEADER_ATTRIBUTE_BUFFER_SIZE); 207 headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer, 208 HEADER_ATTRIBUTE_BUFFER_SIZE); 209 headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer, 210 HEADER_ATTRIBUTE_BUFFER_SIZE); 211 212 char dictionaryIdCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; 213 char versionStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; 214 char dateStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE]; 215 intArrayToCharArray(dictionaryIdCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE, 216 dictionaryIdCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE); 217 intArrayToCharArray(versionStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE, 218 versionStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE); 219 intArrayToCharArray(dateStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE, 220 dateStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE); 221 222 LogUtils::logToJava(env, 223 "Dictionary info: dictionary = %s ; version = %s ; date = %s", 224 dictionaryIdCharBuffer, versionStringCharBuffer, dateStringCharBuffer); 225 } 226 227 } // namespace latinime 228