1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #define LOG_TAG "LatinIME: jni: BinaryDictionary" 18 19 #include "com_android_inputmethod_latin_BinaryDictionary.h" 20 21 #include <cstring> // for memset() 22 #include <vector> 23 24 #include "defines.h" 25 #include "dictionary/property/unigram_property.h" 26 #include "dictionary/property/ngram_context.h" 27 #include "dictionary/property/word_property.h" 28 #include "dictionary/structure/dictionary_structure_with_buffer_policy_factory.h" 29 #include "jni.h" 30 #include "jni_common.h" 31 #include "suggest/core/dictionary/dictionary.h" 32 #include "suggest/core/result/suggestion_results.h" 33 #include "suggest/core/suggest_options.h" 34 #include "utils/char_utils.h" 35 #include "utils/int_array_view.h" 36 #include "utils/jni_data_utils.h" 37 #include "utils/log_utils.h" 38 #include "utils/profiler.h" 39 #include "utils/time_keeper.h" 40 41 namespace latinime { 42 43 class ProximityInfo; 44 45 static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir, 46 jlong dictOffset, jlong dictSize, jboolean isUpdatable) { 47 PROF_INIT; 48 PROF_TIMER_START(66); 49 const jsize sourceDirUtf8Length = env->GetStringUTFLength(sourceDir); 50 if (sourceDirUtf8Length <= 0) { 51 AKLOGE("DICT: Can't get sourceDir string"); 52 return 0; 53 } 54 char sourceDirChars[sourceDirUtf8Length + 1]; 55 env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars); 56 sourceDirChars[sourceDirUtf8Length] = '\0'; 57 DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy( 58 DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile( 59 sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize), 60 isUpdatable == JNI_TRUE)); 61 if (!dictionaryStructureWithBufferPolicy) { 62 return 0; 63 } 64 65 Dictionary *const dictionary = 66 new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy)); 67 PROF_TIMER_END(66); 68 return reinterpret_cast<jlong>(dictionary); 69 } 70 71 static jlong latinime_BinaryDictionary_createOnMemory(JNIEnv *env, jclass clazz, 72 jlong formatVersion, jstring locale, jobjectArray attributeKeyStringArray, 73 jobjectArray attributeValueStringArray) { 74 const jsize localeUtf8Length = env->GetStringUTFLength(locale); 75 char localeChars[localeUtf8Length + 1]; 76 env->GetStringUTFRegion(locale, 0, env->GetStringLength(locale), localeChars); 77 localeChars[localeUtf8Length] = '\0'; 78 std::vector<int> localeCodePoints; 79 HeaderReadWriteUtils::insertCharactersIntoVector(localeChars, &localeCodePoints); 80 const int keyCount = env->GetArrayLength(attributeKeyStringArray); 81 const int valueCount = env->GetArrayLength(attributeValueStringArray); 82 if (keyCount != valueCount) { 83 return false; 84 } 85 DictionaryHeaderStructurePolicy::AttributeMap attributeMap = 86 JniDataUtils::constructAttributeMap(env, attributeKeyStringArray, 87 attributeValueStringArray); 88 DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy = 89 DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict( 90 formatVersion, localeCodePoints, &attributeMap); 91 if (!dictionaryStructureWithBufferPolicy) { 92 return 0; 93 } 94 Dictionary *const dictionary = 95 new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy)); 96 return reinterpret_cast<jlong>(dictionary); 97 } 98 99 static bool latinime_BinaryDictionary_flush(JNIEnv *env, jclass clazz, jlong dict, 100 jstring filePath) { 101 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 102 if (!dictionary) return false; 103 const jsize filePathUtf8Length = env->GetStringUTFLength(filePath); 104 char filePathChars[filePathUtf8Length + 1]; 105 env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars); 106 filePathChars[filePathUtf8Length] = '\0'; 107 return dictionary->flush(filePathChars); 108 } 109 110 static bool latinime_BinaryDictionary_needsToRunGC(JNIEnv *env, jclass clazz, 111 jlong dict, jboolean mindsBlockByGC) { 112 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 113 if (!dictionary) return false; 114 return dictionary->needsToRunGC(mindsBlockByGC == JNI_TRUE); 115 } 116 117 static bool latinime_BinaryDictionary_flushWithGC(JNIEnv *env, jclass clazz, jlong dict, 118 jstring filePath) { 119 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 120 if (!dictionary) return false; 121 const jsize filePathUtf8Length = env->GetStringUTFLength(filePath); 122 char filePathChars[filePathUtf8Length + 1]; 123 env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars); 124 filePathChars[filePathUtf8Length] = '\0'; 125 return dictionary->flushWithGC(filePathChars); 126 } 127 128 static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dict) { 129 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 130 if (!dictionary) return; 131 delete dictionary; 132 } 133 134 static void latinime_BinaryDictionary_getHeaderInfo(JNIEnv *env, jclass clazz, jlong dict, 135 jintArray outHeaderSize, jintArray outFormatVersion, jobject outAttributeKeys, 136 jobject outAttributeValues) { 137 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 138 if (!dictionary) return; 139 const DictionaryHeaderStructurePolicy *const headerPolicy = 140 dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy(); 141 JniDataUtils::putIntToArray(env, outHeaderSize, 0 /* index */, headerPolicy->getSize()); 142 JniDataUtils::putIntToArray(env, outFormatVersion, 0 /* index */, 143 headerPolicy->getFormatVersionNumber()); 144 // Output attribute map 145 jclass arrayListClass = env->FindClass("java/util/ArrayList"); 146 jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z"); 147 const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap = 148 headerPolicy->getAttributeMap(); 149 for (DictionaryHeaderStructurePolicy::AttributeMap::const_iterator it = attributeMap->begin(); 150 it != attributeMap->end(); ++it) { 151 // Output key 152 jintArray keyCodePointArray = env->NewIntArray(it->first.size()); 153 JniDataUtils::outputCodePoints(env, keyCodePointArray, 0 /* start */, 154 it->first.size(), it->first.data(), it->first.size(), 155 false /* needsNullTermination */); 156 env->CallBooleanMethod(outAttributeKeys, addMethodId, keyCodePointArray); 157 env->DeleteLocalRef(keyCodePointArray); 158 // Output value 159 jintArray valueCodePointArray = env->NewIntArray(it->second.size()); 160 JniDataUtils::outputCodePoints(env, valueCodePointArray, 0 /* start */, 161 it->second.size(), it->second.data(), it->second.size(), 162 false /* needsNullTermination */); 163 env->CallBooleanMethod(outAttributeValues, addMethodId, valueCodePointArray); 164 env->DeleteLocalRef(valueCodePointArray); 165 } 166 env->DeleteLocalRef(arrayListClass); 167 return; 168 } 169 170 static int latinime_BinaryDictionary_getFormatVersion(JNIEnv *env, jclass clazz, jlong dict) { 171 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 172 if (!dictionary) return 0; 173 const DictionaryHeaderStructurePolicy *const headerPolicy = 174 dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy(); 175 return headerPolicy->getFormatVersionNumber(); 176 } 177 178 static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong dict, 179 jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray, 180 jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray, 181 jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions, 182 jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray, 183 jint prevWordCount, jintArray outSuggestionCount, jintArray outCodePointsArray, 184 jintArray outScoresArray, jintArray outSpaceIndicesArray, jintArray outTypesArray, 185 jintArray outAutoCommitFirstWordConfidenceArray, 186 jfloatArray inOutWeightOfLangModelVsSpatialModel) { 187 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 188 // Assign 0 to outSuggestionCount here in case of returning earlier in this method. 189 JniDataUtils::putIntToArray(env, outSuggestionCount, 0 /* index */, 0); 190 if (!dictionary) { 191 return; 192 } 193 ProximityInfo *pInfo = reinterpret_cast<ProximityInfo *>(proximityInfo); 194 DicTraverseSession *traverseSession = 195 reinterpret_cast<DicTraverseSession *>(dicTraverseSession); 196 if (!traverseSession) { 197 return; 198 } 199 // Input values 200 int xCoordinates[inputSize]; 201 int yCoordinates[inputSize]; 202 int times[inputSize]; 203 int pointerIds[inputSize]; 204 const jsize inputCodePointsLength = env->GetArrayLength(inputCodePointsArray); 205 int inputCodePoints[inputCodePointsLength]; 206 env->GetIntArrayRegion(xCoordinatesArray, 0, inputSize, xCoordinates); 207 env->GetIntArrayRegion(yCoordinatesArray, 0, inputSize, yCoordinates); 208 env->GetIntArrayRegion(timesArray, 0, inputSize, times); 209 env->GetIntArrayRegion(pointerIdsArray, 0, inputSize, pointerIds); 210 env->GetIntArrayRegion(inputCodePointsArray, 0, inputCodePointsLength, inputCodePoints); 211 212 const jsize numberOfOptions = env->GetArrayLength(suggestOptions); 213 int options[numberOfOptions]; 214 env->GetIntArrayRegion(suggestOptions, 0, numberOfOptions, options); 215 SuggestOptions givenSuggestOptions(options, numberOfOptions); 216 217 // Output values 218 /* By the way, let's check the output array length here to make sure */ 219 const jsize outputCodePointsLength = env->GetArrayLength(outCodePointsArray); 220 if (outputCodePointsLength != (MAX_WORD_LENGTH * MAX_RESULTS)) { 221 AKLOGE("Invalid outputCodePointsLength: %d", outputCodePointsLength); 222 ASSERT(false); 223 return; 224 } 225 const jsize scoresLength = env->GetArrayLength(outScoresArray); 226 if (scoresLength != MAX_RESULTS) { 227 AKLOGE("Invalid scoresLength: %d", scoresLength); 228 ASSERT(false); 229 return; 230 } 231 const jsize outputAutoCommitFirstWordConfidenceLength = 232 env->GetArrayLength(outAutoCommitFirstWordConfidenceArray); 233 ASSERT(outputAutoCommitFirstWordConfidenceLength == 1); 234 if (outputAutoCommitFirstWordConfidenceLength != 1) { 235 // We only use the first result, as obviously we will only ever autocommit the first one 236 AKLOGE("Invalid outputAutoCommitFirstWordConfidenceLength: %d", 237 outputAutoCommitFirstWordConfidenceLength); 238 ASSERT(false); 239 return; 240 } 241 float weightOfLangModelVsSpatialModel; 242 env->GetFloatArrayRegion(inOutWeightOfLangModelVsSpatialModel, 0, 1 /* len */, 243 &weightOfLangModelVsSpatialModel); 244 SuggestionResults suggestionResults(MAX_RESULTS); 245 const NgramContext ngramContext = JniDataUtils::constructNgramContext(env, 246 prevWordCodePointArrays, isBeginningOfSentenceArray, prevWordCount); 247 if (givenSuggestOptions.isGesture() || inputSize > 0) { 248 // TODO: Use SuggestionResults to return suggestions. 249 dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates, 250 times, pointerIds, inputCodePoints, inputSize, &ngramContext, 251 &givenSuggestOptions, weightOfLangModelVsSpatialModel, &suggestionResults); 252 } else { 253 dictionary->getPredictions(&ngramContext, &suggestionResults); 254 } 255 if (DEBUG_DICT) { 256 suggestionResults.dumpSuggestions(); 257 } 258 suggestionResults.outputSuggestions(env, outSuggestionCount, outCodePointsArray, 259 outScoresArray, outSpaceIndicesArray, outTypesArray, 260 outAutoCommitFirstWordConfidenceArray, inOutWeightOfLangModelVsSpatialModel); 261 } 262 263 static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, jlong dict, 264 jintArray word) { 265 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 266 if (!dictionary) return NOT_A_PROBABILITY; 267 const jsize codePointCount = env->GetArrayLength(word); 268 int codePoints[codePointCount]; 269 env->GetIntArrayRegion(word, 0, codePointCount, codePoints); 270 return dictionary->getProbability(CodePointArrayView(codePoints, codePointCount)); 271 } 272 273 static jint latinime_BinaryDictionary_getMaxProbabilityOfExactMatches( 274 JNIEnv *env, jclass clazz, jlong dict, jintArray word) { 275 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 276 if (!dictionary) return NOT_A_PROBABILITY; 277 const jsize codePointCount = env->GetArrayLength(word); 278 int codePoints[codePointCount]; 279 env->GetIntArrayRegion(word, 0, codePointCount, codePoints); 280 return dictionary->getMaxProbabilityOfExactMatches( 281 CodePointArrayView(codePoints, codePointCount)); 282 } 283 284 static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass clazz, 285 jlong dict, jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray, 286 jintArray word) { 287 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 288 if (!dictionary) return JNI_FALSE; 289 const jsize wordLength = env->GetArrayLength(word); 290 int wordCodePoints[wordLength]; 291 env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); 292 const NgramContext ngramContext = JniDataUtils::constructNgramContext(env, 293 prevWordCodePointArrays, isBeginningOfSentenceArray, 294 env->GetArrayLength(prevWordCodePointArrays)); 295 return dictionary->getNgramProbability(&ngramContext, 296 CodePointArrayView(wordCodePoints, wordLength)); 297 } 298 299 // Method to iterate all words in the dictionary for makedict. 300 // If token is 0, this method newly starts iterating the dictionary. This method returns 0 when 301 // the dictionary does not have a next word. 302 static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz, 303 jlong dict, jint token, jintArray outCodePoints, jbooleanArray outIsBeginningOfSentence) { 304 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 305 if (!dictionary) return 0; 306 const jsize codePointBufSize = env->GetArrayLength(outCodePoints); 307 if (codePointBufSize != MAX_WORD_LENGTH) { 308 AKLOGE("Invalid outCodePointsLength: %d", codePointBufSize); 309 ASSERT(false); 310 return 0; 311 } 312 int wordCodePoints[codePointBufSize]; 313 int wordCodePointCount = 0; 314 const int nextToken = dictionary->getNextWordAndNextToken(token, wordCodePoints, 315 &wordCodePointCount); 316 JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */, 317 MAX_WORD_LENGTH /* maxLength */, wordCodePoints, wordCodePointCount, 318 false /* needsNullTermination */); 319 bool isBeginningOfSentence = false; 320 if (wordCodePointCount > 0 && wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) { 321 isBeginningOfSentence = true; 322 } 323 JniDataUtils::putBooleanToArray(env, outIsBeginningOfSentence, 0 /* index */, 324 isBeginningOfSentence); 325 return nextToken; 326 } 327 328 static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz, 329 jlong dict, jintArray word, jboolean isBeginningOfSentence, jintArray outCodePoints, 330 jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outNgramPrevWordsArray, 331 jobject outNgramPrevWordIsBeginningOfSentenceArray, jobject outNgramTargets, 332 jobject outNgramProbabilityInfo, jobject outShortcutTargets, 333 jobject outShortcutProbabilities) { 334 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 335 if (!dictionary) return; 336 const jsize wordLength = env->GetArrayLength(word); 337 if (wordLength > MAX_WORD_LENGTH) { 338 AKLOGE("Invalid wordLength: %d", wordLength); 339 return; 340 } 341 int wordCodePoints[MAX_WORD_LENGTH]; 342 env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); 343 int codePointCount = wordLength; 344 if (isBeginningOfSentence) { 345 codePointCount = CharUtils::attachBeginningOfSentenceMarker( 346 wordCodePoints, wordLength, MAX_WORD_LENGTH); 347 if (codePointCount < 0) { 348 AKLOGE("Cannot attach Beginning-of-Sentence marker."); 349 return; 350 } 351 } 352 const WordProperty wordProperty = dictionary->getWordProperty( 353 CodePointArrayView(wordCodePoints, codePointCount)); 354 JniDataUtils::outputWordProperty(env, wordProperty, outCodePoints, outFlags, outProbabilityInfo, 355 outNgramPrevWordsArray, outNgramPrevWordIsBeginningOfSentenceArray, 356 outNgramTargets, outNgramProbabilityInfo, outShortcutTargets, outShortcutProbabilities); 357 } 358 359 static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz, jlong dict, 360 jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability, 361 jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isPossiblyOffensive, 362 jint timestamp) { 363 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 364 if (!dictionary) { 365 return false; 366 } 367 jsize codePointCount = env->GetArrayLength(word); 368 int codePoints[codePointCount]; 369 env->GetIntArrayRegion(word, 0, codePointCount, codePoints); 370 std::vector<UnigramProperty::ShortcutProperty> shortcuts; 371 { 372 std::vector<int> shortcutTargetCodePoints; 373 JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints); 374 if (!shortcutTargetCodePoints.empty()) { 375 shortcuts.emplace_back(std::move(shortcutTargetCodePoints), shortcutProbability); 376 } 377 } 378 // Use 1 for count to indicate the word has inputted. 379 const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord, 380 isPossiblyOffensive, probability, HistoricalInfo(timestamp, 0 /* level */, 381 1 /* count */), std::move(shortcuts)); 382 return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount), 383 &unigramProperty); 384 } 385 386 static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass clazz, jlong dict, 387 jintArray word) { 388 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 389 if (!dictionary) { 390 return false; 391 } 392 jsize codePointCount = env->GetArrayLength(word); 393 int codePoints[codePointCount]; 394 env->GetIntArrayRegion(word, 0, codePointCount, codePoints); 395 return dictionary->removeUnigramEntry(CodePointArrayView(codePoints, codePointCount)); 396 } 397 398 static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, jlong dict, 399 jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray, 400 jintArray word, jint probability, jint timestamp) { 401 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 402 if (!dictionary) { 403 return false; 404 } 405 const NgramContext ngramContext = JniDataUtils::constructNgramContext(env, 406 prevWordCodePointArrays, isBeginningOfSentenceArray, 407 env->GetArrayLength(prevWordCodePointArrays)); 408 jsize wordLength = env->GetArrayLength(word); 409 int wordCodePoints[wordLength]; 410 env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); 411 // Use 1 for count to indicate the ngram has inputted. 412 const NgramProperty ngramProperty(ngramContext, 413 CodePointArrayView(wordCodePoints, wordLength).toVector(), 414 probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */)); 415 return dictionary->addNgramEntry(&ngramProperty); 416 } 417 418 static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict, 419 jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray, 420 jintArray word) { 421 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 422 if (!dictionary) { 423 return false; 424 } 425 const NgramContext ngramContext = JniDataUtils::constructNgramContext(env, 426 prevWordCodePointArrays, isBeginningOfSentenceArray, 427 env->GetArrayLength(prevWordCodePointArrays)); 428 jsize codePointCount = env->GetArrayLength(word); 429 int wordCodePoints[codePointCount]; 430 env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints); 431 return dictionary->removeNgramEntry(&ngramContext, 432 CodePointArrayView(wordCodePoints, codePointCount)); 433 } 434 435 static bool latinime_BinaryDictionary_updateEntriesForWordWithNgramContext(JNIEnv *env, 436 jclass clazz, jlong dict, jobjectArray prevWordCodePointArrays, 437 jbooleanArray isBeginningOfSentenceArray, jintArray word, jboolean isValidWord, jint count, 438 jint timestamp) { 439 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 440 if (!dictionary) { 441 return false; 442 } 443 const NgramContext ngramContext = JniDataUtils::constructNgramContext(env, 444 prevWordCodePointArrays, isBeginningOfSentenceArray, 445 env->GetArrayLength(prevWordCodePointArrays)); 446 jsize codePointCount = env->GetArrayLength(word); 447 int wordCodePoints[codePointCount]; 448 env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints); 449 const HistoricalInfo historicalInfo(timestamp, 0 /* level */, count); 450 return dictionary->updateEntriesForWordWithNgramContext(&ngramContext, 451 CodePointArrayView(wordCodePoints, codePointCount), isValidWord == JNI_TRUE, 452 historicalInfo); 453 } 454 455 // Returns how many input events are processed. 456 static int latinime_BinaryDictionary_updateEntriesForInputEvents(JNIEnv *env, jclass clazz, 457 jlong dict, jobjectArray inputEvents, jint startIndex) { 458 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 459 if (!dictionary) { 460 return 0; 461 } 462 jsize inputEventCount = env->GetArrayLength(inputEvents); 463 if (inputEventCount == 0 || startIndex >= inputEventCount) { 464 return 0; 465 } 466 jobject inputEvent = env->GetObjectArrayElement(inputEvents, 0); 467 jclass wordInputEventClass = env->GetObjectClass(inputEvent); 468 env->DeleteLocalRef(inputEvent); 469 470 jfieldID targetWordFieldId = env->GetFieldID(wordInputEventClass, "mTargetWord", "[I"); 471 jfieldID prevWordCountFieldId = env->GetFieldID(wordInputEventClass, "mPrevWordsCount", "I"); 472 jfieldID prevWordArrayFieldId = env->GetFieldID(wordInputEventClass, "mPrevWordArray", "[[I"); 473 jfieldID isPrevWordBoSArrayFieldId = 474 env->GetFieldID(wordInputEventClass, "mIsPrevWordBeginningOfSentenceArray", "[Z"); 475 jfieldID isValidFieldId = env->GetFieldID(wordInputEventClass, "mIsValid", "Z"); 476 jfieldID timestampFieldId = env->GetFieldID(wordInputEventClass, "mTimestamp", "I"); 477 env->DeleteLocalRef(wordInputEventClass); 478 479 for (int i = startIndex; i < inputEventCount; ++i) { 480 jobject inputEvent = env->GetObjectArrayElement(inputEvents, i); 481 jintArray targetWord = static_cast<jintArray>( 482 env->GetObjectField(inputEvent, targetWordFieldId)); 483 jsize wordLength = env->GetArrayLength(targetWord); 484 int wordCodePoints[wordLength]; 485 env->GetIntArrayRegion(targetWord, 0, wordLength, wordCodePoints); 486 env->DeleteLocalRef(targetWord); 487 488 jint prevWordCount = env->GetIntField(inputEvent, prevWordCountFieldId); 489 jobjectArray prevWordArray = 490 static_cast<jobjectArray>(env->GetObjectField(inputEvent, prevWordArrayFieldId)); 491 jbooleanArray isPrevWordBeginningOfSentenceArray = static_cast<jbooleanArray>( 492 env->GetObjectField(inputEvent, isPrevWordBoSArrayFieldId)); 493 jboolean isValid = env->GetBooleanField(inputEvent, isValidFieldId); 494 jint timestamp = env->GetIntField(inputEvent, timestampFieldId); 495 const NgramContext ngramContext = JniDataUtils::constructNgramContext(env, 496 prevWordArray, isPrevWordBeginningOfSentenceArray, prevWordCount); 497 // Use 1 for count to indicate the word has inputted. 498 dictionary->updateEntriesForWordWithNgramContext(&ngramContext, 499 CodePointArrayView(wordCodePoints, wordLength), isValid, 500 HistoricalInfo(timestamp, 0 /* level */, 1 /* count */)); 501 if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) { 502 return i + 1; 503 } 504 env->DeleteLocalRef(prevWordArray); 505 env->DeleteLocalRef(isPrevWordBeginningOfSentenceArray); 506 env->DeleteLocalRef(inputEvent); 507 } 508 return inputEventCount; 509 } 510 511 static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz, jlong dict, 512 jstring query) { 513 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 514 if (!dictionary) { 515 return env->NewStringUTF(""); 516 } 517 const jsize queryUtf8Length = env->GetStringUTFLength(query); 518 char queryChars[queryUtf8Length + 1]; 519 env->GetStringUTFRegion(query, 0, env->GetStringLength(query), queryChars); 520 queryChars[queryUtf8Length] = '\0'; 521 static const int GET_PROPERTY_RESULT_LENGTH = 100; 522 char resultChars[GET_PROPERTY_RESULT_LENGTH]; 523 resultChars[0] = '\0'; 524 dictionary->getProperty(queryChars, queryUtf8Length, resultChars, GET_PROPERTY_RESULT_LENGTH); 525 return env->NewStringUTF(resultChars); 526 } 527 528 static bool latinime_BinaryDictionary_isCorruptedNative(JNIEnv *env, jclass clazz, jlong dict) { 529 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 530 if (!dictionary) { 531 return false; 532 } 533 return dictionary->getDictionaryStructurePolicy()->isCorrupted(); 534 } 535 536 static DictionaryStructureWithBufferPolicy::StructurePolicyPtr runGCAndGetNewStructurePolicy( 537 DictionaryStructureWithBufferPolicy::StructurePolicyPtr structurePolicy, 538 const char *const dictFilePath) { 539 structurePolicy->flushWithGC(dictFilePath); 540 structurePolicy.release(); 541 return DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile( 542 dictFilePath, 0 /* offset */, 0 /* size */, true /* isUpdatable */); 543 } 544 545 static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, jlong dict, 546 jstring dictFilePath, jlong newFormatVersion) { 547 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 548 if (!dictionary) { 549 return false; 550 } 551 const jsize filePathUtf8Length = env->GetStringUTFLength(dictFilePath); 552 char dictFilePathChars[filePathUtf8Length + 1]; 553 env->GetStringUTFRegion(dictFilePath, 0, env->GetStringLength(dictFilePath), dictFilePathChars); 554 dictFilePathChars[filePathUtf8Length] = '\0'; 555 556 const DictionaryHeaderStructurePolicy *const headerPolicy = 557 dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy(); 558 DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy = 559 DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict( 560 newFormatVersion, *headerPolicy->getLocale(), headerPolicy->getAttributeMap()); 561 if (!dictionaryStructureWithBufferPolicy) { 562 LogUtils::logToJava(env, "Cannot migrate header."); 563 return false; 564 } 565 566 int wordCodePoints[MAX_WORD_LENGTH]; 567 int wordCodePointCount = 0; 568 int token = 0; 569 // Add unigrams. 570 do { 571 token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount); 572 const WordProperty wordProperty = dictionary->getWordProperty( 573 CodePointArrayView(wordCodePoints, wordCodePointCount)); 574 if (wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) { 575 // Skip beginning-of-sentence unigram. 576 continue; 577 } 578 if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) { 579 dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy( 580 std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars); 581 if (!dictionaryStructureWithBufferPolicy) { 582 LogUtils::logToJava(env, "Cannot open dict after GC."); 583 return false; 584 } 585 } 586 if (!dictionaryStructureWithBufferPolicy->addUnigramEntry( 587 CodePointArrayView(wordCodePoints, wordCodePointCount), 588 &wordProperty.getUnigramProperty())) { 589 LogUtils::logToJava(env, "Cannot add unigram to the new dict."); 590 return false; 591 } 592 } while (token != 0); 593 594 // Add ngrams. 595 do { 596 token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount); 597 const WordProperty wordProperty = dictionary->getWordProperty( 598 CodePointArrayView(wordCodePoints, wordCodePointCount)); 599 if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) { 600 dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy( 601 std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars); 602 if (!dictionaryStructureWithBufferPolicy) { 603 LogUtils::logToJava(env, "Cannot open dict after GC."); 604 return false; 605 } 606 } 607 for (const NgramProperty &ngramProperty : wordProperty.getNgramProperties()) { 608 if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&ngramProperty)) { 609 LogUtils::logToJava(env, "Cannot add ngram to the new dict."); 610 return false; 611 } 612 } 613 } while (token != 0); 614 // Save to File. 615 dictionaryStructureWithBufferPolicy->flushWithGC(dictFilePathChars); 616 return true; 617 } 618 619 static const JNINativeMethod sMethods[] = { 620 { 621 const_cast<char *>("openNative"), 622 const_cast<char *>("(Ljava/lang/String;JJZ)J"), 623 reinterpret_cast<void *>(latinime_BinaryDictionary_open) 624 }, 625 { 626 const_cast<char *>("createOnMemoryNative"), 627 const_cast<char *>("(JLjava/lang/String;[Ljava/lang/String;[Ljava/lang/String;)J"), 628 reinterpret_cast<void *>(latinime_BinaryDictionary_createOnMemory) 629 }, 630 { 631 const_cast<char *>("closeNative"), 632 const_cast<char *>("(J)V"), 633 reinterpret_cast<void *>(latinime_BinaryDictionary_close) 634 }, 635 { 636 const_cast<char *>("getFormatVersionNative"), 637 const_cast<char *>("(J)I"), 638 reinterpret_cast<void *>(latinime_BinaryDictionary_getFormatVersion) 639 }, 640 { 641 const_cast<char *>("getHeaderInfoNative"), 642 const_cast<char *>("(J[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"), 643 reinterpret_cast<void *>(latinime_BinaryDictionary_getHeaderInfo) 644 }, 645 { 646 const_cast<char *>("flushNative"), 647 const_cast<char *>("(JLjava/lang/String;)Z"), 648 reinterpret_cast<void *>(latinime_BinaryDictionary_flush) 649 }, 650 { 651 const_cast<char *>("needsToRunGCNative"), 652 const_cast<char *>("(JZ)Z"), 653 reinterpret_cast<void *>(latinime_BinaryDictionary_needsToRunGC) 654 }, 655 { 656 const_cast<char *>("flushWithGCNative"), 657 const_cast<char *>("(JLjava/lang/String;)Z"), 658 reinterpret_cast<void *>(latinime_BinaryDictionary_flushWithGC) 659 }, 660 { 661 const_cast<char *>("getSuggestionsNative"), 662 const_cast<char *>("(JJJ[I[I[I[I[II[I[[I[ZI[I[I[I[I[I[I[F)V"), 663 reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions) 664 }, 665 { 666 const_cast<char *>("getProbabilityNative"), 667 const_cast<char *>("(J[I)I"), 668 reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability) 669 }, 670 { 671 const_cast<char *>("getMaxProbabilityOfExactMatchesNative"), 672 const_cast<char *>("(J[I)I"), 673 reinterpret_cast<void *>(latinime_BinaryDictionary_getMaxProbabilityOfExactMatches) 674 }, 675 { 676 const_cast<char *>("getNgramProbabilityNative"), 677 const_cast<char *>("(J[[I[Z[I)I"), 678 reinterpret_cast<void *>(latinime_BinaryDictionary_getNgramProbability) 679 }, 680 { 681 const_cast<char *>("getWordPropertyNative"), 682 const_cast<char *>("(J[IZ[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;" 683 "Ljava/util/ArrayList;Ljava/util/ArrayList;Ljava/util/ArrayList;" 684 "Ljava/util/ArrayList;)V"), 685 reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty) 686 }, 687 { 688 const_cast<char *>("getNextWordNative"), 689 const_cast<char *>("(JI[I[Z)I"), 690 reinterpret_cast<void *>(latinime_BinaryDictionary_getNextWord) 691 }, 692 { 693 const_cast<char *>("addUnigramEntryNative"), 694 const_cast<char *>("(J[II[IIZZZI)Z"), 695 reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramEntry) 696 }, 697 { 698 const_cast<char *>("removeUnigramEntryNative"), 699 const_cast<char *>("(J[I)Z"), 700 reinterpret_cast<void *>(latinime_BinaryDictionary_removeUnigramEntry) 701 }, 702 { 703 const_cast<char *>("addNgramEntryNative"), 704 const_cast<char *>("(J[[I[Z[III)Z"), 705 reinterpret_cast<void *>(latinime_BinaryDictionary_addNgramEntry) 706 }, 707 { 708 const_cast<char *>("removeNgramEntryNative"), 709 const_cast<char *>("(J[[I[Z[I)Z"), 710 reinterpret_cast<void *>(latinime_BinaryDictionary_removeNgramEntry) 711 }, 712 { 713 const_cast<char *>("updateEntriesForWordWithNgramContextNative"), 714 const_cast<char *>("(J[[I[Z[IZII)Z"), 715 reinterpret_cast<void *>(latinime_BinaryDictionary_updateEntriesForWordWithNgramContext) 716 }, 717 { 718 const_cast<char *>("updateEntriesForInputEventsNative"), 719 const_cast<char *>( 720 "(J[Lcom/android/inputmethod/latin/utils/WordInputEventForPersonalization;I)I"), 721 reinterpret_cast<void *>(latinime_BinaryDictionary_updateEntriesForInputEvents) 722 }, 723 { 724 const_cast<char *>("getPropertyNative"), 725 const_cast<char *>("(JLjava/lang/String;)Ljava/lang/String;"), 726 reinterpret_cast<void *>(latinime_BinaryDictionary_getProperty) 727 }, 728 { 729 const_cast<char *>("isCorruptedNative"), 730 const_cast<char *>("(J)Z"), 731 reinterpret_cast<void *>(latinime_BinaryDictionary_isCorruptedNative) 732 }, 733 { 734 const_cast<char *>("migrateNative"), 735 const_cast<char *>("(JLjava/lang/String;J)Z"), 736 reinterpret_cast<void *>(latinime_BinaryDictionary_migrateNative) 737 } 738 }; 739 740 int register_BinaryDictionary(JNIEnv *env) { 741 const char *const kClassPathName = "com/android/inputmethod/latin/BinaryDictionary"; 742 return registerNativeMethods(env, kClassPathName, sMethods, NELEMS(sMethods)); 743 } 744 } // namespace latinime 745