1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #define LOG_TAG "LatinIME: jni: BinaryDictionary" 18 19 #include "com_android_inputmethod_latin_BinaryDictionary.h" 20 21 #include <cstring> // for memset() 22 #include <vector> 23 24 #include "defines.h" 25 #include "jni.h" 26 #include "jni_common.h" 27 #include "suggest/core/dictionary/dictionary.h" 28 #include "suggest/core/dictionary/property/unigram_property.h" 29 #include "suggest/core/dictionary/property/word_property.h" 30 #include "suggest/core/result/suggestion_results.h" 31 #include "suggest/core/session/prev_words_info.h" 32 #include "suggest/core/suggest_options.h" 33 #include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h" 34 #include "utils/char_utils.h" 35 #include "utils/jni_data_utils.h" 36 #include "utils/log_utils.h" 37 #include "utils/time_keeper.h" 38 39 namespace latinime { 40 41 class ProximityInfo; 42 43 static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir, 44 jlong dictOffset, jlong dictSize, jboolean isUpdatable) { 45 PROF_OPEN; 46 PROF_START(66); 47 const jsize sourceDirUtf8Length = env->GetStringUTFLength(sourceDir); 48 if (sourceDirUtf8Length <= 0) { 49 AKLOGE("DICT: Can't get sourceDir string"); 50 return 0; 51 } 52 char sourceDirChars[sourceDirUtf8Length + 1]; 53 env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars); 54 sourceDirChars[sourceDirUtf8Length] = '\0'; 55 DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy( 56 DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile( 57 sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize), 58 isUpdatable == JNI_TRUE)); 59 if (!dictionaryStructureWithBufferPolicy) { 60 return 0; 61 } 62 63 Dictionary *const dictionary = 64 new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy)); 65 PROF_END(66); 66 PROF_CLOSE; 67 return reinterpret_cast<jlong>(dictionary); 68 } 69 70 static jlong latinime_BinaryDictionary_createOnMemory(JNIEnv *env, jclass clazz, 71 jlong formatVersion, jstring locale, jobjectArray attributeKeyStringArray, 72 jobjectArray attributeValueStringArray) { 73 const jsize localeUtf8Length = env->GetStringUTFLength(locale); 74 char localeChars[localeUtf8Length + 1]; 75 env->GetStringUTFRegion(locale, 0, env->GetStringLength(locale), localeChars); 76 localeChars[localeUtf8Length] = '\0'; 77 std::vector<int> localeCodePoints; 78 HeaderReadWriteUtils::insertCharactersIntoVector(localeChars, &localeCodePoints); 79 const int keyCount = env->GetArrayLength(attributeKeyStringArray); 80 const int valueCount = env->GetArrayLength(attributeValueStringArray); 81 if (keyCount != valueCount) { 82 return false; 83 } 84 DictionaryHeaderStructurePolicy::AttributeMap attributeMap = 85 JniDataUtils::constructAttributeMap(env, attributeKeyStringArray, 86 attributeValueStringArray); 87 DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy = 88 DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict( 89 formatVersion, localeCodePoints, &attributeMap); 90 if (!dictionaryStructureWithBufferPolicy) { 91 return 0; 92 } 93 Dictionary *const dictionary = 94 new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy)); 95 return reinterpret_cast<jlong>(dictionary); 96 } 97 98 static bool latinime_BinaryDictionary_flush(JNIEnv *env, jclass clazz, jlong dict, 99 jstring filePath) { 100 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 101 if (!dictionary) return false; 102 const jsize filePathUtf8Length = env->GetStringUTFLength(filePath); 103 char filePathChars[filePathUtf8Length + 1]; 104 env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars); 105 filePathChars[filePathUtf8Length] = '\0'; 106 return dictionary->flush(filePathChars); 107 } 108 109 static bool latinime_BinaryDictionary_needsToRunGC(JNIEnv *env, jclass clazz, 110 jlong dict, jboolean mindsBlockByGC) { 111 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 112 if (!dictionary) return false; 113 return dictionary->needsToRunGC(mindsBlockByGC == JNI_TRUE); 114 } 115 116 static bool latinime_BinaryDictionary_flushWithGC(JNIEnv *env, jclass clazz, jlong dict, 117 jstring filePath) { 118 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 119 if (!dictionary) return false; 120 const jsize filePathUtf8Length = env->GetStringUTFLength(filePath); 121 char filePathChars[filePathUtf8Length + 1]; 122 env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars); 123 filePathChars[filePathUtf8Length] = '\0'; 124 return dictionary->flushWithGC(filePathChars); 125 } 126 127 static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dict) { 128 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 129 if (!dictionary) return; 130 delete dictionary; 131 } 132 133 static void latinime_BinaryDictionary_getHeaderInfo(JNIEnv *env, jclass clazz, jlong dict, 134 jintArray outHeaderSize, jintArray outFormatVersion, jobject outAttributeKeys, 135 jobject outAttributeValues) { 136 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 137 if (!dictionary) return; 138 const DictionaryHeaderStructurePolicy *const headerPolicy = 139 dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy(); 140 JniDataUtils::putIntToArray(env, outHeaderSize, 0 /* index */, headerPolicy->getSize()); 141 JniDataUtils::putIntToArray(env, outFormatVersion, 0 /* index */, 142 headerPolicy->getFormatVersionNumber()); 143 // Output attribute map 144 jclass arrayListClass = env->FindClass("java/util/ArrayList"); 145 jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z"); 146 const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap = 147 headerPolicy->getAttributeMap(); 148 for (DictionaryHeaderStructurePolicy::AttributeMap::const_iterator it = attributeMap->begin(); 149 it != attributeMap->end(); ++it) { 150 // Output key 151 jintArray keyCodePointArray = env->NewIntArray(it->first.size()); 152 JniDataUtils::outputCodePoints(env, keyCodePointArray, 0 /* start */, 153 it->first.size(), it->first.data(), it->first.size(), 154 false /* needsNullTermination */); 155 env->CallBooleanMethod(outAttributeKeys, addMethodId, keyCodePointArray); 156 env->DeleteLocalRef(keyCodePointArray); 157 // Output value 158 jintArray valueCodePointArray = env->NewIntArray(it->second.size()); 159 JniDataUtils::outputCodePoints(env, valueCodePointArray, 0 /* start */, 160 it->second.size(), it->second.data(), it->second.size(), 161 false /* needsNullTermination */); 162 env->CallBooleanMethod(outAttributeValues, addMethodId, valueCodePointArray); 163 env->DeleteLocalRef(valueCodePointArray); 164 } 165 env->DeleteLocalRef(arrayListClass); 166 return; 167 } 168 169 static int latinime_BinaryDictionary_getFormatVersion(JNIEnv *env, jclass clazz, jlong dict) { 170 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 171 if (!dictionary) return 0; 172 const DictionaryHeaderStructurePolicy *const headerPolicy = 173 dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy(); 174 return headerPolicy->getFormatVersionNumber(); 175 } 176 177 static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong dict, 178 jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray, 179 jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray, 180 jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions, 181 jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray, 182 jintArray outSuggestionCount, jintArray outCodePointsArray, jintArray outScoresArray, 183 jintArray outSpaceIndicesArray, jintArray outTypesArray, 184 jintArray outAutoCommitFirstWordConfidenceArray, jfloatArray inOutLanguageWeight) { 185 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 186 // Assign 0 to outSuggestionCount here in case of returning earlier in this method. 187 JniDataUtils::putIntToArray(env, outSuggestionCount, 0 /* index */, 0); 188 if (!dictionary) { 189 return; 190 } 191 ProximityInfo *pInfo = reinterpret_cast<ProximityInfo *>(proximityInfo); 192 DicTraverseSession *traverseSession = 193 reinterpret_cast<DicTraverseSession *>(dicTraverseSession); 194 if (!traverseSession) { 195 return; 196 } 197 // Input values 198 int xCoordinates[inputSize]; 199 int yCoordinates[inputSize]; 200 int times[inputSize]; 201 int pointerIds[inputSize]; 202 const jsize inputCodePointsLength = env->GetArrayLength(inputCodePointsArray); 203 int inputCodePoints[inputCodePointsLength]; 204 env->GetIntArrayRegion(xCoordinatesArray, 0, inputSize, xCoordinates); 205 env->GetIntArrayRegion(yCoordinatesArray, 0, inputSize, yCoordinates); 206 env->GetIntArrayRegion(timesArray, 0, inputSize, times); 207 env->GetIntArrayRegion(pointerIdsArray, 0, inputSize, pointerIds); 208 env->GetIntArrayRegion(inputCodePointsArray, 0, inputCodePointsLength, inputCodePoints); 209 210 const jsize numberOfOptions = env->GetArrayLength(suggestOptions); 211 int options[numberOfOptions]; 212 env->GetIntArrayRegion(suggestOptions, 0, numberOfOptions, options); 213 SuggestOptions givenSuggestOptions(options, numberOfOptions); 214 215 // Output values 216 /* By the way, let's check the output array length here to make sure */ 217 const jsize outputCodePointsLength = env->GetArrayLength(outCodePointsArray); 218 if (outputCodePointsLength != (MAX_WORD_LENGTH * MAX_RESULTS)) { 219 AKLOGE("Invalid outputCodePointsLength: %d", outputCodePointsLength); 220 ASSERT(false); 221 return; 222 } 223 const jsize scoresLength = env->GetArrayLength(outScoresArray); 224 if (scoresLength != MAX_RESULTS) { 225 AKLOGE("Invalid scoresLength: %d", scoresLength); 226 ASSERT(false); 227 return; 228 } 229 const jsize outputAutoCommitFirstWordConfidenceLength = 230 env->GetArrayLength(outAutoCommitFirstWordConfidenceArray); 231 ASSERT(outputAutoCommitFirstWordConfidenceLength == 1); 232 if (outputAutoCommitFirstWordConfidenceLength != 1) { 233 // We only use the first result, as obviously we will only ever autocommit the first one 234 AKLOGE("Invalid outputAutoCommitFirstWordConfidenceLength: %d", 235 outputAutoCommitFirstWordConfidenceLength); 236 ASSERT(false); 237 return; 238 } 239 float languageWeight; 240 env->GetFloatArrayRegion(inOutLanguageWeight, 0, 1 /* len */, &languageWeight); 241 SuggestionResults suggestionResults(MAX_RESULTS); 242 const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env, 243 prevWordCodePointArrays, isBeginningOfSentenceArray); 244 if (givenSuggestOptions.isGesture() || inputSize > 0) { 245 // TODO: Use SuggestionResults to return suggestions. 246 dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates, 247 times, pointerIds, inputCodePoints, inputSize, &prevWordsInfo, 248 &givenSuggestOptions, languageWeight, &suggestionResults); 249 } else { 250 dictionary->getPredictions(&prevWordsInfo, &suggestionResults); 251 } 252 suggestionResults.outputSuggestions(env, outSuggestionCount, outCodePointsArray, 253 outScoresArray, outSpaceIndicesArray, outTypesArray, 254 outAutoCommitFirstWordConfidenceArray, inOutLanguageWeight); 255 } 256 257 static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, jlong dict, 258 jintArray word) { 259 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 260 if (!dictionary) return NOT_A_PROBABILITY; 261 const jsize wordLength = env->GetArrayLength(word); 262 int codePoints[wordLength]; 263 env->GetIntArrayRegion(word, 0, wordLength, codePoints); 264 return dictionary->getProbability(codePoints, wordLength); 265 } 266 267 static jint latinime_BinaryDictionary_getMaxProbabilityOfExactMatches( 268 JNIEnv *env, jclass clazz, jlong dict, jintArray word) { 269 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 270 if (!dictionary) return NOT_A_PROBABILITY; 271 const jsize wordLength = env->GetArrayLength(word); 272 int codePoints[wordLength]; 273 env->GetIntArrayRegion(word, 0, wordLength, codePoints); 274 return dictionary->getMaxProbabilityOfExactMatches(codePoints, wordLength); 275 } 276 277 static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass clazz, 278 jlong dict, jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray, 279 jintArray word) { 280 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 281 if (!dictionary) return JNI_FALSE; 282 const jsize wordLength = env->GetArrayLength(word); 283 int wordCodePoints[wordLength]; 284 env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); 285 const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env, 286 prevWordCodePointArrays, isBeginningOfSentenceArray); 287 return dictionary->getNgramProbability(&prevWordsInfo, wordCodePoints, wordLength); 288 } 289 290 // Method to iterate all words in the dictionary for makedict. 291 // If token is 0, this method newly starts iterating the dictionary. This method returns 0 when 292 // the dictionary does not have a next word. 293 static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz, 294 jlong dict, jint token, jintArray outCodePoints, jbooleanArray outIsBeginningOfSentence) { 295 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 296 if (!dictionary) return 0; 297 const jsize codePointBufSize = env->GetArrayLength(outCodePoints); 298 if (codePointBufSize != MAX_WORD_LENGTH) { 299 AKLOGE("Invalid outCodePointsLength: %d", codePointBufSize); 300 ASSERT(false); 301 return 0; 302 } 303 int wordCodePoints[codePointBufSize]; 304 int wordCodePointCount = 0; 305 const int nextToken = dictionary->getNextWordAndNextToken(token, wordCodePoints, 306 &wordCodePointCount); 307 JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */, 308 MAX_WORD_LENGTH /* maxLength */, wordCodePoints, wordCodePointCount, 309 false /* needsNullTermination */); 310 bool isBeginningOfSentence = false; 311 if (wordCodePointCount > 0 && wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) { 312 isBeginningOfSentence = true; 313 } 314 JniDataUtils::putBooleanToArray(env, outIsBeginningOfSentence, 0 /* index */, 315 isBeginningOfSentence); 316 return nextToken; 317 } 318 319 static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz, 320 jlong dict, jintArray word, jboolean isBeginningOfSentence, jintArray outCodePoints, 321 jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets, 322 jobject outBigramProbabilityInfo, jobject outShortcutTargets, 323 jobject outShortcutProbabilities) { 324 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 325 if (!dictionary) return; 326 const jsize wordLength = env->GetArrayLength(word); 327 if (wordLength > MAX_WORD_LENGTH) { 328 AKLOGE("Invalid wordLength: %d", wordLength); 329 return; 330 } 331 int wordCodePoints[MAX_WORD_LENGTH]; 332 env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); 333 int codePointCount = wordLength; 334 if (isBeginningOfSentence) { 335 codePointCount = CharUtils::attachBeginningOfSentenceMarker( 336 wordCodePoints, wordLength, MAX_WORD_LENGTH); 337 if (codePointCount < 0) { 338 AKLOGE("Cannot attach Beginning-of-Sentence marker."); 339 return; 340 } 341 } 342 const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, codePointCount); 343 wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo, 344 outBigramTargets, outBigramProbabilityInfo, outShortcutTargets, 345 outShortcutProbabilities); 346 } 347 348 static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz, jlong dict, 349 jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability, 350 jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isBlacklisted, 351 jint timestamp) { 352 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 353 if (!dictionary) { 354 return false; 355 } 356 jsize codePointCount = env->GetArrayLength(word); 357 int codePoints[codePointCount]; 358 env->GetIntArrayRegion(word, 0, codePointCount, codePoints); 359 std::vector<UnigramProperty::ShortcutProperty> shortcuts; 360 std::vector<int> shortcutTargetCodePoints; 361 JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints); 362 if (!shortcutTargetCodePoints.empty()) { 363 shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability); 364 } 365 // Use 1 for count to indicate the word has inputted. 366 const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord, 367 isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts); 368 return dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty); 369 } 370 371 static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass clazz, jlong dict, 372 jintArray word) { 373 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 374 if (!dictionary) { 375 return false; 376 } 377 jsize codePointCount = env->GetArrayLength(word); 378 int codePoints[codePointCount]; 379 env->GetIntArrayRegion(word, 0, codePointCount, codePoints); 380 return dictionary->removeUnigramEntry(codePoints, codePointCount); 381 } 382 383 static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, jlong dict, 384 jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray, 385 jintArray word, jint probability, jint timestamp) { 386 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 387 if (!dictionary) { 388 return false; 389 } 390 const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env, 391 prevWordCodePointArrays, isBeginningOfSentenceArray); 392 jsize wordLength = env->GetArrayLength(word); 393 int wordCodePoints[wordLength]; 394 env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); 395 const std::vector<int> bigramTargetCodePoints( 396 wordCodePoints, wordCodePoints + wordLength); 397 // Use 1 for count to indicate the bigram has inputted. 398 const BigramProperty bigramProperty(&bigramTargetCodePoints, probability, 399 timestamp, 0 /* level */, 1 /* count */); 400 return dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty); 401 } 402 403 static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict, 404 jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray, 405 jintArray word) { 406 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 407 if (!dictionary) { 408 return false; 409 } 410 const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env, 411 prevWordCodePointArrays, isBeginningOfSentenceArray); 412 jsize wordLength = env->GetArrayLength(word); 413 int wordCodePoints[wordLength]; 414 env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); 415 return dictionary->removeNgramEntry(&prevWordsInfo, wordCodePoints, wordLength); 416 } 417 418 // Returns how many language model params are processed. 419 static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, jclass clazz, 420 jlong dict, jobjectArray languageModelParams, jint startIndex) { 421 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 422 if (!dictionary) { 423 return 0; 424 } 425 jsize languageModelParamCount = env->GetArrayLength(languageModelParams); 426 if (languageModelParamCount == 0 || startIndex >= languageModelParamCount) { 427 return 0; 428 } 429 jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, 0); 430 jclass languageModelParamClass = env->GetObjectClass(languageModelParam); 431 env->DeleteLocalRef(languageModelParam); 432 433 jfieldID word0FieldId = env->GetFieldID(languageModelParamClass, "mWord0", "[I"); 434 jfieldID word1FieldId = env->GetFieldID(languageModelParamClass, "mWord1", "[I"); 435 jfieldID unigramProbabilityFieldId = 436 env->GetFieldID(languageModelParamClass, "mUnigramProbability", "I"); 437 jfieldID bigramProbabilityFieldId = 438 env->GetFieldID(languageModelParamClass, "mBigramProbability", "I"); 439 jfieldID timestampFieldId = 440 env->GetFieldID(languageModelParamClass, "mTimestamp", "I"); 441 jfieldID shortcutTargetFieldId = 442 env->GetFieldID(languageModelParamClass, "mShortcutTarget", "[I"); 443 jfieldID shortcutProbabilityFieldId = 444 env->GetFieldID(languageModelParamClass, "mShortcutProbability", "I"); 445 jfieldID isNotAWordFieldId = 446 env->GetFieldID(languageModelParamClass, "mIsNotAWord", "Z"); 447 jfieldID isBlacklistedFieldId = 448 env->GetFieldID(languageModelParamClass, "mIsBlacklisted", "Z"); 449 env->DeleteLocalRef(languageModelParamClass); 450 451 for (int i = startIndex; i < languageModelParamCount; ++i) { 452 jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, i); 453 // languageModelParam is a set of params for word1; thus, word1 cannot be null. On the 454 // other hand, word0 can be null and then it means the set of params doesn't contain bigram 455 // information. 456 jintArray word0 = static_cast<jintArray>( 457 env->GetObjectField(languageModelParam, word0FieldId)); 458 jsize word0Length = word0 ? env->GetArrayLength(word0) : 0; 459 int word0CodePoints[word0Length]; 460 if (word0) { 461 env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints); 462 } 463 jintArray word1 = static_cast<jintArray>( 464 env->GetObjectField(languageModelParam, word1FieldId)); 465 jsize word1Length = env->GetArrayLength(word1); 466 int word1CodePoints[word1Length]; 467 env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); 468 jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId); 469 jint timestamp = env->GetIntField(languageModelParam, timestampFieldId); 470 jboolean isNotAWord = env->GetBooleanField(languageModelParam, isNotAWordFieldId); 471 jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId); 472 jintArray shortcutTarget = static_cast<jintArray>( 473 env->GetObjectField(languageModelParam, shortcutTargetFieldId)); 474 std::vector<UnigramProperty::ShortcutProperty> shortcuts; 475 std::vector<int> shortcutTargetCodePoints; 476 JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints); 477 if (!shortcutTargetCodePoints.empty()) { 478 jint shortcutProbability = 479 env->GetIntField(languageModelParam, shortcutProbabilityFieldId); 480 shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability); 481 } 482 // Use 1 for count to indicate the word has inputted. 483 const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord, 484 isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */, 485 &shortcuts); 486 dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty); 487 if (word0) { 488 jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId); 489 const std::vector<int> bigramTargetCodePoints( 490 word1CodePoints, word1CodePoints + word1Length); 491 // Use 1 for count to indicate the bigram has inputted. 492 const BigramProperty bigramProperty(&bigramTargetCodePoints, bigramProbability, 493 timestamp, 0 /* level */, 1 /* count */); 494 const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length, 495 false /* isBeginningOfSentence */); 496 dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty); 497 } 498 if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) { 499 return i + 1; 500 } 501 env->DeleteLocalRef(word0); 502 env->DeleteLocalRef(word1); 503 env->DeleteLocalRef(shortcutTarget); 504 env->DeleteLocalRef(languageModelParam); 505 } 506 return languageModelParamCount; 507 } 508 509 static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz, jlong dict, 510 jstring query) { 511 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 512 if (!dictionary) { 513 return env->NewStringUTF(""); 514 } 515 const jsize queryUtf8Length = env->GetStringUTFLength(query); 516 char queryChars[queryUtf8Length + 1]; 517 env->GetStringUTFRegion(query, 0, env->GetStringLength(query), queryChars); 518 queryChars[queryUtf8Length] = '\0'; 519 static const int GET_PROPERTY_RESULT_LENGTH = 100; 520 char resultChars[GET_PROPERTY_RESULT_LENGTH]; 521 resultChars[0] = '\0'; 522 dictionary->getProperty(queryChars, queryUtf8Length, resultChars, GET_PROPERTY_RESULT_LENGTH); 523 return env->NewStringUTF(resultChars); 524 } 525 526 static bool latinime_BinaryDictionary_isCorruptedNative(JNIEnv *env, jclass clazz, jlong dict) { 527 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 528 if (!dictionary) { 529 return false; 530 } 531 return dictionary->getDictionaryStructurePolicy()->isCorrupted(); 532 } 533 534 static DictionaryStructureWithBufferPolicy::StructurePolicyPtr runGCAndGetNewStructurePolicy( 535 DictionaryStructureWithBufferPolicy::StructurePolicyPtr structurePolicy, 536 const char *const dictFilePath) { 537 structurePolicy->flushWithGC(dictFilePath); 538 structurePolicy.release(); 539 return DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile( 540 dictFilePath, 0 /* offset */, 0 /* size */, true /* isUpdatable */); 541 } 542 543 static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, jlong dict, 544 jstring dictFilePath, jlong newFormatVersion) { 545 Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict); 546 if (!dictionary) { 547 return false; 548 } 549 const jsize filePathUtf8Length = env->GetStringUTFLength(dictFilePath); 550 char dictFilePathChars[filePathUtf8Length + 1]; 551 env->GetStringUTFRegion(dictFilePath, 0, env->GetStringLength(dictFilePath), dictFilePathChars); 552 dictFilePathChars[filePathUtf8Length] = '\0'; 553 554 const DictionaryHeaderStructurePolicy *const headerPolicy = 555 dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy(); 556 DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy = 557 DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict( 558 newFormatVersion, *headerPolicy->getLocale(), headerPolicy->getAttributeMap()); 559 if (!dictionaryStructureWithBufferPolicy) { 560 LogUtils::logToJava(env, "Cannot migrate header."); 561 return false; 562 } 563 564 int wordCodePoints[MAX_WORD_LENGTH]; 565 int wordCodePointCount = 0; 566 int token = 0; 567 // Add unigrams. 568 do { 569 token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount); 570 const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, 571 wordCodePointCount); 572 if (wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) { 573 // Skip beginning-of-sentence unigram. 574 continue; 575 } 576 if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) { 577 dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy( 578 std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars); 579 if (!dictionaryStructureWithBufferPolicy) { 580 LogUtils::logToJava(env, "Cannot open dict after GC."); 581 return false; 582 } 583 } 584 if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(wordCodePoints, 585 wordCodePointCount, wordProperty.getUnigramProperty())) { 586 LogUtils::logToJava(env, "Cannot add unigram to the new dict."); 587 return false; 588 } 589 } while (token != 0); 590 591 // Add bigrams. 592 do { 593 token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount); 594 const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, 595 wordCodePointCount); 596 if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) { 597 dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy( 598 std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars); 599 if (!dictionaryStructureWithBufferPolicy) { 600 LogUtils::logToJava(env, "Cannot open dict after GC."); 601 return false; 602 } 603 } 604 const PrevWordsInfo prevWordsInfo(wordCodePoints, wordCodePointCount, 605 wordProperty.getUnigramProperty()->representsBeginningOfSentence()); 606 for (const BigramProperty &bigramProperty : *wordProperty.getBigramProperties()) { 607 if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&prevWordsInfo, 608 &bigramProperty)) { 609 LogUtils::logToJava(env, "Cannot add bigram to the new dict."); 610 return false; 611 } 612 } 613 } while (token != 0); 614 // Save to File. 615 dictionaryStructureWithBufferPolicy->flushWithGC(dictFilePathChars); 616 return true; 617 } 618 619 static const JNINativeMethod sMethods[] = { 620 { 621 const_cast<char *>("openNative"), 622 const_cast<char *>("(Ljava/lang/String;JJZ)J"), 623 reinterpret_cast<void *>(latinime_BinaryDictionary_open) 624 }, 625 { 626 const_cast<char *>("createOnMemoryNative"), 627 const_cast<char *>("(JLjava/lang/String;[Ljava/lang/String;[Ljava/lang/String;)J"), 628 reinterpret_cast<void *>(latinime_BinaryDictionary_createOnMemory) 629 }, 630 { 631 const_cast<char *>("closeNative"), 632 const_cast<char *>("(J)V"), 633 reinterpret_cast<void *>(latinime_BinaryDictionary_close) 634 }, 635 { 636 const_cast<char *>("getFormatVersionNative"), 637 const_cast<char *>("(J)I"), 638 reinterpret_cast<void *>(latinime_BinaryDictionary_getFormatVersion) 639 }, 640 { 641 const_cast<char *>("getHeaderInfoNative"), 642 const_cast<char *>("(J[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"), 643 reinterpret_cast<void *>(latinime_BinaryDictionary_getHeaderInfo) 644 }, 645 { 646 const_cast<char *>("flushNative"), 647 const_cast<char *>("(JLjava/lang/String;)Z"), 648 reinterpret_cast<void *>(latinime_BinaryDictionary_flush) 649 }, 650 { 651 const_cast<char *>("needsToRunGCNative"), 652 const_cast<char *>("(JZ)Z"), 653 reinterpret_cast<void *>(latinime_BinaryDictionary_needsToRunGC) 654 }, 655 { 656 const_cast<char *>("flushWithGCNative"), 657 const_cast<char *>("(JLjava/lang/String;)Z"), 658 reinterpret_cast<void *>(latinime_BinaryDictionary_flushWithGC) 659 }, 660 { 661 const_cast<char *>("getSuggestionsNative"), 662 const_cast<char *>("(JJJ[I[I[I[I[II[I[[I[Z[I[I[I[I[I[I[F)V"), 663 reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions) 664 }, 665 { 666 const_cast<char *>("getProbabilityNative"), 667 const_cast<char *>("(J[I)I"), 668 reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability) 669 }, 670 { 671 const_cast<char *>("getMaxProbabilityOfExactMatchesNative"), 672 const_cast<char *>("(J[I)I"), 673 reinterpret_cast<void *>(latinime_BinaryDictionary_getMaxProbabilityOfExactMatches) 674 }, 675 { 676 const_cast<char *>("getNgramProbabilityNative"), 677 const_cast<char *>("(J[[I[Z[I)I"), 678 reinterpret_cast<void *>(latinime_BinaryDictionary_getNgramProbability) 679 }, 680 { 681 const_cast<char *>("getWordPropertyNative"), 682 const_cast<char *>("(J[IZ[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;" 683 "Ljava/util/ArrayList;Ljava/util/ArrayList;)V"), 684 reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty) 685 }, 686 { 687 const_cast<char *>("getNextWordNative"), 688 const_cast<char *>("(JI[I[Z)I"), 689 reinterpret_cast<void *>(latinime_BinaryDictionary_getNextWord) 690 }, 691 { 692 const_cast<char *>("addUnigramEntryNative"), 693 const_cast<char *>("(J[II[IIZZZI)Z"), 694 reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramEntry) 695 }, 696 { 697 const_cast<char *>("removeUnigramEntryNative"), 698 const_cast<char *>("(J[I)Z"), 699 reinterpret_cast<void *>(latinime_BinaryDictionary_removeUnigramEntry) 700 }, 701 { 702 const_cast<char *>("addNgramEntryNative"), 703 const_cast<char *>("(J[[I[Z[III)Z"), 704 reinterpret_cast<void *>(latinime_BinaryDictionary_addNgramEntry) 705 }, 706 { 707 const_cast<char *>("removeNgramEntryNative"), 708 const_cast<char *>("(J[[I[Z[I)Z"), 709 reinterpret_cast<void *>(latinime_BinaryDictionary_removeNgramEntry) 710 }, 711 { 712 const_cast<char *>("addMultipleDictionaryEntriesNative"), 713 const_cast<char *>( 714 "(J[Lcom/android/inputmethod/latin/utils/LanguageModelParam;I)I"), 715 reinterpret_cast<void *>(latinime_BinaryDictionary_addMultipleDictionaryEntries) 716 }, 717 { 718 const_cast<char *>("getPropertyNative"), 719 const_cast<char *>("(JLjava/lang/String;)Ljava/lang/String;"), 720 reinterpret_cast<void *>(latinime_BinaryDictionary_getProperty) 721 }, 722 { 723 const_cast<char *>("isCorruptedNative"), 724 const_cast<char *>("(J)Z"), 725 reinterpret_cast<void *>(latinime_BinaryDictionary_isCorruptedNative) 726 }, 727 { 728 const_cast<char *>("migrateNative"), 729 const_cast<char *>("(JLjava/lang/String;J)Z"), 730 reinterpret_cast<void *>(latinime_BinaryDictionary_migrateNative) 731 } 732 }; 733 734 int register_BinaryDictionary(JNIEnv *env) { 735 const char *const kClassPathName = "com/android/inputmethod/latin/BinaryDictionary"; 736 return registerNativeMethods(env, kClassPathName, sMethods, NELEMS(sMethods)); 737 } 738 } // namespace latinime 739