Home | History | Annotate | Download | only in jni
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #define LOG_TAG "LatinIME: jni: BinaryDictionary"
     18 
     19 #include "com_android_inputmethod_latin_BinaryDictionary.h"
     20 
     21 #include <cstring> // for memset()
     22 #include <vector>
     23 
     24 #include "defines.h"
     25 #include "jni.h"
     26 #include "jni_common.h"
     27 #include "suggest/core/dictionary/dictionary.h"
     28 #include "suggest/core/dictionary/property/unigram_property.h"
     29 #include "suggest/core/dictionary/property/word_property.h"
     30 #include "suggest/core/result/suggestion_results.h"
     31 #include "suggest/core/session/prev_words_info.h"
     32 #include "suggest/core/suggest_options.h"
     33 #include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
     34 #include "utils/char_utils.h"
     35 #include "utils/jni_data_utils.h"
     36 #include "utils/log_utils.h"
     37 #include "utils/time_keeper.h"
     38 
     39 namespace latinime {
     40 
     41 class ProximityInfo;
     42 
     43 static jlong latinime_BinaryDictionary_open(JNIEnv *env, jclass clazz, jstring sourceDir,
     44         jlong dictOffset, jlong dictSize, jboolean isUpdatable) {
     45     PROF_OPEN;
     46     PROF_START(66);
     47     const jsize sourceDirUtf8Length = env->GetStringUTFLength(sourceDir);
     48     if (sourceDirUtf8Length <= 0) {
     49         AKLOGE("DICT: Can't get sourceDir string");
     50         return 0;
     51     }
     52     char sourceDirChars[sourceDirUtf8Length + 1];
     53     env->GetStringUTFRegion(sourceDir, 0, env->GetStringLength(sourceDir), sourceDirChars);
     54     sourceDirChars[sourceDirUtf8Length] = '\0';
     55     DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy(
     56             DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile(
     57                     sourceDirChars, static_cast<int>(dictOffset), static_cast<int>(dictSize),
     58                     isUpdatable == JNI_TRUE));
     59     if (!dictionaryStructureWithBufferPolicy) {
     60         return 0;
     61     }
     62 
     63     Dictionary *const dictionary =
     64             new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy));
     65     PROF_END(66);
     66     PROF_CLOSE;
     67     return reinterpret_cast<jlong>(dictionary);
     68 }
     69 
     70 static jlong latinime_BinaryDictionary_createOnMemory(JNIEnv *env, jclass clazz,
     71         jlong formatVersion, jstring locale, jobjectArray attributeKeyStringArray,
     72         jobjectArray attributeValueStringArray) {
     73     const jsize localeUtf8Length = env->GetStringUTFLength(locale);
     74     char localeChars[localeUtf8Length + 1];
     75     env->GetStringUTFRegion(locale, 0, env->GetStringLength(locale), localeChars);
     76     localeChars[localeUtf8Length] = '\0';
     77     std::vector<int> localeCodePoints;
     78     HeaderReadWriteUtils::insertCharactersIntoVector(localeChars, &localeCodePoints);
     79     const int keyCount = env->GetArrayLength(attributeKeyStringArray);
     80     const int valueCount = env->GetArrayLength(attributeValueStringArray);
     81     if (keyCount != valueCount) {
     82         return false;
     83     }
     84     DictionaryHeaderStructurePolicy::AttributeMap attributeMap =
     85             JniDataUtils::constructAttributeMap(env, attributeKeyStringArray,
     86                     attributeValueStringArray);
     87     DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy =
     88             DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict(
     89                     formatVersion, localeCodePoints, &attributeMap);
     90     if (!dictionaryStructureWithBufferPolicy) {
     91         return 0;
     92     }
     93     Dictionary *const dictionary =
     94             new Dictionary(env, std::move(dictionaryStructureWithBufferPolicy));
     95     return reinterpret_cast<jlong>(dictionary);
     96 }
     97 
     98 static bool latinime_BinaryDictionary_flush(JNIEnv *env, jclass clazz, jlong dict,
     99         jstring filePath) {
    100     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    101     if (!dictionary) return false;
    102     const jsize filePathUtf8Length = env->GetStringUTFLength(filePath);
    103     char filePathChars[filePathUtf8Length + 1];
    104     env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars);
    105     filePathChars[filePathUtf8Length] = '\0';
    106     return dictionary->flush(filePathChars);
    107 }
    108 
    109 static bool latinime_BinaryDictionary_needsToRunGC(JNIEnv *env, jclass clazz,
    110         jlong dict, jboolean mindsBlockByGC) {
    111     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    112     if (!dictionary) return false;
    113     return dictionary->needsToRunGC(mindsBlockByGC == JNI_TRUE);
    114 }
    115 
    116 static bool latinime_BinaryDictionary_flushWithGC(JNIEnv *env, jclass clazz, jlong dict,
    117         jstring filePath) {
    118     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    119     if (!dictionary) return false;
    120     const jsize filePathUtf8Length = env->GetStringUTFLength(filePath);
    121     char filePathChars[filePathUtf8Length + 1];
    122     env->GetStringUTFRegion(filePath, 0, env->GetStringLength(filePath), filePathChars);
    123     filePathChars[filePathUtf8Length] = '\0';
    124     return dictionary->flushWithGC(filePathChars);
    125 }
    126 
    127 static void latinime_BinaryDictionary_close(JNIEnv *env, jclass clazz, jlong dict) {
    128     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    129     if (!dictionary) return;
    130     delete dictionary;
    131 }
    132 
    133 static void latinime_BinaryDictionary_getHeaderInfo(JNIEnv *env, jclass clazz, jlong dict,
    134         jintArray outHeaderSize, jintArray outFormatVersion, jobject outAttributeKeys,
    135         jobject outAttributeValues) {
    136     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    137     if (!dictionary) return;
    138     const DictionaryHeaderStructurePolicy *const headerPolicy =
    139             dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
    140     JniDataUtils::putIntToArray(env, outHeaderSize, 0 /* index */, headerPolicy->getSize());
    141     JniDataUtils::putIntToArray(env, outFormatVersion, 0 /* index */,
    142             headerPolicy->getFormatVersionNumber());
    143     // Output attribute map
    144     jclass arrayListClass = env->FindClass("java/util/ArrayList");
    145     jmethodID addMethodId = env->GetMethodID(arrayListClass, "add", "(Ljava/lang/Object;)Z");
    146     const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap =
    147             headerPolicy->getAttributeMap();
    148     for (DictionaryHeaderStructurePolicy::AttributeMap::const_iterator it = attributeMap->begin();
    149             it != attributeMap->end(); ++it) {
    150         // Output key
    151         jintArray keyCodePointArray = env->NewIntArray(it->first.size());
    152         JniDataUtils::outputCodePoints(env, keyCodePointArray, 0 /* start */,
    153                 it->first.size(), it->first.data(), it->first.size(),
    154                 false /* needsNullTermination */);
    155         env->CallBooleanMethod(outAttributeKeys, addMethodId, keyCodePointArray);
    156         env->DeleteLocalRef(keyCodePointArray);
    157         // Output value
    158         jintArray valueCodePointArray = env->NewIntArray(it->second.size());
    159         JniDataUtils::outputCodePoints(env, valueCodePointArray, 0 /* start */,
    160                 it->second.size(), it->second.data(), it->second.size(),
    161                 false /* needsNullTermination */);
    162         env->CallBooleanMethod(outAttributeValues, addMethodId, valueCodePointArray);
    163         env->DeleteLocalRef(valueCodePointArray);
    164     }
    165     env->DeleteLocalRef(arrayListClass);
    166     return;
    167 }
    168 
    169 static int latinime_BinaryDictionary_getFormatVersion(JNIEnv *env, jclass clazz, jlong dict) {
    170     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    171     if (!dictionary) return 0;
    172     const DictionaryHeaderStructurePolicy *const headerPolicy =
    173             dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
    174     return headerPolicy->getFormatVersionNumber();
    175 }
    176 
    177 static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz, jlong dict,
    178         jlong proximityInfo, jlong dicTraverseSession, jintArray xCoordinatesArray,
    179         jintArray yCoordinatesArray, jintArray timesArray, jintArray pointerIdsArray,
    180         jintArray inputCodePointsArray, jint inputSize, jintArray suggestOptions,
    181         jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
    182         jintArray outSuggestionCount, jintArray outCodePointsArray, jintArray outScoresArray,
    183         jintArray outSpaceIndicesArray, jintArray outTypesArray,
    184         jintArray outAutoCommitFirstWordConfidenceArray, jfloatArray inOutLanguageWeight) {
    185     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    186     // Assign 0 to outSuggestionCount here in case of returning earlier in this method.
    187     JniDataUtils::putIntToArray(env, outSuggestionCount, 0 /* index */, 0);
    188     if (!dictionary) {
    189         return;
    190     }
    191     ProximityInfo *pInfo = reinterpret_cast<ProximityInfo *>(proximityInfo);
    192     DicTraverseSession *traverseSession =
    193             reinterpret_cast<DicTraverseSession *>(dicTraverseSession);
    194     if (!traverseSession) {
    195         return;
    196     }
    197     // Input values
    198     int xCoordinates[inputSize];
    199     int yCoordinates[inputSize];
    200     int times[inputSize];
    201     int pointerIds[inputSize];
    202     const jsize inputCodePointsLength = env->GetArrayLength(inputCodePointsArray);
    203     int inputCodePoints[inputCodePointsLength];
    204     env->GetIntArrayRegion(xCoordinatesArray, 0, inputSize, xCoordinates);
    205     env->GetIntArrayRegion(yCoordinatesArray, 0, inputSize, yCoordinates);
    206     env->GetIntArrayRegion(timesArray, 0, inputSize, times);
    207     env->GetIntArrayRegion(pointerIdsArray, 0, inputSize, pointerIds);
    208     env->GetIntArrayRegion(inputCodePointsArray, 0, inputCodePointsLength, inputCodePoints);
    209 
    210     const jsize numberOfOptions = env->GetArrayLength(suggestOptions);
    211     int options[numberOfOptions];
    212     env->GetIntArrayRegion(suggestOptions, 0, numberOfOptions, options);
    213     SuggestOptions givenSuggestOptions(options, numberOfOptions);
    214 
    215     // Output values
    216     /* By the way, let's check the output array length here to make sure */
    217     const jsize outputCodePointsLength = env->GetArrayLength(outCodePointsArray);
    218     if (outputCodePointsLength != (MAX_WORD_LENGTH * MAX_RESULTS)) {
    219         AKLOGE("Invalid outputCodePointsLength: %d", outputCodePointsLength);
    220         ASSERT(false);
    221         return;
    222     }
    223     const jsize scoresLength = env->GetArrayLength(outScoresArray);
    224     if (scoresLength != MAX_RESULTS) {
    225         AKLOGE("Invalid scoresLength: %d", scoresLength);
    226         ASSERT(false);
    227         return;
    228     }
    229     const jsize outputAutoCommitFirstWordConfidenceLength =
    230             env->GetArrayLength(outAutoCommitFirstWordConfidenceArray);
    231     ASSERT(outputAutoCommitFirstWordConfidenceLength == 1);
    232     if (outputAutoCommitFirstWordConfidenceLength != 1) {
    233         // We only use the first result, as obviously we will only ever autocommit the first one
    234         AKLOGE("Invalid outputAutoCommitFirstWordConfidenceLength: %d",
    235                 outputAutoCommitFirstWordConfidenceLength);
    236         ASSERT(false);
    237         return;
    238     }
    239     float languageWeight;
    240     env->GetFloatArrayRegion(inOutLanguageWeight, 0, 1 /* len */, &languageWeight);
    241     SuggestionResults suggestionResults(MAX_RESULTS);
    242     const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
    243             prevWordCodePointArrays, isBeginningOfSentenceArray);
    244     if (givenSuggestOptions.isGesture() || inputSize > 0) {
    245         // TODO: Use SuggestionResults to return suggestions.
    246         dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
    247                 times, pointerIds, inputCodePoints, inputSize, &prevWordsInfo,
    248                 &givenSuggestOptions, languageWeight, &suggestionResults);
    249     } else {
    250         dictionary->getPredictions(&prevWordsInfo, &suggestionResults);
    251     }
    252     suggestionResults.outputSuggestions(env, outSuggestionCount, outCodePointsArray,
    253             outScoresArray, outSpaceIndicesArray, outTypesArray,
    254             outAutoCommitFirstWordConfidenceArray, inOutLanguageWeight);
    255 }
    256 
    257 static jint latinime_BinaryDictionary_getProbability(JNIEnv *env, jclass clazz, jlong dict,
    258         jintArray word) {
    259     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    260     if (!dictionary) return NOT_A_PROBABILITY;
    261     const jsize wordLength = env->GetArrayLength(word);
    262     int codePoints[wordLength];
    263     env->GetIntArrayRegion(word, 0, wordLength, codePoints);
    264     return dictionary->getProbability(codePoints, wordLength);
    265 }
    266 
    267 static jint latinime_BinaryDictionary_getMaxProbabilityOfExactMatches(
    268         JNIEnv *env, jclass clazz, jlong dict, jintArray word) {
    269     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    270     if (!dictionary) return NOT_A_PROBABILITY;
    271     const jsize wordLength = env->GetArrayLength(word);
    272     int codePoints[wordLength];
    273     env->GetIntArrayRegion(word, 0, wordLength, codePoints);
    274     return dictionary->getMaxProbabilityOfExactMatches(codePoints, wordLength);
    275 }
    276 
    277 static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass clazz,
    278         jlong dict, jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
    279         jintArray word) {
    280     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    281     if (!dictionary) return JNI_FALSE;
    282     const jsize wordLength = env->GetArrayLength(word);
    283     int wordCodePoints[wordLength];
    284     env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
    285     const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
    286             prevWordCodePointArrays, isBeginningOfSentenceArray);
    287     return dictionary->getNgramProbability(&prevWordsInfo, wordCodePoints, wordLength);
    288 }
    289 
    290 // Method to iterate all words in the dictionary for makedict.
    291 // If token is 0, this method newly starts iterating the dictionary. This method returns 0 when
    292 // the dictionary does not have a next word.
    293 static jint latinime_BinaryDictionary_getNextWord(JNIEnv *env, jclass clazz,
    294         jlong dict, jint token, jintArray outCodePoints, jbooleanArray outIsBeginningOfSentence) {
    295     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    296     if (!dictionary) return 0;
    297     const jsize codePointBufSize = env->GetArrayLength(outCodePoints);
    298     if (codePointBufSize != MAX_WORD_LENGTH) {
    299         AKLOGE("Invalid outCodePointsLength: %d", codePointBufSize);
    300         ASSERT(false);
    301         return 0;
    302     }
    303     int wordCodePoints[codePointBufSize];
    304     int wordCodePointCount = 0;
    305     const int nextToken = dictionary->getNextWordAndNextToken(token, wordCodePoints,
    306             &wordCodePointCount);
    307     JniDataUtils::outputCodePoints(env, outCodePoints, 0 /* start */,
    308             MAX_WORD_LENGTH /* maxLength */, wordCodePoints, wordCodePointCount,
    309             false /* needsNullTermination */);
    310     bool isBeginningOfSentence = false;
    311     if (wordCodePointCount > 0 && wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
    312         isBeginningOfSentence = true;
    313     }
    314     JniDataUtils::putBooleanToArray(env, outIsBeginningOfSentence, 0 /* index */,
    315             isBeginningOfSentence);
    316     return nextToken;
    317 }
    318 
    319 static void latinime_BinaryDictionary_getWordProperty(JNIEnv *env, jclass clazz,
    320         jlong dict, jintArray word, jboolean isBeginningOfSentence, jintArray outCodePoints,
    321         jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets,
    322         jobject outBigramProbabilityInfo, jobject outShortcutTargets,
    323         jobject outShortcutProbabilities) {
    324     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    325     if (!dictionary) return;
    326     const jsize wordLength = env->GetArrayLength(word);
    327     if (wordLength > MAX_WORD_LENGTH) {
    328         AKLOGE("Invalid wordLength: %d", wordLength);
    329         return;
    330     }
    331     int wordCodePoints[MAX_WORD_LENGTH];
    332     env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
    333     int codePointCount = wordLength;
    334     if (isBeginningOfSentence) {
    335         codePointCount = CharUtils::attachBeginningOfSentenceMarker(
    336                 wordCodePoints, wordLength, MAX_WORD_LENGTH);
    337         if (codePointCount < 0) {
    338             AKLOGE("Cannot attach Beginning-of-Sentence marker.");
    339             return;
    340         }
    341     }
    342     const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints, codePointCount);
    343     wordProperty.outputProperties(env, outCodePoints, outFlags, outProbabilityInfo,
    344             outBigramTargets, outBigramProbabilityInfo, outShortcutTargets,
    345             outShortcutProbabilities);
    346 }
    347 
    348 static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
    349         jintArray word, jint probability, jintArray shortcutTarget, jint shortcutProbability,
    350         jboolean isBeginningOfSentence, jboolean isNotAWord, jboolean isBlacklisted,
    351         jint timestamp) {
    352     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    353     if (!dictionary) {
    354         return false;
    355     }
    356     jsize codePointCount = env->GetArrayLength(word);
    357     int codePoints[codePointCount];
    358     env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
    359     std::vector<UnigramProperty::ShortcutProperty> shortcuts;
    360     std::vector<int> shortcutTargetCodePoints;
    361     JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
    362     if (!shortcutTargetCodePoints.empty()) {
    363         shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
    364     }
    365     // Use 1 for count to indicate the word has inputted.
    366     const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
    367             isBlacklisted, probability, timestamp, 0 /* level */, 1 /* count */, &shortcuts);
    368     return dictionary->addUnigramEntry(codePoints, codePointCount, &unigramProperty);
    369 }
    370 
    371 static bool latinime_BinaryDictionary_removeUnigramEntry(JNIEnv *env, jclass clazz, jlong dict,
    372         jintArray word) {
    373     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    374     if (!dictionary) {
    375         return false;
    376     }
    377     jsize codePointCount = env->GetArrayLength(word);
    378     int codePoints[codePointCount];
    379     env->GetIntArrayRegion(word, 0, codePointCount, codePoints);
    380     return dictionary->removeUnigramEntry(codePoints, codePointCount);
    381 }
    382 
    383 static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
    384         jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
    385         jintArray word, jint probability, jint timestamp) {
    386     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    387     if (!dictionary) {
    388         return false;
    389     }
    390     const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
    391             prevWordCodePointArrays, isBeginningOfSentenceArray);
    392     jsize wordLength = env->GetArrayLength(word);
    393     int wordCodePoints[wordLength];
    394     env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
    395     const std::vector<int> bigramTargetCodePoints(
    396             wordCodePoints, wordCodePoints + wordLength);
    397     // Use 1 for count to indicate the bigram has inputted.
    398     const BigramProperty bigramProperty(&bigramTargetCodePoints, probability,
    399             timestamp, 0 /* level */, 1 /* count */);
    400     return dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty);
    401 }
    402 
    403 static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
    404         jobjectArray prevWordCodePointArrays, jbooleanArray isBeginningOfSentenceArray,
    405         jintArray word) {
    406     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    407     if (!dictionary) {
    408         return false;
    409     }
    410     const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
    411             prevWordCodePointArrays, isBeginningOfSentenceArray);
    412     jsize wordLength = env->GetArrayLength(word);
    413     int wordCodePoints[wordLength];
    414     env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
    415     return dictionary->removeNgramEntry(&prevWordsInfo, wordCodePoints, wordLength);
    416 }
    417 
    418 // Returns how many language model params are processed.
    419 static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, jclass clazz,
    420         jlong dict, jobjectArray languageModelParams, jint startIndex) {
    421     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    422     if (!dictionary) {
    423         return 0;
    424     }
    425     jsize languageModelParamCount = env->GetArrayLength(languageModelParams);
    426     if (languageModelParamCount == 0 || startIndex >= languageModelParamCount) {
    427         return 0;
    428     }
    429     jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, 0);
    430     jclass languageModelParamClass = env->GetObjectClass(languageModelParam);
    431     env->DeleteLocalRef(languageModelParam);
    432 
    433     jfieldID word0FieldId = env->GetFieldID(languageModelParamClass, "mWord0", "[I");
    434     jfieldID word1FieldId = env->GetFieldID(languageModelParamClass, "mWord1", "[I");
    435     jfieldID unigramProbabilityFieldId =
    436             env->GetFieldID(languageModelParamClass, "mUnigramProbability", "I");
    437     jfieldID bigramProbabilityFieldId =
    438             env->GetFieldID(languageModelParamClass, "mBigramProbability", "I");
    439     jfieldID timestampFieldId =
    440             env->GetFieldID(languageModelParamClass, "mTimestamp", "I");
    441     jfieldID shortcutTargetFieldId =
    442             env->GetFieldID(languageModelParamClass, "mShortcutTarget", "[I");
    443     jfieldID shortcutProbabilityFieldId =
    444             env->GetFieldID(languageModelParamClass, "mShortcutProbability", "I");
    445     jfieldID isNotAWordFieldId =
    446             env->GetFieldID(languageModelParamClass, "mIsNotAWord", "Z");
    447     jfieldID isBlacklistedFieldId =
    448             env->GetFieldID(languageModelParamClass, "mIsBlacklisted", "Z");
    449     env->DeleteLocalRef(languageModelParamClass);
    450 
    451     for (int i = startIndex; i < languageModelParamCount; ++i) {
    452         jobject languageModelParam = env->GetObjectArrayElement(languageModelParams, i);
    453         // languageModelParam is a set of params for word1; thus, word1 cannot be null. On the
    454         // other hand, word0 can be null and then it means the set of params doesn't contain bigram
    455         // information.
    456         jintArray word0 = static_cast<jintArray>(
    457                 env->GetObjectField(languageModelParam, word0FieldId));
    458         jsize word0Length = word0 ? env->GetArrayLength(word0) : 0;
    459         int word0CodePoints[word0Length];
    460         if (word0) {
    461             env->GetIntArrayRegion(word0, 0, word0Length, word0CodePoints);
    462         }
    463         jintArray word1 = static_cast<jintArray>(
    464                 env->GetObjectField(languageModelParam, word1FieldId));
    465         jsize word1Length = env->GetArrayLength(word1);
    466         int word1CodePoints[word1Length];
    467         env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
    468         jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId);
    469         jint timestamp = env->GetIntField(languageModelParam, timestampFieldId);
    470         jboolean isNotAWord = env->GetBooleanField(languageModelParam, isNotAWordFieldId);
    471         jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId);
    472         jintArray shortcutTarget = static_cast<jintArray>(
    473                 env->GetObjectField(languageModelParam, shortcutTargetFieldId));
    474         std::vector<UnigramProperty::ShortcutProperty> shortcuts;
    475         std::vector<int> shortcutTargetCodePoints;
    476         JniDataUtils::jintarrayToVector(env, shortcutTarget, &shortcutTargetCodePoints);
    477         if (!shortcutTargetCodePoints.empty()) {
    478             jint shortcutProbability =
    479                     env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
    480             shortcuts.emplace_back(&shortcutTargetCodePoints, shortcutProbability);
    481         }
    482         // Use 1 for count to indicate the word has inputted.
    483         const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
    484                 isBlacklisted, unigramProbability, timestamp, 0 /* level */, 1 /* count */,
    485                 &shortcuts);
    486         dictionary->addUnigramEntry(word1CodePoints, word1Length, &unigramProperty);
    487         if (word0) {
    488             jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
    489             const std::vector<int> bigramTargetCodePoints(
    490                     word1CodePoints, word1CodePoints + word1Length);
    491             // Use 1 for count to indicate the bigram has inputted.
    492             const BigramProperty bigramProperty(&bigramTargetCodePoints, bigramProbability,
    493                     timestamp, 0 /* level */, 1 /* count */);
    494             const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
    495                     false /* isBeginningOfSentence */);
    496             dictionary->addNgramEntry(&prevWordsInfo, &bigramProperty);
    497         }
    498         if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
    499             return i + 1;
    500         }
    501         env->DeleteLocalRef(word0);
    502         env->DeleteLocalRef(word1);
    503         env->DeleteLocalRef(shortcutTarget);
    504         env->DeleteLocalRef(languageModelParam);
    505     }
    506     return languageModelParamCount;
    507 }
    508 
    509 static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz, jlong dict,
    510         jstring query) {
    511     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    512     if (!dictionary) {
    513         return env->NewStringUTF("");
    514     }
    515     const jsize queryUtf8Length = env->GetStringUTFLength(query);
    516     char queryChars[queryUtf8Length + 1];
    517     env->GetStringUTFRegion(query, 0, env->GetStringLength(query), queryChars);
    518     queryChars[queryUtf8Length] = '\0';
    519     static const int GET_PROPERTY_RESULT_LENGTH = 100;
    520     char resultChars[GET_PROPERTY_RESULT_LENGTH];
    521     resultChars[0] = '\0';
    522     dictionary->getProperty(queryChars, queryUtf8Length, resultChars, GET_PROPERTY_RESULT_LENGTH);
    523     return env->NewStringUTF(resultChars);
    524 }
    525 
    526 static bool latinime_BinaryDictionary_isCorruptedNative(JNIEnv *env, jclass clazz, jlong dict) {
    527     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    528     if (!dictionary) {
    529         return false;
    530     }
    531     return dictionary->getDictionaryStructurePolicy()->isCorrupted();
    532 }
    533 
    534 static DictionaryStructureWithBufferPolicy::StructurePolicyPtr runGCAndGetNewStructurePolicy(
    535         DictionaryStructureWithBufferPolicy::StructurePolicyPtr structurePolicy,
    536         const char *const dictFilePath) {
    537     structurePolicy->flushWithGC(dictFilePath);
    538     structurePolicy.release();
    539     return DictionaryStructureWithBufferPolicyFactory::newPolicyForExistingDictFile(
    540             dictFilePath, 0 /* offset */, 0 /* size */, true /* isUpdatable */);
    541 }
    542 
    543 static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, jlong dict,
    544         jstring dictFilePath, jlong newFormatVersion) {
    545     Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
    546     if (!dictionary) {
    547         return false;
    548     }
    549     const jsize filePathUtf8Length = env->GetStringUTFLength(dictFilePath);
    550     char dictFilePathChars[filePathUtf8Length + 1];
    551     env->GetStringUTFRegion(dictFilePath, 0, env->GetStringLength(dictFilePath), dictFilePathChars);
    552     dictFilePathChars[filePathUtf8Length] = '\0';
    553 
    554     const DictionaryHeaderStructurePolicy *const headerPolicy =
    555             dictionary->getDictionaryStructurePolicy()->getHeaderStructurePolicy();
    556     DictionaryStructureWithBufferPolicy::StructurePolicyPtr dictionaryStructureWithBufferPolicy =
    557             DictionaryStructureWithBufferPolicyFactory::newPolicyForOnMemoryDict(
    558                     newFormatVersion, *headerPolicy->getLocale(), headerPolicy->getAttributeMap());
    559     if (!dictionaryStructureWithBufferPolicy) {
    560         LogUtils::logToJava(env, "Cannot migrate header.");
    561         return false;
    562     }
    563 
    564     int wordCodePoints[MAX_WORD_LENGTH];
    565     int wordCodePointCount = 0;
    566     int token = 0;
    567     // Add unigrams.
    568     do {
    569         token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
    570         const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints,
    571                 wordCodePointCount);
    572         if (wordCodePoints[0] == CODE_POINT_BEGINNING_OF_SENTENCE) {
    573             // Skip beginning-of-sentence unigram.
    574             continue;
    575         }
    576         if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
    577             dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
    578                     std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
    579             if (!dictionaryStructureWithBufferPolicy) {
    580                 LogUtils::logToJava(env, "Cannot open dict after GC.");
    581                 return false;
    582             }
    583         }
    584         if (!dictionaryStructureWithBufferPolicy->addUnigramEntry(wordCodePoints,
    585                 wordCodePointCount, wordProperty.getUnigramProperty())) {
    586             LogUtils::logToJava(env, "Cannot add unigram to the new dict.");
    587             return false;
    588         }
    589     } while (token != 0);
    590 
    591     // Add bigrams.
    592     do {
    593         token = dictionary->getNextWordAndNextToken(token, wordCodePoints, &wordCodePointCount);
    594         const WordProperty wordProperty = dictionary->getWordProperty(wordCodePoints,
    595                 wordCodePointCount);
    596         if (dictionaryStructureWithBufferPolicy->needsToRunGC(true /* mindsBlockByGC */)) {
    597             dictionaryStructureWithBufferPolicy = runGCAndGetNewStructurePolicy(
    598                     std::move(dictionaryStructureWithBufferPolicy), dictFilePathChars);
    599             if (!dictionaryStructureWithBufferPolicy) {
    600                 LogUtils::logToJava(env, "Cannot open dict after GC.");
    601                 return false;
    602             }
    603         }
    604         const PrevWordsInfo prevWordsInfo(wordCodePoints, wordCodePointCount,
    605                 wordProperty.getUnigramProperty()->representsBeginningOfSentence());
    606         for (const BigramProperty &bigramProperty : *wordProperty.getBigramProperties()) {
    607             if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&prevWordsInfo,
    608                     &bigramProperty)) {
    609                 LogUtils::logToJava(env, "Cannot add bigram to the new dict.");
    610                 return false;
    611             }
    612         }
    613     } while (token != 0);
    614     // Save to File.
    615     dictionaryStructureWithBufferPolicy->flushWithGC(dictFilePathChars);
    616     return true;
    617 }
    618 
    619 static const JNINativeMethod sMethods[] = {
    620     {
    621         const_cast<char *>("openNative"),
    622         const_cast<char *>("(Ljava/lang/String;JJZ)J"),
    623         reinterpret_cast<void *>(latinime_BinaryDictionary_open)
    624     },
    625     {
    626         const_cast<char *>("createOnMemoryNative"),
    627         const_cast<char *>("(JLjava/lang/String;[Ljava/lang/String;[Ljava/lang/String;)J"),
    628         reinterpret_cast<void *>(latinime_BinaryDictionary_createOnMemory)
    629     },
    630     {
    631         const_cast<char *>("closeNative"),
    632         const_cast<char *>("(J)V"),
    633         reinterpret_cast<void *>(latinime_BinaryDictionary_close)
    634     },
    635     {
    636         const_cast<char *>("getFormatVersionNative"),
    637         const_cast<char *>("(J)I"),
    638         reinterpret_cast<void *>(latinime_BinaryDictionary_getFormatVersion)
    639     },
    640     {
    641         const_cast<char *>("getHeaderInfoNative"),
    642         const_cast<char *>("(J[I[ILjava/util/ArrayList;Ljava/util/ArrayList;)V"),
    643         reinterpret_cast<void *>(latinime_BinaryDictionary_getHeaderInfo)
    644     },
    645     {
    646         const_cast<char *>("flushNative"),
    647         const_cast<char *>("(JLjava/lang/String;)Z"),
    648         reinterpret_cast<void *>(latinime_BinaryDictionary_flush)
    649     },
    650     {
    651         const_cast<char *>("needsToRunGCNative"),
    652         const_cast<char *>("(JZ)Z"),
    653         reinterpret_cast<void *>(latinime_BinaryDictionary_needsToRunGC)
    654     },
    655     {
    656         const_cast<char *>("flushWithGCNative"),
    657         const_cast<char *>("(JLjava/lang/String;)Z"),
    658         reinterpret_cast<void *>(latinime_BinaryDictionary_flushWithGC)
    659     },
    660     {
    661         const_cast<char *>("getSuggestionsNative"),
    662         const_cast<char *>("(JJJ[I[I[I[I[II[I[[I[Z[I[I[I[I[I[I[F)V"),
    663         reinterpret_cast<void *>(latinime_BinaryDictionary_getSuggestions)
    664     },
    665     {
    666         const_cast<char *>("getProbabilityNative"),
    667         const_cast<char *>("(J[I)I"),
    668         reinterpret_cast<void *>(latinime_BinaryDictionary_getProbability)
    669     },
    670     {
    671         const_cast<char *>("getMaxProbabilityOfExactMatchesNative"),
    672         const_cast<char *>("(J[I)I"),
    673         reinterpret_cast<void *>(latinime_BinaryDictionary_getMaxProbabilityOfExactMatches)
    674     },
    675     {
    676         const_cast<char *>("getNgramProbabilityNative"),
    677         const_cast<char *>("(J[[I[Z[I)I"),
    678         reinterpret_cast<void *>(latinime_BinaryDictionary_getNgramProbability)
    679     },
    680     {
    681         const_cast<char *>("getWordPropertyNative"),
    682         const_cast<char *>("(J[IZ[I[Z[ILjava/util/ArrayList;Ljava/util/ArrayList;"
    683                 "Ljava/util/ArrayList;Ljava/util/ArrayList;)V"),
    684         reinterpret_cast<void *>(latinime_BinaryDictionary_getWordProperty)
    685     },
    686     {
    687         const_cast<char *>("getNextWordNative"),
    688         const_cast<char *>("(JI[I[Z)I"),
    689         reinterpret_cast<void *>(latinime_BinaryDictionary_getNextWord)
    690     },
    691     {
    692         const_cast<char *>("addUnigramEntryNative"),
    693         const_cast<char *>("(J[II[IIZZZI)Z"),
    694         reinterpret_cast<void *>(latinime_BinaryDictionary_addUnigramEntry)
    695     },
    696     {
    697         const_cast<char *>("removeUnigramEntryNative"),
    698         const_cast<char *>("(J[I)Z"),
    699         reinterpret_cast<void *>(latinime_BinaryDictionary_removeUnigramEntry)
    700     },
    701     {
    702         const_cast<char *>("addNgramEntryNative"),
    703         const_cast<char *>("(J[[I[Z[III)Z"),
    704         reinterpret_cast<void *>(latinime_BinaryDictionary_addNgramEntry)
    705     },
    706     {
    707         const_cast<char *>("removeNgramEntryNative"),
    708         const_cast<char *>("(J[[I[Z[I)Z"),
    709         reinterpret_cast<void *>(latinime_BinaryDictionary_removeNgramEntry)
    710     },
    711     {
    712         const_cast<char *>("addMultipleDictionaryEntriesNative"),
    713         const_cast<char *>(
    714                 "(J[Lcom/android/inputmethod/latin/utils/LanguageModelParam;I)I"),
    715         reinterpret_cast<void *>(latinime_BinaryDictionary_addMultipleDictionaryEntries)
    716     },
    717     {
    718         const_cast<char *>("getPropertyNative"),
    719         const_cast<char *>("(JLjava/lang/String;)Ljava/lang/String;"),
    720         reinterpret_cast<void *>(latinime_BinaryDictionary_getProperty)
    721     },
    722     {
    723         const_cast<char *>("isCorruptedNative"),
    724         const_cast<char *>("(J)Z"),
    725         reinterpret_cast<void *>(latinime_BinaryDictionary_isCorruptedNative)
    726     },
    727     {
    728         const_cast<char *>("migrateNative"),
    729         const_cast<char *>("(JLjava/lang/String;J)Z"),
    730         reinterpret_cast<void *>(latinime_BinaryDictionary_migrateNative)
    731     }
    732 };
    733 
    734 int register_BinaryDictionary(JNIEnv *env) {
    735     const char *const kClassPathName = "com/android/inputmethod/latin/BinaryDictionary";
    736     return registerNativeMethods(env, kClassPathName, sMethods, NELEMS(sMethods));
    737 }
    738 } // namespace latinime
    739