Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import android.text.TextUtils;
     20 import android.util.Log;
     21 import android.util.SparseArray;
     22 
     23 import com.android.inputmethod.annotations.UsedForTesting;
     24 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
     25 import com.android.inputmethod.latin.common.ComposedData;
     26 import com.android.inputmethod.latin.common.Constants;
     27 import com.android.inputmethod.latin.common.FileUtils;
     28 import com.android.inputmethod.latin.common.InputPointers;
     29 import com.android.inputmethod.latin.common.StringUtils;
     30 import com.android.inputmethod.latin.makedict.DictionaryHeader;
     31 import com.android.inputmethod.latin.makedict.FormatSpec;
     32 import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
     33 import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
     34 import com.android.inputmethod.latin.makedict.WordProperty;
     35 import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;
     36 import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
     37 import com.android.inputmethod.latin.utils.JniUtils;
     38 import com.android.inputmethod.latin.utils.WordInputEventForPersonalization;
     39 
     40 import java.io.File;
     41 import java.util.ArrayList;
     42 import java.util.Arrays;
     43 import java.util.HashMap;
     44 import java.util.Locale;
     45 import java.util.Map;
     46 
     47 import javax.annotation.Nonnull;
     48 
     49 /**
     50  * Implements a static, compacted, binary dictionary of standard words.
     51  */
     52 // TODO: All methods which should be locked need to have a suffix "Locked".
     53 public final class BinaryDictionary extends Dictionary {
    // Tag used for the Log calls made by this class.
    private static final String TAG = BinaryDictionary.class.getSimpleName();

    // The cutoff returned by native for auto-commit confidence.
    // Must be equal to CONFIDENCE_TO_AUTO_COMMIT in native/jni/src/defines.h
    private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000;

    // Length of the per-word code point buffers used in this class: each suggestion returned by
    // getSuggestions() occupies one slot of this size, and getWordProperty() /
    // getNextWordProperty() allocate output arrays of this size.
    public static final int DICTIONARY_MAX_WORD_LENGTH = 48;
    // NOTE(review): presumably the maximum number of previous words taken into account for an
    // n-gram; it is not referenced in this part of the file - confirm against callers.
    public static final int MAX_PREV_WORD_COUNT_FOR_N_GRAM = 3;

    // Query keys exposed for tests.
    // NOTE(review): presumably meant to be passed to getPropertyNative() - confirm.
    @UsedForTesting
    public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
    @UsedForTesting
    public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
    @UsedForTesting
    public static final String MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
    @UsedForTesting
    public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";

    // Sentinel for "no timestamp".
    public static final int NOT_A_VALID_TIMESTAMP = -1;

    // Format to get unigram flags from native side via getWordPropertyNative().
    // The *_INDEX constants are positions inside the boolean[] of size
    // FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT that getWordProperty() hands to native code.
    private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 5;
    private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0;
    private static final int FORMAT_WORD_PROPERTY_IS_POSSIBLY_OFFENSIVE_INDEX = 1;
    private static final int FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX = 2;
    private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3; // DEPRECATED
    private static final int FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX = 4;

    // Format to get probability and historical info from native side via getWordPropertyNative().
    // The *_INDEX constants are positions inside the int[] of size
    // FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT.
    public static final int FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT = 4;
    public static final int FORMAT_WORD_PROPERTY_PROBABILITY_INDEX = 0;
    public static final int FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX = 1;
    public static final int FORMAT_WORD_PROPERTY_LEVEL_INDEX = 2;
    public static final int FORMAT_WORD_PROPERTY_COUNT_INDEX = 3;

    // Suffixes appended to the dictionary file path by migrateTo(): the first names the
    // temporary dictionary file, the second names the directory that records a migration.
    public static final String DICT_FILE_NAME_SUFFIX_FOR_MIGRATION = ".migrate";
    public static final String DIR_NAME_SUFFIX_FOR_RECORD_MIGRATION = ".migrating";

    // Handle returned by openNative() / createOnMemoryNative(). The value 0 is treated as
    // "no valid dictionary" (see isValidDictionary()).
    private long mNativeDict;
    // Length of the binary data given to the file-based constructor; 0 for on-memory dictionaries.
    private final long mDictSize;
    // Path of the dictionary file; used for opening, flushing and migration.
    private final String mDictFilePath;
    // Forwarded to the native suggest options on every getSuggestions() call.
    private final boolean mUseFullEditDistance;
    // Whether the dictionary is opened in writable mode.
    private final boolean mIsUpdatable;
    // Set by the entry-modifying methods; reset to false whenever the dictionary is (re)loaded.
    private boolean mHasUpdated;

    // Traverse sessions keyed by session id. The array itself is the monitor that
    // getTraverseSession() synchronizes on.
    private final SparseArray<DicTraverseSession> mDicTraverseSessions = new SparseArray<>();
    100 
    101     // TODO: There should be a way to remove used DicTraverseSession objects from
    102     // {@code mDicTraverseSessions}.
    103     private DicTraverseSession getTraverseSession(final int traverseSessionId) {
    104         synchronized(mDicTraverseSessions) {
    105             DicTraverseSession traverseSession = mDicTraverseSessions.get(traverseSessionId);
    106             if (traverseSession == null) {
    107                 traverseSession = new DicTraverseSession(mLocale, mNativeDict, mDictSize);
    108                 mDicTraverseSessions.put(traverseSessionId, traverseSession);
    109             }
    110             return traverseSession;
    111         }
    112     }
    113 
    /**
     * Creates a binary dictionary backed by an existing dictionary file, which is opened through
     * native code as part of construction.
     * @param filename the name of the file to read through native code.
     * @param offset the offset of the dictionary data within the file.
     * @param length the length of the binary data.
     * @param useFullEditDistance whether to use the full edit distance in suggestions
     * @param locale the locale of the dictionary, handed to the superclass
     * @param dictType the dictionary type, as a human-readable string
     * @param isUpdatable whether to open the dictionary file in writable mode.
     */
    public BinaryDictionary(final String filename, final long offset, final long length,
            final boolean useFullEditDistance, final Locale locale, final String dictType,
            final boolean isUpdatable) {
        super(dictType, locale);
        // Plain bookkeeping first; the native dictionary is opened last.
        mDictFilePath = filename;
        mDictSize = length;
        mUseFullEditDistance = useFullEditDistance;
        mIsUpdatable = isUpdatable;
        mHasUpdated = false;
        loadDictionary(filename, offset, length, isUpdatable);
    }
    134 
    /**
     * Constructs binary dictionary on memory.
     * @param filename the name of the file used to flush.
     * @param useFullEditDistance whether to use the full edit distance in suggestions
     * @param locale the locale of the dictionary; its string form is passed to native code
     * @param dictType the dictionary type, as a human-readable string
     * @param formatVersion the format version of the dictionary
     * @param attributeMap the attributes of the dictionary
     */
    public BinaryDictionary(final String filename, final boolean useFullEditDistance,
            final Locale locale, final String dictType, final long formatVersion,
            final Map<String, String> attributeMap) {
        super(dictType, locale);
        mDictSize = 0;
        mDictFilePath = filename;
        // On memory dictionary is always updatable.
        mIsUpdatable = true;
        mHasUpdated = false;
        mUseFullEditDistance = useFullEditDistance;
        // Flatten the attribute map into two parallel arrays for the native side. Walking the
        // entry set yields each key together with its value, without a second lookup per key.
        final int attributeCount = attributeMap.size();
        final String[] keyArray = new String[attributeCount];
        final String[] valueArray = new String[attributeCount];
        int index = 0;
        for (final Map.Entry<String, String> attribute : attributeMap.entrySet()) {
            keyArray[index] = attribute.getKey();
            valueArray[index] = attribute.getValue();
            index++;
        }
        mNativeDict = createOnMemoryNative(formatVersion, locale.toString(), keyArray, valueArray);
    }
    163 
    164 
    // Runs once when this class is initialized and delegates to JniUtils.loadNativeLibrary().
    // NOTE(review): presumably this loads the library that implements the native methods
    // declared in this class - confirm in JniUtils.
    static {
        JniUtils.loadNativeLibrary();
    }
    168 
    // Native entry points. Every method except openNative() and createOnMemoryNative() takes the
    // handle produced by one of those two as its first argument ({@code dict}); this class
    // always passes {@code mNativeDict} there. Parameters prefixed with "out" are arrays or
    // lists allocated by the Java callers in this class and read back after the call.
    // NOTE(review): the signatures must stay in sync with the JNI registration on the native
    // side - confirm before changing any of them.
    private static native long openNative(String sourceDir, long dictOffset, long dictSize,
            boolean isUpdatable);
    private static native long createOnMemoryNative(long formatVersion,
            String locale, String[] attributeKeyStringArray, String[] attributeValueStringArray);
    private static native void getHeaderInfoNative(long dict, int[] outHeaderSize,
            int[] outFormatVersion, ArrayList<int[]> outAttributeKeys,
            ArrayList<int[]> outAttributeValues);
    private static native boolean flushNative(long dict, String filePath);
    private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC);
    private static native boolean flushWithGCNative(long dict, String filePath);
    private static native void closeNative(long dict);
    private static native int getFormatVersionNative(long dict);
    private static native int getProbabilityNative(long dict, int[] word);
    private static native int getMaxProbabilityOfExactMatchesNative(long dict, int[] word);
    private static native int getNgramProbabilityNative(long dict, int[][] prevWordCodePointArrays,
            boolean[] isBeginningOfSentenceArray, int[] word);
    private static native void getWordPropertyNative(long dict, int[] word,
            boolean isBeginningOfSentence, int[] outCodePoints, boolean[] outFlags,
            int[] outProbabilityInfo, ArrayList<int[][]> outNgramPrevWordsArray,
            ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray,
            ArrayList<int[]> outNgramTargets, ArrayList<int[]> outNgramProbabilityInfo,
            ArrayList<int[]> outShortcutTargets, ArrayList<Integer> outShortcutProbabilities);
    private static native int getNextWordNative(long dict, int token, int[] outCodePoints,
            boolean[] outIsBeginningOfSentence);
    private static native void getSuggestionsNative(long dict, long proximityInfo,
            long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
            int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
            int prevWordCount, int[] outputSuggestionCount, int[] outputCodePoints,
            int[] outputScores, int[] outputIndices, int[] outputTypes,
            int[] outputAutoCommitFirstWordConfidence,
            float[] inOutWeightOfLangModelVsSpatialModel);
    private static native boolean addUnigramEntryNative(long dict, int[] word, int probability,
            int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence,
            boolean isNotAWord, boolean isPossiblyOffensive, int timestamp);
    private static native boolean removeUnigramEntryNative(long dict, int[] word);
    private static native boolean addNgramEntryNative(long dict,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
            int[] word, int probability, int timestamp);
    private static native boolean removeNgramEntryNative(long dict,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] word);
    private static native boolean updateEntriesForWordWithNgramContextNative(long dict,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
            int[] word, boolean isValidWord, int count, int timestamp);
    private static native int updateEntriesForInputEventsNative(long dict,
            WordInputEventForPersonalization[] inputEvents, int startIndex);
    private static native String getPropertyNative(long dict, String query);
    private static native boolean isCorruptedNative(long dict);
    private static native boolean migrateNative(long dict, String dictFilePath,
            long newFormatVersion);
    219 
    220     // TODO: Move native dict into session
    221     private void loadDictionary(final String path, final long startOffset,
    222             final long length, final boolean isUpdatable) {
    223         mHasUpdated = false;
    224         mNativeDict = openNative(path, startOffset, length, isUpdatable);
    225     }
    226 
    227     // TODO: Check isCorrupted() for main dictionaries.
    228     public boolean isCorrupted() {
    229         if (!isValidDictionary()) {
    230             return false;
    231         }
    232         if (!isCorruptedNative(mNativeDict)) {
    233             return false;
    234         }
    235         // TODO: Record the corruption.
    236         Log.e(TAG, "BinaryDictionary (" + mDictFilePath + ") is corrupted.");
    237         Log.e(TAG, "locale: " + mLocale);
    238         Log.e(TAG, "dict size: " + mDictSize);
    239         Log.e(TAG, "updatable: " + mIsUpdatable);
    240         return true;
    241     }
    242 
    243     public DictionaryHeader getHeader() throws UnsupportedFormatException {
    244         if (mNativeDict == 0) {
    245             return null;
    246         }
    247         final int[] outHeaderSize = new int[1];
    248         final int[] outFormatVersion = new int[1];
    249         final ArrayList<int[]> outAttributeKeys = new ArrayList<>();
    250         final ArrayList<int[]> outAttributeValues = new ArrayList<>();
    251         getHeaderInfoNative(mNativeDict, outHeaderSize, outFormatVersion, outAttributeKeys,
    252                 outAttributeValues);
    253         final HashMap<String, String> attributes = new HashMap<>();
    254         for (int i = 0; i < outAttributeKeys.size(); i++) {
    255             final String attributeKey = StringUtils.getStringFromNullTerminatedCodePointArray(
    256                     outAttributeKeys.get(i));
    257             final String attributeValue = StringUtils.getStringFromNullTerminatedCodePointArray(
    258                     outAttributeValues.get(i));
    259             attributes.put(attributeKey, attributeValue);
    260         }
    261         final boolean hasHistoricalInfo = DictionaryHeader.ATTRIBUTE_VALUE_TRUE.equals(
    262                 attributes.get(DictionaryHeader.HAS_HISTORICAL_INFO_KEY));
    263         return new DictionaryHeader(outHeaderSize[0], new DictionaryOptions(attributes),
    264                 new FormatSpec.FormatOptions(outFormatVersion[0], hasHistoricalInfo));
    265     }
    266 
    267     @Override
    268     public ArrayList<SuggestedWordInfo> getSuggestions(final ComposedData composedData,
    269             final NgramContext ngramContext, final long proximityInfoHandle,
    270             final SettingsValuesForSuggestion settingsValuesForSuggestion,
    271             final int sessionId, final float weightForLocale,
    272             final float[] inOutWeightOfLangModelVsSpatialModel) {
    273         if (!isValidDictionary()) {
    274             return null;
    275         }
    276         final DicTraverseSession session = getTraverseSession(sessionId);
    277         Arrays.fill(session.mInputCodePoints, Constants.NOT_A_CODE);
    278         ngramContext.outputToArray(session.mPrevWordCodePointArrays,
    279                 session.mIsBeginningOfSentenceArray);
    280         final InputPointers inputPointers = composedData.mInputPointers;
    281         final boolean isGesture = composedData.mIsBatchMode;
    282         final int inputSize;
    283         if (!isGesture) {
    284             inputSize =
    285                     composedData.copyCodePointsExceptTrailingSingleQuotesAndReturnCodePointCount(
    286                         session.mInputCodePoints);
    287             if (inputSize < 0) {
    288                 return null;
    289             }
    290         } else {
    291             inputSize = inputPointers.getPointerSize();
    292         }
    293         session.mNativeSuggestOptions.setUseFullEditDistance(mUseFullEditDistance);
    294         session.mNativeSuggestOptions.setIsGesture(isGesture);
    295         session.mNativeSuggestOptions.setBlockOffensiveWords(
    296                 settingsValuesForSuggestion.mBlockPotentiallyOffensive);
    297         session.mNativeSuggestOptions.setWeightForLocale(weightForLocale);
    298         if (inOutWeightOfLangModelVsSpatialModel != null) {
    299             session.mInputOutputWeightOfLangModelVsSpatialModel[0] =
    300                     inOutWeightOfLangModelVsSpatialModel[0];
    301         } else {
    302             session.mInputOutputWeightOfLangModelVsSpatialModel[0] =
    303                     Dictionary.NOT_A_WEIGHT_OF_LANG_MODEL_VS_SPATIAL_MODEL;
    304         }
    305         // TOOD: Pass multiple previous words information for n-gram.
    306         getSuggestionsNative(mNativeDict, proximityInfoHandle,
    307                 getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(),
    308                 inputPointers.getYCoordinates(), inputPointers.getTimes(),
    309                 inputPointers.getPointerIds(), session.mInputCodePoints, inputSize,
    310                 session.mNativeSuggestOptions.getOptions(), session.mPrevWordCodePointArrays,
    311                 session.mIsBeginningOfSentenceArray, ngramContext.getPrevWordCount(),
    312                 session.mOutputSuggestionCount, session.mOutputCodePoints, session.mOutputScores,
    313                 session.mSpaceIndices, session.mOutputTypes,
    314                 session.mOutputAutoCommitFirstWordConfidence,
    315                 session.mInputOutputWeightOfLangModelVsSpatialModel);
    316         if (inOutWeightOfLangModelVsSpatialModel != null) {
    317             inOutWeightOfLangModelVsSpatialModel[0] =
    318                     session.mInputOutputWeightOfLangModelVsSpatialModel[0];
    319         }
    320         final int count = session.mOutputSuggestionCount[0];
    321         final ArrayList<SuggestedWordInfo> suggestions = new ArrayList<>();
    322         for (int j = 0; j < count; ++j) {
    323             final int start = j * DICTIONARY_MAX_WORD_LENGTH;
    324             int len = 0;
    325             while (len < DICTIONARY_MAX_WORD_LENGTH
    326                     && session.mOutputCodePoints[start + len] != 0) {
    327                 ++len;
    328             }
    329             if (len > 0) {
    330                 suggestions.add(new SuggestedWordInfo(
    331                         new String(session.mOutputCodePoints, start, len),
    332                         "" /* prevWordsContext */,
    333                         (int)(session.mOutputScores[j] * weightForLocale),
    334                         session.mOutputTypes[j],
    335                         this /* sourceDict */,
    336                         session.mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */,
    337                         session.mOutputAutoCommitFirstWordConfidence[0]));
    338             }
    339         }
    340         return suggestions;
    341     }
    342 
    343     public boolean isValidDictionary() {
    344         return mNativeDict != 0;
    345     }
    346 
    347     public int getFormatVersion() {
    348         return getFormatVersionNative(mNativeDict);
    349     }
    350 
    351     @Override
    352     public boolean isInDictionary(final String word) {
    353         return getFrequency(word) != NOT_A_PROBABILITY;
    354     }
    355 
    356     @Override
    357     public int getFrequency(final String word) {
    358         if (TextUtils.isEmpty(word)) {
    359             return NOT_A_PROBABILITY;
    360         }
    361         final int[] codePoints = StringUtils.toCodePointArray(word);
    362         return getProbabilityNative(mNativeDict, codePoints);
    363     }
    364 
    365     @Override
    366     public int getMaxFrequencyOfExactMatches(final String word) {
    367         if (TextUtils.isEmpty(word)) {
    368             return NOT_A_PROBABILITY;
    369         }
    370         final int[] codePoints = StringUtils.toCodePointArray(word);
    371         return getMaxProbabilityOfExactMatchesNative(mNativeDict, codePoints);
    372     }
    373 
    374     @UsedForTesting
    375     public boolean isValidNgram(final NgramContext ngramContext, final String word) {
    376         return getNgramProbability(ngramContext, word) != NOT_A_PROBABILITY;
    377     }
    378 
    379     public int getNgramProbability(final NgramContext ngramContext, final String word) {
    380         if (!ngramContext.isValid() || TextUtils.isEmpty(word)) {
    381             return NOT_A_PROBABILITY;
    382         }
    383         final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][];
    384         final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()];
    385         ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
    386         final int[] wordCodePoints = StringUtils.toCodePointArray(word);
    387         return getNgramProbabilityNative(mNativeDict, prevWordCodePointArrays,
    388                 isBeginningOfSentenceArray, wordCodePoints);
    389     }
    390 
    391     public WordProperty getWordProperty(final String word, final boolean isBeginningOfSentence) {
    392         if (word == null) {
    393             return null;
    394         }
    395         final int[] codePoints = StringUtils.toCodePointArray(word);
    396         final int[] outCodePoints = new int[DICTIONARY_MAX_WORD_LENGTH];
    397         final boolean[] outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT];
    398         final int[] outProbabilityInfo =
    399                 new int[FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT];
    400         final ArrayList<int[][]> outNgramPrevWordsArray = new ArrayList<>();
    401         final ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray =
    402                 new ArrayList<>();
    403         final ArrayList<int[]> outNgramTargets = new ArrayList<>();
    404         final ArrayList<int[]> outNgramProbabilityInfo = new ArrayList<>();
    405         final ArrayList<int[]> outShortcutTargets = new ArrayList<>();
    406         final ArrayList<Integer> outShortcutProbabilities = new ArrayList<>();
    407         getWordPropertyNative(mNativeDict, codePoints, isBeginningOfSentence, outCodePoints,
    408                 outFlags, outProbabilityInfo, outNgramPrevWordsArray,
    409                 outNgramPrevWordIsBeginningOfSentenceArray, outNgramTargets,
    410                 outNgramProbabilityInfo, outShortcutTargets, outShortcutProbabilities);
    411         return new WordProperty(codePoints,
    412                 outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX],
    413                 outFlags[FORMAT_WORD_PROPERTY_IS_POSSIBLY_OFFENSIVE_INDEX],
    414                 outFlags[FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX],
    415                 outFlags[FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX], outProbabilityInfo,
    416                 outNgramPrevWordsArray, outNgramPrevWordIsBeginningOfSentenceArray,
    417                 outNgramTargets, outNgramProbabilityInfo);
    418     }
    419 
    420     public static class GetNextWordPropertyResult {
    421         public WordProperty mWordProperty;
    422         public int mNextToken;
    423 
    424         public GetNextWordPropertyResult(final WordProperty wordProperty, final int nextToken) {
    425             mWordProperty = wordProperty;
    426             mNextToken = nextToken;
    427         }
    428     }
    429 
    430     /**
    431      * Method to iterate all words in the dictionary for makedict.
    432      * If token is 0, this method newly starts iterating the dictionary.
    433      */
    434     public GetNextWordPropertyResult getNextWordProperty(final int token) {
    435         final int[] codePoints = new int[DICTIONARY_MAX_WORD_LENGTH];
    436         final boolean[] isBeginningOfSentence = new boolean[1];
    437         final int nextToken = getNextWordNative(mNativeDict, token, codePoints,
    438                 isBeginningOfSentence);
    439         final String word = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
    440         return new GetNextWordPropertyResult(
    441                 getWordProperty(word, isBeginningOfSentence[0]), nextToken);
    442     }
    443 
    444     // Add a unigram entry to binary dictionary with unigram attributes in native code.
    445     public boolean addUnigramEntry(
    446             final String word, final int probability, final boolean isBeginningOfSentence,
    447             final boolean isNotAWord, final boolean isPossiblyOffensive, final int timestamp) {
    448         if (word == null || (word.isEmpty() && !isBeginningOfSentence)) {
    449             return false;
    450         }
    451         final int[] codePoints = StringUtils.toCodePointArray(word);
    452         if (!addUnigramEntryNative(mNativeDict, codePoints, probability,
    453                 null /* shortcutTargetCodePoints */, 0 /* shortcutProbability */,
    454                 isBeginningOfSentence, isNotAWord, isPossiblyOffensive, timestamp)) {
    455             return false;
    456         }
    457         mHasUpdated = true;
    458         return true;
    459     }
    460 
    461     // Remove a unigram entry from the binary dictionary in native code.
    462     public boolean removeUnigramEntry(final String word) {
    463         if (TextUtils.isEmpty(word)) {
    464             return false;
    465         }
    466         final int[] codePoints = StringUtils.toCodePointArray(word);
    467         if (!removeUnigramEntryNative(mNativeDict, codePoints)) {
    468             return false;
    469         }
    470         mHasUpdated = true;
    471         return true;
    472     }
    473 
    474     // Add an n-gram entry to the binary dictionary with timestamp in native code.
    475     public boolean addNgramEntry(final NgramContext ngramContext, final String word,
    476             final int probability, final int timestamp) {
    477         if (!ngramContext.isValid() || TextUtils.isEmpty(word)) {
    478             return false;
    479         }
    480         final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][];
    481         final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()];
    482         ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
    483         final int[] wordCodePoints = StringUtils.toCodePointArray(word);
    484         if (!addNgramEntryNative(mNativeDict, prevWordCodePointArrays,
    485                 isBeginningOfSentenceArray, wordCodePoints, probability, timestamp)) {
    486             return false;
    487         }
    488         mHasUpdated = true;
    489         return true;
    490     }
    491 
    492     // Update entries for the word occurrence with the ngramContext.
    493     public boolean updateEntriesForWordWithNgramContext(@Nonnull final NgramContext ngramContext,
    494             final String word, final boolean isValidWord, final int count, final int timestamp) {
    495         if (TextUtils.isEmpty(word)) {
    496             return false;
    497         }
    498         final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][];
    499         final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()];
    500         ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
    501         final int[] wordCodePoints = StringUtils.toCodePointArray(word);
    502         if (!updateEntriesForWordWithNgramContextNative(mNativeDict, prevWordCodePointArrays,
    503                 isBeginningOfSentenceArray, wordCodePoints, isValidWord, count, timestamp)) {
    504             return false;
    505         }
    506         mHasUpdated = true;
    507         return true;
    508     }
    509 
    510     @UsedForTesting
    511     public void updateEntriesForInputEvents(final WordInputEventForPersonalization[] inputEvents) {
    512         if (!isValidDictionary()) {
    513             return;
    514         }
    515         int processedEventCount = 0;
    516         while (processedEventCount < inputEvents.length) {
    517             if (needsToRunGC(true /* mindsBlockByGC */)) {
    518                 flushWithGC();
    519             }
    520             processedEventCount = updateEntriesForInputEventsNative(mNativeDict, inputEvents,
    521                     processedEventCount);
    522             mHasUpdated = true;
    523             if (processedEventCount <= 0) {
    524                 return;
    525             }
    526         }
    527     }
    528 
    529     private void reopen() {
    530         close();
    531         final File dictFile = new File(mDictFilePath);
    532         // WARNING: Because we pass 0 as the offset and file.length() as the length, this can
    533         // only be called for actual files. Right now it's only called by the flush() family of
    534         // functions, which require an updatable dictionary, so it's okay. But beware.
    535         loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */,
    536                 dictFile.length(), mIsUpdatable);
    537     }
    538 
    539     // Flush to dict file if the dictionary has been updated.
    540     public boolean flush() {
    541         if (!isValidDictionary()) {
    542             return false;
    543         }
    544         if (mHasUpdated) {
    545             if (!flushNative(mNativeDict, mDictFilePath)) {
    546                 return false;
    547             }
    548             reopen();
    549         }
    550         return true;
    551     }
    552 
    553     // Run GC and flush to dict file if the dictionary has been updated.
    554     public boolean flushWithGCIfHasUpdated() {
    555         if (mHasUpdated) {
    556             return flushWithGC();
    557         }
    558         return true;
    559     }
    560 
    561     // Run GC and flush to dict file.
    562     public boolean flushWithGC() {
    563         if (!isValidDictionary()) {
    564             return false;
    565         }
    566         if (!flushWithGCNative(mNativeDict, mDictFilePath)) {
    567             return false;
    568         }
    569         reopen();
    570         return true;
    571     }
    572 
    573     /**
    574      * Checks whether GC is needed to run or not.
    575      * @param mindsBlockByGC Whether to mind operations blocked by GC. We don't need to care about
    576      * the blocking in some situations such as in idle time or just before closing.
    577      * @return whether GC is needed to run or not.
    578      */
    579     public boolean needsToRunGC(final boolean mindsBlockByGC) {
    580         if (!isValidDictionary()) {
    581             return false;
    582         }
    583         return needsToRunGCNative(mNativeDict, mindsBlockByGC);
    584     }
    585 
    586     public boolean migrateTo(final int newFormatVersion) {
    587         if (!isValidDictionary()) {
    588             return false;
    589         }
    590         final File isMigratingDir =
    591                 new File(mDictFilePath + DIR_NAME_SUFFIX_FOR_RECORD_MIGRATION);
    592         if (isMigratingDir.exists()) {
    593             isMigratingDir.delete();
    594             Log.e(TAG, "Previous migration attempt failed probably due to a crash. "
    595                         + "Giving up using the old dictionary (" + mDictFilePath + ").");
    596             return false;
    597         }
    598         if (!isMigratingDir.mkdir()) {
    599             Log.e(TAG, "Cannot create a dir (" + isMigratingDir.getAbsolutePath()
    600                     + ") to record migration.");
    601             return false;
    602         }
    603         try {
    604             final String tmpDictFilePath = mDictFilePath + DICT_FILE_NAME_SUFFIX_FOR_MIGRATION;
    605             if (!migrateNative(mNativeDict, tmpDictFilePath, newFormatVersion)) {
    606                 return false;
    607             }
    608             close();
    609             final File dictFile = new File(mDictFilePath);
    610             final File tmpDictFile = new File(tmpDictFilePath);
    611             if (!FileUtils.deleteRecursively(dictFile)) {
    612                 return false;
    613             }
    614             if (!BinaryDictionaryUtils.renameDict(tmpDictFile, dictFile)) {
    615                 return false;
    616             }
    617             loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */,
    618                     dictFile.length(), mIsUpdatable);
    619             return true;
    620         } finally {
    621             isMigratingDir.delete();
    622         }
    623     }
    624 
    625     @UsedForTesting
    626     public String getPropertyForGettingStats(final String query) {
    627         if (!isValidDictionary()) {
    628             return "";
    629         }
    630         return getPropertyNative(mNativeDict, query);
    631     }
    632 
    633     @Override
    634     public boolean shouldAutoCommit(final SuggestedWordInfo candidate) {
    635         return candidate.mAutoCommitFirstWordConfidence > CONFIDENCE_TO_AUTO_COMMIT;
    636     }
    637 
    638     @Override
    639     public void close() {
    640         synchronized (mDicTraverseSessions) {
    641             final int sessionsSize = mDicTraverseSessions.size();
    642             for (int index = 0; index < sessionsSize; ++index) {
    643                 final DicTraverseSession traverseSession = mDicTraverseSessions.valueAt(index);
    644                 if (traverseSession != null) {
    645                     traverseSession.close();
    646                 }
    647             }
    648             mDicTraverseSessions.clear();
    649         }
    650         closeInternalLocked();
    651     }
    652 
    653     private synchronized void closeInternalLocked() {
    654         if (mNativeDict != 0) {
    655             closeNative(mNativeDict);
    656             mNativeDict = 0;
    657         }
    658     }
    659 
    660     // TODO: Manage BinaryDictionary instances without using WeakReference or something.
    661     @Override
    662     protected void finalize() throws Throwable {
    663         try {
    664             closeInternalLocked();
    665         } finally {
    666             super.finalize();
    667         }
    668     }
    669 }
    670