Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import android.text.TextUtils;
     20 import android.util.SparseArray;
     21 
     22 import com.android.inputmethod.annotations.UsedForTesting;
     23 import com.android.inputmethod.keyboard.ProximityInfo;
     24 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
     25 import com.android.inputmethod.latin.settings.NativeSuggestOptions;
     26 import com.android.inputmethod.latin.utils.CollectionUtils;
     27 import com.android.inputmethod.latin.utils.JniUtils;
     28 import com.android.inputmethod.latin.utils.StringUtils;
     29 
     30 import java.io.File;
     31 import java.util.ArrayList;
     32 import java.util.Arrays;
     33 import java.util.Locale;
     34 import java.util.Map;
     35 
     36 /**
     37  * Implements a static, compacted, binary dictionary of standard words.
     38  */
     39 // TODO: All methods which should be locked need to have a suffix "Locked".
     40 public final class BinaryDictionary extends Dictionary {
    // Log tag derived from the simple name of this class.
    private static final String TAG = BinaryDictionary.class.getSimpleName();

    // Must be equal to MAX_WORD_LENGTH in native/jni/src/defines.h
    // Also the stride of one result slot inside mOutputCodePoints.
    private static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH;
    // Must be equal to MAX_RESULTS in native/jni/src/defines.h
    // Sizes every per-result output buffer of this class.
    private static final int MAX_RESULTS = 18;
    // The cutoff returned by native for auto-commit confidence.
    // Must be equal to CONFIDENCE_TO_AUTO_COMMIT in native/jni/src/defines.h
    // shouldAutoCommit() requires a confidence strictly greater than this value.
    private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000;

    // Query strings exposed for tests.
    // NOTE(review): presumably meant as arguments to getPropertyForTests() - confirm in the tests.
    @UsedForTesting
    public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
    @UsedForTesting
    public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
    @UsedForTesting
    public static final String MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
    @UsedForTesting
    public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
     59 
    // Value returned by openNative(); 0 means that no native dictionary is currently open.
    private long mNativeDict;
    // Locale and size handed to every DicTraverseSession created by getTraverseSession().
    private final Locale mLocale;
    private final long mDictSize;
    // Path given to the constructor; used again by flush(), flushWithGC() and reopen().
    private final String mDictFilePath;
    // Input buffer, reset to Constants.NOT_A_CODE at the start of every suggestion request.
    private final int[] mInputCodePoints = new int[MAX_WORD_LENGTH];
    // Output words: result j starts at j * MAX_WORD_LENGTH and ends at the first 0 code point
    // (or after MAX_WORD_LENGTH code points).
    private final int[] mOutputCodePoints = new int[MAX_WORD_LENGTH * MAX_RESULTS];
    // Per-result value passed to SuggestedWordInfo as indexOfTouchPointOfSecondWord.
    private final int[] mSpaceIndices = new int[MAX_RESULTS];
    // Per-result score written by native code.
    private final int[] mOutputScores = new int[MAX_RESULTS];
    // Per-result type; masked with KIND_MASK_FLAGS and KIND_MASK_KIND when decoding.
    private final int[] mOutputTypes = new int[MAX_RESULTS];
    // Only one result is ever used
    private final int[] mOutputAutoCommitFirstWordConfidence = new int[1];

    // Options whose getOptions() array is handed to getSuggestionsNative().
    private final NativeSuggestOptions mNativeSuggestOptions = new NativeSuggestOptions();

    // Traverse sessions keyed by session id; also used as the monitor guarding access to them.
    private final SparseArray<DicTraverseSession> mDicTraverseSessions =
            CollectionUtils.newSparseArray();
     76 
     77     // TODO: There should be a way to remove used DicTraverseSession objects from
     78     // {@code mDicTraverseSessions}.
     79     private DicTraverseSession getTraverseSession(final int traverseSessionId) {
     80         synchronized(mDicTraverseSessions) {
     81             DicTraverseSession traverseSession = mDicTraverseSessions.get(traverseSessionId);
     82             if (traverseSession == null) {
     83                 traverseSession = mDicTraverseSessions.get(traverseSessionId);
     84                 if (traverseSession == null) {
     85                     traverseSession = new DicTraverseSession(mLocale, mNativeDict, mDictSize);
     86                     mDicTraverseSessions.put(traverseSessionId, traverseSession);
     87                 }
     88             }
     89             return traverseSession;
     90         }
     91     }
     92 
     93     /**
     94      * Constructor for the binary dictionary. This is supposed to be called from the
     95      * dictionary factory.
     96      * @param filename the name of the file to read through native code.
     97      * @param offset the offset of the dictionary data within the file.
     98      * @param length the length of the binary data.
     99      * @param useFullEditDistance whether to use the full edit distance in suggestions
    100      * @param dictType the dictionary type, as a human-readable string
    101      * @param isUpdatable whether to open the dictionary file in writable mode.
    102      */
    103     public BinaryDictionary(final String filename, final long offset, final long length,
    104             final boolean useFullEditDistance, final Locale locale, final String dictType,
    105             final boolean isUpdatable) {
    106         super(dictType);
    107         mLocale = locale;
    108         mDictSize = length;
    109         mDictFilePath = filename;
    110         mNativeSuggestOptions.setUseFullEditDistance(useFullEditDistance);
    111         loadDictionary(filename, offset, length, isUpdatable);
    112     }
    113 
    // Runs JniUtils.loadNativeLibrary() once, when this class is initialized, so that it has
    // been called before any of the native methods declared in this class can be invoked.
    static {
        JniUtils.loadNativeLibrary();
    }
    117 
    // Native entry points. Callers in this class pass mNativeDict as the {@code dict} argument,
    // and words are passed as arrays of code points.

    // Creation, opening, flushing and closing of the native dictionary.
    private static native boolean createEmptyDictFileNative(String filePath, long dictVersion,
            String[] attributeKeyStringArray, String[] attributeValueStringArray);
    private static native long openNative(String sourceDir, long dictOffset, long dictSize,
            boolean isUpdatable);
    private static native void flushNative(long dict, String filePath);
    private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC);
    private static native void flushWithGCNative(long dict, String filePath);
    private static native void closeNative(long dict);
    // Lookups.
    private static native int getProbabilityNative(long dict, int[] word);
    private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
    // Fills the output arrays and returns the number of results to decode from them.
    private static native int getSuggestionsNative(long dict, long proximityInfo,
            long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
            int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint,
            int[] suggestOptions, int[] prevWordCodePointArray,
            int[] outputCodePoints, int[] outputScores, int[] outputIndices, int[] outputTypes,
            int[] outputAutoCommitFirstWordConfidence);
    // Helpers that do not take a dictionary.
    private static native float calcNormalizedScoreNative(int[] before, int[] after, int score);
    private static native int editDistanceNative(int[] before, int[] after);
    // Updates of the native dictionary contents.
    private static native void addUnigramWordNative(long dict, int[] word, int probability);
    private static native void addBigramWordsNative(long dict, int[] word0, int[] word1,
            int probability);
    private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
    // Test support.
    private static native int calculateProbabilityNative(long dict, int unigramProbability,
            int bigramProbability);
    private static native String getPropertyNative(long dict, String query);
    143 
    144     @UsedForTesting
    145     public static boolean createEmptyDictFile(final String filePath, final long dictVersion,
    146             final Map<String, String> attributeMap) {
    147         final String[] keyArray = new String[attributeMap.size()];
    148         final String[] valueArray = new String[attributeMap.size()];
    149         int index = 0;
    150         for (final String key : attributeMap.keySet()) {
    151             keyArray[index] = key;
    152             valueArray[index] = attributeMap.get(key);
    153             index++;
    154         }
    155         return createEmptyDictFileNative(filePath, dictVersion, keyArray, valueArray);
    156     }
    157 
    158     // TODO: Move native dict into session
    159     private final void loadDictionary(final String path, final long startOffset,
    160             final long length, final boolean isUpdatable) {
    161         mNativeDict = openNative(path, startOffset, length, isUpdatable);
    162     }
    163 
    164     @Override
    165     public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer,
    166             final String prevWord, final ProximityInfo proximityInfo,
    167             final boolean blockOffensiveWords, final int[] additionalFeaturesOptions) {
    168         return getSuggestionsWithSessionId(composer, prevWord, proximityInfo, blockOffensiveWords,
    169                 additionalFeaturesOptions, 0 /* sessionId */);
    170     }
    171 
    172     @Override
    173     public ArrayList<SuggestedWordInfo> getSuggestionsWithSessionId(final WordComposer composer,
    174             final String prevWord, final ProximityInfo proximityInfo,
    175             final boolean blockOffensiveWords, final int[] additionalFeaturesOptions,
    176             final int sessionId) {
    177         if (!isValidDictionary()) return null;
    178 
    179         Arrays.fill(mInputCodePoints, Constants.NOT_A_CODE);
    180         // TODO: toLowerCase in the native code
    181         final int[] prevWordCodePointArray = (null == prevWord)
    182                 ? null : StringUtils.toCodePointArray(prevWord);
    183         final int composerSize = composer.size();
    184 
    185         final boolean isGesture = composer.isBatchMode();
    186         if (composerSize <= 1 || !isGesture) {
    187             if (composerSize > MAX_WORD_LENGTH - 1) return null;
    188             for (int i = 0; i < composerSize; i++) {
    189                 mInputCodePoints[i] = composer.getCodeAt(i);
    190             }
    191         }
    192 
    193         final InputPointers ips = composer.getInputPointers();
    194         final int inputSize = isGesture ? ips.getPointerSize() : composerSize;
    195         mNativeSuggestOptions.setIsGesture(isGesture);
    196         mNativeSuggestOptions.setAdditionalFeaturesOptions(additionalFeaturesOptions);
    197         // proximityInfo and/or prevWordForBigrams may not be null.
    198         final int count = getSuggestionsNative(mNativeDict, proximityInfo.getNativeProximityInfo(),
    199                 getTraverseSession(sessionId).getSession(), ips.getXCoordinates(),
    200                 ips.getYCoordinates(), ips.getTimes(), ips.getPointerIds(), mInputCodePoints,
    201                 inputSize, 0 /* commitPoint */, mNativeSuggestOptions.getOptions(),
    202                 prevWordCodePointArray, mOutputCodePoints, mOutputScores, mSpaceIndices,
    203                 mOutputTypes, mOutputAutoCommitFirstWordConfidence);
    204         final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList();
    205         for (int j = 0; j < count; ++j) {
    206             final int start = j * MAX_WORD_LENGTH;
    207             int len = 0;
    208             while (len < MAX_WORD_LENGTH && mOutputCodePoints[start + len] != 0) {
    209                 ++len;
    210             }
    211             if (len > 0) {
    212                 final int flags = mOutputTypes[j] & SuggestedWordInfo.KIND_MASK_FLAGS;
    213                 if (blockOffensiveWords
    214                         && 0 != (flags & SuggestedWordInfo.KIND_FLAG_POSSIBLY_OFFENSIVE)
    215                         && 0 == (flags & SuggestedWordInfo.KIND_FLAG_EXACT_MATCH)) {
    216                     // If we block potentially offensive words, and if the word is possibly
    217                     // offensive, then we don't output it unless it's also an exact match.
    218                     continue;
    219                 }
    220                 final int kind = mOutputTypes[j] & SuggestedWordInfo.KIND_MASK_KIND;
    221                 final int score = SuggestedWordInfo.KIND_WHITELIST == kind
    222                         ? SuggestedWordInfo.MAX_SCORE : mOutputScores[j];
    223                 // TODO: check that all users of the `kind' parameter are ready to accept
    224                 // flags too and pass mOutputTypes[j] instead of kind
    225                 suggestions.add(new SuggestedWordInfo(new String(mOutputCodePoints, start, len),
    226                         score, kind, this /* sourceDict */,
    227                         mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */,
    228                         mOutputAutoCommitFirstWordConfidence[0]));
    229             }
    230         }
    231         return suggestions;
    232     }
    233 
    234     public boolean isValidDictionary() {
    235         return mNativeDict != 0;
    236     }
    237 
    238     public static float calcNormalizedScore(final String before, final String after,
    239             final int score) {
    240         return calcNormalizedScoreNative(StringUtils.toCodePointArray(before),
    241                 StringUtils.toCodePointArray(after), score);
    242     }
    243 
    244     public static int editDistance(final String before, final String after) {
    245         if (before == null || after == null) {
    246             throw new IllegalArgumentException();
    247         }
    248         return editDistanceNative(StringUtils.toCodePointArray(before),
    249                 StringUtils.toCodePointArray(after));
    250     }
    251 
    252     @Override
    253     public boolean isValidWord(final String word) {
    254         return getFrequency(word) != NOT_A_PROBABILITY;
    255     }
    256 
    257     @Override
    258     public int getFrequency(final String word) {
    259         if (word == null) return NOT_A_PROBABILITY;
    260         int[] codePoints = StringUtils.toCodePointArray(word);
    261         return getProbabilityNative(mNativeDict, codePoints);
    262     }
    263 
    264     // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
    265     // calls when checking for changes in an entire dictionary.
    266     public boolean isValidBigram(final String word0, final String word1) {
    267         return getBigramProbability(word0, word1) != NOT_A_PROBABILITY;
    268     }
    269 
    270     public int getBigramProbability(final String word0, final String word1) {
    271         if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) return NOT_A_PROBABILITY;
    272         final int[] codePoints0 = StringUtils.toCodePointArray(word0);
    273         final int[] codePoints1 = StringUtils.toCodePointArray(word1);
    274         return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1);
    275     }
    276 
    277     // Add a unigram entry to binary dictionary in native code.
    278     public void addUnigramWord(final String word, final int probability) {
    279         if (TextUtils.isEmpty(word)) {
    280             return;
    281         }
    282         final int[] codePoints = StringUtils.toCodePointArray(word);
    283         addUnigramWordNative(mNativeDict, codePoints, probability);
    284     }
    285 
    286     // Add a bigram entry to binary dictionary in native code.
    287     public void addBigramWords(final String word0, final String word1, final int probability) {
    288         if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) {
    289             return;
    290         }
    291         final int[] codePoints0 = StringUtils.toCodePointArray(word0);
    292         final int[] codePoints1 = StringUtils.toCodePointArray(word1);
    293         addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability);
    294     }
    295 
    296     // Remove a bigram entry form binary dictionary in native code.
    297     public void removeBigramWords(final String word0, final String word1) {
    298         if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) {
    299             return;
    300         }
    301         final int[] codePoints0 = StringUtils.toCodePointArray(word0);
    302         final int[] codePoints1 = StringUtils.toCodePointArray(word1);
    303         removeBigramWordsNative(mNativeDict, codePoints0, codePoints1);
    304     }
    305 
    306     private void reopen() {
    307         close();
    308         final File dictFile = new File(mDictFilePath);
    309         mNativeDict = openNative(dictFile.getAbsolutePath(), 0 /* startOffset */,
    310                 dictFile.length(), true /* isUpdatable */);
    311     }
    312 
    313     public void flush() {
    314         if (!isValidDictionary()) return;
    315         flushNative(mNativeDict, mDictFilePath);
    316         reopen();
    317     }
    318 
    319     public void flushWithGC() {
    320         if (!isValidDictionary()) return;
    321         flushWithGCNative(mNativeDict, mDictFilePath);
    322         reopen();
    323     }
    324 
    325     /**
    326      * Checks whether GC is needed to run or not.
    327      * @param mindsBlockByGC Whether to mind operations blocked by GC. We don't need to care about
    328      * the blocking in some situations such as in idle time or just before closing.
    329      * @return whether GC is needed to run or not.
    330      */
    331     public boolean needsToRunGC(final boolean mindsBlockByGC) {
    332         if (!isValidDictionary()) return false;
    333         return needsToRunGCNative(mNativeDict, mindsBlockByGC);
    334     }
    335 
    336     @UsedForTesting
    337     public int calculateProbability(final int unigramProbability, final int bigramProbability) {
    338         if (!isValidDictionary()) return NOT_A_PROBABILITY;
    339         return calculateProbabilityNative(mNativeDict, unigramProbability, bigramProbability);
    340     }
    341 
    342     @UsedForTesting
    343     public String getPropertyForTests(String query) {
    344         if (!isValidDictionary()) return "";
    345         return getPropertyNative(mNativeDict, query);
    346     }
    347 
    348     @Override
    349     public boolean shouldAutoCommit(final SuggestedWordInfo candidate) {
    350         return candidate.mAutoCommitFirstWordConfidence > CONFIDENCE_TO_AUTO_COMMIT;
    351     }
    352 
    353     @Override
    354     public void close() {
    355         synchronized (mDicTraverseSessions) {
    356             final int sessionsSize = mDicTraverseSessions.size();
    357             for (int index = 0; index < sessionsSize; ++index) {
    358                 final DicTraverseSession traverseSession = mDicTraverseSessions.valueAt(index);
    359                 if (traverseSession != null) {
    360                     traverseSession.close();
    361                 }
    362             }
    363             mDicTraverseSessions.clear();
    364         }
    365         closeInternalLocked();
    366     }
    367 
    368     private synchronized void closeInternalLocked() {
    369         if (mNativeDict != 0) {
    370             closeNative(mNativeDict);
    371             mNativeDict = 0;
    372         }
    373     }
    374 
    // TODO: Manage BinaryDictionary instances without using WeakReference or something.
    /**
     * Closes the native dictionary, if it is still open, when this object is finalized.
     *
     * Only {@code closeInternalLocked()} is called here; the traverse sessions are not closed
     * by this method.
     */
    @Override
    protected void finalize() throws Throwable {
        try {
            closeInternalLocked();
        } finally {
            // Always chain to the superclass finalizer, even if closing threw.
            super.finalize();
        }
    }
    384 }
    385