Home | History | Annotate | Download | only in spellcheck
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
      5  * use this file except in compliance with the License. You may obtain a copy of
      6  * the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
     13  * License for the specific language governing permissions and limitations under
     14  * the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.spellcheck;
     18 
     19 import android.content.Intent;
     20 import android.content.SharedPreferences;
     21 import android.preference.PreferenceManager;
     22 import android.service.textservice.SpellCheckerService;
     23 import android.util.Log;
     24 import android.view.textservice.SuggestionsInfo;
     25 
     26 import com.android.inputmethod.keyboard.ProximityInfo;
     27 import com.android.inputmethod.latin.BinaryDictionary;
     28 import com.android.inputmethod.latin.CollectionUtils;
     29 import com.android.inputmethod.latin.ContactsBinaryDictionary;
     30 import com.android.inputmethod.latin.Dictionary;
     31 import com.android.inputmethod.latin.DictionaryCollection;
     32 import com.android.inputmethod.latin.DictionaryFactory;
     33 import com.android.inputmethod.latin.LocaleUtils;
     34 import com.android.inputmethod.latin.R;
     35 import com.android.inputmethod.latin.StringUtils;
     36 import com.android.inputmethod.latin.SynchronouslyLoadedContactsBinaryDictionary;
     37 import com.android.inputmethod.latin.SynchronouslyLoadedUserBinaryDictionary;
     38 import com.android.inputmethod.latin.UserBinaryDictionary;
     39 
     40 import java.lang.ref.WeakReference;
     41 import java.util.ArrayList;
     42 import java.util.Arrays;
     43 import java.util.Collections;
     44 import java.util.HashSet;
     45 import java.util.Iterator;
     46 import java.util.Locale;
     47 import java.util.Map;
     48 import java.util.TreeMap;
     49 
     50 /**
     51  * Service for spell checking, using LatinIME's dictionaries and mechanisms.
     52  */
     53 public final class AndroidSpellCheckerService extends SpellCheckerService
     54         implements SharedPreferences.OnSharedPreferenceChangeListener {
     55     private static final String TAG = AndroidSpellCheckerService.class.getSimpleName();
     56     private static final boolean DBG = false;
     57     private static final int POOL_SIZE = 2;
     58 
     59     public static final String PREF_USE_CONTACTS_KEY = "pref_spellcheck_use_contacts";
     60 
     61     public static final int CAPITALIZE_NONE = 0; // No caps, or mixed case
     62     public static final int CAPITALIZE_FIRST = 1; // First only
     63     public static final int CAPITALIZE_ALL = 2; // All caps
     64 
     65     private final static String[] EMPTY_STRING_ARRAY = new String[0];
     66     private Map<String, DictionaryPool> mDictionaryPools = CollectionUtils.newSynchronizedTreeMap();
     67     private Map<String, UserBinaryDictionary> mUserDictionaries =
     68             CollectionUtils.newSynchronizedTreeMap();
     69     private ContactsBinaryDictionary mContactsDictionary;
     70 
     71     // The threshold for a candidate to be offered as a suggestion.
     72     private float mSuggestionThreshold;
     73     // The threshold for a suggestion to be considered "recommended".
     74     private float mRecommendedThreshold;
     75     // Whether to use the contacts dictionary
     76     private boolean mUseContactsDictionary;
     77     private final Object mUseContactsLock = new Object();
     78 
     79     private final HashSet<WeakReference<DictionaryCollection>> mDictionaryCollectionsList =
     80             CollectionUtils.newHashSet();
     81 
     82     public static final int SCRIPT_LATIN = 0;
     83     public static final int SCRIPT_CYRILLIC = 1;
     84     public static final String SINGLE_QUOTE = "\u0027";
     85     public static final String APOSTROPHE = "\u2019";
     86     private static final TreeMap<String, Integer> mLanguageToScript;
     87     static {
     88         // List of the supported languages and their associated script. We won't check
     89         // words written in another script than the selected script, because we know we
     90         // don't have those in our dictionary so we will underline everything and we
     91         // will never have any suggestions, so it makes no sense checking them, and this
     92         // is done in {@link #shouldFilterOut}. Also, the script is used to choose which
     93         // proximity to pass to the dictionary descent algorithm.
     94         // IMPORTANT: this only contains languages - do not write countries in there.
     95         // Only the language is searched from the map.
     96         mLanguageToScript = CollectionUtils.newTreeMap();
     97         mLanguageToScript.put("en", SCRIPT_LATIN);
     98         mLanguageToScript.put("fr", SCRIPT_LATIN);
     99         mLanguageToScript.put("de", SCRIPT_LATIN);
    100         mLanguageToScript.put("nl", SCRIPT_LATIN);
    101         mLanguageToScript.put("cs", SCRIPT_LATIN);
    102         mLanguageToScript.put("es", SCRIPT_LATIN);
    103         mLanguageToScript.put("it", SCRIPT_LATIN);
    104         mLanguageToScript.put("hr", SCRIPT_LATIN);
    105         mLanguageToScript.put("pt", SCRIPT_LATIN);
    106         mLanguageToScript.put("ru", SCRIPT_CYRILLIC);
    107         // TODO: Make a persian proximity, and activate the Farsi subtype.
    108         // mLanguageToScript.put("fa", SCRIPT_PERSIAN);
    109     }
    110 
    111     @Override public void onCreate() {
    112         super.onCreate();
    113         mSuggestionThreshold =
    114                 Float.parseFloat(getString(R.string.spellchecker_suggestion_threshold_value));
    115         mRecommendedThreshold =
    116                 Float.parseFloat(getString(R.string.spellchecker_recommended_threshold_value));
    117         final SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this);
    118         prefs.registerOnSharedPreferenceChangeListener(this);
    119         onSharedPreferenceChanged(prefs, PREF_USE_CONTACTS_KEY);
    120     }
    121 
    122     public static int getScriptFromLocale(final Locale locale) {
    123         final Integer script = mLanguageToScript.get(locale.getLanguage());
    124         if (null == script) {
    125             throw new RuntimeException("We have been called with an unsupported language: \""
    126                     + locale.getLanguage() + "\". Framework bug?");
    127         }
    128         return script;
    129     }
    130 
    131     @Override
    132     public void onSharedPreferenceChanged(final SharedPreferences prefs, final String key) {
    133         if (!PREF_USE_CONTACTS_KEY.equals(key)) return;
    134         synchronized(mUseContactsLock) {
    135             mUseContactsDictionary = prefs.getBoolean(PREF_USE_CONTACTS_KEY, true);
    136             if (mUseContactsDictionary) {
    137                 startUsingContactsDictionaryLocked();
    138             } else {
    139                 stopUsingContactsDictionaryLocked();
    140             }
    141         }
    142     }
    143 
    144     private void startUsingContactsDictionaryLocked() {
    145         if (null == mContactsDictionary) {
    146             // TODO: use the right locale for each session
    147             mContactsDictionary =
    148                     new SynchronouslyLoadedContactsBinaryDictionary(this, Locale.getDefault());
    149         }
    150         final Iterator<WeakReference<DictionaryCollection>> iterator =
    151                 mDictionaryCollectionsList.iterator();
    152         while (iterator.hasNext()) {
    153             final WeakReference<DictionaryCollection> dictRef = iterator.next();
    154             final DictionaryCollection dict = dictRef.get();
    155             if (null == dict) {
    156                 iterator.remove();
    157             } else {
    158                 dict.addDictionary(mContactsDictionary);
    159             }
    160         }
    161     }
    162 
    163     private void stopUsingContactsDictionaryLocked() {
    164         if (null == mContactsDictionary) return;
    165         final Dictionary contactsDict = mContactsDictionary;
    166         // TODO: revert to the concrete type when USE_BINARY_CONTACTS_DICTIONARY is no longer needed
    167         mContactsDictionary = null;
    168         final Iterator<WeakReference<DictionaryCollection>> iterator =
    169                 mDictionaryCollectionsList.iterator();
    170         while (iterator.hasNext()) {
    171             final WeakReference<DictionaryCollection> dictRef = iterator.next();
    172             final DictionaryCollection dict = dictRef.get();
    173             if (null == dict) {
    174                 iterator.remove();
    175             } else {
    176                 dict.removeDictionary(contactsDict);
    177             }
    178         }
    179         contactsDict.close();
    180     }
    181 
    182     @Override
    183     public Session createSession() {
    184         // Should not refer to AndroidSpellCheckerSession directly considering
    185         // that AndroidSpellCheckerSession may be overlaid.
    186         return AndroidSpellCheckerSessionFactory.newInstance(this);
    187     }
    188 
    189     public static SuggestionsInfo getNotInDictEmptySuggestions() {
    190         return new SuggestionsInfo(0, EMPTY_STRING_ARRAY);
    191     }
    192 
    193     public static SuggestionsInfo getInDictEmptySuggestions() {
    194         return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY,
    195                 EMPTY_STRING_ARRAY);
    196     }
    197 
    198     public SuggestionsGatherer newSuggestionsGatherer(final String text, int maxLength) {
    199         return new SuggestionsGatherer(
    200                 text, mSuggestionThreshold, mRecommendedThreshold, maxLength);
    201     }
    202 
    203     // TODO: remove this class and replace it by storage local to the session.
    204     public static final class SuggestionsGatherer {
    205         public static final class Result {
    206             public final String[] mSuggestions;
    207             public final boolean mHasRecommendedSuggestions;
    208             public Result(final String[] gatheredSuggestions,
    209                     final boolean hasRecommendedSuggestions) {
    210                 mSuggestions = gatheredSuggestions;
    211                 mHasRecommendedSuggestions = hasRecommendedSuggestions;
    212             }
    213         }
    214 
    215         private final ArrayList<CharSequence> mSuggestions;
    216         private final int[] mScores;
    217         private final String mOriginalText;
    218         private final float mSuggestionThreshold;
    219         private final float mRecommendedThreshold;
    220         private final int mMaxLength;
    221         private int mLength = 0;
    222 
    223         // The two following attributes are only ever filled if the requested max length
    224         // is 0 (or less, which is treated the same).
    225         private String mBestSuggestion = null;
    226         private int mBestScore = Integer.MIN_VALUE; // As small as possible
    227 
    228         SuggestionsGatherer(final String originalText, final float suggestionThreshold,
    229                 final float recommendedThreshold, final int maxLength) {
    230             mOriginalText = originalText;
    231             mSuggestionThreshold = suggestionThreshold;
    232             mRecommendedThreshold = recommendedThreshold;
    233             mMaxLength = maxLength;
    234             mSuggestions = CollectionUtils.newArrayList(maxLength + 1);
    235             mScores = new int[mMaxLength];
    236         }
    237 
    238         synchronized public boolean addWord(char[] word, int[] spaceIndices, int wordOffset,
    239                 int wordLength, int score) {
    240             final int positionIndex = Arrays.binarySearch(mScores, 0, mLength, score);
    241             // binarySearch returns the index if the element exists, and -<insertion index> - 1
    242             // if it doesn't. See documentation for binarySearch.
    243             final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1;
    244 
    245             if (insertIndex == 0 && mLength >= mMaxLength) {
    246                 // In the future, we may want to keep track of the best suggestion score even if
    247                 // we are asked for 0 suggestions. In this case, we can use the following
    248                 // (tested) code to keep it:
    249                 // If the maxLength is 0 (should never be less, but if it is, it's treated as 0)
    250                 // then we need to keep track of the best suggestion in mBestScore and
    251                 // mBestSuggestion. This is so that we know whether the best suggestion makes
    252                 // the score cutoff, since we need to know that to return a meaningful
    253                 // looksLikeTypo.
    254                 // if (0 >= mMaxLength) {
    255                 //     if (score > mBestScore) {
    256                 //         mBestScore = score;
    257                 //         mBestSuggestion = new String(word, wordOffset, wordLength);
    258                 //     }
    259                 // }
    260                 return true;
    261             }
    262             if (insertIndex >= mMaxLength) {
    263                 // We found a suggestion, but its score is too weak to be kept considering
    264                 // the suggestion limit.
    265                 return true;
    266             }
    267 
    268             // Compute the normalized score and skip this word if it's normalized score does not
    269             // make the threshold.
    270             final String wordString = new String(word, wordOffset, wordLength);
    271             final float normalizedScore =
    272                     BinaryDictionary.calcNormalizedScore(mOriginalText, wordString, score);
    273             if (normalizedScore < mSuggestionThreshold) {
    274                 if (DBG) Log.i(TAG, wordString + " does not make the score threshold");
    275                 return true;
    276             }
    277 
    278             if (mLength < mMaxLength) {
    279                 final int copyLen = mLength - insertIndex;
    280                 ++mLength;
    281                 System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen);
    282                 mSuggestions.add(insertIndex, wordString);
    283             } else {
    284                 System.arraycopy(mScores, 1, mScores, 0, insertIndex);
    285                 mSuggestions.add(insertIndex, wordString);
    286                 mSuggestions.remove(0);
    287             }
    288             mScores[insertIndex] = score;
    289 
    290             return true;
    291         }
    292 
    293         public Result getResults(final int capitalizeType, final Locale locale) {
    294             final String[] gatheredSuggestions;
    295             final boolean hasRecommendedSuggestions;
    296             if (0 == mLength) {
    297                 // Either we found no suggestions, or we found some BUT the max length was 0.
    298                 // If we found some mBestSuggestion will not be null. If it is null, then
    299                 // we found none, regardless of the max length.
    300                 if (null == mBestSuggestion) {
    301                     gatheredSuggestions = null;
    302                     hasRecommendedSuggestions = false;
    303                 } else {
    304                     gatheredSuggestions = EMPTY_STRING_ARRAY;
    305                     final float normalizedScore = BinaryDictionary.calcNormalizedScore(
    306                             mOriginalText, mBestSuggestion, mBestScore);
    307                     hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold);
    308                 }
    309             } else {
    310                 if (DBG) {
    311                     if (mLength != mSuggestions.size()) {
    312                         Log.e(TAG, "Suggestion size is not the same as stored mLength");
    313                     }
    314                     for (int i = mLength - 1; i >= 0; --i) {
    315                         Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i));
    316                     }
    317                 }
    318                 Collections.reverse(mSuggestions);
    319                 StringUtils.removeDupes(mSuggestions);
    320                 if (CAPITALIZE_ALL == capitalizeType) {
    321                     for (int i = 0; i < mSuggestions.size(); ++i) {
    322                         // get(i) returns a CharSequence which is actually a String so .toString()
    323                         // should return the same object.
    324                         mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale));
    325                     }
    326                 } else if (CAPITALIZE_FIRST == capitalizeType) {
    327                     for (int i = 0; i < mSuggestions.size(); ++i) {
    328                         // Likewise
    329                         mSuggestions.set(i, StringUtils.toTitleCase(
    330                                 mSuggestions.get(i).toString(), locale));
    331                     }
    332                 }
    333                 // This returns a String[], while toArray() returns an Object[] which cannot be cast
    334                 // into a String[].
    335                 gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY);
    336 
    337                 final int bestScore = mScores[mLength - 1];
    338                 final CharSequence bestSuggestion = mSuggestions.get(0);
    339                 final float normalizedScore =
    340                         BinaryDictionary.calcNormalizedScore(
    341                                 mOriginalText, bestSuggestion.toString(), bestScore);
    342                 hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold);
    343                 if (DBG) {
    344                     Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore);
    345                     Log.i(TAG, "Normalized score = " + normalizedScore
    346                             + " (threshold " + mRecommendedThreshold
    347                             + ") => hasRecommendedSuggestions = " + hasRecommendedSuggestions);
    348                 }
    349             }
    350             return new Result(gatheredSuggestions, hasRecommendedSuggestions);
    351         }
    352     }
    353 
    354     @Override
    355     public boolean onUnbind(final Intent intent) {
    356         closeAllDictionaries();
    357         return false;
    358     }
    359 
    360     private void closeAllDictionaries() {
    361         final Map<String, DictionaryPool> oldPools = mDictionaryPools;
    362         mDictionaryPools = CollectionUtils.newSynchronizedTreeMap();
    363         final Map<String, UserBinaryDictionary> oldUserDictionaries = mUserDictionaries;
    364         mUserDictionaries = CollectionUtils.newSynchronizedTreeMap();
    365         new Thread("spellchecker_close_dicts") {
    366             @Override
    367             public void run() {
    368                 for (DictionaryPool pool : oldPools.values()) {
    369                     pool.close();
    370                 }
    371                 for (Dictionary dict : oldUserDictionaries.values()) {
    372                     dict.close();
    373                 }
    374                 synchronized (mUseContactsLock) {
    375                     if (null != mContactsDictionary) {
    376                         // The synchronously loaded contacts dictionary should have been in one
    377                         // or several pools, but it is shielded against multiple closing and it's
    378                         // safe to call it several times.
    379                         final ContactsBinaryDictionary dictToClose = mContactsDictionary;
    380                         // TODO: revert to the concrete type when USE_BINARY_CONTACTS_DICTIONARY
    381                         // is no longer needed
    382                         mContactsDictionary = null;
    383                         dictToClose.close();
    384                     }
    385                 }
    386             }
    387         }.start();
    388     }
    389 
    390     public DictionaryPool getDictionaryPool(final String locale) {
    391         DictionaryPool pool = mDictionaryPools.get(locale);
    392         if (null == pool) {
    393             final Locale localeObject = LocaleUtils.constructLocaleFromString(locale);
    394             pool = new DictionaryPool(POOL_SIZE, this, localeObject);
    395             mDictionaryPools.put(locale, pool);
    396         }
    397         return pool;
    398     }
    399 
    400     public DictAndProximity createDictAndProximity(final Locale locale) {
    401         final int script = getScriptFromLocale(locale);
    402         final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo(
    403                 SpellCheckerProximityInfo.getProximityForScript(script),
    404                 SpellCheckerProximityInfo.ROW_SIZE,
    405                 SpellCheckerProximityInfo.PROXIMITY_GRID_WIDTH,
    406                 SpellCheckerProximityInfo.PROXIMITY_GRID_HEIGHT);
    407         final DictionaryCollection dictionaryCollection =
    408                 DictionaryFactory.createMainDictionaryFromManager(this, locale,
    409                         true /* useFullEditDistance */);
    410         final String localeStr = locale.toString();
    411         UserBinaryDictionary userDictionary = mUserDictionaries.get(localeStr);
    412         if (null == userDictionary) {
    413             userDictionary = new SynchronouslyLoadedUserBinaryDictionary(this, localeStr, true);
    414             mUserDictionaries.put(localeStr, userDictionary);
    415         }
    416         dictionaryCollection.addDictionary(userDictionary);
    417         synchronized (mUseContactsLock) {
    418             if (mUseContactsDictionary) {
    419                 if (null == mContactsDictionary) {
    420                     // TODO: use the right locale. We can't do it right now because the
    421                     // spell checker is reusing the contacts dictionary across sessions
    422                     // without regard for their locale, so we need to fix that first.
    423                     mContactsDictionary = new SynchronouslyLoadedContactsBinaryDictionary(this,
    424                             Locale.getDefault());
    425                 }
    426             }
    427             dictionaryCollection.addDictionary(mContactsDictionary);
    428             mDictionaryCollectionsList.add(
    429                     new WeakReference<DictionaryCollection>(dictionaryCollection));
    430         }
    431         return new DictAndProximity(dictionaryCollection, proximityInfo);
    432     }
    433 
    434     // This method assumes the text is not empty or null.
    435     public static int getCapitalizationType(String text) {
    436         // If the first char is not uppercase, then the word is either all lower case,
    437         // and in either case we return CAPITALIZE_NONE.
    438         if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE;
    439         final int len = text.length();
    440         int capsCount = 1;
    441         for (int i = 1; i < len; i = text.offsetByCodePoints(i, 1)) {
    442             if (1 != capsCount && i != capsCount) break;
    443             if (Character.isUpperCase(text.codePointAt(i))) ++capsCount;
    444         }
    445         // We know the first char is upper case. So we want to test if either everything
    446         // else is lower case, or if everything else is upper case. If the string is
    447         // exactly one char long, then we will arrive here with capsCount 1, and this is
    448         // correct, too.
    449         if (1 == capsCount) return CAPITALIZE_FIRST;
    450         return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE);
    451     }
    452 }
    453