Home | History | Annotate | Download | only in spellcheck
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
      5  * use this file except in compliance with the License. You may obtain a copy of
      6  * the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
     13  * License for the specific language governing permissions and limitations under
     14  * the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.spellcheck;
     18 
     19 import android.content.Intent;
     20 import android.content.res.Resources;
     21 import android.service.textservice.SpellCheckerService;
     22 import android.text.TextUtils;
     23 import android.util.Log;
     24 import android.view.textservice.SuggestionsInfo;
     25 import android.view.textservice.TextInfo;
     26 
     27 import com.android.inputmethod.compat.ArraysCompatUtils;
     28 import com.android.inputmethod.keyboard.ProximityInfo;
     29 import com.android.inputmethod.latin.BinaryDictionary;
     30 import com.android.inputmethod.latin.Dictionary;
     31 import com.android.inputmethod.latin.Dictionary.DataType;
     32 import com.android.inputmethod.latin.Dictionary.WordCallback;
     33 import com.android.inputmethod.latin.DictionaryCollection;
     34 import com.android.inputmethod.latin.DictionaryFactory;
     35 import com.android.inputmethod.latin.Flag;
     36 import com.android.inputmethod.latin.LocaleUtils;
     37 import com.android.inputmethod.latin.R;
     38 import com.android.inputmethod.latin.SynchronouslyLoadedContactsDictionary;
     39 import com.android.inputmethod.latin.SynchronouslyLoadedUserDictionary;
     40 import com.android.inputmethod.latin.Utils;
     41 import com.android.inputmethod.latin.WhitelistDictionary;
     42 import com.android.inputmethod.latin.WordComposer;
     43 
     44 import java.util.ArrayList;
     45 import java.util.Arrays;
     46 import java.util.Collections;
     47 import java.util.Locale;
     48 import java.util.Map;
     49 import java.util.TreeMap;
     50 
     51 /**
     52  * Service for spell checking, using LatinIME's dictionaries and mechanisms.
     53  */
     54 public class AndroidSpellCheckerService extends SpellCheckerService {
    // Log tag: the simple name of this class.
    private static final String TAG = AndroidSpellCheckerService.class.getSimpleName();
    // Guards the verbose Log.i traces in this file. When true, RuntimeExceptions caught in
    // onGetSuggestions() are rethrown instead of being logged and swallowed.
    private static final boolean DBG = false;
    // Size argument passed to every DictionaryPool created by getDictionaryPool().
    private static final int POOL_SIZE = 2;

    // Values returned by getCapitalizationType() and consumed by
    // SuggestionsGatherer.getResults().
    private static final int CAPITALIZE_NONE = 0; // No caps, or mixed case
    private static final int CAPITALIZE_FIRST = 1; // First only
    private static final int CAPITALIZE_ALL = 2; // All caps

    // Shared empty array, used both as an empty result and as the type token for toArray().
    private final static String[] EMPTY_STRING_ARRAY = new String[0];
    // BinaryDictionary.ALL_CONFIG_FLAGS with FLAG_USE_FULL_EDIT_DISTANCE appended at the end.
    private final static Flag[] USE_FULL_EDIT_DISTANCE_FLAG_ARRAY;
    static {
        // See BinaryDictionary.java for an explanation of these flags
        // Specifically, ALL_CONFIG_FLAGS means that we want to consider all flags with the
        // current dictionary configuration - for example, consider the UMLAUT flag
        // so that it will be turned on for German dictionaries and off for others.
        USE_FULL_EDIT_DISTANCE_FLAG_ARRAY = Arrays.copyOf(BinaryDictionary.ALL_CONFIG_FLAGS,
                BinaryDictionary.ALL_CONFIG_FLAGS.length + 1);
        USE_FULL_EDIT_DISTANCE_FLAG_ARRAY[BinaryDictionary.ALL_CONFIG_FLAGS.length] =
                BinaryDictionary.FLAG_USE_FULL_EDIT_DISTANCE;
    }
    // Dictionary pools keyed by the locale string handed to getDictionaryPool().
    // onUnbind() swaps in a fresh map and closes every pool of the old one.
    private Map<String, DictionaryPool> mDictionaryPools =
            Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
    // User dictionaries keyed by Locale.toString(); filled lazily by createDictAndProximity()
    // and replaced/closed by onUnbind().
    private Map<String, Dictionary> mUserDictionaries =
            Collections.synchronizedMap(new TreeMap<String, Dictionary>());
    // Whitelist dictionaries keyed by Locale.toString(); same life cycle as the user
    // dictionaries above.
    private Map<String, Dictionary> mWhitelistDictionaries =
            Collections.synchronizedMap(new TreeMap<String, Dictionary>());
    // Single contacts dictionary, created on first use in createDictAndProximity() and added
    // to every collection built there. Reset to null and closed in onUnbind().
    private SynchronouslyLoadedContactsDictionary mContactsDictionary;

    // The threshold for a candidate to be offered as a suggestion.
    // Parsed from a string resource in onCreate().
    private double mSuggestionThreshold;
    // The threshold for a suggestion to be considered "likely".
    // Parsed from a string resource in onCreate().
    private double mLikelyThreshold;
     87 
     88     @Override public void onCreate() {
     89         super.onCreate();
     90         mSuggestionThreshold =
     91                 Double.parseDouble(getString(R.string.spellchecker_suggestion_threshold_value));
     92         mLikelyThreshold =
     93                 Double.parseDouble(getString(R.string.spellchecker_likely_threshold_value));
     94     }
     95 
     96     @Override
     97     public Session createSession() {
     98         return new AndroidSpellCheckerSession(this);
     99     }
    100 
    101     private static SuggestionsInfo getNotInDictEmptySuggestions() {
    102         return new SuggestionsInfo(0, EMPTY_STRING_ARRAY);
    103     }
    104 
    105     private static SuggestionsInfo getInDictEmptySuggestions() {
    106         return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY,
    107                 EMPTY_STRING_ARRAY);
    108     }
    109 
    110     private static class SuggestionsGatherer implements WordCallback {
    111         public static class Result {
    112             public final String[] mSuggestions;
    113             public final boolean mHasLikelySuggestions;
    114             public Result(final String[] gatheredSuggestions, final boolean hasLikelySuggestions) {
    115                 mSuggestions = gatheredSuggestions;
    116                 mHasLikelySuggestions = hasLikelySuggestions;
    117             }
    118         }
    119 
    120         private final ArrayList<CharSequence> mSuggestions;
    121         private final int[] mScores;
    122         private final String mOriginalText;
    123         private final double mSuggestionThreshold;
    124         private final double mLikelyThreshold;
    125         private final int mMaxLength;
    126         private int mLength = 0;
    127 
    128         // The two following attributes are only ever filled if the requested max length
    129         // is 0 (or less, which is treated the same).
    130         private String mBestSuggestion = null;
    131         private int mBestScore = Integer.MIN_VALUE; // As small as possible
    132 
    133         SuggestionsGatherer(final String originalText, final double suggestionThreshold,
    134                 final double likelyThreshold, final int maxLength) {
    135             mOriginalText = originalText;
    136             mSuggestionThreshold = suggestionThreshold;
    137             mLikelyThreshold = likelyThreshold;
    138             mMaxLength = maxLength;
    139             mSuggestions = new ArrayList<CharSequence>(maxLength + 1);
    140             mScores = new int[mMaxLength];
    141         }
    142 
    143         @Override
    144         synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score,
    145                 int dicTypeId, DataType dataType) {
    146             final int positionIndex = ArraysCompatUtils.binarySearch(mScores, 0, mLength, score);
    147             // binarySearch returns the index if the element exists, and -<insertion index> - 1
    148             // if it doesn't. See documentation for binarySearch.
    149             final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1;
    150 
    151             if (insertIndex == 0 && mLength >= mMaxLength) {
    152                 // In the future, we may want to keep track of the best suggestion score even if
    153                 // we are asked for 0 suggestions. In this case, we can use the following
    154                 // (tested) code to keep it:
    155                 // If the maxLength is 0 (should never be less, but if it is, it's treated as 0)
    156                 // then we need to keep track of the best suggestion in mBestScore and
    157                 // mBestSuggestion. This is so that we know whether the best suggestion makes
    158                 // the score cutoff, since we need to know that to return a meaningful
    159                 // looksLikeTypo.
    160                 // if (0 >= mMaxLength) {
    161                 //     if (score > mBestScore) {
    162                 //         mBestScore = score;
    163                 //         mBestSuggestion = new String(word, wordOffset, wordLength);
    164                 //     }
    165                 // }
    166                 return true;
    167             }
    168             if (insertIndex >= mMaxLength) {
    169                 // We found a suggestion, but its score is too weak to be kept considering
    170                 // the suggestion limit.
    171                 return true;
    172             }
    173 
    174             // Compute the normalized score and skip this word if it's normalized score does not
    175             // make the threshold.
    176             final String wordString = new String(word, wordOffset, wordLength);
    177             final double normalizedScore =
    178                     Utils.calcNormalizedScore(mOriginalText, wordString, score);
    179             if (normalizedScore < mSuggestionThreshold) {
    180                 if (DBG) Log.i(TAG, wordString + " does not make the score threshold");
    181                 return true;
    182             }
    183 
    184             if (mLength < mMaxLength) {
    185                 final int copyLen = mLength - insertIndex;
    186                 ++mLength;
    187                 System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen);
    188                 mSuggestions.add(insertIndex, wordString);
    189             } else {
    190                 System.arraycopy(mScores, 1, mScores, 0, insertIndex);
    191                 mSuggestions.add(insertIndex, wordString);
    192                 mSuggestions.remove(0);
    193             }
    194             mScores[insertIndex] = score;
    195 
    196             return true;
    197         }
    198 
    199         public Result getResults(final int capitalizeType, final Locale locale) {
    200             final String[] gatheredSuggestions;
    201             final boolean hasLikelySuggestions;
    202             if (0 == mLength) {
    203                 // Either we found no suggestions, or we found some BUT the max length was 0.
    204                 // If we found some mBestSuggestion will not be null. If it is null, then
    205                 // we found none, regardless of the max length.
    206                 if (null == mBestSuggestion) {
    207                     gatheredSuggestions = null;
    208                     hasLikelySuggestions = false;
    209                 } else {
    210                     gatheredSuggestions = EMPTY_STRING_ARRAY;
    211                     final double normalizedScore =
    212                             Utils.calcNormalizedScore(mOriginalText, mBestSuggestion, mBestScore);
    213                     hasLikelySuggestions = (normalizedScore > mLikelyThreshold);
    214                 }
    215             } else {
    216                 if (DBG) {
    217                     if (mLength != mSuggestions.size()) {
    218                         Log.e(TAG, "Suggestion size is not the same as stored mLength");
    219                     }
    220                     for (int i = mLength - 1; i >= 0; --i) {
    221                         Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i));
    222                     }
    223                 }
    224                 Collections.reverse(mSuggestions);
    225                 Utils.removeDupes(mSuggestions);
    226                 if (CAPITALIZE_ALL == capitalizeType) {
    227                     for (int i = 0; i < mSuggestions.size(); ++i) {
    228                         // get(i) returns a CharSequence which is actually a String so .toString()
    229                         // should return the same object.
    230                         mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale));
    231                     }
    232                 } else if (CAPITALIZE_FIRST == capitalizeType) {
    233                     for (int i = 0; i < mSuggestions.size(); ++i) {
    234                         // Likewise
    235                         mSuggestions.set(i, Utils.toTitleCase(mSuggestions.get(i).toString(),
    236                                 locale));
    237                     }
    238                 }
    239                 // This returns a String[], while toArray() returns an Object[] which cannot be cast
    240                 // into a String[].
    241                 gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY);
    242 
    243                 final int bestScore = mScores[mLength - 1];
    244                 final CharSequence bestSuggestion = mSuggestions.get(0);
    245                 final double normalizedScore =
    246                         Utils.calcNormalizedScore(mOriginalText, bestSuggestion, bestScore);
    247                 hasLikelySuggestions = (normalizedScore > mLikelyThreshold);
    248                 if (DBG) {
    249                     Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore);
    250                     Log.i(TAG, "Normalized score = " + normalizedScore
    251                             + " (threshold " + mLikelyThreshold
    252                             + ") => hasLikelySuggestions = " + hasLikelySuggestions);
    253                 }
    254             }
    255             return new Result(gatheredSuggestions, hasLikelySuggestions);
    256         }
    257     }
    258 
    259     @Override
    260     public boolean onUnbind(final Intent intent) {
    261         final Map<String, DictionaryPool> oldPools = mDictionaryPools;
    262         mDictionaryPools = Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
    263         final Map<String, Dictionary> oldUserDictionaries = mUserDictionaries;
    264         mUserDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>());
    265         final Map<String, Dictionary> oldWhitelistDictionaries = mWhitelistDictionaries;
    266         mWhitelistDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>());
    267         for (DictionaryPool pool : oldPools.values()) {
    268             pool.close();
    269         }
    270         for (Dictionary dict : oldUserDictionaries.values()) {
    271             dict.close();
    272         }
    273         for (Dictionary dict : oldWhitelistDictionaries.values()) {
    274             dict.close();
    275         }
    276         if (null != mContactsDictionary) {
    277             // The synchronously loaded contacts dictionary should have been in one
    278             // or several pools, but it is shielded against multiple closing and it's
    279             // safe to call it several times.
    280             final SynchronouslyLoadedContactsDictionary dictToClose = mContactsDictionary;
    281             mContactsDictionary = null;
    282             dictToClose.close();
    283         }
    284         return false;
    285     }
    286 
    287     private DictionaryPool getDictionaryPool(final String locale) {
    288         DictionaryPool pool = mDictionaryPools.get(locale);
    289         if (null == pool) {
    290             final Locale localeObject = LocaleUtils.constructLocaleFromString(locale);
    291             pool = new DictionaryPool(POOL_SIZE, this, localeObject);
    292             mDictionaryPools.put(locale, pool);
    293         }
    294         return pool;
    295     }
    296 
    297     public DictAndProximity createDictAndProximity(final Locale locale) {
    298         final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo();
    299         final Resources resources = getResources();
    300         final int fallbackResourceId = Utils.getMainDictionaryResourceId(resources);
    301         final DictionaryCollection dictionaryCollection =
    302                 DictionaryFactory.createDictionaryFromManager(this, locale, fallbackResourceId,
    303                         USE_FULL_EDIT_DISTANCE_FLAG_ARRAY);
    304         final String localeStr = locale.toString();
    305         Dictionary userDictionary = mUserDictionaries.get(localeStr);
    306         if (null == userDictionary) {
    307             userDictionary = new SynchronouslyLoadedUserDictionary(this, localeStr, true);
    308             mUserDictionaries.put(localeStr, userDictionary);
    309         }
    310         dictionaryCollection.addDictionary(userDictionary);
    311         Dictionary whitelistDictionary = mWhitelistDictionaries.get(localeStr);
    312         if (null == whitelistDictionary) {
    313             whitelistDictionary = new WhitelistDictionary(this, locale);
    314             mWhitelistDictionaries.put(localeStr, whitelistDictionary);
    315         }
    316         dictionaryCollection.addDictionary(whitelistDictionary);
    317         if (null == mContactsDictionary) {
    318             mContactsDictionary = new SynchronouslyLoadedContactsDictionary(this);
    319         }
    320         // TODO: add a setting to use or not contacts when checking spelling
    321         dictionaryCollection.addDictionary(mContactsDictionary);
    322         return new DictAndProximity(dictionaryCollection, proximityInfo);
    323     }
    324 
    325     // This method assumes the text is not empty or null.
    326     private static int getCapitalizationType(String text) {
    327         // If the first char is not uppercase, then the word is either all lower case,
    328         // and in either case we return CAPITALIZE_NONE.
    329         if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE;
    330         final int len = text.codePointCount(0, text.length());
    331         int capsCount = 1;
    332         for (int i = 1; i < len; ++i) {
    333             if (1 != capsCount && i != capsCount) break;
    334             if (Character.isUpperCase(text.codePointAt(i))) ++capsCount;
    335         }
    336         // We know the first char is upper case. So we want to test if either everything
    337         // else is lower case, or if everything else is upper case. If the string is
    338         // exactly one char long, then we will arrive here with capsCount 1, and this is
    339         // correct, too.
    340         if (1 == capsCount) return CAPITALIZE_FIRST;
    341         return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE);
    342     }
    343 
    344     private static class AndroidSpellCheckerSession extends Session {
        // Immutable, but need the locale which is not available in the constructor yet.
        // Assigned in onCreate() from the service's per-locale pool cache.
        private DictionaryPool mDictionaryPool;
        // Likewise: built in onCreate() from the session's locale string.
        private Locale mLocale;

        // The owning service; provides the dictionary pools and the score thresholds.
        private final AndroidSpellCheckerService mService;

        /**
         * Creates a session attached to the given service. The pool and the locale are not
         * available yet and are set up later in onCreate().
         *
         * @param service the service that created this session
         */
        AndroidSpellCheckerSession(final AndroidSpellCheckerService service) {
            mService = service;
        }
    355 
    356         @Override
    357         public void onCreate() {
    358             final String localeString = getLocale();
    359             mDictionaryPool = mService.getDictionaryPool(localeString);
    360             mLocale = LocaleUtils.constructLocaleFromString(localeString);
    361         }
    362 
    363         /**
    364          * Finds out whether a particular string should be filtered out of spell checking.
    365          *
    366          * This will loosely match URLs, numbers, symbols.
    367          *
    368          * @param text the string to evaluate.
    369          * @return true if we should filter this text out, false otherwise
    370          */
    371         private boolean shouldFilterOut(final String text) {
    372             if (TextUtils.isEmpty(text) || text.length() <= 1) return true;
    373 
    374             // TODO: check if an equivalent processing can't be done more quickly with a
    375             // compiled regexp.
    376             // Filter by first letter
    377             final int firstCodePoint = text.codePointAt(0);
    378             // Filter out words that don't start with a letter or an apostrophe
    379             if (!Character.isLetter(firstCodePoint)
    380                     && '\'' != firstCodePoint) return true;
    381 
    382             // Filter contents
    383             final int length = text.length();
    384             int letterCount = 0;
    385             for (int i = 0; i < length; ++i) {
    386                 final int codePoint = text.codePointAt(i);
    387                 // Any word containing a '@' is probably an e-mail address
    388                 // Any word containing a '/' is probably either an ad-hoc combination of two
    389                 // words or a URI - in either case we don't want to spell check that
    390                 if ('@' == codePoint
    391                         || '/' == codePoint) return true;
    392                 if (Character.isLetter(codePoint)) ++letterCount;
    393             }
    394             // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
    395             // in this word are letters
    396             return (letterCount * 4 < length * 3);
    397         }
    398 
    399         // Note : this must be reentrant
    400         /**
    401          * Gets a list of suggestions for a specific string. This returns a list of possible
    402          * corrections for the text passed as an argument. It may split or group words, and
    403          * even perform grammatical analysis.
    404          */
    405         @Override
    406         public SuggestionsInfo onGetSuggestions(final TextInfo textInfo,
    407                 final int suggestionsLimit) {
    408             try {
    409                 final String text = textInfo.getText();
    410 
    411                 if (shouldFilterOut(text)) {
    412                     DictAndProximity dictInfo = null;
    413                     try {
    414                         dictInfo = mDictionaryPool.takeOrGetNull();
    415                         if (null == dictInfo) return getNotInDictEmptySuggestions();
    416                         return dictInfo.mDictionary.isValidWord(text) ? getInDictEmptySuggestions()
    417                                 : getNotInDictEmptySuggestions();
    418                     } finally {
    419                         if (null != dictInfo) {
    420                             if (!mDictionaryPool.offer(dictInfo)) {
    421                                 Log.e(TAG, "Can't re-insert a dictionary into its pool");
    422                             }
    423                         }
    424                     }
    425                 }
    426 
    427                 // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
    428                 final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text,
    429                         mService.mSuggestionThreshold, mService.mLikelyThreshold, suggestionsLimit);
    430                 final WordComposer composer = new WordComposer();
    431                 final int length = text.length();
    432                 for (int i = 0; i < length; ++i) {
    433                     final int character = text.codePointAt(i);
    434                     final int proximityIndex = SpellCheckerProximityInfo.getIndexOf(character);
    435                     final int[] proximities;
    436                     if (-1 == proximityIndex) {
    437                         proximities = new int[] { character };
    438                     } else {
    439                         proximities = Arrays.copyOfRange(SpellCheckerProximityInfo.PROXIMITY,
    440                                 proximityIndex,
    441                                 proximityIndex + SpellCheckerProximityInfo.ROW_SIZE);
    442                     }
    443                     composer.add(character, proximities,
    444                             WordComposer.NOT_A_COORDINATE, WordComposer.NOT_A_COORDINATE);
    445                 }
    446 
    447                 final int capitalizeType = getCapitalizationType(text);
    448                 boolean isInDict = true;
    449                 DictAndProximity dictInfo = null;
    450                 try {
    451                     dictInfo = mDictionaryPool.takeOrGetNull();
    452                     if (null == dictInfo) return getNotInDictEmptySuggestions();
    453                     dictInfo.mDictionary.getWords(composer, suggestionsGatherer,
    454                             dictInfo.mProximityInfo);
    455                     isInDict = dictInfo.mDictionary.isValidWord(text);
    456                     if (!isInDict && CAPITALIZE_NONE != capitalizeType) {
    457                         // We want to test the word again if it's all caps or first caps only.
    458                         // If it's fully down, we already tested it, if it's mixed case, we don't
    459                         // want to test a lowercase version of it.
    460                         isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale));
    461                     }
    462                 } finally {
    463                     if (null != dictInfo) {
    464                         if (!mDictionaryPool.offer(dictInfo)) {
    465                             Log.e(TAG, "Can't re-insert a dictionary into its pool");
    466                         }
    467                     }
    468                 }
    469 
    470                 final SuggestionsGatherer.Result result = suggestionsGatherer.getResults(
    471                         capitalizeType, mLocale);
    472 
    473                 if (DBG) {
    474                     Log.i(TAG, "Spell checking results for " + text + " with suggestion limit "
    475                             + suggestionsLimit);
    476                     Log.i(TAG, "IsInDict = " + isInDict);
    477                     Log.i(TAG, "LooksLikeTypo = " + (!isInDict));
    478                     Log.i(TAG, "HasLikelySuggestions = " + result.mHasLikelySuggestions);
    479                     if (null != result.mSuggestions) {
    480                         for (String suggestion : result.mSuggestions) {
    481                             Log.i(TAG, suggestion);
    482                         }
    483                     }
    484                 }
    485 
    486                 // TODO: actually use result.mHasLikelySuggestions
    487                 final int flags =
    488                         (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY
    489                                 : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO);
    490                 return new SuggestionsInfo(flags, result.mSuggestions);
    491             } catch (RuntimeException e) {
    492                 // Don't kill the keyboard if there is a bug in the spell checker
    493                 if (DBG) {
    494                     throw e;
    495                 } else {
    496                     Log.e(TAG, "Exception while spellcheking: " + e);
    497                     return getNotInDictEmptySuggestions();
    498                 }
    499             }
    500         }
    501     }
    502 }
    503