Home | History | Annotate | Download | only in spellcheck
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.spellcheck;
     18 
     19 import android.content.ContentResolver;
     20 import android.database.ContentObserver;
     21 import android.os.Binder;
     22 import android.provider.UserDictionary.Words;
     23 import android.service.textservice.SpellCheckerService.Session;
     24 import android.text.TextUtils;
     25 import android.util.Log;
     26 import android.util.LruCache;
     27 import android.view.textservice.SuggestionsInfo;
     28 import android.view.textservice.TextInfo;
     29 
     30 import com.android.inputmethod.compat.SuggestionsInfoCompatUtils;
     31 import com.android.inputmethod.keyboard.Keyboard;
     32 import com.android.inputmethod.latin.NgramContext;
     33 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
     34 import com.android.inputmethod.latin.WordComposer;
     35 import com.android.inputmethod.latin.common.Constants;
     36 import com.android.inputmethod.latin.common.LocaleUtils;
     37 import com.android.inputmethod.latin.common.StringUtils;
     38 import com.android.inputmethod.latin.define.DebugFlags;
     39 import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
     40 import com.android.inputmethod.latin.utils.ScriptUtils;
     41 import com.android.inputmethod.latin.utils.StatsUtils;
     42 import com.android.inputmethod.latin.utils.SuggestionResults;
     43 
     44 import java.util.ArrayList;
     45 import java.util.List;
     46 import java.util.Locale;
     47 
     48 public abstract class AndroidWordLevelSpellCheckerSession extends Session {
     49     private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName();
     50 
     51     public final static String[] EMPTY_STRING_ARRAY = new String[0];
     52 
     53     // Immutable, but not available in the constructor.
     54     private Locale mLocale;
     55     // Cache this for performance
     56     private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now.
     57     private final AndroidSpellCheckerService mService;
     58     protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache();
     59     private final ContentObserver mObserver;
     60 
     61     private static final String quotesRegexp =
     62             "(\\u0022|\\u0027|\\u0060|\\u00B4|\\u2018|\\u2018|\\u201C|\\u201D)";
     63 
     64     private static final class SuggestionsParams {
     65         public final String[] mSuggestions;
     66         public final int mFlags;
     67         public SuggestionsParams(String[] suggestions, int flags) {
     68             mSuggestions = suggestions;
     69             mFlags = flags;
     70         }
     71     }
     72 
     73     protected static final class SuggestionsCache {
     74         private static final int MAX_CACHE_SIZE = 50;
     75         private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache =
     76                 new LruCache<>(MAX_CACHE_SIZE);
     77 
     78         private static String generateKey(final String query) {
     79             return query + "";
     80         }
     81 
     82         public SuggestionsParams getSuggestionsFromCache(final String query) {
     83             return mUnigramSuggestionsInfoCache.get(query);
     84         }
     85 
     86         public void putSuggestionsToCache(
     87                 final String query, final String[] suggestions, final int flags) {
     88             if (suggestions == null || TextUtils.isEmpty(query)) {
     89                 return;
     90             }
     91             mUnigramSuggestionsInfoCache.put(
     92                     generateKey(query),
     93                     new SuggestionsParams(suggestions, flags));
     94         }
     95 
     96         public void clearCache() {
     97             mUnigramSuggestionsInfoCache.evictAll();
     98         }
     99     }
    100 
    101     AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) {
    102         mService = service;
    103         final ContentResolver cres = service.getContentResolver();
    104 
    105         mObserver = new ContentObserver(null) {
    106             @Override
    107             public void onChange(boolean self) {
    108                 mSuggestionsCache.clearCache();
    109             }
    110         };
    111         cres.registerContentObserver(Words.CONTENT_URI, true, mObserver);
    112     }
    113 
    114     @Override
    115     public void onCreate() {
    116         final String localeString = getLocale();
    117         mLocale = (null == localeString) ? null
    118                 : LocaleUtils.constructLocaleFromString(localeString);
    119         mScript = ScriptUtils.getScriptFromSpellCheckerLocale(mLocale);
    120     }
    121 
    122     @Override
    123     public void onClose() {
    124         final ContentResolver cres = mService.getContentResolver();
    125         cres.unregisterContentObserver(mObserver);
    126     }
    127 
    128     private static final int CHECKABILITY_CHECKABLE = 0;
    129     private static final int CHECKABILITY_TOO_MANY_NON_LETTERS = 1;
    130     private static final int CHECKABILITY_CONTAINS_PERIOD = 2;
    131     private static final int CHECKABILITY_EMAIL_OR_URL = 3;
    132     private static final int CHECKABILITY_FIRST_LETTER_UNCHECKABLE = 4;
    133     private static final int CHECKABILITY_TOO_SHORT = 5;
    134     /**
    135      * Finds out whether a particular string should be filtered out of spell checking.
    136      *
    137      * This will loosely match URLs, numbers, symbols. To avoid always underlining words that
    138      * we know we will never recognize, this accepts a script identifier that should be one
    139      * of the SCRIPT_* constants defined above, to rule out quickly characters from very
    140      * different languages.
    141      *
    142      * @param text the string to evaluate.
    143      * @param script the identifier for the script this spell checker recognizes
    144      * @return one of the FILTER_OUT_* constants above.
    145      */
    146     private static int getCheckabilityInScript(final String text, final int script) {
    147         if (TextUtils.isEmpty(text) || text.length() <= 1) return CHECKABILITY_TOO_SHORT;
    148 
    149         // TODO: check if an equivalent processing can't be done more quickly with a
    150         // compiled regexp.
    151         // Filter by first letter
    152         final int firstCodePoint = text.codePointAt(0);
    153         // Filter out words that don't start with a letter or an apostrophe
    154         if (!ScriptUtils.isLetterPartOfScript(firstCodePoint, script)
    155                 && '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE;
    156 
    157         // Filter contents
    158         final int length = text.length();
    159         int letterCount = 0;
    160         for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
    161             final int codePoint = text.codePointAt(i);
    162             // Any word containing a COMMERCIAL_AT is probably an e-mail address
    163             // Any word containing a SLASH is probably either an ad-hoc combination of two
    164             // words or a URI - in either case we don't want to spell check that
    165             if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) {
    166                 return CHECKABILITY_EMAIL_OR_URL;
    167             }
    168             // If the string contains a period, native returns strange suggestions (it seems
    169             // to return suggestions for everything up to the period only and to ignore the
    170             // rest), so we suppress lookup if there is a period.
    171             // TODO: investigate why native returns these suggestions and remove this code.
    172             if (Constants.CODE_PERIOD == codePoint) {
    173                 return CHECKABILITY_CONTAINS_PERIOD;
    174             }
    175             if (ScriptUtils.isLetterPartOfScript(codePoint, script)) ++letterCount;
    176         }
    177         // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
    178         // in this word are letters
    179         return (letterCount * 4 < length * 3)
    180                 ? CHECKABILITY_TOO_MANY_NON_LETTERS : CHECKABILITY_CHECKABLE;
    181     }
    182 
    183     /**
    184      * Helper method to test valid capitalizations of a word.
    185      *
    186      * If the "text" is lower-case, we test only the exact string.
    187      * If the "Text" is capitalized, we test the exact string "Text" and the lower-cased
    188      *  version of it "text".
    189      * If the "TEXT" is fully upper case, we test the exact string "TEXT", the lower-cased
    190      *  version of it "text" and the capitalized version of it "Text".
    191      */
    192     private boolean isInDictForAnyCapitalization(final String text, final int capitalizeType) {
    193         // If the word is in there as is, then it's in the dictionary. If not, we'll test lower
    194         // case versions, but only if the word is not already all-lower case or mixed case.
    195         if (mService.isValidWord(mLocale, text)) return true;
    196         if (StringUtils.CAPITALIZE_NONE == capitalizeType) return false;
    197 
    198         // If we come here, we have a capitalized word (either First- or All-).
    199         // Downcase the word and look it up again. If the word is only capitalized, we
    200         // tested all possibilities, so if it's still negative we can return false.
    201         final String lowerCaseText = text.toLowerCase(mLocale);
    202         if (mService.isValidWord(mLocale, lowerCaseText)) return true;
    203         if (StringUtils.CAPITALIZE_FIRST == capitalizeType) return false;
    204 
    205         // If the lower case version is not in the dictionary, it's still possible
    206         // that we have an all-caps version of a word that needs to be capitalized
    207         // according to the dictionary. E.g. "GERMANS" only exists in the dictionary as "Germans".
    208         return mService.isValidWord(mLocale,
    209                 StringUtils.capitalizeFirstAndDowncaseRest(lowerCaseText, mLocale));
    210     }
    211 
    212     // Note : this must be reentrant
    213     /**
    214      * Gets a list of suggestions for a specific string. This returns a list of possible
    215      * corrections for the text passed as an argument. It may split or group words, and
    216      * even perform grammatical analysis.
    217      */
    218     private SuggestionsInfo onGetSuggestionsInternal(final TextInfo textInfo,
    219             final int suggestionsLimit) {
    220         return onGetSuggestionsInternal(textInfo, null, suggestionsLimit);
    221     }
    222 
    223     protected SuggestionsInfo onGetSuggestionsInternal(
    224             final TextInfo textInfo, final NgramContext ngramContext, final int suggestionsLimit) {
    225         try {
    226             final String text = textInfo.getText().
    227                     replaceAll(AndroidSpellCheckerService.APOSTROPHE,
    228                             AndroidSpellCheckerService.SINGLE_QUOTE).
    229                     replaceAll("^" + quotesRegexp, "").
    230                     replaceAll(quotesRegexp + "$", "");
    231 
    232             if (!mService.hasMainDictionaryForLocale(mLocale)) {
    233                 return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
    234                         false /* reportAsTypo */);
    235             }
    236 
    237             // Handle special patterns like email, URI, telephone number.
    238             final int checkability = getCheckabilityInScript(text, mScript);
    239             if (CHECKABILITY_CHECKABLE != checkability) {
    240                 if (CHECKABILITY_CONTAINS_PERIOD == checkability) {
    241                     final String[] splitText = text.split(Constants.REGEXP_PERIOD);
    242                     boolean allWordsAreValid = true;
    243                     for (final String word : splitText) {
    244                         if (!mService.isValidWord(mLocale, word)) {
    245                             allWordsAreValid = false;
    246                             break;
    247                         }
    248                     }
    249                     if (allWordsAreValid) {
    250                         return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO
    251                                 | SuggestionsInfo.RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS,
    252                                 new String[] {
    253                                         TextUtils.join(Constants.STRING_SPACE, splitText) });
    254                     }
    255                 }
    256                 return mService.isValidWord(mLocale, text) ?
    257                         AndroidSpellCheckerService.getInDictEmptySuggestions() :
    258                         AndroidSpellCheckerService.getNotInDictEmptySuggestions(
    259                                 CHECKABILITY_CONTAINS_PERIOD == checkability /* reportAsTypo */);
    260             }
    261 
    262             // Handle normal words.
    263             final int capitalizeType = StringUtils.getCapitalizationType(text);
    264 
    265             if (isInDictForAnyCapitalization(text, capitalizeType)) {
    266                 if (DebugFlags.DEBUG_ENABLED) {
    267                     Log.i(TAG, "onGetSuggestionsInternal() : [" + text + "] is a valid word");
    268                 }
    269                 return AndroidSpellCheckerService.getInDictEmptySuggestions();
    270             }
    271             if (DebugFlags.DEBUG_ENABLED) {
    272                 Log.i(TAG, "onGetSuggestionsInternal() : [" + text + "] is NOT a valid word");
    273             }
    274 
    275             final Keyboard keyboard = mService.getKeyboardForLocale(mLocale);
    276             if (null == keyboard) {
    277                 Log.w(TAG, "onGetSuggestionsInternal() : No keyboard for locale: " + mLocale);
    278                 // If there is no keyboard for this locale, don't do any spell-checking.
    279                 return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
    280                         false /* reportAsTypo */);
    281             }
    282 
    283             final WordComposer composer = new WordComposer();
    284             final int[] codePoints = StringUtils.toCodePointArray(text);
    285             final int[] coordinates;
    286             coordinates = keyboard.getCoordinates(codePoints);
    287             composer.setComposingWord(codePoints, coordinates);
    288             // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
    289             final SuggestionResults suggestionResults = mService.getSuggestionResults(
    290                     mLocale, composer.getComposedDataSnapshot(), ngramContext, keyboard);
    291             final Result result = getResult(capitalizeType, mLocale, suggestionsLimit,
    292                     mService.getRecommendedThreshold(), text, suggestionResults);
    293             if (DebugFlags.DEBUG_ENABLED) {
    294                 if (result.mSuggestions != null && result.mSuggestions.length > 0) {
    295                     final StringBuilder builder = new StringBuilder();
    296                     for (String suggestion : result.mSuggestions) {
    297                         builder.append(" [");
    298                         builder.append(suggestion);
    299                         builder.append("]");
    300                     }
    301                     Log.i(TAG, "onGetSuggestionsInternal() : Suggestions =" + builder);
    302                 }
    303             }
    304             // Handle word not in dictionary.
    305             // This is called only once per unique word, so entering multiple
    306             // instances of the same word does not result in more than one call
    307             // to this method.
    308             // Also, upon changing the orientation of the device, this is called
    309             // again for every unique invalid word in the text box.
    310             StatsUtils.onInvalidWordIdentification(text);
    311 
    312             final int flags =
    313                     SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO
    314                     | (result.mHasRecommendedSuggestions
    315                             ? SuggestionsInfoCompatUtils
    316                                     .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS()
    317                             : 0);
    318             final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions);
    319             mSuggestionsCache.putSuggestionsToCache(text, result.mSuggestions, flags);
    320             return retval;
    321         } catch (RuntimeException e) {
    322             // Don't kill the keyboard if there is a bug in the spell checker
    323             Log.e(TAG, "Exception while spellchecking", e);
    324             return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
    325                     false /* reportAsTypo */);
    326         }
    327     }
    328 
    329     private static final class Result {
    330         public final String[] mSuggestions;
    331         public final boolean mHasRecommendedSuggestions;
    332         public Result(final String[] gatheredSuggestions, final boolean hasRecommendedSuggestions) {
    333             mSuggestions = gatheredSuggestions;
    334             mHasRecommendedSuggestions = hasRecommendedSuggestions;
    335         }
    336     }
    337 
    338     private static Result getResult(final int capitalizeType, final Locale locale,
    339             final int suggestionsLimit, final float recommendedThreshold, final String originalText,
    340             final SuggestionResults suggestionResults) {
    341         if (suggestionResults.isEmpty() || suggestionsLimit <= 0) {
    342             return new Result(null /* gatheredSuggestions */,
    343                     false /* hasRecommendedSuggestions */);
    344         }
    345         final ArrayList<String> suggestions = new ArrayList<>();
    346         for (final SuggestedWordInfo suggestedWordInfo : suggestionResults) {
    347             final String suggestion;
    348             if (StringUtils.CAPITALIZE_ALL == capitalizeType) {
    349                 suggestion = suggestedWordInfo.mWord.toUpperCase(locale);
    350             } else if (StringUtils.CAPITALIZE_FIRST == capitalizeType) {
    351                 suggestion = StringUtils.capitalizeFirstCodePoint(
    352                         suggestedWordInfo.mWord, locale);
    353             } else {
    354                 suggestion = suggestedWordInfo.mWord;
    355             }
    356             suggestions.add(suggestion);
    357         }
    358         StringUtils.removeDupes(suggestions);
    359         // This returns a String[], while toArray() returns an Object[] which cannot be cast
    360         // into a String[].
    361         final List<String> gatheredSuggestionsList =
    362                 suggestions.subList(0, Math.min(suggestions.size(), suggestionsLimit));
    363         final String[] gatheredSuggestions =
    364                 gatheredSuggestionsList.toArray(new String[gatheredSuggestionsList.size()]);
    365 
    366         final int bestScore = suggestionResults.first().mScore;
    367         final String bestSuggestion = suggestions.get(0);
    368         final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
    369                 originalText, bestSuggestion, bestScore);
    370         final boolean hasRecommendedSuggestions = (normalizedScore > recommendedThreshold);
    371         return new Result(gatheredSuggestions, hasRecommendedSuggestions);
    372     }
    373 
    374     /*
    375      * The spell checker acts on its own behalf. That is needed, in particular, to be able to
    376      * access the dictionary files, which the provider restricts to the identity of Latin IME.
    377      * Since it's called externally by the application, the spell checker is using the identity
    378      * of the application by default unless we clearCallingIdentity.
    379      * That's what the following method does.
    380      */
    381     @Override
    382     public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, final int suggestionsLimit) {
    383         long ident = Binder.clearCallingIdentity();
    384         try {
    385             return onGetSuggestionsInternal(textInfo, suggestionsLimit);
    386         } finally {
    387             Binder.restoreCallingIdentity(ident);
    388         }
    389     }
    390 }
    391