Home | History | Annotate | Download | only in spellcheck
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.spellcheck;
     18 
     19 import android.content.ContentResolver;
     20 import android.database.ContentObserver;
     21 import android.os.Binder;
     22 import android.provider.UserDictionary.Words;
     23 import android.service.textservice.SpellCheckerService.Session;
     24 import android.text.TextUtils;
     25 import android.util.Log;
     26 import android.util.LruCache;
     27 import android.view.textservice.SuggestionsInfo;
     28 import android.view.textservice.TextInfo;
     29 
     30 import com.android.inputmethod.compat.SuggestionsInfoCompatUtils;
     31 import com.android.inputmethod.keyboard.Keyboard;
     32 import com.android.inputmethod.keyboard.ProximityInfo;
     33 import com.android.inputmethod.latin.Constants;
     34 import com.android.inputmethod.latin.PrevWordsInfo;
     35 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
     36 import com.android.inputmethod.latin.WordComposer;
     37 import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
     38 import com.android.inputmethod.latin.utils.CoordinateUtils;
     39 import com.android.inputmethod.latin.utils.LocaleUtils;
     40 import com.android.inputmethod.latin.utils.ScriptUtils;
     41 import com.android.inputmethod.latin.utils.StringUtils;
     42 import com.android.inputmethod.latin.utils.SuggestionResults;
     43 
     44 import java.util.ArrayList;
     45 import java.util.Locale;
     46 
     47 public abstract class AndroidWordLevelSpellCheckerSession extends Session {
     48     private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName();
     49     private static final boolean DBG = false;
     50 
     51     public final static String[] EMPTY_STRING_ARRAY = new String[0];
     52 
     53     // Immutable, but not available in the constructor.
     54     private Locale mLocale;
     55     // Cache this for performance
     56     private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now.
     57     private final AndroidSpellCheckerService mService;
     58     protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache();
     59     private final ContentObserver mObserver;
     60 
     61     private static final class SuggestionsParams {
     62         public final String[] mSuggestions;
     63         public final int mFlags;
     64         public SuggestionsParams(String[] suggestions, int flags) {
     65             mSuggestions = suggestions;
     66             mFlags = flags;
     67         }
     68     }
     69 
     70     protected static final class SuggestionsCache {
     71         private static final char CHAR_DELIMITER = '\uFFFC';
     72         private static final int MAX_CACHE_SIZE = 50;
     73         private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache =
     74                 new LruCache<>(MAX_CACHE_SIZE);
     75 
     76         // TODO: Support n-gram input
     77         private static String generateKey(final String query, final PrevWordsInfo prevWordsInfo) {
     78             if (TextUtils.isEmpty(query) || !prevWordsInfo.isValid()) {
     79                 return query;
     80             }
     81             return query + CHAR_DELIMITER + prevWordsInfo;
     82         }
     83 
     84         public SuggestionsParams getSuggestionsFromCache(String query,
     85                 final PrevWordsInfo prevWordsInfo) {
     86             return mUnigramSuggestionsInfoCache.get(generateKey(query, prevWordsInfo));
     87         }
     88 
     89         public void putSuggestionsToCache(
     90                 final String query, final PrevWordsInfo prevWordsInfo,
     91                 final String[] suggestions, final int flags) {
     92             if (suggestions == null || TextUtils.isEmpty(query)) {
     93                 return;
     94             }
     95             mUnigramSuggestionsInfoCache.put(
     96                     generateKey(query, prevWordsInfo), new SuggestionsParams(suggestions, flags));
     97         }
     98 
     99         public void clearCache() {
    100             mUnigramSuggestionsInfoCache.evictAll();
    101         }
    102     }
    103 
    /**
     * Creates a session bound to the given service and starts observing the user dictionary,
     * so that cached suggestions are dropped whenever it changes.
     *
     * @param service the spell checker service this session relies on
     */
    AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) {
        mService = service;
        final ContentResolver resolver = service.getContentResolver();
        final ContentObserver cacheInvalidator = new ContentObserver(null) {
            @Override
            public void onChange(final boolean selfChange) {
                // Any cached verdict may be stale once the user dictionary has been edited.
                mSuggestionsCache.clearCache();
            }
        };
        mObserver = cacheInvalidator;
        resolver.registerContentObserver(Words.CONTENT_URI, true, cacheInvalidator);
    }
    116 
    117     @Override
    118     public void onCreate() {
    119         final String localeString = getLocale();
    120         mLocale = LocaleUtils.constructLocaleFromString(localeString);
    121         mScript = ScriptUtils.getScriptFromSpellCheckerLocale(mLocale);
    122     }
    123 
    124     @Override
    125     public void onClose() {
    126         final ContentResolver cres = mService.getContentResolver();
    127         cres.unregisterContentObserver(mObserver);
    128     }
    129 
    130     private static final int CHECKABILITY_CHECKABLE = 0;
    131     private static final int CHECKABILITY_TOO_MANY_NON_LETTERS = 1;
    132     private static final int CHECKABILITY_CONTAINS_PERIOD = 2;
    133     private static final int CHECKABILITY_EMAIL_OR_URL = 3;
    134     private static final int CHECKABILITY_FIRST_LETTER_UNCHECKABLE = 4;
    135     private static final int CHECKABILITY_TOO_SHORT = 5;
    136     /**
    137      * Finds out whether a particular string should be filtered out of spell checking.
    138      *
    139      * This will loosely match URLs, numbers, symbols. To avoid always underlining words that
    140      * we know we will never recognize, this accepts a script identifier that should be one
    141      * of the SCRIPT_* constants defined above, to rule out quickly characters from very
    142      * different languages.
    143      *
    144      * @param text the string to evaluate.
    145      * @param script the identifier for the script this spell checker recognizes
    146      * @return one of the FILTER_OUT_* constants above.
    147      */
    148     private static int getCheckabilityInScript(final String text, final int script) {
    149         if (TextUtils.isEmpty(text) || text.length() <= 1) return CHECKABILITY_TOO_SHORT;
    150 
    151         // TODO: check if an equivalent processing can't be done more quickly with a
    152         // compiled regexp.
    153         // Filter by first letter
    154         final int firstCodePoint = text.codePointAt(0);
    155         // Filter out words that don't start with a letter or an apostrophe
    156         if (!ScriptUtils.isLetterPartOfScript(firstCodePoint, script)
    157                 && '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE;
    158 
    159         // Filter contents
    160         final int length = text.length();
    161         int letterCount = 0;
    162         for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
    163             final int codePoint = text.codePointAt(i);
    164             // Any word containing a COMMERCIAL_AT is probably an e-mail address
    165             // Any word containing a SLASH is probably either an ad-hoc combination of two
    166             // words or a URI - in either case we don't want to spell check that
    167             if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) {
    168                 return CHECKABILITY_EMAIL_OR_URL;
    169             }
    170             // If the string contains a period, native returns strange suggestions (it seems
    171             // to return suggestions for everything up to the period only and to ignore the
    172             // rest), so we suppress lookup if there is a period.
    173             // TODO: investigate why native returns these suggestions and remove this code.
    174             if (Constants.CODE_PERIOD == codePoint) {
    175                 return CHECKABILITY_CONTAINS_PERIOD;
    176             }
    177             if (ScriptUtils.isLetterPartOfScript(codePoint, script)) ++letterCount;
    178         }
    179         // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
    180         // in this word are letters
    181         return (letterCount * 4 < length * 3)
    182                 ? CHECKABILITY_TOO_MANY_NON_LETTERS : CHECKABILITY_CHECKABLE;
    183     }
    184 
    185     /**
    186      * Helper method to test valid capitalizations of a word.
    187      *
    188      * If the "text" is lower-case, we test only the exact string.
    189      * If the "Text" is capitalized, we test the exact string "Text" and the lower-cased
    190      *  version of it "text".
    191      * If the "TEXT" is fully upper case, we test the exact string "TEXT", the lower-cased
    192      *  version of it "text" and the capitalized version of it "Text".
    193      */
    194     private boolean isInDictForAnyCapitalization(final String text, final int capitalizeType) {
    195         // If the word is in there as is, then it's in the dictionary. If not, we'll test lower
    196         // case versions, but only if the word is not already all-lower case or mixed case.
    197         if (mService.isValidWord(mLocale, text)) return true;
    198         if (StringUtils.CAPITALIZE_NONE == capitalizeType) return false;
    199 
    200         // If we come here, we have a capitalized word (either First- or All-).
    201         // Downcase the word and look it up again. If the word is only capitalized, we
    202         // tested all possibilities, so if it's still negative we can return false.
    203         final String lowerCaseText = text.toLowerCase(mLocale);
    204         if (mService.isValidWord(mLocale, lowerCaseText)) return true;
    205         if (StringUtils.CAPITALIZE_FIRST == capitalizeType) return false;
    206 
    207         // If the lower case version is not in the dictionary, it's still possible
    208         // that we have an all-caps version of a word that needs to be capitalized
    209         // according to the dictionary. E.g. "GERMANS" only exists in the dictionary as "Germans".
    210         return mService.isValidWord(mLocale,
    211                 StringUtils.capitalizeFirstAndDowncaseRest(lowerCaseText, mLocale));
    212     }
    213 
    214     // Note : this must be reentrant
    215     /**
    216      * Gets a list of suggestions for a specific string. This returns a list of possible
    217      * corrections for the text passed as an argument. It may split or group words, and
    218      * even perform grammatical analysis.
    219      */
    220     private SuggestionsInfo onGetSuggestionsInternal(final TextInfo textInfo,
    221             final int suggestionsLimit) {
    222         return onGetSuggestionsInternal(textInfo, null, suggestionsLimit);
    223     }
    224 
    225     protected SuggestionsInfo onGetSuggestionsInternal(
    226             final TextInfo textInfo, final PrevWordsInfo prevWordsInfo,
    227             final int suggestionsLimit) {
    228         try {
    229             final String inText = textInfo.getText();
    230             final SuggestionsParams cachedSuggestionsParams =
    231                     mSuggestionsCache.getSuggestionsFromCache(inText, prevWordsInfo);
    232             if (cachedSuggestionsParams != null) {
    233                 if (DBG) {
    234                     Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags);
    235                 }
    236                 return new SuggestionsInfo(
    237                         cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions);
    238             }
    239             final int checkability = getCheckabilityInScript(inText, mScript);
    240             if (CHECKABILITY_CHECKABLE != checkability) {
    241                 if (CHECKABILITY_CONTAINS_PERIOD == checkability) {
    242                     final String[] splitText = inText.split(Constants.REGEXP_PERIOD);
    243                     boolean allWordsAreValid = true;
    244                     for (final String word : splitText) {
    245                         if (!mService.isValidWord(mLocale, word)) {
    246                             allWordsAreValid = false;
    247                             break;
    248                         }
    249                     }
    250                     if (allWordsAreValid) {
    251                         return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO
    252                                 | SuggestionsInfo.RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS,
    253                                 new String[] {
    254                                         TextUtils.join(Constants.STRING_SPACE, splitText) });
    255                     }
    256                 }
    257                 return mService.isValidWord(mLocale, inText) ?
    258                         AndroidSpellCheckerService.getInDictEmptySuggestions() :
    259                         AndroidSpellCheckerService.getNotInDictEmptySuggestions(
    260                                 CHECKABILITY_CONTAINS_PERIOD == checkability /* reportAsTypo */);
    261             }
    262             final String text = inText.replaceAll(
    263                     AndroidSpellCheckerService.APOSTROPHE, AndroidSpellCheckerService.SINGLE_QUOTE);
    264             final int capitalizeType = StringUtils.getCapitalizationType(text);
    265             boolean isInDict = true;
    266             if (!mService.hasMainDictionaryForLocale(mLocale)) {
    267                 return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
    268                         false /* reportAsTypo */);
    269             }
    270             final Keyboard keyboard = mService.getKeyboardForLocale(mLocale);
    271             final WordComposer composer = new WordComposer();
    272             final int[] codePoints = StringUtils.toCodePointArray(text);
    273             final int[] coordinates;
    274             final ProximityInfo proximityInfo;
    275             if (null == keyboard) {
    276                 coordinates = CoordinateUtils.newCoordinateArray(codePoints.length,
    277                         Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE);
    278                 proximityInfo = null;
    279             } else {
    280                 coordinates = keyboard.getCoordinates(codePoints);
    281                 proximityInfo = keyboard.getProximityInfo();
    282             }
    283             composer.setComposingWord(codePoints, coordinates);
    284             // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
    285             final SuggestionResults suggestionResults = mService.getSuggestionResults(
    286                     mLocale, composer, prevWordsInfo, proximityInfo);
    287             final Result result = getResult(capitalizeType, mLocale, suggestionsLimit,
    288                     mService.getRecommendedThreshold(), text, suggestionResults);
    289             isInDict = isInDictForAnyCapitalization(text, capitalizeType);
    290             if (DBG) {
    291                 Log.i(TAG, "Spell checking results for " + text + " with suggestion limit "
    292                         + suggestionsLimit);
    293                 Log.i(TAG, "IsInDict = " + isInDict);
    294                 Log.i(TAG, "LooksLikeTypo = " + (!isInDict));
    295                 Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions);
    296                 if (null != result.mSuggestions) {
    297                     for (String suggestion : result.mSuggestions) {
    298                         Log.i(TAG, suggestion);
    299                     }
    300                 }
    301             }
    302 
    303             final int flags =
    304                     (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY
    305                             : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO)
    306                     | (result.mHasRecommendedSuggestions
    307                             ? SuggestionsInfoCompatUtils
    308                                     .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS()
    309                             : 0);
    310             final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions);
    311             mSuggestionsCache.putSuggestionsToCache(text, prevWordsInfo, result.mSuggestions,
    312                     flags);
    313             return retval;
    314         } catch (RuntimeException e) {
    315             // Don't kill the keyboard if there is a bug in the spell checker
    316             if (DBG) {
    317                 throw e;
    318             } else {
    319                 Log.e(TAG, "Exception while spellcheking", e);
    320                 return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
    321                         false /* reportAsTypo */);
    322             }
    323         }
    324     }
    325 
    326     private static final class Result {
    327         public final String[] mSuggestions;
    328         public final boolean mHasRecommendedSuggestions;
    329         public Result(final String[] gatheredSuggestions,
    330                 final boolean hasRecommendedSuggestions) {
    331             mSuggestions = gatheredSuggestions;
    332             mHasRecommendedSuggestions = hasRecommendedSuggestions;
    333         }
    334     }
    335 
    336     private static Result getResult(final int capitalizeType, final Locale locale,
    337             final int suggestionsLimit, final float recommendedThreshold, final String originalText,
    338             final SuggestionResults suggestionResults) {
    339         if (suggestionResults.isEmpty() || suggestionsLimit <= 0) {
    340             return new Result(null /* gatheredSuggestions */,
    341                     false /* hasRecommendedSuggestions */);
    342         }
    343         if (DBG) {
    344             for (final SuggestedWordInfo suggestedWordInfo : suggestionResults) {
    345                 Log.i(TAG, "" + suggestedWordInfo.mScore + " " + suggestedWordInfo.mWord);
    346             }
    347         }
    348         final ArrayList<String> suggestions = new ArrayList<>();
    349         for (final SuggestedWordInfo suggestedWordInfo : suggestionResults) {
    350             final String suggestion;
    351             if (StringUtils.CAPITALIZE_ALL == capitalizeType) {
    352                 suggestion = suggestedWordInfo.mWord.toUpperCase(locale);
    353             } else if (StringUtils.CAPITALIZE_FIRST == capitalizeType) {
    354                 suggestion = StringUtils.capitalizeFirstCodePoint(
    355                         suggestedWordInfo.mWord, locale);
    356             } else {
    357                 suggestion = suggestedWordInfo.mWord;
    358             }
    359             suggestions.add(suggestion);
    360         }
    361         StringUtils.removeDupes(suggestions);
    362         // This returns a String[], while toArray() returns an Object[] which cannot be cast
    363         // into a String[].
    364         final String[] gatheredSuggestions =
    365                 suggestions.subList(0, Math.min(suggestions.size(), suggestionsLimit))
    366                         .toArray(EMPTY_STRING_ARRAY);
    367 
    368         final int bestScore = suggestionResults.first().mScore;
    369         final String bestSuggestion = suggestions.get(0);
    370         final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
    371                 originalText, bestSuggestion.toString(), bestScore);
    372         final boolean hasRecommendedSuggestions = (normalizedScore > recommendedThreshold);
    373         if (DBG) {
    374             Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore);
    375             Log.i(TAG, "Normalized score = " + normalizedScore
    376                     + " (threshold " + recommendedThreshold
    377                     + ") => hasRecommendedSuggestions = " + hasRecommendedSuggestions);
    378         }
    379         return new Result(gatheredSuggestions, hasRecommendedSuggestions);
    380     }
    381 
    382     /*
    383      * The spell checker acts on its own behalf. That is needed, in particular, to be able to
    384      * access the dictionary files, which the provider restricts to the identity of Latin IME.
    385      * Since it's called externally by the application, the spell checker is using the identity
    386      * of the application by default unless we clearCallingIdentity.
    387      * That's what the following method does.
    388      */
    389     @Override
    390     public SuggestionsInfo onGetSuggestions(final TextInfo textInfo,
    391             final int suggestionsLimit) {
    392         long ident = Binder.clearCallingIdentity();
    393         try {
    394             return onGetSuggestionsInternal(textInfo, suggestionsLimit);
    395         } finally {
    396             Binder.restoreCallingIdentity(ident);
    397         }
    398     }
    399 }
    400