Home | History | Annotate | Download | only in spellcheck
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.spellcheck;
     18 
     19 import android.content.ContentResolver;
     20 import android.database.ContentObserver;
     21 import android.os.Binder;
     22 import android.provider.UserDictionary.Words;
     23 import android.service.textservice.SpellCheckerService.Session;
     24 import android.text.TextUtils;
     25 import android.util.Log;
     26 import android.util.LruCache;
     27 import android.view.textservice.SuggestionsInfo;
     28 import android.view.textservice.TextInfo;
     29 
     30 import com.android.inputmethod.compat.SuggestionsInfoCompatUtils;
     31 import com.android.inputmethod.latin.Constants;
     32 import com.android.inputmethod.latin.Dictionary;
     33 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
     34 import com.android.inputmethod.latin.WordComposer;
     35 import com.android.inputmethod.latin.spellcheck.AndroidSpellCheckerService.SuggestionsGatherer;
     36 import com.android.inputmethod.latin.utils.LocaleUtils;
     37 import com.android.inputmethod.latin.utils.StringUtils;
     38 
     39 import java.util.ArrayList;
     40 import java.util.Locale;
     41 
     42 public abstract class AndroidWordLevelSpellCheckerSession extends Session {
    // Tag used for everything this class writes to the log.
    private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName();
    // Compile-time debug switch: enables verbose logging, and makes runtime exceptions caught
    // while spell checking get rethrown instead of being logged and swallowed.
    private static final boolean DBG = false;

    // Immutable, but need the locale which is not available in the constructor yet.
    // Assigned in onCreate().
    private DictionaryPool mDictionaryPool;
    // Likewise
    private Locale mLocale;
    // Cache this for performance
    private int mScript; // One of the AndroidSpellCheckerService.SCRIPT_* values handled here.
    // The service that created this session; provides the dictionary pool, the suggestions
    // gatherer and the content resolver.
    private final AndroidSpellCheckerService mService;
    // Per-session cache of already computed results. Cleared by mObserver.
    protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache();
    // Observer registered on the user dictionary URI in the constructor and unregistered
    // in onClose().
    private final ContentObserver mObserver;
     55 
     56     private static final class SuggestionsParams {
     57         public final String[] mSuggestions;
     58         public final int mFlags;
     59         public SuggestionsParams(String[] suggestions, int flags) {
     60             mSuggestions = suggestions;
     61             mFlags = flags;
     62         }
     63     }
     64 
     65     protected static final class SuggestionsCache {
     66         private static final char CHAR_DELIMITER = '\uFFFC';
     67         private static final int MAX_CACHE_SIZE = 50;
     68         private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache =
     69                 new LruCache<String, SuggestionsParams>(MAX_CACHE_SIZE);
     70 
     71         // TODO: Support n-gram input
     72         private static String generateKey(String query, String prevWord) {
     73             if (TextUtils.isEmpty(query) || TextUtils.isEmpty(prevWord)) {
     74                 return query;
     75             }
     76             return query + CHAR_DELIMITER + prevWord;
     77         }
     78 
     79         // TODO: Support n-gram input
     80         public SuggestionsParams getSuggestionsFromCache(String query, String prevWord) {
     81             return mUnigramSuggestionsInfoCache.get(generateKey(query, prevWord));
     82         }
     83 
     84         // TODO: Support n-gram input
     85         public void putSuggestionsToCache(
     86                 String query, String prevWord, String[] suggestions, int flags) {
     87             if (suggestions == null || TextUtils.isEmpty(query)) {
     88                 return;
     89             }
     90             mUnigramSuggestionsInfoCache.put(
     91                     generateKey(query, prevWord), new SuggestionsParams(suggestions, flags));
     92         }
     93 
     94         public void clearCache() {
     95             mUnigramSuggestionsInfoCache.evictAll();
     96         }
     97     }
     98 
     99     AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) {
    100         mService = service;
    101         final ContentResolver cres = service.getContentResolver();
    102 
    103         mObserver = new ContentObserver(null) {
    104             @Override
    105             public void onChange(boolean self) {
    106                 mSuggestionsCache.clearCache();
    107             }
    108         };
    109         cres.registerContentObserver(Words.CONTENT_URI, true, mObserver);
    110     }
    111 
    112     @Override
    113     public void onCreate() {
    114         final String localeString = getLocale();
    115         mDictionaryPool = mService.getDictionaryPool(localeString);
    116         mLocale = LocaleUtils.constructLocaleFromString(localeString);
    117         mScript = AndroidSpellCheckerService.getScriptFromLocale(mLocale);
    118     }
    119 
    120     @Override
    121     public void onClose() {
    122         final ContentResolver cres = mService.getContentResolver();
    123         cres.unregisterContentObserver(mObserver);
    124     }
    125 
    126     /*
    127      * Returns whether the code point is a letter that makes sense for the specified
    128      * locale for this spell checker.
    129      * The dictionaries supported by Latin IME are described in res/xml/spellchecker.xml
    130      * and is limited to EFIGS languages and Russian.
    131      * Hence at the moment this explicitly tests for Cyrillic characters or Latin characters
    132      * as appropriate, and explicitly excludes CJK, Arabic and Hebrew characters.
    133      */
    134     private static boolean isLetterCheckableByLanguage(final int codePoint,
    135             final int script) {
    136         switch (script) {
    137         case AndroidSpellCheckerService.SCRIPT_LATIN:
    138             // Our supported latin script dictionaries (EFIGS) at the moment only include
    139             // characters in the C0, C1, Latin Extended A and B, IPA extensions unicode
    140             // blocks. As it happens, those are back-to-back in the code range 0x40 to 0x2AF,
    141             // so the below is a very efficient way to test for it. As for the 0-0x3F, it's
    142             // excluded from isLetter anyway.
    143             return codePoint <= 0x2AF && Character.isLetter(codePoint);
    144         case AndroidSpellCheckerService.SCRIPT_CYRILLIC:
    145             // All Cyrillic characters are in the 400~52F block. There are some in the upper
    146             // Unicode range, but they are archaic characters that are not used in modern
    147             // Russian and are not used by our dictionary.
    148             return codePoint >= 0x400 && codePoint <= 0x52F && Character.isLetter(codePoint);
    149         case AndroidSpellCheckerService.SCRIPT_GREEK:
    150             // Greek letters are either in the 370~3FF range (Greek & Coptic), or in the
    151             // 1F00~1FFF range (Greek extended). Our dictionary contains both sort of characters.
    152             // Our dictionary also contains a few words with 0xF2; it would be best to check
    153             // if that's correct, but a web search does return results for these words so
    154             // they are probably okay.
    155             return (codePoint >= 0x370 && codePoint <= 0x3FF)
    156                     || (codePoint >= 0x1F00 && codePoint <= 0x1FFF)
    157                     || codePoint == 0xF2;
    158         default:
    159             // Should never come here
    160             throw new RuntimeException("Impossible value of script: " + script);
    161         }
    162     }
    163 
    164     private static final int CHECKABILITY_CHECKABLE = 0;
    165     private static final int CHECKABILITY_TOO_MANY_NON_LETTERS = 1;
    166     private static final int CHECKABILITY_CONTAINS_PERIOD = 2;
    167     private static final int CHECKABILITY_EMAIL_OR_URL = 3;
    168     private static final int CHECKABILITY_FIRST_LETTER_UNCHECKABLE = 4;
    169     private static final int CHECKABILITY_TOO_SHORT = 5;
    170     /**
    171      * Finds out whether a particular string should be filtered out of spell checking.
    172      *
    173      * This will loosely match URLs, numbers, symbols. To avoid always underlining words that
    174      * we know we will never recognize, this accepts a script identifier that should be one
    175      * of the SCRIPT_* constants defined above, to rule out quickly characters from very
    176      * different languages.
    177      *
    178      * @param text the string to evaluate.
    179      * @param script the identifier for the script this spell checker recognizes
    180      * @return one of the FILTER_OUT_* constants above.
    181      */
    182     private static int getCheckabilityInScript(final String text, final int script) {
    183         if (TextUtils.isEmpty(text) || text.length() <= 1) return CHECKABILITY_TOO_SHORT;
    184 
    185         // TODO: check if an equivalent processing can't be done more quickly with a
    186         // compiled regexp.
    187         // Filter by first letter
    188         final int firstCodePoint = text.codePointAt(0);
    189         // Filter out words that don't start with a letter or an apostrophe
    190         if (!isLetterCheckableByLanguage(firstCodePoint, script)
    191                 && '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE;
    192 
    193         // Filter contents
    194         final int length = text.length();
    195         int letterCount = 0;
    196         for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
    197             final int codePoint = text.codePointAt(i);
    198             // Any word containing a COMMERCIAL_AT is probably an e-mail address
    199             // Any word containing a SLASH is probably either an ad-hoc combination of two
    200             // words or a URI - in either case we don't want to spell check that
    201             if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) {
    202                 return CHECKABILITY_EMAIL_OR_URL;
    203             }
    204             // If the string contains a period, native returns strange suggestions (it seems
    205             // to return suggestions for everything up to the period only and to ignore the
    206             // rest), so we suppress lookup if there is a period.
    207             // TODO: investigate why native returns these suggestions and remove this code.
    208             if (Constants.CODE_PERIOD == codePoint) {
    209                 return CHECKABILITY_CONTAINS_PERIOD;
    210             }
    211             if (isLetterCheckableByLanguage(codePoint, script)) ++letterCount;
    212         }
    213         // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
    214         // in this word are letters
    215         return (letterCount * 4 < length * 3)
    216                 ? CHECKABILITY_TOO_MANY_NON_LETTERS : CHECKABILITY_CHECKABLE;
    217     }
    218 
    219     /**
    220      * Helper method to test valid capitalizations of a word.
    221      *
    222      * If the "text" is lower-case, we test only the exact string.
    223      * If the "Text" is capitalized, we test the exact string "Text" and the lower-cased
    224      *  version of it "text".
    225      * If the "TEXT" is fully upper case, we test the exact string "TEXT", the lower-cased
    226      *  version of it "text" and the capitalized version of it "Text".
    227      */
    228     private boolean isInDictForAnyCapitalization(final Dictionary dict, final String text,
    229             final int capitalizeType) {
    230         // If the word is in there as is, then it's in the dictionary. If not, we'll test lower
    231         // case versions, but only if the word is not already all-lower case or mixed case.
    232         if (dict.isValidWord(text)) return true;
    233         if (StringUtils.CAPITALIZE_NONE == capitalizeType) return false;
    234 
    235         // If we come here, we have a capitalized word (either First- or All-).
    236         // Downcase the word and look it up again. If the word is only capitalized, we
    237         // tested all possibilities, so if it's still negative we can return false.
    238         final String lowerCaseText = text.toLowerCase(mLocale);
    239         if (dict.isValidWord(lowerCaseText)) return true;
    240         if (StringUtils.CAPITALIZE_FIRST == capitalizeType) return false;
    241 
    242         // If the lower case version is not in the dictionary, it's still possible
    243         // that we have an all-caps version of a word that needs to be capitalized
    244         // according to the dictionary. E.g. "GERMANS" only exists in the dictionary as "Germans".
    245         return dict.isValidWord(StringUtils.capitalizeFirstAndDowncaseRest(lowerCaseText, mLocale));
    246     }
    247 
    248     // Note : this must be reentrant
    249     /**
    250      * Gets a list of suggestions for a specific string. This returns a list of possible
    251      * corrections for the text passed as an argument. It may split or group words, and
    252      * even perform grammatical analysis.
    253      */
    254     private SuggestionsInfo onGetSuggestionsInternal(final TextInfo textInfo,
    255             final int suggestionsLimit) {
    256         return onGetSuggestionsInternal(textInfo, null, suggestionsLimit);
    257     }
    258 
    259     protected SuggestionsInfo onGetSuggestionsInternal(
    260             final TextInfo textInfo, final String prevWord, final int suggestionsLimit) {
    261         try {
    262             final String inText = textInfo.getText();
    263             final SuggestionsParams cachedSuggestionsParams =
    264                     mSuggestionsCache.getSuggestionsFromCache(inText, prevWord);
    265             if (cachedSuggestionsParams != null) {
    266                 if (DBG) {
    267                     Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags);
    268                 }
    269                 return new SuggestionsInfo(
    270                         cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions);
    271             }
    272 
    273             final int checkability = getCheckabilityInScript(inText, mScript);
    274             if (CHECKABILITY_CHECKABLE != checkability) {
    275                 DictAndKeyboard dictInfo = null;
    276                 try {
    277                     dictInfo = mDictionaryPool.pollWithDefaultTimeout();
    278                     if (!DictionaryPool.isAValidDictionary(dictInfo)) {
    279                         return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
    280                                 false /* reportAsTypo */);
    281                     }
    282                     return dictInfo.mDictionary.isValidWord(inText)
    283                             ? AndroidSpellCheckerService.getInDictEmptySuggestions()
    284                             : AndroidSpellCheckerService.getNotInDictEmptySuggestions(
    285                                     CHECKABILITY_CONTAINS_PERIOD == checkability
    286                                     /* reportAsTypo */);
    287                 } finally {
    288                     if (null != dictInfo) {
    289                         if (!mDictionaryPool.offer(dictInfo)) {
    290                             Log.e(TAG, "Can't re-insert a dictionary into its pool");
    291                         }
    292                     }
    293                 }
    294             }
    295             final String text = inText.replaceAll(
    296                     AndroidSpellCheckerService.APOSTROPHE, AndroidSpellCheckerService.SINGLE_QUOTE);
    297 
    298             // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
    299             //final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text,
    300             //mService.mSuggestionThreshold, mService.mRecommendedThreshold,
    301             //suggestionsLimit);
    302             final SuggestionsGatherer suggestionsGatherer = mService.newSuggestionsGatherer(
    303                     text, suggestionsLimit);
    304 
    305             final int capitalizeType = StringUtils.getCapitalizationType(text);
    306             boolean isInDict = true;
    307             DictAndKeyboard dictInfo = null;
    308             try {
    309                 dictInfo = mDictionaryPool.pollWithDefaultTimeout();
    310                 if (!DictionaryPool.isAValidDictionary(dictInfo)) {
    311                     return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
    312                             false /* reportAsTypo */);
    313                 }
    314                 final WordComposer composer = new WordComposer();
    315                 final int length = text.length();
    316                 for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
    317                     final int codePoint = text.codePointAt(i);
    318                     composer.addKeyInfo(codePoint, dictInfo.getKeyboard(codePoint));
    319                 }
    320                 // TODO: make a spell checker option to block offensive words or not
    321                 final ArrayList<SuggestedWordInfo> suggestions =
    322                         dictInfo.mDictionary.getSuggestions(composer, prevWord,
    323                                 dictInfo.getProximityInfo(), true /* blockOffensiveWords */,
    324                                 null /* additionalFeaturesOptions */);
    325                 if (suggestions != null) {
    326                     for (final SuggestedWordInfo suggestion : suggestions) {
    327                         final String suggestionStr = suggestion.mWord;
    328                         suggestionsGatherer.addWord(suggestionStr.toCharArray(), null, 0,
    329                                 suggestionStr.length(), suggestion.mScore);
    330                     }
    331                 }
    332                 isInDict = isInDictForAnyCapitalization(dictInfo.mDictionary, text, capitalizeType);
    333             } finally {
    334                 if (null != dictInfo) {
    335                     if (!mDictionaryPool.offer(dictInfo)) {
    336                         Log.e(TAG, "Can't re-insert a dictionary into its pool");
    337                     }
    338                 }
    339             }
    340 
    341             final SuggestionsGatherer.Result result = suggestionsGatherer.getResults(
    342                     capitalizeType, mLocale);
    343 
    344             if (DBG) {
    345                 Log.i(TAG, "Spell checking results for " + text + " with suggestion limit "
    346                         + suggestionsLimit);
    347                 Log.i(TAG, "IsInDict = " + isInDict);
    348                 Log.i(TAG, "LooksLikeTypo = " + (!isInDict));
    349                 Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions);
    350                 if (null != result.mSuggestions) {
    351                     for (String suggestion : result.mSuggestions) {
    352                         Log.i(TAG, suggestion);
    353                     }
    354                 }
    355             }
    356 
    357             final int flags =
    358                     (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY
    359                             : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO)
    360                     | (result.mHasRecommendedSuggestions
    361                             ? SuggestionsInfoCompatUtils
    362                                     .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS()
    363                             : 0);
    364             final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions);
    365             mSuggestionsCache.putSuggestionsToCache(text, prevWord, result.mSuggestions, flags);
    366             return retval;
    367         } catch (RuntimeException e) {
    368             // Don't kill the keyboard if there is a bug in the spell checker
    369             if (DBG) {
    370                 throw e;
    371             } else {
    372                 Log.e(TAG, "Exception while spellcheking", e);
    373                 return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
    374                         false /* reportAsTypo */);
    375             }
    376         }
    377     }
    378 
    379     /*
    380      * The spell checker acts on its own behalf. That is needed, in particular, to be able to
    381      * access the dictionary files, which the provider restricts to the identity of Latin IME.
    382      * Since it's called externally by the application, the spell checker is using the identity
    383      * of the application by default unless we clearCallingIdentity.
    384      * That's what the following method does.
    385      */
    386     @Override
    387     public SuggestionsInfo onGetSuggestions(final TextInfo textInfo,
    388             final int suggestionsLimit) {
    389         long ident = Binder.clearCallingIdentity();
    390         try {
    391             return onGetSuggestionsInternal(textInfo, suggestionsLimit);
    392         } finally {
    393             Binder.restoreCallingIdentity(ident);
    394         }
    395     }
    396 }
    397