1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin.spellcheck; 18 19 import android.content.ContentResolver; 20 import android.database.ContentObserver; 21 import android.os.Binder; 22 import android.provider.UserDictionary.Words; 23 import android.service.textservice.SpellCheckerService.Session; 24 import android.text.TextUtils; 25 import android.util.Log; 26 import android.util.LruCache; 27 import android.view.textservice.SuggestionsInfo; 28 import android.view.textservice.TextInfo; 29 30 import com.android.inputmethod.compat.SuggestionsInfoCompatUtils; 31 import com.android.inputmethod.latin.Constants; 32 import com.android.inputmethod.latin.Dictionary; 33 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; 34 import com.android.inputmethod.latin.WordComposer; 35 import com.android.inputmethod.latin.spellcheck.AndroidSpellCheckerService.SuggestionsGatherer; 36 import com.android.inputmethod.latin.utils.LocaleUtils; 37 import com.android.inputmethod.latin.utils.StringUtils; 38 39 import java.util.ArrayList; 40 import java.util.Locale; 41 42 public abstract class AndroidWordLevelSpellCheckerSession extends Session { 43 private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName(); 44 private static final boolean DBG = false; 45 46 // Immutable, but need the locale which is not available in the constructor yet 47 private DictionaryPool mDictionaryPool; 48 // Likewise 49 private Locale mLocale; 50 // Cache this for performance 51 private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now. 52 private final AndroidSpellCheckerService mService; 53 protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache(); 54 private final ContentObserver mObserver; 55 56 private static final class SuggestionsParams { 57 public final String[] mSuggestions; 58 public final int mFlags; 59 public SuggestionsParams(String[] suggestions, int flags) { 60 mSuggestions = suggestions; 61 mFlags = flags; 62 } 63 } 64 65 protected static final class SuggestionsCache { 66 private static final char CHAR_DELIMITER = '\uFFFC'; 67 private static final int MAX_CACHE_SIZE = 50; 68 private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache = 69 new LruCache<String, SuggestionsParams>(MAX_CACHE_SIZE); 70 71 // TODO: Support n-gram input 72 private static String generateKey(String query, String prevWord) { 73 if (TextUtils.isEmpty(query) || TextUtils.isEmpty(prevWord)) { 74 return query; 75 } 76 return query + CHAR_DELIMITER + prevWord; 77 } 78 79 // TODO: Support n-gram input 80 public SuggestionsParams getSuggestionsFromCache(String query, String prevWord) { 81 return mUnigramSuggestionsInfoCache.get(generateKey(query, prevWord)); 82 } 83 84 // TODO: Support n-gram input 85 public void putSuggestionsToCache( 86 String query, String prevWord, String[] suggestions, int flags) { 87 if (suggestions == null || TextUtils.isEmpty(query)) { 88 return; 89 } 90 mUnigramSuggestionsInfoCache.put( 91 generateKey(query, prevWord), new SuggestionsParams(suggestions, flags)); 92 } 93 94 public void clearCache() { 95 mUnigramSuggestionsInfoCache.evictAll(); 96 } 97 } 98 99 AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) { 100 mService = service; 101 final ContentResolver cres = service.getContentResolver(); 102 103 mObserver = new ContentObserver(null) { 104 @Override 105 public void onChange(boolean self) { 106 mSuggestionsCache.clearCache(); 107 } 108 }; 109 cres.registerContentObserver(Words.CONTENT_URI, true, mObserver); 110 } 111 112 @Override 113 public void onCreate() { 114 final String localeString = getLocale(); 115 mDictionaryPool = mService.getDictionaryPool(localeString); 116 mLocale = LocaleUtils.constructLocaleFromString(localeString); 117 mScript = AndroidSpellCheckerService.getScriptFromLocale(mLocale); 118 } 119 120 @Override 121 public void onClose() { 122 final ContentResolver cres = mService.getContentResolver(); 123 cres.unregisterContentObserver(mObserver); 124 } 125 126 /* 127 * Returns whether the code point is a letter that makes sense for the specified 128 * locale for this spell checker. 129 * The dictionaries supported by Latin IME are described in res/xml/spellchecker.xml 130 * and is limited to EFIGS languages and Russian. 131 * Hence at the moment this explicitly tests for Cyrillic characters or Latin characters 132 * as appropriate, and explicitly excludes CJK, Arabic and Hebrew characters. 133 */ 134 private static boolean isLetterCheckableByLanguage(final int codePoint, 135 final int script) { 136 switch (script) { 137 case AndroidSpellCheckerService.SCRIPT_LATIN: 138 // Our supported latin script dictionaries (EFIGS) at the moment only include 139 // characters in the C0, C1, Latin Extended A and B, IPA extensions unicode 140 // blocks. As it happens, those are back-to-back in the code range 0x40 to 0x2AF, 141 // so the below is a very efficient way to test for it. As for the 0-0x3F, it's 142 // excluded from isLetter anyway. 143 return codePoint <= 0x2AF && Character.isLetter(codePoint); 144 case AndroidSpellCheckerService.SCRIPT_CYRILLIC: 145 // All Cyrillic characters are in the 400~52F block. There are some in the upper 146 // Unicode range, but they are archaic characters that are not used in modern 147 // Russian and are not used by our dictionary. 148 return codePoint >= 0x400 && codePoint <= 0x52F && Character.isLetter(codePoint); 149 case AndroidSpellCheckerService.SCRIPT_GREEK: 150 // Greek letters are either in the 370~3FF range (Greek & Coptic), or in the 151 // 1F00~1FFF range (Greek extended). Our dictionary contains both sort of characters. 152 // Our dictionary also contains a few words with 0xF2; it would be best to check 153 // if that's correct, but a web search does return results for these words so 154 // they are probably okay. 155 return (codePoint >= 0x370 && codePoint <= 0x3FF) 156 || (codePoint >= 0x1F00 && codePoint <= 0x1FFF) 157 || codePoint == 0xF2; 158 default: 159 // Should never come here 160 throw new RuntimeException("Impossible value of script: " + script); 161 } 162 } 163 164 private static final int CHECKABILITY_CHECKABLE = 0; 165 private static final int CHECKABILITY_TOO_MANY_NON_LETTERS = 1; 166 private static final int CHECKABILITY_CONTAINS_PERIOD = 2; 167 private static final int CHECKABILITY_EMAIL_OR_URL = 3; 168 private static final int CHECKABILITY_FIRST_LETTER_UNCHECKABLE = 4; 169 private static final int CHECKABILITY_TOO_SHORT = 5; 170 /** 171 * Finds out whether a particular string should be filtered out of spell checking. 172 * 173 * This will loosely match URLs, numbers, symbols. To avoid always underlining words that 174 * we know we will never recognize, this accepts a script identifier that should be one 175 * of the SCRIPT_* constants defined above, to rule out quickly characters from very 176 * different languages. 177 * 178 * @param text the string to evaluate. 179 * @param script the identifier for the script this spell checker recognizes 180 * @return one of the FILTER_OUT_* constants above. 181 */ 182 private static int getCheckabilityInScript(final String text, final int script) { 183 if (TextUtils.isEmpty(text) || text.length() <= 1) return CHECKABILITY_TOO_SHORT; 184 185 // TODO: check if an equivalent processing can't be done more quickly with a 186 // compiled regexp. 187 // Filter by first letter 188 final int firstCodePoint = text.codePointAt(0); 189 // Filter out words that don't start with a letter or an apostrophe 190 if (!isLetterCheckableByLanguage(firstCodePoint, script) 191 && '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE; 192 193 // Filter contents 194 final int length = text.length(); 195 int letterCount = 0; 196 for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { 197 final int codePoint = text.codePointAt(i); 198 // Any word containing a COMMERCIAL_AT is probably an e-mail address 199 // Any word containing a SLASH is probably either an ad-hoc combination of two 200 // words or a URI - in either case we don't want to spell check that 201 if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) { 202 return CHECKABILITY_EMAIL_OR_URL; 203 } 204 // If the string contains a period, native returns strange suggestions (it seems 205 // to return suggestions for everything up to the period only and to ignore the 206 // rest), so we suppress lookup if there is a period. 207 // TODO: investigate why native returns these suggestions and remove this code. 208 if (Constants.CODE_PERIOD == codePoint) { 209 return CHECKABILITY_CONTAINS_PERIOD; 210 } 211 if (isLetterCheckableByLanguage(codePoint, script)) ++letterCount; 212 } 213 // Guestimate heuristic: perform spell checking if at least 3/4 of the characters 214 // in this word are letters 215 return (letterCount * 4 < length * 3) 216 ? CHECKABILITY_TOO_MANY_NON_LETTERS : CHECKABILITY_CHECKABLE; 217 } 218 219 /** 220 * Helper method to test valid capitalizations of a word. 221 * 222 * If the "text" is lower-case, we test only the exact string. 223 * If the "Text" is capitalized, we test the exact string "Text" and the lower-cased 224 * version of it "text". 225 * If the "TEXT" is fully upper case, we test the exact string "TEXT", the lower-cased 226 * version of it "text" and the capitalized version of it "Text". 227 */ 228 private boolean isInDictForAnyCapitalization(final Dictionary dict, final String text, 229 final int capitalizeType) { 230 // If the word is in there as is, then it's in the dictionary. If not, we'll test lower 231 // case versions, but only if the word is not already all-lower case or mixed case. 232 if (dict.isValidWord(text)) return true; 233 if (StringUtils.CAPITALIZE_NONE == capitalizeType) return false; 234 235 // If we come here, we have a capitalized word (either First- or All-). 236 // Downcase the word and look it up again. If the word is only capitalized, we 237 // tested all possibilities, so if it's still negative we can return false. 238 final String lowerCaseText = text.toLowerCase(mLocale); 239 if (dict.isValidWord(lowerCaseText)) return true; 240 if (StringUtils.CAPITALIZE_FIRST == capitalizeType) return false; 241 242 // If the lower case version is not in the dictionary, it's still possible 243 // that we have an all-caps version of a word that needs to be capitalized 244 // according to the dictionary. E.g. "GERMANS" only exists in the dictionary as "Germans". 245 return dict.isValidWord(StringUtils.capitalizeFirstAndDowncaseRest(lowerCaseText, mLocale)); 246 } 247 248 // Note : this must be reentrant 249 /** 250 * Gets a list of suggestions for a specific string. This returns a list of possible 251 * corrections for the text passed as an argument. It may split or group words, and 252 * even perform grammatical analysis. 253 */ 254 private SuggestionsInfo onGetSuggestionsInternal(final TextInfo textInfo, 255 final int suggestionsLimit) { 256 return onGetSuggestionsInternal(textInfo, null, suggestionsLimit); 257 } 258 259 protected SuggestionsInfo onGetSuggestionsInternal( 260 final TextInfo textInfo, final String prevWord, final int suggestionsLimit) { 261 try { 262 final String inText = textInfo.getText(); 263 final SuggestionsParams cachedSuggestionsParams = 264 mSuggestionsCache.getSuggestionsFromCache(inText, prevWord); 265 if (cachedSuggestionsParams != null) { 266 if (DBG) { 267 Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags); 268 } 269 return new SuggestionsInfo( 270 cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions); 271 } 272 273 final int checkability = getCheckabilityInScript(inText, mScript); 274 if (CHECKABILITY_CHECKABLE != checkability) { 275 DictAndKeyboard dictInfo = null; 276 try { 277 dictInfo = mDictionaryPool.pollWithDefaultTimeout(); 278 if (!DictionaryPool.isAValidDictionary(dictInfo)) { 279 return AndroidSpellCheckerService.getNotInDictEmptySuggestions( 280 false /* reportAsTypo */); 281 } 282 return dictInfo.mDictionary.isValidWord(inText) 283 ? AndroidSpellCheckerService.getInDictEmptySuggestions() 284 : AndroidSpellCheckerService.getNotInDictEmptySuggestions( 285 CHECKABILITY_CONTAINS_PERIOD == checkability 286 /* reportAsTypo */); 287 } finally { 288 if (null != dictInfo) { 289 if (!mDictionaryPool.offer(dictInfo)) { 290 Log.e(TAG, "Can't re-insert a dictionary into its pool"); 291 } 292 } 293 } 294 } 295 final String text = inText.replaceAll( 296 AndroidSpellCheckerService.APOSTROPHE, AndroidSpellCheckerService.SINGLE_QUOTE); 297 298 // TODO: Don't gather suggestions if the limit is <= 0 unless necessary 299 //final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text, 300 //mService.mSuggestionThreshold, mService.mRecommendedThreshold, 301 //suggestionsLimit); 302 final SuggestionsGatherer suggestionsGatherer = mService.newSuggestionsGatherer( 303 text, suggestionsLimit); 304 305 final int capitalizeType = StringUtils.getCapitalizationType(text); 306 boolean isInDict = true; 307 DictAndKeyboard dictInfo = null; 308 try { 309 dictInfo = mDictionaryPool.pollWithDefaultTimeout(); 310 if (!DictionaryPool.isAValidDictionary(dictInfo)) { 311 return AndroidSpellCheckerService.getNotInDictEmptySuggestions( 312 false /* reportAsTypo */); 313 } 314 final WordComposer composer = new WordComposer(); 315 final int length = text.length(); 316 for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { 317 final int codePoint = text.codePointAt(i); 318 composer.addKeyInfo(codePoint, dictInfo.getKeyboard(codePoint)); 319 } 320 // TODO: make a spell checker option to block offensive words or not 321 final ArrayList<SuggestedWordInfo> suggestions = 322 dictInfo.mDictionary.getSuggestions(composer, prevWord, 323 dictInfo.getProximityInfo(), true /* blockOffensiveWords */, 324 null /* additionalFeaturesOptions */); 325 if (suggestions != null) { 326 for (final SuggestedWordInfo suggestion : suggestions) { 327 final String suggestionStr = suggestion.mWord; 328 suggestionsGatherer.addWord(suggestionStr.toCharArray(), null, 0, 329 suggestionStr.length(), suggestion.mScore); 330 } 331 } 332 isInDict = isInDictForAnyCapitalization(dictInfo.mDictionary, text, capitalizeType); 333 } finally { 334 if (null != dictInfo) { 335 if (!mDictionaryPool.offer(dictInfo)) { 336 Log.e(TAG, "Can't re-insert a dictionary into its pool"); 337 } 338 } 339 } 340 341 final SuggestionsGatherer.Result result = suggestionsGatherer.getResults( 342 capitalizeType, mLocale); 343 344 if (DBG) { 345 Log.i(TAG, "Spell checking results for " + text + " with suggestion limit " 346 + suggestionsLimit); 347 Log.i(TAG, "IsInDict = " + isInDict); 348 Log.i(TAG, "LooksLikeTypo = " + (!isInDict)); 349 Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions); 350 if (null != result.mSuggestions) { 351 for (String suggestion : result.mSuggestions) { 352 Log.i(TAG, suggestion); 353 } 354 } 355 } 356 357 final int flags = 358 (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY 359 : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO) 360 | (result.mHasRecommendedSuggestions 361 ? SuggestionsInfoCompatUtils 362 .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS() 363 : 0); 364 final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions); 365 mSuggestionsCache.putSuggestionsToCache(text, prevWord, result.mSuggestions, flags); 366 return retval; 367 } catch (RuntimeException e) { 368 // Don't kill the keyboard if there is a bug in the spell checker 369 if (DBG) { 370 throw e; 371 } else { 372 Log.e(TAG, "Exception while spellcheking", e); 373 return AndroidSpellCheckerService.getNotInDictEmptySuggestions( 374 false /* reportAsTypo */); 375 } 376 } 377 } 378 379 /* 380 * The spell checker acts on its own behalf. That is needed, in particular, to be able to 381 * access the dictionary files, which the provider restricts to the identity of Latin IME. 382 * Since it's called externally by the application, the spell checker is using the identity 383 * of the application by default unless we clearCallingIdentity. 384 * That's what the following method does. 385 */ 386 @Override 387 public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, 388 final int suggestionsLimit) { 389 long ident = Binder.clearCallingIdentity(); 390 try { 391 return onGetSuggestionsInternal(textInfo, suggestionsLimit); 392 } finally { 393 Binder.restoreCallingIdentity(ident); 394 } 395 } 396 } 397