1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin.spellcheck; 18 19 import android.content.ContentResolver; 20 import android.database.ContentObserver; 21 import android.os.Binder; 22 import android.provider.UserDictionary.Words; 23 import android.service.textservice.SpellCheckerService.Session; 24 import android.text.TextUtils; 25 import android.util.Log; 26 import android.util.LruCache; 27 import android.view.textservice.SuggestionsInfo; 28 import android.view.textservice.TextInfo; 29 30 import com.android.inputmethod.compat.SuggestionsInfoCompatUtils; 31 import com.android.inputmethod.latin.Constants; 32 import com.android.inputmethod.latin.Dictionary; 33 import com.android.inputmethod.latin.LocaleUtils; 34 import com.android.inputmethod.latin.StringUtils; 35 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; 36 import com.android.inputmethod.latin.WordComposer; 37 import com.android.inputmethod.latin.spellcheck.AndroidSpellCheckerService.SuggestionsGatherer; 38 39 import java.util.ArrayList; 40 import java.util.Locale; 41 42 public abstract class AndroidWordLevelSpellCheckerSession extends Session { 43 private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName(); 44 private static final boolean DBG = false; 45 46 // Immutable, but need the locale which is not available in the constructor yet 47 private DictionaryPool mDictionaryPool; 48 // Likewise 49 private Locale mLocale; 50 // Cache this for performance 51 private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now. 52 private final AndroidSpellCheckerService mService; 53 protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache(); 54 private final ContentObserver mObserver; 55 56 private static final class SuggestionsParams { 57 public final String[] mSuggestions; 58 public final int mFlags; 59 public SuggestionsParams(String[] suggestions, int flags) { 60 mSuggestions = suggestions; 61 mFlags = flags; 62 } 63 } 64 65 protected static final class SuggestionsCache { 66 private static final char CHAR_DELIMITER = '\uFFFC'; 67 private static final int MAX_CACHE_SIZE = 50; 68 private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache = 69 new LruCache<String, SuggestionsParams>(MAX_CACHE_SIZE); 70 71 // TODO: Support n-gram input 72 private static String generateKey(String query, String prevWord) { 73 if (TextUtils.isEmpty(query) || TextUtils.isEmpty(prevWord)) { 74 return query; 75 } 76 return query + CHAR_DELIMITER + prevWord; 77 } 78 79 // TODO: Support n-gram input 80 public SuggestionsParams getSuggestionsFromCache(String query, String prevWord) { 81 return mUnigramSuggestionsInfoCache.get(generateKey(query, prevWord)); 82 } 83 84 // TODO: Support n-gram input 85 public void putSuggestionsToCache( 86 String query, String prevWord, String[] suggestions, int flags) { 87 if (suggestions == null || TextUtils.isEmpty(query)) { 88 return; 89 } 90 mUnigramSuggestionsInfoCache.put( 91 generateKey(query, prevWord), new SuggestionsParams(suggestions, flags)); 92 } 93 94 public void clearCache() { 95 mUnigramSuggestionsInfoCache.evictAll(); 96 } 97 } 98 99 AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) { 100 mService = service; 101 final ContentResolver cres = service.getContentResolver(); 102 103 mObserver = new ContentObserver(null) { 104 @Override 105 public void onChange(boolean self) { 106 mSuggestionsCache.clearCache(); 107 } 108 }; 109 cres.registerContentObserver(Words.CONTENT_URI, true, mObserver); 110 } 111 112 @Override 113 public void onCreate() { 114 final String localeString = getLocale(); 115 mDictionaryPool = mService.getDictionaryPool(localeString); 116 mLocale = LocaleUtils.constructLocaleFromString(localeString); 117 mScript = AndroidSpellCheckerService.getScriptFromLocale(mLocale); 118 } 119 120 @Override 121 public void onClose() { 122 final ContentResolver cres = mService.getContentResolver(); 123 cres.unregisterContentObserver(mObserver); 124 } 125 126 /* 127 * Returns whether the code point is a letter that makes sense for the specified 128 * locale for this spell checker. 129 * The dictionaries supported by Latin IME are described in res/xml/spellchecker.xml 130 * and is limited to EFIGS languages and Russian. 131 * Hence at the moment this explicitly tests for Cyrillic characters or Latin characters 132 * as appropriate, and explicitly excludes CJK, Arabic and Hebrew characters. 133 */ 134 private static boolean isLetterCheckableByLanguage(final int codePoint, 135 final int script) { 136 switch (script) { 137 case AndroidSpellCheckerService.SCRIPT_LATIN: 138 // Our supported latin script dictionaries (EFIGS) at the moment only include 139 // characters in the C0, C1, Latin Extended A and B, IPA extensions unicode 140 // blocks. As it happens, those are back-to-back in the code range 0x40 to 0x2AF, 141 // so the below is a very efficient way to test for it. As for the 0-0x3F, it's 142 // excluded from isLetter anyway. 143 return codePoint <= 0x2AF && Character.isLetter(codePoint); 144 case AndroidSpellCheckerService.SCRIPT_CYRILLIC: 145 // All Cyrillic characters are in the 400~52F block. There are some in the upper 146 // Unicode range, but they are archaic characters that are not used in modern 147 // Russian and are not used by our dictionary. 148 return codePoint >= 0x400 && codePoint <= 0x52F && Character.isLetter(codePoint); 149 case AndroidSpellCheckerService.SCRIPT_GREEK: 150 // Greek letters are either in the 370~3FF range (Greek & Coptic), or in the 151 // 1F00~1FFF range (Greek extended). Our dictionary contains both sort of characters. 152 // Our dictionary also contains a few words with 0xF2; it would be best to check 153 // if that's correct, but a web search does return results for these words so 154 // they are probably okay. 155 return (codePoint >= 0x370 && codePoint <= 0x3FF) 156 || (codePoint >= 0x1F00 && codePoint <= 0x1FFF) 157 || codePoint == 0xF2; 158 default: 159 // Should never come here 160 throw new RuntimeException("Impossible value of script: " + script); 161 } 162 } 163 164 /** 165 * Finds out whether a particular string should be filtered out of spell checking. 166 * 167 * This will loosely match URLs, numbers, symbols. To avoid always underlining words that 168 * we know we will never recognize, this accepts a script identifier that should be one 169 * of the SCRIPT_* constants defined above, to rule out quickly characters from very 170 * different languages. 171 * 172 * @param text the string to evaluate. 173 * @param script the identifier for the script this spell checker recognizes 174 * @return true if we should filter this text out, false otherwise 175 */ 176 private static boolean shouldFilterOut(final String text, final int script) { 177 if (TextUtils.isEmpty(text) || text.length() <= 1) return true; 178 179 // TODO: check if an equivalent processing can't be done more quickly with a 180 // compiled regexp. 181 // Filter by first letter 182 final int firstCodePoint = text.codePointAt(0); 183 // Filter out words that don't start with a letter or an apostrophe 184 if (!isLetterCheckableByLanguage(firstCodePoint, script) 185 && '\'' != firstCodePoint) return true; 186 187 // Filter contents 188 final int length = text.length(); 189 int letterCount = 0; 190 for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { 191 final int codePoint = text.codePointAt(i); 192 // Any word containing a COMMERCIAL_AT is probably an e-mail address 193 // Any word containing a SLASH is probably either an ad-hoc combination of two 194 // words or a URI - in either case we don't want to spell check that 195 if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) { 196 return true; 197 } 198 if (isLetterCheckableByLanguage(codePoint, script)) ++letterCount; 199 } 200 // Guestimate heuristic: perform spell checking if at least 3/4 of the characters 201 // in this word are letters 202 return (letterCount * 4 < length * 3); 203 } 204 205 /** 206 * Helper method to test valid capitalizations of a word. 207 * 208 * If the "text" is lower-case, we test only the exact string. 209 * If the "Text" is capitalized, we test the exact string "Text" and the lower-cased 210 * version of it "text". 211 * If the "TEXT" is fully upper case, we test the exact string "TEXT", the lower-cased 212 * version of it "text" and the capitalized version of it "Text". 213 */ 214 private boolean isInDictForAnyCapitalization(final Dictionary dict, final String text, 215 final int capitalizeType) { 216 // If the word is in there as is, then it's in the dictionary. If not, we'll test lower 217 // case versions, but only if the word is not already all-lower case or mixed case. 218 if (dict.isValidWord(text)) return true; 219 if (StringUtils.CAPITALIZE_NONE == capitalizeType) return false; 220 221 // If we come here, we have a capitalized word (either First- or All-). 222 // Downcase the word and look it up again. If the word is only capitalized, we 223 // tested all possibilities, so if it's still negative we can return false. 224 final String lowerCaseText = text.toLowerCase(mLocale); 225 if (dict.isValidWord(lowerCaseText)) return true; 226 if (StringUtils.CAPITALIZE_FIRST == capitalizeType) return false; 227 228 // If the lower case version is not in the dictionary, it's still possible 229 // that we have an all-caps version of a word that needs to be capitalized 230 // according to the dictionary. E.g. "GERMANS" only exists in the dictionary as "Germans". 231 return dict.isValidWord(StringUtils.capitalizeFirstAndDowncaseRest(lowerCaseText, mLocale)); 232 } 233 234 // Note : this must be reentrant 235 /** 236 * Gets a list of suggestions for a specific string. This returns a list of possible 237 * corrections for the text passed as an argument. It may split or group words, and 238 * even perform grammatical analysis. 239 */ 240 private SuggestionsInfo onGetSuggestionsInternal(final TextInfo textInfo, 241 final int suggestionsLimit) { 242 return onGetSuggestionsInternal(textInfo, null, suggestionsLimit); 243 } 244 245 protected SuggestionsInfo onGetSuggestionsInternal( 246 final TextInfo textInfo, final String prevWord, final int suggestionsLimit) { 247 try { 248 final String inText = textInfo.getText(); 249 final SuggestionsParams cachedSuggestionsParams = 250 mSuggestionsCache.getSuggestionsFromCache(inText, prevWord); 251 if (cachedSuggestionsParams != null) { 252 if (DBG) { 253 Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags); 254 } 255 return new SuggestionsInfo( 256 cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions); 257 } 258 259 if (shouldFilterOut(inText, mScript)) { 260 DictAndKeyboard dictInfo = null; 261 try { 262 dictInfo = mDictionaryPool.pollWithDefaultTimeout(); 263 if (!DictionaryPool.isAValidDictionary(dictInfo)) { 264 return AndroidSpellCheckerService.getNotInDictEmptySuggestions(); 265 } 266 return dictInfo.mDictionary.isValidWord(inText) 267 ? AndroidSpellCheckerService.getInDictEmptySuggestions() 268 : AndroidSpellCheckerService.getNotInDictEmptySuggestions(); 269 } finally { 270 if (null != dictInfo) { 271 if (!mDictionaryPool.offer(dictInfo)) { 272 Log.e(TAG, "Can't re-insert a dictionary into its pool"); 273 } 274 } 275 } 276 } 277 final String text = inText.replaceAll( 278 AndroidSpellCheckerService.APOSTROPHE, AndroidSpellCheckerService.SINGLE_QUOTE); 279 280 // TODO: Don't gather suggestions if the limit is <= 0 unless necessary 281 //final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text, 282 //mService.mSuggestionThreshold, mService.mRecommendedThreshold, 283 //suggestionsLimit); 284 final SuggestionsGatherer suggestionsGatherer = mService.newSuggestionsGatherer( 285 text, suggestionsLimit); 286 287 final int capitalizeType = StringUtils.getCapitalizationType(text); 288 boolean isInDict = true; 289 DictAndKeyboard dictInfo = null; 290 try { 291 dictInfo = mDictionaryPool.pollWithDefaultTimeout(); 292 if (!DictionaryPool.isAValidDictionary(dictInfo)) { 293 return AndroidSpellCheckerService.getNotInDictEmptySuggestions(); 294 } 295 final WordComposer composer = new WordComposer(); 296 final int length = text.length(); 297 for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { 298 final int codePoint = text.codePointAt(i); 299 composer.addKeyInfo(codePoint, dictInfo.getKeyboard(codePoint)); 300 } 301 // TODO: make a spell checker option to block offensive words or not 302 final ArrayList<SuggestedWordInfo> suggestions = 303 dictInfo.mDictionary.getSuggestions(composer, prevWord, 304 dictInfo.getProximityInfo(), 305 true /* blockOffensiveWords */); 306 for (final SuggestedWordInfo suggestion : suggestions) { 307 final String suggestionStr = suggestion.mWord; 308 suggestionsGatherer.addWord(suggestionStr.toCharArray(), null, 0, 309 suggestionStr.length(), suggestion.mScore); 310 } 311 isInDict = isInDictForAnyCapitalization(dictInfo.mDictionary, text, capitalizeType); 312 } finally { 313 if (null != dictInfo) { 314 if (!mDictionaryPool.offer(dictInfo)) { 315 Log.e(TAG, "Can't re-insert a dictionary into its pool"); 316 } 317 } 318 } 319 320 final SuggestionsGatherer.Result result = suggestionsGatherer.getResults( 321 capitalizeType, mLocale); 322 323 if (DBG) { 324 Log.i(TAG, "Spell checking results for " + text + " with suggestion limit " 325 + suggestionsLimit); 326 Log.i(TAG, "IsInDict = " + isInDict); 327 Log.i(TAG, "LooksLikeTypo = " + (!isInDict)); 328 Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions); 329 if (null != result.mSuggestions) { 330 for (String suggestion : result.mSuggestions) { 331 Log.i(TAG, suggestion); 332 } 333 } 334 } 335 336 final int flags = 337 (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY 338 : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO) 339 | (result.mHasRecommendedSuggestions 340 ? SuggestionsInfoCompatUtils 341 .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS() 342 : 0); 343 final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions); 344 mSuggestionsCache.putSuggestionsToCache(text, prevWord, result.mSuggestions, flags); 345 return retval; 346 } catch (RuntimeException e) { 347 // Don't kill the keyboard if there is a bug in the spell checker 348 if (DBG) { 349 throw e; 350 } else { 351 Log.e(TAG, "Exception while spellcheking", e); 352 return AndroidSpellCheckerService.getNotInDictEmptySuggestions(); 353 } 354 } 355 } 356 357 /* 358 * The spell checker acts on its own behalf. That is needed, in particular, to be able to 359 * access the dictionary files, which the provider restricts to the identity of Latin IME. 360 * Since it's called externally by the application, the spell checker is using the identity 361 * of the application by default unless we clearCallingIdentity. 362 * That's what the following method does. 363 */ 364 @Override 365 public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, 366 final int suggestionsLimit) { 367 long ident = Binder.clearCallingIdentity(); 368 try { 369 return onGetSuggestionsInternal(textInfo, suggestionsLimit); 370 } finally { 371 Binder.restoreCallingIdentity(ident); 372 } 373 } 374 } 375