/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.spellcheck;

import android.content.ContentResolver;
import android.database.ContentObserver;
import android.os.Binder;
import android.provider.UserDictionary.Words;
import android.service.textservice.SpellCheckerService.Session;
import android.text.TextUtils;
import android.util.Log;
import android.util.LruCache;
import android.view.textservice.SuggestionsInfo;
import android.view.textservice.TextInfo;

import com.android.inputmethod.compat.SuggestionsInfoCompatUtils;
import com.android.inputmethod.keyboard.Keyboard;
import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.PrevWordsInfo;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.WordComposer;
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
import com.android.inputmethod.latin.utils.CoordinateUtils;
import com.android.inputmethod.latin.utils.LocaleUtils;
import com.android.inputmethod.latin.utils.ScriptUtils;
import com.android.inputmethod.latin.utils.StringUtils;
import com.android.inputmethod.latin.utils.SuggestionResults;

import java.util.ArrayList;
import java.util.Locale;

/**
 * Spell checker session that checks a single word at a time.
 *
 * Results are kept in a small LRU cache which is emptied whenever the user dictionary
 * content provider reports a change.
 */
public abstract class AndroidWordLevelSpellCheckerSession extends Session {
    private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName();
    private static final boolean DBG = false;

    public final static String[] EMPTY_STRING_ARRAY = new String[0];

    // Immutable, but not available in the constructor.
    private Locale mLocale;
    // Cache this for performance
    private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now.
    private final AndroidSpellCheckerService mService;
    protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache();
    private final ContentObserver mObserver;

    /** Cached outcome of a lookup: the suggestions and the SuggestionsInfo flags. */
    private static final class SuggestionsParams {
        public final String[] mSuggestions;
        public final int mFlags;
        public SuggestionsParams(String[] suggestions, int flags) {
            mSuggestions = suggestions;
            mFlags = flags;
        }
    }

    /** LRU cache of lookup results, keyed by the queried word and its previous-words context. */
    protected static final class SuggestionsCache {
        private static final char CHAR_DELIMITER = '\uFFFC';
        private static final int MAX_CACHE_SIZE = 50;
        private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache =
                new LruCache<>(MAX_CACHE_SIZE);

        // TODO: Support n-gram input
        /**
         * Builds the cache key for a query.
         *
         * The key is the query alone when the query is empty or when there is no usable
         * context; otherwise it is the query and the context joined by a delimiter.
         *
         * @param query the word being checked.
         * @param prevWordsInfo the context of the word. May be null: the context-less
         *        lookup path passes null here, so it must be treated like an invalid context.
         * @return the key to use in the cache.
         */
        private static String generateKey(final String query, final PrevWordsInfo prevWordsInfo) {
            if (TextUtils.isEmpty(query) || null == prevWordsInfo || !prevWordsInfo.isValid()) {
                return query;
            }
            return query + CHAR_DELIMITER + prevWordsInfo;
        }

        /**
         * @param query the word being checked.
         * @param prevWordsInfo the context of the word, or null if there is none.
         * @return the cached result for this query and context, or null if there is none.
         */
        public SuggestionsParams getSuggestionsFromCache(String query,
                final PrevWordsInfo prevWordsInfo) {
            return mUnigramSuggestionsInfoCache.get(generateKey(query, prevWordsInfo));
        }

        /**
         * Stores a result in the cache. Nothing is stored when the suggestions are null or
         * the query is empty.
         *
         * @param query the word that was checked.
         * @param prevWordsInfo the context of the word, or null if there is none.
         * @param suggestions the suggestions to remember.
         * @param flags the SuggestionsInfo flags to remember.
         */
        public void putSuggestionsToCache(
                final String query, final PrevWordsInfo prevWordsInfo,
                final String[] suggestions, final int flags) {
            if (suggestions == null || TextUtils.isEmpty(query)) {
                return;
            }
            mUnigramSuggestionsInfoCache.put(
                    generateKey(query, prevWordsInfo), new SuggestionsParams(suggestions, flags));
        }

        /** Drops every cached result. */
        public void clearCache() {
            mUnigramSuggestionsInfoCache.evictAll();
        }
    }

    /**
     * Creates the session and starts observing the user dictionary so that cached results
     * are discarded when it changes. The observer is unregistered in {@link #onClose()}.
     *
     * @param service the spell checker service this session belongs to.
     */
    AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) {
        mService = service;
        final ContentResolver cres = service.getContentResolver();

        mObserver = new ContentObserver(null) {
            @Override
            public void onChange(boolean self) {
                mSuggestionsCache.clearCache();
            }
        };
        cres.registerContentObserver(Words.CONTENT_URI, true, mObserver);
    }

    @Override
    public void onCreate() {
        final String localeString = getLocale();
        mLocale = LocaleUtils.constructLocaleFromString(localeString);
        mScript = ScriptUtils.getScriptFromSpellCheckerLocale(mLocale);
    }

    @Override
    public void onClose() {
        final ContentResolver cres = mService.getContentResolver();
        cres.unregisterContentObserver(mObserver);
    }

    private static final int CHECKABILITY_CHECKABLE = 0;
    private static final int CHECKABILITY_TOO_MANY_NON_LETTERS = 1;
    private static final int CHECKABILITY_CONTAINS_PERIOD = 2;
    private static final int CHECKABILITY_EMAIL_OR_URL = 3;
    private static final int CHECKABILITY_FIRST_LETTER_UNCHECKABLE = 4;
    private static final int CHECKABILITY_TOO_SHORT = 5;
    /**
     * Finds out whether a particular string should be filtered out of spell checking.
     *
     * This will loosely match URLs, numbers, symbols. To avoid always underlining words that
     * we know we will never recognize, this accepts a script identifier (as obtained from
     * {@link ScriptUtils#getScriptFromSpellCheckerLocale}) to rule out quickly characters
     * from very different languages.
     *
     * @param text the string to evaluate.
     * @param script the identifier for the script this spell checker recognizes
     * @return one of the CHECKABILITY_* constants above.
     */
    private static int getCheckabilityInScript(final String text, final int script) {
        if (TextUtils.isEmpty(text) || text.length() <= 1) return CHECKABILITY_TOO_SHORT;

        // TODO: check if an equivalent processing can't be done more quickly with a
        // compiled regexp.
        // Filter by first letter
        final int firstCodePoint = text.codePointAt(0);
        // Filter out words that don't start with a letter or an apostrophe
        if (!ScriptUtils.isLetterPartOfScript(firstCodePoint, script)
                && '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE;

        // Filter contents
        final int length = text.length();
        int letterCount = 0;
        for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
            final int codePoint = text.codePointAt(i);
            // Any word containing a COMMERCIAL_AT is probably an e-mail address
            // Any word containing a SLASH is probably either an ad-hoc combination of two
            // words or a URI - in either case we don't want to spell check that
            if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) {
                return CHECKABILITY_EMAIL_OR_URL;
            }
            // If the string contains a period, native returns strange suggestions (it seems
            // to return suggestions for everything up to the period only and to ignore the
            // rest), so we suppress lookup if there is a period.
            // TODO: investigate why native returns these suggestions and remove this code.
            if (Constants.CODE_PERIOD == codePoint) {
                return CHECKABILITY_CONTAINS_PERIOD;
            }
            if (ScriptUtils.isLetterPartOfScript(codePoint, script)) ++letterCount;
        }
        // Guesstimate heuristic: perform spell checking if at least 3/4 of the characters
        // in this word are letters.
        // Note: letterCount counts code points while length counts UTF-16 code units, so the
        // ratio is only exact for text without surrogate pairs.
        return (letterCount * 4 < length * 3)
                ? CHECKABILITY_TOO_MANY_NON_LETTERS : CHECKABILITY_CHECKABLE;
    }

    /**
     * Helper method to test valid capitalizations of a word.
     *
     * If the "text" is lower-case, we test only the exact string.
     * If the "Text" is capitalized, we test the exact string "Text" and the lower-cased
     *  version of it "text".
     * If the "TEXT" is fully upper case, we test the exact string "TEXT", the lower-cased
     *  version of it "text" and the capitalized version of it "Text".
     *
     * @param text the word to look up.
     * @param capitalizeType the capitalization type of the word, one of the
     *        StringUtils.CAPITALIZE_* values.
     * @return true if any of the tested forms is a valid word for the session locale.
     */
    private boolean isInDictForAnyCapitalization(final String text, final int capitalizeType) {
        // If the word is in there as is, then it's in the dictionary. If not, we'll test lower
        // case versions, but only if the word is not already all-lower case or mixed case.
        if (mService.isValidWord(mLocale, text)) return true;
        if (StringUtils.CAPITALIZE_NONE == capitalizeType) return false;

        // If we come here, we have a capitalized word (either First- or All-).
        // Downcase the word and look it up again. If the word is only capitalized, we
        // tested all possibilities, so if it's still negative we can return false.
        final String lowerCaseText = text.toLowerCase(mLocale);
        if (mService.isValidWord(mLocale, lowerCaseText)) return true;
        if (StringUtils.CAPITALIZE_FIRST == capitalizeType) return false;

        // If the lower case version is not in the dictionary, it's still possible
        // that we have an all-caps version of a word that needs to be capitalized
        // according to the dictionary. E.g. "GERMANS" only exists in the dictionary as "Germans".
        return mService.isValidWord(mLocale,
                StringUtils.capitalizeFirstAndDowncaseRest(lowerCaseText, mLocale));
    }

    // Note : this must be reentrant
    /**
     * Gets a list of suggestions for a specific string. This returns a list of possible
     * corrections for the text passed as an argument. It may split or group words, and
     * even perform grammatical analysis.
     *
     * This variant looks the word up without any previous-words context.
     */
    private SuggestionsInfo onGetSuggestionsInternal(final TextInfo textInfo,
            final int suggestionsLimit) {
        return onGetSuggestionsInternal(textInfo, null, suggestionsLimit);
    }

    /**
     * Gets a list of suggestions for a specific string, taking its context into account.
     *
     * A cached result is returned when there is one. Text that is not checkable is answered
     * without gathering suggestions. Otherwise suggestions are gathered from the service and
     * the result is cached. Any RuntimeException raised along the way is logged and turned
     * into an empty "not in dictionary" result, unless DBG is set in which case it is
     * rethrown.
     *
     * @param textInfo the text to check.
     * @param prevWordsInfo the context of the text, or null if there is none.
     * @param suggestionsLimit the maximum number of suggestions to return.
     * @return the spell checking result for the text.
     */
    protected SuggestionsInfo onGetSuggestionsInternal(
            final TextInfo textInfo, final PrevWordsInfo prevWordsInfo,
            final int suggestionsLimit) {
        try {
            final String inText = textInfo.getText();
            final SuggestionsParams cachedSuggestionsParams =
                    mSuggestionsCache.getSuggestionsFromCache(inText, prevWordsInfo);
            if (cachedSuggestionsParams != null) {
                if (DBG) {
                    Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags);
                }
                return new SuggestionsInfo(
                        cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions);
            }
            final int checkability = getCheckabilityInScript(inText, mScript);
            if (CHECKABILITY_CHECKABLE != checkability) {
                if (CHECKABILITY_CONTAINS_PERIOD == checkability) {
                    // If every period-separated part is a valid word, suggest the same
                    // parts joined with spaces instead.
                    final String[] splitText = inText.split(Constants.REGEXP_PERIOD);
                    boolean allWordsAreValid = true;
                    for (final String word : splitText) {
                        if (!mService.isValidWord(mLocale, word)) {
                            allWordsAreValid = false;
                            break;
                        }
                    }
                    if (allWordsAreValid) {
                        return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO
                                | SuggestionsInfo.RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS,
                                new String[] {
                                        TextUtils.join(Constants.STRING_SPACE, splitText) });
                    }
                }
                return mService.isValidWord(mLocale, inText) ?
                        AndroidSpellCheckerService.getInDictEmptySuggestions() :
                        AndroidSpellCheckerService.getNotInDictEmptySuggestions(
                                CHECKABILITY_CONTAINS_PERIOD == checkability /* reportAsTypo */);
            }
            final String text = inText.replaceAll(
                    AndroidSpellCheckerService.APOSTROPHE, AndroidSpellCheckerService.SINGLE_QUOTE);
            final int capitalizeType = StringUtils.getCapitalizationType(text);
            if (!mService.hasMainDictionaryForLocale(mLocale)) {
                return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
                        false /* reportAsTypo */);
            }
            final Keyboard keyboard = mService.getKeyboardForLocale(mLocale);
            final WordComposer composer = new WordComposer();
            final int[] codePoints = StringUtils.toCodePointArray(text);
            final int[] coordinates;
            final ProximityInfo proximityInfo;
            if (null == keyboard) {
                coordinates = CoordinateUtils.newCoordinateArray(codePoints.length,
                        Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE);
                proximityInfo = null;
            } else {
                coordinates = keyboard.getCoordinates(codePoints);
                proximityInfo = keyboard.getProximityInfo();
            }
            composer.setComposingWord(codePoints, coordinates);
            // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
            final SuggestionResults suggestionResults = mService.getSuggestionResults(
                    mLocale, composer, prevWordsInfo, proximityInfo);
            final Result result = getResult(capitalizeType, mLocale, suggestionsLimit,
                    mService.getRecommendedThreshold(), text, suggestionResults);
            final boolean isInDict = isInDictForAnyCapitalization(text, capitalizeType);
            if (DBG) {
                Log.i(TAG, "Spell checking results for " + text + " with suggestion limit "
                        + suggestionsLimit);
                Log.i(TAG, "IsInDict = " + isInDict);
                Log.i(TAG, "LooksLikeTypo = " + (!isInDict));
                Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions);
                if (null != result.mSuggestions) {
                    for (String suggestion : result.mSuggestions) {
                        Log.i(TAG, suggestion);
                    }
                }
            }

            final int flags =
                    (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY
                            : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO)
                    | (result.mHasRecommendedSuggestions
                            ? SuggestionsInfoCompatUtils
                                    .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS()
                            : 0);
            final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions);
            // Store under the text as it was received: that is the key the lookup at the top
            // of this method uses, so the entry can be found again even when the apostrophe
            // replacement above changed the word.
            mSuggestionsCache.putSuggestionsToCache(inText, prevWordsInfo, result.mSuggestions,
                    flags);
            return retval;
        } catch (RuntimeException e) {
            // Don't kill the keyboard if there is a bug in the spell checker
            if (DBG) {
                throw e;
            } else {
                Log.e(TAG, "Exception while spellcheking", e);
                return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
                        false /* reportAsTypo */);
            }
        }
    }

    /** The gathered suggestions and whether the best one scored above the threshold. */
    private static final class Result {
        public final String[] mSuggestions;
        public final boolean mHasRecommendedSuggestions;
        public Result(final String[] gatheredSuggestions,
                final boolean hasRecommendedSuggestions) {
            mSuggestions = gatheredSuggestions;
            mHasRecommendedSuggestions = hasRecommendedSuggestions;
        }
    }

    /**
     * Turns raw suggestion results into the strings to report.
     *
     * Suggestions are re-capitalized to match the capitalization of the checked word,
     * de-duplicated, and truncated to the limit. The best suggestion is scored against the
     * original text to decide whether there are recommended suggestions.
     *
     * @param capitalizeType the capitalization type of the checked word.
     * @param locale the locale used for case conversions.
     * @param suggestionsLimit the maximum number of suggestions to keep.
     * @param recommendedThreshold the normalized score above which the best suggestion
     *        counts as recommended.
     * @param originalText the checked word.
     * @param suggestionResults the raw suggestions.
     * @return the result; its suggestions are null when there are no raw suggestions or
     *         the limit is not positive.
     */
    private static Result getResult(final int capitalizeType, final Locale locale,
            final int suggestionsLimit, final float recommendedThreshold, final String originalText,
            final SuggestionResults suggestionResults) {
        if (suggestionResults.isEmpty() || suggestionsLimit <= 0) {
            return new Result(null /* gatheredSuggestions */,
                    false /* hasRecommendedSuggestions */);
        }
        if (DBG) {
            for (final SuggestedWordInfo suggestedWordInfo : suggestionResults) {
                Log.i(TAG, "" + suggestedWordInfo.mScore + " " + suggestedWordInfo.mWord);
            }
        }
        final ArrayList<String> suggestions = new ArrayList<>();
        for (final SuggestedWordInfo suggestedWordInfo : suggestionResults) {
            final String suggestion;
            if (StringUtils.CAPITALIZE_ALL == capitalizeType) {
                suggestion = suggestedWordInfo.mWord.toUpperCase(locale);
            } else if (StringUtils.CAPITALIZE_FIRST == capitalizeType) {
                suggestion = StringUtils.capitalizeFirstCodePoint(
                        suggestedWordInfo.mWord, locale);
            } else {
                suggestion = suggestedWordInfo.mWord;
            }
            suggestions.add(suggestion);
        }
        StringUtils.removeDupes(suggestions);
        // This returns a String[], while toArray() returns an Object[] which cannot be cast
        // into a String[].
        final String[] gatheredSuggestions =
                suggestions.subList(0, Math.min(suggestions.size(), suggestionsLimit))
                        .toArray(EMPTY_STRING_ARRAY);

        final int bestScore = suggestionResults.first().mScore;
        final String bestSuggestion = suggestions.get(0);
        final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
                originalText, bestSuggestion, bestScore);
        final boolean hasRecommendedSuggestions = (normalizedScore > recommendedThreshold);
        if (DBG) {
            Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore);
            Log.i(TAG, "Normalized score = " + normalizedScore
                    + " (threshold " + recommendedThreshold
                    + ") => hasRecommendedSuggestions = " + hasRecommendedSuggestions);
        }
        return new Result(gatheredSuggestions, hasRecommendedSuggestions);
    }

    /*
     * The spell checker acts on its own behalf. That is needed, in particular, to be able to
     * access the dictionary files, which the provider restricts to the identity of Latin IME.
     * Since it's called externally by the application, the spell checker is using the identity
     * of the application by default unless we clearCallingIdentity.
     * That's what the following method does.
     */
    @Override
    public SuggestionsInfo onGetSuggestions(final TextInfo textInfo,
            final int suggestionsLimit) {
        long ident = Binder.clearCallingIdentity();
        try {
            return onGetSuggestionsInternal(textInfo, suggestionsLimit);
        } finally {
            Binder.restoreCallingIdentity(ident);
        }
    }
}