/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.spellcheck;

import android.content.ContentResolver;
import android.database.ContentObserver;
import android.os.Binder;
import android.provider.UserDictionary.Words;
import android.service.textservice.SpellCheckerService.Session;
import android.text.TextUtils;
import android.util.Log;
import android.util.LruCache;
import android.view.textservice.SuggestionsInfo;
import android.view.textservice.TextInfo;

import com.android.inputmethod.compat.SuggestionsInfoCompatUtils;
import com.android.inputmethod.keyboard.Keyboard;
import com.android.inputmethod.latin.NgramContext;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.WordComposer;
import com.android.inputmethod.latin.common.Constants;
import com.android.inputmethod.latin.common.LocaleUtils;
import com.android.inputmethod.latin.common.StringUtils;
import com.android.inputmethod.latin.define.DebugFlags;
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
import com.android.inputmethod.latin.utils.ScriptUtils;
import com.android.inputmethod.latin.utils.StatsUtils;
import com.android.inputmethod.latin.utils.SuggestionResults;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

/**
 * Base spell checker session that checks one word at a time.
 *
 * The session resolves its locale and script in {@link #onCreate()}, looks words up through the
 * owning {@link AndroidSpellCheckerService}, and keeps a small LRU cache of the suggestions it
 * computed. The cache is cleared whenever the user dictionary content URI reports a change.
 */
public abstract class AndroidWordLevelSpellCheckerSession extends Session {
    private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName();

    public final static String[] EMPTY_STRING_ARRAY = new String[0];

    // Immutable, but not available in the constructor.
    private Locale mLocale;
    // Cache this for performance
    private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now.
    private final AndroidSpellCheckerService mService;
    protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache();
    private final ContentObserver mObserver;

    // Quote characters stripped from the very start and the very end of a checked word:
    // U+0022, U+0027, U+0060, U+00B4, U+2018, U+2019, U+201C, U+201D.
    // The list previously named U+2018 twice and never U+2019, leaving the left/right single
    // quotation mark pair incomplete.
    // NOTE(review): if AndroidSpellCheckerService.APOSTROPHE already maps U+2019 to U+0027
    // before this pattern is applied, listing U+2019 here is a harmless no-op - confirm.
    private static final String quotesRegexp =
            "(\\u0022|\\u0027|\\u0060|\\u00B4|\\u2018|\\u2019|\\u201C|\\u201D)";
    // Compiled once; Pattern instances are immutable and safe to share between threads.
    private static final Pattern LEADING_QUOTE_PATTERN = Pattern.compile("^" + quotesRegexp);
    private static final Pattern TRAILING_QUOTE_PATTERN = Pattern.compile(quotesRegexp + "$");

    /** Immutable pair of a suggestion array and the result flags computed with it. */
    private static final class SuggestionsParams {
        public final String[] mSuggestions;
        public final int mFlags;
        public SuggestionsParams(String[] suggestions, int flags) {
            mSuggestions = suggestions;
            mFlags = flags;
        }
    }

    /** LRU cache of the suggestions computed for a given query string. */
    protected static final class SuggestionsCache {
        private static final int MAX_CACHE_SIZE = 50;
        private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache =
                new LruCache<>(MAX_CACHE_SIZE);

        /** Builds the cache key for a query; both reads and writes must go through this. */
        private static String generateKey(final String query) {
            return query + "";
        }

        /**
         * Looks up the cached suggestions for a query.
         *
         * @param query the word to look up.
         * @return the cached entry, or null if there is none. A null or empty query is never
         *         stored by {@link #putSuggestionsToCache}, so it always yields null.
         */
        public SuggestionsParams getSuggestionsFromCache(final String query) {
            // Mirror the guard of putSuggestionsToCache(): LruCache rejects null keys by
            // throwing, and empty queries are never cached.
            if (TextUtils.isEmpty(query)) {
                return null;
            }
            return mUnigramSuggestionsInfoCache.get(generateKey(query));
        }

        /**
         * Stores the suggestions for a query. Does nothing when the suggestions are null or
         * the query is null or empty.
         *
         * @param query the word the suggestions were computed for.
         * @param suggestions the suggestions to remember.
         * @param flags the result flags to remember along with the suggestions.
         */
        public void putSuggestionsToCache(
                final String query, final String[] suggestions, final int flags) {
            if (suggestions == null || TextUtils.isEmpty(query)) {
                return;
            }
            mUnigramSuggestionsInfoCache.put(
                    generateKey(query),
                    new SuggestionsParams(suggestions, flags));
        }

        /** Drops every cached entry. */
        public void clearCache() {
            mUnigramSuggestionsInfoCache.evictAll();
        }
    }

    /**
     * Creates the session and registers an observer on the user dictionary content URI that
     * clears the suggestion cache on every change. The observer is unregistered in
     * {@link #onClose()}.
     *
     * @param service the spell checker service this session delegates lookups to.
     */
    AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) {
        mService = service;
        final ContentResolver cres = service.getContentResolver();

        mObserver = new ContentObserver(null) {
            @Override
            public void onChange(boolean self) {
                mSuggestionsCache.clearCache();
            }
        };
        cres.registerContentObserver(Words.CONTENT_URI, true, mObserver);
    }

    /** Resolves the session locale (null if the session reports none) and its script. */
    @Override
    public void onCreate() {
        final String localeString = getLocale();
        mLocale = (null == localeString) ? null
                : LocaleUtils.constructLocaleFromString(localeString);
        mScript = ScriptUtils.getScriptFromSpellCheckerLocale(mLocale);
    }

    /** Unregisters the user dictionary observer registered by the constructor. */
    @Override
    public void onClose() {
        final ContentResolver cres = mService.getContentResolver();
        cres.unregisterContentObserver(mObserver);
    }

    private static final int CHECKABILITY_CHECKABLE = 0;
    private static final int CHECKABILITY_TOO_MANY_NON_LETTERS = 1;
    private static final int CHECKABILITY_CONTAINS_PERIOD = 2;
    private static final int CHECKABILITY_EMAIL_OR_URL = 3;
    private static final int CHECKABILITY_FIRST_LETTER_UNCHECKABLE = 4;
    private static final int CHECKABILITY_TOO_SHORT = 5;
    /**
     * Finds out whether a particular string should be filtered out of spell checking.
     *
     * This will loosely match URLs, numbers, symbols. To avoid always underlining words that
     * we know we will never recognize, this accepts a script identifier that should be one
     * of the SCRIPT_* constants, to rule out quickly characters from very different languages.
     *
     * @param text the string to evaluate.
     * @param script the identifier for the script this spell checker recognizes
     * @return one of the CHECKABILITY_* constants above.
     */
    private static int getCheckabilityInScript(final String text, final int script) {
        if (TextUtils.isEmpty(text) || text.length() <= 1) return CHECKABILITY_TOO_SHORT;

        // TODO: check if an equivalent processing can't be done more quickly with a
        // compiled regexp.
        // Filter by first letter
        final int firstCodePoint = text.codePointAt(0);
        // Filter out words that don't start with a letter or an apostrophe
        if (!ScriptUtils.isLetterPartOfScript(firstCodePoint, script)
                && '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE;

        // Filter contents
        final int length = text.length();
        int letterCount = 0;
        // Counted in code points, like letterCount. Comparing letterCount against the UTF-16
        // length would count every surrogate pair twice in the denominator.
        int codePointCount = 0;
        for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
            final int codePoint = text.codePointAt(i);
            ++codePointCount;
            // Any word containing a COMMERCIAL_AT is probably an e-mail address
            // Any word containing a SLASH is probably either an ad-hoc combination of two
            // words or a URI - in either case we don't want to spell check that
            if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) {
                return CHECKABILITY_EMAIL_OR_URL;
            }
            // If the string contains a period, native returns strange suggestions (it seems
            // to return suggestions for everything up to the period only and to ignore the
            // rest), so we suppress lookup if there is a period.
            // TODO: investigate why native returns these suggestions and remove this code.
            if (Constants.CODE_PERIOD == codePoint) {
                return CHECKABILITY_CONTAINS_PERIOD;
            }
            if (ScriptUtils.isLetterPartOfScript(codePoint, script)) ++letterCount;
        }
        // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
        // in this word are letters
        return (letterCount * 4 < codePointCount * 3)
                ? CHECKABILITY_TOO_MANY_NON_LETTERS : CHECKABILITY_CHECKABLE;
    }

    /**
     * Helper method to test valid capitalizations of a word.
     *
     * If the "text" is lower-case, we test only the exact string.
     * If the "Text" is capitalized, we test the exact string "Text" and the lower-cased
     *  version of it "text".
     * If the "TEXT" is fully upper case, we test the exact string "TEXT", the lower-cased
     *  version of it "text" and the capitalized version of it "Text".
     *
     * @param text the word to look up.
     * @param capitalizeType the capitalization type of the word, one of the
     *        StringUtils.CAPITALIZE_* values.
     * @return true if any of the tested forms is a valid word for the session locale.
     */
    private boolean isInDictForAnyCapitalization(final String text, final int capitalizeType) {
        // If the word is in there as is, then it's in the dictionary. If not, we'll test lower
        // case versions, but only if the word is not already all-lower case or mixed case.
        if (mService.isValidWord(mLocale, text)) return true;
        if (StringUtils.CAPITALIZE_NONE == capitalizeType) return false;

        // If we come here, we have a capitalized word (either First- or All-).
        // Downcase the word and look it up again. If the word is only capitalized, we
        // tested all possibilities, so if it's still negative we can return false.
        final String lowerCaseText = text.toLowerCase(mLocale);
        if (mService.isValidWord(mLocale, lowerCaseText)) return true;
        if (StringUtils.CAPITALIZE_FIRST == capitalizeType) return false;

        // If the lower case version is not in the dictionary, it's still possible
        // that we have an all-caps version of a word that needs to be capitalized
        // according to the dictionary. E.g. "GERMANS" only exists in the dictionary as "Germans".
        return mService.isValidWord(mLocale,
                StringUtils.capitalizeFirstAndDowncaseRest(lowerCaseText, mLocale));
    }

    // Note : this must be reentrant
    /**
     * Gets a list of suggestions for a specific string. This returns a list of possible
     * corrections for the text passed as an argument. It may split or group words, and
     * even perform grammatical analysis.
     *
     * Delegates to the three-argument overload with a null n-gram context.
     *
     * @param textInfo the text to check.
     * @param suggestionsLimit the maximum number of suggestions to return.
     * @return the spell checking result for the text.
     */
    private SuggestionsInfo onGetSuggestionsInternal(final TextInfo textInfo,
            final int suggestionsLimit) {
        return onGetSuggestionsInternal(textInfo, null, suggestionsLimit);
    }

    /**
     * Spell checks a single word.
     *
     * The word is first normalized (apostrophes replaced, one leading and one trailing quote
     * character stripped). Words that are not checkable are reported without suggestions,
     * except that a word containing periods whose parts are all valid words gets a single
     * suggestion joining the parts with spaces. Checkable words that are not in the dictionary
     * under any accepted capitalization get suggestions from the service, and the result is
     * stored in {@link #mSuggestionsCache}.
     *
     * Any RuntimeException raised along the way is logged and turned into an empty,
     * not-a-typo result so that a spell checker bug does not take the caller down.
     *
     * @param textInfo the text to check.
     * @param ngramContext the context passed through to the suggestion lookup; may be null.
     * @param suggestionsLimit the maximum number of suggestions to return.
     * @return the spell checking result for the text.
     */
    protected SuggestionsInfo onGetSuggestionsInternal(
            final TextInfo textInfo, final NgramContext ngramContext, final int suggestionsLimit) {
        try {
            final String normalizedApostrophes = textInfo.getText().replaceAll(
                    AndroidSpellCheckerService.APOSTROPHE,
                    AndroidSpellCheckerService.SINGLE_QUOTE);
            final String withoutLeadingQuote =
                    LEADING_QUOTE_PATTERN.matcher(normalizedApostrophes).replaceAll("");
            final String text =
                    TRAILING_QUOTE_PATTERN.matcher(withoutLeadingQuote).replaceAll("");

            if (!mService.hasMainDictionaryForLocale(mLocale)) {
                return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
                        false /* reportAsTypo */);
            }

            // Handle special patterns like email, URI, telephone number.
            final int checkability = getCheckabilityInScript(text, mScript);
            if (CHECKABILITY_CHECKABLE != checkability) {
                if (CHECKABILITY_CONTAINS_PERIOD == checkability) {
                    final String[] splitText = text.split(Constants.REGEXP_PERIOD);
                    boolean allWordsAreValid = true;
                    for (final String word : splitText) {
                        if (!mService.isValidWord(mLocale, word)) {
                            allWordsAreValid = false;
                            break;
                        }
                    }
                    if (allWordsAreValid) {
                        // Every period-separated part is a word: suggest the parts joined
                        // with spaces instead of periods.
                        return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO
                                | SuggestionsInfo.RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS,
                                new String[] {
                                        TextUtils.join(Constants.STRING_SPACE, splitText) });
                    }
                }
                return mService.isValidWord(mLocale, text) ?
                        AndroidSpellCheckerService.getInDictEmptySuggestions() :
                        AndroidSpellCheckerService.getNotInDictEmptySuggestions(
                                CHECKABILITY_CONTAINS_PERIOD == checkability /* reportAsTypo */);
            }

            // Handle normal words.
            final int capitalizeType = StringUtils.getCapitalizationType(text);

            if (isInDictForAnyCapitalization(text, capitalizeType)) {
                if (DebugFlags.DEBUG_ENABLED) {
                    Log.i(TAG, "onGetSuggestionsInternal() : [" + text + "] is a valid word");
                }
                return AndroidSpellCheckerService.getInDictEmptySuggestions();
            }
            if (DebugFlags.DEBUG_ENABLED) {
                Log.i(TAG, "onGetSuggestionsInternal() : [" + text + "] is NOT a valid word");
            }

            final Keyboard keyboard = mService.getKeyboardForLocale(mLocale);
            if (null == keyboard) {
                Log.w(TAG, "onGetSuggestionsInternal() : No keyboard for locale: " + mLocale);
                // If there is no keyboard for this locale, don't do any spell-checking.
                return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
                        false /* reportAsTypo */);
            }

            final WordComposer composer = new WordComposer();
            final int[] codePoints = StringUtils.toCodePointArray(text);
            final int[] coordinates;
            coordinates = keyboard.getCoordinates(codePoints);
            composer.setComposingWord(codePoints, coordinates);
            // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
            final SuggestionResults suggestionResults = mService.getSuggestionResults(
                    mLocale, composer.getComposedDataSnapshot(), ngramContext, keyboard);
            final Result result = getResult(capitalizeType, mLocale, suggestionsLimit,
                    mService.getRecommendedThreshold(), text, suggestionResults);
            if (DebugFlags.DEBUG_ENABLED) {
                if (result.mSuggestions != null && result.mSuggestions.length > 0) {
                    final StringBuilder builder = new StringBuilder();
                    for (String suggestion : result.mSuggestions) {
                        builder.append(" [");
                        builder.append(suggestion);
                        builder.append("]");
                    }
                    Log.i(TAG, "onGetSuggestionsInternal() : Suggestions =" + builder);
                }
            }
            // Handle word not in dictionary.
            // This is called only once per unique word, so entering multiple
            // instances of the same word does not result in more than one call
            // to this method.
            // Also, upon changing the orientation of the device, this is called
            // again for every unique invalid word in the text box.
            StatsUtils.onInvalidWordIdentification(text);

            final int flags =
                    SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO
                    | (result.mHasRecommendedSuggestions
                            ? SuggestionsInfoCompatUtils
                                    .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS()
                            : 0);
            final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions);
            mSuggestionsCache.putSuggestionsToCache(text, result.mSuggestions, flags);
            return retval;
        } catch (RuntimeException e) {
            // Don't kill the keyboard if there is a bug in the spell checker
            Log.e(TAG, "Exception while spellchecking", e);
            return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
                    false /* reportAsTypo */);
        }
    }

    /** Suggestions gathered for a word, and whether the best one clears the threshold. */
    private static final class Result {
        public final String[] mSuggestions;
        public final boolean mHasRecommendedSuggestions;
        public Result(final String[] gatheredSuggestions, final boolean hasRecommendedSuggestions) {
            mSuggestions = gatheredSuggestions;
            mHasRecommendedSuggestions = hasRecommendedSuggestions;
        }
    }

    /**
     * Turns raw suggestion results into the array handed back to the framework.
     *
     * Suggestions are re-capitalized to match the original word, de-duplicated and truncated
     * to the limit. The best suggestion is flagged as recommended when its normalized score
     * is strictly above the threshold.
     *
     * @param capitalizeType the capitalization type of the original word.
     * @param locale the locale used for case conversion.
     * @param suggestionsLimit the maximum number of suggestions to keep.
     * @param recommendedThreshold the normalized score above which the best suggestion is
     *        considered recommended.
     * @param originalText the word that was checked.
     * @param suggestionResults the raw suggestions.
     * @return the gathered suggestions; the suggestion array is null when there are no
     *         results or the limit is not positive.
     */
    private static Result getResult(final int capitalizeType, final Locale locale,
            final int suggestionsLimit, final float recommendedThreshold, final String originalText,
            final SuggestionResults suggestionResults) {
        if (suggestionResults.isEmpty() || suggestionsLimit <= 0) {
            return new Result(null /* gatheredSuggestions */,
                    false /* hasRecommendedSuggestions */);
        }
        final ArrayList<String> suggestions = new ArrayList<>();
        for (final SuggestedWordInfo suggestedWordInfo : suggestionResults) {
            final String suggestion;
            if (StringUtils.CAPITALIZE_ALL == capitalizeType) {
                suggestion = suggestedWordInfo.mWord.toUpperCase(locale);
            } else if (StringUtils.CAPITALIZE_FIRST == capitalizeType) {
                suggestion = StringUtils.capitalizeFirstCodePoint(
                        suggestedWordInfo.mWord, locale);
            } else {
                suggestion = suggestedWordInfo.mWord;
            }
            suggestions.add(suggestion);
        }
        StringUtils.removeDupes(suggestions);
        // This returns a String[], while toArray() returns an Object[] which cannot be cast
        // into a String[].
        final List<String> gatheredSuggestionsList =
                suggestions.subList(0, Math.min(suggestions.size(), suggestionsLimit));
        final String[] gatheredSuggestions =
                gatheredSuggestionsList.toArray(new String[gatheredSuggestionsList.size()]);

        final int bestScore = suggestionResults.first().mScore;
        final String bestSuggestion = suggestions.get(0);
        final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
                originalText, bestSuggestion, bestScore);
        final boolean hasRecommendedSuggestions = (normalizedScore > recommendedThreshold);
        return new Result(gatheredSuggestions, hasRecommendedSuggestions);
    }

    /*
     * The spell checker acts on its own behalf. That is needed, in particular, to be able to
     * access the dictionary files, which the provider restricts to the identity of Latin IME.
     * Since it's called externally by the application, the spell checker is using the identity
     * of the application by default unless we clearCallingIdentity.
     * That's what the following method does.
     */
    @Override
    public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, final int suggestionsLimit) {
        final long ident = Binder.clearCallingIdentity();
        try {
            return onGetSuggestionsInternal(textInfo, suggestionsLimit);
        } finally {
            // Always hand the caller's identity back, even if the lookup throws.
            Binder.restoreCallingIdentity(ident);
        }
    }
}