1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17 package com.android.inputmethod.latin.spellcheck; 18 19 import android.content.ContentResolver; 20 import android.content.Intent; 21 import android.content.SharedPreferences; 22 import android.database.ContentObserver; 23 import android.preference.PreferenceManager; 24 import android.provider.UserDictionary.Words; 25 import android.service.textservice.SpellCheckerService; 26 import android.text.TextUtils; 27 import android.util.Log; 28 import android.util.LruCache; 29 import android.view.textservice.SentenceSuggestionsInfo; 30 import android.view.textservice.SuggestionsInfo; 31 import android.view.textservice.TextInfo; 32 33 import com.android.inputmethod.compat.SuggestionsInfoCompatUtils; 34 import com.android.inputmethod.keyboard.ProximityInfo; 35 import com.android.inputmethod.latin.BinaryDictionary; 36 import com.android.inputmethod.latin.Dictionary; 37 import com.android.inputmethod.latin.Dictionary.WordCallback; 38 import com.android.inputmethod.latin.DictionaryCollection; 39 import com.android.inputmethod.latin.DictionaryFactory; 40 import com.android.inputmethod.latin.LatinIME; 41 import com.android.inputmethod.latin.LocaleUtils; 42 import com.android.inputmethod.latin.R; 43 import com.android.inputmethod.latin.StringUtils; 44 import com.android.inputmethod.latin.SynchronouslyLoadedContactsBinaryDictionary; 45 import com.android.inputmethod.latin.SynchronouslyLoadedContactsDictionary; 46 import com.android.inputmethod.latin.SynchronouslyLoadedUserBinaryDictionary; 47 import com.android.inputmethod.latin.SynchronouslyLoadedUserDictionary; 48 import com.android.inputmethod.latin.WhitelistDictionary; 49 import com.android.inputmethod.latin.WordComposer; 50 51 import java.lang.ref.WeakReference; 52 import java.util.ArrayList; 53 import java.util.Arrays; 54 import java.util.Collections; 55 import java.util.HashSet; 56 import java.util.Iterator; 57 import java.util.Locale; 58 import java.util.Map; 59 import java.util.TreeMap; 60 61 /** 62 * Service for spell checking, using LatinIME's dictionaries and mechanisms. 63 */ 64 public class AndroidSpellCheckerService extends SpellCheckerService 65 implements SharedPreferences.OnSharedPreferenceChangeListener { 66 private static final String TAG = AndroidSpellCheckerService.class.getSimpleName(); 67 private static final boolean DBG = false; 68 private static final int POOL_SIZE = 2; 69 70 public static final String PREF_USE_CONTACTS_KEY = "pref_spellcheck_use_contacts"; 71 72 private static final int CAPITALIZE_NONE = 0; // No caps, or mixed case 73 private static final int CAPITALIZE_FIRST = 1; // First only 74 private static final int CAPITALIZE_ALL = 2; // All caps 75 76 private final static String[] EMPTY_STRING_ARRAY = new String[0]; 77 private Map<String, DictionaryPool> mDictionaryPools = 78 Collections.synchronizedMap(new TreeMap<String, DictionaryPool>()); 79 private Map<String, Dictionary> mUserDictionaries = 80 Collections.synchronizedMap(new TreeMap<String, Dictionary>()); 81 private Map<String, Dictionary> mWhitelistDictionaries = 82 Collections.synchronizedMap(new TreeMap<String, Dictionary>()); 83 private Dictionary mContactsDictionary; 84 85 // The threshold for a candidate to be offered as a suggestion. 86 private float mSuggestionThreshold; 87 // The threshold for a suggestion to be considered "recommended". 88 private float mRecommendedThreshold; 89 // Whether to use the contacts dictionary 90 private boolean mUseContactsDictionary; 91 private final Object mUseContactsLock = new Object(); 92 93 private final HashSet<WeakReference<DictionaryCollection>> mDictionaryCollectionsList = 94 new HashSet<WeakReference<DictionaryCollection>>(); 95 96 public static final int SCRIPT_LATIN = 0; 97 public static final int SCRIPT_CYRILLIC = 1; 98 private static final String SINGLE_QUOTE = "\u0027"; 99 private static final String APOSTROPHE = "\u2019"; 100 private static final TreeMap<String, Integer> mLanguageToScript; 101 static { 102 // List of the supported languages and their associated script. We won't check 103 // words written in another script than the selected script, because we know we 104 // don't have those in our dictionary so we will underline everything and we 105 // will never have any suggestions, so it makes no sense checking them, and this 106 // is done in {@link #shouldFilterOut}. Also, the script is used to choose which 107 // proximity to pass to the dictionary descent algorithm. 108 // IMPORTANT: this only contains languages - do not write countries in there. 109 // Only the language is searched from the map. 110 mLanguageToScript = new TreeMap<String, Integer>(); 111 mLanguageToScript.put("en", SCRIPT_LATIN); 112 mLanguageToScript.put("fr", SCRIPT_LATIN); 113 mLanguageToScript.put("de", SCRIPT_LATIN); 114 mLanguageToScript.put("nl", SCRIPT_LATIN); 115 mLanguageToScript.put("cs", SCRIPT_LATIN); 116 mLanguageToScript.put("es", SCRIPT_LATIN); 117 mLanguageToScript.put("it", SCRIPT_LATIN); 118 mLanguageToScript.put("hr", SCRIPT_LATIN); 119 mLanguageToScript.put("pt", SCRIPT_LATIN); 120 mLanguageToScript.put("ru", SCRIPT_CYRILLIC); 121 // TODO: Make a persian proximity, and activate the Farsi subtype. 122 // mLanguageToScript.put("fa", SCRIPT_PERSIAN); 123 } 124 125 @Override public void onCreate() { 126 super.onCreate(); 127 mSuggestionThreshold = 128 Float.parseFloat(getString(R.string.spellchecker_suggestion_threshold_value)); 129 mRecommendedThreshold = 130 Float.parseFloat(getString(R.string.spellchecker_recommended_threshold_value)); 131 final SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this); 132 prefs.registerOnSharedPreferenceChangeListener(this); 133 onSharedPreferenceChanged(prefs, PREF_USE_CONTACTS_KEY); 134 } 135 136 private static int getScriptFromLocale(final Locale locale) { 137 final Integer script = mLanguageToScript.get(locale.getLanguage()); 138 if (null == script) { 139 throw new RuntimeException("We have been called with an unsupported language: \"" 140 + locale.getLanguage() + "\". Framework bug?"); 141 } 142 return script; 143 } 144 145 @Override 146 public void onSharedPreferenceChanged(final SharedPreferences prefs, final String key) { 147 if (!PREF_USE_CONTACTS_KEY.equals(key)) return; 148 synchronized(mUseContactsLock) { 149 mUseContactsDictionary = prefs.getBoolean(PREF_USE_CONTACTS_KEY, true); 150 if (mUseContactsDictionary) { 151 startUsingContactsDictionaryLocked(); 152 } else { 153 stopUsingContactsDictionaryLocked(); 154 } 155 } 156 } 157 158 private void startUsingContactsDictionaryLocked() { 159 if (null == mContactsDictionary) { 160 if (LatinIME.USE_BINARY_CONTACTS_DICTIONARY) { 161 // TODO: use the right locale for each session 162 mContactsDictionary = 163 new SynchronouslyLoadedContactsBinaryDictionary(this, Locale.getDefault()); 164 } else { 165 mContactsDictionary = new SynchronouslyLoadedContactsDictionary(this); 166 } 167 } 168 final Iterator<WeakReference<DictionaryCollection>> iterator = 169 mDictionaryCollectionsList.iterator(); 170 while (iterator.hasNext()) { 171 final WeakReference<DictionaryCollection> dictRef = iterator.next(); 172 final DictionaryCollection dict = dictRef.get(); 173 if (null == dict) { 174 iterator.remove(); 175 } else { 176 dict.addDictionary(mContactsDictionary); 177 } 178 } 179 } 180 181 private void stopUsingContactsDictionaryLocked() { 182 if (null == mContactsDictionary) return; 183 final Dictionary contactsDict = mContactsDictionary; 184 // TODO: revert to the concrete type when USE_BINARY_CONTACTS_DICTIONARY is no longer needed 185 mContactsDictionary = null; 186 final Iterator<WeakReference<DictionaryCollection>> iterator = 187 mDictionaryCollectionsList.iterator(); 188 while (iterator.hasNext()) { 189 final WeakReference<DictionaryCollection> dictRef = iterator.next(); 190 final DictionaryCollection dict = dictRef.get(); 191 if (null == dict) { 192 iterator.remove(); 193 } else { 194 dict.removeDictionary(contactsDict); 195 } 196 } 197 contactsDict.close(); 198 } 199 200 @Override 201 public Session createSession() { 202 return new AndroidSpellCheckerSession(this); 203 } 204 205 private static SuggestionsInfo getNotInDictEmptySuggestions() { 206 return new SuggestionsInfo(0, EMPTY_STRING_ARRAY); 207 } 208 209 private static SuggestionsInfo getInDictEmptySuggestions() { 210 return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY, 211 EMPTY_STRING_ARRAY); 212 } 213 214 private static class SuggestionsGatherer implements WordCallback { 215 public static class Result { 216 public final String[] mSuggestions; 217 public final boolean mHasRecommendedSuggestions; 218 public Result(final String[] gatheredSuggestions, 219 final boolean hasRecommendedSuggestions) { 220 mSuggestions = gatheredSuggestions; 221 mHasRecommendedSuggestions = hasRecommendedSuggestions; 222 } 223 } 224 225 private final ArrayList<CharSequence> mSuggestions; 226 private final int[] mScores; 227 private final String mOriginalText; 228 private final float mSuggestionThreshold; 229 private final float mRecommendedThreshold; 230 private final int mMaxLength; 231 private int mLength = 0; 232 233 // The two following attributes are only ever filled if the requested max length 234 // is 0 (or less, which is treated the same). 235 private String mBestSuggestion = null; 236 private int mBestScore = Integer.MIN_VALUE; // As small as possible 237 238 SuggestionsGatherer(final String originalText, final float suggestionThreshold, 239 final float recommendedThreshold, final int maxLength) { 240 mOriginalText = originalText; 241 mSuggestionThreshold = suggestionThreshold; 242 mRecommendedThreshold = recommendedThreshold; 243 mMaxLength = maxLength; 244 mSuggestions = new ArrayList<CharSequence>(maxLength + 1); 245 mScores = new int[mMaxLength]; 246 } 247 248 @Override 249 synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score, 250 int dicTypeId, int dataType) { 251 final int positionIndex = Arrays.binarySearch(mScores, 0, mLength, score); 252 // binarySearch returns the index if the element exists, and -<insertion index> - 1 253 // if it doesn't. See documentation for binarySearch. 254 final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1; 255 256 if (insertIndex == 0 && mLength >= mMaxLength) { 257 // In the future, we may want to keep track of the best suggestion score even if 258 // we are asked for 0 suggestions. In this case, we can use the following 259 // (tested) code to keep it: 260 // If the maxLength is 0 (should never be less, but if it is, it's treated as 0) 261 // then we need to keep track of the best suggestion in mBestScore and 262 // mBestSuggestion. This is so that we know whether the best suggestion makes 263 // the score cutoff, since we need to know that to return a meaningful 264 // looksLikeTypo. 265 // if (0 >= mMaxLength) { 266 // if (score > mBestScore) { 267 // mBestScore = score; 268 // mBestSuggestion = new String(word, wordOffset, wordLength); 269 // } 270 // } 271 return true; 272 } 273 if (insertIndex >= mMaxLength) { 274 // We found a suggestion, but its score is too weak to be kept considering 275 // the suggestion limit. 276 return true; 277 } 278 279 // Compute the normalized score and skip this word if it's normalized score does not 280 // make the threshold. 281 final String wordString = new String(word, wordOffset, wordLength); 282 final float normalizedScore = 283 BinaryDictionary.calcNormalizedScore(mOriginalText, wordString, score); 284 if (normalizedScore < mSuggestionThreshold) { 285 if (DBG) Log.i(TAG, wordString + " does not make the score threshold"); 286 return true; 287 } 288 289 if (mLength < mMaxLength) { 290 final int copyLen = mLength - insertIndex; 291 ++mLength; 292 System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen); 293 mSuggestions.add(insertIndex, wordString); 294 } else { 295 System.arraycopy(mScores, 1, mScores, 0, insertIndex); 296 mSuggestions.add(insertIndex, wordString); 297 mSuggestions.remove(0); 298 } 299 mScores[insertIndex] = score; 300 301 return true; 302 } 303 304 public Result getResults(final int capitalizeType, final Locale locale) { 305 final String[] gatheredSuggestions; 306 final boolean hasRecommendedSuggestions; 307 if (0 == mLength) { 308 // Either we found no suggestions, or we found some BUT the max length was 0. 309 // If we found some mBestSuggestion will not be null. If it is null, then 310 // we found none, regardless of the max length. 311 if (null == mBestSuggestion) { 312 gatheredSuggestions = null; 313 hasRecommendedSuggestions = false; 314 } else { 315 gatheredSuggestions = EMPTY_STRING_ARRAY; 316 final float normalizedScore = BinaryDictionary.calcNormalizedScore( 317 mOriginalText, mBestSuggestion, mBestScore); 318 hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold); 319 } 320 } else { 321 if (DBG) { 322 if (mLength != mSuggestions.size()) { 323 Log.e(TAG, "Suggestion size is not the same as stored mLength"); 324 } 325 for (int i = mLength - 1; i >= 0; --i) { 326 Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i)); 327 } 328 } 329 Collections.reverse(mSuggestions); 330 StringUtils.removeDupes(mSuggestions); 331 if (CAPITALIZE_ALL == capitalizeType) { 332 for (int i = 0; i < mSuggestions.size(); ++i) { 333 // get(i) returns a CharSequence which is actually a String so .toString() 334 // should return the same object. 335 mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale)); 336 } 337 } else if (CAPITALIZE_FIRST == capitalizeType) { 338 for (int i = 0; i < mSuggestions.size(); ++i) { 339 // Likewise 340 mSuggestions.set(i, StringUtils.toTitleCase( 341 mSuggestions.get(i).toString(), locale)); 342 } 343 } 344 // This returns a String[], while toArray() returns an Object[] which cannot be cast 345 // into a String[]. 346 gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY); 347 348 final int bestScore = mScores[mLength - 1]; 349 final CharSequence bestSuggestion = mSuggestions.get(0); 350 final float normalizedScore = 351 BinaryDictionary.calcNormalizedScore( 352 mOriginalText, bestSuggestion.toString(), bestScore); 353 hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold); 354 if (DBG) { 355 Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore); 356 Log.i(TAG, "Normalized score = " + normalizedScore 357 + " (threshold " + mRecommendedThreshold 358 + ") => hasRecommendedSuggestions = " + hasRecommendedSuggestions); 359 } 360 } 361 return new Result(gatheredSuggestions, hasRecommendedSuggestions); 362 } 363 } 364 365 @Override 366 public boolean onUnbind(final Intent intent) { 367 closeAllDictionaries(); 368 return false; 369 } 370 371 private void closeAllDictionaries() { 372 final Map<String, DictionaryPool> oldPools = mDictionaryPools; 373 mDictionaryPools = Collections.synchronizedMap(new TreeMap<String, DictionaryPool>()); 374 final Map<String, Dictionary> oldUserDictionaries = mUserDictionaries; 375 mUserDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>()); 376 final Map<String, Dictionary> oldWhitelistDictionaries = mWhitelistDictionaries; 377 mWhitelistDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>()); 378 new Thread("spellchecker_close_dicts") { 379 @Override 380 public void run() { 381 for (DictionaryPool pool : oldPools.values()) { 382 pool.close(); 383 } 384 for (Dictionary dict : oldUserDictionaries.values()) { 385 dict.close(); 386 } 387 for (Dictionary dict : oldWhitelistDictionaries.values()) { 388 dict.close(); 389 } 390 synchronized (mUseContactsLock) { 391 if (null != mContactsDictionary) { 392 // The synchronously loaded contacts dictionary should have been in one 393 // or several pools, but it is shielded against multiple closing and it's 394 // safe to call it several times. 395 final Dictionary dictToClose = mContactsDictionary; 396 // TODO: revert to the concrete type when USE_BINARY_CONTACTS_DICTIONARY 397 // is no longer needed 398 mContactsDictionary = null; 399 dictToClose.close(); 400 } 401 } 402 } 403 }.start(); 404 } 405 406 private DictionaryPool getDictionaryPool(final String locale) { 407 DictionaryPool pool = mDictionaryPools.get(locale); 408 if (null == pool) { 409 final Locale localeObject = LocaleUtils.constructLocaleFromString(locale); 410 pool = new DictionaryPool(POOL_SIZE, this, localeObject); 411 mDictionaryPools.put(locale, pool); 412 } 413 return pool; 414 } 415 416 public DictAndProximity createDictAndProximity(final Locale locale) { 417 final int script = getScriptFromLocale(locale); 418 final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo( 419 SpellCheckerProximityInfo.getProximityForScript(script), 420 SpellCheckerProximityInfo.ROW_SIZE, 421 SpellCheckerProximityInfo.PROXIMITY_GRID_WIDTH, 422 SpellCheckerProximityInfo.PROXIMITY_GRID_HEIGHT); 423 final DictionaryCollection dictionaryCollection = 424 DictionaryFactory.createMainDictionaryFromManager(this, locale, 425 true /* useFullEditDistance */); 426 final String localeStr = locale.toString(); 427 Dictionary userDictionary = mUserDictionaries.get(localeStr); 428 if (null == userDictionary) { 429 if (LatinIME.USE_BINARY_USER_DICTIONARY) { 430 userDictionary = new SynchronouslyLoadedUserBinaryDictionary(this, localeStr, true); 431 } else { 432 userDictionary = new SynchronouslyLoadedUserDictionary(this, localeStr, true); 433 } 434 mUserDictionaries.put(localeStr, userDictionary); 435 } 436 dictionaryCollection.addDictionary(userDictionary); 437 Dictionary whitelistDictionary = mWhitelistDictionaries.get(localeStr); 438 if (null == whitelistDictionary) { 439 whitelistDictionary = new WhitelistDictionary(this, locale); 440 mWhitelistDictionaries.put(localeStr, whitelistDictionary); 441 } 442 dictionaryCollection.addDictionary(whitelistDictionary); 443 synchronized (mUseContactsLock) { 444 if (mUseContactsDictionary) { 445 if (null == mContactsDictionary) { 446 // TODO: revert to the concrete type when USE_BINARY_CONTACTS_DICTIONARY is no 447 // longer needed 448 if (LatinIME.USE_BINARY_CONTACTS_DICTIONARY) { 449 // TODO: use the right locale. We can't do it right now because the 450 // spell checker is reusing the contacts dictionary across sessions 451 // without regard for their locale, so we need to fix that first. 452 mContactsDictionary = new SynchronouslyLoadedContactsBinaryDictionary(this, 453 Locale.getDefault()); 454 } else { 455 mContactsDictionary = new SynchronouslyLoadedContactsDictionary(this); 456 } 457 } 458 } 459 dictionaryCollection.addDictionary(mContactsDictionary); 460 mDictionaryCollectionsList.add( 461 new WeakReference<DictionaryCollection>(dictionaryCollection)); 462 } 463 return new DictAndProximity(dictionaryCollection, proximityInfo); 464 } 465 466 // This method assumes the text is not empty or null. 467 private static int getCapitalizationType(String text) { 468 // If the first char is not uppercase, then the word is either all lower case, 469 // and in either case we return CAPITALIZE_NONE. 470 if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE; 471 final int len = text.length(); 472 int capsCount = 1; 473 for (int i = 1; i < len; i = text.offsetByCodePoints(i, 1)) { 474 if (1 != capsCount && i != capsCount) break; 475 if (Character.isUpperCase(text.codePointAt(i))) ++capsCount; 476 } 477 // We know the first char is upper case. So we want to test if either everything 478 // else is lower case, or if everything else is upper case. If the string is 479 // exactly one char long, then we will arrive here with capsCount 1, and this is 480 // correct, too. 481 if (1 == capsCount) return CAPITALIZE_FIRST; 482 return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE); 483 } 484 485 private static class AndroidSpellCheckerSession extends Session { 486 // Immutable, but need the locale which is not available in the constructor yet 487 private DictionaryPool mDictionaryPool; 488 // Likewise 489 private Locale mLocale; 490 // Cache this for performance 491 private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now. 492 493 private final AndroidSpellCheckerService mService; 494 495 private final SuggestionsCache mSuggestionsCache = new SuggestionsCache(); 496 private final ContentObserver mObserver; 497 498 private static class SuggestionsParams { 499 public final String[] mSuggestions; 500 public final int mFlags; 501 public SuggestionsParams(String[] suggestions, int flags) { 502 mSuggestions = suggestions; 503 mFlags = flags; 504 } 505 } 506 507 private static class SuggestionsCache { 508 private static final int MAX_CACHE_SIZE = 50; 509 // TODO: support bigram 510 private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache = 511 new LruCache<String, SuggestionsParams>(MAX_CACHE_SIZE); 512 513 public SuggestionsParams getSuggestionsFromCache(String query) { 514 return mUnigramSuggestionsInfoCache.get(query); 515 } 516 517 public void putSuggestionsToCache(String query, String[] suggestions, int flags) { 518 if (suggestions == null || TextUtils.isEmpty(query)) { 519 return; 520 } 521 mUnigramSuggestionsInfoCache.put(query, new SuggestionsParams(suggestions, flags)); 522 } 523 524 public void clearCache() { 525 mUnigramSuggestionsInfoCache.evictAll(); 526 } 527 } 528 529 AndroidSpellCheckerSession(final AndroidSpellCheckerService service) { 530 mService = service; 531 final ContentResolver cres = service.getContentResolver(); 532 533 mObserver = new ContentObserver(null) { 534 @Override 535 public void onChange(boolean self) { 536 mSuggestionsCache.clearCache(); 537 } 538 }; 539 cres.registerContentObserver(Words.CONTENT_URI, true, mObserver); 540 } 541 542 @Override 543 public void onCreate() { 544 final String localeString = getLocale(); 545 mDictionaryPool = mService.getDictionaryPool(localeString); 546 mLocale = LocaleUtils.constructLocaleFromString(localeString); 547 mScript = getScriptFromLocale(mLocale); 548 } 549 550 @Override 551 public void onClose() { 552 final ContentResolver cres = mService.getContentResolver(); 553 cres.unregisterContentObserver(mObserver); 554 } 555 556 /* 557 * Returns whether the code point is a letter that makes sense for the specified 558 * locale for this spell checker. 559 * The dictionaries supported by Latin IME are described in res/xml/spellchecker.xml 560 * and is limited to EFIGS languages and Russian. 561 * Hence at the moment this explicitly tests for Cyrillic characters or Latin characters 562 * as appropriate, and explicitly excludes CJK, Arabic and Hebrew characters. 563 */ 564 private static boolean isLetterCheckableByLanguage(final int codePoint, 565 final int script) { 566 switch (script) { 567 case SCRIPT_LATIN: 568 // Our supported latin script dictionaries (EFIGS) at the moment only include 569 // characters in the C0, C1, Latin Extended A and B, IPA extensions unicode 570 // blocks. As it happens, those are back-to-back in the code range 0x40 to 0x2AF, 571 // so the below is a very efficient way to test for it. As for the 0-0x3F, it's 572 // excluded from isLetter anyway. 573 return codePoint <= 0x2AF && Character.isLetter(codePoint); 574 case SCRIPT_CYRILLIC: 575 // All Cyrillic characters are in the 400~52F block. There are some in the upper 576 // Unicode range, but they are archaic characters that are not used in modern 577 // russian and are not used by our dictionary. 578 return codePoint >= 0x400 && codePoint <= 0x52F && Character.isLetter(codePoint); 579 default: 580 // Should never come here 581 throw new RuntimeException("Impossible value of script: " + script); 582 } 583 } 584 585 /** 586 * Finds out whether a particular string should be filtered out of spell checking. 587 * 588 * This will loosely match URLs, numbers, symbols. To avoid always underlining words that 589 * we know we will never recognize, this accepts a script identifier that should be one 590 * of the SCRIPT_* constants defined above, to rule out quickly characters from very 591 * different languages. 592 * 593 * @param text the string to evaluate. 594 * @param script the identifier for the script this spell checker recognizes 595 * @return true if we should filter this text out, false otherwise 596 */ 597 private static boolean shouldFilterOut(final String text, final int script) { 598 if (TextUtils.isEmpty(text) || text.length() <= 1) return true; 599 600 // TODO: check if an equivalent processing can't be done more quickly with a 601 // compiled regexp. 602 // Filter by first letter 603 final int firstCodePoint = text.codePointAt(0); 604 // Filter out words that don't start with a letter or an apostrophe 605 if (!isLetterCheckableByLanguage(firstCodePoint, script) 606 && '\'' != firstCodePoint) return true; 607 608 // Filter contents 609 final int length = text.length(); 610 int letterCount = 0; 611 for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { 612 final int codePoint = text.codePointAt(i); 613 // Any word containing a '@' is probably an e-mail address 614 // Any word containing a '/' is probably either an ad-hoc combination of two 615 // words or a URI - in either case we don't want to spell check that 616 if ('@' == codePoint || '/' == codePoint) return true; 617 if (isLetterCheckableByLanguage(codePoint, script)) ++letterCount; 618 } 619 // Guestimate heuristic: perform spell checking if at least 3/4 of the characters 620 // in this word are letters 621 return (letterCount * 4 < length * 3); 622 } 623 624 private SentenceSuggestionsInfo fixWronglyInvalidatedWordWithSingleQuote( 625 TextInfo ti, SentenceSuggestionsInfo ssi) { 626 final String typedText = ti.getText(); 627 if (!typedText.contains(SINGLE_QUOTE)) { 628 return null; 629 } 630 final int N = ssi.getSuggestionsCount(); 631 final ArrayList<Integer> additionalOffsets = new ArrayList<Integer>(); 632 final ArrayList<Integer> additionalLengths = new ArrayList<Integer>(); 633 final ArrayList<SuggestionsInfo> additionalSuggestionsInfos = 634 new ArrayList<SuggestionsInfo>(); 635 for (int i = 0; i < N; ++i) { 636 final SuggestionsInfo si = ssi.getSuggestionsInfoAt(i); 637 final int flags = si.getSuggestionsAttributes(); 638 if ((flags & SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY) == 0) { 639 continue; 640 } 641 final int offset = ssi.getOffsetAt(i); 642 final int length = ssi.getLengthAt(i); 643 final String subText = typedText.substring(offset, offset + length); 644 if (!subText.contains(SINGLE_QUOTE)) { 645 continue; 646 } 647 final String[] splitTexts = subText.split(SINGLE_QUOTE, -1); 648 if (splitTexts == null || splitTexts.length <= 1) { 649 continue; 650 } 651 final int splitNum = splitTexts.length; 652 for (int j = 0; j < splitNum; ++j) { 653 final String splitText = splitTexts[j]; 654 if (TextUtils.isEmpty(splitText)) { 655 continue; 656 } 657 if (mSuggestionsCache.getSuggestionsFromCache(splitText) == null) { 658 continue; 659 } 660 final int newLength = splitText.length(); 661 // Neither RESULT_ATTR_IN_THE_DICTIONARY nor RESULT_ATTR_LOOKS_LIKE_TYPO 662 final int newFlags = 0; 663 final SuggestionsInfo newSi = new SuggestionsInfo(newFlags, EMPTY_STRING_ARRAY); 664 newSi.setCookieAndSequence(si.getCookie(), si.getSequence()); 665 if (DBG) { 666 Log.d(TAG, "Override and remove old span over: " 667 + splitText + ", " + offset + "," + newLength); 668 } 669 additionalOffsets.add(offset); 670 additionalLengths.add(newLength); 671 additionalSuggestionsInfos.add(newSi); 672 } 673 } 674 final int additionalSize = additionalOffsets.size(); 675 if (additionalSize <= 0) { 676 return null; 677 } 678 final int suggestionsSize = N + additionalSize; 679 final int[] newOffsets = new int[suggestionsSize]; 680 final int[] newLengths = new int[suggestionsSize]; 681 final SuggestionsInfo[] newSuggestionsInfos = new SuggestionsInfo[suggestionsSize]; 682 int i; 683 for (i = 0; i < N; ++i) { 684 newOffsets[i] = ssi.getOffsetAt(i); 685 newLengths[i] = ssi.getLengthAt(i); 686 newSuggestionsInfos[i] = ssi.getSuggestionsInfoAt(i); 687 } 688 for (; i < suggestionsSize; ++i) { 689 newOffsets[i] = additionalOffsets.get(i - N); 690 newLengths[i] = additionalLengths.get(i - N); 691 newSuggestionsInfos[i] = additionalSuggestionsInfos.get(i - N); 692 } 693 return new SentenceSuggestionsInfo(newSuggestionsInfos, newOffsets, newLengths); 694 } 695 696 @Override 697 public SentenceSuggestionsInfo[] onGetSentenceSuggestionsMultiple( 698 TextInfo[] textInfos, int suggestionsLimit) { 699 final SentenceSuggestionsInfo[] retval = super.onGetSentenceSuggestionsMultiple( 700 textInfos, suggestionsLimit); 701 if (retval == null || retval.length != textInfos.length) { 702 return retval; 703 } 704 for (int i = 0; i < retval.length; ++i) { 705 final SentenceSuggestionsInfo tempSsi = 706 fixWronglyInvalidatedWordWithSingleQuote(textInfos[i], retval[i]); 707 if (tempSsi != null) { 708 retval[i] = tempSsi; 709 } 710 } 711 return retval; 712 } 713 714 @Override 715 public SuggestionsInfo[] onGetSuggestionsMultiple(TextInfo[] textInfos, 716 int suggestionsLimit, boolean sequentialWords) { 717 final int length = textInfos.length; 718 final SuggestionsInfo[] retval = new SuggestionsInfo[length]; 719 for (int i = 0; i < length; ++i) { 720 final String prevWord; 721 if (sequentialWords && i > 0) { 722 final String prevWordCandidate = textInfos[i - 1].getText(); 723 // Note that an empty string would be used to indicate the initial word 724 // in the future. 725 prevWord = TextUtils.isEmpty(prevWordCandidate) ? null : prevWordCandidate; 726 } else { 727 prevWord = null; 728 } 729 retval[i] = onGetSuggestions(textInfos[i], prevWord, suggestionsLimit); 730 retval[i].setCookieAndSequence( 731 textInfos[i].getCookie(), textInfos[i].getSequence()); 732 } 733 return retval; 734 } 735 736 // Note : this must be reentrant 737 /** 738 * Gets a list of suggestions for a specific string. This returns a list of possible 739 * corrections for the text passed as an argument. It may split or group words, and 740 * even perform grammatical analysis. 741 */ 742 @Override 743 public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, 744 final int suggestionsLimit) { 745 return onGetSuggestions(textInfo, null, suggestionsLimit); 746 } 747 748 private SuggestionsInfo onGetSuggestions( 749 final TextInfo textInfo, final String prevWord, final int suggestionsLimit) { 750 try { 751 final String inText = textInfo.getText(); 752 final SuggestionsParams cachedSuggestionsParams = 753 mSuggestionsCache.getSuggestionsFromCache(inText); 754 if (cachedSuggestionsParams != null) { 755 if (DBG) { 756 Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags); 757 } 758 return new SuggestionsInfo( 759 cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions); 760 } 761 762 if (shouldFilterOut(inText, mScript)) { 763 DictAndProximity dictInfo = null; 764 try { 765 dictInfo = mDictionaryPool.takeOrGetNull(); 766 if (null == dictInfo) return getNotInDictEmptySuggestions(); 767 return dictInfo.mDictionary.isValidWord(inText) ? 768 getInDictEmptySuggestions() : getNotInDictEmptySuggestions(); 769 } finally { 770 if (null != dictInfo) { 771 if (!mDictionaryPool.offer(dictInfo)) { 772 Log.e(TAG, "Can't re-insert a dictionary into its pool"); 773 } 774 } 775 } 776 } 777 final String text = inText.replaceAll(APOSTROPHE, SINGLE_QUOTE); 778 779 // TODO: Don't gather suggestions if the limit is <= 0 unless necessary 780 final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text, 781 mService.mSuggestionThreshold, mService.mRecommendedThreshold, 782 suggestionsLimit); 783 final WordComposer composer = new WordComposer(); 784 final int length = text.length(); 785 for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { 786 final int codePoint = text.codePointAt(i); 787 // The getXYForCodePointAndScript method returns (Y << 16) + X 788 final int xy = SpellCheckerProximityInfo.getXYForCodePointAndScript( 789 codePoint, mScript); 790 if (SpellCheckerProximityInfo.NOT_A_COORDINATE_PAIR == xy) { 791 composer.add(codePoint, WordComposer.NOT_A_COORDINATE, 792 WordComposer.NOT_A_COORDINATE, null); 793 } else { 794 composer.add(codePoint, xy & 0xFFFF, xy >> 16, null); 795 } 796 } 797 798 final int capitalizeType = getCapitalizationType(text); 799 boolean isInDict = true; 800 DictAndProximity dictInfo = null; 801 try { 802 dictInfo = mDictionaryPool.takeOrGetNull(); 803 if (null == dictInfo) return getNotInDictEmptySuggestions(); 804 dictInfo.mDictionary.getWords(composer, prevWord, suggestionsGatherer, 805 dictInfo.mProximityInfo); 806 isInDict = dictInfo.mDictionary.isValidWord(text); 807 if (!isInDict && CAPITALIZE_NONE != capitalizeType) { 808 // We want to test the word again if it's all caps or first caps only. 809 // If it's fully down, we already tested it, if it's mixed case, we don't 810 // want to test a lowercase version of it. 811 isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale)); 812 } 813 } finally { 814 if (null != dictInfo) { 815 if (!mDictionaryPool.offer(dictInfo)) { 816 Log.e(TAG, "Can't re-insert a dictionary into its pool"); 817 } 818 } 819 } 820 821 final SuggestionsGatherer.Result result = suggestionsGatherer.getResults( 822 capitalizeType, mLocale); 823 824 if (DBG) { 825 Log.i(TAG, "Spell checking results for " + text + " with suggestion limit " 826 + suggestionsLimit); 827 Log.i(TAG, "IsInDict = " + isInDict); 828 Log.i(TAG, "LooksLikeTypo = " + (!isInDict)); 829 Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions); 830 if (null != result.mSuggestions) { 831 for (String suggestion : result.mSuggestions) { 832 Log.i(TAG, suggestion); 833 } 834 } 835 } 836 837 final int flags = 838 (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY 839 : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO) 840 | (result.mHasRecommendedSuggestions 841 ? SuggestionsInfoCompatUtils 842 .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS() 843 : 0); 844 final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions); 845 mSuggestionsCache.putSuggestionsToCache(text, result.mSuggestions, flags); 846 return retval; 847 } catch (RuntimeException e) { 848 // Don't kill the keyboard if there is a bug in the spell checker 849 if (DBG) { 850 throw e; 851 } else { 852 Log.e(TAG, "Exception while spellcheking: " + e); 853 return getNotInDictEmptySuggestions(); 854 } 855 } 856 } 857 } 858 } 859