1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17 package com.android.inputmethod.latin.spellcheck; 18 19 import android.content.Intent; 20 import android.content.res.Resources; 21 import android.service.textservice.SpellCheckerService; 22 import android.text.TextUtils; 23 import android.util.Log; 24 import android.view.textservice.SuggestionsInfo; 25 import android.view.textservice.TextInfo; 26 27 import com.android.inputmethod.compat.ArraysCompatUtils; 28 import com.android.inputmethod.keyboard.ProximityInfo; 29 import com.android.inputmethod.latin.BinaryDictionary; 30 import com.android.inputmethod.latin.Dictionary; 31 import com.android.inputmethod.latin.Dictionary.DataType; 32 import com.android.inputmethod.latin.Dictionary.WordCallback; 33 import com.android.inputmethod.latin.DictionaryCollection; 34 import com.android.inputmethod.latin.DictionaryFactory; 35 import com.android.inputmethod.latin.Flag; 36 import com.android.inputmethod.latin.LocaleUtils; 37 import com.android.inputmethod.latin.R; 38 import com.android.inputmethod.latin.SynchronouslyLoadedContactsDictionary; 39 import com.android.inputmethod.latin.SynchronouslyLoadedUserDictionary; 40 import com.android.inputmethod.latin.Utils; 41 import com.android.inputmethod.latin.WhitelistDictionary; 42 import com.android.inputmethod.latin.WordComposer; 43 44 import java.util.ArrayList; 45 import java.util.Arrays; 46 import java.util.Collections; 47 import java.util.Locale; 48 import java.util.Map; 49 import java.util.TreeMap; 50 51 /** 52 * Service for spell checking, using LatinIME's dictionaries and mechanisms. 53 */ 54 public class AndroidSpellCheckerService extends SpellCheckerService { 55 private static final String TAG = AndroidSpellCheckerService.class.getSimpleName(); 56 private static final boolean DBG = false; 57 private static final int POOL_SIZE = 2; 58 59 private static final int CAPITALIZE_NONE = 0; // No caps, or mixed case 60 private static final int CAPITALIZE_FIRST = 1; // First only 61 private static final int CAPITALIZE_ALL = 2; // All caps 62 63 private final static String[] EMPTY_STRING_ARRAY = new String[0]; 64 private final static Flag[] USE_FULL_EDIT_DISTANCE_FLAG_ARRAY; 65 static { 66 // See BinaryDictionary.java for an explanation of these flags 67 // Specifially, ALL_CONFIG_FLAGS means that we want to consider all flags with the 68 // current dictionary configuration - for example, consider the UMLAUT flag 69 // so that it will be turned on for German dictionaries and off for others. 70 USE_FULL_EDIT_DISTANCE_FLAG_ARRAY = Arrays.copyOf(BinaryDictionary.ALL_CONFIG_FLAGS, 71 BinaryDictionary.ALL_CONFIG_FLAGS.length + 1); 72 USE_FULL_EDIT_DISTANCE_FLAG_ARRAY[BinaryDictionary.ALL_CONFIG_FLAGS.length] = 73 BinaryDictionary.FLAG_USE_FULL_EDIT_DISTANCE; 74 } 75 private Map<String, DictionaryPool> mDictionaryPools = 76 Collections.synchronizedMap(new TreeMap<String, DictionaryPool>()); 77 private Map<String, Dictionary> mUserDictionaries = 78 Collections.synchronizedMap(new TreeMap<String, Dictionary>()); 79 private Map<String, Dictionary> mWhitelistDictionaries = 80 Collections.synchronizedMap(new TreeMap<String, Dictionary>()); 81 private SynchronouslyLoadedContactsDictionary mContactsDictionary; 82 83 // The threshold for a candidate to be offered as a suggestion. 84 private double mSuggestionThreshold; 85 // The threshold for a suggestion to be considered "likely". 86 private double mLikelyThreshold; 87 88 @Override public void onCreate() { 89 super.onCreate(); 90 mSuggestionThreshold = 91 Double.parseDouble(getString(R.string.spellchecker_suggestion_threshold_value)); 92 mLikelyThreshold = 93 Double.parseDouble(getString(R.string.spellchecker_likely_threshold_value)); 94 } 95 96 @Override 97 public Session createSession() { 98 return new AndroidSpellCheckerSession(this); 99 } 100 101 private static SuggestionsInfo getNotInDictEmptySuggestions() { 102 return new SuggestionsInfo(0, EMPTY_STRING_ARRAY); 103 } 104 105 private static SuggestionsInfo getInDictEmptySuggestions() { 106 return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY, 107 EMPTY_STRING_ARRAY); 108 } 109 110 private static class SuggestionsGatherer implements WordCallback { 111 public static class Result { 112 public final String[] mSuggestions; 113 public final boolean mHasLikelySuggestions; 114 public Result(final String[] gatheredSuggestions, final boolean hasLikelySuggestions) { 115 mSuggestions = gatheredSuggestions; 116 mHasLikelySuggestions = hasLikelySuggestions; 117 } 118 } 119 120 private final ArrayList<CharSequence> mSuggestions; 121 private final int[] mScores; 122 private final String mOriginalText; 123 private final double mSuggestionThreshold; 124 private final double mLikelyThreshold; 125 private final int mMaxLength; 126 private int mLength = 0; 127 128 // The two following attributes are only ever filled if the requested max length 129 // is 0 (or less, which is treated the same). 130 private String mBestSuggestion = null; 131 private int mBestScore = Integer.MIN_VALUE; // As small as possible 132 133 SuggestionsGatherer(final String originalText, final double suggestionThreshold, 134 final double likelyThreshold, final int maxLength) { 135 mOriginalText = originalText; 136 mSuggestionThreshold = suggestionThreshold; 137 mLikelyThreshold = likelyThreshold; 138 mMaxLength = maxLength; 139 mSuggestions = new ArrayList<CharSequence>(maxLength + 1); 140 mScores = new int[mMaxLength]; 141 } 142 143 @Override 144 synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score, 145 int dicTypeId, DataType dataType) { 146 final int positionIndex = ArraysCompatUtils.binarySearch(mScores, 0, mLength, score); 147 // binarySearch returns the index if the element exists, and -<insertion index> - 1 148 // if it doesn't. See documentation for binarySearch. 149 final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1; 150 151 if (insertIndex == 0 && mLength >= mMaxLength) { 152 // In the future, we may want to keep track of the best suggestion score even if 153 // we are asked for 0 suggestions. In this case, we can use the following 154 // (tested) code to keep it: 155 // If the maxLength is 0 (should never be less, but if it is, it's treated as 0) 156 // then we need to keep track of the best suggestion in mBestScore and 157 // mBestSuggestion. This is so that we know whether the best suggestion makes 158 // the score cutoff, since we need to know that to return a meaningful 159 // looksLikeTypo. 160 // if (0 >= mMaxLength) { 161 // if (score > mBestScore) { 162 // mBestScore = score; 163 // mBestSuggestion = new String(word, wordOffset, wordLength); 164 // } 165 // } 166 return true; 167 } 168 if (insertIndex >= mMaxLength) { 169 // We found a suggestion, but its score is too weak to be kept considering 170 // the suggestion limit. 171 return true; 172 } 173 174 // Compute the normalized score and skip this word if it's normalized score does not 175 // make the threshold. 176 final String wordString = new String(word, wordOffset, wordLength); 177 final double normalizedScore = 178 Utils.calcNormalizedScore(mOriginalText, wordString, score); 179 if (normalizedScore < mSuggestionThreshold) { 180 if (DBG) Log.i(TAG, wordString + " does not make the score threshold"); 181 return true; 182 } 183 184 if (mLength < mMaxLength) { 185 final int copyLen = mLength - insertIndex; 186 ++mLength; 187 System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen); 188 mSuggestions.add(insertIndex, wordString); 189 } else { 190 System.arraycopy(mScores, 1, mScores, 0, insertIndex); 191 mSuggestions.add(insertIndex, wordString); 192 mSuggestions.remove(0); 193 } 194 mScores[insertIndex] = score; 195 196 return true; 197 } 198 199 public Result getResults(final int capitalizeType, final Locale locale) { 200 final String[] gatheredSuggestions; 201 final boolean hasLikelySuggestions; 202 if (0 == mLength) { 203 // Either we found no suggestions, or we found some BUT the max length was 0. 204 // If we found some mBestSuggestion will not be null. If it is null, then 205 // we found none, regardless of the max length. 206 if (null == mBestSuggestion) { 207 gatheredSuggestions = null; 208 hasLikelySuggestions = false; 209 } else { 210 gatheredSuggestions = EMPTY_STRING_ARRAY; 211 final double normalizedScore = 212 Utils.calcNormalizedScore(mOriginalText, mBestSuggestion, mBestScore); 213 hasLikelySuggestions = (normalizedScore > mLikelyThreshold); 214 } 215 } else { 216 if (DBG) { 217 if (mLength != mSuggestions.size()) { 218 Log.e(TAG, "Suggestion size is not the same as stored mLength"); 219 } 220 for (int i = mLength - 1; i >= 0; --i) { 221 Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i)); 222 } 223 } 224 Collections.reverse(mSuggestions); 225 Utils.removeDupes(mSuggestions); 226 if (CAPITALIZE_ALL == capitalizeType) { 227 for (int i = 0; i < mSuggestions.size(); ++i) { 228 // get(i) returns a CharSequence which is actually a String so .toString() 229 // should return the same object. 230 mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale)); 231 } 232 } else if (CAPITALIZE_FIRST == capitalizeType) { 233 for (int i = 0; i < mSuggestions.size(); ++i) { 234 // Likewise 235 mSuggestions.set(i, Utils.toTitleCase(mSuggestions.get(i).toString(), 236 locale)); 237 } 238 } 239 // This returns a String[], while toArray() returns an Object[] which cannot be cast 240 // into a String[]. 241 gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY); 242 243 final int bestScore = mScores[mLength - 1]; 244 final CharSequence bestSuggestion = mSuggestions.get(0); 245 final double normalizedScore = 246 Utils.calcNormalizedScore(mOriginalText, bestSuggestion, bestScore); 247 hasLikelySuggestions = (normalizedScore > mLikelyThreshold); 248 if (DBG) { 249 Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore); 250 Log.i(TAG, "Normalized score = " + normalizedScore 251 + " (threshold " + mLikelyThreshold 252 + ") => hasLikelySuggestions = " + hasLikelySuggestions); 253 } 254 } 255 return new Result(gatheredSuggestions, hasLikelySuggestions); 256 } 257 } 258 259 @Override 260 public boolean onUnbind(final Intent intent) { 261 final Map<String, DictionaryPool> oldPools = mDictionaryPools; 262 mDictionaryPools = Collections.synchronizedMap(new TreeMap<String, DictionaryPool>()); 263 final Map<String, Dictionary> oldUserDictionaries = mUserDictionaries; 264 mUserDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>()); 265 final Map<String, Dictionary> oldWhitelistDictionaries = mWhitelistDictionaries; 266 mWhitelistDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>()); 267 for (DictionaryPool pool : oldPools.values()) { 268 pool.close(); 269 } 270 for (Dictionary dict : oldUserDictionaries.values()) { 271 dict.close(); 272 } 273 for (Dictionary dict : oldWhitelistDictionaries.values()) { 274 dict.close(); 275 } 276 if (null != mContactsDictionary) { 277 // The synchronously loaded contacts dictionary should have been in one 278 // or several pools, but it is shielded against multiple closing and it's 279 // safe to call it several times. 280 final SynchronouslyLoadedContactsDictionary dictToClose = mContactsDictionary; 281 mContactsDictionary = null; 282 dictToClose.close(); 283 } 284 return false; 285 } 286 287 private DictionaryPool getDictionaryPool(final String locale) { 288 DictionaryPool pool = mDictionaryPools.get(locale); 289 if (null == pool) { 290 final Locale localeObject = LocaleUtils.constructLocaleFromString(locale); 291 pool = new DictionaryPool(POOL_SIZE, this, localeObject); 292 mDictionaryPools.put(locale, pool); 293 } 294 return pool; 295 } 296 297 public DictAndProximity createDictAndProximity(final Locale locale) { 298 final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo(); 299 final Resources resources = getResources(); 300 final int fallbackResourceId = Utils.getMainDictionaryResourceId(resources); 301 final DictionaryCollection dictionaryCollection = 302 DictionaryFactory.createDictionaryFromManager(this, locale, fallbackResourceId, 303 USE_FULL_EDIT_DISTANCE_FLAG_ARRAY); 304 final String localeStr = locale.toString(); 305 Dictionary userDictionary = mUserDictionaries.get(localeStr); 306 if (null == userDictionary) { 307 userDictionary = new SynchronouslyLoadedUserDictionary(this, localeStr, true); 308 mUserDictionaries.put(localeStr, userDictionary); 309 } 310 dictionaryCollection.addDictionary(userDictionary); 311 Dictionary whitelistDictionary = mWhitelistDictionaries.get(localeStr); 312 if (null == whitelistDictionary) { 313 whitelistDictionary = new WhitelistDictionary(this, locale); 314 mWhitelistDictionaries.put(localeStr, whitelistDictionary); 315 } 316 dictionaryCollection.addDictionary(whitelistDictionary); 317 if (null == mContactsDictionary) { 318 mContactsDictionary = new SynchronouslyLoadedContactsDictionary(this); 319 } 320 // TODO: add a setting to use or not contacts when checking spelling 321 dictionaryCollection.addDictionary(mContactsDictionary); 322 return new DictAndProximity(dictionaryCollection, proximityInfo); 323 } 324 325 // This method assumes the text is not empty or null. 326 private static int getCapitalizationType(String text) { 327 // If the first char is not uppercase, then the word is either all lower case, 328 // and in either case we return CAPITALIZE_NONE. 329 if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE; 330 final int len = text.codePointCount(0, text.length()); 331 int capsCount = 1; 332 for (int i = 1; i < len; ++i) { 333 if (1 != capsCount && i != capsCount) break; 334 if (Character.isUpperCase(text.codePointAt(i))) ++capsCount; 335 } 336 // We know the first char is upper case. So we want to test if either everything 337 // else is lower case, or if everything else is upper case. If the string is 338 // exactly one char long, then we will arrive here with capsCount 1, and this is 339 // correct, too. 340 if (1 == capsCount) return CAPITALIZE_FIRST; 341 return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE); 342 } 343 344 private static class AndroidSpellCheckerSession extends Session { 345 // Immutable, but need the locale which is not available in the constructor yet 346 private DictionaryPool mDictionaryPool; 347 // Likewise 348 private Locale mLocale; 349 350 private final AndroidSpellCheckerService mService; 351 352 AndroidSpellCheckerSession(final AndroidSpellCheckerService service) { 353 mService = service; 354 } 355 356 @Override 357 public void onCreate() { 358 final String localeString = getLocale(); 359 mDictionaryPool = mService.getDictionaryPool(localeString); 360 mLocale = LocaleUtils.constructLocaleFromString(localeString); 361 } 362 363 /** 364 * Finds out whether a particular string should be filtered out of spell checking. 365 * 366 * This will loosely match URLs, numbers, symbols. 367 * 368 * @param text the string to evaluate. 369 * @return true if we should filter this text out, false otherwise 370 */ 371 private boolean shouldFilterOut(final String text) { 372 if (TextUtils.isEmpty(text) || text.length() <= 1) return true; 373 374 // TODO: check if an equivalent processing can't be done more quickly with a 375 // compiled regexp. 376 // Filter by first letter 377 final int firstCodePoint = text.codePointAt(0); 378 // Filter out words that don't start with a letter or an apostrophe 379 if (!Character.isLetter(firstCodePoint) 380 && '\'' != firstCodePoint) return true; 381 382 // Filter contents 383 final int length = text.length(); 384 int letterCount = 0; 385 for (int i = 0; i < length; ++i) { 386 final int codePoint = text.codePointAt(i); 387 // Any word containing a '@' is probably an e-mail address 388 // Any word containing a '/' is probably either an ad-hoc combination of two 389 // words or a URI - in either case we don't want to spell check that 390 if ('@' == codePoint 391 || '/' == codePoint) return true; 392 if (Character.isLetter(codePoint)) ++letterCount; 393 } 394 // Guestimate heuristic: perform spell checking if at least 3/4 of the characters 395 // in this word are letters 396 return (letterCount * 4 < length * 3); 397 } 398 399 // Note : this must be reentrant 400 /** 401 * Gets a list of suggestions for a specific string. This returns a list of possible 402 * corrections for the text passed as an argument. It may split or group words, and 403 * even perform grammatical analysis. 404 */ 405 @Override 406 public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, 407 final int suggestionsLimit) { 408 try { 409 final String text = textInfo.getText(); 410 411 if (shouldFilterOut(text)) { 412 DictAndProximity dictInfo = null; 413 try { 414 dictInfo = mDictionaryPool.takeOrGetNull(); 415 if (null == dictInfo) return getNotInDictEmptySuggestions(); 416 return dictInfo.mDictionary.isValidWord(text) ? getInDictEmptySuggestions() 417 : getNotInDictEmptySuggestions(); 418 } finally { 419 if (null != dictInfo) { 420 if (!mDictionaryPool.offer(dictInfo)) { 421 Log.e(TAG, "Can't re-insert a dictionary into its pool"); 422 } 423 } 424 } 425 } 426 427 // TODO: Don't gather suggestions if the limit is <= 0 unless necessary 428 final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text, 429 mService.mSuggestionThreshold, mService.mLikelyThreshold, suggestionsLimit); 430 final WordComposer composer = new WordComposer(); 431 final int length = text.length(); 432 for (int i = 0; i < length; ++i) { 433 final int character = text.codePointAt(i); 434 final int proximityIndex = SpellCheckerProximityInfo.getIndexOf(character); 435 final int[] proximities; 436 if (-1 == proximityIndex) { 437 proximities = new int[] { character }; 438 } else { 439 proximities = Arrays.copyOfRange(SpellCheckerProximityInfo.PROXIMITY, 440 proximityIndex, 441 proximityIndex + SpellCheckerProximityInfo.ROW_SIZE); 442 } 443 composer.add(character, proximities, 444 WordComposer.NOT_A_COORDINATE, WordComposer.NOT_A_COORDINATE); 445 } 446 447 final int capitalizeType = getCapitalizationType(text); 448 boolean isInDict = true; 449 DictAndProximity dictInfo = null; 450 try { 451 dictInfo = mDictionaryPool.takeOrGetNull(); 452 if (null == dictInfo) return getNotInDictEmptySuggestions(); 453 dictInfo.mDictionary.getWords(composer, suggestionsGatherer, 454 dictInfo.mProximityInfo); 455 isInDict = dictInfo.mDictionary.isValidWord(text); 456 if (!isInDict && CAPITALIZE_NONE != capitalizeType) { 457 // We want to test the word again if it's all caps or first caps only. 458 // If it's fully down, we already tested it, if it's mixed case, we don't 459 // want to test a lowercase version of it. 460 isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale)); 461 } 462 } finally { 463 if (null != dictInfo) { 464 if (!mDictionaryPool.offer(dictInfo)) { 465 Log.e(TAG, "Can't re-insert a dictionary into its pool"); 466 } 467 } 468 } 469 470 final SuggestionsGatherer.Result result = suggestionsGatherer.getResults( 471 capitalizeType, mLocale); 472 473 if (DBG) { 474 Log.i(TAG, "Spell checking results for " + text + " with suggestion limit " 475 + suggestionsLimit); 476 Log.i(TAG, "IsInDict = " + isInDict); 477 Log.i(TAG, "LooksLikeTypo = " + (!isInDict)); 478 Log.i(TAG, "HasLikelySuggestions = " + result.mHasLikelySuggestions); 479 if (null != result.mSuggestions) { 480 for (String suggestion : result.mSuggestions) { 481 Log.i(TAG, suggestion); 482 } 483 } 484 } 485 486 // TODO: actually use result.mHasLikelySuggestions 487 final int flags = 488 (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY 489 : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO); 490 return new SuggestionsInfo(flags, result.mSuggestions); 491 } catch (RuntimeException e) { 492 // Don't kill the keyboard if there is a bug in the spell checker 493 if (DBG) { 494 throw e; 495 } else { 496 Log.e(TAG, "Exception while spellcheking: " + e); 497 return getNotInDictEmptySuggestions(); 498 } 499 } 500 } 501 } 502 } 503