1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin; 18 19 import android.content.Context; 20 import android.preference.PreferenceManager; 21 import android.text.TextUtils; 22 import android.util.Log; 23 24 import com.android.inputmethod.annotations.UsedForTesting; 25 import com.android.inputmethod.keyboard.ProximityInfo; 26 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; 27 import com.android.inputmethod.latin.personalization.PersonalizationDictionary; 28 import com.android.inputmethod.latin.personalization.PersonalizationPredictionDictionary; 29 import com.android.inputmethod.latin.personalization.UserHistoryDictionary; 30 import com.android.inputmethod.latin.settings.Settings; 31 import com.android.inputmethod.latin.utils.AutoCorrectionUtils; 32 import com.android.inputmethod.latin.utils.BoundedTreeSet; 33 import com.android.inputmethod.latin.utils.CollectionUtils; 34 import com.android.inputmethod.latin.utils.StringUtils; 35 36 import java.util.ArrayList; 37 import java.util.Comparator; 38 import java.util.HashSet; 39 import java.util.Locale; 40 import java.util.concurrent.ConcurrentHashMap; 41 42 /** 43 * This class loads a dictionary and provides a list of suggestions for a given sequence of 44 * characters. This includes corrections and completions. 45 */ 46 public final class Suggest { 47 public static final String TAG = Suggest.class.getSimpleName(); 48 49 // Session id for 50 // {@link #getSuggestedWords(WordComposer,String,ProximityInfo,boolean,int)}. 51 // We are sharing the same ID between typing and gesture to save RAM footprint. 52 public static final int SESSION_TYPING = 0; 53 public static final int SESSION_GESTURE = 0; 54 55 // TODO: rename this to CORRECTION_OFF 56 public static final int CORRECTION_NONE = 0; 57 // TODO: rename this to CORRECTION_ON 58 public static final int CORRECTION_FULL = 1; 59 60 // Close to -2**31 61 private static final int SUPPRESS_SUGGEST_THRESHOLD = -2000000000; 62 63 public static final int MAX_SUGGESTIONS = 18; 64 65 public interface SuggestInitializationListener { 66 public void onUpdateMainDictionaryAvailability(boolean isMainDictionaryAvailable); 67 } 68 69 private static final boolean DBG = LatinImeLogger.sDBG; 70 71 private final ConcurrentHashMap<String, Dictionary> mDictionaries = 72 CollectionUtils.newConcurrentHashMap(); 73 private HashSet<String> mOnlyDictionarySetForDebug = null; 74 private Dictionary mMainDictionary; 75 private ContactsBinaryDictionary mContactsDict; 76 @UsedForTesting 77 private boolean mIsCurrentlyWaitingForMainDictionary = false; 78 79 private float mAutoCorrectionThreshold; 80 81 // Locale used for upper- and title-casing words 82 public final Locale mLocale; 83 84 public Suggest(final Context context, final Locale locale, 85 final SuggestInitializationListener listener) { 86 initAsynchronously(context, locale, listener); 87 mLocale = locale; 88 // initialize a debug flag for the personalization 89 if (Settings.readUseOnlyPersonalizationDictionaryForDebug( 90 PreferenceManager.getDefaultSharedPreferences(context))) { 91 mOnlyDictionarySetForDebug = new HashSet<String>(); 92 mOnlyDictionarySetForDebug.add(Dictionary.TYPE_PERSONALIZATION); 93 mOnlyDictionarySetForDebug.add(Dictionary.TYPE_PERSONALIZATION_PREDICTION_IN_JAVA); 94 } 95 } 96 97 @UsedForTesting 98 Suggest(final AssetFileAddress[] dictionaryList, final Locale locale) { 99 final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(dictionaryList, 100 false /* useFullEditDistance */, locale); 101 mLocale = locale; 102 mMainDictionary = mainDict; 103 addOrReplaceDictionaryInternal(Dictionary.TYPE_MAIN, mainDict); 104 } 105 106 private void initAsynchronously(final Context context, final Locale locale, 107 final SuggestInitializationListener listener) { 108 resetMainDict(context, locale, listener); 109 } 110 111 private void addOrReplaceDictionaryInternal(final String key, final Dictionary dict) { 112 if (mOnlyDictionarySetForDebug != null && !mOnlyDictionarySetForDebug.contains(key)) { 113 Log.w(TAG, "Ignore add " + key + " dictionary for debug."); 114 return; 115 } 116 addOrReplaceDictionary(mDictionaries, key, dict); 117 } 118 119 private static void addOrReplaceDictionary( 120 final ConcurrentHashMap<String, Dictionary> dictionaries, 121 final String key, final Dictionary dict) { 122 final Dictionary oldDict = (dict == null) 123 ? dictionaries.remove(key) 124 : dictionaries.put(key, dict); 125 if (oldDict != null && dict != oldDict) { 126 oldDict.close(); 127 } 128 } 129 130 public void resetMainDict(final Context context, final Locale locale, 131 final SuggestInitializationListener listener) { 132 mIsCurrentlyWaitingForMainDictionary = true; 133 mMainDictionary = null; 134 if (listener != null) { 135 listener.onUpdateMainDictionaryAvailability(hasMainDictionary()); 136 } 137 new Thread("InitializeBinaryDictionary") { 138 @Override 139 public void run() { 140 final DictionaryCollection newMainDict = 141 DictionaryFactory.createMainDictionaryFromManager(context, locale); 142 addOrReplaceDictionaryInternal(Dictionary.TYPE_MAIN, newMainDict); 143 mMainDictionary = newMainDict; 144 if (listener != null) { 145 listener.onUpdateMainDictionaryAvailability(hasMainDictionary()); 146 } 147 mIsCurrentlyWaitingForMainDictionary = false; 148 } 149 }.start(); 150 } 151 152 // The main dictionary could have been loaded asynchronously. Don't cache the return value 153 // of this method. 154 public boolean hasMainDictionary() { 155 return null != mMainDictionary && mMainDictionary.isInitialized(); 156 } 157 158 @UsedForTesting 159 public boolean isCurrentlyWaitingForMainDictionary() { 160 return mIsCurrentlyWaitingForMainDictionary; 161 } 162 163 public Dictionary getMainDictionary() { 164 return mMainDictionary; 165 } 166 167 public ContactsBinaryDictionary getContactsDictionary() { 168 return mContactsDict; 169 } 170 171 public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() { 172 return mDictionaries; 173 } 174 175 /** 176 * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted 177 * before the main dictionary, if set. This refers to the system-managed user dictionary. 178 */ 179 public void setUserDictionary(final UserBinaryDictionary userDictionary) { 180 addOrReplaceDictionaryInternal(Dictionary.TYPE_USER, userDictionary); 181 } 182 183 /** 184 * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove 185 * the contacts dictionary by passing null to this method. In this case no contacts dictionary 186 * won't be used. 187 */ 188 public void setContactsDictionary(final ContactsBinaryDictionary contactsDictionary) { 189 mContactsDict = contactsDictionary; 190 addOrReplaceDictionaryInternal(Dictionary.TYPE_CONTACTS, contactsDictionary); 191 } 192 193 public void setUserHistoryDictionary(final UserHistoryDictionary userHistoryDictionary) { 194 addOrReplaceDictionaryInternal(Dictionary.TYPE_USER_HISTORY, userHistoryDictionary); 195 } 196 197 public void setPersonalizationPredictionDictionary( 198 final PersonalizationPredictionDictionary personalizationPredictionDictionary) { 199 addOrReplaceDictionaryInternal(Dictionary.TYPE_PERSONALIZATION_PREDICTION_IN_JAVA, 200 personalizationPredictionDictionary); 201 } 202 203 public void setPersonalizationDictionary( 204 final PersonalizationDictionary personalizationDictionary) { 205 addOrReplaceDictionaryInternal(Dictionary.TYPE_PERSONALIZATION, 206 personalizationDictionary); 207 } 208 209 public void setAutoCorrectionThreshold(float threshold) { 210 mAutoCorrectionThreshold = threshold; 211 } 212 213 public interface OnGetSuggestedWordsCallback { 214 public void onGetSuggestedWords(final SuggestedWords suggestedWords); 215 } 216 217 public void getSuggestedWords(final WordComposer wordComposer, 218 final String prevWordForBigram, final ProximityInfo proximityInfo, 219 final boolean blockOffensiveWords, final boolean isCorrectionEnabled, 220 final int[] additionalFeaturesOptions, final int sessionId, 221 final OnGetSuggestedWordsCallback callback) { 222 LatinImeLogger.onStartSuggestion(prevWordForBigram); 223 if (wordComposer.isBatchMode()) { 224 getSuggestedWordsForBatchInput(wordComposer, prevWordForBigram, proximityInfo, 225 blockOffensiveWords, additionalFeaturesOptions, sessionId, callback); 226 } else { 227 getSuggestedWordsForTypingInput(wordComposer, prevWordForBigram, proximityInfo, 228 blockOffensiveWords, isCorrectionEnabled, additionalFeaturesOptions, callback); 229 } 230 } 231 232 // Retrieves suggestions for the typing input 233 // and calls the callback function with the suggestions. 234 private void getSuggestedWordsForTypingInput(final WordComposer wordComposer, 235 final String prevWordForBigram, final ProximityInfo proximityInfo, 236 final boolean blockOffensiveWords, final boolean isCorrectionEnabled, 237 final int[] additionalFeaturesOptions, final OnGetSuggestedWordsCallback callback) { 238 final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount(); 239 final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator, 240 MAX_SUGGESTIONS); 241 242 final String typedWord = wordComposer.getTypedWord(); 243 final String consideredWord = trailingSingleQuotesCount > 0 244 ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount) 245 : typedWord; 246 LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED); 247 248 final WordComposer wordComposerForLookup; 249 if (trailingSingleQuotesCount > 0) { 250 wordComposerForLookup = new WordComposer(wordComposer); 251 for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) { 252 wordComposerForLookup.deleteLast(); 253 } 254 } else { 255 wordComposerForLookup = wordComposer; 256 } 257 258 for (final String key : mDictionaries.keySet()) { 259 final Dictionary dictionary = mDictionaries.get(key); 260 suggestionsSet.addAll(dictionary.getSuggestions(wordComposerForLookup, 261 prevWordForBigram, proximityInfo, blockOffensiveWords, 262 additionalFeaturesOptions)); 263 } 264 265 final String whitelistedWord; 266 if (suggestionsSet.isEmpty()) { 267 whitelistedWord = null; 268 } else if (SuggestedWordInfo.KIND_WHITELIST != suggestionsSet.first().mKind) { 269 whitelistedWord = null; 270 } else { 271 whitelistedWord = suggestionsSet.first().mWord; 272 } 273 274 // The word can be auto-corrected if it has a whitelist entry that is not itself, 275 // or if it's a 2+ characters non-word (i.e. it's not in the dictionary). 276 final boolean allowsToBeAutoCorrected = (null != whitelistedWord 277 && !whitelistedWord.equals(consideredWord)) 278 || (consideredWord.length() > 1 && !AutoCorrectionUtils.isValidWord(this, 279 consideredWord, wordComposer.isFirstCharCapitalized())); 280 281 final boolean hasAutoCorrection; 282 // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because 283 // any attempt to do auto-correction is already shielded with a test for this flag; at the 284 // same time, it feels wrong that the SuggestedWord object includes information about 285 // the current settings. It may also be useful to know, when the setting is off, whether 286 // the word *would* have been auto-corrected. 287 if (!isCorrectionEnabled || !allowsToBeAutoCorrected || !wordComposer.isComposingWord() 288 || suggestionsSet.isEmpty() || wordComposer.hasDigits() 289 || wordComposer.isMostlyCaps() || wordComposer.isResumed() || !hasMainDictionary() 290 || SuggestedWordInfo.KIND_SHORTCUT == suggestionsSet.first().mKind) { 291 // If we don't have a main dictionary, we never want to auto-correct. The reason for 292 // this is, the user may have a contact whose name happens to match a valid word in 293 // their language, and it will unexpectedly auto-correct. For example, if the user 294 // types in English with no dictionary and has a "Will" in their contact list, "will" 295 // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no 296 // auto-correct. 297 // Also, shortcuts should never auto-correct unless they are whitelist entries. 298 // TODO: we may want to have shortcut-only entries auto-correct in the future. 299 hasAutoCorrection = false; 300 } else { 301 hasAutoCorrection = AutoCorrectionUtils.suggestionExceedsAutoCorrectionThreshold( 302 suggestionsSet.first(), consideredWord, mAutoCorrectionThreshold); 303 } 304 305 final ArrayList<SuggestedWordInfo> suggestionsContainer = 306 CollectionUtils.newArrayList(suggestionsSet); 307 final int suggestionsCount = suggestionsContainer.size(); 308 final boolean isFirstCharCapitalized = wordComposer.isFirstCharCapitalized(); 309 final boolean isAllUpperCase = wordComposer.isAllUpperCase(); 310 if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) { 311 for (int i = 0; i < suggestionsCount; ++i) { 312 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); 313 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo( 314 wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized, 315 trailingSingleQuotesCount); 316 suggestionsContainer.set(i, transformedWordInfo); 317 } 318 } 319 320 for (int i = 0; i < suggestionsCount; ++i) { 321 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); 322 LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), 323 wordInfo.mSourceDict.mDictType); 324 } 325 326 if (!TextUtils.isEmpty(typedWord)) { 327 suggestionsContainer.add(0, new SuggestedWordInfo(typedWord, 328 SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED, 329 Dictionary.DICTIONARY_USER_TYPED, 330 SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */, 331 SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */)); 332 } 333 SuggestedWordInfo.removeDups(suggestionsContainer); 334 335 final ArrayList<SuggestedWordInfo> suggestionsList; 336 if (DBG && !suggestionsContainer.isEmpty()) { 337 suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer); 338 } else { 339 suggestionsList = suggestionsContainer; 340 } 341 342 callback.onGetSuggestedWords(new SuggestedWords(suggestionsList, 343 // TODO: this first argument is lying. If this is a whitelisted word which is an 344 // actual word, it says typedWordValid = false, which looks wrong. We should either 345 // rename the attribute or change the value. 346 !allowsToBeAutoCorrected /* typedWordValid */, 347 hasAutoCorrection, /* willAutoCorrect */ 348 false /* isPunctuationSuggestions */, 349 false /* isObsoleteSuggestions */, 350 !wordComposer.isComposingWord() /* isPrediction */)); 351 } 352 353 // Retrieves suggestions for the batch input 354 // and calls the callback function with the suggestions. 355 private void getSuggestedWordsForBatchInput(final WordComposer wordComposer, 356 final String prevWordForBigram, final ProximityInfo proximityInfo, 357 final boolean blockOffensiveWords, final int[] additionalFeaturesOptions, 358 final int sessionId, final OnGetSuggestedWordsCallback callback) { 359 final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator, 360 MAX_SUGGESTIONS); 361 362 // At second character typed, search the unigrams (scores being affected by bigrams) 363 for (final String key : mDictionaries.keySet()) { 364 final Dictionary dictionary = mDictionaries.get(key); 365 suggestionsSet.addAll(dictionary.getSuggestionsWithSessionId(wordComposer, 366 prevWordForBigram, proximityInfo, blockOffensiveWords, 367 additionalFeaturesOptions, sessionId)); 368 } 369 370 for (SuggestedWordInfo wordInfo : suggestionsSet) { 371 LatinImeLogger.onAddSuggestedWord(wordInfo.mWord, wordInfo.mSourceDict.mDictType); 372 } 373 374 final ArrayList<SuggestedWordInfo> suggestionsContainer = 375 CollectionUtils.newArrayList(suggestionsSet); 376 final int suggestionsCount = suggestionsContainer.size(); 377 final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock(); 378 final boolean isAllUpperCase = wordComposer.isAllUpperCase(); 379 if (isFirstCharCapitalized || isAllUpperCase) { 380 for (int i = 0; i < suggestionsCount; ++i) { 381 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); 382 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo( 383 wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized, 384 0 /* trailingSingleQuotesCount */); 385 suggestionsContainer.set(i, transformedWordInfo); 386 } 387 } 388 389 if (suggestionsContainer.size() > 1 && TextUtils.equals(suggestionsContainer.get(0).mWord, 390 wordComposer.getRejectedBatchModeSuggestion())) { 391 final SuggestedWordInfo rejected = suggestionsContainer.remove(0); 392 suggestionsContainer.add(1, rejected); 393 } 394 SuggestedWordInfo.removeDups(suggestionsContainer); 395 396 // For some reason some suggestions with MIN_VALUE are making their way here. 397 // TODO: Find a more robust way to detect distractors. 398 for (int i = suggestionsContainer.size() - 1; i >= 0; --i) { 399 if (suggestionsContainer.get(i).mScore < SUPPRESS_SUGGEST_THRESHOLD) { 400 suggestionsContainer.remove(i); 401 } 402 } 403 404 // In the batch input mode, the most relevant suggested word should act as a "typed word" 405 // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false). 406 callback.onGetSuggestedWords(new SuggestedWords(suggestionsContainer, 407 true /* typedWordValid */, 408 false /* willAutoCorrect */, 409 false /* isPunctuationSuggestions */, 410 false /* isObsoleteSuggestions */, 411 false /* isPrediction */)); 412 } 413 414 private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo( 415 final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) { 416 final SuggestedWordInfo typedWordInfo = suggestions.get(0); 417 typedWordInfo.setDebugString("+"); 418 final int suggestionsSize = suggestions.size(); 419 final ArrayList<SuggestedWordInfo> suggestionsList = 420 CollectionUtils.newArrayList(suggestionsSize); 421 suggestionsList.add(typedWordInfo); 422 // Note: i here is the index in mScores[], but the index in mSuggestions is one more 423 // than i because we added the typed word to mSuggestions without touching mScores. 424 for (int i = 0; i < suggestionsSize - 1; ++i) { 425 final SuggestedWordInfo cur = suggestions.get(i + 1); 426 final float normalizedScore = BinaryDictionary.calcNormalizedScore( 427 typedWord, cur.toString(), cur.mScore); 428 final String scoreInfoString; 429 if (normalizedScore > 0) { 430 scoreInfoString = String.format( 431 Locale.ROOT, "%d (%4.2f)", cur.mScore, normalizedScore); 432 } else { 433 scoreInfoString = Integer.toString(cur.mScore); 434 } 435 cur.setDebugString(scoreInfoString); 436 suggestionsList.add(cur); 437 } 438 return suggestionsList; 439 } 440 441 private static final class SuggestedWordInfoComparator 442 implements Comparator<SuggestedWordInfo> { 443 // This comparator ranks the word info with the higher frequency first. That's because 444 // that's the order we want our elements in. 445 @Override 446 public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) { 447 if (o1.mScore > o2.mScore) return -1; 448 if (o1.mScore < o2.mScore) return 1; 449 if (o1.mCodePointCount < o2.mCodePointCount) return -1; 450 if (o1.mCodePointCount > o2.mCodePointCount) return 1; 451 return o1.mWord.compareTo(o2.mWord); 452 } 453 } 454 private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator = 455 new SuggestedWordInfoComparator(); 456 457 /* package for test */ static SuggestedWordInfo getTransformedSuggestedWordInfo( 458 final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase, 459 final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) { 460 final StringBuilder sb = new StringBuilder(wordInfo.mWord.length()); 461 if (isAllUpperCase) { 462 sb.append(wordInfo.mWord.toUpperCase(locale)); 463 } else if (isFirstCharCapitalized) { 464 sb.append(StringUtils.capitalizeFirstCodePoint(wordInfo.mWord, locale)); 465 } else { 466 sb.append(wordInfo.mWord); 467 } 468 // Appending quotes is here to help people quote words. However, it's not helpful 469 // when they type words with quotes toward the end like "it's" or "didn't", where 470 // it's more likely the user missed the last character (or didn't type it yet). 471 final int quotesToAppend = trailingSingleQuotesCount 472 - (-1 == wordInfo.mWord.indexOf(Constants.CODE_SINGLE_QUOTE) ? 0 : 1); 473 for (int i = quotesToAppend - 1; i >= 0; --i) { 474 sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE); 475 } 476 return new SuggestedWordInfo(sb.toString(), wordInfo.mScore, wordInfo.mKind, 477 wordInfo.mSourceDict, wordInfo.mIndexOfTouchPointOfSecondWord, 478 SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */); 479 } 480 481 public void close() { 482 final HashSet<Dictionary> dictionaries = CollectionUtils.newHashSet(); 483 dictionaries.addAll(mDictionaries.values()); 484 for (final Dictionary dictionary : dictionaries) { 485 dictionary.close(); 486 } 487 mMainDictionary = null; 488 } 489 } 490