/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin;

import android.content.Context;
import android.preference.PreferenceManager;
import android.text.TextUtils;
import android.util.Log;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.personalization.PersonalizationDictionary;
import com.android.inputmethod.latin.personalization.PersonalizationPredictionDictionary;
import com.android.inputmethod.latin.personalization.UserHistoryDictionary;
import com.android.inputmethod.latin.settings.Settings;
import com.android.inputmethod.latin.utils.AutoCorrectionUtils;
import com.android.inputmethod.latin.utils.BoundedTreeSet;
import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.StringUtils;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Locale;
import java.util.concurrent.ConcurrentHashMap;

/**
 * This class loads a dictionary and provides a list of suggestions for a given sequence of
 * characters. This includes corrections and completions.
 *
 * <p>The main dictionary is loaded on a background thread (see {@link #resetMainDict}), so the
 * dictionary map and the main-dictionary reference may change while suggestions are being
 * computed on another thread.
 */
public final class Suggest {
    public static final String TAG = Suggest.class.getSimpleName();

    // Session ids passed to getSuggestedWords().
    // We are sharing the same ID between typing and gesture to save RAM footprint.
    public static final int SESSION_TYPING = 0;
    public static final int SESSION_GESTURE = 0;

    // TODO: rename this to CORRECTION_OFF
    public static final int CORRECTION_NONE = 0;
    // TODO: rename this to CORRECTION_ON
    public static final int CORRECTION_FULL = 1;

    // Close to -2**31
    private static final int SUPPRESS_SUGGEST_THRESHOLD = -2000000000;

    public static final int MAX_SUGGESTIONS = 18;

    /**
     * Receives notifications when the availability of the main dictionary changes.
     */
    public interface SuggestInitializationListener {
        public void onUpdateMainDictionaryAvailability(boolean isMainDictionaryAvailable);
    }

    private static final boolean DBG = LatinImeLogger.sDBG;

    // All dictionaries consulted for suggestions, keyed by dictionary type.
    private final ConcurrentHashMap<String, Dictionary> mDictionaries =
            CollectionUtils.newConcurrentHashMap();
    // When non-null, only the dictionary types in this set may be registered (debug aid).
    private HashSet<String> mOnlyDictionarySetForDebug = null;
    // Written by the loader thread started in resetMainDict() and read by other threads,
    // hence volatile.
    private volatile Dictionary mMainDictionary;
    private ContactsBinaryDictionary mContactsDict;
    // Written by the loader thread started in resetMainDict() and polled by other threads,
    // hence volatile.
    @UsedForTesting
    private volatile boolean mIsCurrentlyWaitingForMainDictionary = false;

    private float mAutoCorrectionThreshold;

    // Locale used for upper- and title-casing words
    public final Locale mLocale;

    /**
     * Creates a suggest engine and starts loading the main dictionary in the background.
     *
     * @param context the context used to read preferences and to load the main dictionary
     * @param locale the locale of the main dictionary; also used for casing suggestions
     * @param listener notified about the main dictionary availability; may be null
     */
    public Suggest(final Context context, final Locale locale,
            final SuggestInitializationListener listener) {
        mLocale = locale;
        // initialize a debug flag for the personalization
        if (Settings.readUseOnlyPersonalizationDictionaryForDebug(
                PreferenceManager.getDefaultSharedPreferences(context))) {
            mOnlyDictionarySetForDebug = new HashSet<String>();
            mOnlyDictionarySetForDebug.add(Dictionary.TYPE_PERSONALIZATION);
            mOnlyDictionarySetForDebug.add(Dictionary.TYPE_PERSONALIZATION_PREDICTION_IN_JAVA);
        }
        // Start the asynchronous load only once all state above is set up: the loader thread
        // consults mOnlyDictionarySetForDebug, and starting it earlier would let it race past
        // the debug filter and register the main dictionary anyway.
        initAsynchronously(context, locale, listener);
    }

    /**
     * Creates a suggest engine whose main dictionary is built synchronously from the given
     * dictionary files. For tests only.
     */
    @UsedForTesting
    Suggest(final AssetFileAddress[] dictionaryList, final Locale locale) {
        final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(dictionaryList,
                false /* useFullEditDistance */, locale);
        mLocale = locale;
        mMainDictionary = mainDict;
        addOrReplaceDictionaryInternal(Dictionary.TYPE_MAIN, mainDict);
    }

    private void initAsynchronously(final Context context, final Locale locale,
            final SuggestInitializationListener listener) {
        resetMainDict(context, locale, listener);
    }

    /**
     * Registers, replaces or (when {@code dict} is null) removes the dictionary stored under
     * {@code key}, unless the debug-only filter is active and excludes that key.
     */
    private void addOrReplaceDictionaryInternal(final String key, final Dictionary dict) {
        if (mOnlyDictionarySetForDebug != null && !mOnlyDictionarySetForDebug.contains(key)) {
            Log.w(TAG, "Ignore add " + key + " dictionary for debug.");
            return;
        }
        addOrReplaceDictionary(mDictionaries, key, dict);
    }

    /**
     * Puts {@code dict} under {@code key}, or removes the entry when {@code dict} is null, and
     * closes the dictionary that was previously stored there if it is a different instance.
     */
    private static void addOrReplaceDictionary(
            final ConcurrentHashMap<String, Dictionary> dictionaries,
            final String key, final Dictionary dict) {
        // ConcurrentHashMap does not accept null values, so null means "remove".
        final Dictionary oldDict = (dict == null)
                ? dictionaries.remove(key)
                : dictionaries.put(key, dict);
        if (oldDict != null && dict != oldDict) {
            oldDict.close();
        }
    }

    /**
     * Drops the current main dictionary reference and loads a new one for {@code locale} on a
     * background thread.
     *
     * @param context the context used to load the dictionary
     * @param locale the locale of the dictionary to load
     * @param listener notified once immediately and once after the load completes; may be null
     */
    public void resetMainDict(final Context context, final Locale locale,
            final SuggestInitializationListener listener) {
        mIsCurrentlyWaitingForMainDictionary = true;
        mMainDictionary = null;
        if (listener != null) {
            listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
        }
        new Thread("InitializeBinaryDictionary") {
            @Override
            public void run() {
                final DictionaryCollection newMainDict =
                        DictionaryFactory.createMainDictionaryFromManager(context, locale);
                addOrReplaceDictionaryInternal(Dictionary.TYPE_MAIN, newMainDict);
                mMainDictionary = newMainDict;
                if (listener != null) {
                    listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
                }
                mIsCurrentlyWaitingForMainDictionary = false;
            }
        }.start();
    }

    // The main dictionary could have been loaded asynchronously.  Don't cache the return value
    // of this method.
    public boolean hasMainDictionary() {
        // Read the field once: another thread may null it between the check and the call.
        final Dictionary mainDictionary = mMainDictionary;
        return null != mainDictionary && mainDictionary.isInitialized();
    }

    @UsedForTesting
    public boolean isCurrentlyWaitingForMainDictionary() {
        return mIsCurrentlyWaitingForMainDictionary;
    }

    /**
     * @return the main dictionary, or null if it has not been loaded yet or was released
     */
    public Dictionary getMainDictionary() {
        return mMainDictionary;
    }

    /**
     * @return the contacts dictionary last passed to {@link #setContactsDictionary}, or null
     */
    public ContactsBinaryDictionary getContactsDictionary() {
        return mContactsDict;
    }

    /**
     * @return the live map of registered dictionaries, keyed by dictionary type
     */
    public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() {
        return mDictionaries;
    }

    /**
     * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted
     * before the main dictionary, if set. This refers to the system-managed user dictionary.
     */
    public void setUserDictionary(final UserBinaryDictionary userDictionary) {
        addOrReplaceDictionaryInternal(Dictionary.TYPE_USER, userDictionary);
    }

    /**
     * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove
     * the contacts dictionary by passing null to this method. In this case no contacts dictionary
     * will be used.
     */
    public void setContactsDictionary(final ContactsBinaryDictionary contactsDictionary) {
        mContactsDict = contactsDictionary;
        addOrReplaceDictionaryInternal(Dictionary.TYPE_CONTACTS, contactsDictionary);
    }

    /**
     * Sets, replaces or (with null) removes the user history dictionary.
     */
    public void setUserHistoryDictionary(final UserHistoryDictionary userHistoryDictionary) {
        addOrReplaceDictionaryInternal(Dictionary.TYPE_USER_HISTORY, userHistoryDictionary);
    }

    /**
     * Sets, replaces or (with null) removes the personalization prediction dictionary.
     */
    public void setPersonalizationPredictionDictionary(
            final PersonalizationPredictionDictionary personalizationPredictionDictionary) {
        addOrReplaceDictionaryInternal(Dictionary.TYPE_PERSONALIZATION_PREDICTION_IN_JAVA,
                personalizationPredictionDictionary);
    }

    /**
     * Sets, replaces or (with null) removes the personalization dictionary.
     */
    public void setPersonalizationDictionary(
            final PersonalizationDictionary personalizationDictionary) {
        addOrReplaceDictionaryInternal(Dictionary.TYPE_PERSONALIZATION,
                personalizationDictionary);
    }

    /**
     * Sets the threshold passed to the auto-correction check for typing input.
     */
    public void setAutoCorrectionThreshold(float threshold) {
        mAutoCorrectionThreshold = threshold;
    }

    /**
     * Receives the result of {@link #getSuggestedWords}.
     */
    public interface OnGetSuggestedWordsCallback {
        public void onGetSuggestedWords(final SuggestedWords suggestedWords);
    }

    /**
     * Computes suggestions for the current composition and hands them to {@code callback}.
     * Dispatches to the batch (gesture) or the typing implementation depending on
     * {@link WordComposer#isBatchMode()}. Note that {@code sessionId} is only forwarded for
     * batch input and {@code isCorrectionEnabled} only for typing input.
     */
    public void getSuggestedWords(final WordComposer wordComposer,
            final String prevWordForBigram, final ProximityInfo proximityInfo,
            final boolean blockOffensiveWords, final boolean isCorrectionEnabled,
            final int[] additionalFeaturesOptions, final int sessionId, final int sequenceNumber,
            final OnGetSuggestedWordsCallback callback) {
        LatinImeLogger.onStartSuggestion(prevWordForBigram);
        if (wordComposer.isBatchMode()) {
            getSuggestedWordsForBatchInput(wordComposer, prevWordForBigram, proximityInfo,
                    blockOffensiveWords, additionalFeaturesOptions, sessionId, sequenceNumber,
                    callback);
        } else {
            getSuggestedWordsForTypingInput(wordComposer, prevWordForBigram, proximityInfo,
                    blockOffensiveWords, isCorrectionEnabled, additionalFeaturesOptions,
                    sequenceNumber, callback);
        }
    }

    // Retrieves suggestions for the typing input
    // and calls the callback function with the suggestions.
    private void getSuggestedWordsForTypingInput(final WordComposer wordComposer,
            final String prevWordForBigram, final ProximityInfo proximityInfo,
            final boolean blockOffensiveWords, final boolean isCorrectionEnabled,
            final int[] additionalFeaturesOptions, final int sequenceNumber,
            final OnGetSuggestedWordsCallback callback) {
        final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
                MAX_SUGGESTIONS);

        // The word looked up is the typed word stripped of its trailing single quotes.
        final String typedWord = wordComposer.getTypedWord();
        final String consideredWord = trailingSingleQuotesCount > 0
                ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount)
                : typedWord;
        LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED);

        final WordComposer wordComposerForLookup;
        if (trailingSingleQuotesCount > 0) {
            // Work on a copy so the caller's composer keeps its trailing quotes.
            wordComposerForLookup = new WordComposer(wordComposer);
            for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
                wordComposerForLookup.deleteLast();
            }
        } else {
            wordComposerForLookup = wordComposer;
        }

        for (final String key : mDictionaries.keySet()) {
            final Dictionary dictionary = mDictionaries.get(key);
            // The entry may have been removed by another thread since the key was read.
            if (null == dictionary) continue;
            suggestionsSet.addAll(dictionary.getSuggestions(wordComposerForLookup,
                    prevWordForBigram, proximityInfo, blockOffensiveWords,
                    additionalFeaturesOptions));
        }

        final String whitelistedWord;
        if (suggestionsSet.isEmpty()) {
            whitelistedWord = null;
        } else if (SuggestedWordInfo.KIND_WHITELIST != suggestionsSet.first().mKind) {
            whitelistedWord = null;
        } else {
            whitelistedWord = suggestionsSet.first().mWord;
        }

        // The word can be auto-corrected if it has a whitelist entry that is not itself,
        // or if it's a 2+ characters non-word (i.e. it's not in the dictionary).
        final boolean allowsToBeAutoCorrected = (null != whitelistedWord
                && !whitelistedWord.equals(consideredWord))
                || (consideredWord.length() > 1 && !AutoCorrectionUtils.isValidWord(this,
                        consideredWord, wordComposer.isFirstCharCapitalized()));

        final boolean hasAutoCorrection;
        // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because
        // any attempt to do auto-correction is already shielded with a test for this flag; at the
        // same time, it feels wrong that the SuggestedWord object includes information about
        // the current settings. It may also be useful to know, when the setting is off, whether
        // the word *would* have been auto-corrected.
        if (!isCorrectionEnabled || !allowsToBeAutoCorrected || !wordComposer.isComposingWord()
                || suggestionsSet.isEmpty() || wordComposer.hasDigits()
                || wordComposer.isMostlyCaps() || wordComposer.isResumed() || !hasMainDictionary()
                || SuggestedWordInfo.KIND_SHORTCUT == suggestionsSet.first().mKind) {
            // If we don't have a main dictionary, we never want to auto-correct. The reason for
            // this is, the user may have a contact whose name happens to match a valid word in
            // their language, and it will unexpectedly auto-correct. For example, if the user
            // types in English with no dictionary and has a "Will" in their contact list, "will"
            // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no
            // auto-correct.
            // Also, shortcuts should never auto-correct unless they are whitelist entries.
            // TODO: we may want to have shortcut-only entries auto-correct in the future.
            hasAutoCorrection = false;
        } else {
            hasAutoCorrection = AutoCorrectionUtils.suggestionExceedsAutoCorrectionThreshold(
                    suggestionsSet.first(), consideredWord, mAutoCorrectionThreshold);
        }

        final ArrayList<SuggestedWordInfo> suggestionsContainer =
                CollectionUtils.newArrayList(suggestionsSet);
        final int suggestionsCount = suggestionsContainer.size();
        final boolean isFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
        // Re-case the suggestions and re-append trailing quotes to match what was typed.
        if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) {
            for (int i = 0; i < suggestionsCount; ++i) {
                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
                        trailingSingleQuotesCount);
                suggestionsContainer.set(i, transformedWordInfo);
            }
        }

        for (int i = 0; i < suggestionsCount; ++i) {
            final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(),
                    wordInfo.mSourceDict.mDictType);
        }

        // The typed word, if any, always comes first in the list.
        if (!TextUtils.isEmpty(typedWord)) {
            suggestionsContainer.add(0, new SuggestedWordInfo(typedWord,
                    SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
                    Dictionary.DICTIONARY_USER_TYPED,
                    SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */,
                    SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */));
        }
        SuggestedWordInfo.removeDups(suggestionsContainer);

        final ArrayList<SuggestedWordInfo> suggestionsList;
        if (DBG && !suggestionsContainer.isEmpty()) {
            suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer);
        } else {
            suggestionsList = suggestionsContainer;
        }

        callback.onGetSuggestedWords(new SuggestedWords(suggestionsList,
                // TODO: this first argument is lying. If this is a whitelisted word which is an
                // actual word, it says typedWordValid = false, which looks wrong. We should either
                // rename the attribute or change the value.
                !allowsToBeAutoCorrected /* typedWordValid */,
                hasAutoCorrection, /* willAutoCorrect */
                false /* isPunctuationSuggestions */,
                false /* isObsoleteSuggestions */,
                !wordComposer.isComposingWord() /* isPrediction */, sequenceNumber));
    }

    // Retrieves suggestions for the batch input
    // and calls the callback function with the suggestions.
    private void getSuggestedWordsForBatchInput(final WordComposer wordComposer,
            final String prevWordForBigram, final ProximityInfo proximityInfo,
            final boolean blockOffensiveWords, final int[] additionalFeaturesOptions,
            final int sessionId, final int sequenceNumber,
            final OnGetSuggestedWordsCallback callback) {
        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
                MAX_SUGGESTIONS);

        // At second character typed, search the unigrams (scores being affected by bigrams)
        for (final String key : mDictionaries.keySet()) {
            final Dictionary dictionary = mDictionaries.get(key);
            // The entry may have been removed by another thread since the key was read.
            if (null == dictionary) continue;
            suggestionsSet.addAll(dictionary.getSuggestionsWithSessionId(wordComposer,
                    prevWordForBigram, proximityInfo, blockOffensiveWords,
                    additionalFeaturesOptions, sessionId));
        }

        for (SuggestedWordInfo wordInfo : suggestionsSet) {
            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord, wordInfo.mSourceDict.mDictType);
        }

        final ArrayList<SuggestedWordInfo> suggestionsContainer =
                CollectionUtils.newArrayList(suggestionsSet);
        final int suggestionsCount = suggestionsContainer.size();
        final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock();
        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
        if (isFirstCharCapitalized || isAllUpperCase) {
            for (int i = 0; i < suggestionsCount; ++i) {
                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
                        0 /* trailingSingleQuotesCount */);
                suggestionsContainer.set(i, transformedWordInfo);
            }
        }

        // If the top suggestion is the one recorded as rejected for batch mode, demote it to
        // the second position.
        if (suggestionsContainer.size() > 1 && TextUtils.equals(suggestionsContainer.get(0).mWord,
                wordComposer.getRejectedBatchModeSuggestion())) {
            final SuggestedWordInfo rejected = suggestionsContainer.remove(0);
            suggestionsContainer.add(1, rejected);
        }
        SuggestedWordInfo.removeDups(suggestionsContainer);

        // For some reason some suggestions with MIN_VALUE are making their way here.
        // TODO: Find a more robust way to detect distractors.
        // Iterate backwards so removals do not shift the indices still to be visited.
        for (int i = suggestionsContainer.size() - 1; i >= 0; --i) {
            if (suggestionsContainer.get(i).mScore < SUPPRESS_SUGGEST_THRESHOLD) {
                suggestionsContainer.remove(i);
            }
        }

        // In the batch input mode, the most relevant suggested word should act as a "typed word"
        // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
        callback.onGetSuggestedWords(new SuggestedWords(suggestionsContainer,
                true /* typedWordValid */,
                false /* willAutoCorrect */,
                false /* isPunctuationSuggestions */,
                false /* isObsoleteSuggestions */,
                false /* isPrediction */, sequenceNumber));
    }

    /**
     * Returns a copy of {@code suggestions} in which the first entry (the typed word) is tagged
     * with "+" and every other entry is tagged with its score, plus its normalized score when
     * that is positive. The list must not be empty.
     */
    private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
            final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
        final SuggestedWordInfo typedWordInfo = suggestions.get(0);
        typedWordInfo.setDebugString("+");
        final int suggestionsSize = suggestions.size();
        final ArrayList<SuggestedWordInfo> suggestionsList =
                CollectionUtils.newArrayList(suggestionsSize);
        suggestionsList.add(typedWordInfo);
        // Note: i here is the index in mScores[], but the index in mSuggestions is one more
        // than i because we added the typed word to mSuggestions without touching mScores.
        for (int i = 0; i < suggestionsSize - 1; ++i) {
            final SuggestedWordInfo cur = suggestions.get(i + 1);
            final float normalizedScore = BinaryDictionary.calcNormalizedScore(
                    typedWord, cur.toString(), cur.mScore);
            final String scoreInfoString;
            if (normalizedScore > 0) {
                scoreInfoString = String.format(
                        Locale.ROOT, "%d (%4.2f)", cur.mScore, normalizedScore);
            } else {
                scoreInfoString = Integer.toString(cur.mScore);
            }
            cur.setDebugString(scoreInfoString);
            suggestionsList.add(cur);
        }
        return suggestionsList;
    }

    private static final class SuggestedWordInfoComparator
            implements Comparator<SuggestedWordInfo> {
        // This comparator ranks the word info with the higher frequency first. That's because
        // that's the order we want our elements in.
        // Ties on score are broken by the shorter word first, then by the word's natural order.
        @Override
        public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) {
            if (o1.mScore > o2.mScore) return -1;
            if (o1.mScore < o2.mScore) return 1;
            if (o1.mCodePointCount < o2.mCodePointCount) return -1;
            if (o1.mCodePointCount > o2.mCodePointCount) return 1;
            return o1.mWord.compareTo(o2.mWord);
        }
    }
    private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator =
            new SuggestedWordInfoComparator();

    /**
     * Returns a new word info whose word is re-cased (all upper case takes precedence over
     * first-character capitalization) and suffixed with single quotes; all other attributes are
     * copied from {@code wordInfo}.
     */
    /* package for test */ static SuggestedWordInfo getTransformedSuggestedWordInfo(
            final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
            final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) {
        final StringBuilder sb = new StringBuilder(wordInfo.mWord.length());
        if (isAllUpperCase) {
            sb.append(wordInfo.mWord.toUpperCase(locale));
        } else if (isFirstCharCapitalized) {
            sb.append(StringUtils.capitalizeFirstCodePoint(wordInfo.mWord, locale));
        } else {
            sb.append(wordInfo.mWord);
        }
        // Appending quotes is here to help people quote words. However, it's not helpful
        // when they type words with quotes toward the end like "it's" or "didn't", where
        // it's more likely the user missed the last character (or didn't type it yet).
        // Hence one fewer quote is appended when the word already contains a single quote.
        final int quotesToAppend = trailingSingleQuotesCount
                - (-1 == wordInfo.mWord.indexOf(Constants.CODE_SINGLE_QUOTE) ? 0 : 1);
        for (int i = quotesToAppend - 1; i >= 0; --i) {
            sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE);
        }
        return new SuggestedWordInfo(sb.toString(), wordInfo.mScore, wordInfo.mKind,
                wordInfo.mSourceDict, wordInfo.mIndexOfTouchPointOfSecondWord,
                wordInfo.mAutoCommitFirstWordConfidence);
    }

    /**
     * Closes every registered dictionary and drops the main dictionary reference.
     */
    public void close() {
        // Collect into a set first so a dictionary registered under several keys is closed
        // only once.
        final HashSet<Dictionary> dictionaries = CollectionUtils.newHashSet();
        dictionaries.addAll(mDictionaries.values());
        for (final Dictionary dictionary : dictionaries) {
            dictionary.close();
        }
        mMainDictionary = null;
    }
}