1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin; 18 19 import android.text.TextUtils; 20 21 import static com.android.inputmethod.latin.define.DecoderSpecificConstants.SHOULD_AUTO_CORRECT_USING_NON_WHITE_LISTED_SUGGESTION; 22 import static com.android.inputmethod.latin.define.DecoderSpecificConstants.SHOULD_REMOVE_PREVIOUSLY_REJECTED_SUGGESTION; 23 24 import com.android.inputmethod.keyboard.Keyboard; 25 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; 26 import com.android.inputmethod.latin.common.Constants; 27 import com.android.inputmethod.latin.common.StringUtils; 28 import com.android.inputmethod.latin.define.DebugFlags; 29 import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion; 30 import com.android.inputmethod.latin.utils.AutoCorrectionUtils; 31 import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; 32 import com.android.inputmethod.latin.utils.SuggestionResults; 33 34 import java.util.ArrayList; 35 import java.util.HashMap; 36 import java.util.Locale; 37 38 import javax.annotation.Nonnull; 39 40 /** 41 * This class loads a dictionary and provides a list of suggestions for a given sequence of 42 * characters. This includes corrections and completions. 43 */ 44 public final class Suggest { 45 public static final String TAG = Suggest.class.getSimpleName(); 46 47 // Session id for 48 // {@link #getSuggestedWords(WordComposer,String,ProximityInfo,boolean,int)}. 49 // We are sharing the same ID between typing and gesture to save RAM footprint. 50 public static final int SESSION_ID_TYPING = 0; 51 public static final int SESSION_ID_GESTURE = 0; 52 53 // Close to -2**31 54 private static final int SUPPRESS_SUGGEST_THRESHOLD = -2000000000; 55 56 private static final boolean DBG = DebugFlags.DEBUG_ENABLED; 57 private final DictionaryFacilitator mDictionaryFacilitator; 58 59 private static final int MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN = 12; 60 private static final HashMap<String, Integer> sLanguageToMaximumAutoCorrectionWithSpaceLength = 61 new HashMap<>(); 62 static { 63 // TODO: should we add Finnish here? 64 // TODO: This should not be hardcoded here but be written in the dictionary header 65 sLanguageToMaximumAutoCorrectionWithSpaceLength.put(Locale.GERMAN.getLanguage(), 66 MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN); 67 } 68 69 private float mAutoCorrectionThreshold; 70 private float mPlausibilityThreshold; 71 72 public Suggest(final DictionaryFacilitator dictionaryFacilitator) { 73 mDictionaryFacilitator = dictionaryFacilitator; 74 } 75 76 /** 77 * Set the normalized-score threshold for a suggestion to be considered strong enough that we 78 * will auto-correct to this. 79 * @param threshold the threshold 80 */ 81 public void setAutoCorrectionThreshold(final float threshold) { 82 mAutoCorrectionThreshold = threshold; 83 } 84 85 /** 86 * Set the normalized-score threshold for what we consider a "plausible" suggestion, in 87 * the same dimension as the auto-correction threshold. 88 * @param threshold the threshold 89 */ 90 public void setPlausibilityThreshold(final float threshold) { 91 mPlausibilityThreshold = threshold; 92 } 93 94 public interface OnGetSuggestedWordsCallback { 95 public void onGetSuggestedWords(final SuggestedWords suggestedWords); 96 } 97 98 public void getSuggestedWords(final WordComposer wordComposer, 99 final NgramContext ngramContext, final Keyboard keyboard, 100 final SettingsValuesForSuggestion settingsValuesForSuggestion, 101 final boolean isCorrectionEnabled, final int inputStyle, final int sequenceNumber, 102 final OnGetSuggestedWordsCallback callback) { 103 if (wordComposer.isBatchMode()) { 104 getSuggestedWordsForBatchInput(wordComposer, ngramContext, keyboard, 105 settingsValuesForSuggestion, inputStyle, sequenceNumber, callback); 106 } else { 107 getSuggestedWordsForNonBatchInput(wordComposer, ngramContext, keyboard, 108 settingsValuesForSuggestion, inputStyle, isCorrectionEnabled, 109 sequenceNumber, callback); 110 } 111 } 112 113 private static ArrayList<SuggestedWordInfo> getTransformedSuggestedWordInfoList( 114 final WordComposer wordComposer, final SuggestionResults results, 115 final int trailingSingleQuotesCount, final Locale defaultLocale) { 116 final boolean shouldMakeSuggestionsAllUpperCase = wordComposer.isAllUpperCase() 117 && !wordComposer.isResumed(); 118 final boolean isOnlyFirstCharCapitalized = 119 wordComposer.isOrWillBeOnlyFirstCharCapitalized(); 120 121 final ArrayList<SuggestedWordInfo> suggestionsContainer = new ArrayList<>(results); 122 final int suggestionsCount = suggestionsContainer.size(); 123 if (isOnlyFirstCharCapitalized || shouldMakeSuggestionsAllUpperCase 124 || 0 != trailingSingleQuotesCount) { 125 for (int i = 0; i < suggestionsCount; ++i) { 126 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); 127 final Locale wordLocale = wordInfo.mSourceDict.mLocale; 128 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo( 129 wordInfo, null == wordLocale ? defaultLocale : wordLocale, 130 shouldMakeSuggestionsAllUpperCase, isOnlyFirstCharCapitalized, 131 trailingSingleQuotesCount); 132 suggestionsContainer.set(i, transformedWordInfo); 133 } 134 } 135 return suggestionsContainer; 136 } 137 138 private static SuggestedWordInfo getWhitelistedWordInfoOrNull( 139 @Nonnull final ArrayList<SuggestedWordInfo> suggestions) { 140 if (suggestions.isEmpty()) { 141 return null; 142 } 143 final SuggestedWordInfo firstSuggestedWordInfo = suggestions.get(0); 144 if (!firstSuggestedWordInfo.isKindOf(SuggestedWordInfo.KIND_WHITELIST)) { 145 return null; 146 } 147 return firstSuggestedWordInfo; 148 } 149 150 // Retrieves suggestions for non-batch input (typing, recorrection, predictions...) 151 // and calls the callback function with the suggestions. 152 private void getSuggestedWordsForNonBatchInput(final WordComposer wordComposer, 153 final NgramContext ngramContext, final Keyboard keyboard, 154 final SettingsValuesForSuggestion settingsValuesForSuggestion, 155 final int inputStyleIfNotPrediction, final boolean isCorrectionEnabled, 156 final int sequenceNumber, final OnGetSuggestedWordsCallback callback) { 157 final String typedWordString = wordComposer.getTypedWord(); 158 final int trailingSingleQuotesCount = 159 StringUtils.getTrailingSingleQuotesCount(typedWordString); 160 final String consideredWord = trailingSingleQuotesCount > 0 161 ? typedWordString.substring(0, typedWordString.length() - trailingSingleQuotesCount) 162 : typedWordString; 163 164 final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults( 165 wordComposer.getComposedDataSnapshot(), ngramContext, keyboard, 166 settingsValuesForSuggestion, SESSION_ID_TYPING, inputStyleIfNotPrediction); 167 final Locale locale = mDictionaryFacilitator.getLocale(); 168 final ArrayList<SuggestedWordInfo> suggestionsContainer = 169 getTransformedSuggestedWordInfoList(wordComposer, suggestionResults, 170 trailingSingleQuotesCount, locale); 171 172 boolean foundInDictionary = false; 173 Dictionary sourceDictionaryOfRemovedWord = null; 174 for (final SuggestedWordInfo info : suggestionsContainer) { 175 // Search for the best dictionary, defined as the first one with the highest match 176 // quality we can find. 177 if (!foundInDictionary && typedWordString.equals(info.mWord)) { 178 // Use this source if the old match had lower quality than this match 179 sourceDictionaryOfRemovedWord = info.mSourceDict; 180 foundInDictionary = true; 181 break; 182 } 183 } 184 185 final int firstOcurrenceOfTypedWordInSuggestions = 186 SuggestedWordInfo.removeDups(typedWordString, suggestionsContainer); 187 188 final SuggestedWordInfo whitelistedWordInfo = 189 getWhitelistedWordInfoOrNull(suggestionsContainer); 190 final String whitelistedWord = whitelistedWordInfo == null 191 ? null : whitelistedWordInfo.mWord; 192 final boolean resultsArePredictions = !wordComposer.isComposingWord(); 193 194 // We allow auto-correction if whitelisting is not required or the word is whitelisted, 195 // or if the word had more than one char and was not suggested. 196 final boolean allowsToBeAutoCorrected = 197 (SHOULD_AUTO_CORRECT_USING_NON_WHITE_LISTED_SUGGESTION || whitelistedWord != null) 198 || (consideredWord.length() > 1 && (sourceDictionaryOfRemovedWord == null)); 199 200 final boolean hasAutoCorrection; 201 // If correction is not enabled, we never auto-correct. This is for example for when 202 // the setting "Auto-correction" is "off": we still suggest, but we don't auto-correct. 203 if (!isCorrectionEnabled 204 // If the word does not allow to be auto-corrected, then we don't auto-correct. 205 || !allowsToBeAutoCorrected 206 // If we are doing prediction, then we never auto-correct of course 207 || resultsArePredictions 208 // If we don't have suggestion results, we can't evaluate the first suggestion 209 // for auto-correction 210 || suggestionResults.isEmpty() 211 // If the word has digits, we never auto-correct because it's likely the word 212 // was type with a lot of care 213 || wordComposer.hasDigits() 214 // If the word is mostly caps, we never auto-correct because this is almost 215 // certainly intentional (and careful input) 216 || wordComposer.isMostlyCaps() 217 // We never auto-correct when suggestions are resumed because it would be unexpected 218 || wordComposer.isResumed() 219 // If we don't have a main dictionary, we never want to auto-correct. The reason 220 // for this is, the user may have a contact whose name happens to match a valid 221 // word in their language, and it will unexpectedly auto-correct. For example, if 222 // the user types in English with no dictionary and has a "Will" in their contact 223 // list, "will" would always auto-correct to "Will" which is unwanted. Hence, no 224 // main dict => no auto-correct. Also, it would probably get obnoxious quickly. 225 // TODO: now that we have personalization, we may want to re-evaluate this decision 226 || !mDictionaryFacilitator.hasAtLeastOneInitializedMainDictionary() 227 // If the first suggestion is a shortcut we never auto-correct to it, regardless 228 // of how strong it is (whitelist entries are not KIND_SHORTCUT but KIND_WHITELIST). 229 // TODO: we may want to have shortcut-only entries auto-correct in the future. 230 || suggestionResults.first().isKindOf(SuggestedWordInfo.KIND_SHORTCUT)) { 231 hasAutoCorrection = false; 232 } else { 233 final SuggestedWordInfo firstSuggestion = suggestionResults.first(); 234 if (suggestionResults.mFirstSuggestionExceedsConfidenceThreshold 235 && firstOcurrenceOfTypedWordInSuggestions != 0) { 236 hasAutoCorrection = true; 237 } else if (!AutoCorrectionUtils.suggestionExceedsThreshold( 238 firstSuggestion, consideredWord, mAutoCorrectionThreshold)) { 239 // Score is too low for autocorrect 240 hasAutoCorrection = false; 241 } else { 242 // We have a high score, so we need to check if this suggestion is in the correct 243 // form to allow auto-correcting to it in this language. For details of how this 244 // is determined, see #isAllowedByAutoCorrectionWithSpaceFilter. 245 // TODO: this should not have its own logic here but be handled by the dictionary. 246 hasAutoCorrection = isAllowedByAutoCorrectionWithSpaceFilter(firstSuggestion); 247 } 248 } 249 250 final SuggestedWordInfo typedWordInfo = new SuggestedWordInfo(typedWordString, 251 "" /* prevWordsContext */, SuggestedWordInfo.MAX_SCORE, 252 SuggestedWordInfo.KIND_TYPED, 253 null == sourceDictionaryOfRemovedWord ? Dictionary.DICTIONARY_USER_TYPED 254 : sourceDictionaryOfRemovedWord, 255 SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */, 256 SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */); 257 if (!TextUtils.isEmpty(typedWordString)) { 258 suggestionsContainer.add(0, typedWordInfo); 259 } 260 261 final ArrayList<SuggestedWordInfo> suggestionsList; 262 if (DBG && !suggestionsContainer.isEmpty()) { 263 suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWordString, 264 suggestionsContainer); 265 } else { 266 suggestionsList = suggestionsContainer; 267 } 268 269 final int inputStyle; 270 if (resultsArePredictions) { 271 inputStyle = suggestionResults.mIsBeginningOfSentence 272 ? SuggestedWords.INPUT_STYLE_BEGINNING_OF_SENTENCE_PREDICTION 273 : SuggestedWords.INPUT_STYLE_PREDICTION; 274 } else { 275 inputStyle = inputStyleIfNotPrediction; 276 } 277 278 final boolean isTypedWordValid = firstOcurrenceOfTypedWordInSuggestions > -1 279 || (!resultsArePredictions && !allowsToBeAutoCorrected); 280 callback.onGetSuggestedWords(new SuggestedWords(suggestionsList, 281 suggestionResults.mRawSuggestions, typedWordInfo, 282 isTypedWordValid, 283 hasAutoCorrection /* willAutoCorrect */, 284 false /* isObsoleteSuggestions */, inputStyle, sequenceNumber)); 285 } 286 287 // Retrieves suggestions for the batch input 288 // and calls the callback function with the suggestions. 289 private void getSuggestedWordsForBatchInput(final WordComposer wordComposer, 290 final NgramContext ngramContext, final Keyboard keyboard, 291 final SettingsValuesForSuggestion settingsValuesForSuggestion, 292 final int inputStyle, final int sequenceNumber, 293 final OnGetSuggestedWordsCallback callback) { 294 final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults( 295 wordComposer.getComposedDataSnapshot(), ngramContext, keyboard, 296 settingsValuesForSuggestion, SESSION_ID_GESTURE, inputStyle); 297 // For transforming words that don't come from a dictionary, because it's our best bet 298 final Locale locale = mDictionaryFacilitator.getLocale(); 299 final ArrayList<SuggestedWordInfo> suggestionsContainer = 300 new ArrayList<>(suggestionResults); 301 final int suggestionsCount = suggestionsContainer.size(); 302 final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock(); 303 final boolean isAllUpperCase = wordComposer.isAllUpperCase(); 304 if (isFirstCharCapitalized || isAllUpperCase) { 305 for (int i = 0; i < suggestionsCount; ++i) { 306 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); 307 final Locale wordlocale = wordInfo.mSourceDict.mLocale; 308 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo( 309 wordInfo, null == wordlocale ? locale : wordlocale, isAllUpperCase, 310 isFirstCharCapitalized, 0 /* trailingSingleQuotesCount */); 311 suggestionsContainer.set(i, transformedWordInfo); 312 } 313 } 314 315 if (SHOULD_REMOVE_PREVIOUSLY_REJECTED_SUGGESTION 316 && suggestionsContainer.size() > 1 317 && TextUtils.equals(suggestionsContainer.get(0).mWord, 318 wordComposer.getRejectedBatchModeSuggestion())) { 319 final SuggestedWordInfo rejected = suggestionsContainer.remove(0); 320 suggestionsContainer.add(1, rejected); 321 } 322 SuggestedWordInfo.removeDups(null /* typedWord */, suggestionsContainer); 323 324 // For some reason some suggestions with MIN_VALUE are making their way here. 325 // TODO: Find a more robust way to detect distracters. 326 for (int i = suggestionsContainer.size() - 1; i >= 0; --i) { 327 if (suggestionsContainer.get(i).mScore < SUPPRESS_SUGGEST_THRESHOLD) { 328 suggestionsContainer.remove(i); 329 } 330 } 331 332 // In the batch input mode, the most relevant suggested word should act as a "typed word" 333 // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false). 334 // Note that because this method is never used to get predictions, there is no need to 335 // modify inputType such in getSuggestedWordsForNonBatchInput. 336 final SuggestedWordInfo pseudoTypedWordInfo = suggestionsContainer.isEmpty() ? null 337 : suggestionsContainer.get(0); 338 339 callback.onGetSuggestedWords(new SuggestedWords(suggestionsContainer, 340 suggestionResults.mRawSuggestions, 341 pseudoTypedWordInfo, 342 true /* typedWordValid */, 343 false /* willAutoCorrect */, 344 false /* isObsoleteSuggestions */, 345 inputStyle, sequenceNumber)); 346 } 347 348 private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo( 349 final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) { 350 final SuggestedWordInfo typedWordInfo = suggestions.get(0); 351 typedWordInfo.setDebugString("+"); 352 final int suggestionsSize = suggestions.size(); 353 final ArrayList<SuggestedWordInfo> suggestionsList = new ArrayList<>(suggestionsSize); 354 suggestionsList.add(typedWordInfo); 355 // Note: i here is the index in mScores[], but the index in mSuggestions is one more 356 // than i because we added the typed word to mSuggestions without touching mScores. 357 for (int i = 0; i < suggestionsSize - 1; ++i) { 358 final SuggestedWordInfo cur = suggestions.get(i + 1); 359 final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore( 360 typedWord, cur.toString(), cur.mScore); 361 final String scoreInfoString; 362 if (normalizedScore > 0) { 363 scoreInfoString = String.format( 364 Locale.ROOT, "%d (%4.2f), %s", cur.mScore, normalizedScore, 365 cur.mSourceDict.mDictType); 366 } else { 367 scoreInfoString = Integer.toString(cur.mScore); 368 } 369 cur.setDebugString(scoreInfoString); 370 suggestionsList.add(cur); 371 } 372 return suggestionsList; 373 } 374 375 /** 376 * Computes whether this suggestion should be blocked or not in this language 377 * 378 * This function implements a filter that avoids auto-correcting to suggestions that contain 379 * spaces that are above a certain language-dependent character limit. In languages like German 380 * where it's possible to concatenate many words, it often happens our dictionary does not 381 * have the longer words. In this case, we offer a lot of unhelpful suggestions that contain 382 * one or several spaces. Ideally we should understand what the user wants and display useful 383 * suggestions by improving the dictionary and possibly having some specific logic. Until 384 * that's possible we should avoid displaying unhelpful suggestions. But it's hard to tell 385 * whether a suggestion is useful or not. So at least for the time being we block 386 * auto-correction when the suggestion is long and contains a space, which should avoid the 387 * worst damage. 388 * This function is implementing that filter. If the language enforces no such limit, then it 389 * always returns true. If the suggestion contains no space, it also returns true. Otherwise, 390 * it checks the length against the language-specific limit. 391 * 392 * @param info the suggestion info 393 * @return whether it's fine to auto-correct to this. 394 */ 395 private static boolean isAllowedByAutoCorrectionWithSpaceFilter(final SuggestedWordInfo info) { 396 final Locale locale = info.mSourceDict.mLocale; 397 if (null == locale) { 398 return true; 399 } 400 final Integer maximumLengthForThisLanguage = 401 sLanguageToMaximumAutoCorrectionWithSpaceLength.get(locale.getLanguage()); 402 if (null == maximumLengthForThisLanguage) { 403 // This language does not enforce a maximum length to auto-correction 404 return true; 405 } 406 return info.mWord.length() <= maximumLengthForThisLanguage 407 || -1 == info.mWord.indexOf(Constants.CODE_SPACE); 408 } 409 410 /* package for test */ static SuggestedWordInfo getTransformedSuggestedWordInfo( 411 final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase, 412 final boolean isOnlyFirstCharCapitalized, final int trailingSingleQuotesCount) { 413 final StringBuilder sb = new StringBuilder(wordInfo.mWord.length()); 414 if (isAllUpperCase) { 415 sb.append(wordInfo.mWord.toUpperCase(locale)); 416 } else if (isOnlyFirstCharCapitalized) { 417 sb.append(StringUtils.capitalizeFirstCodePoint(wordInfo.mWord, locale)); 418 } else { 419 sb.append(wordInfo.mWord); 420 } 421 // Appending quotes is here to help people quote words. However, it's not helpful 422 // when they type words with quotes toward the end like "it's" or "didn't", where 423 // it's more likely the user missed the last character (or didn't type it yet). 424 final int quotesToAppend = trailingSingleQuotesCount 425 - (-1 == wordInfo.mWord.indexOf(Constants.CODE_SINGLE_QUOTE) ? 0 : 1); 426 for (int i = quotesToAppend - 1; i >= 0; --i) { 427 sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE); 428 } 429 return new SuggestedWordInfo(sb.toString(), wordInfo.mPrevWordsContext, 430 wordInfo.mScore, wordInfo.mKindAndFlags, 431 wordInfo.mSourceDict, wordInfo.mIndexOfTouchPointOfSecondWord, 432 wordInfo.mAutoCommitFirstWordConfidence); 433 } 434 } 435