/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin;

import android.content.Context;
import android.text.TextUtils;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;

import java.io.File;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/**
 * This class loads a dictionary and provides a list of suggestions for a given sequence of
 * characters. This includes corrections and completions.
 *
 * <p>The main dictionary is (re)loaded on a background thread, see
 * {@link #resetMainDict(Context, Locale, SuggestInitializationListener)}. The fields that thread
 * writes are {@code volatile} so that readers on other threads observe the update.
 */
public final class Suggest {
    public static final String TAG = Suggest.class.getSimpleName();

    // Session id for
    // {@link #getSuggestedWords(WordComposer,String,ProximityInfo,boolean,boolean,int)}.
    public static final int SESSION_TYPING = 0;
    public static final int SESSION_GESTURE = 1;

    // TODO: rename this to CORRECTION_OFF
    public static final int CORRECTION_NONE = 0;
    // TODO: rename this to CORRECTION_ON
    public static final int CORRECTION_FULL = 1;

    // Close to -2**31
    private static final int SUPPRESS_SUGGEST_THRESHOLD = -2000000000;

    /** Callback notified whenever the availability of the main dictionary may have changed. */
    public interface SuggestInitializationListener {
        public void onUpdateMainDictionaryAvailability(boolean isMainDictionaryAvailable);
    }

    private static final boolean DBG = LatinImeLogger.sDBG;

    // Written by the "InitializeBinaryDictionary" thread and read from other threads, hence
    // volatile.
    private volatile Dictionary mMainDictionary;
    private ContactsBinaryDictionary mContactsDict;
    private final ConcurrentHashMap<String, Dictionary> mDictionaries =
            CollectionUtils.newConcurrentHashMap();
    // Cleared by the "InitializeBinaryDictionary" thread once loading is done, hence volatile.
    @UsedForTesting
    private volatile boolean mIsCurrentlyWaitingForMainDictionary = false;

    public static final int MAX_SUGGESTIONS = 18;

    private float mAutoCorrectionThreshold;

    // Locale used for upper- and title-casing words
    public final Locale mLocale;

    /**
     * Creates a suggest engine and starts loading the main dictionary in the background.
     *
     * @param context the context handed to the dictionary factory
     * @param locale the locale of the main dictionary, also used for casing suggestions
     * @param listener optional listener told about main dictionary availability, may be null
     */
    public Suggest(final Context context, final Locale locale,
            final SuggestInitializationListener listener) {
        // Assign the final field before kicking off the loader thread so that this object is
        // fully set up by the time the thread (and the listener it calls) can see it.
        mLocale = locale;
        initAsynchronously(context, locale, listener);
    }

    @UsedForTesting
    Suggest(final File dictionary, final long startOffset, final long length, final Locale locale) {
        final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(dictionary,
                startOffset, length, false /* useFullEditDistance */, locale);
        mLocale = locale;
        mMainDictionary = mainDict;
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, mainDict);
    }

    private void initAsynchronously(final Context context, final Locale locale,
            final SuggestInitializationListener listener) {
        resetMainDict(context, locale, listener);
    }

    /**
     * Installs {@code dict} under {@code key}, or removes the entry when {@code dict} is null.
     * The dictionary previously stored under that key is closed, unless it is the very same
     * instance as the new one.
     */
    private static void addOrReplaceDictionary(
            final ConcurrentHashMap<String, Dictionary> dictionaries,
            final String key, final Dictionary dict) {
        final Dictionary oldDict = (dict == null)
                ? dictionaries.remove(key)
                : dictionaries.put(key, dict);
        if (oldDict != null && dict != oldDict) {
            oldDict.close();
        }
    }

    /**
     * Drops the reference to the current main dictionary and loads a new one on a background
     * thread.
     *
     * <p>If {@code listener} is not null it is notified twice: once synchronously, right after
     * the reference has been cleared, and once from the loader thread after the new dictionary
     * has been installed.
     *
     * @param context the context handed to the dictionary factory
     * @param locale the locale of the dictionary to load
     * @param listener optional listener told about main dictionary availability, may be null
     */
    public void resetMainDict(final Context context, final Locale locale,
            final SuggestInitializationListener listener) {
        mIsCurrentlyWaitingForMainDictionary = true;
        mMainDictionary = null;
        if (listener != null) {
            listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
        }
        new Thread("InitializeBinaryDictionary") {
            @Override
            public void run() {
                final DictionaryCollection newMainDict =
                        DictionaryFactory.createMainDictionaryFromManager(context, locale);
                addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, newMainDict);
                mMainDictionary = newMainDict;
                if (listener != null) {
                    listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
                }
                mIsCurrentlyWaitingForMainDictionary = false;
            }
        }.start();
    }

    // The main dictionary could have been loaded asynchronously. Don't cache the return value
    // of this method.
    public boolean hasMainDictionary() {
        // Read the field once: another thread may set it to null between a null check and a
        // subsequent dereference.
        final Dictionary mainDictionary = mMainDictionary;
        return null != mainDictionary && mainDictionary.isInitialized();
    }

    @UsedForTesting
    public boolean isCurrentlyWaitingForMainDictionary() {
        return mIsCurrentlyWaitingForMainDictionary;
    }

    public Dictionary getMainDictionary() {
        return mMainDictionary;
    }

    public ContactsBinaryDictionary getContactsDictionary() {
        return mContactsDict;
    }

    public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() {
        return mDictionaries;
    }

    /**
     * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted
     * before the main dictionary, if set. This refers to the system-managed user dictionary.
     */
    public void setUserDictionary(final UserBinaryDictionary userDictionary) {
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER, userDictionary);
    }

    /**
     * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove
     * the contacts dictionary by passing null to this method. In this case no contacts dictionary
     * will be used.
     */
    public void setContactsDictionary(final ContactsBinaryDictionary contactsDictionary) {
        mContactsDict = contactsDictionary;
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_CONTACTS, contactsDictionary);
    }

    /**
     * Sets the user history dictionary. Passing null removes the current one, if any.
     */
    public void setUserHistoryDictionary(final UserHistoryDictionary userHistoryDictionary) {
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER_HISTORY, userHistoryDictionary);
    }

    public void setAutoCorrectionThreshold(float threshold) {
        mAutoCorrectionThreshold = threshold;
    }

    /**
     * Computes suggestions for the word being composed.
     *
     * <p>Dispatches to the batch input path when {@code wordComposer.isBatchMode()} is true and
     * to the typing path otherwise. {@code isCorrectionEnabled} is only used by the typing path;
     * {@code sessionId} is only used by the batch path.
     *
     * @param wordComposer the state of the word being composed
     * @param prevWordForBigram the previous word, passed on to the dictionaries
     * @param proximityInfo the keyboard proximity info, passed on to the dictionaries
     * @param blockOffensiveWords passed on to the dictionaries
     * @param isCorrectionEnabled whether auto-correction may be reported for typing input
     * @param sessionId one of {@link #SESSION_TYPING} or {@link #SESSION_GESTURE}
     * @return the suggestions for the current input
     */
    public SuggestedWords getSuggestedWords(final WordComposer wordComposer,
            final String prevWordForBigram, final ProximityInfo proximityInfo,
            final boolean blockOffensiveWords, final boolean isCorrectionEnabled,
            final int sessionId) {
        LatinImeLogger.onStartSuggestion(prevWordForBigram);
        if (wordComposer.isBatchMode()) {
            return getSuggestedWordsForBatchInput(
                    wordComposer, prevWordForBigram, proximityInfo, blockOffensiveWords, sessionId);
        } else {
            return getSuggestedWordsForTypingInput(wordComposer, prevWordForBigram, proximityInfo,
                    blockOffensiveWords, isCorrectionEnabled);
        }
    }

    // Retrieves suggestions for the typing input.
    private SuggestedWords getSuggestedWordsForTypingInput(final WordComposer wordComposer,
            final String prevWordForBigram, final ProximityInfo proximityInfo,
            final boolean blockOffensiveWords, final boolean isCorrectionEnabled) {
        final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
                MAX_SUGGESTIONS);

        final String typedWord = wordComposer.getTypedWord();
        // The typed word stripped of its trailing single quotes; this is what gets looked up.
        final String consideredWord = trailingSingleQuotesCount > 0
                ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount)
                : typedWord;
        LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED);

        final WordComposer wordComposerForLookup;
        if (trailingSingleQuotesCount > 0) {
            // Work on a copy so the caller's composer keeps its trailing quotes.
            wordComposerForLookup = new WordComposer(wordComposer);
            for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
                wordComposerForLookup.deleteLast();
            }
        } else {
            wordComposerForLookup = wordComposer;
        }

        // Iterate over the values directly: a keySet() + get() pair can observe a null value if
        // a dictionary is removed by another thread in between.
        for (final Dictionary dictionary : mDictionaries.values()) {
            suggestionsSet.addAll(dictionary.getSuggestions(
                    wordComposerForLookup, prevWordForBigram, proximityInfo, blockOffensiveWords));
        }

        final String whitelistedWord;
        if (suggestionsSet.isEmpty()) {
            whitelistedWord = null;
        } else if (SuggestedWordInfo.KIND_WHITELIST != suggestionsSet.first().mKind) {
            whitelistedWord = null;
        } else {
            whitelistedWord = suggestionsSet.first().mWord;
        }

        // The word can be auto-corrected if it has a whitelist entry that is not itself,
        // or if it's a 2+ characters non-word (i.e. it's not in the dictionary).
        final boolean allowsToBeAutoCorrected = (null != whitelistedWord
                && !whitelistedWord.equals(consideredWord))
                || (consideredWord.length() > 1 && !AutoCorrection.isInTheDictionary(mDictionaries,
                        consideredWord, wordComposer.isFirstCharCapitalized()));

        final boolean hasAutoCorrection;
        // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because
        // any attempt to do auto-correction is already shielded with a test for this flag; at the
        // same time, it feels wrong that the SuggestedWord object includes information about
        // the current settings. It may also be useful to know, when the setting is off, whether
        // the word *would* have been auto-corrected.
        if (!isCorrectionEnabled || !allowsToBeAutoCorrected || !wordComposer.isComposingWord()
                || suggestionsSet.isEmpty() || wordComposer.hasDigits()
                || wordComposer.isMostlyCaps() || wordComposer.isResumed()
                || !hasMainDictionary()) {
            // If we don't have a main dictionary, we never want to auto-correct. The reason for
            // this is, the user may have a contact whose name happens to match a valid word in
            // their language, and it will unexpectedly auto-correct. For example, if the user
            // types in English with no dictionary and has a "Will" in their contact list, "will"
            // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no
            // auto-correct.
            hasAutoCorrection = false;
        } else {
            hasAutoCorrection = AutoCorrection.suggestionExceedsAutoCorrectionThreshold(
                    suggestionsSet.first(), consideredWord, mAutoCorrectionThreshold);
        }

        final ArrayList<SuggestedWordInfo> suggestionsContainer =
                CollectionUtils.newArrayList(suggestionsSet);
        final int suggestionsCount = suggestionsContainer.size();
        final boolean isFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
        // Re-apply the casing and the trailing quotes of the typed word to every suggestion.
        if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) {
            for (int i = 0; i < suggestionsCount; ++i) {
                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
                        trailingSingleQuotesCount);
                suggestionsContainer.set(i, transformedWordInfo);
            }
        }

        for (int i = 0; i < suggestionsCount; ++i) {
            final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord, wordInfo.mSourceDict);
        }

        // The typed word always comes first, with the maximum score.
        if (!TextUtils.isEmpty(typedWord)) {
            suggestionsContainer.add(0, new SuggestedWordInfo(typedWord,
                    SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
                    Dictionary.TYPE_USER_TYPED));
        }
        SuggestedWordInfo.removeDups(suggestionsContainer);

        final ArrayList<SuggestedWordInfo> suggestionsList;
        if (DBG && !suggestionsContainer.isEmpty()) {
            suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer);
        } else {
            suggestionsList = suggestionsContainer;
        }

        return new SuggestedWords(suggestionsList,
                // TODO: this first argument is lying. If this is a whitelisted word which is an
                // actual word, it says typedWordValid = false, which looks wrong. We should either
                // rename the attribute or change the value.
                !allowsToBeAutoCorrected /* typedWordValid */,
                hasAutoCorrection /* willAutoCorrect */,
                false /* isPunctuationSuggestions */,
                false /* isObsoleteSuggestions */,
                !wordComposer.isComposingWord() /* isPrediction */);
    }

    // Retrieves suggestions for the batch input.
    private SuggestedWords getSuggestedWordsForBatchInput(final WordComposer wordComposer,
            final String prevWordForBigram, final ProximityInfo proximityInfo,
            final boolean blockOffensiveWords, final int sessionId) {
        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
                MAX_SUGGESTIONS);

        // At second character typed, search the unigrams (scores being affected by bigrams)
        // Iterate over the entries rather than keySet() + get(): the latter can observe a null
        // value if a dictionary is removed by another thread in between.
        for (final Map.Entry<String, Dictionary> entry : mDictionaries.entrySet()) {
            // Skip User history dictionary for lookup
            // TODO: The user history dictionary should just override getSuggestionsWithSessionId
            // to make sure it doesn't return anything and we should remove this test
            if (entry.getKey().equals(Dictionary.TYPE_USER_HISTORY)) {
                continue;
            }
            final Dictionary dictionary = entry.getValue();
            suggestionsSet.addAll(dictionary.getSuggestionsWithSessionId(wordComposer,
                    prevWordForBigram, proximityInfo, blockOffensiveWords, sessionId));
        }

        for (SuggestedWordInfo wordInfo : suggestionsSet) {
            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord, wordInfo.mSourceDict);
        }

        final ArrayList<SuggestedWordInfo> suggestionsContainer =
                CollectionUtils.newArrayList(suggestionsSet);
        final int suggestionsCount = suggestionsContainer.size();
        final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock();
        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
        if (isFirstCharCapitalized || isAllUpperCase) {
            for (int i = 0; i < suggestionsCount; ++i) {
                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
                        0 /* trailingSingleQuotesCount */);
                suggestionsContainer.set(i, transformedWordInfo);
            }
        }

        // If the top suggestion is the one the user just rejected, demote it to second place.
        if (suggestionsContainer.size() > 1 && TextUtils.equals(suggestionsContainer.get(0).mWord,
                wordComposer.getRejectedBatchModeSuggestion())) {
            final SuggestedWordInfo rejected = suggestionsContainer.remove(0);
            suggestionsContainer.add(1, rejected);
        }
        SuggestedWordInfo.removeDups(suggestionsContainer);

        // For some reason some suggestions with MIN_VALUE are making their way here.
        // TODO: Find a more robust way to detect distractors.
        // Walk backwards so that removals do not shift the indices still to be visited.
        for (int i = suggestionsContainer.size() - 1; i >= 0; --i) {
            if (suggestionsContainer.get(i).mScore < SUPPRESS_SUGGEST_THRESHOLD) {
                suggestionsContainer.remove(i);
            }
        }

        // In the batch input mode, the most relevant suggested word should act as a "typed word"
        // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
        return new SuggestedWords(suggestionsContainer,
                true /* typedWordValid */,
                false /* willAutoCorrect */,
                false /* isPunctuationSuggestions */,
                false /* isObsoleteSuggestions */,
                false /* isPrediction */);
    }

    /**
     * Returns a copy of {@code suggestions} in which every entry carries a debug string: "+" for
     * the first entry, and the raw score (plus the normalized score when it is positive) for the
     * others.
     *
     * @param typedWord the typed word, used to compute normalized scores
     * @param suggestions a non-empty list of suggestions; its entries are modified in place
     */
    private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
            final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
        final SuggestedWordInfo typedWordInfo = suggestions.get(0);
        typedWordInfo.setDebugString("+");
        final int suggestionsSize = suggestions.size();
        final ArrayList<SuggestedWordInfo> suggestionsList =
                CollectionUtils.newArrayList(suggestionsSize);
        suggestionsList.add(typedWordInfo);
        // Index 0 has been handled above, so scoring starts at the second entry.
        for (int i = 1; i < suggestionsSize; ++i) {
            final SuggestedWordInfo cur = suggestions.get(i);
            final float normalizedScore = BinaryDictionary.calcNormalizedScore(
                    typedWord, cur.toString(), cur.mScore);
            final String scoreInfoString;
            if (normalizedScore > 0) {
                scoreInfoString = String.format("%d (%4.2f)", cur.mScore, normalizedScore);
            } else {
                scoreInfoString = Integer.toString(cur.mScore);
            }
            cur.setDebugString(scoreInfoString);
            suggestionsList.add(cur);
        }
        return suggestionsList;
    }

    private static final class SuggestedWordInfoComparator
            implements Comparator<SuggestedWordInfo> {
        // This comparator ranks the word info with the higher frequency first. That's because
        // that's the order we want our elements in. Ties are broken by putting the word with
        // fewer code points first, then by the natural ordering of the words.
        @Override
        public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) {
            if (o1.mScore > o2.mScore) return -1;
            if (o1.mScore < o2.mScore) return 1;
            if (o1.mCodePointCount < o2.mCodePointCount) return -1;
            if (o1.mCodePointCount > o2.mCodePointCount) return 1;
            return o1.mWord.compareTo(o2.mWord);
        }
    }
    private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator =
            new SuggestedWordInfoComparator();

    /**
     * Builds a new suggestion from {@code wordInfo} with the requested casing applied and
     * {@code trailingSingleQuotesCount} single quotes appended. Score, kind and source
     * dictionary are carried over unchanged.
     *
     * <p>{@code isAllUpperCase} takes precedence over {@code isFirstCharCapitalized}.
     */
    private static SuggestedWordInfo getTransformedSuggestedWordInfo(
            final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
            final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) {
        final StringBuilder sb = new StringBuilder(wordInfo.mWord.length());
        if (isAllUpperCase) {
            sb.append(wordInfo.mWord.toUpperCase(locale));
        } else if (isFirstCharCapitalized) {
            sb.append(StringUtils.capitalizeFirstCodePoint(wordInfo.mWord, locale));
        } else {
            sb.append(wordInfo.mWord);
        }
        for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
            sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE);
        }
        return new SuggestedWordInfo(sb.toString(), wordInfo.mScore, wordInfo.mKind,
                wordInfo.mSourceDict);
    }

    /**
     * Closes every registered dictionary and drops the reference to the main dictionary.
     */
    public void close() {
        // Collect into a set first so that a dictionary registered under several keys is only
        // closed once.
        final HashSet<Dictionary> dictionaries = CollectionUtils.newHashSet();
        dictionaries.addAll(mDictionaries.values());
        for (final Dictionary dictionary : dictionaries) {
            dictionary.close();
        }
        mMainDictionary = null;
    }
}