/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin;

import android.content.Context;
import android.text.TextUtils;

import com.android.inputmethod.keyboard.Keyboard;
import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;

import java.io.File;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Locale;
import java.util.concurrent.ConcurrentHashMap;

/**
 * This class loads a dictionary and provides a list of suggestions for a given sequence of
 * characters. This includes corrections and completions.
 *
 * <p>The main dictionary is loaded on a background thread (see
 * {@link #resetMainDict(Context, Locale, SuggestInitializationListener)}), so its availability
 * can change at any time; use {@link #hasMainDictionary()} rather than caching the answer.
 */
public final class Suggest {
    public static final String TAG = Suggest.class.getSimpleName();

    // Session id for
    // {@link #getSuggestedWords(WordComposer,CharSequence,ProximityInfo,boolean,int)}.
    public static final int SESSION_TYPING = 0;
    public static final int SESSION_GESTURE = 1;

    // TODO: rename this to CORRECTION_OFF
    public static final int CORRECTION_NONE = 0;
    // TODO: rename this to CORRECTION_ON
    public static final int CORRECTION_FULL = 1;

    /** Callback notified whenever the availability of the main dictionary changes. */
    public interface SuggestInitializationListener {
        public void onUpdateMainDictionaryAvailability(boolean isMainDictionaryAvailable);
    }

    private static final boolean DBG = LatinImeLogger.sDBG;

    // Written by the background loader thread started in resetMainDict() and read by callers
    // of hasMainDictionary()/getMainDictionary(); volatile so those readers see the update.
    private volatile Dictionary mMainDictionary;
    private ContactsBinaryDictionary mContactsDict;
    // All dictionaries consulted for suggestions, keyed by their Dictionary.TYPE_* identifier.
    private final ConcurrentHashMap<String, Dictionary> mDictionaries =
            CollectionUtils.newConcurrentHashMap();

    public static final int MAX_SUGGESTIONS = 18;

    private float mAutoCorrectionThreshold;

    // Locale used for upper- and title-casing words
    private final Locale mLocale;

    /**
     * Creates a suggestion engine and starts loading the main dictionary in the background.
     *
     * @param context the context handed to the dictionary factory
     * @param locale the locale of the main dictionary, also used for casing suggestions
     * @param listener notified when main dictionary availability changes; may be null
     */
    public Suggest(final Context context, final Locale locale,
            final SuggestInitializationListener listener) {
        // Assign the final field before starting the loader thread so that this object is
        // never visible to another thread with mLocale still unset.
        mLocale = locale;
        initAsynchronously(context, locale, listener);
    }

    /* package for test */ Suggest(final Context context, final File dictionary,
            final long startOffset, final long length, final Locale locale) {
        final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(context, dictionary,
                startOffset, length, false /* useFullEditDistance */, locale);
        mLocale = locale;
        mMainDictionary = mainDict;
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, mainDict);
    }

    private void initAsynchronously(final Context context, final Locale locale,
            final SuggestInitializationListener listener) {
        resetMainDict(context, locale, listener);
    }

    /**
     * Installs {@code dict} under {@code key}, or removes the entry when {@code dict} is null.
     * The dictionary previously stored under that key is closed, unless it is the very
     * instance being installed.
     */
    private static void addOrReplaceDictionary(
            final ConcurrentHashMap<String, Dictionary> dictionaries,
            final String key, final Dictionary dict) {
        final Dictionary oldDict = (dict == null)
                ? dictionaries.remove(key)
                : dictionaries.put(key, dict);
        if (oldDict != null && dict != oldDict) {
            oldDict.close();
        }
    }

    /**
     * Drops the current main dictionary and reloads it on a background thread.
     *
     * <p>The listener, if any, is called once synchronously right after the main dictionary
     * reference has been cleared, and once more from the loader thread when the new
     * dictionary has been installed.
     *
     * @param context the context handed to the dictionary factory
     * @param locale the locale of the dictionary to load
     * @param listener notified of availability changes; may be null
     */
    public void resetMainDict(final Context context, final Locale locale,
            final SuggestInitializationListener listener) {
        mMainDictionary = null;
        if (listener != null) {
            listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
        }
        new Thread("InitializeBinaryDictionary") {
            @Override
            public void run() {
                final DictionaryCollection newMainDict =
                        DictionaryFactory.createMainDictionaryFromManager(context, locale);
                addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, newMainDict);
                mMainDictionary = newMainDict;
                if (listener != null) {
                    listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
                }
            }
        }.start();
    }

    // The main dictionary could have been loaded asynchronously. Don't cache the return value
    // of this method.
    public boolean hasMainDictionary() {
        // Read the field once: another thread may null it out (resetMainDict/close) between
        // the null check and the isInitialized() call.
        final Dictionary mainDictionary = mMainDictionary;
        return null != mainDictionary && mainDictionary.isInitialized();
    }

    /** Returns the main dictionary, or null if none is currently set. */
    public Dictionary getMainDictionary() {
        return mMainDictionary;
    }

    /** Returns the contacts dictionary, or null if none has been set. */
    public ContactsBinaryDictionary getContactsDictionary() {
        return mContactsDict;
    }

    /** Returns the live map of dictionaries used for lookups, keyed by dictionary type. */
    public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() {
        return mDictionaries;
    }

    /**
     * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted
     * before the main dictionary, if set. This refers to the system-managed user dictionary.
     */
    public void setUserDictionary(UserBinaryDictionary userDictionary) {
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER, userDictionary);
    }

    /**
     * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove
     * the contacts dictionary by passing null to this method. In this case no contacts dictionary
     * will be used.
     */
    public void setContactsDictionary(ContactsBinaryDictionary contactsDictionary) {
        mContactsDict = contactsDictionary;
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_CONTACTS, contactsDictionary);
    }

    /**
     * Sets the user history dictionary; passing null removes it. Any dictionary previously
     * registered under the same type is closed.
     */
    public void setUserHistoryDictionary(UserHistoryDictionary userHistoryDictionary) {
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER_HISTORY, userHistoryDictionary);
    }

    /** Sets the threshold handed to the auto-correction check for typing input. */
    public void setAutoCorrectionThreshold(float threshold) {
        mAutoCorrectionThreshold = threshold;
    }

    /**
     * Computes suggestions for the current composing state.
     *
     * <p>Dispatches to the batch-input path when {@code wordComposer.isBatchMode()} is true and
     * to the typing path otherwise. {@code isCorrectionEnabled} is only used by the typing
     * path, and {@code sessionId} only by the batch path.
     *
     * @param wordComposer the current composing state
     * @param prevWordForBigram the previous word, passed through to the dictionaries
     * @param proximityInfo the keyboard proximity information, passed through to the dictionaries
     * @param isCorrectionEnabled whether auto-correction may be reported for typing input
     * @param sessionId the session id passed to the dictionaries for batch input
     * @return the suggested words
     */
    public SuggestedWords getSuggestedWords(
            final WordComposer wordComposer, CharSequence prevWordForBigram,
            final ProximityInfo proximityInfo, final boolean isCorrectionEnabled, int sessionId) {
        LatinImeLogger.onStartSuggestion(prevWordForBigram);
        if (wordComposer.isBatchMode()) {
            return getSuggestedWordsForBatchInput(
                    wordComposer, prevWordForBigram, proximityInfo, sessionId);
        } else {
            return getSuggestedWordsForTypingInput(wordComposer, prevWordForBigram, proximityInfo,
                    isCorrectionEnabled);
        }
    }

    // Retrieves suggestions for the typing input.
    private SuggestedWords getSuggestedWordsForTypingInput(
            final WordComposer wordComposer, CharSequence prevWordForBigram,
            final ProximityInfo proximityInfo, final boolean isCorrectionEnabled) {
        final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
                MAX_SUGGESTIONS);

        final String typedWord = wordComposer.getTypedWord();
        // The word used for dictionary checks is the typed word minus its trailing quotes.
        final String consideredWord = trailingSingleQuotesCount > 0
                ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount)
                : typedWord;
        LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED);

        // Look up with a copy of the composer that has the trailing quotes stripped, so the
        // caller's composer is left untouched.
        final WordComposer wordComposerForLookup;
        if (trailingSingleQuotesCount > 0) {
            wordComposerForLookup = new WordComposer(wordComposer);
            for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
                wordComposerForLookup.deleteLast();
            }
        } else {
            wordComposerForLookup = wordComposer;
        }

        // Iterate over the values directly: a keySet()/get(key) pair could observe a null
        // value if another thread removes the entry in between.
        for (final Dictionary dictionary : mDictionaries.values()) {
            suggestionsSet.addAll(dictionary.getSuggestions(
                    wordComposerForLookup, prevWordForBigram, proximityInfo));
        }

        final CharSequence whitelistedWord;
        if (suggestionsSet.isEmpty()) {
            whitelistedWord = null;
        } else if (SuggestedWordInfo.KIND_WHITELIST != suggestionsSet.first().mKind) {
            whitelistedWord = null;
        } else {
            whitelistedWord = suggestionsSet.first().mWord;
        }

        // The word can be auto-corrected if it has a whitelist entry that is not itself,
        // or if it's a 2+ characters non-word (i.e. it's not in the dictionary).
        // The whitelisted word is a CharSequence, so compare by content with TextUtils.equals;
        // CharSequence#equals(String) is false for any non-String implementation.
        final boolean allowsToBeAutoCorrected = (null != whitelistedWord
                && !TextUtils.equals(whitelistedWord, consideredWord))
                || (consideredWord.length() > 1 && !AutoCorrection.isInTheDictionary(mDictionaries,
                        consideredWord, wordComposer.isFirstCharCapitalized()));

        final boolean hasAutoCorrection;
        // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because
        // any attempt to do auto-correction is already shielded with a test for this flag; at the
        // same time, it feels wrong that the SuggestedWord object includes information about
        // the current settings. It may also be useful to know, when the setting is off, whether
        // the word *would* have been auto-corrected.
        if (!isCorrectionEnabled || !allowsToBeAutoCorrected || !wordComposer.isComposingWord()
                || suggestionsSet.isEmpty() || wordComposer.hasDigits()
                || wordComposer.isMostlyCaps() || wordComposer.isResumed()
                || !hasMainDictionary()) {
            // If we don't have a main dictionary, we never want to auto-correct. The reason for
            // this is, the user may have a contact whose name happens to match a valid word in
            // their language, and it will unexpectedly auto-correct. For example, if the user
            // types in English with no dictionary and has a "Will" in their contact list, "will"
            // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no
            // auto-correct.
            hasAutoCorrection = false;
        } else {
            hasAutoCorrection = AutoCorrection.suggestionExceedsAutoCorrectionThreshold(
                    suggestionsSet.first(), consideredWord, mAutoCorrectionThreshold);
        }

        final ArrayList<SuggestedWordInfo> suggestionsContainer =
                CollectionUtils.newArrayList(suggestionsSet);
        final int suggestionsCount = suggestionsContainer.size();
        final boolean isFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
        // Re-apply the typed word's casing and trailing quotes to every suggestion.
        if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) {
            for (int i = 0; i < suggestionsCount; ++i) {
                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
                        trailingSingleQuotesCount);
                suggestionsContainer.set(i, transformedWordInfo);
            }
        }

        for (int i = 0; i < suggestionsCount; ++i) {
            final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), wordInfo.mSourceDict);
        }

        // The typed word always comes first, with the maximum score.
        if (!TextUtils.isEmpty(typedWord)) {
            suggestionsContainer.add(0, new SuggestedWordInfo(typedWord,
                    SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
                    Dictionary.TYPE_USER_TYPED));
        }
        SuggestedWordInfo.removeDups(suggestionsContainer);

        final ArrayList<SuggestedWordInfo> suggestionsList;
        if (DBG && !suggestionsContainer.isEmpty()) {
            suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer);
        } else {
            suggestionsList = suggestionsContainer;
        }

        return new SuggestedWords(suggestionsList,
                // TODO: this first argument is lying. If this is a whitelisted word which is an
                // actual word, it says typedWordValid = false, which looks wrong. We should either
                // rename the attribute or change the value.
                !allowsToBeAutoCorrected /* typedWordValid */,
                hasAutoCorrection, /* willAutoCorrect */
                false /* isPunctuationSuggestions */,
                false /* isObsoleteSuggestions */,
                !wordComposer.isComposingWord() /* isPrediction */);
    }

    // Retrieves suggestions for the batch input.
    private SuggestedWords getSuggestedWordsForBatchInput(
            final WordComposer wordComposer, CharSequence prevWordForBigram,
            final ProximityInfo proximityInfo, int sessionId) {
        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
                MAX_SUGGESTIONS);

        // At second character typed, search the unigrams (scores being affected by bigrams)
        for (final String key : mDictionaries.keySet()) {
            // Skip User history dictionary for lookup
            // TODO: The user history dictionary should just override getSuggestionsWithSessionId
            // to make sure it doesn't return anything and we should remove this test
            if (key.equals(Dictionary.TYPE_USER_HISTORY)) {
                continue;
            }
            final Dictionary dictionary = mDictionaries.get(key);
            if (null == dictionary) {
                // The entry was removed by another thread after the key was handed out.
                continue;
            }
            suggestionsSet.addAll(dictionary.getSuggestionsWithSessionId(
                    wordComposer, prevWordForBigram, proximityInfo, sessionId));
        }

        for (SuggestedWordInfo wordInfo : suggestionsSet) {
            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), wordInfo.mSourceDict);
        }

        final ArrayList<SuggestedWordInfo> suggestionsContainer =
                CollectionUtils.newArrayList(suggestionsSet);
        final int suggestionsCount = suggestionsContainer.size();
        final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock();
        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
        if (isFirstCharCapitalized || isAllUpperCase) {
            for (int i = 0; i < suggestionsCount; ++i) {
                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
                        0 /* trailingSingleQuotesCount */);
                suggestionsContainer.set(i, transformedWordInfo);
            }
        }

        SuggestedWordInfo.removeDups(suggestionsContainer);
        // In the batch input mode, the most relevant suggested word should act as a "typed word"
        // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
        return new SuggestedWords(suggestionsContainer,
                true /* typedWordValid */,
                false /* willAutoCorrect */,
                false /* isPunctuationSuggestions */,
                false /* isObsoleteSuggestions */,
                false /* isPrediction */);
    }

    /**
     * Builds a copy of {@code suggestions} in which every entry carries a debug string: "+" for
     * the first entry, and the raw score (plus the normalized score when it is positive) for
     * all the others. {@code suggestions} must not be empty.
     */
    private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
            final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
        // NOTE(review): the first entry is presumably the typed word inserted by the caller;
        // it is marked with "+" rather than a score.
        final SuggestedWordInfo typedWordInfo = suggestions.get(0);
        typedWordInfo.setDebugString("+");
        final int suggestionsSize = suggestions.size();
        final ArrayList<SuggestedWordInfo> suggestionsList =
                CollectionUtils.newArrayList(suggestionsSize);
        suggestionsList.add(typedWordInfo);
        // Index 0 was handled above; annotate the remaining entries with their scores.
        for (int i = 1; i < suggestionsSize; ++i) {
            final SuggestedWordInfo cur = suggestions.get(i);
            // NOTE(review): cur.toString() is presumably the bare word at this point, since the
            // debug string is only set below; confirm against SuggestedWordInfo#toString.
            final float normalizedScore = BinaryDictionary.calcNormalizedScore(
                    typedWord, cur.toString(), cur.mScore);
            final String scoreInfoString;
            if (normalizedScore > 0) {
                scoreInfoString = String.format("%d (%4.2f)", cur.mScore, normalizedScore);
            } else {
                scoreInfoString = Integer.toString(cur.mScore);
            }
            cur.setDebugString(scoreInfoString);
            suggestionsList.add(cur);
        }
        return suggestionsList;
    }

    private static final class SuggestedWordInfoComparator
            implements Comparator<SuggestedWordInfo> {
        // This comparator ranks the word info with the higher frequency first. That's because
        // that's the order we want our elements in.
        // Ties on score are broken by code point count (shorter first), then by the word text.
        @Override
        public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) {
            if (o1.mScore > o2.mScore) return -1;
            if (o1.mScore < o2.mScore) return 1;
            if (o1.mCodePointCount < o2.mCodePointCount) return -1;
            if (o1.mCodePointCount > o2.mCodePointCount) return 1;
            return o1.mWord.toString().compareTo(o2.mWord.toString());
        }
    }
    private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator =
            new SuggestedWordInfoComparator();

    /**
     * Returns a new word info holding the word of {@code wordInfo} upper-cased (when
     * {@code isAllUpperCase}) or title-cased (when only {@code isFirstCharCapitalized}), followed
     * by {@code trailingSingleQuotesCount} single quotes. Score, kind and source dictionary are
     * copied over unchanged.
     */
    private static SuggestedWordInfo getTransformedSuggestedWordInfo(
            final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
            final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) {
        final StringBuilder sb = new StringBuilder(wordInfo.mWord.length());
        if (isAllUpperCase) {
            sb.append(wordInfo.mWord.toString().toUpperCase(locale));
        } else if (isFirstCharCapitalized) {
            sb.append(StringUtils.toTitleCase(wordInfo.mWord.toString(), locale));
        } else {
            sb.append(wordInfo.mWord);
        }
        for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
            sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE);
        }
        return new SuggestedWordInfo(sb, wordInfo.mScore, wordInfo.mKind, wordInfo.mSourceDict);
    }

    /** Closes every registered dictionary and clears the main dictionary reference. */
    public void close() {
        // Collect into a set first so that a dictionary registered under several keys is
        // only closed once.
        final HashSet<Dictionary> dictionaries = CollectionUtils.newHashSet();
        dictionaries.addAll(mDictionaries.values());
        for (final Dictionary dictionary : dictionaries) {
            dictionary.close();
        }
        mMainDictionary = null;
    }
}