Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import android.content.Context;
     20 import android.text.TextUtils;
     21 
     22 import com.android.inputmethod.annotations.UsedForTesting;
     23 import com.android.inputmethod.keyboard.ProximityInfo;
     24 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
     25 
     26 import java.io.File;
     27 import java.util.ArrayList;
     28 import java.util.Comparator;
     29 import java.util.HashSet;
     30 import java.util.Locale;
     31 import java.util.concurrent.ConcurrentHashMap;
     32 
     33 /**
     34  * This class loads a dictionary and provides a list of suggestions for a given sequence of
     35  * characters. This includes corrections and completions.
     36  */
     37 public final class Suggest {
     38     public static final String TAG = Suggest.class.getSimpleName();
     39 
     40     // Session id for
     41     // {@link #getSuggestedWords(WordComposer,String,ProximityInfo,boolean,int)}.
     42     public static final int SESSION_TYPING = 0;
     43     public static final int SESSION_GESTURE = 1;
     44 
     45     // TODO: rename this to CORRECTION_OFF
     46     public static final int CORRECTION_NONE = 0;
     47     // TODO: rename this to CORRECTION_ON
     48     public static final int CORRECTION_FULL = 1;
     49 
     50     // Close to -2**31
     51     private static final int SUPPRESS_SUGGEST_THRESHOLD = -2000000000;
     52 
     53     public interface SuggestInitializationListener {
     54         public void onUpdateMainDictionaryAvailability(boolean isMainDictionaryAvailable);
     55     }
     56 
     57     private static final boolean DBG = LatinImeLogger.sDBG;
     58 
     59     private Dictionary mMainDictionary;
     60     private ContactsBinaryDictionary mContactsDict;
     61     private final ConcurrentHashMap<String, Dictionary> mDictionaries =
     62             CollectionUtils.newConcurrentHashMap();
     63     @UsedForTesting
     64     private boolean mIsCurrentlyWaitingForMainDictionary = false;
     65 
     66     public static final int MAX_SUGGESTIONS = 18;
     67 
     68     private float mAutoCorrectionThreshold;
     69 
     70     // Locale used for upper- and title-casing words
     71     public final Locale mLocale;
     72 
     73     public Suggest(final Context context, final Locale locale,
     74             final SuggestInitializationListener listener) {
     75         initAsynchronously(context, locale, listener);
     76         mLocale = locale;
     77     }
     78 
     79     @UsedForTesting
     80     Suggest(final File dictionary, final long startOffset, final long length, final Locale locale) {
     81         final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(dictionary,
     82                 startOffset, length /* useFullEditDistance */, false, locale);
     83         mLocale = locale;
     84         mMainDictionary = mainDict;
     85         addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, mainDict);
     86     }
     87 
     88     private void initAsynchronously(final Context context, final Locale locale,
     89             final SuggestInitializationListener listener) {
     90         resetMainDict(context, locale, listener);
     91     }
     92 
     93     private static void addOrReplaceDictionary(
     94             final ConcurrentHashMap<String, Dictionary> dictionaries,
     95             final String key, final Dictionary dict) {
     96         final Dictionary oldDict = (dict == null)
     97                 ? dictionaries.remove(key)
     98                 : dictionaries.put(key, dict);
     99         if (oldDict != null && dict != oldDict) {
    100             oldDict.close();
    101         }
    102     }
    103 
    104     public void resetMainDict(final Context context, final Locale locale,
    105             final SuggestInitializationListener listener) {
    106         mIsCurrentlyWaitingForMainDictionary = true;
    107         mMainDictionary = null;
    108         if (listener != null) {
    109             listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
    110         }
    111         new Thread("InitializeBinaryDictionary") {
    112             @Override
    113             public void run() {
    114                 final DictionaryCollection newMainDict =
    115                         DictionaryFactory.createMainDictionaryFromManager(context, locale);
    116                 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, newMainDict);
    117                 mMainDictionary = newMainDict;
    118                 if (listener != null) {
    119                     listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
    120                 }
    121                 mIsCurrentlyWaitingForMainDictionary = false;
    122             }
    123         }.start();
    124     }
    125 
    126     // The main dictionary could have been loaded asynchronously.  Don't cache the return value
    127     // of this method.
    128     public boolean hasMainDictionary() {
    129         return null != mMainDictionary && mMainDictionary.isInitialized();
    130     }
    131 
    132     @UsedForTesting
    133     public boolean isCurrentlyWaitingForMainDictionary() {
    134         return mIsCurrentlyWaitingForMainDictionary;
    135     }
    136 
    137     public Dictionary getMainDictionary() {
    138         return mMainDictionary;
    139     }
    140 
    141     public ContactsBinaryDictionary getContactsDictionary() {
    142         return mContactsDict;
    143     }
    144 
    145     public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() {
    146         return mDictionaries;
    147     }
    148 
    149     /**
    150      * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted
    151      * before the main dictionary, if set. This refers to the system-managed user dictionary.
    152      */
    153     public void setUserDictionary(final UserBinaryDictionary userDictionary) {
    154         addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER, userDictionary);
    155     }
    156 
    157     /**
    158      * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove
    159      * the contacts dictionary by passing null to this method. In this case no contacts dictionary
    160      * won't be used.
    161      */
    162     public void setContactsDictionary(final ContactsBinaryDictionary contactsDictionary) {
    163         mContactsDict = contactsDictionary;
    164         addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_CONTACTS, contactsDictionary);
    165     }
    166 
    167     public void setUserHistoryDictionary(final UserHistoryDictionary userHistoryDictionary) {
    168         addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER_HISTORY, userHistoryDictionary);
    169     }
    170 
    171     public void setAutoCorrectionThreshold(float threshold) {
    172         mAutoCorrectionThreshold = threshold;
    173     }
    174 
    175     public SuggestedWords getSuggestedWords(final WordComposer wordComposer,
    176             final String prevWordForBigram, final ProximityInfo proximityInfo,
    177             final boolean blockOffensiveWords, final boolean isCorrectionEnabled,
    178             final int sessionId) {
    179         LatinImeLogger.onStartSuggestion(prevWordForBigram);
    180         if (wordComposer.isBatchMode()) {
    181             return getSuggestedWordsForBatchInput(
    182                     wordComposer, prevWordForBigram, proximityInfo, blockOffensiveWords, sessionId);
    183         } else {
    184             return getSuggestedWordsForTypingInput(wordComposer, prevWordForBigram, proximityInfo,
    185                     blockOffensiveWords, isCorrectionEnabled);
    186         }
    187     }
    188 
    189     // Retrieves suggestions for the typing input.
    190     private SuggestedWords getSuggestedWordsForTypingInput(final WordComposer wordComposer,
    191             final String prevWordForBigram, final ProximityInfo proximityInfo,
    192             final boolean blockOffensiveWords, final boolean isCorrectionEnabled) {
    193         final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
    194         final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
    195                 MAX_SUGGESTIONS);
    196 
    197         final String typedWord = wordComposer.getTypedWord();
    198         final String consideredWord = trailingSingleQuotesCount > 0
    199                 ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount)
    200                 : typedWord;
    201         LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED);
    202 
    203         final WordComposer wordComposerForLookup;
    204         if (trailingSingleQuotesCount > 0) {
    205             wordComposerForLookup = new WordComposer(wordComposer);
    206             for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
    207                 wordComposerForLookup.deleteLast();
    208             }
    209         } else {
    210             wordComposerForLookup = wordComposer;
    211         }
    212 
    213         for (final String key : mDictionaries.keySet()) {
    214             final Dictionary dictionary = mDictionaries.get(key);
    215             suggestionsSet.addAll(dictionary.getSuggestions(
    216                     wordComposerForLookup, prevWordForBigram, proximityInfo, blockOffensiveWords));
    217         }
    218 
    219         final String whitelistedWord;
    220         if (suggestionsSet.isEmpty()) {
    221             whitelistedWord = null;
    222         } else if (SuggestedWordInfo.KIND_WHITELIST != suggestionsSet.first().mKind) {
    223             whitelistedWord = null;
    224         } else {
    225             whitelistedWord = suggestionsSet.first().mWord;
    226         }
    227 
    228         // The word can be auto-corrected if it has a whitelist entry that is not itself,
    229         // or if it's a 2+ characters non-word (i.e. it's not in the dictionary).
    230         final boolean allowsToBeAutoCorrected = (null != whitelistedWord
    231                 && !whitelistedWord.equals(consideredWord))
    232                 || (consideredWord.length() > 1 && !AutoCorrection.isInTheDictionary(mDictionaries,
    233                         consideredWord, wordComposer.isFirstCharCapitalized()));
    234 
    235         final boolean hasAutoCorrection;
    236         // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because
    237         // any attempt to do auto-correction is already shielded with a test for this flag; at the
    238         // same time, it feels wrong that the SuggestedWord object includes information about
    239         // the current settings. It may also be useful to know, when the setting is off, whether
    240         // the word *would* have been auto-corrected.
    241         if (!isCorrectionEnabled || !allowsToBeAutoCorrected || !wordComposer.isComposingWord()
    242                 || suggestionsSet.isEmpty() || wordComposer.hasDigits()
    243                 || wordComposer.isMostlyCaps() || wordComposer.isResumed()
    244                 || !hasMainDictionary()) {
    245             // If we don't have a main dictionary, we never want to auto-correct. The reason for
    246             // this is, the user may have a contact whose name happens to match a valid word in
    247             // their language, and it will unexpectedly auto-correct. For example, if the user
    248             // types in English with no dictionary and has a "Will" in their contact list, "will"
    249             // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no
    250             // auto-correct.
    251             hasAutoCorrection = false;
    252         } else {
    253             hasAutoCorrection = AutoCorrection.suggestionExceedsAutoCorrectionThreshold(
    254                     suggestionsSet.first(), consideredWord, mAutoCorrectionThreshold);
    255         }
    256 
    257         final ArrayList<SuggestedWordInfo> suggestionsContainer =
    258                 CollectionUtils.newArrayList(suggestionsSet);
    259         final int suggestionsCount = suggestionsContainer.size();
    260         final boolean isFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
    261         final boolean isAllUpperCase = wordComposer.isAllUpperCase();
    262         if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) {
    263             for (int i = 0; i < suggestionsCount; ++i) {
    264                 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
    265                 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
    266                         wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
    267                         trailingSingleQuotesCount);
    268                 suggestionsContainer.set(i, transformedWordInfo);
    269             }
    270         }
    271 
    272         for (int i = 0; i < suggestionsCount; ++i) {
    273             final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
    274             LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), wordInfo.mSourceDict);
    275         }
    276 
    277         if (!TextUtils.isEmpty(typedWord)) {
    278             suggestionsContainer.add(0, new SuggestedWordInfo(typedWord,
    279                     SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
    280                     Dictionary.TYPE_USER_TYPED));
    281         }
    282         SuggestedWordInfo.removeDups(suggestionsContainer);
    283 
    284         final ArrayList<SuggestedWordInfo> suggestionsList;
    285         if (DBG && !suggestionsContainer.isEmpty()) {
    286             suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer);
    287         } else {
    288             suggestionsList = suggestionsContainer;
    289         }
    290 
    291         return new SuggestedWords(suggestionsList,
    292                 // TODO: this first argument is lying. If this is a whitelisted word which is an
    293                 // actual word, it says typedWordValid = false, which looks wrong. We should either
    294                 // rename the attribute or change the value.
    295                 !allowsToBeAutoCorrected /* typedWordValid */,
    296                 hasAutoCorrection, /* willAutoCorrect */
    297                 false /* isPunctuationSuggestions */,
    298                 false /* isObsoleteSuggestions */,
    299                 !wordComposer.isComposingWord() /* isPrediction */);
    300     }
    301 
    302     // Retrieves suggestions for the batch input.
    303     private SuggestedWords getSuggestedWordsForBatchInput(final WordComposer wordComposer,
    304             final String prevWordForBigram, final ProximityInfo proximityInfo,
    305             final boolean blockOffensiveWords, final int sessionId) {
    306         final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
    307                 MAX_SUGGESTIONS);
    308 
    309         // At second character typed, search the unigrams (scores being affected by bigrams)
    310         for (final String key : mDictionaries.keySet()) {
    311             // Skip User history dictionary for lookup
    312             // TODO: The user history dictionary should just override getSuggestionsWithSessionId
    313             // to make sure it doesn't return anything and we should remove this test
    314             if (key.equals(Dictionary.TYPE_USER_HISTORY)) {
    315                 continue;
    316             }
    317             final Dictionary dictionary = mDictionaries.get(key);
    318             suggestionsSet.addAll(dictionary.getSuggestionsWithSessionId(wordComposer,
    319                     prevWordForBigram, proximityInfo, blockOffensiveWords, sessionId));
    320         }
    321 
    322         for (SuggestedWordInfo wordInfo : suggestionsSet) {
    323             LatinImeLogger.onAddSuggestedWord(wordInfo.mWord, wordInfo.mSourceDict);
    324         }
    325 
    326         final ArrayList<SuggestedWordInfo> suggestionsContainer =
    327                 CollectionUtils.newArrayList(suggestionsSet);
    328         final int suggestionsCount = suggestionsContainer.size();
    329         final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock();
    330         final boolean isAllUpperCase = wordComposer.isAllUpperCase();
    331         if (isFirstCharCapitalized || isAllUpperCase) {
    332             for (int i = 0; i < suggestionsCount; ++i) {
    333                 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
    334                 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
    335                         wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
    336                         0 /* trailingSingleQuotesCount */);
    337                 suggestionsContainer.set(i, transformedWordInfo);
    338             }
    339         }
    340 
    341         if (suggestionsContainer.size() > 1 && TextUtils.equals(suggestionsContainer.get(0).mWord,
    342                 wordComposer.getRejectedBatchModeSuggestion())) {
    343             final SuggestedWordInfo rejected = suggestionsContainer.remove(0);
    344             suggestionsContainer.add(1, rejected);
    345         }
    346         SuggestedWordInfo.removeDups(suggestionsContainer);
    347 
    348         // For some reason some suggestions with MIN_VALUE are making their way here.
    349         // TODO: Find a more robust way to detect distractors.
    350         for (int i = suggestionsContainer.size() - 1; i >= 0; --i) {
    351             if (suggestionsContainer.get(i).mScore < SUPPRESS_SUGGEST_THRESHOLD) {
    352                 suggestionsContainer.remove(i);
    353             }
    354         }
    355 
    356         // In the batch input mode, the most relevant suggested word should act as a "typed word"
    357         // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
    358         return new SuggestedWords(suggestionsContainer,
    359                 true /* typedWordValid */,
    360                 false /* willAutoCorrect */,
    361                 false /* isPunctuationSuggestions */,
    362                 false /* isObsoleteSuggestions */,
    363                 false /* isPrediction */);
    364     }
    365 
    366     private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
    367             final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
    368         final SuggestedWordInfo typedWordInfo = suggestions.get(0);
    369         typedWordInfo.setDebugString("+");
    370         final int suggestionsSize = suggestions.size();
    371         final ArrayList<SuggestedWordInfo> suggestionsList =
    372                 CollectionUtils.newArrayList(suggestionsSize);
    373         suggestionsList.add(typedWordInfo);
    374         // Note: i here is the index in mScores[], but the index in mSuggestions is one more
    375         // than i because we added the typed word to mSuggestions without touching mScores.
    376         for (int i = 0; i < suggestionsSize - 1; ++i) {
    377             final SuggestedWordInfo cur = suggestions.get(i + 1);
    378             final float normalizedScore = BinaryDictionary.calcNormalizedScore(
    379                     typedWord, cur.toString(), cur.mScore);
    380             final String scoreInfoString;
    381             if (normalizedScore > 0) {
    382                 scoreInfoString = String.format("%d (%4.2f)", cur.mScore, normalizedScore);
    383             } else {
    384                 scoreInfoString = Integer.toString(cur.mScore);
    385             }
    386             cur.setDebugString(scoreInfoString);
    387             suggestionsList.add(cur);
    388         }
    389         return suggestionsList;
    390     }
    391 
    392     private static final class SuggestedWordInfoComparator
    393             implements Comparator<SuggestedWordInfo> {
    394         // This comparator ranks the word info with the higher frequency first. That's because
    395         // that's the order we want our elements in.
    396         @Override
    397         public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) {
    398             if (o1.mScore > o2.mScore) return -1;
    399             if (o1.mScore < o2.mScore) return 1;
    400             if (o1.mCodePointCount < o2.mCodePointCount) return -1;
    401             if (o1.mCodePointCount > o2.mCodePointCount) return 1;
    402             return o1.mWord.compareTo(o2.mWord);
    403         }
    404     }
    405     private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator =
    406             new SuggestedWordInfoComparator();
    407 
    408     private static SuggestedWordInfo getTransformedSuggestedWordInfo(
    409             final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
    410             final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) {
    411         final StringBuilder sb = new StringBuilder(wordInfo.mWord.length());
    412         if (isAllUpperCase) {
    413             sb.append(wordInfo.mWord.toUpperCase(locale));
    414         } else if (isFirstCharCapitalized) {
    415             sb.append(StringUtils.capitalizeFirstCodePoint(wordInfo.mWord, locale));
    416         } else {
    417             sb.append(wordInfo.mWord);
    418         }
    419         for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
    420             sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE);
    421         }
    422         return new SuggestedWordInfo(sb.toString(), wordInfo.mScore, wordInfo.mKind,
    423                 wordInfo.mSourceDict);
    424     }
    425 
    426     public void close() {
    427         final HashSet<Dictionary> dictionaries = CollectionUtils.newHashSet();
    428         dictionaries.addAll(mDictionaries.values());
    429         for (final Dictionary dictionary : dictionaries) {
    430             dictionary.close();
    431         }
    432         mMainDictionary = null;
    433     }
    434 }
    435