Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
      5  * use this file except in compliance with the License. You may obtain a copy of
      6  * the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
     13  * License for the specific language governing permissions and limitations under
     14  * the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import android.content.Context;
     20 import android.text.TextUtils;
     21 
     22 import com.android.inputmethod.keyboard.Keyboard;
     23 import com.android.inputmethod.keyboard.ProximityInfo;
     24 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
     25 
     26 import java.io.File;
     27 import java.util.ArrayList;
     28 import java.util.Comparator;
     29 import java.util.HashSet;
     30 import java.util.Locale;
     31 import java.util.concurrent.ConcurrentHashMap;
     32 
     33 /**
     34  * This class loads a dictionary and provides a list of suggestions for a given sequence of
     35  * characters. This includes corrections and completions.
     36  */
     37 public final class Suggest {
     38     public static final String TAG = Suggest.class.getSimpleName();
     39 
     40     // Session id for
     41     // {@link #getSuggestedWords(WordComposer,CharSequence,ProximityInfo,boolean,int)}.
     42     public static final int SESSION_TYPING = 0;
     43     public static final int SESSION_GESTURE = 1;
     44 
     45     // TODO: rename this to CORRECTION_OFF
     46     public static final int CORRECTION_NONE = 0;
     47     // TODO: rename this to CORRECTION_ON
     48     public static final int CORRECTION_FULL = 1;
     49 
     50     public interface SuggestInitializationListener {
     51         public void onUpdateMainDictionaryAvailability(boolean isMainDictionaryAvailable);
     52     }
     53 
     54     private static final boolean DBG = LatinImeLogger.sDBG;
     55 
     56     private Dictionary mMainDictionary;
     57     private ContactsBinaryDictionary mContactsDict;
     58     private final ConcurrentHashMap<String, Dictionary> mDictionaries =
     59             CollectionUtils.newConcurrentHashMap();
     60 
     61     public static final int MAX_SUGGESTIONS = 18;
     62 
     63     private float mAutoCorrectionThreshold;
     64 
     65     // Locale used for upper- and title-casing words
     66     private final Locale mLocale;
     67 
     68     public Suggest(final Context context, final Locale locale,
     69             final SuggestInitializationListener listener) {
     70         initAsynchronously(context, locale, listener);
     71         mLocale = locale;
     72     }
     73 
     74     /* package for test */ Suggest(final Context context, final File dictionary,
     75             final long startOffset, final long length, final Locale locale) {
     76         final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(context, dictionary,
     77                 startOffset, length /* useFullEditDistance */, false, locale);
     78         mLocale = locale;
     79         mMainDictionary = mainDict;
     80         addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, mainDict);
     81     }
     82 
     83     private void initAsynchronously(final Context context, final Locale locale,
     84             final SuggestInitializationListener listener) {
     85         resetMainDict(context, locale, listener);
     86     }
     87 
     88     private static void addOrReplaceDictionary(
     89             final ConcurrentHashMap<String, Dictionary> dictionaries,
     90             final String key, final Dictionary dict) {
     91         final Dictionary oldDict = (dict == null)
     92                 ? dictionaries.remove(key)
     93                 : dictionaries.put(key, dict);
     94         if (oldDict != null && dict != oldDict) {
     95             oldDict.close();
     96         }
     97     }
     98 
     99     public void resetMainDict(final Context context, final Locale locale,
    100             final SuggestInitializationListener listener) {
    101         mMainDictionary = null;
    102         if (listener != null) {
    103             listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
    104         }
    105         new Thread("InitializeBinaryDictionary") {
    106             @Override
    107             public void run() {
    108                 final DictionaryCollection newMainDict =
    109                         DictionaryFactory.createMainDictionaryFromManager(context, locale);
    110                 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, newMainDict);
    111                 mMainDictionary = newMainDict;
    112                 if (listener != null) {
    113                     listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
    114                 }
    115             }
    116         }.start();
    117     }
    118 
    119     // The main dictionary could have been loaded asynchronously.  Don't cache the return value
    120     // of this method.
    121     public boolean hasMainDictionary() {
    122         return null != mMainDictionary && mMainDictionary.isInitialized();
    123     }
    124 
    125     public Dictionary getMainDictionary() {
    126         return mMainDictionary;
    127     }
    128 
    129     public ContactsBinaryDictionary getContactsDictionary() {
    130         return mContactsDict;
    131     }
    132 
    133     public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() {
    134         return mDictionaries;
    135     }
    136 
    137     /**
    138      * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted
    139      * before the main dictionary, if set. This refers to the system-managed user dictionary.
    140      */
    141     public void setUserDictionary(UserBinaryDictionary userDictionary) {
    142         addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER, userDictionary);
    143     }
    144 
    145     /**
    146      * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove
    147      * the contacts dictionary by passing null to this method. In this case no contacts dictionary
    148      * won't be used.
    149      */
    150     public void setContactsDictionary(ContactsBinaryDictionary contactsDictionary) {
    151         mContactsDict = contactsDictionary;
    152         addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_CONTACTS, contactsDictionary);
    153     }
    154 
    155     public void setUserHistoryDictionary(UserHistoryDictionary userHistoryDictionary) {
    156         addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER_HISTORY, userHistoryDictionary);
    157     }
    158 
    159     public void setAutoCorrectionThreshold(float threshold) {
    160         mAutoCorrectionThreshold = threshold;
    161     }
    162 
    163     public SuggestedWords getSuggestedWords(
    164             final WordComposer wordComposer, CharSequence prevWordForBigram,
    165             final ProximityInfo proximityInfo, final boolean isCorrectionEnabled, int sessionId) {
    166         LatinImeLogger.onStartSuggestion(prevWordForBigram);
    167         if (wordComposer.isBatchMode()) {
    168             return getSuggestedWordsForBatchInput(
    169                     wordComposer, prevWordForBigram, proximityInfo, sessionId);
    170         } else {
    171             return getSuggestedWordsForTypingInput(wordComposer, prevWordForBigram, proximityInfo,
    172                     isCorrectionEnabled);
    173         }
    174     }
    175 
    176     // Retrieves suggestions for the typing input.
    177     private SuggestedWords getSuggestedWordsForTypingInput(
    178             final WordComposer wordComposer, CharSequence prevWordForBigram,
    179             final ProximityInfo proximityInfo, final boolean isCorrectionEnabled) {
    180         final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
    181         final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
    182                 MAX_SUGGESTIONS);
    183 
    184         final String typedWord = wordComposer.getTypedWord();
    185         final String consideredWord = trailingSingleQuotesCount > 0
    186                 ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount)
    187                 : typedWord;
    188         LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED);
    189 
    190         final WordComposer wordComposerForLookup;
    191         if (trailingSingleQuotesCount > 0) {
    192             wordComposerForLookup = new WordComposer(wordComposer);
    193             for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
    194                 wordComposerForLookup.deleteLast();
    195             }
    196         } else {
    197             wordComposerForLookup = wordComposer;
    198         }
    199 
    200         for (final String key : mDictionaries.keySet()) {
    201             final Dictionary dictionary = mDictionaries.get(key);
    202             suggestionsSet.addAll(dictionary.getSuggestions(
    203                     wordComposerForLookup, prevWordForBigram, proximityInfo));
    204         }
    205 
    206         final CharSequence whitelistedWord;
    207         if (suggestionsSet.isEmpty()) {
    208             whitelistedWord = null;
    209         } else if (SuggestedWordInfo.KIND_WHITELIST != suggestionsSet.first().mKind) {
    210             whitelistedWord = null;
    211         } else {
    212             whitelistedWord = suggestionsSet.first().mWord;
    213         }
    214 
    215         // The word can be auto-corrected if it has a whitelist entry that is not itself,
    216         // or if it's a 2+ characters non-word (i.e. it's not in the dictionary).
    217         final boolean allowsToBeAutoCorrected = (null != whitelistedWord
    218                 && !whitelistedWord.equals(consideredWord))
    219                 || (consideredWord.length() > 1 && !AutoCorrection.isInTheDictionary(mDictionaries,
    220                         consideredWord, wordComposer.isFirstCharCapitalized()));
    221 
    222         final boolean hasAutoCorrection;
    223         // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because
    224         // any attempt to do auto-correction is already shielded with a test for this flag; at the
    225         // same time, it feels wrong that the SuggestedWord object includes information about
    226         // the current settings. It may also be useful to know, when the setting is off, whether
    227         // the word *would* have been auto-corrected.
    228         if (!isCorrectionEnabled || !allowsToBeAutoCorrected || !wordComposer.isComposingWord()
    229                 || suggestionsSet.isEmpty() || wordComposer.hasDigits()
    230                 || wordComposer.isMostlyCaps() || wordComposer.isResumed()
    231                 || !hasMainDictionary()) {
    232             // If we don't have a main dictionary, we never want to auto-correct. The reason for
    233             // this is, the user may have a contact whose name happens to match a valid word in
    234             // their language, and it will unexpectedly auto-correct. For example, if the user
    235             // types in English with no dictionary and has a "Will" in their contact list, "will"
    236             // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no
    237             // auto-correct.
    238             hasAutoCorrection = false;
    239         } else {
    240             hasAutoCorrection = AutoCorrection.suggestionExceedsAutoCorrectionThreshold(
    241                     suggestionsSet.first(), consideredWord, mAutoCorrectionThreshold);
    242         }
    243 
    244         final ArrayList<SuggestedWordInfo> suggestionsContainer =
    245                 CollectionUtils.newArrayList(suggestionsSet);
    246         final int suggestionsCount = suggestionsContainer.size();
    247         final boolean isFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
    248         final boolean isAllUpperCase = wordComposer.isAllUpperCase();
    249         if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) {
    250             for (int i = 0; i < suggestionsCount; ++i) {
    251                 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
    252                 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
    253                         wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
    254                         trailingSingleQuotesCount);
    255                 suggestionsContainer.set(i, transformedWordInfo);
    256             }
    257         }
    258 
    259         for (int i = 0; i < suggestionsCount; ++i) {
    260             final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
    261             LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), wordInfo.mSourceDict);
    262         }
    263 
    264         if (!TextUtils.isEmpty(typedWord)) {
    265             suggestionsContainer.add(0, new SuggestedWordInfo(typedWord,
    266                     SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
    267                     Dictionary.TYPE_USER_TYPED));
    268         }
    269         SuggestedWordInfo.removeDups(suggestionsContainer);
    270 
    271         final ArrayList<SuggestedWordInfo> suggestionsList;
    272         if (DBG && !suggestionsContainer.isEmpty()) {
    273             suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer);
    274         } else {
    275             suggestionsList = suggestionsContainer;
    276         }
    277 
    278         return new SuggestedWords(suggestionsList,
    279                 // TODO: this first argument is lying. If this is a whitelisted word which is an
    280                 // actual word, it says typedWordValid = false, which looks wrong. We should either
    281                 // rename the attribute or change the value.
    282                 !allowsToBeAutoCorrected /* typedWordValid */,
    283                 hasAutoCorrection, /* willAutoCorrect */
    284                 false /* isPunctuationSuggestions */,
    285                 false /* isObsoleteSuggestions */,
    286                 !wordComposer.isComposingWord() /* isPrediction */);
    287     }
    288 
    289     // Retrieves suggestions for the batch input.
    290     private SuggestedWords getSuggestedWordsForBatchInput(
    291             final WordComposer wordComposer, CharSequence prevWordForBigram,
    292             final ProximityInfo proximityInfo, int sessionId) {
    293         final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
    294                 MAX_SUGGESTIONS);
    295 
    296         // At second character typed, search the unigrams (scores being affected by bigrams)
    297         for (final String key : mDictionaries.keySet()) {
    298             // Skip User history dictionary for lookup
    299             // TODO: The user history dictionary should just override getSuggestionsWithSessionId
    300             // to make sure it doesn't return anything and we should remove this test
    301             if (key.equals(Dictionary.TYPE_USER_HISTORY)) {
    302                 continue;
    303             }
    304             final Dictionary dictionary = mDictionaries.get(key);
    305             suggestionsSet.addAll(dictionary.getSuggestionsWithSessionId(
    306                     wordComposer, prevWordForBigram, proximityInfo, sessionId));
    307         }
    308 
    309         for (SuggestedWordInfo wordInfo : suggestionsSet) {
    310             LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), wordInfo.mSourceDict);
    311         }
    312 
    313         final ArrayList<SuggestedWordInfo> suggestionsContainer =
    314                 CollectionUtils.newArrayList(suggestionsSet);
    315         final int suggestionsCount = suggestionsContainer.size();
    316         final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock();
    317         final boolean isAllUpperCase = wordComposer.isAllUpperCase();
    318         if (isFirstCharCapitalized || isAllUpperCase) {
    319             for (int i = 0; i < suggestionsCount; ++i) {
    320                 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
    321                 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
    322                         wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
    323                         0 /* trailingSingleQuotesCount */);
    324                 suggestionsContainer.set(i, transformedWordInfo);
    325             }
    326         }
    327 
    328         SuggestedWordInfo.removeDups(suggestionsContainer);
    329         // In the batch input mode, the most relevant suggested word should act as a "typed word"
    330         // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
    331         return new SuggestedWords(suggestionsContainer,
    332                 true /* typedWordValid */,
    333                 false /* willAutoCorrect */,
    334                 false /* isPunctuationSuggestions */,
    335                 false /* isObsoleteSuggestions */,
    336                 false /* isPrediction */);
    337     }
    338 
    339     private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
    340             final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
    341         final SuggestedWordInfo typedWordInfo = suggestions.get(0);
    342         typedWordInfo.setDebugString("+");
    343         final int suggestionsSize = suggestions.size();
    344         final ArrayList<SuggestedWordInfo> suggestionsList =
    345                 CollectionUtils.newArrayList(suggestionsSize);
    346         suggestionsList.add(typedWordInfo);
    347         // Note: i here is the index in mScores[], but the index in mSuggestions is one more
    348         // than i because we added the typed word to mSuggestions without touching mScores.
    349         for (int i = 0; i < suggestionsSize - 1; ++i) {
    350             final SuggestedWordInfo cur = suggestions.get(i + 1);
    351             final float normalizedScore = BinaryDictionary.calcNormalizedScore(
    352                     typedWord, cur.toString(), cur.mScore);
    353             final String scoreInfoString;
    354             if (normalizedScore > 0) {
    355                 scoreInfoString = String.format("%d (%4.2f)", cur.mScore, normalizedScore);
    356             } else {
    357                 scoreInfoString = Integer.toString(cur.mScore);
    358             }
    359             cur.setDebugString(scoreInfoString);
    360             suggestionsList.add(cur);
    361         }
    362         return suggestionsList;
    363     }
    364 
    365     private static final class SuggestedWordInfoComparator
    366             implements Comparator<SuggestedWordInfo> {
    367         // This comparator ranks the word info with the higher frequency first. That's because
    368         // that's the order we want our elements in.
    369         @Override
    370         public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) {
    371             if (o1.mScore > o2.mScore) return -1;
    372             if (o1.mScore < o2.mScore) return 1;
    373             if (o1.mCodePointCount < o2.mCodePointCount) return -1;
    374             if (o1.mCodePointCount > o2.mCodePointCount) return 1;
    375             return o1.mWord.toString().compareTo(o2.mWord.toString());
    376         }
    377     }
    378     private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator =
    379             new SuggestedWordInfoComparator();
    380 
    381     private static SuggestedWordInfo getTransformedSuggestedWordInfo(
    382             final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
    383             final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) {
    384         final StringBuilder sb = new StringBuilder(wordInfo.mWord.length());
    385         if (isAllUpperCase) {
    386             sb.append(wordInfo.mWord.toString().toUpperCase(locale));
    387         } else if (isFirstCharCapitalized) {
    388             sb.append(StringUtils.toTitleCase(wordInfo.mWord.toString(), locale));
    389         } else {
    390             sb.append(wordInfo.mWord);
    391         }
    392         for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
    393             sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE);
    394         }
    395         return new SuggestedWordInfo(sb, wordInfo.mScore, wordInfo.mKind, wordInfo.mSourceDict);
    396     }
    397 
    398     public void close() {
    399         final HashSet<Dictionary> dictionaries = CollectionUtils.newHashSet();
    400         dictionaries.addAll(mDictionaries.values());
    401         for (final Dictionary dictionary : dictionaries) {
    402             dictionary.close();
    403         }
    404         mMainDictionary = null;
    405     }
    406 }
    407