Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import android.text.TextUtils;
     20 
     21 import static com.android.inputmethod.latin.define.DecoderSpecificConstants.SHOULD_AUTO_CORRECT_USING_NON_WHITE_LISTED_SUGGESTION;
     22 import static com.android.inputmethod.latin.define.DecoderSpecificConstants.SHOULD_REMOVE_PREVIOUSLY_REJECTED_SUGGESTION;
     23 
     24 import com.android.inputmethod.keyboard.Keyboard;
     25 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
     26 import com.android.inputmethod.latin.common.Constants;
     27 import com.android.inputmethod.latin.common.StringUtils;
     28 import com.android.inputmethod.latin.define.DebugFlags;
     29 import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;
     30 import com.android.inputmethod.latin.utils.AutoCorrectionUtils;
     31 import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
     32 import com.android.inputmethod.latin.utils.SuggestionResults;
     33 
     34 import java.util.ArrayList;
     35 import java.util.HashMap;
     36 import java.util.Locale;
     37 
     38 import javax.annotation.Nonnull;
     39 
     40 /**
     41  * This class loads a dictionary and provides a list of suggestions for a given sequence of
     42  * characters. This includes corrections and completions.
     43  */
     44 public final class Suggest {
     45     public static final String TAG = Suggest.class.getSimpleName();
     46 
     47     // Session id for
     48     // {@link #getSuggestedWords(WordComposer,String,ProximityInfo,boolean,int)}.
     49     // We are sharing the same ID between typing and gesture to save RAM footprint.
     50     public static final int SESSION_ID_TYPING = 0;
     51     public static final int SESSION_ID_GESTURE = 0;
     52 
     53     // Close to -2**31
     54     private static final int SUPPRESS_SUGGEST_THRESHOLD = -2000000000;
     55 
     56     private static final boolean DBG = DebugFlags.DEBUG_ENABLED;
     57     private final DictionaryFacilitator mDictionaryFacilitator;
     58 
     59     private static final int MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN = 12;
     60     private static final HashMap<String, Integer> sLanguageToMaximumAutoCorrectionWithSpaceLength =
     61             new HashMap<>();
     62     static {
     63         // TODO: should we add Finnish here?
     64         // TODO: This should not be hardcoded here but be written in the dictionary header
     65         sLanguageToMaximumAutoCorrectionWithSpaceLength.put(Locale.GERMAN.getLanguage(),
     66                 MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN);
     67     }
     68 
     69     private float mAutoCorrectionThreshold;
     70     private float mPlausibilityThreshold;
     71 
     72     public Suggest(final DictionaryFacilitator dictionaryFacilitator) {
     73         mDictionaryFacilitator = dictionaryFacilitator;
     74     }
     75 
     76     /**
     77      * Set the normalized-score threshold for a suggestion to be considered strong enough that we
     78      * will auto-correct to this.
     79      * @param threshold the threshold
     80      */
     81     public void setAutoCorrectionThreshold(final float threshold) {
     82         mAutoCorrectionThreshold = threshold;
     83     }
     84 
     85     /**
     86      * Set the normalized-score threshold for what we consider a "plausible" suggestion, in
     87      * the same dimension as the auto-correction threshold.
     88      * @param threshold the threshold
     89      */
     90     public void setPlausibilityThreshold(final float threshold) {
     91         mPlausibilityThreshold = threshold;
     92     }
     93 
     94     public interface OnGetSuggestedWordsCallback {
     95         public void onGetSuggestedWords(final SuggestedWords suggestedWords);
     96     }
     97 
     98     public void getSuggestedWords(final WordComposer wordComposer,
     99             final NgramContext ngramContext, final Keyboard keyboard,
    100             final SettingsValuesForSuggestion settingsValuesForSuggestion,
    101             final boolean isCorrectionEnabled, final int inputStyle, final int sequenceNumber,
    102             final OnGetSuggestedWordsCallback callback) {
    103         if (wordComposer.isBatchMode()) {
    104             getSuggestedWordsForBatchInput(wordComposer, ngramContext, keyboard,
    105                     settingsValuesForSuggestion, inputStyle, sequenceNumber, callback);
    106         } else {
    107             getSuggestedWordsForNonBatchInput(wordComposer, ngramContext, keyboard,
    108                     settingsValuesForSuggestion, inputStyle, isCorrectionEnabled,
    109                     sequenceNumber, callback);
    110         }
    111     }
    112 
    113     private static ArrayList<SuggestedWordInfo> getTransformedSuggestedWordInfoList(
    114             final WordComposer wordComposer, final SuggestionResults results,
    115             final int trailingSingleQuotesCount, final Locale defaultLocale) {
    116         final boolean shouldMakeSuggestionsAllUpperCase = wordComposer.isAllUpperCase()
    117                 && !wordComposer.isResumed();
    118         final boolean isOnlyFirstCharCapitalized =
    119                 wordComposer.isOrWillBeOnlyFirstCharCapitalized();
    120 
    121         final ArrayList<SuggestedWordInfo> suggestionsContainer = new ArrayList<>(results);
    122         final int suggestionsCount = suggestionsContainer.size();
    123         if (isOnlyFirstCharCapitalized || shouldMakeSuggestionsAllUpperCase
    124                 || 0 != trailingSingleQuotesCount) {
    125             for (int i = 0; i < suggestionsCount; ++i) {
    126                 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
    127                 final Locale wordLocale = wordInfo.mSourceDict.mLocale;
    128                 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
    129                         wordInfo, null == wordLocale ? defaultLocale : wordLocale,
    130                         shouldMakeSuggestionsAllUpperCase, isOnlyFirstCharCapitalized,
    131                         trailingSingleQuotesCount);
    132                 suggestionsContainer.set(i, transformedWordInfo);
    133             }
    134         }
    135         return suggestionsContainer;
    136     }
    137 
    138     private static SuggestedWordInfo getWhitelistedWordInfoOrNull(
    139             @Nonnull final ArrayList<SuggestedWordInfo> suggestions) {
    140         if (suggestions.isEmpty()) {
    141             return null;
    142         }
    143         final SuggestedWordInfo firstSuggestedWordInfo = suggestions.get(0);
    144         if (!firstSuggestedWordInfo.isKindOf(SuggestedWordInfo.KIND_WHITELIST)) {
    145             return null;
    146         }
    147         return firstSuggestedWordInfo;
    148     }
    149 
    150     // Retrieves suggestions for non-batch input (typing, recorrection, predictions...)
    151     // and calls the callback function with the suggestions.
    152     private void getSuggestedWordsForNonBatchInput(final WordComposer wordComposer,
    153             final NgramContext ngramContext, final Keyboard keyboard,
    154             final SettingsValuesForSuggestion settingsValuesForSuggestion,
    155             final int inputStyleIfNotPrediction, final boolean isCorrectionEnabled,
    156             final int sequenceNumber, final OnGetSuggestedWordsCallback callback) {
    157         final String typedWordString = wordComposer.getTypedWord();
    158         final int trailingSingleQuotesCount =
    159                 StringUtils.getTrailingSingleQuotesCount(typedWordString);
    160         final String consideredWord = trailingSingleQuotesCount > 0
    161                 ? typedWordString.substring(0, typedWordString.length() - trailingSingleQuotesCount)
    162                 : typedWordString;
    163 
    164         final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults(
    165                 wordComposer.getComposedDataSnapshot(), ngramContext, keyboard,
    166                 settingsValuesForSuggestion, SESSION_ID_TYPING, inputStyleIfNotPrediction);
    167         final Locale locale = mDictionaryFacilitator.getLocale();
    168         final ArrayList<SuggestedWordInfo> suggestionsContainer =
    169                 getTransformedSuggestedWordInfoList(wordComposer, suggestionResults,
    170                         trailingSingleQuotesCount, locale);
    171 
    172         boolean foundInDictionary = false;
    173         Dictionary sourceDictionaryOfRemovedWord = null;
    174         for (final SuggestedWordInfo info : suggestionsContainer) {
    175             // Search for the best dictionary, defined as the first one with the highest match
    176             // quality we can find.
    177             if (!foundInDictionary && typedWordString.equals(info.mWord)) {
    178                 // Use this source if the old match had lower quality than this match
    179                 sourceDictionaryOfRemovedWord = info.mSourceDict;
    180                 foundInDictionary = true;
    181                 break;
    182             }
    183         }
    184 
    185         final int firstOcurrenceOfTypedWordInSuggestions =
    186                 SuggestedWordInfo.removeDups(typedWordString, suggestionsContainer);
    187 
    188         final SuggestedWordInfo whitelistedWordInfo =
    189                 getWhitelistedWordInfoOrNull(suggestionsContainer);
    190         final String whitelistedWord = whitelistedWordInfo == null
    191                 ? null : whitelistedWordInfo.mWord;
    192         final boolean resultsArePredictions = !wordComposer.isComposingWord();
    193 
    194         // We allow auto-correction if whitelisting is not required or the word is whitelisted,
    195         // or if the word had more than one char and was not suggested.
    196         final boolean allowsToBeAutoCorrected =
    197                 (SHOULD_AUTO_CORRECT_USING_NON_WHITE_LISTED_SUGGESTION || whitelistedWord != null)
    198                 || (consideredWord.length() > 1 && (sourceDictionaryOfRemovedWord == null));
    199 
    200         final boolean hasAutoCorrection;
    201         // If correction is not enabled, we never auto-correct. This is for example for when
    202         // the setting "Auto-correction" is "off": we still suggest, but we don't auto-correct.
    203         if (!isCorrectionEnabled
    204                 // If the word does not allow to be auto-corrected, then we don't auto-correct.
    205                 || !allowsToBeAutoCorrected
    206                 // If we are doing prediction, then we never auto-correct of course
    207                 || resultsArePredictions
    208                 // If we don't have suggestion results, we can't evaluate the first suggestion
    209                 // for auto-correction
    210                 || suggestionResults.isEmpty()
    211                 // If the word has digits, we never auto-correct because it's likely the word
    212                 // was type with a lot of care
    213                 || wordComposer.hasDigits()
    214                 // If the word is mostly caps, we never auto-correct because this is almost
    215                 // certainly intentional (and careful input)
    216                 || wordComposer.isMostlyCaps()
    217                 // We never auto-correct when suggestions are resumed because it would be unexpected
    218                 || wordComposer.isResumed()
    219                 // If we don't have a main dictionary, we never want to auto-correct. The reason
    220                 // for this is, the user may have a contact whose name happens to match a valid
    221                 // word in their language, and it will unexpectedly auto-correct. For example, if
    222                 // the user types in English with no dictionary and has a "Will" in their contact
    223                 // list, "will" would always auto-correct to "Will" which is unwanted. Hence, no
    224                 // main dict => no auto-correct. Also, it would probably get obnoxious quickly.
    225                 // TODO: now that we have personalization, we may want to re-evaluate this decision
    226                 || !mDictionaryFacilitator.hasAtLeastOneInitializedMainDictionary()
    227                 // If the first suggestion is a shortcut we never auto-correct to it, regardless
    228                 // of how strong it is (whitelist entries are not KIND_SHORTCUT but KIND_WHITELIST).
    229                 // TODO: we may want to have shortcut-only entries auto-correct in the future.
    230                 || suggestionResults.first().isKindOf(SuggestedWordInfo.KIND_SHORTCUT)) {
    231             hasAutoCorrection = false;
    232         } else {
    233             final SuggestedWordInfo firstSuggestion = suggestionResults.first();
    234             if (suggestionResults.mFirstSuggestionExceedsConfidenceThreshold
    235                     && firstOcurrenceOfTypedWordInSuggestions != 0) {
    236                 hasAutoCorrection = true;
    237             } else if (!AutoCorrectionUtils.suggestionExceedsThreshold(
    238                     firstSuggestion, consideredWord, mAutoCorrectionThreshold)) {
    239                 // Score is too low for autocorrect
    240                 hasAutoCorrection = false;
    241             } else {
    242                 // We have a high score, so we need to check if this suggestion is in the correct
    243                 // form to allow auto-correcting to it in this language. For details of how this
    244                 // is determined, see #isAllowedByAutoCorrectionWithSpaceFilter.
    245                 // TODO: this should not have its own logic here but be handled by the dictionary.
    246                 hasAutoCorrection = isAllowedByAutoCorrectionWithSpaceFilter(firstSuggestion);
    247             }
    248         }
    249 
    250         final SuggestedWordInfo typedWordInfo = new SuggestedWordInfo(typedWordString,
    251                 "" /* prevWordsContext */, SuggestedWordInfo.MAX_SCORE,
    252                 SuggestedWordInfo.KIND_TYPED,
    253                 null == sourceDictionaryOfRemovedWord ? Dictionary.DICTIONARY_USER_TYPED
    254                         : sourceDictionaryOfRemovedWord,
    255                 SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */,
    256                 SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */);
    257         if (!TextUtils.isEmpty(typedWordString)) {
    258             suggestionsContainer.add(0, typedWordInfo);
    259         }
    260 
    261         final ArrayList<SuggestedWordInfo> suggestionsList;
    262         if (DBG && !suggestionsContainer.isEmpty()) {
    263             suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWordString,
    264                     suggestionsContainer);
    265         } else {
    266             suggestionsList = suggestionsContainer;
    267         }
    268 
    269         final int inputStyle;
    270         if (resultsArePredictions) {
    271             inputStyle = suggestionResults.mIsBeginningOfSentence
    272                     ? SuggestedWords.INPUT_STYLE_BEGINNING_OF_SENTENCE_PREDICTION
    273                     : SuggestedWords.INPUT_STYLE_PREDICTION;
    274         } else {
    275             inputStyle = inputStyleIfNotPrediction;
    276         }
    277 
    278         final boolean isTypedWordValid = firstOcurrenceOfTypedWordInSuggestions > -1
    279                 || (!resultsArePredictions && !allowsToBeAutoCorrected);
    280         callback.onGetSuggestedWords(new SuggestedWords(suggestionsList,
    281                 suggestionResults.mRawSuggestions, typedWordInfo,
    282                 isTypedWordValid,
    283                 hasAutoCorrection /* willAutoCorrect */,
    284                 false /* isObsoleteSuggestions */, inputStyle, sequenceNumber));
    285     }
    286 
    287     // Retrieves suggestions for the batch input
    288     // and calls the callback function with the suggestions.
    289     private void getSuggestedWordsForBatchInput(final WordComposer wordComposer,
    290             final NgramContext ngramContext, final Keyboard keyboard,
    291             final SettingsValuesForSuggestion settingsValuesForSuggestion,
    292             final int inputStyle, final int sequenceNumber,
    293             final OnGetSuggestedWordsCallback callback) {
    294         final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults(
    295                 wordComposer.getComposedDataSnapshot(), ngramContext, keyboard,
    296                 settingsValuesForSuggestion, SESSION_ID_GESTURE, inputStyle);
    297         // For transforming words that don't come from a dictionary, because it's our best bet
    298         final Locale locale = mDictionaryFacilitator.getLocale();
    299         final ArrayList<SuggestedWordInfo> suggestionsContainer =
    300                 new ArrayList<>(suggestionResults);
    301         final int suggestionsCount = suggestionsContainer.size();
    302         final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock();
    303         final boolean isAllUpperCase = wordComposer.isAllUpperCase();
    304         if (isFirstCharCapitalized || isAllUpperCase) {
    305             for (int i = 0; i < suggestionsCount; ++i) {
    306                 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
    307                 final Locale wordlocale = wordInfo.mSourceDict.mLocale;
    308                 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
    309                         wordInfo, null == wordlocale ? locale : wordlocale, isAllUpperCase,
    310                         isFirstCharCapitalized, 0 /* trailingSingleQuotesCount */);
    311                 suggestionsContainer.set(i, transformedWordInfo);
    312             }
    313         }
    314 
    315         if (SHOULD_REMOVE_PREVIOUSLY_REJECTED_SUGGESTION
    316                 && suggestionsContainer.size() > 1
    317                 && TextUtils.equals(suggestionsContainer.get(0).mWord,
    318                    wordComposer.getRejectedBatchModeSuggestion())) {
    319             final SuggestedWordInfo rejected = suggestionsContainer.remove(0);
    320             suggestionsContainer.add(1, rejected);
    321         }
    322         SuggestedWordInfo.removeDups(null /* typedWord */, suggestionsContainer);
    323 
    324         // For some reason some suggestions with MIN_VALUE are making their way here.
    325         // TODO: Find a more robust way to detect distracters.
    326         for (int i = suggestionsContainer.size() - 1; i >= 0; --i) {
    327             if (suggestionsContainer.get(i).mScore < SUPPRESS_SUGGEST_THRESHOLD) {
    328                 suggestionsContainer.remove(i);
    329             }
    330         }
    331 
    332         // In the batch input mode, the most relevant suggested word should act as a "typed word"
    333         // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
    334         // Note that because this method is never used to get predictions, there is no need to
    335         // modify inputType such in getSuggestedWordsForNonBatchInput.
    336         final SuggestedWordInfo pseudoTypedWordInfo = suggestionsContainer.isEmpty() ? null
    337                 : suggestionsContainer.get(0);
    338 
    339         callback.onGetSuggestedWords(new SuggestedWords(suggestionsContainer,
    340                 suggestionResults.mRawSuggestions,
    341                 pseudoTypedWordInfo,
    342                 true /* typedWordValid */,
    343                 false /* willAutoCorrect */,
    344                 false /* isObsoleteSuggestions */,
    345                 inputStyle, sequenceNumber));
    346     }
    347 
    348     private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
    349             final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
    350         final SuggestedWordInfo typedWordInfo = suggestions.get(0);
    351         typedWordInfo.setDebugString("+");
    352         final int suggestionsSize = suggestions.size();
    353         final ArrayList<SuggestedWordInfo> suggestionsList = new ArrayList<>(suggestionsSize);
    354         suggestionsList.add(typedWordInfo);
    355         // Note: i here is the index in mScores[], but the index in mSuggestions is one more
    356         // than i because we added the typed word to mSuggestions without touching mScores.
    357         for (int i = 0; i < suggestionsSize - 1; ++i) {
    358             final SuggestedWordInfo cur = suggestions.get(i + 1);
    359             final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
    360                     typedWord, cur.toString(), cur.mScore);
    361             final String scoreInfoString;
    362             if (normalizedScore > 0) {
    363                 scoreInfoString = String.format(
    364                         Locale.ROOT, "%d (%4.2f), %s", cur.mScore, normalizedScore,
    365                         cur.mSourceDict.mDictType);
    366             } else {
    367                 scoreInfoString = Integer.toString(cur.mScore);
    368             }
    369             cur.setDebugString(scoreInfoString);
    370             suggestionsList.add(cur);
    371         }
    372         return suggestionsList;
    373     }
    374 
    375     /**
    376      * Computes whether this suggestion should be blocked or not in this language
    377      *
    378      * This function implements a filter that avoids auto-correcting to suggestions that contain
    379      * spaces that are above a certain language-dependent character limit. In languages like German
    380      * where it's possible to concatenate many words, it often happens our dictionary does not
    381      * have the longer words. In this case, we offer a lot of unhelpful suggestions that contain
    382      * one or several spaces. Ideally we should understand what the user wants and display useful
    383      * suggestions by improving the dictionary and possibly having some specific logic. Until
    384      * that's possible we should avoid displaying unhelpful suggestions. But it's hard to tell
    385      * whether a suggestion is useful or not. So at least for the time being we block
    386      * auto-correction when the suggestion is long and contains a space, which should avoid the
    387      * worst damage.
    388      * This function is implementing that filter. If the language enforces no such limit, then it
    389      * always returns true. If the suggestion contains no space, it also returns true. Otherwise,
    390      * it checks the length against the language-specific limit.
    391      *
    392      * @param info the suggestion info
    393      * @return whether it's fine to auto-correct to this.
    394      */
    395     private static boolean isAllowedByAutoCorrectionWithSpaceFilter(final SuggestedWordInfo info) {
    396         final Locale locale = info.mSourceDict.mLocale;
    397         if (null == locale) {
    398             return true;
    399         }
    400         final Integer maximumLengthForThisLanguage =
    401                 sLanguageToMaximumAutoCorrectionWithSpaceLength.get(locale.getLanguage());
    402         if (null == maximumLengthForThisLanguage) {
    403             // This language does not enforce a maximum length to auto-correction
    404             return true;
    405         }
    406         return info.mWord.length() <= maximumLengthForThisLanguage
    407                 || -1 == info.mWord.indexOf(Constants.CODE_SPACE);
    408     }
    409 
    410     /* package for test */ static SuggestedWordInfo getTransformedSuggestedWordInfo(
    411             final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
    412             final boolean isOnlyFirstCharCapitalized, final int trailingSingleQuotesCount) {
    413         final StringBuilder sb = new StringBuilder(wordInfo.mWord.length());
    414         if (isAllUpperCase) {
    415             sb.append(wordInfo.mWord.toUpperCase(locale));
    416         } else if (isOnlyFirstCharCapitalized) {
    417             sb.append(StringUtils.capitalizeFirstCodePoint(wordInfo.mWord, locale));
    418         } else {
    419             sb.append(wordInfo.mWord);
    420         }
    421         // Appending quotes is here to help people quote words. However, it's not helpful
    422         // when they type words with quotes toward the end like "it's" or "didn't", where
    423         // it's more likely the user missed the last character (or didn't type it yet).
    424         final int quotesToAppend = trailingSingleQuotesCount
    425                 - (-1 == wordInfo.mWord.indexOf(Constants.CODE_SINGLE_QUOTE) ? 0 : 1);
    426         for (int i = quotesToAppend - 1; i >= 0; --i) {
    427             sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE);
    428         }
    429         return new SuggestedWordInfo(sb.toString(), wordInfo.mPrevWordsContext,
    430                 wordInfo.mScore, wordInfo.mKindAndFlags,
    431                 wordInfo.mSourceDict, wordInfo.mIndexOfTouchPointOfSecondWord,
    432                 wordInfo.mAutoCommitFirstWordConfidence);
    433     }
    434 }
    435