Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
     20 
     21 import android.text.TextUtils;
     22 import android.util.Log;
     23 
     24 import java.util.concurrent.ConcurrentHashMap;
     25 
     26 public final class AutoCorrection {
     27     private static final boolean DBG = LatinImeLogger.sDBG;
     28     private static final String TAG = AutoCorrection.class.getSimpleName();
     29     private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4;
     30 
     31     private AutoCorrection() {
     32         // Purely static class: can't instantiate.
     33     }
     34 
     35     public static boolean isValidWord(final ConcurrentHashMap<String, Dictionary> dictionaries,
     36             final String word, final boolean ignoreCase) {
     37         if (TextUtils.isEmpty(word)) {
     38             return false;
     39         }
     40         final String lowerCasedWord = word.toLowerCase();
     41         for (final String key : dictionaries.keySet()) {
     42             final Dictionary dictionary = dictionaries.get(key);
     43             // It's unclear how realistically 'dictionary' can be null, but the monkey is somehow
     44             // managing to get null in here. Presumably the language is changing to a language with
     45             // no main dictionary and the monkey manages to type a whole word before the thread
     46             // that reads the dictionary is started or something?
     47             // Ideally the passed map would come out of a {@link java.util.concurrent.Future} and
     48             // would be immutable once it's finished initializing, but concretely a null test is
     49             // probably good enough for the time being.
     50             if (null == dictionary) continue;
     51             if (dictionary.isValidWord(word)
     52                     || (ignoreCase && dictionary.isValidWord(lowerCasedWord))) {
     53                 return true;
     54             }
     55         }
     56         return false;
     57     }
     58 
     59     public static int getMaxFrequency(final ConcurrentHashMap<String, Dictionary> dictionaries,
     60             final String word) {
     61         if (TextUtils.isEmpty(word)) {
     62             return Dictionary.NOT_A_PROBABILITY;
     63         }
     64         int maxFreq = -1;
     65         for (final String key : dictionaries.keySet()) {
     66             final Dictionary dictionary = dictionaries.get(key);
     67             if (null == dictionary) continue;
     68             final int tempFreq = dictionary.getFrequency(word);
     69             if (tempFreq >= maxFreq) {
     70                 maxFreq = tempFreq;
     71             }
     72         }
     73         return maxFreq;
     74     }
     75 
     76     // Returns true if this is in any of the dictionaries.
     77     public static boolean isInTheDictionary(
     78             final ConcurrentHashMap<String, Dictionary> dictionaries,
     79             final String word, final boolean ignoreCase) {
     80         return isValidWord(dictionaries, word, ignoreCase);
     81     }
     82 
     83     public static boolean suggestionExceedsAutoCorrectionThreshold(
     84             final SuggestedWordInfo suggestion, final String consideredWord,
     85             final float autoCorrectionThreshold) {
     86         if (null != suggestion) {
     87             // Shortlist a whitelisted word
     88             if (suggestion.mKind == SuggestedWordInfo.KIND_WHITELIST) return true;
     89             final int autoCorrectionSuggestionScore = suggestion.mScore;
     90             // TODO: when the normalized score of the first suggestion is nearly equals to
     91             //       the normalized score of the second suggestion, behave less aggressive.
     92             final float normalizedScore = BinaryDictionary.calcNormalizedScore(
     93                     consideredWord, suggestion.mWord, autoCorrectionSuggestionScore);
     94             if (DBG) {
     95                 Log.d(TAG, "Normalized " + consideredWord + "," + suggestion + ","
     96                         + autoCorrectionSuggestionScore + ", " + normalizedScore
     97                         + "(" + autoCorrectionThreshold + ")");
     98             }
     99             if (normalizedScore >= autoCorrectionThreshold) {
    100                 if (DBG) {
    101                     Log.d(TAG, "Auto corrected by S-threshold.");
    102                 }
    103                 return !shouldBlockAutoCorrectionBySafetyNet(consideredWord, suggestion.mWord);
    104             }
    105         }
    106         return false;
    107     }
    108 
    109     // TODO: Resolve the inconsistencies between the native auto correction algorithms and
    110     // this safety net
    111     public static boolean shouldBlockAutoCorrectionBySafetyNet(final String typedWord,
    112             final String suggestion) {
    113         // Safety net for auto correction.
    114         // Actually if we hit this safety net, it's a bug.
    115         // If user selected aggressive auto correction mode, there is no need to use the safety
    116         // net.
    117         // If the length of typed word is less than MINIMUM_SAFETY_NET_CHAR_LENGTH,
    118         // we should not use net because relatively edit distance can be big.
    119         final int typedWordLength = typedWord.length();
    120         if (typedWordLength < MINIMUM_SAFETY_NET_CHAR_LENGTH) {
    121             return false;
    122         }
    123         final int maxEditDistanceOfNativeDictionary =
    124                 (typedWordLength < 5 ? 2 : typedWordLength / 2) + 1;
    125         final int distance = BinaryDictionary.editDistance(typedWord, suggestion);
    126         if (DBG) {
    127             Log.d(TAG, "Autocorrected edit distance = " + distance
    128                     + ", " + maxEditDistanceOfNativeDictionary);
    129         }
    130         if (distance > maxEditDistanceOfNativeDictionary) {
    131             if (DBG) {
    132                 Log.e(TAG, "Safety net: before = " + typedWord + ", after = " + suggestion);
    133                 Log.e(TAG, "(Error) The edit distance of this correction exceeds limit. "
    134                         + "Turning off auto-correction.");
    135             }
    136             return true;
    137         } else {
    138             return false;
    139         }
    140     }
    141 }
    142