Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.inputmethod.latin.utils;
     18 
     19 import com.android.inputmethod.latin.BinaryDictionary;
     20 import com.android.inputmethod.latin.Dictionary;
     21 import com.android.inputmethod.latin.LatinImeLogger;
     22 import com.android.inputmethod.latin.Suggest;
     23 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
     24 
     25 import android.text.TextUtils;
     26 import android.util.Log;
     27 
     28 import java.util.concurrent.ConcurrentHashMap;
     29 
     30 public final class AutoCorrectionUtils {
     31     private static final boolean DBG = LatinImeLogger.sDBG;
     32     private static final String TAG = AutoCorrectionUtils.class.getSimpleName();
     33     private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4;
     34 
     35     private AutoCorrectionUtils() {
     36         // Purely static class: can't instantiate.
     37     }
     38 
     39     public static boolean isValidWord(final Suggest suggest, final String word,
     40             final boolean ignoreCase) {
     41         if (TextUtils.isEmpty(word)) {
     42             return false;
     43         }
     44         final ConcurrentHashMap<String, Dictionary> dictionaries = suggest.getUnigramDictionaries();
     45         final String lowerCasedWord = word.toLowerCase(suggest.mLocale);
     46         for (final String key : dictionaries.keySet()) {
     47             final Dictionary dictionary = dictionaries.get(key);
     48             // It's unclear how realistically 'dictionary' can be null, but the monkey is somehow
     49             // managing to get null in here. Presumably the language is changing to a language with
     50             // no main dictionary and the monkey manages to type a whole word before the thread
     51             // that reads the dictionary is started or something?
     52             // Ideally the passed map would come out of a {@link java.util.concurrent.Future} and
     53             // would be immutable once it's finished initializing, but concretely a null test is
     54             // probably good enough for the time being.
     55             if (null == dictionary) continue;
     56             if (dictionary.isValidWord(word)
     57                     || (ignoreCase && dictionary.isValidWord(lowerCasedWord))) {
     58                 return true;
     59             }
     60         }
     61         return false;
     62     }
     63 
     64     public static int getMaxFrequency(final ConcurrentHashMap<String, Dictionary> dictionaries,
     65             final String word) {
     66         if (TextUtils.isEmpty(word)) {
     67             return Dictionary.NOT_A_PROBABILITY;
     68         }
     69         int maxFreq = -1;
     70         for (final String key : dictionaries.keySet()) {
     71             final Dictionary dictionary = dictionaries.get(key);
     72             if (null == dictionary) continue;
     73             final int tempFreq = dictionary.getFrequency(word);
     74             if (tempFreq >= maxFreq) {
     75                 maxFreq = tempFreq;
     76             }
     77         }
     78         return maxFreq;
     79     }
     80 
     81     public static boolean suggestionExceedsAutoCorrectionThreshold(
     82             final SuggestedWordInfo suggestion, final String consideredWord,
     83             final float autoCorrectionThreshold) {
     84         if (null != suggestion) {
     85             // Shortlist a whitelisted word
     86             if (suggestion.mKind == SuggestedWordInfo.KIND_WHITELIST) return true;
     87             final int autoCorrectionSuggestionScore = suggestion.mScore;
     88             // TODO: when the normalized score of the first suggestion is nearly equals to
     89             //       the normalized score of the second suggestion, behave less aggressive.
     90             final float normalizedScore = BinaryDictionary.calcNormalizedScore(
     91                     consideredWord, suggestion.mWord, autoCorrectionSuggestionScore);
     92             if (DBG) {
     93                 Log.d(TAG, "Normalized " + consideredWord + "," + suggestion + ","
     94                         + autoCorrectionSuggestionScore + ", " + normalizedScore
     95                         + "(" + autoCorrectionThreshold + ")");
     96             }
     97             if (normalizedScore >= autoCorrectionThreshold) {
     98                 if (DBG) {
     99                     Log.d(TAG, "Auto corrected by S-threshold.");
    100                 }
    101                 return !shouldBlockAutoCorrectionBySafetyNet(consideredWord, suggestion.mWord);
    102             }
    103         }
    104         return false;
    105     }
    106 
    107     // TODO: Resolve the inconsistencies between the native auto correction algorithms and
    108     // this safety net
    109     public static boolean shouldBlockAutoCorrectionBySafetyNet(final String typedWord,
    110             final String suggestion) {
    111         // Safety net for auto correction.
    112         // Actually if we hit this safety net, it's a bug.
    113         // If user selected aggressive auto correction mode, there is no need to use the safety
    114         // net.
    115         // If the length of typed word is less than MINIMUM_SAFETY_NET_CHAR_LENGTH,
    116         // we should not use net because relatively edit distance can be big.
    117         final int typedWordLength = typedWord.length();
    118         if (typedWordLength < MINIMUM_SAFETY_NET_CHAR_LENGTH) {
    119             return false;
    120         }
    121         final int maxEditDistanceOfNativeDictionary =
    122                 (typedWordLength < 5 ? 2 : typedWordLength / 2) + 1;
    123         final int distance = BinaryDictionary.editDistance(typedWord, suggestion);
    124         if (DBG) {
    125             Log.d(TAG, "Autocorrected edit distance = " + distance
    126                     + ", " + maxEditDistanceOfNativeDictionary);
    127         }
    128         if (distance > maxEditDistanceOfNativeDictionary) {
    129             if (DBG) {
    130                 Log.e(TAG, "Safety net: before = " + typedWord + ", after = " + suggestion);
    131                 Log.e(TAG, "(Error) The edit distance of this correction exceeds limit. "
    132                         + "Turning off auto-correction.");
    133             }
    134             return true;
    135         } else {
    136             return false;
    137         }
    138     }
    139 }
    140