1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin.utils; 18 19 import com.android.inputmethod.latin.BinaryDictionary; 20 import com.android.inputmethod.latin.Dictionary; 21 import com.android.inputmethod.latin.LatinImeLogger; 22 import com.android.inputmethod.latin.Suggest; 23 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; 24 25 import android.text.TextUtils; 26 import android.util.Log; 27 28 import java.util.concurrent.ConcurrentHashMap; 29 30 public final class AutoCorrectionUtils { 31 private static final boolean DBG = LatinImeLogger.sDBG; 32 private static final String TAG = AutoCorrectionUtils.class.getSimpleName(); 33 private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4; 34 35 private AutoCorrectionUtils() { 36 // Purely static class: can't instantiate. 37 } 38 39 public static boolean isValidWord(final Suggest suggest, final String word, 40 final boolean ignoreCase) { 41 if (TextUtils.isEmpty(word)) { 42 return false; 43 } 44 final ConcurrentHashMap<String, Dictionary> dictionaries = suggest.getUnigramDictionaries(); 45 final String lowerCasedWord = word.toLowerCase(suggest.mLocale); 46 for (final String key : dictionaries.keySet()) { 47 final Dictionary dictionary = dictionaries.get(key); 48 // It's unclear how realistically 'dictionary' can be null, but the monkey is somehow 49 // managing to get null in here. Presumably the language is changing to a language with 50 // no main dictionary and the monkey manages to type a whole word before the thread 51 // that reads the dictionary is started or something? 52 // Ideally the passed map would come out of a {@link java.util.concurrent.Future} and 53 // would be immutable once it's finished initializing, but concretely a null test is 54 // probably good enough for the time being. 55 if (null == dictionary) continue; 56 if (dictionary.isValidWord(word) 57 || (ignoreCase && dictionary.isValidWord(lowerCasedWord))) { 58 return true; 59 } 60 } 61 return false; 62 } 63 64 public static int getMaxFrequency(final ConcurrentHashMap<String, Dictionary> dictionaries, 65 final String word) { 66 if (TextUtils.isEmpty(word)) { 67 return Dictionary.NOT_A_PROBABILITY; 68 } 69 int maxFreq = -1; 70 for (final String key : dictionaries.keySet()) { 71 final Dictionary dictionary = dictionaries.get(key); 72 if (null == dictionary) continue; 73 final int tempFreq = dictionary.getFrequency(word); 74 if (tempFreq >= maxFreq) { 75 maxFreq = tempFreq; 76 } 77 } 78 return maxFreq; 79 } 80 81 public static boolean suggestionExceedsAutoCorrectionThreshold( 82 final SuggestedWordInfo suggestion, final String consideredWord, 83 final float autoCorrectionThreshold) { 84 if (null != suggestion) { 85 // Shortlist a whitelisted word 86 if (suggestion.mKind == SuggestedWordInfo.KIND_WHITELIST) return true; 87 final int autoCorrectionSuggestionScore = suggestion.mScore; 88 // TODO: when the normalized score of the first suggestion is nearly equals to 89 // the normalized score of the second suggestion, behave less aggressive. 90 final float normalizedScore = BinaryDictionary.calcNormalizedScore( 91 consideredWord, suggestion.mWord, autoCorrectionSuggestionScore); 92 if (DBG) { 93 Log.d(TAG, "Normalized " + consideredWord + "," + suggestion + "," 94 + autoCorrectionSuggestionScore + ", " + normalizedScore 95 + "(" + autoCorrectionThreshold + ")"); 96 } 97 if (normalizedScore >= autoCorrectionThreshold) { 98 if (DBG) { 99 Log.d(TAG, "Auto corrected by S-threshold."); 100 } 101 return !shouldBlockAutoCorrectionBySafetyNet(consideredWord, suggestion.mWord); 102 } 103 } 104 return false; 105 } 106 107 // TODO: Resolve the inconsistencies between the native auto correction algorithms and 108 // this safety net 109 public static boolean shouldBlockAutoCorrectionBySafetyNet(final String typedWord, 110 final String suggestion) { 111 // Safety net for auto correction. 112 // Actually if we hit this safety net, it's a bug. 113 // If user selected aggressive auto correction mode, there is no need to use the safety 114 // net. 115 // If the length of typed word is less than MINIMUM_SAFETY_NET_CHAR_LENGTH, 116 // we should not use net because relatively edit distance can be big. 117 final int typedWordLength = typedWord.length(); 118 if (typedWordLength < MINIMUM_SAFETY_NET_CHAR_LENGTH) { 119 return false; 120 } 121 final int maxEditDistanceOfNativeDictionary = 122 (typedWordLength < 5 ? 2 : typedWordLength / 2) + 1; 123 final int distance = BinaryDictionary.editDistance(typedWord, suggestion); 124 if (DBG) { 125 Log.d(TAG, "Autocorrected edit distance = " + distance 126 + ", " + maxEditDistanceOfNativeDictionary); 127 } 128 if (distance > maxEditDistanceOfNativeDictionary) { 129 if (DBG) { 130 Log.e(TAG, "Safety net: before = " + typedWord + ", after = " + suggestion); 131 Log.e(TAG, "(Error) The edit distance of this correction exceeds limit. " 132 + "Turning off auto-correction."); 133 } 134 return true; 135 } else { 136 return false; 137 } 138 } 139 } 140