/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin;

import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;

import android.text.TextUtils;
import android.util.Log;

import java.util.concurrent.ConcurrentHashMap;

public final class AutoCorrection {
    private static final boolean DBG = LatinImeLogger.sDBG;
    private static final String TAG = AutoCorrection.class.getSimpleName();
    private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4;

    private AutoCorrection() {
        // Purely static class: can't instantiate.
    }

    public static boolean isValidWord(final ConcurrentHashMap<String, Dictionary> dictionaries,
            CharSequence word, boolean ignoreCase) {
        if (TextUtils.isEmpty(word)) {
            return false;
        }
        final CharSequence lowerCasedWord = word.toString().toLowerCase();
        for (final String key : dictionaries.keySet()) {
            final Dictionary dictionary = dictionaries.get(key);
            // It's unclear how realistically 'dictionary' can be null, but the monkey is somehow
            // managing to get null in here. Presumably the language is changing to a language with
            // no main dictionary and the monkey manages to type a whole word before the thread
            // that reads the dictionary is started or something?
            // Ideally the passed map would come out of a {@link java.util.concurrent.Future} and
            // would be immutable once it's finished initializing, but concretely a null test is
            // probably good enough for the time being.
            if (null == dictionary) continue;
            if (dictionary.isValidWord(word)
                    || (ignoreCase && dictionary.isValidWord(lowerCasedWord))) {
                return true;
            }
        }
        return false;
    }

    public static int getMaxFrequency(final ConcurrentHashMap<String, Dictionary> dictionaries,
            CharSequence word) {
        if (TextUtils.isEmpty(word)) {
            return Dictionary.NOT_A_PROBABILITY;
        }
        int maxFreq = -1;
        for (final String key : dictionaries.keySet()) {
            final Dictionary dictionary = dictionaries.get(key);
            if (null == dictionary) continue;
            final int tempFreq = dictionary.getFrequency(word);
            if (tempFreq >= maxFreq) {
                maxFreq = tempFreq;
            }
        }
        return maxFreq;
    }
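
    // A minimal usage sketch for the two lookups above, assuming a hypothetical caller that
    // already holds a populated dictionary map (the "main" key and the mainDictionary instance
    // are illustrative, not defined in this class):
    //
    //   final ConcurrentHashMap<String, Dictionary> dictionaries = new ConcurrentHashMap<>();
    //   dictionaries.put("main", mainDictionary);
    //   final boolean valid = AutoCorrection.isValidWord(dictionaries, "hello", true /* ignoreCase */);
    //   final int maxFreq = AutoCorrection.getMaxFrequency(dictionaries, "hello"); // -1 if unknown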

    // Returns true if this is in any of the dictionaries.
    public static boolean isInTheDictionary(
            final ConcurrentHashMap<String, Dictionary> dictionaries,
            final CharSequence word, final boolean ignoreCase) {
        return isValidWord(dictionaries, word, ignoreCase);
    }

    public static boolean suggestionExceedsAutoCorrectionThreshold(SuggestedWordInfo suggestion,
            CharSequence consideredWord, float autoCorrectionThreshold) {
        if (null != suggestion) {
            // Shortlist a whitelisted word
            if (suggestion.mKind == SuggestedWordInfo.KIND_WHITELIST) return true;
            final int autoCorrectionSuggestionScore = suggestion.mScore;
            // TODO: when the normalized score of the first suggestion is nearly equal to
            // the normalized score of the second suggestion, behave less aggressively.
            final float normalizedScore = BinaryDictionary.calcNormalizedScore(
                    consideredWord.toString(), suggestion.mWord.toString(),
                    autoCorrectionSuggestionScore);
            if (DBG) {
                Log.d(TAG, "Normalized " + consideredWord + "," + suggestion + ","
                        + autoCorrectionSuggestionScore + ", " + normalizedScore
                        + "(" + autoCorrectionThreshold + ")");
            }
            if (normalizedScore >= autoCorrectionThreshold) {
                if (DBG) {
                    Log.d(TAG, "Auto corrected by S-threshold.");
                }
                return !shouldBlockAutoCorrectionBySafetyNet(consideredWord.toString(),
                        suggestion.mWord);
            }
        }
        return false;
    }

    // TODO: Resolve the inconsistencies between the native auto correction algorithms and
    // this safety net
    public static boolean shouldBlockAutoCorrectionBySafetyNet(final String typedWord,
            final CharSequence suggestion) {
        // Safety net for auto correction.
        // Actually, if we hit this safety net, it's a bug.
        // If the user selected aggressive auto-correction mode, there is no need to use the
        // safety net.
        // If the length of the typed word is less than MINIMUM_SAFETY_NET_CHAR_LENGTH,
        // we should not use the safety net because the edit distance can be relatively large.
        final int typedWordLength = typedWord.length();
        if (typedWordLength < MINIMUM_SAFETY_NET_CHAR_LENGTH) {
            return false;
        }
        final int maxEditDistanceOfNativeDictionary =
                (typedWordLength < 5 ? 2 : typedWordLength / 2) + 1;
        final int distance = BinaryDictionary.editDistance(typedWord, suggestion.toString());
        if (DBG) {
            Log.d(TAG, "Autocorrected edit distance = " + distance
                    + ", " + maxEditDistanceOfNativeDictionary);
        }
        if (distance > maxEditDistanceOfNativeDictionary) {
            if (DBG) {
                Log.e(TAG, "Safety net: before = " + typedWord + ", after = " + suggestion);
                Log.e(TAG, "(Error) The edit distance of this correction exceeds limit. "
                        + "Turning off auto-correction.");
            }
            return true;
        } else {
            return false;
        }
    }
}
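
// Worked example of the safety net limit above, with illustrative numbers (not taken from this
// class): for a typed word of length 4 the allowed edit distance is 2 + 1 = 3, and for length 8
// it is 8 / 2 + 1 = 5, so an 8-character typed word whose top suggestion sits at edit distance 6
// would have its auto-correction blocked even if the suggestion cleared the score threshold.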