Home | History | Annotate | Download | only in latin
      1 /*
      2  * Copyright (C) 2008-2009 Google Inc.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
      5  * use this file except in compliance with the License. You may obtain a copy of
      6  * the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
     12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
     13  * License for the specific language governing permissions and limitations under
     14  * the License.
     15  */
     16 
     17 package com.android.inputmethod.latin;
     18 
     19 import android.content.Context;
     20 import android.text.AutoText;
     21 import android.text.TextUtils;
     22 import android.util.Log;
     23 import android.view.View;
     24 
     25 import java.util.ArrayList;
     26 import java.util.Arrays;
     27 import java.util.List;
     28 
     29 import com.android.inputmethod.latin.WordComposer;
     30 
     31 /**
     32  * This class loads a dictionary and provides a list of suggestions for a given sequence of
     33  * characters. This includes corrections and completions.
     34  * @hide pending API Council Approval
     35  */
     36 public class Suggest implements Dictionary.WordCallback {
     37 
     38     public static final int CORRECTION_NONE = 0;
     39     public static final int CORRECTION_BASIC = 1;
     40     public static final int CORRECTION_FULL = 2;
     41 
     42     static final int LARGE_DICTIONARY_THRESHOLD = 200 * 1000;
     43 
     44     private BinaryDictionary mMainDict;
     45 
     46     private Dictionary mUserDictionary;
     47 
     48     private Dictionary mAutoDictionary;
     49 
     50     private Dictionary mContactsDictionary;
     51 
     52     private int mPrefMaxSuggestions = 12;
     53 
     54     private boolean mAutoTextEnabled;
     55 
     56     private int[] mPriorities = new int[mPrefMaxSuggestions];
     57     // Handle predictive correction for only the first 1280 characters for performance reasons
     58     // If we support scripts that need latin characters beyond that, we should probably use some
     59     // kind of a sparse array or language specific list with a mapping lookup table.
     60     // 1280 is the size of the BASE_CHARS array in ExpandableDictionary, which is a basic set of
     61     // latin characters.
     62     private int[] mNextLettersFrequencies = new int[1280];
     63     private ArrayList<CharSequence> mSuggestions = new ArrayList<CharSequence>();
     64     private ArrayList<CharSequence> mStringPool = new ArrayList<CharSequence>();
     65     private boolean mHaveCorrection;
     66     private CharSequence mOriginalWord;
     67     private String mLowerOriginalWord;
     68     private boolean mCapitalize;
     69 
     70     private int mCorrectionMode = CORRECTION_BASIC;
     71 
     72 
     73     public Suggest(Context context, int dictionaryResId) {
     74         mMainDict = new BinaryDictionary(context, dictionaryResId);
     75         for (int i = 0; i < mPrefMaxSuggestions; i++) {
     76             StringBuilder sb = new StringBuilder(32);
     77             mStringPool.add(sb);
     78         }
     79     }
     80 
     81     public void setAutoTextEnabled(boolean enabled) {
     82         mAutoTextEnabled = enabled;
     83     }
     84 
     85     public int getCorrectionMode() {
     86         return mCorrectionMode;
     87     }
     88 
     89     public void setCorrectionMode(int mode) {
     90         mCorrectionMode = mode;
     91     }
     92 
     93     public boolean hasMainDictionary() {
     94         return mMainDict.getSize() > LARGE_DICTIONARY_THRESHOLD;
     95     }
     96 
     97     /**
     98      * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted
     99      * before the main dictionary, if set.
    100      */
    101     public void setUserDictionary(Dictionary userDictionary) {
    102         mUserDictionary = userDictionary;
    103     }
    104 
    105     /**
    106      * Sets an optional contacts dictionary resource to be loaded.
    107      */
    108     public void setContactsDictionary(Dictionary userDictionary) {
    109         mContactsDictionary = userDictionary;
    110     }
    111 
    112     public void setAutoDictionary(Dictionary autoDictionary) {
    113         mAutoDictionary = autoDictionary;
    114     }
    115 
    116     /**
    117      * Number of suggestions to generate from the input key sequence. This has
    118      * to be a number between 1 and 100 (inclusive).
    119      * @param maxSuggestions
    120      * @throws IllegalArgumentException if the number is out of range
    121      */
    122     public void setMaxSuggestions(int maxSuggestions) {
    123         if (maxSuggestions < 1 || maxSuggestions > 100) {
    124             throw new IllegalArgumentException("maxSuggestions must be between 1 and 100");
    125         }
    126         mPrefMaxSuggestions = maxSuggestions;
    127         mPriorities = new int[mPrefMaxSuggestions];
    128         collectGarbage();
    129         while (mStringPool.size() < mPrefMaxSuggestions) {
    130             StringBuilder sb = new StringBuilder(32);
    131             mStringPool.add(sb);
    132         }
    133     }
    134 
    135     private boolean haveSufficientCommonality(String original, CharSequence suggestion) {
    136         final int originalLength = original.length();
    137         final int suggestionLength = suggestion.length();
    138         final int minLength = Math.min(originalLength, suggestionLength);
    139         if (minLength <= 2) return true;
    140         int matching = 0;
    141         int lessMatching = 0; // Count matches if we skip one character
    142         int i;
    143         for (i = 0; i < minLength; i++) {
    144             final char origChar = ExpandableDictionary.toLowerCase(original.charAt(i));
    145             if (origChar == ExpandableDictionary.toLowerCase(suggestion.charAt(i))) {
    146                 matching++;
    147                 lessMatching++;
    148             } else if (i + 1 < suggestionLength
    149                     && origChar == ExpandableDictionary.toLowerCase(suggestion.charAt(i + 1))) {
    150                 lessMatching++;
    151             }
    152         }
    153         matching = Math.max(matching, lessMatching);
    154 
    155         if (minLength <= 4) {
    156             return matching >= 2;
    157         } else {
    158             return matching > minLength / 2;
    159         }
    160     }
    161 
    162     /**
    163      * Returns a list of words that match the list of character codes passed in.
    164      * This list will be overwritten the next time this function is called.
    165      * @param a view for retrieving the context for AutoText
    166      * @param codes the list of codes. Each list item contains an array of character codes
    167      * in order of probability where the character at index 0 in the array has the highest
    168      * probability.
    169      * @return list of suggestions.
    170      */
    171     public List<CharSequence> getSuggestions(View view, WordComposer wordComposer,
    172             boolean includeTypedWordIfValid) {
    173         mHaveCorrection = false;
    174         mCapitalize = wordComposer.isCapitalized();
    175         collectGarbage();
    176         Arrays.fill(mPriorities, 0);
    177         Arrays.fill(mNextLettersFrequencies, 0);
    178 
    179         // Save a lowercase version of the original word
    180         mOriginalWord = wordComposer.getTypedWord();
    181         if (mOriginalWord != null) {
    182             mOriginalWord = mOriginalWord.toString();
    183             mLowerOriginalWord = mOriginalWord.toString().toLowerCase();
    184         } else {
    185             mLowerOriginalWord = "";
    186         }
    187         // Search the dictionary only if there are at least 2 characters
    188         if (wordComposer.size() > 1) {
    189             if (mUserDictionary != null || mContactsDictionary != null) {
    190                 if (mUserDictionary != null) {
    191                     mUserDictionary.getWords(wordComposer, this, mNextLettersFrequencies);
    192                 }
    193                 if (mContactsDictionary != null) {
    194                     mContactsDictionary.getWords(wordComposer, this, mNextLettersFrequencies);
    195                 }
    196 
    197                 if (mSuggestions.size() > 0 && isValidWord(mOriginalWord)
    198                         && mCorrectionMode == CORRECTION_FULL) {
    199                     mHaveCorrection = true;
    200                 }
    201             }
    202             mMainDict.getWords(wordComposer, this, mNextLettersFrequencies);
    203             if (mCorrectionMode == CORRECTION_FULL && mSuggestions.size() > 0) {
    204                 mHaveCorrection = true;
    205             }
    206         }
    207         if (mOriginalWord != null) {
    208             mSuggestions.add(0, mOriginalWord.toString());
    209         }
    210 
    211         // Check if the first suggestion has a minimum number of characters in common
    212         if (mCorrectionMode == CORRECTION_FULL && mSuggestions.size() > 1) {
    213             if (!haveSufficientCommonality(mLowerOriginalWord, mSuggestions.get(1))) {
    214                 mHaveCorrection = false;
    215             }
    216         }
    217 
    218         if (mAutoTextEnabled) {
    219             int i = 0;
    220             int max = 6;
    221             // Don't autotext the suggestions from the dictionaries
    222             if (mCorrectionMode == CORRECTION_BASIC) max = 1;
    223             while (i < mSuggestions.size() && i < max) {
    224                 String suggestedWord = mSuggestions.get(i).toString().toLowerCase();
    225                 CharSequence autoText =
    226                         AutoText.get(suggestedWord, 0, suggestedWord.length(), view);
    227                 // Is there an AutoText correction?
    228                 boolean canAdd = autoText != null;
    229                 // Is that correction already the current prediction (or original word)?
    230                 canAdd &= !TextUtils.equals(autoText, mSuggestions.get(i));
    231                 // Is that correction already the next predicted word?
    232                 if (canAdd && i + 1 < mSuggestions.size() && mCorrectionMode != CORRECTION_BASIC) {
    233                     canAdd &= !TextUtils.equals(autoText, mSuggestions.get(i + 1));
    234                 }
    235                 if (canAdd) {
    236                     mHaveCorrection = true;
    237                     mSuggestions.add(i + 1, autoText);
    238                     i++;
    239                 }
    240                 i++;
    241             }
    242         }
    243 
    244         removeDupes();
    245         return mSuggestions;
    246     }
    247 
    248     public int[] getNextLettersFrequencies() {
    249         return mNextLettersFrequencies;
    250     }
    251 
    252     private void removeDupes() {
    253         final ArrayList<CharSequence> suggestions = mSuggestions;
    254         if (suggestions.size() < 2) return;
    255         int i = 1;
    256         // Don't cache suggestions.size(), since we may be removing items
    257         while (i < suggestions.size()) {
    258             final CharSequence cur = suggestions.get(i);
    259             // Compare each candidate with each previous candidate
    260             for (int j = 0; j < i; j++) {
    261                 CharSequence previous = suggestions.get(j);
    262                 if (TextUtils.equals(cur, previous)) {
    263                     removeFromSuggestions(i);
    264                     i--;
    265                     break;
    266                 }
    267             }
    268             i++;
    269         }
    270     }
    271 
    272     private void removeFromSuggestions(int index) {
    273         CharSequence garbage = mSuggestions.remove(index);
    274         if (garbage != null && garbage instanceof StringBuilder) {
    275             mStringPool.add(garbage);
    276         }
    277     }
    278 
    279     public boolean hasMinimalCorrection() {
    280         return mHaveCorrection;
    281     }
    282 
    283     private boolean compareCaseInsensitive(final String mLowerOriginalWord,
    284             final char[] word, final int offset, final int length) {
    285         final int originalLength = mLowerOriginalWord.length();
    286         if (originalLength == length && Character.isUpperCase(word[offset])) {
    287             for (int i = 0; i < originalLength; i++) {
    288                 if (mLowerOriginalWord.charAt(i) != Character.toLowerCase(word[offset+i])) {
    289                     return false;
    290                 }
    291             }
    292             return true;
    293         }
    294         return false;
    295     }
    296 
    297     public boolean addWord(final char[] word, final int offset, final int length, final int freq) {
    298         int pos = 0;
    299         final int[] priorities = mPriorities;
    300         final int prefMaxSuggestions = mPrefMaxSuggestions;
    301         // Check if it's the same word, only caps are different
    302         if (compareCaseInsensitive(mLowerOriginalWord, word, offset, length)) {
    303             pos = 0;
    304         } else {
    305             // Check the last one's priority and bail
    306             if (priorities[prefMaxSuggestions - 1] >= freq) return true;
    307             while (pos < prefMaxSuggestions) {
    308                 if (priorities[pos] < freq
    309                         || (priorities[pos] == freq && length < mSuggestions
    310                                 .get(pos).length())) {
    311                     break;
    312                 }
    313                 pos++;
    314             }
    315         }
    316 
    317         if (pos >= prefMaxSuggestions) {
    318             return true;
    319         }
    320         System.arraycopy(priorities, pos, priorities, pos + 1,
    321                 prefMaxSuggestions - pos - 1);
    322         priorities[pos] = freq;
    323         int poolSize = mStringPool.size();
    324         StringBuilder sb = poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1)
    325                 : new StringBuilder(32);
    326         sb.setLength(0);
    327         if (mCapitalize) {
    328             sb.append(Character.toUpperCase(word[offset]));
    329             if (length > 1) {
    330                 sb.append(word, offset + 1, length - 1);
    331             }
    332         } else {
    333             sb.append(word, offset, length);
    334         }
    335         mSuggestions.add(pos, sb);
    336         if (mSuggestions.size() > prefMaxSuggestions) {
    337             CharSequence garbage = mSuggestions.remove(prefMaxSuggestions);
    338             if (garbage instanceof StringBuilder) {
    339                 mStringPool.add(garbage);
    340             }
    341         }
    342         return true;
    343     }
    344 
    345     public boolean isValidWord(final CharSequence word) {
    346         if (word == null || word.length() == 0) {
    347             return false;
    348         }
    349         return mMainDict.isValidWord(word)
    350                 || (mUserDictionary != null && mUserDictionary.isValidWord(word))
    351                 || (mAutoDictionary != null && mAutoDictionary.isValidWord(word))
    352                 || (mContactsDictionary != null && mContactsDictionary.isValidWord(word));
    353     }
    354 
    355     private void collectGarbage() {
    356         int poolSize = mStringPool.size();
    357         int garbageSize = mSuggestions.size();
    358         while (poolSize < mPrefMaxSuggestions && garbageSize > 0) {
    359             CharSequence garbage = mSuggestions.get(garbageSize - 1);
    360             if (garbage != null && garbage instanceof StringBuilder) {
    361                 mStringPool.add(garbage);
    362                 poolSize++;
    363             }
    364             garbageSize--;
    365         }
    366         if (poolSize == mPrefMaxSuggestions + 1) {
    367             Log.w("Suggest", "String pool got too big: " + poolSize);
    368         }
    369         mSuggestions.clear();
    370     }
    371 
    372     public void close() {
    373         if (mMainDict != null) {
    374             mMainDict.close();
    375         }
    376     }
    377 }
    378